desc: Tests of conversion to and from the RQL string type tests: - def: cd: japanese_hello = 'こんにちは' # Python supports unicode strings with the u'' pattern, except 3.0-3.2 py: japanese_hello = u'こんにちは' py3.0: japanese_hello = 'こんにちは' py3.1: japanese_hello = 'こんにちは' py3.2: japanese_hello = 'こんにちは' # Simple strings - cd: - r('str') - r.expr('str') py: r.expr('str') ot: "str" - cd: - r("str") - r.expr("str") py: r.expr("str") ot: "str" # Unicode - cd: py: cd: r.expr(u'str') ot: u'str' py3.0: r.expr('str') py3.1: r.expr('str') py3.2: r.expr('str') ot: 'str' - cd: r.expr(japanese_hello) ot: cd: 'こんにちは' py: u'こんにちは' py3.0: 'こんにちは' py3.1: 'こんにちは' py3.2: 'こんにちは' - cd: r.expr('foo').type_of() ot: 'STRING' # test coercions - cd: r.expr('foo').coerce_to('string') ot: 'foo' - cd: r.expr('-1.2').coerce_to('NUMBER') ot: -1.2 - cd: r.expr('--1.2').coerce_to('NUMBER') ot: err("ReqlQueryLogicError", "Could not coerce `--1.2` to NUMBER.", []) - cd: r.expr('-1.2-').coerce_to('NUMBER') ot: err("ReqlQueryLogicError", "Could not coerce `-1.2-` to NUMBER.", []) - cd: r.expr('0xa').coerce_to('NUMBER') ot: 10 - cd: r.expr('inf').coerce_to('NUMBER') ot: err("ReqlQueryLogicError", "Non-finite number: inf", []) # count is defined as the number of unicode codepoints - cd: r.expr('hello, world!').count() ot: 13 - cd: r.expr(japanese_hello).count() ot: 5 # slice is defined on unicode codepoints - cd: r.expr('hello').slice(1) ot: 'ello' - cd: r.expr('hello').slice(-1) ot: 'o' - cd: r.expr('hello').slice(-4,3) ot: 'el' - cd: r.expr('hello').slice(-99) ot: 'hello' - cd: r.expr('hello').slice(0) ot: 'hello' - cd: r.expr(japanese_hello).slice(1) ot: cd: 'んにちは' py: u'んにちは' py3.0: 'んにちは' py3.1: 'んにちは' py3.2: 'んにちは' - cd: r.expr(japanese_hello).slice(1,2) ot: cd: 'ん' py: u'ん' py3.0: 'ん' py3.1: 'ん' py3.2: 'ん' - cd: r.expr(japanese_hello).slice(-3) ot: cd: 'にちは' py: u'にちは' py3.0: 'にちは' py3.1: 'にちは' py3.2: 'にちは' # This is how these edge cases are handled in Python. 
- cd: r.expr('').split() ot: [] - cd: r.expr('').split(null) ot: [] - cd: r.expr('').split(' ') ot: [''] - cd: r.expr('').split('') ot: [] - cd: r.expr('').split(null, 5) ot: [] - cd: r.expr('').split(' ', 5) ot: [''] - cd: r.expr('').split('', 5) ot: [] - cd: r.expr('aaaa bbbb cccc ').split() ot: ['aaaa', 'bbbb', 'cccc'] - cd: r.expr('aaaa bbbb cccc ').split(null) ot: ['aaaa', 'bbbb', 'cccc'] - cd: r.expr('aaaa bbbb cccc ').split(' ') ot: ['aaaa', 'bbbb', '', 'cccc', ''] - cd: r.expr('aaaa bbbb cccc ').split('') ot: ['a', 'a', 'a', 'a', ' ', 'b', 'b', 'b', 'b', ' ', ' ', 'c', 'c', 'c', 'c', ' '] - cd: r.expr('aaaa bbbb cccc ').split('b') ot: ['aaaa ', '', '', '', ' cccc '] - cd: r.expr('aaaa bbbb cccc ').split('bb') ot: ['aaaa ', '', ' cccc '] - cd: r.expr('aaaa bbbb cccc ').split(' bbbb ') ot: ['aaaa', 'cccc '] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split('bb') ot: ['aaaa ', '', ' cccc b d ', ' e ', '', ' f'] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ') ot: ['aaaa', 'cccc b d bb e bbbb f'] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ') ot: ['aaaa', 'cccc b d bb e', 'f'] - cd: r.expr('aaaa bbbb cccc ').split(null, 3) ot: ['aaaa', 'bbbb', 'cccc'] - cd: r.expr('aaaa bbbb cccc ').split(' ', 5) ot: ['aaaa', 'bbbb', '', 'cccc', ''] - cd: r.expr('aaaa bbbb cccc ').split('', 5) ot: ['a', 'a', 'a', 'a', ' ', 'bbbb cccc '] - cd: r.expr('aaaa bbbb cccc ').split('b', 5) ot: ['aaaa ', '', '', '', ' cccc '] - cd: r.expr('aaaa bbbb cccc ').split('bb', 3) ot: ['aaaa ', '', ' cccc '] - cd: r.expr('aaaa bbbb cccc ').split(' bbbb ', 2) ot: ['aaaa', 'cccc '] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split('bb', 6) ot: ['aaaa ', '', ' cccc b d ', ' e ', '', ' f'] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 2) ot: ['aaaa', 'cccc b d bb e bbbb f'] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 3) ot: ['aaaa', 'cccc b d bb e', 'f'] - cd: r.expr('aaaa bbbb cccc ').split(null, 2) ot: ['aaaa', 'bbbb', 'cccc 
'] - cd: r.expr("a b ").split(null, 2) ot: ["a", "b"] - cd: r.expr('aaaa bbbb cccc ').split(' ', 4) ot: ['aaaa', 'bbbb', '', 'cccc', ''] - cd: r.expr('aaaa bbbb cccc ').split('', 4) ot: ['a', 'a', 'a', 'a', ' bbbb cccc '] - cd: r.expr('aaaa bbbb cccc ').split('b', 4) ot: ['aaaa ', '', '', '', ' cccc '] - cd: r.expr('aaaa bbbb cccc ').split('bb', 2) ot: ['aaaa ', '', ' cccc '] - cd: r.expr('aaaa bbbb cccc ').split(' bbbb ', 1) ot: ['aaaa', 'cccc '] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split('bb', 5) ot: ['aaaa ', '', ' cccc b d ', ' e ', '', ' f'] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 1) ot: ['aaaa', 'cccc b d bb e bbbb f'] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 2) ot: ['aaaa', 'cccc b d bb e', 'f'] - cd: r.expr('aaaa bbbb cccc ').split(null, 1) ot: ['aaaa', 'bbbb cccc '] - cd: r.expr('aaaa bbbb cccc ').split(' ', 2) ot: ['aaaa', 'bbbb', ' cccc '] - cd: r.expr('aaaa bbbb cccc ').split('', 2) ot: ['a', 'a', 'aa bbbb cccc '] - cd: r.expr('aaaa bbbb cccc ').split('b', 2) ot: ['aaaa ', '', 'bb cccc '] - cd: r.expr('aaaa bbbb cccc ').split('bb', 2) ot: ['aaaa ', '', ' cccc '] - cd: r.expr('aaaa bbbb cccc ').split(' bbbb ', 2) ot: ['aaaa', 'cccc '] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split('bb', 2) ot: ['aaaa ', '', ' cccc b d bb e bbbb f'] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 2) ot: ['aaaa', 'cccc b d bb e bbbb f'] - cd: r.expr('aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 2) ot: ['aaaa', 'cccc b d bb e', 'f'] - cd: r.expr(' ').split() ot: [] - cd: r.expr(' ').split(null) ot: [] - cd: r.expr(' ').split(' ') ot: ['', '', ''] - cd: r.expr(' ').split(null, 5) ot: [] - cd: r.expr(' ').split(' ', 5) ot: ['', '', ''] - cd: r.expr(' aaaa bbbb cccc ').split() ot: ['aaaa', 'bbbb', 'cccc'] - cd: r.expr(' aaaa bbbb cccc ').split(null) ot: ['aaaa', 'bbbb', 'cccc'] - cd: r.expr(' aaaa bbbb cccc ').split(' ') ot: ['', '', 'aaaa', 'bbbb', '', 'cccc', ''] - cd: r.expr(' aaaa bbbb cccc 
').split('b') ot: [' aaaa ', '', '', '', ' cccc '] - cd: r.expr(' aaaa bbbb cccc ').split('bb') ot: [' aaaa ', '', ' cccc '] - cd: r.expr(' aaaa bbbb cccc ').split(' bbbb ') ot: [' aaaa', 'cccc '] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split('bb') ot: [' aaaa ', '', ' cccc b d ', ' e ', '', ' f'] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ') ot: [' aaaa', 'cccc b d bb e bbbb f'] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ') ot: [' aaaa', 'cccc b d bb e', 'f'] - cd: r.expr(' aaaa bbbb cccc ').split(null, 3) ot: ['aaaa', 'bbbb', 'cccc'] - cd: r.expr(' aaaa bbbb cccc ').split(' ', 5) ot: ['', '', 'aaaa', 'bbbb', '', 'cccc '] - cd: r.expr(' aaaa bbbb cccc ').split('b', 5) ot: [' aaaa ', '', '', '', ' cccc '] - cd: r.expr(' aaaa bbbb cccc ').split('bb', 3) ot: [' aaaa ', '', ' cccc '] - cd: r.expr(' aaaa bbbb cccc ').split(' bbbb ', 2) ot: [' aaaa', 'cccc '] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split('bb', 6) ot: [' aaaa ', '', ' cccc b d ', ' e ', '', ' f'] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 2) ot: [' aaaa', 'cccc b d bb e bbbb f'] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 3) ot: [' aaaa', 'cccc b d bb e', 'f'] - cd: r.expr(' aaaa bbbb cccc ').split(null, 2) ot: ['aaaa', 'bbbb', 'cccc '] - cd: r.expr("a b ").split(null, 2) ot: ["a", "b"] - cd: r.expr(' aaaa bbbb cccc ').split(' ', 4) ot: ['', '', 'aaaa', 'bbbb', ' cccc '] - cd: r.expr(' aaaa bbbb cccc ').split('b', 4) ot: [' aaaa ', '', '', '', ' cccc '] - cd: r.expr(' aaaa bbbb cccc ').split('bb', 2) ot: [' aaaa ', '', ' cccc '] - cd: r.expr(' aaaa bbbb cccc ').split(' bbbb ', 1) ot: [' aaaa', 'cccc '] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split('bb', 5) ot: [' aaaa ', '', ' cccc b d ', ' e ', '', ' f'] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 1) ot: [' aaaa', 'cccc b d bb e bbbb f'] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 2) ot: [' aaaa', 'cccc b d bb e', 
'f'] - cd: r.expr(' aaaa bbbb cccc ').split(null, 1) ot: ['aaaa', 'bbbb cccc '] - cd: r.expr(' aaaa bbbb cccc ').split(' ', 2) ot: ['', '', 'aaaa bbbb cccc '] - cd: r.expr(' aaaa bbbb cccc ').split('b', 2) ot: [' aaaa ', '', 'bb cccc '] - cd: r.expr(' aaaa bbbb cccc ').split('bb', 2) ot: [' aaaa ', '', ' cccc '] - cd: r.expr(' aaaa bbbb cccc ').split(' bbbb ', 2) ot: [' aaaa', 'cccc '] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split('bb', 2) ot: [' aaaa ', '', ' cccc b d bb e bbbb f'] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 2) ot: [' aaaa', 'cccc b d bb e bbbb f'] - cd: r.expr(' aaaa bbbb cccc b d bb e bbbb f').split(' bbbb ', 2) ot: [' aaaa', 'cccc b d bb e', 'f'] - cd: r.expr("abc-dEf-GHJ").upcase() ot: "ABC-DEF-GHJ" - cd: r.expr("abc-dEf-GHJ").downcase() ot: "abc-def-ghj" # Same 3.0-3.2 caveats - py: cd: r.expr(u"f\u00e9oo").split("") ot: [u"f", u"\u00e9", u"o", u"o"] py3.0: r.expr("f\u00e9oo").split("") py3.1: r.expr("f\u00e9oo").split("") py3.2: r.expr("f\u00e9oo").split("") cd: r.expr("f\u00e9oo").split("") ot: ["f", "\u00e9", "o", "o"] - py: cd: r.expr(u"fe\u0301oo").split("") ot: [u"f", u"e\u0301", u"o", u"o"] py3.0: r.expr("fe\u0301oo").split("") py3.1: r.expr("fe\u0301oo").split("") py3.2: r.expr("fe\u0301oo").split("") cd: r.expr("fe\u0301oo").split("") ot: ["f", "e\u0301", "o", "o"] ## Unicode spacing characters. 
## original set from previous work: - cd: r.expr("foo bar\tbaz\nquux\rfred\u000bbarney\u000cwilma").split() py: cd: r.expr(u"foo bar\tbaz\nquux\rfred\u000bbarney\u000cwilma").split() ot: ["foo", "bar", "baz", "quux", "fred", "barney", "wilma"] py3.0: r.expr("foo bar\tbaz\nquux\rfred\u000bbarney\u000cwilma").split() py3.1: r.expr("foo bar\tbaz\nquux\rfred\u000bbarney\u000cwilma").split() py3.2: r.expr("foo bar\tbaz\nquux\rfred\u000bbarney\u000cwilma").split() ot: ["foo", "bar", "baz", "quux", "fred", "barney", "wilma"] ## some specialized Unicode horrors: ## - U+00A0 is nonbreaking space and is in the Zs category ## - U+0085 is the next line character and is not in the Zs category but is considered whitespace ## - U+2001 is em quad space and is in the Zs category ## - U+200B is a zero width space and is not in the Zs category and is not considered whitespace ## - U+2060 is a word joining zero width nonbreaking space and is NOT in any of the Z categories ## - U+2028 is a line separator and is in the Zl category ## - U+2029 is a paragraph separator and is in the Zp category - py: cd: r.expr(u"foo\u00a0bar\u2001baz\u2060quux\u2028fred\u2028barney\u2029wilma\u0085betty\u200b").split() ot: ["foo", "bar", u"baz\u2060quux", "fred", "barney", "wilma", u"betty\u200b"] py3.0: r.expr("foo\u00a0bar\u2001baz\u2060quux\u2028fred\u2028barney\u2029wilma\u0085betty\u200b").split() py3.1: r.expr("foo\u00a0bar\u2001baz\u2060quux\u2028fred\u2028barney\u2029wilma\u0085betty\u200b").split() py3.2: r.expr("foo\u00a0bar\u2001baz\u2060quux\u2028fred\u2028barney\u2029wilma\u0085betty\u200b").split() cd: r.expr("foo\u00a0bar\u2001baz\u2060quux\u2028fred\u2028barney\u2029wilma\u0085betty\u200b").split() ot: ["foo", "bar", "baz\u2060quux", "fred", "barney", "wilma", "betty\u200b"]