diff options
author | Don Anderson <dda@mongodb.com> | 2016-05-30 21:39:51 -0400 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2016-05-31 11:39:51 +1000 |
commit | 27981762bac2b4b1d854826f845866f4f523a270 (patch) | |
tree | 1171c55e2bc95c42c088395be4ef8da892bd8ac1 | |
parent | 12b772612a180db5db82f68920b00e297c43da2a (diff) | |
download | mongo-27981762bac2b4b1d854826f845866f4f523a270.tar.gz |
WT-2268 WT-2597 JSON load/dump Unicode fixes (#2749)
* WT-2268 Change the dump representation so every byte is represented by a single
Unicode character in the range 0x00-0xff. Otherwise arbitrary binary data (that does
not conform to Unicode) cannot be represented. Fix an off-by-one error in counting
bytes on input. Add some better Unicode tests.
* JSON dump output now carries a 'dump version' stamp and also shows the WT version.
Dump input that is too old or too new will be rejected.
* In setting a key for dump, converting a JSON string must always occur first.
* When loading JSON, treat binary data just like strings.
Fixed an error in determining the string length of JSON in the presence
of Unicode strings.
* Add LSM tests to JSON dump/load testing.
* Add more extensive testing for dump/reload of JSON binary data.
* Byte arrays differ from strings for JSON input: they are not null-terminated.
Handle escapes like '\n', '\t', etc. on input.
* Added tests of JSON dump/load of all byte codes.
* whitespace
-rw-r--r-- | src/cursor/cur_dump.c | 21 | ||||
-rw-r--r-- | src/cursor/cur_json.c | 98 | ||||
-rw-r--r-- | src/include/extern.h | 2 | ||||
-rw-r--r-- | src/include/packing.i | 12 | ||||
-rw-r--r-- | src/utilities/util_dump.c | 20 | ||||
-rw-r--r-- | src/utilities/util_dump.h | 11 | ||||
-rw-r--r-- | src/utilities/util_load_json.c | 29 | ||||
-rw-r--r-- | test/suite/test_jsondump01.py | 35 | ||||
-rw-r--r-- | test/suite/test_jsondump02.py | 172 |
9 files changed, 310 insertions, 90 deletions
diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c index a7b1c98871a..32353e0a28d 100644 --- a/src/cursor/cur_dump.c +++ b/src/cursor/cur_dump.c @@ -155,7 +155,9 @@ __curdump_set_key(WT_CURSOR *cursor, ...) WT_SESSION_IMPL *session; uint64_t recno; va_list ap; + const uint8_t *up; const char *p; + bool json; cdump = (WT_CURSOR_DUMP *)cursor; child = cdump->child; @@ -168,16 +170,23 @@ __curdump_set_key(WT_CURSOR *cursor, ...) p = va_arg(ap, const char *); va_end(ap); + json = F_ISSET(cursor, WT_CURSTD_DUMP_JSON); + if (json) + WT_ERR(__wt_json_to_item(session, p, cursor->key_format, + (WT_CURSOR_JSON *)cursor->json_private, true, + &cursor->key)); + if (WT_CURSOR_RECNO(cursor) && !F_ISSET(cursor, WT_CURSTD_RAW)) { - WT_ERR(str2recno(session, p, &recno)); + if (json) { + up = (const uint8_t *)cursor->key.data; + WT_ERR(__wt_vunpack_uint(&up, cursor->key.size, + &recno)); + } else + WT_ERR(str2recno(session, p, &recno)); child->set_key(child, recno); } else { - if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) - WT_ERR(__wt_json_to_item(session, p, cursor->key_format, - (WT_CURSOR_JSON *)cursor->json_private, true, - &cursor->key)); - else + if (!json) WT_ERR(__dump_to_raw(session, p, &cursor->key, F_ISSET(cursor, WT_CURSTD_DUMP_HEX))); diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c index fcb66d3e8b3..133b7b9ac9b 100644 --- a/src/cursor/cur_json.c +++ b/src/cursor/cur_json.c @@ -48,6 +48,10 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, case 't': \ WT_RET(json_uint_arg(session, &jstr, &pv.u.u)); \ break; \ + case 'u': \ + WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \ + pv.type = 'K'; \ + break; \ /* User format strings have already been validated. 
*/ \ WT_ILLEGAL_VALUE(session); \ } \ @@ -493,7 +497,7 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, "invalid Unicode within JSON string"); return (-1); } - src += 5; + src += 4; } backslash = false; } @@ -840,20 +844,17 @@ __wt_json_strlen(const char *src, size_t srclen) if (__wt_hex2byte((const u_char *)src, &lo)) return (-1); src += 2; - /* RFC 3629 */ - if (hi >= 0x8) { - /* 3 bytes total */ - dstlen += 2; - } - else if (hi != 0 || lo >= 0x80) { - /* 2 bytes total */ - dstlen++; - } - /* else 1 byte total */ + if (hi != 0) + /* + * For our dump representation, + * every Unicode character on input + * represents a single byte. + */ + return (-1); } - } + } else + src++; dstlen++; - src++; } if (src != srcend) return (-1); /* invalid input, e.g. final char is '\\' */ @@ -867,55 +868,58 @@ __wt_json_strlen(const char *src, size_t srclen) * the result if zero padded. */ int -__wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen) +__wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, + const char *src, size_t srclen) { - char *dst; + WT_SESSION_IMPL *session; + char ch, *dst; const char *dstend, *srcend; u_char hi, lo; + session = (WT_SESSION_IMPL *)wt_session; + dst = *pdst; dstend = dst + dstlen; srcend = src + srclen; while (src < srcend && dst < dstend) { /* JSON can include any UTF-8 expressed in 4 hex chars. 
*/ - if (*src == '\\') { - if (*++src == 'u') { - if (__wt_hex2byte((const u_char *)++src, &hi)) + if ((ch = *src++) == '\\') + switch (ch = *src++) { + case 'u': + if (__wt_hex2byte((const u_char *)src, &hi)) return (EINVAL); src += 2; if (__wt_hex2byte((const u_char *)src, &lo)) return (EINVAL); src += 2; - /* RFC 3629 */ - if (hi >= 0x8) { - /* 3 bytes total */ - /* byte 0: 1110HHHH */ - /* byte 1: 10HHHHLL */ - /* byte 2: 10LLLLLL */ - *dst++ = (char)(0xe0 | - ((hi >> 4) & 0x0f)); - *dst++ = (char)(0x80 | - ((hi << 2) & 0x3c) | - ((lo >> 6) & 0x03)); - *dst++ = (char)(0x80 | (lo & 0x3f)); - } else if (hi != 0 || lo >= 0x80) { - /* 2 bytes total */ - /* byte 0: 110HHHLL */ - /* byte 1: 10LLLLLL */ - *dst++ = (char)(0xc0 | - (hi << 2) | - ((lo >> 6) & 0x03)); - *dst++ = (char)(0x80 | (lo & 0x3f)); - } else - /* else 1 byte total */ - /* byte 0: 0LLLLLLL */ - *dst++ = (char)lo; + if (hi != 0) { + __wt_errx(NULL, "Unicode \"%6.6s\"" + " byte out of range in JSON", + src - 6); + return (EINVAL); + } + *dst++ = (char)lo; + break; + case 'f': + *dst++ = '\f'; + break; + case 'n': + *dst++ = '\n'; + break; + case 'r': + *dst++ = '\r'; + break; + case 't': + *dst++ = '\t'; + break; + case '"': + case '\\': + *dst++ = ch; + break; + WT_ILLEGAL_VALUE(session); } - else - *dst++ = *src; - } else - *dst++ = *src; - src++; + else + *dst++ = ch; } if (src != srcend) return (ENOMEM); diff --git a/src/include/extern.h b/src/include/extern.h index e8c20930aaf..bb2e6ae47cc 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -297,7 +297,7 @@ extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype extern const char *__wt_json_tokname(int toktype); extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item); extern ssize_t __wt_json_strlen(const char *src, size_t srclen); -extern int __wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen); 
+extern int __wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src, size_t srclen); extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_schema_create_final( WT_SESSION_IMPL *session, char *cfg_arg[], char **value_ret); extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); diff --git a/src/include/packing.i b/src/include/packing.i index 35b2ddc43db..9d5971ed99f 100644 --- a/src/include/packing.i +++ b/src/include/packing.i @@ -260,6 +260,8 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv) return (pv->size); case 'j': case 'J': + case 'K': + /* These formats are only used internally. */ if (pv->type == 'j' || pv->havesize) s = pv->size; else { @@ -269,7 +271,7 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv) len = __wt_json_strlen(pv->u.item.data, pv->u.item.size); WT_ASSERT(session, len >= 0); - s = (size_t)len + 1; + s = (size_t)len + (pv->type == 'K' ? 0 : 1); } return (s); case 's': @@ -357,18 +359,22 @@ __pack_write( break; case 'j': case 'J': + case 'K': + /* These formats are only used internally. 
*/ s = pv->u.item.size; if ((pv->type == 'j' || pv->havesize) && pv->size < s) { s = pv->size; pad = 0; } else if (pv->havesize) pad = pv->size - s; + else if (pv->type == 'K') + pad = 0; else pad = 1; if (s > 0) { oldp = *pp; - WT_RET(__wt_json_strncpy((char **)pp, maxlen, - pv->u.item.data, s)); + WT_RET(__wt_json_strncpy((WT_SESSION *)session, + (char **)pp, maxlen, pv->u.item.data, s)); maxlen -= (size_t)(*pp - oldp); } if (pad > 0) { diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index 0f09009cd4c..3314b5ba485 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -7,6 +7,7 @@ */ #include "util.h" +#include "util_dump.h" static int dump_config(WT_SESSION *, const char *, bool, bool); static int dump_json_begin(WT_SESSION *); @@ -73,7 +74,9 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) if (argc < 1 || (argc != 1 && !json)) return (usage()); - if (json && (ret = dump_json_begin(session)) != 0) + if (json && + ((ret = dump_json_begin(session)) != 0 || + (ret = dump_prefix(session, hex, json)) != 0)) goto err; for (i = 0; i < argc; i++) { @@ -155,7 +158,7 @@ dump_config(WT_SESSION *session, const char *uri, bool hex, bool json) */ cursor->set_key(cursor, uri); if ((ret = cursor->search(cursor)) == 0) { - if (dump_prefix(session, hex, json) != 0 || + if ((!json && dump_prefix(session, hex, json) != 0) || dump_table_config(session, cursor, uri, json) != 0 || dump_suffix(session, json) != 0) ret = 1; @@ -456,17 +459,20 @@ dump_prefix(WT_SESSION *session, bool hex, bool json) { int vmajor, vminor, vpatch; - if (json) - return (0); - (void)wiredtiger_version(&vmajor, &vminor, &vpatch); - if (printf( + if (!json && (printf( "WiredTiger Dump (WiredTiger Version %d.%d.%d)\n", vmajor, vminor, vpatch) < 0 || printf("Format=%s\n", hex ? 
"hex" : "print") < 0 || - printf("Header\n") < 0) + printf("Header\n") < 0)) return (util_err(session, EIO, NULL)); + else if (json && printf( + " \"%s\" : \"%d (%d.%d.%d)\",\n", + DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION, + vmajor, vminor, vpatch) < 0) + return (util_err(session, EIO, NULL)); + return (0); } diff --git a/src/utilities/util_dump.h b/src/utilities/util_dump.h new file mode 100644 index 00000000000..e3fd8e6a501 --- /dev/null +++ b/src/utilities/util_dump.h @@ -0,0 +1,11 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#define DUMP_JSON_VERSION_MARKER "WiredTiger Dump Version" +#define DUMP_JSON_CURRENT_VERSION 1 +#define DUMP_JSON_SUPPORTED_VERSION 1 diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c index 3a1f847a95f..f1f6675e99c 100644 --- a/src/utilities/util_load_json.c +++ b/src/utilities/util_load_json.c @@ -7,6 +7,7 @@ */ #include "util.h" +#include "util_dump.h" #include "util_load.h" /* @@ -186,9 +187,8 @@ json_strdup(WT_SESSION *session, JSON_INPUT_STATE *ins, char **resultp) } *resultp = result; resultcpy = result; - if ((ret = __wt_json_strncpy(&resultcpy, (size_t)resultlen, src, - srclen)) - != 0) { + if ((ret = __wt_json_strncpy( + session, &resultcpy, (size_t)resultlen, src, srclen)) != 0) { ret = util_err(session, ret, NULL); goto err; } @@ -344,13 +344,16 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags) { CONFIG_LIST cl; WT_DECL_RET; - int toktype; static const char *json_markers[] = { "\"config\"", "\"colgroups\"", "\"indices\"", "\"data\"", NULL }; char *config, *tableuri; + int curversion, toktype; + bool hasversion; memset(&cl, 0, sizeof(cl)); tableuri = NULL; + hasversion = false; + JSON_EXPECT(session, ins, '{'); while (json_peek(session, ins) == 's') { JSON_EXPECT(session, ins, 's'); @@ -358,6 +361,24 @@ 
json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags) snprintf(tableuri, ins->toklen, "%.*s", (int)(ins->toklen - 2), ins->tokstart + 1); JSON_EXPECT(session, ins, ':'); + if (!hasversion) { + if (strcmp(tableuri, DUMP_JSON_VERSION_MARKER) != 0) { + ret = util_err(session, ENOTSUP, + "missing \"%s\"", DUMP_JSON_VERSION_MARKER); + goto err; + } + hasversion = true; + JSON_EXPECT(session, ins, 's'); + if ((curversion = atoi(ins->tokstart + 1)) <= 0 || + curversion > DUMP_JSON_SUPPORTED_VERSION) { + ret = util_err(session, ENOTSUP, + "unsupported JSON dump version \"%.*s\"", + (int)(ins->toklen - 1), ins->tokstart + 1); + goto err; + } + JSON_EXPECT(session, ins, ','); + continue; + } /* * Allow any ordering of 'config', 'colgroups', diff --git a/test/suite/test_jsondump01.py b/test/suite/test_jsondump01.py index ddf871d9a24..10262edc777 100644 --- a/test/suite/test_jsondump01.py +++ b/test/suite/test_jsondump01.py @@ -77,16 +77,22 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): ('string', dict(keyfmt='S')) ] types = [ - ('file', dict(type='file:', - name='file', + ('file', dict(uri='file:', config='', lsm=False, populate=simple_populate, populate_check=simple_populate_check_cursor)), - ('table-simple', dict(type='table:', - name='table-simple', + ('lsm', dict(uri='lsm:', config='', lsm=True, populate=simple_populate, populate_check=simple_populate_check_cursor)), - ('table-complex', dict(type='table:', - name='table-complex', + ('table-simple', dict(uri='table:', config='', lsm=False, + populate=simple_populate, + populate_check=simple_populate_check_cursor)), + ('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True, + populate=simple_populate, + populate_check=simple_populate_check_cursor)), + ('table-complex', dict(uri='table:', config='', lsm=False, + populate=complex_populate, + populate_check=complex_populate_check_cursor)), + ('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True, 
populate=complex_populate, populate_check=complex_populate_check_cursor)) ] @@ -95,9 +101,14 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): # Dump using util, re-load using python's JSON, and do a content comparison. def test_jsondump_util(self): + # LSM and column-store isn't a valid combination. + if self.lsm and self.keyfmt == 'r': + return + # Create the object. - uri = self.type + self.name - self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries) + uri = self.uri + self.name + self.populate(self, uri, self.config + ',key_format=' + self.keyfmt, + self.nentries) # Dump the object. self.runWt(['dump', '-j', uri], outfilename='jsondump.out') @@ -125,9 +136,13 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): # Dump using util, re-load using python's JSON, and do a content comparison. def test_jsonload_util(self): + # LSM and column-store isn't a valid combination. + if self.lsm and self.keyfmt == 'r': + return + # Create the object. - uri = self.type + self.name - uri2 = self.type + self.name2 + uri = self.uri + self.name + uri2 = self.uri + self.name2 self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries) # Dump the object. diff --git a/test/suite/test_jsondump02.py b/test/suite/test_jsondump02.py index c6cd464e453..251237f3faf 100644 --- a/test/suite/test_jsondump02.py +++ b/test/suite/test_jsondump02.py @@ -28,16 +28,19 @@ import os import wiredtiger, wttest +from suite_subprocess import suite_subprocess # test_jsondump.py # Test dump output from json cursors. 
-class test_jsondump02(wttest.WiredTigerTestCase): +class test_jsondump02(wttest.WiredTigerTestCase, suite_subprocess): table_uri1 = 'table:jsondump02a.wt' table_uri2 = 'table:jsondump02b.wt' table_uri3 = 'table:jsondump02c.wt' basename_uri4 = 'jsondump02d.wt' table_uri4 = 'table:' + basename_uri4 + table_uri5 = 'table:jsondump02e.wt' + table_uri6 = 'table:jsondump02f.wt' def set_kv(self, uri, key, val): cursor = self.session.open_cursor(uri, None, None) @@ -80,15 +83,14 @@ class test_jsondump02(wttest.WiredTigerTestCase): pos = 0 try: for insert in inserts: - #tty_pr('Insert: ' + str(insert)) cursor[insert[0]] = insert[1] finally: cursor.close() - # Create JSON cursors and test them directly. def test_json_cursor(self): """ - Create a table, add a key, get it back + Create JSON cursors and test them directly, also test + dump/load commands. """ extra_params = ',allocation_size=512,' +\ 'internal_page_max=16384,leaf_page_max=131072' @@ -112,7 +114,12 @@ class test_jsondump02(wttest.WiredTigerTestCase): self.session.create(uri4index3, "columns=(i2,i4)") self.set_kv(self.table_uri1, 'KEY000', 'string value') - self.set_kv(self.table_uri1, 'KEY001', '\'\"({[]})\"\', etc. allowed') + self.set_kv(self.table_uri1, 'KEY001', '\'\"({[]})\"\'\\, etc. allowed') + # \u03c0 is pi in Unicode, converted by Python to UTF-8: 0xcf 0x80. + # Here's how UTF-8 might be used. + self.set_kv(self.table_uri1, 'KEY002', u'\u03c0'.encode('utf-8')) + # 0xf5-0xff are illegal in Unicode, but may occur legally in C strings. + self.set_kv(self.table_uri1, 'KEY003', '\xff\xfe') self.set_kv2(self.table_uri2, 'KEY000', 123, 'str0') self.set_kv2(self.table_uri2, 'KEY001', 234, 'str1') self.set_kv(self.table_uri3, 1, '\x01\x02\x03') @@ -122,7 +129,9 @@ class test_jsondump02(wttest.WiredTigerTestCase): table1_json = ( ('"key0" : "KEY000"', '"value0" : "string value"'), ('"key0" : "KEY001"', '"value0" : ' + - '"\'\\\"({[]})\\\"\', etc. allowed"')) + '"\'\\\"({[]})\\\"\'\\\\, etc. 
allowed"'), + ('"key0" : "KEY002"', '"value0" : "\\u00cf\\u0080"'), + ('"key0" : "KEY003"', '"value0" : "\\u00ff\\u00fe"')) self.check_json(self.table_uri1, table1_json) self.session.truncate(self.table_uri1, None, None, None) @@ -206,11 +215,12 @@ class test_jsondump02(wttest.WiredTigerTestCase): ((' "key0"\n:\t"KEY003" ', '"value0":456,"value1"\n\n\r\n:\t\n"str3"'),)) - self.check_json(self.table_uri3, ( - ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'), - ('"key0" : 2', - '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00ff\\u00fe"'))) - self.check_json(self.table_uri4, ( + table3_json = ( + ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'), + ('"key0" : 2', + '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00ff\\u00fe"')) + self.check_json(self.table_uri3, table3_json) + table4_json = ( ('"ikey" : 1,\n"Skey" : "key1"', '"S1" : "val1",\n"i2" : 1,\n"S3" : "val1",\n"i4" : 1'), ('"ikey" : 2,\n"Skey" : "key2"', @@ -218,7 +228,8 @@ class test_jsondump02(wttest.WiredTigerTestCase): ('"ikey" : 3,\n"Skey" : "key3"', '"S1" : "val9",\n"i2" : 9,\n"S3" : "val27",\n"i4" : 27'), ('"ikey" : 4,\n"Skey" : "key4"', - '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64'))) + '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64')) + self.check_json(self.table_uri4, table4_json) # The dump config currently is not supported for the index type. self.check_json(uri4index1, ( ('"Skey" : "key1"', @@ -248,5 +259,142 @@ class test_jsondump02(wttest.WiredTigerTestCase): ('"i2" : 16,\n"i4" : 64', '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64'))) + # Dump all the tables into a single file, and also each + # table into its own file. 
+ self.runWt(['dump', '-j', + self.table_uri1, + self.table_uri2, + self.table_uri3, + self.table_uri4], + outfilename='jsondump-all.out') + self.runWt(['dump', '-j', self.table_uri1], outfilename='jsondump1.out') + self.runWt(['dump', '-j', self.table_uri2], outfilename='jsondump2.out') + self.runWt(['dump', '-j', self.table_uri3], outfilename='jsondump3.out') + self.runWt(['dump', '-j', self.table_uri4], outfilename='jsondump4.out') + self.session.drop(self.table_uri1) + self.session.drop(self.table_uri2) + self.session.drop(self.table_uri3) + self.session.drop(self.table_uri4) + self.runWt(['load', '-jf', 'jsondump1.out']) + self.session.drop(self.table_uri1) + self.runWt(['load', '-jf', 'jsondump2.out']) + self.session.drop(self.table_uri2) + self.runWt(['load', '-jf', 'jsondump3.out']) + self.session.drop(self.table_uri3) + self.runWt(['load', '-jf', 'jsondump4.out']) + self.session.drop(self.table_uri4) + + # Note: only the first table is loaded. + self.runWt(['load', '-jf', 'jsondump-all.out']) + self.check_json(self.table_uri1, table1_json) + #self.check_json(self.table_uri2, table2_json) + #self.check_json(self.table_uri3, table3_json) + #self.check_json(self.table_uri4, table4_json) + + # Generate two byte keys that cover some range of byte values. + # For simplicity, the keys are monotonically increasing. + # A null byte is disallowed in a string key, so we don't use it. + def generate_key(self, i, k): + k[0] = ((i & 0xffc0) >> 6) + 1 + k[1] = (i & 0x3f) + 1 + + # Generate three byte values: + # i==0 : v:[0x00, 0x01, 0x02] + # i==1 : v:[0x01, 0x02, 0x03] + # etc. + # A null byte is disallowed in a string value, it is replaced by 'X' + def generate_value(self, i, v, isstring): + for j in range(0, 3): + val = (i + j) % 256 + if isstring and val == 0: + val = 88 # 'X' + v[j] = val + + def test_json_all_bytes(self): + """ + Test the generated JSON for all byte values in byte array and + string formats. 
+ """ + self.session.create(self.table_uri5, 'key_format=u,value_format=u') + self.session.create(self.table_uri6, 'key_format=S,value_format=S') + + c5 = self.session.open_cursor(self.table_uri5, None, None) + c6 = self.session.open_cursor(self.table_uri6, None, None) + k = bytearray(b'\x00\x00') + v = bytearray(b'\x00\x00\x00') + for i in range(0, 512): + self.generate_key(i, k) + self.generate_value(i, v, False) + c5[str(k)] = str(v) + self.generate_value(i, v, True) # no embedded nuls + c6[str(k)] = str(v) + c5.close() + c6.close() + + # Build table5_json, we want it to look like this: + # ('"key0" : "\u0001\u0001"', '"value0" : "\u0000\u0001\u0002"'), + # ('"key0" : "\u0001\u0002"', '"value0" : "\u0001\u0002\u0003"')) + # ('"key0" : "\u0001\u0003"', '"value0" : "\u0003\u0003\u0004"')) + # ... + # table6_json is similar, except that printable values like '\u0041' + # would appear as 'A'. The string type cannot have embedded nulls, + # so '\u0000' in table6_json appears instead as an 'X'. + # + # Start by creating two tables of individual Unicode values. + # bin_unicode[] contains only the \u escape sequences. + # mix_unicode[] contains printable characters or \t \n etc. escapes + bin_unicode = [] + mix_unicode = [] + for i in range(0, 256): + u = "\\u00" + hex(256 + i)[3:] # e.g. 
"\u00ab") + bin_unicode.append(u) + mix_unicode.append(u) + for i in range(0x20, 0x7f): + mix_unicode[i] = chr(i) + mix_unicode[ord('"')] = '\\"' + mix_unicode[ord('\\')] = '\\\\' + mix_unicode[ord('\f')] = '\\f' + mix_unicode[ord('\n')] = '\\n' + mix_unicode[ord('\r')] = '\\r' + mix_unicode[ord('\t')] = '\\t' + + table5_json = [] + table6_json = [] + for i in range(0, 512): + self.generate_key(i, k) + self.generate_value(i, v, False) + j = i if (i > 0 and i < 254) or (i > 256 and i < 510) else 88 + table5_json.append(('"key0" : "' + bin_unicode[k[0]] + + bin_unicode[k[1]] + '"', + '"value0" : "' + bin_unicode[v[0]] + + bin_unicode[v[1]] + + bin_unicode[v[2]] + '"')) + self.generate_value(i, v, True) + table6_json.append(('"key0" : "' + mix_unicode[k[0]] + + mix_unicode[k[1]] + '"', + '"value0" : "' + mix_unicode[v[0]] + + mix_unicode[v[1]] + + mix_unicode[v[2]] + '"')) + + self.check_json(self.table_uri5, table5_json) + self.check_json(self.table_uri6, table6_json) + + self.session.truncate(self.table_uri5, None, None, None) + self.session.truncate(self.table_uri6, None, None, None) + self.load_json(self.table_uri5, table5_json) + self.load_json(self.table_uri6, table6_json) + self.check_json(self.table_uri5, table5_json) + self.check_json(self.table_uri6, table6_json) + + self.runWt(['dump', '-j', self.table_uri5], outfilename='jsondump5.out') + self.runWt(['dump', '-j', self.table_uri6], outfilename='jsondump6.out') + self.session.drop(self.table_uri5) + self.session.drop(self.table_uri6) + self.runWt(['load', '-jf', 'jsondump5.out']) + self.runWt(['load', '-jf', 'jsondump6.out']) + self.session.drop(self.table_uri5) + self.session.drop(self.table_uri6) + + if __name__ == '__main__': wttest.run() |