diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_huffman.c')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_huffman.c | 142 |
1 files changed, 82 insertions, 60 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c index c31b3f2fdf1..6604bd9c9e1 100644 --- a/src/third_party/wiredtiger/src/btree/bt_huffman.c +++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c @@ -128,28 +128,66 @@ static const struct __wt_huffman_table __wt_huffman_nytenglish[] = { static int __wt_huffman_read(WT_SESSION_IMPL *, WT_CONFIG_ITEM *, struct __wt_huffman_table **, u_int *, u_int *); -#define WT_HUFFMAN_CONFIG_VALID(str, len) \ - (WT_STRING_CASE_MATCH("english", (str), (len)) || \ - WT_PREFIX_MATCH((str), "utf8") || WT_PREFIX_MATCH((str), "utf16")) - /* - * __btree_huffman_config -- - * Verify the key or value strings passed in. + * __huffman_confchk_file -- + * Check for a Huffman configuration file and return the file name. */ static int -__btree_huffman_config(WT_SESSION_IMPL *session, - WT_CONFIG_ITEM *key_conf, WT_CONFIG_ITEM *value_conf) +__huffman_confchk_file( + WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, int *is_utf8p, FILE **fpp) { - if (key_conf->len != 0 && - !WT_HUFFMAN_CONFIG_VALID(key_conf->str, key_conf->len)) - WT_RET_MSG( - session, EINVAL, "illegal Huffman key configuration"); - if (value_conf->len != 0 && - !WT_HUFFMAN_CONFIG_VALID(value_conf->str, value_conf->len)) - WT_RET_MSG( - session, EINVAL, "illegal Huffman value configuration"); - return (0); + FILE *fp; + WT_DECL_RET; + size_t len; + char *fname; + + /* Look for a prefix and file name. */ + len = 0; + if (is_utf8p != NULL) + *is_utf8p = 0; + if (WT_PREFIX_MATCH(v->str, "utf8")) { + if (is_utf8p != NULL) + *is_utf8p = 1; + len = strlen("utf8"); + } else if (WT_PREFIX_MATCH(v->str, "utf16")) + len = strlen("utf16"); + if (len == 0 || len >= v->len) + WT_RET_MSG(session, EINVAL, + "illegal Huffman configuration: %.*s", (int)v->len, v->str); + + /* Check the file exists. */ + WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname)); + WT_ERR(__wt_fopen(session, + fname, WT_FHANDLE_READ, WT_FOPEN_FIXED, &fp)); + + /* Optionally return the file handle. */ + if (fpp == NULL) + (void)__wt_fclose(session, &fp, WT_FHANDLE_READ); + else + *fpp = fp; + +err: __wt_free(session, fname); + + return (ret); +} + +/* + * __wt_huffman_confchk -- + * Verify Huffman configuration. + */ +int +__wt_huffman_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v) +{ + if (v->len == 0) + return (0); + + /* Standard Huffman encodings, no work to be done. */ + if (WT_STRING_MATCH("english", v->str, v->len)) + return (0); + if (WT_STRING_MATCH("none", v->str, v->len)) + return (0); + return (__huffman_confchk_file(session, v, NULL, NULL)); } /* @@ -170,11 +208,12 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session) cfg = btree->dhandle->cfg; WT_RET(__wt_config_gets_none(session, cfg, "huffman_key", &key_conf)); + WT_RET(__wt_huffman_confchk(session, &key_conf)); WT_RET( __wt_config_gets_none(session, cfg, "huffman_value", &value_conf)); + WT_RET(__wt_huffman_confchk(session, &value_conf)); if (key_conf.len == 0 && value_conf.len == 0) return (0); - WT_RET(__btree_huffman_config(session, &key_conf, &value_conf)); switch (btree->type) { /* Check file type compatibility. */ case BTREE_COL_FIX: @@ -193,7 +232,7 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session) if (key_conf.len == 0) { ; - } else if (strncasecmp(key_conf.str, "english", key_conf.len) == 0) { + } else if (strncmp(key_conf.str, "english", key_conf.len) == 0) { struct __wt_huffman_table copy[WT_ELEMENTS(__wt_huffman_nytenglish)]; @@ -204,7 +243,7 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session) 1, &btree->huffman_key)); /* Check for a shared key/value table. */ - if (value_conf.len != 0 && strncasecmp( + if (value_conf.len != 0 && strncmp( value_conf.str, "english", value_conf.len) == 0) { btree->huffman_value = btree->huffman_key; return (0); @@ -228,8 +267,7 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session) if (value_conf.len == 0) { ; - } else if ( - strncasecmp(value_conf.str, "english", value_conf.len) == 0) { + } else if (strncmp(value_conf.str, "english", value_conf.len) == 0) { struct __wt_huffman_table copy[WT_ELEMENTS(__wt_huffman_nytenglish)]; @@ -262,54 +300,37 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, struct __wt_huffman_table *table, *tp; FILE *fp; WT_DECL_RET; - uint64_t symbol, frequency; + int64_t symbol, frequency; u_int entries, lineno; - char *file; + int is_utf8; *tablep = NULL; *entriesp = *numbytesp = 0; fp = NULL; - file = NULL; table = NULL; /* + * Try and open the backing file. + */ + WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fp)); + + /* * UTF-8 table is 256 bytes, with a range of 0-255. * UTF-16 is 128KB (2 * 65536) bytes, with a range of 0-65535. */ - if (strncasecmp(ip->str, "utf8", 4) == 0) { + if (is_utf8) { entries = UINT8_MAX; *numbytesp = 1; WT_ERR(__wt_calloc_def(session, entries, &table)); - - if (ip->len == 4) - WT_ERR_MSG(session, EINVAL, - "no Huffman table file name specified"); - WT_ERR(__wt_calloc_def(session, ip->len, &file)); - memcpy(file, ip->str + 4, ip->len - 4); - } else if (strncasecmp(ip->str, "utf16", 5) == 0) { + } else { entries = UINT16_MAX; *numbytesp = 2; WT_ERR(__wt_calloc_def(session, entries, &table)); - - if (ip->len == 5) - WT_ERR_MSG(session, EINVAL, - "no Huffman table file name specified"); - WT_ERR(__wt_calloc_def(session, ip->len, &file)); - memcpy(file, ip->str + 5, ip->len - 5); - } else { - WT_ERR_MSG(session, EINVAL, - "unknown Huffman configuration value %.*s", - (int)ip->len, ip->str); } - if ((fp = fopen(file, "r")) == NULL) - WT_ERR_MSG(session, __wt_errno(), - "unable to read Huffman table file %.*s", - (int)ip->len, ip->str); - for (tp = table, lineno = 1; (ret = - fscanf(fp, "%" SCNu64 " %" SCNu64, &symbol, &frequency)) != EOF; + fscanf(fp, "%" SCNi64 " %" SCNi64, &symbol, &frequency)) != EOF; ++tp, ++lineno) { if (lineno > entries) WT_ERR_MSG(session, EINVAL, @@ -321,16 +342,19 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, "line %u of Huffman table file %.*s is corrupted: " "expected two unsigned integral values", lineno, (int)ip->len, ip->str); - if (symbol > entries) + if (symbol < 0 || symbol > entries) WT_ERR_MSG(session, EINVAL, - "line %u of Huffman table file %.*s is corrupted: " - "symbol larger than maximum value of %u", - lineno, (int)ip->len, ip->str, entries); - if (frequency > UINT32_MAX) + "line %u of Huffman file %.*s is corrupted; " + "symbol %" PRId64 " not in range, maximum " + "value is %u", + lineno, (int)ip->len, ip->str, symbol, entries); + if (frequency < 0 || frequency > UINT32_MAX) WT_ERR_MSG(session, EINVAL, - "line %u of Huffman table file %.*s is corrupted: " - "frequency larger than maximum value of %" PRIu32, - lineno, (int)ip->len, ip->str, UINT32_MAX); + "line %u of Huffman file %.*s is corrupted; " + "frequency %" PRId64 " not in range, maximum " + "value is %" PRIu32, + lineno, (int)ip->len, ip->str, frequency, + (uint32_t)UINT32_MAX); tp->symbol = (uint32_t)symbol; tp->frequency = (uint32_t)frequency; @@ -344,9 +368,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, if (0) { err: __wt_free(session, table); } - if (fp != NULL) - (void)fclose(fp); - __wt_free(session, file); + (void)__wt_fclose(session, &fp, WT_FHANDLE_READ); return (ret); } |