diff options
author | Alexander Barkov <bar@mariadb.com> | 2020-05-21 18:16:27 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2020-05-22 07:47:49 +0400 |
commit | cb9c49a9b20dd8d1eee39641176134e207a4d84f (patch) | |
tree | 8cf85c16991abe5834eb61fe5f1a592276f231e9 /sql | |
parent | 836d70899780f28ea2a6870428ff6daac21ca993 (diff) | |
download | mariadb-git-cb9c49a9b20dd8d1eee39641176134e207a4d84f.tar.gz |
MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed
The code incorrectly assumed in multiple places that TYPELIB
values cannot have 0x00 bytes inside. In fact they can:
CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY);
Note, the TYPELIB value encoding used in FRM is ambiguous about 0x00.
So this fix is partial.
It fixes 0x00 bytes in many (but not all) places:
- In the middle or in the end of a value:
CREATE TABLE t1 (a ENUM(0x6100) ...);
CREATE TABLE t1 (a ENUM(0x610062) ...);
- In the beginning of the first value:
CREATE TABLE t1 (a ENUM(0x0061));
CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b'));
- In the beginning of the second (and following) value of the *last* ENUM/SET
in the table:
CREATE TABLE t1 (a ENUM('a',0x0061));
CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061));
However, it does not fix 0x00 when:
- 0x00 byte is in the beginning of a value of a non-last ENUM/SET
causes an error:
CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b'));
ERROR 1033 (HY000): Incorrect information in file: './test/t1.frm'
This is an ambuguous case and will be fixed separately.
We need a new TYPELIB encoding to fix this.
Details:
- unireg.cc
The function pack_header() incorrectly used strlen() to detect
a TYPELIB value length. Adding a new function typelib_values_packed_length()
which uses TYPELIB::type_lengths[n] to detect the n-th value length,
and reusing the new function in pack_header() and packed_fields_length()
- table.cc
fix_type_pointers() assumed in multiple places that values cannot have
0x00 inside and used strlen(TYPELIB::type_names[n]) to set
the corresponding TYPELIB::type_lengths[n].
Also, fix_type_pointers() did not check the encoded data for consistency.
Rewriting fix_type_pointers() code to populate TYPELIB::type_names[n] and
TYPELIB::type_lengths[n] at the same time, so no additional loop
with strlen() is needed any more.
Adding many data consistency tests.
Fixing the main loop in fix_type_pointers() to use memchr() instead of
strchr() to handle 0x00 properly.
Fixing create_key_infos() to return the result in a LEX_STRING rather
that in a char*.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/table.cc | 170 | ||||
-rw-r--r-- | sql/unireg.cc | 23 |
2 files changed, 133 insertions, 60 deletions
diff --git a/sql/table.cc b/sql/table.cc index f1a4b322a80..192faa0d00e 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -64,8 +64,11 @@ LEX_STRING parse_vcol_keyword= { C_STRING_WITH_LEN("PARSE_VCOL_EXPR ") }; /* Functions defined in this file */ -static void fix_type_pointers(const char ***array, TYPELIB *point_to_type, - uint types, char **names); +static bool fix_type_pointers(const char ***typelib_value_names, + uint **typelib_value_lengths, + TYPELIB *point_to_type, uint types, + char *names, size_t names_length); + static uint find_field(Field **fields, uchar *record, uint start, uint length); inline bool is_system_table_name(const char *name, uint length); @@ -670,7 +673,8 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end, uint keys, KEY *keyinfo, uint new_frm_ver, uint &ext_key_parts, TABLE_SHARE *share, uint len, - KEY *first_keyinfo, char* &keynames) + KEY *first_keyinfo, + LEX_STRING *keynames) { uint i, j, n_length; KEY_PART_INFO *key_part= NULL; @@ -813,10 +817,13 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end, } share->ext_key_parts+= keyinfo->ext_key_parts; } - keynames=(char*) key_part; - strpos+= strnmov(keynames, (char *) strpos, frm_image_end - strpos) - keynames; + keynames->str= (char*) key_part; + keynames->length= strnmov(keynames->str, (char *) strpos, + frm_image_end - strpos) - keynames->str; + strpos+= keynames->length; if (*strpos++) // key names are \0-terminated return 1; + keynames->length++; // Include '\0', to make fix_type_pointers() happy. //reading index comments for (keyinfo= share->key_info, i=0; i < keys; i++, keyinfo++) @@ -918,12 +925,14 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, TABLE_SHARE *share= this; uint new_frm_ver, field_pack_length, new_field_pack_flag; uint interval_count, interval_parts, read_length, int_length; + uint total_typelib_value_count; uint db_create_options, keys, key_parts, n_length; uint com_length, null_bit_pos; uint extra_rec_buf_length; uint i; bool use_hash; - char *keynames, *names, *comment_pos; + LEX_STRING keynames= {NULL, 0}; + char *names, *comment_pos; const uchar *forminfo, *extra2; const uchar *frm_image_end = frm_image + frm_length; uchar *record, *null_flags, *null_pos; @@ -935,6 +944,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, KEY_PART_INFO *key_part= NULL; Field **field_ptr, *reg_field; const char **interval_array; + uint *typelib_value_lengths= NULL; enum legacy_db_type legacy_db_type; my_bitmap_map *bitmaps; bool null_bits_are_used; @@ -1237,7 +1247,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo, new_frm_ver, ext_key_parts, - share, len, &first_keyinfo, keynames)) + share, len, &first_keyinfo, &keynames)) goto err; if (next_chunk + 5 < buff_end) @@ -1330,7 +1340,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, { if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo, new_frm_ver, ext_key_parts, - share, len, &first_keyinfo, keynames)) + share, len, &first_keyinfo, &keynames)) goto err; } @@ -1372,6 +1382,24 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, DBUG_PRINT("info",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d com_length: %d vcol_screen_length: %d", interval_count,interval_parts, keys,n_length,int_length, com_length, vcol_screen_length)); + /* + We load the following things into TYPELIBs: + - One TYPELIB for field names + - interval_count TYPELIBs for ENUM/SET values + - One TYPELIB for key names + Every TYPELIB requires one extra value with a NULL pointer and zero length, + which is the end-of-values marker. + TODO-10.5+: + Note, we should eventually reuse this total_typelib_value_count + to allocate interval_array. The above code reserves less space + than total_typelib_value_count pointers. So it seems `interval_array` + and `names` overlap in the memory. Too dangerous to fix in 10.1. + */ + total_typelib_value_count= + (share->fields + 1/*end-of-values marker*/) + + (interval_parts + interval_count/*end-of-values markers*/) + + (keys + 1/*end-of-values marker*/); + if (!(field_ptr = (Field **) alloc_root(&share->mem_root, @@ -1383,6 +1411,12 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, vcol_screen_length))))) goto err; /* purecov: inspected */ + + if (!(typelib_value_lengths= (uint *) alloc_root(&share->mem_root, + total_typelib_value_count * + sizeof(uint *)))) + goto err; + share->field= field_ptr; read_length=(uint) (share->fields * field_pack_length + pos+ (uint) (n_length+int_length+com_length+ @@ -1391,6 +1425,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, share->intervals= (TYPELIB*) (field_ptr+share->fields+1); interval_array= (const char **) (share->intervals+interval_count); + // This looks wrong: shouldn't it be (+2+interval_count) instread of (+3) ? names= (char*) (interval_array+share->fields+interval_parts+keys+3); if (!interval_count) share->intervals= 0; // For better debugging @@ -1403,34 +1438,21 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, memcpy(vcol_screen_pos, disk_buff+read_length-vcol_screen_length, vcol_screen_length); - fix_type_pointers(&interval_array, &share->fieldnames, 1, &names); - if (share->fieldnames.count != share->fields) + if (fix_type_pointers(&interval_array, &typelib_value_lengths, + &share->fieldnames, 1, names, n_length) || + share->fieldnames.count != share->fields) goto err; - fix_type_pointers(&interval_array, share->intervals, interval_count, - &names); - { - /* Set ENUM and SET lengths */ - TYPELIB *interval; - for (interval= share->intervals; - interval < share->intervals + interval_count; - interval++) - { - uint count= (uint) (interval->count + 1) * sizeof(uint); - if (!(interval->type_lengths= (uint *) alloc_root(&share->mem_root, - count))) - goto err; - for (count= 0; count < interval->count; count++) - { - char *val= (char*) interval->type_names[count]; - interval->type_lengths[count]= strlen(val); - } - interval->type_lengths[count]= 0; - } - } + if (fix_type_pointers(&interval_array, &typelib_value_lengths, + share->intervals, interval_count, + names + n_length, int_length)) + goto err; - if (keynames) - fix_type_pointers(&interval_array, &share->keynames, 1, &keynames); + if (keynames.length && + (fix_type_pointers(&interval_array, &typelib_value_lengths, + &share->keynames, 1, keynames.str, keynames.length) || + share->keynames.count != keys)) + goto err; /* Allocate handler */ if (!(handler_file= get_new_handler(share, thd->mem_root, @@ -3216,37 +3238,81 @@ void open_table_error(TABLE_SHARE *share, enum open_frm_error error, ** with a '\0' */ -static void -fix_type_pointers(const char ***array, TYPELIB *point_to_type, uint types, - char **names) +static bool +fix_type_pointers(const char ***typelib_value_names, + uint **typelib_value_lengths, + TYPELIB *point_to_type, uint types, + char *ptr, size_t length) { - char *type_name, *ptr; - char chr; + const char *end= ptr + length; - ptr= *names; while (types--) { + char sep; point_to_type->name=0; - point_to_type->type_names= *array; + point_to_type->type_names= *typelib_value_names; + point_to_type->type_lengths= *typelib_value_lengths; - if ((chr= *ptr)) /* Test if empty type */ + /* + Typelib can be encoded as: + 1) 0x00 - empty typelib + 2) 0xFF 0x00 - empty typelib (index names) + 3) sep (value sep)... 0x00 - non-empty typelib (where sep is a separator) + */ + if (length == 2 && ptr[0] == (char) 0xFF && ptr[1] == '\0') + { + /* + This is a special case #2. + If there are no indexes at all, index names can be encoded + as a two byte sequence: 0xFF 0x00 + TODO: Check if it's a bug in the FRM packing routine. + It should probably write just 0x00 instead of 0xFF00. + */ + ptr+= 2; + } + else if ((sep= *ptr++)) // A non-empty typelib { - while ((type_name=strchr(ptr+1,chr)) != NullS) + for ( ; ptr < end; ) { - *((*array)++) = ptr+1; - *type_name= '\0'; /* End string */ - ptr=type_name; + // Now scan the next value+sep pair + char *vend= (char*) memchr(ptr, sep, end - ptr); + if (!vend) + return true; // Bad format + *((*typelib_value_names)++)= ptr; + *((*typelib_value_lengths)++)= vend - ptr; + *vend= '\0'; // Change sep to '\0' + ptr= vend + 1; // Shift from sep to the next byte + /* + Now we can have either: + - the end-of-typelib marker (0x00) + - more value+sep pairs + */ + if (!*ptr) + { + /* + We have an ambiguity here. 0x00 can be an end-of-typelib marker, + but it can also be a part of the next value: + CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY); + If this is the last ENUM/SET in the table and there is still more + packed data left after 0x00, then we know for sure that 0x00 + is a part of the next value. + TODO-10.5+: we should eventually introduce a new unambiguous + typelib encoding for FRM. + */ + if (!types && ptr + 1 < end) + continue; // A binary value starting with 0x00 + ptr++; // Consume the end-of-typelib marker + break; // End of the current typelib + } } - ptr+=2; /* Skip end mark and last 0 */ } - else - ptr++; - point_to_type->count= (uint) (*array - point_to_type->type_names); + point_to_type->count= (uint) (*typelib_value_names - + point_to_type->type_names); point_to_type++; - *((*array)++)= NullS; /* End of type */ + *((*typelib_value_names)++)= NullS; /* End of type */ + *((*typelib_value_lengths)++)= 0; /* End of type */ } - *names=ptr; /* Update end */ - return; + return ptr != end; } /* fix_type_pointers */ diff --git a/sql/unireg.cc b/sql/unireg.cc index b116218b60e..2ccc440f71b 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -498,6 +498,18 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo, } /* pack_keys */ +static uint typelib_values_packed_length(const TYPELIB *t) +{ + uint length= 0; + for (uint i= 0; t->type_names[i]; i++) + { + length+= t->type_lengths[i]; + length++; /* Separator */ + } + return length; +} + + /* Make formheader */ static bool pack_header(THD *thd, uchar *forminfo, @@ -619,9 +631,8 @@ static bool pack_header(THD *thd, uchar *forminfo, field->interval_id=get_interval_id(&int_count,create_fields,field); if (old_int_count != int_count) { - for (const char **pos=field->interval->type_names ; *pos ; pos++) - int_length+=(uint) strlen(*pos)+1; // field + suffix prefix - int_parts+=field->interval->count+1; + int_length+= typelib_values_packed_length(field->interval); + int_parts+= field->interval->count + 1; } } if (f_maybe_null(field->pack_flag)) @@ -710,11 +721,7 @@ static size_t packed_fields_length(List<Create_field> &create_fields) { int_count= field->interval_id; length++; - for (int i=0; field->interval->type_names[i]; i++) - { - length+= field->interval->type_lengths[i]; - length++; - } + length+= typelib_values_packed_length(field->interval); length++; } if (field->vcol_info) |