diff options
-rw-r--r-- | innobase/data/data0type.c | 11 | ||||
-rw-r--r-- | innobase/include/data0type.h | 7 | ||||
-rw-r--r-- | innobase/include/row0mysql.h | 2 | ||||
-rw-r--r-- | innobase/include/row0mysql.ic | 31 | ||||
-rw-r--r-- | innobase/rem/rem0cmp.c | 16 | ||||
-rw-r--r-- | innobase/row/row0sel.c | 68 | ||||
-rw-r--r-- | sql/ha_innodb.cc | 2 |
7 files changed, 110 insertions, 27 deletions
diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c index 714cf92bc65..dab14df4240 100644 --- a/innobase/data/data0type.c +++ b/innobase/data/data0type.c @@ -166,6 +166,17 @@ dtype_is_non_binary_string_type( } /************************************************************************* +Gets the MySQL charset-collation code for MySQL string types. */ + +ulint +dtype_get_charset_coll_noninline( +/*=============================*/ + ulint prtype) /* in: precise data type */ +{ + return(dtype_get_charset_coll(prtype)); +} + +/************************************************************************* Forms a precise type from the < 4.1.2 format precise type plus the charset-collation code. */ diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h index c263d2bf613..02c874836fd 100644 --- a/innobase/include/data0type.h +++ b/innobase/include/data0type.h @@ -234,6 +234,13 @@ dtype_get_prtype( dtype_t* type); /************************************************************************* Gets the MySQL charset-collation code for MySQL string types. */ + +ulint +dtype_get_charset_coll_noninline( +/*=============================*/ + ulint prtype);/* in: precise data type */ +/************************************************************************* +Gets the MySQL charset-collation code for MySQL string types. */ UNIV_INLINE ulint dtype_get_charset_coll( diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h index f47ce74ce37..062dae4e60c 100644 --- a/innobase/include/row0mysql.h +++ b/innobase/include/row0mysql.h @@ -454,6 +454,8 @@ struct mysql_row_templ_struct { zero if column cannot be NULL */ ulint type; /* column type in Innobase mtype numbers DATA_CHAR... */ + ulint charset; /* MySQL charset-collation code + of the column, or zero */ ulint is_unsigned; /* if a column type is an integer type and this field is != 0, then it is an unsigned integer type */ diff --git a/innobase/include/row0mysql.ic b/innobase/include/row0mysql.ic index 4ecd66e06ec..fc922b52d0a 100644 --- a/innobase/include/row0mysql.ic +++ b/innobase/include/row0mysql.ic @@ -91,12 +91,33 @@ row_mysql_store_col_in_innobase_format( } } else if (type == DATA_VARCHAR || type == DATA_VARMYSQL || type == DATA_BINARY) { + /* Remove trailing spaces. */ + + /* Handle UCS2 strings differently. As no new + collations will be introduced in 4.1, we hardcode the + charset-collation codes here. In 5.0, the logic will + be based on mbminlen. */ + ulint cset = dtype_get_charset_coll( + dtype_get_prtype(dfield_get_type(dfield))); ptr = row_mysql_read_var_ref(&col_len, mysql_data); - - /* Remove trailing spaces */ - while (col_len > 0 && ptr[col_len - 1] == ' ') { - col_len--; - } + if (cset == 35/*ucs2_general_ci*/ + || cset == 90/*ucs2_bin*/ + || (cset >= 128/*ucs2_unicode_ci*/ + && cset <= 144/*ucs2_persian_ci*/)) { + /* space=0x0020 */ + /* Trim "half-chars", just in case. */ + col_len &= ~1; + + while (col_len >= 2 && ptr[col_len - 2] == 0x00 + && ptr[col_len - 1] == 0x20) { + col_len -= 2; + } + } else { + /* space=0x20 */ + while (col_len > 0 && ptr[col_len - 1] == 0x20) { + col_len--; + } + } } else if (type == DATA_BLOB) { ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); } diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c index 041fb7914e2..cf549284acc 100644 --- a/innobase/rem/rem0cmp.c +++ b/innobase/rem/rem0cmp.c @@ -261,22 +261,6 @@ cmp_whole_field( "InnoDB: comparison!\n"); } - /* MySQL does not pad the ends of strings with spaces in a - comparison. That would cause a foreign key check to fail for - non-latin1 character sets if we have different length columns. - To prevent that we remove trailing spaces here before doing - the comparison. NOTE that if we in the future map more MySQL - types to DATA_MYSQL or DATA_VARMYSQL, we have to change this - code. */ - - while (a_length > 0 && a[a_length - 1] == ' ') { - a_length--; - } - - while (b_length > 0 && b[b_length - 1] == ' ') { - b_length--; - } - return(innobase_mysql_cmp( (int)(type->prtype & DATA_MYSQL_TYPE_MASK), (uint)dtype_get_charset_coll(type->prtype), diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index ce76f48e7a7..61ba0b53172 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format( dest = row_mysql_store_var_len(dest, len); ut_memcpy(dest, data, len); - /* Pad with trailing spaces */ - memset(dest + len, ' ', col_len - len); - /* ut_ad(col_len >= len + 2); No real var implemented in MySQL yet! */ @@ -2334,7 +2331,45 @@ row_sel_store_mysql_rec( mysql_rec + templ->mysql_col_offset, templ->mysql_col_len, data, len, templ->type, templ->is_unsigned); - + + if (templ->type == DATA_VARCHAR + || templ->type == DATA_VARMYSQL + || templ->type == DATA_BINARY) { + /* Pad with trailing spaces */ + data = mysql_rec + templ->mysql_col_offset; + + /* Handle UCS2 strings differently. As no new + collations will be introduced in 4.1, we + hardcode the charset-collation codes here. + 5.0 will use a different approach. */ + if (templ->charset == 35 + || templ->charset == 90 + || (templ->charset >= 128 + && templ->charset <= 144)) { + /* space=0x0020 */ + ulint col_len = templ->mysql_col_len; + + ut_a(!(col_len & 1)); + if (len & 1) { + /* A 0x20 has been stripped + from the column. + Pad it back. */ + goto pad_0x20; + } + /* Pad the rest of the string + with 0x0020 */ + while (len < col_len) { + data[len++] = 0x00; + pad_0x20: + data[len++] = 0x20; + } + } else { + /* space=0x20 */ + memset(data + len, 0x20, + templ->mysql_col_len - len); + } + } + /* Cleanup */ if (extern_field_heap) { mem_heap_free(extern_field_heap); @@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec( pad_char = '\0'; } - memset(mysql_rec + templ->mysql_col_offset, pad_char, - templ->mysql_col_len); + /* Handle UCS2 strings differently. As no new + collations will be introduced in 4.1, + we hardcode the charset-collation codes here. + 5.0 will use a different approach. */ + if (templ->charset == 35 + || templ->charset == 90 + || (templ->charset >= 128 + && templ->charset <= 144)) { + /* There are two bytes per char, so the length + has to be an even number. */ + ut_a(!(templ->mysql_col_len & 1)); + data = mysql_rec + templ->mysql_col_offset; + len = templ->mysql_col_len; + /* Pad with 0x0020. */ + while (len >= 2) { + *data++ = 0x00; + *data++ = 0x20; + len -= 2; + } + } else { + memset(mysql_rec + templ->mysql_col_offset, + pad_char, templ->mysql_col_len); + } } } diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index cc69762cbdb..8110df1063f 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -2248,6 +2248,8 @@ build_template( templ->mysql_col_len = (ulint) field->pack_length(); templ->type = get_innobase_type_from_mysql_type(field); + templ->charset = dtype_get_charset_coll_noninline( + index->table->cols[i].type.prtype); templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG); if (templ->type == DATA_BLOB) { |