summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- innobase/data/data0type.c | 11
-rw-r--r-- innobase/include/data0type.h | 7
-rw-r--r-- innobase/include/row0mysql.h | 2
-rw-r--r-- innobase/include/row0mysql.ic | 31
-rw-r--r-- innobase/rem/rem0cmp.c | 16
-rw-r--r-- innobase/row/row0sel.c | 68
-rw-r--r-- sql/ha_innodb.cc | 2
7 files changed, 110 insertions, 27 deletions
diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c
index 714cf92bc65..dab14df4240 100644
--- a/innobase/data/data0type.c
+++ b/innobase/data/data0type.c
@@ -166,6 +166,17 @@ dtype_is_non_binary_string_type(
}
/*************************************************************************
+Gets the MySQL charset-collation code for MySQL string types. This is an
+ordinary (non-inline) function that only forwards its argument to
+dtype_get_charset_coll() and returns the result; sql/ha_innodb.cc calls
+this variant, presumably because it cannot use the UNIV_INLINE one
+(TODO confirm). */
+
+ulint
+dtype_get_charset_coll_noninline(
+/*=============================*/
+ ulint prtype) /* in: precise data type */
+{
+ return(dtype_get_charset_coll(prtype));
+}
+
+/*************************************************************************
Forms a precise type from the < 4.1.2 format precise type plus the
charset-collation code. */
diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h
index c263d2bf613..02c874836fd 100644
--- a/innobase/include/data0type.h
+++ b/innobase/include/data0type.h
@@ -234,6 +234,13 @@ dtype_get_prtype(
dtype_t* type);
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */
+
+ulint
+dtype_get_charset_coll_noninline(
+/*=============================*/
+ ulint prtype);/* in: precise data type */
+/*************************************************************************
+Gets the MySQL charset-collation code for MySQL string types. */
UNIV_INLINE
ulint
dtype_get_charset_coll(
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index f47ce74ce37..062dae4e60c 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -454,6 +454,8 @@ struct mysql_row_templ_struct {
zero if column cannot be NULL */
ulint type; /* column type in Innobase mtype
numbers DATA_CHAR... */
+ ulint charset; /* MySQL charset-collation code
+ of the column, or zero */
ulint is_unsigned; /* if a column type is an integer
type and this field is != 0, then
it is an unsigned integer type */
diff --git a/innobase/include/row0mysql.ic b/innobase/include/row0mysql.ic
index 4ecd66e06ec..fc922b52d0a 100644
--- a/innobase/include/row0mysql.ic
+++ b/innobase/include/row0mysql.ic
@@ -91,12 +91,33 @@ row_mysql_store_col_in_innobase_format(
}
} else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
|| type == DATA_BINARY) {
+ /* Remove trailing spaces. */
+
+ /* Handle UCS2 strings differently. As no new
+ collations will be introduced in 4.1, we hardcode the
+ charset-collation codes here. In 5.0, the logic will
+ be based on mbminlen. */
+ ulint cset = dtype_get_charset_coll(
+ dtype_get_prtype(dfield_get_type(dfield)));
ptr = row_mysql_read_var_ref(&col_len, mysql_data);
-
- /* Remove trailing spaces */
- while (col_len > 0 && ptr[col_len - 1] == ' ') {
- col_len--;
- }
+ if (cset == 35/*ucs2_general_ci*/
+ || cset == 90/*ucs2_bin*/
+ || (cset >= 128/*ucs2_unicode_ci*/
+ && cset <= 144/*ucs2_persian_ci*/)) {
+ /* space=0x0020 */
+ /* Trim "half-chars", just in case. */
+ col_len &= ~1;
+
+ while (col_len >= 2 && ptr[col_len - 2] == 0x00
+ && ptr[col_len - 1] == 0x20) {
+ col_len -= 2;
+ }
+ } else {
+ /* space=0x20 */
+ while (col_len > 0 && ptr[col_len - 1] == 0x20) {
+ col_len--;
+ }
+ }
} else if (type == DATA_BLOB) {
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
}
diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c
index 041fb7914e2..cf549284acc 100644
--- a/innobase/rem/rem0cmp.c
+++ b/innobase/rem/rem0cmp.c
@@ -261,22 +261,6 @@ cmp_whole_field(
"InnoDB: comparison!\n");
}
- /* MySQL does not pad the ends of strings with spaces in a
- comparison. That would cause a foreign key check to fail for
- non-latin1 character sets if we have different length columns.
- To prevent that we remove trailing spaces here before doing
- the comparison. NOTE that if we in the future map more MySQL
- types to DATA_MYSQL or DATA_VARMYSQL, we have to change this
- code. */
-
- while (a_length > 0 && a[a_length - 1] == ' ') {
- a_length--;
- }
-
- while (b_length > 0 && b[b_length - 1] == ' ') {
- b_length--;
- }
-
return(innobase_mysql_cmp(
(int)(type->prtype & DATA_MYSQL_TYPE_MASK),
(uint)dtype_get_charset_coll(type->prtype),
diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c
index ce76f48e7a7..61ba0b53172 100644
--- a/innobase/row/row0sel.c
+++ b/innobase/row/row0sel.c
@@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format(
dest = row_mysql_store_var_len(dest, len);
ut_memcpy(dest, data, len);
- /* Pad with trailing spaces */
- memset(dest + len, ' ', col_len - len);
-
/* ut_ad(col_len >= len + 2); No real var implemented in
MySQL yet! */
@@ -2334,7 +2331,45 @@ row_sel_store_mysql_rec(
mysql_rec + templ->mysql_col_offset,
templ->mysql_col_len, data, len,
templ->type, templ->is_unsigned);
-
+
+ if (templ->type == DATA_VARCHAR
+ || templ->type == DATA_VARMYSQL
+ || templ->type == DATA_BINARY) {
+ /* Pad with trailing spaces */
+ data = mysql_rec + templ->mysql_col_offset;
+
+ /* Handle UCS2 strings differently. As no new
+ collations will be introduced in 4.1, we
+ hardcode the charset-collation codes here.
+ 5.0 will use a different approach. */
+ if (templ->charset == 35
+ || templ->charset == 90
+ || (templ->charset >= 128
+ && templ->charset <= 144)) {
+ /* space=0x0020 */
+ ulint col_len = templ->mysql_col_len;
+
+ ut_a(!(col_len & 1));
+ if (len & 1) {
+ /* A 0x20 has been stripped
+ from the column.
+ Pad it back. */
+ goto pad_0x20;
+ }
+ /* Pad the rest of the string
+ with 0x0020 */
+ while (len < col_len) {
+ data[len++] = 0x00;
+ pad_0x20:
+ data[len++] = 0x20;
+ }
+ } else {
+ /* space=0x20 */
+ memset(data + len, 0x20,
+ templ->mysql_col_len - len);
+ }
+ }
+
/* Cleanup */
if (extern_field_heap) {
mem_heap_free(extern_field_heap);
@@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec(
pad_char = '\0';
}
- memset(mysql_rec + templ->mysql_col_offset, pad_char,
- templ->mysql_col_len);
+ /* Handle UCS2 strings differently. As no new
+ collations will be introduced in 4.1,
+ we hardcode the charset-collation codes here.
+ 5.0 will use a different approach. */
+ if (templ->charset == 35
+ || templ->charset == 90
+ || (templ->charset >= 128
+ && templ->charset <= 144)) {
+ /* There are two bytes per char, so the length
+ has to be an even number. */
+ ut_a(!(templ->mysql_col_len & 1));
+ data = mysql_rec + templ->mysql_col_offset;
+ len = templ->mysql_col_len;
+ /* Pad with 0x0020. */
+ while (len >= 2) {
+ *data++ = 0x00;
+ *data++ = 0x20;
+ len -= 2;
+ }
+ } else {
+ memset(mysql_rec + templ->mysql_col_offset,
+ pad_char, templ->mysql_col_len);
+ }
}
}
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index cc69762cbdb..8110df1063f 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -2248,6 +2248,8 @@ build_template(
templ->mysql_col_len = (ulint) field->pack_length();
templ->type = get_innobase_type_from_mysql_type(field);
+ templ->charset = dtype_get_charset_coll_noninline(
+ index->table->cols[i].type.prtype);
templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
if (templ->type == DATA_BLOB) {