diff options
author | Alexander Barkov <bar@mariadb.com> | 2021-09-29 15:13:57 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2022-01-21 12:16:07 +0400 |
commit | b915f79e4e004fde4f6ac8f341afee980e11792b (patch) | |
tree | 2568032d75c7af9a72c6669b306fda4418b5ed20 | |
parent | db574173d19731f1e5dc75d325f72398afac8d59 (diff) | |
download | mariadb-git-bb-10.4-bar-MDEV-25904.tar.gz |
MDEV-25904 New collation functions to compare InnoDB style trimmed NO PAD strings (branch: bb-10.4-bar-MDEV-25904)
-rw-r--r-- | include/m_ctype.h | 54 | ||||
-rw-r--r-- | sql/field.cc | 47 | ||||
-rw-r--r-- | strings/ctype-big5.c | 4 | ||||
-rw-r--r-- | strings/ctype-bin.c | 25 | ||||
-rw-r--r-- | strings/ctype-cp932.c | 4 | ||||
-rw-r--r-- | strings/ctype-czech.c | 1 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 4 | ||||
-rw-r--r-- | strings/ctype-eucjpms.c | 4 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 4 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 4 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 1 | ||||
-rw-r--r-- | strings/ctype-simple.c | 14 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 4 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 2 | ||||
-rw-r--r-- | strings/ctype-uca-scanner_next.inl | 179 | ||||
-rw-r--r-- | strings/ctype-uca.c | 38 | ||||
-rw-r--r-- | strings/ctype-uca.ic | 276 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 16 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 4 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 11 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 1 | ||||
-rw-r--r-- | strings/ctype.c | 29 | ||||
-rw-r--r-- | strings/strcoll.ic | 50 | ||||
-rw-r--r-- | strings/strings_def.h | 10 | ||||
-rw-r--r-- | unittest/strings/strings-t.c | 508 |
25 files changed, 1150 insertions, 144 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 0f6e6a11666..187c8710929 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -330,6 +330,60 @@ struct my_collation_handler_st const uchar *, size_t, const uchar *, size_t, my_bool); int (*strnncollsp)(CHARSET_INFO *, const uchar *, size_t, const uchar *, size_t); + /* + strnncollsp_nchars() - similar to strnncollsp() but assumes that both + strings were originally CHAR(N) values with the + same N, then were optionally space-padded, + or optionally space-trimmed. + + In other words, this function compares in the way + if we insert both values into a CHAR(N) column + and then compare the two column values. + + It compares the same amount of characters from the two strings. + This is especially important for NOPAD collations. + + If CHAR_LENGTH of the two strings are different, + the shorter string is virtually padded with trailing spaces + up to CHAR_LENGTH of the longer string, to guarantee that the + same amount of characters are compared. + This is important if the two CHAR(N) strings are space-trimmed + (e.g. like in InnoDB compact format for CHAR). + + The function compares not more than "nchars" characters only. + This can be useful to compare CHAR(N) space-padded strings + (when the exact N is known) without having to truncate them before + the comparison. + + For example, Field_string stores a "CHAR(3) CHARACTER SET utf8mb4" value + of "aaa" as 12 bytes in a record buffer: + - 3 bytes of the actual data, followed by + - 9 bytes of spaces (just fillers, not real data) + The caller can pass nchars=3 to compare CHAR(3) record values. + In such case, the comparator won't go inside the 9 bytes of the fillers. + + If N is not known, the caller can pass max(len1,len2) as the "nchars" value + (i.e. the maximum of the OCTET_LENGTH of the two strings). + + Notes on complex collations. + + This function counts contraction parts as individual characters. 
+ For example, the Czech letter 'ch' (in Czech collations) + is ordinarily counted by the "nchars" limit as TWO characters + (although it is only one letter). + This corresponds to what CHAR(N) does in INSERT. + + If the "nchars" limit tears apart a contraction, only the part fitting + into "nchars" characters is used. For example, in case of a Czech collation, + the string "ach" with nchars=2 is compared as 'ac': the contraction + 'ch' is torn apart and the letter 'c' acts as an individual character. + This emulates the same comparison result with the scenario when we insert + 'ach' into a CHAR(2) column and then compare it. + */ + int (*strnncollsp_nchars)(CHARSET_INFO *, + const uchar *str1, size_t len1, + const uchar *str2, size_t len2, + size_t nchars); size_t (*strnxfrm)(CHARSET_INFO *, uchar *dst, size_t dstlen, uint nweights, const uchar *src, size_t srclen, uint flags); diff --git a/sql/field.cc b/sql/field.cc index 2226137b043..e3aa7d149a0 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -7433,23 +7433,10 @@ Field_string::compatible_field_size(uint field_metadata, int Field_string::cmp(const uchar *a_ptr, const uchar *b_ptr) { - size_t a_len, b_len; - - if (field_charset->mbmaxlen != 1) - { - size_t char_len= field_length/field_charset->mbmaxlen; - a_len= my_charpos(field_charset, a_ptr, a_ptr + field_length, char_len); - b_len= my_charpos(field_charset, b_ptr, b_ptr + field_length, char_len); - } - else - a_len= b_len= field_length; - /* - We have to remove end space to be able to compare multi-byte-characters - like in latin_de 'ae' and 0xe4 - */ - return field_charset->coll->strnncollsp(field_charset, - a_ptr, a_len, - b_ptr, b_len); + return field_charset->coll->strnncollsp_nchars(field_charset, + a_ptr, field_length, + b_ptr, field_length, + Field_string::char_length()); } @@ -7848,19 +7835,6 @@ int Field_varstring::cmp(const uchar *a_ptr, const uchar *b_ptr) } -static int cmp_str_prefix(const uchar *ua, size_t alen, const uchar *ub, - size_t blen, 
size_t prefix, CHARSET_INFO *cs) -{ - const char *a= (char*)ua, *b= (char*)ub; - MY_STRCOPY_STATUS status; - prefix/= cs->mbmaxlen; - alen= cs->cset->well_formed_char_length(cs, a, a + alen, prefix, &status); - blen= cs->cset->well_formed_char_length(cs, b, b + blen, prefix, &status); - return cs->coll->strnncollsp(cs, ua, alen, ub, blen); -} - - - int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr, size_t prefix_len) { @@ -7880,8 +7854,12 @@ int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr, a_length= uint2korr(a_ptr); b_length= uint2korr(b_ptr); } - return cmp_str_prefix(a_ptr+length_bytes, a_length, b_ptr+length_bytes, - b_length, prefix_len, field_charset); + return field_charset->coll->strnncollsp_nchars(field_charset, + a_ptr + length_bytes, + a_length, + b_ptr + length_bytes, + b_length, + prefix_len / field_charset->mbmaxlen); } @@ -8659,7 +8637,10 @@ int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr, memcpy(&blob1, a_ptr+packlength, sizeof(char*)); memcpy(&blob2, b_ptr+packlength, sizeof(char*)); size_t a_len= get_length(a_ptr), b_len= get_length(b_ptr); - return cmp_str_prefix(blob1, a_len, blob2, b_len, prefix_len, field_charset); + return field_charset->coll->strnncollsp_nchars(field_charset, + blob1, a_len, + blob2, b_len, + prefix_len / field_charset->mbmaxlen); } diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 3991a219ab5..fdaa34eeaf0 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6711,6 +6711,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci= NULL, /* init */ my_strnncoll_big5_chinese_ci, my_strnncollsp_big5_chinese_ci, + my_strnncollsp_nchars_big5_chinese_ci, my_strnxfrm_big5_chinese_ci, my_strnxfrmlen_simple, my_like_range_mb, @@ -6727,6 +6728,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin= NULL, /* init */ my_strnncoll_big5_bin, my_strnncollsp_big5_bin, + my_strnncollsp_nchars_big5_bin, my_strnxfrm_mb, 
my_strnxfrmlen_simple, my_like_range_mb, @@ -6743,6 +6745,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_nopad_ci= NULL, /* init */ my_strnncoll_big5_chinese_ci, my_strnncollsp_big5_chinese_nopad_ci, + my_strnncollsp_nchars_big5_chinese_nopad_ci, my_strnxfrm_big5_chinese_nopad_ci, my_strnxfrmlen_simple, my_like_range_mb, @@ -6759,6 +6762,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_nopad_bin= NULL, /* init */ my_strnncoll_big5_bin, my_strnncollsp_big5_nopad_bin, + my_strnncollsp_nchars_big5_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index bc0d794db3d..2893aadd99f 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -125,6 +125,17 @@ static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)), } +static int my_strnncollsp_nchars_binary(CHARSET_INFO * cs __attribute__((unused)), + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + size_t nchars) +{ + set_if_smaller(slen, nchars); + set_if_smaller(tlen, nchars); + return my_strnncoll_binary(cs, s, slen, t, tlen, 0); +} + + static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), const uchar *s, size_t slen, const uchar *t, size_t tlen, @@ -199,6 +210,17 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), } +static int my_strnncollsp_nchars_8bit_bin(CHARSET_INFO * cs, + const uchar *a, size_t a_length, + const uchar *b, size_t b_length, + size_t nchars) +{ + set_if_smaller(a_length, nchars); + set_if_smaller(b_length, nchars); + return my_strnncollsp_8bit_bin(cs, a, a_length, b, b_length); +} + + static int my_strnncollsp_8bit_nopad_bin(CHARSET_INFO * cs __attribute__((unused)), const uchar *a, size_t a_length, @@ -487,6 +509,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler = my_coll_init_8bit_bin, my_strnncoll_8bit_bin, my_strnncollsp_8bit_bin, + my_strnncollsp_nchars_8bit_bin, my_strnxfrm_8bit_bin, 
my_strnxfrmlen_simple, my_like_range_simple, @@ -503,6 +526,7 @@ MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler = my_coll_init_8bit_bin, my_strnncoll_8bit_bin, my_strnncollsp_8bit_nopad_bin, + my_strnncollsp_nchars_8bit_bin, my_strnxfrm_8bit_nopad_bin, my_strnxfrmlen_simple, my_like_range_simple, @@ -519,6 +543,7 @@ static MY_COLLATION_HANDLER my_collation_binary_handler = NULL, /* init */ my_strnncoll_binary, my_strnncollsp_binary, + my_strnncollsp_nchars_binary, my_strnxfrm_8bit_bin, my_strnxfrmlen_simple, my_like_range_simple, diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index bf97d1feb83..94450af4b91 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -34667,6 +34667,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci= NULL, /* init */ my_strnncoll_cp932_japanese_ci, my_strnncollsp_cp932_japanese_ci, + my_strnncollsp_nchars_cp932_japanese_ci, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -34683,6 +34684,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin= NULL, /* init */ my_strnncoll_cp932_bin, my_strnncollsp_cp932_bin, + my_strnncollsp_nchars_cp932_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -34699,6 +34701,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_nopad_ci= NULL, /* init */ my_strnncoll_cp932_japanese_ci, my_strnncollsp_cp932_japanese_nopad_ci, + my_strnncollsp_nchars_cp932_japanese_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -34715,6 +34718,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_nopad_bin= NULL, /* init */ my_strnncoll_cp932_bin, my_strnncollsp_cp932_nopad_bin, + my_strnncollsp_nchars_cp932_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index 17c4c98c24e..33d43d4dd4e 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -610,6 +610,7 @@ static MY_COLLATION_HANDLER 
my_collation_latin2_czech_ci_handler = NULL, /* init */ my_strnncoll_czech, my_strnncollsp_czech, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_czech, my_strnxfrmlen_czech, my_like_range_czech, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index deb13957900..22f8c4ec7c0 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -9957,6 +9957,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci= NULL, /* init */ my_strnncoll_euckr_korean_ci, my_strnncollsp_euckr_korean_ci, + my_strnncollsp_nchars_euckr_korean_ci, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -9973,6 +9974,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin= NULL, /* init */ my_strnncoll_euckr_bin, my_strnncollsp_euckr_bin, + my_strnncollsp_nchars_euckr_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -9989,6 +9991,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_nopad_ci= NULL, /* init */ my_strnncoll_euckr_korean_ci, my_strnncollsp_euckr_korean_nopad_ci, + my_strnncollsp_nchars_euckr_korean_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -10005,6 +10008,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_nopad_bin= NULL, /* init */ my_strnncoll_euckr_bin, my_strnncollsp_euckr_nopad_bin, + my_strnncollsp_nchars_euckr_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index 118e8286703..58ea37d36e6 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -67495,6 +67495,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_ci_handler = NULL, /* init */ my_strnncoll_eucjpms_japanese_ci, my_strnncollsp_eucjpms_japanese_ci, + my_strnncollsp_nchars_eucjpms_japanese_ci, my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -67511,6 +67512,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler = NULL, /* init */ 
my_strnncoll_eucjpms_bin, my_strnncollsp_eucjpms_bin, + my_strnncollsp_nchars_eucjpms_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -67527,6 +67529,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_nopad_ci_handler = NULL, /* init */ my_strnncoll_eucjpms_japanese_ci, my_strnncollsp_eucjpms_japanese_nopad_ci, + my_strnncollsp_nchars_eucjpms_japanese_nopad_ci, my_strnxfrm_mb_nopad, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -67543,6 +67546,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_nopad_bin_handler = NULL, /* init */ my_strnncoll_eucjpms_bin, my_strnncollsp_eucjpms_nopad_bin, + my_strnncollsp_nchars_eucjpms_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 166619bf5cc..84246ad6671 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -6362,6 +6362,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci= NULL, /* init */ my_strnncoll_gb2312_chinese_ci, my_strnncollsp_gb2312_chinese_ci, + my_strnncollsp_nchars_gb2312_chinese_ci, my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -6378,6 +6379,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin= NULL, /* init */ my_strnncoll_gb2312_bin, my_strnncollsp_gb2312_bin, + my_strnncollsp_nchars_gb2312_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -6394,6 +6396,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_nopad_ci= NULL, /* init */ my_strnncoll_gb2312_chinese_ci, my_strnncollsp_gb2312_chinese_nopad_ci, + my_strnncollsp_nchars_gb2312_chinese_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -6410,6 +6413,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_nopad_bin= NULL, /* init */ my_strnncoll_gb2312_bin, my_strnncollsp_gb2312_nopad_bin, + my_strnncollsp_nchars_gb2312_nopad_bin, my_strnxfrm_mb_nopad, 
my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index efaa2e5c728..d7ea47c409f 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -10645,6 +10645,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci= NULL, /* init */ my_strnncoll_gbk_chinese_ci, my_strnncollsp_gbk_chinese_ci, + my_strnncollsp_nchars_gbk_chinese_ci, my_strnxfrm_gbk_chinese_ci, my_strnxfrmlen_simple, my_like_range_mb, @@ -10661,6 +10662,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin= NULL, /* init */ my_strnncoll_gbk_bin, my_strnncollsp_gbk_bin, + my_strnncollsp_nchars_gbk_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -10677,6 +10679,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_nopad_ci= NULL, /* init */ my_strnncoll_gbk_chinese_ci, my_strnncollsp_gbk_chinese_nopad_ci, + my_strnncollsp_nchars_gbk_chinese_nopad_ci, my_strnxfrm_gbk_chinese_nopad_ci, my_strnxfrmlen_simple, my_like_range_mb, @@ -10693,6 +10696,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_nopad_bin= NULL, /* init */ my_strnncoll_gbk_bin, my_strnncollsp_gbk_nopad_bin, + my_strnncollsp_nchars_gbk_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index f9fa1488aa6..bcf1cc6c9f1 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -726,6 +726,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler= NULL, /* init */ my_strnncoll_latin1_de, my_strnncollsp_latin1_de, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_latin1_de, my_strnxfrmlen_simple, my_like_range_simple, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 9c6cb34137d..d150e457673 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -208,6 +208,18 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length, } +static int +my_strnncollsp_nchars_simple(CHARSET_INFO * cs, + const uchar 
*a, size_t a_length, + const uchar *b, size_t b_length, + size_t nchars) +{ + set_if_smaller(a_length, nchars); + set_if_smaller(b_length, nchars); + return my_strnncollsp_simple(cs, a, a_length, b, b_length); +} + + int my_strnncollsp_simple_nopad(CHARSET_INFO * cs, const uchar *a, size_t a_length, const uchar *b, size_t b_length) @@ -2096,6 +2108,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler = my_coll_init_simple, /* init */ my_strnncoll_simple, my_strnncollsp_simple, + my_strnncollsp_nchars_simple, my_strnxfrm_simple, my_strnxfrmlen_simple, my_like_range_simple, @@ -2112,6 +2125,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler = my_coll_init_simple, /* init */ my_strnncoll_simple, my_strnncollsp_simple_nopad, + my_strnncollsp_nchars_simple, my_strnxfrm_simple_nopad, my_strnxfrmlen_simple, my_like_range_simple, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 902034b435d..bd2bf432a34 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -34046,6 +34046,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci= NULL, /* init */ my_strnncoll_sjis_japanese_ci, my_strnncollsp_sjis_japanese_ci, + my_strnncollsp_nchars_sjis_japanese_ci, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -34062,6 +34063,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin= NULL, /* init */ my_strnncoll_sjis_bin, my_strnncollsp_sjis_bin, + my_strnncollsp_nchars_sjis_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -34078,6 +34080,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_nopad_ci= NULL, /* init */ my_strnncoll_sjis_japanese_ci, my_strnncollsp_sjis_japanese_nopad_ci, + my_strnncollsp_nchars_sjis_japanese_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -34094,6 +34097,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_nopad_bin= NULL, /* init */ my_strnncoll_sjis_bin, my_strnncollsp_sjis_nopad_bin, + 
my_strnncollsp_nchars_sjis_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 9760ea25162..d5367393c86 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -852,6 +852,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_tis620, my_strnncollsp_tis620, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_tis620, my_strnxfrmlen_simple, my_like_range_simple, @@ -867,6 +868,7 @@ static MY_COLLATION_HANDLER my_collation_nopad_ci_handler = NULL, /* init */ my_strnncoll_tis620, my_strnncollsp_tis620_nopad, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_tis620_nopad, my_strnxfrmlen_simple, my_like_range_simple, diff --git a/strings/ctype-uca-scanner_next.inl b/strings/ctype-uca-scanner_next.inl new file mode 100644 index 00000000000..79d25487b42 --- /dev/null +++ b/strings/ctype-uca-scanner_next.inl @@ -0,0 +1,179 @@ +/* Copyright (c) 2004, 2013, Oracle and/or its affiliates. + Copyright (c) 2009, 2021, MariaDB + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; version 2 + of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + MA 02110-1335 USA */ + + +#ifdef SCANNER_NEXT_NCHARS + +#define SCANNER_NEXT_RETURN(_w,_n) \ + do { weight_and_nchars_t rc= {_w, _n}; return rc; } while(0) + +#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \ + do { \ + weight_and_nchars_t rc= { _cnt->weight[0], \ + _ignorable_nchars + \ + my_contraction_char_length(_cnt) }; \ + return rc; \ + } while(0) + +#else + +#define SCANNER_NEXT_RETURN(_w,_n) do { return _w; } while (0) + +#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \ + do { return _cnt->weight[0]; } while(0) + +#endif + +static inline +#ifdef SCANNER_NEXT_NCHARS +weight_and_nchars_t +MY_FUNCTION_NAME(scanner_next_with_nchars)(my_uca_scanner *scanner, + size_t nchars) +#else +int +MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner) +#endif +{ +#ifdef SCANNER_NEXT_NCHARS + uint ignorable_nchars; +#define LOCAL_MAX_CONTRACTION_LENGTH nchars +#else +#define LOCAL_MAX_CONTRACTION_LENGTH MY_UCA_MAX_CONTRACTION +#endif + /* + Check if the weights for the previous character have been + already fully scanned. If yes, then get the next character and + initialize wbeg and wlength to its weight string. + */ + + if (scanner->wbeg[0]) + { + /* + More weights left from the previous step. + Return the next weight from the current expansion. + Return "0" as "nchars". The real nchars was set on a previous + iteration. 
+ */ + SCANNER_NEXT_RETURN(*scanner->wbeg++, 0); + } + +#ifdef SCANNER_NEXT_NCHARS + for (ignorable_nchars= 0 ; ; ignorable_nchars++) +#else + for ( ; ; ) +#endif + { + const uint16 *wpage; + my_wc_t wc[MY_UCA_MAX_CONTRACTION]; + int mblen; + + /* Get next character */ +#if MY_UCA_ASCII_OPTIMIZE + /* Get next ASCII character */ + if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80) + { + wc[0]= scanner->sbeg[0]; + scanner->sbeg+= 1; + +#if MY_UCA_COMPILE_CONTRACTIONS + if (my_uca_needs_context_handling(scanner->level, wc[0])) + { + const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc, + LOCAL_MAX_CONTRACTION_LENGTH); + if (cnt) + SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars); + } +#endif + + scanner->page= 0; + scanner->code= (int) wc[0]; + scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0]; + if (scanner->wbeg[0]) + SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1); + continue; + } + else +#endif + /* Get next MB character */ + if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg, + scanner->send)) <= 0)) + { + if (scanner->sbeg >= scanner->send) + { + /* No more bytes, end of line reached */ + SCANNER_NEXT_RETURN(-1, ignorable_nchars); + } + /* + There are some more bytes left. Non-positive mb_len means that + we got an incomplete or a bad byte sequence. Consume mbminlen bytes. + */ + if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send) + { + /* For safety purposes don't go beyond the string range. */ + scanner->sbeg= scanner->send; + } + /* + Treat every complete or incomplete mbminlen unit as a weight which is + greater than weight for any possible normal character. + 0xFFFF is greater than any possible weight in the UCA weight table. 
+ */ + SCANNER_NEXT_RETURN(0xFFFF, ignorable_nchars + 1); + } + + scanner->sbeg+= mblen; + if (wc[0] > scanner->level->maxchar) + { + /* Return 0xFFFD as weight for all characters outside BMP */ + scanner->wbeg= nochar; + SCANNER_NEXT_RETURN(0xFFFD, ignorable_nchars + 1); + } + +#if MY_UCA_COMPILE_CONTRACTIONS + if (my_uca_needs_context_handling(scanner->level, wc[0])) + { + const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc, + LOCAL_MAX_CONTRACTION_LENGTH); + if (cnt) + SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars); + } +#endif + + /* Process single character */ + scanner->page= wc[0] >> 8; + scanner->code= wc[0] & 0xFF; + + /* If weight page for w[0] does not exist, then calculate algoritmically */ + if (!(wpage= scanner->level->weights[scanner->page])) + SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner), + ignorable_nchars + 1); + + /* Calculate pointer to w[0]'s weight, using page and offset */ + scanner->wbeg= wpage + + scanner->code * scanner->level->lengths[scanner->page]; + if (scanner->wbeg[0]) + break; + /* Skip ignorable character and continue the loop */ + } + + SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1); +} + +#undef SCANNER_NEXT_NCHARS +#undef SCANNER_NEXT_RETURN +#undef SCANNER_NEXT_RETURN_CONTRACTION +#undef LOCAL_MAX_CONTRACTION_LENGTH diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 161830088a5..551efd8b0be 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -35,6 +35,12 @@ #include "strings_def.h" #include <m_ctype.h> +typedef struct +{ + int weight; + uint nchars; +} weight_and_nchars_t; + #define MY_CS_COMMON_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NON1TO1) #define MY_UCA_CNT_FLAG_SIZE 4096 @@ -31450,6 +31456,21 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len) } +/* + Return the number of characters in a contraction. 
+*/ +static inline uint my_contraction_char_length(const MY_CONTRACTION *cnt) +{ + uint i; + for (i= 2; i < array_elements(cnt->ch); i++) + { + if (cnt->ch[i] == 0) + return i; + } + return array_elements(cnt->ch); +} + + /** Check if a string is a contraction, and return its weight array on success. @@ -31487,8 +31508,9 @@ my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len) a contraction part. Then try to find real contraction among the candidates, starting from the longest. - @param scanner Pointer to UCA scanner - @param[OUT] *wc Where to store the scanned string + @param scanner Pointer to UCA scanner + @param[OUT] *wc Where to store the scanned string + @param max_char_length The longest contraction character length allowed @return Weight array @retval NULL - no contraction found @@ -31496,7 +31518,8 @@ my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len) */ static const MY_CONTRACTION * -my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc) +my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc, + size_t max_char_length) { size_t clen= 1; int flag; @@ -31505,7 +31528,7 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc) /* Scan all contraction candidates */ for (s= scanner->sbeg, flag= MY_UCA_CNT_MID1; - clen < MY_UCA_MAX_CONTRACTION; + clen < max_char_length; flag<<= 1) { int mblen; @@ -31582,11 +31605,14 @@ my_uca_previous_context_find(my_uca_scanner *scanner, If wc[0] and the previous character make a previous context pair, then wc[1] is set to the previous character. + @param max_char_length - the longest contraction character length allowed. 
+ @retval NULL if could not find any contextual weights for wc[0] @retval non null pointer - the address of MY_CONTRACTION found */ static inline const MY_CONTRACTION * -my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc) +my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc, + size_t max_char_length) { const MY_CONTRACTION *cnt; DBUG_ASSERT(scanner->level->contractions.nitems); @@ -31614,7 +31640,7 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc) wc[0])) { /* Check if w[0] starts a contraction */ - if ((cnt= my_uca_scanner_contraction_find(scanner, wc))) + if ((cnt= my_uca_scanner_contraction_find(scanner, wc, max_char_length))) return cnt; } return NULL; diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic index bb0eee85886..7c9d34d217e 100644 --- a/strings/ctype-uca.ic +++ b/strings/ctype-uca.ic @@ -35,108 +35,9 @@ #error MY_UCA_COLL_INIT is not defined #endif - -static inline int -MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner) -{ - /* - Check if the weights for the previous character have been - already fully scanned. If yes, then get the next character and - initialize wbeg and wlength to its weight string. 
- */ - - if (scanner->wbeg[0]) /* More weights left from the previous step: */ - return *scanner->wbeg++; /* return the next weight from expansion */ - - do - { - const uint16 *wpage; - my_wc_t wc[MY_UCA_MAX_CONTRACTION]; - int mblen; - - /* Get next character */ -#if MY_UCA_ASCII_OPTIMIZE - /* Get next ASCII character */ - if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80) - { - wc[0]= scanner->sbeg[0]; - scanner->sbeg+= 1; - -#if MY_UCA_COMPILE_CONTRACTIONS - if (my_uca_needs_context_handling(scanner->level, wc[0])) - { - const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc); - if (cnt) - return cnt->weight[0]; - } -#endif - - scanner->page= 0; - scanner->code= (int) wc[0]; - scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0]; - if (scanner->wbeg[0]) - return *scanner->wbeg++; - continue; - } - else -#endif - /* Get next MB character */ - if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg, - scanner->send)) <= 0)) - { - if (scanner->sbeg >= scanner->send) - return -1; /* No more bytes, end of line reached */ - /* - There are some more bytes left. Non-positive mb_len means that - we got an incomplete or a bad byte sequence. Consume mbminlen bytes. - */ - if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send) - { - /* For safety purposes don't go beyond the string range. */ - scanner->sbeg= scanner->send; - } - /* - Treat every complete or incomplete mbminlen unit as a weight which is - greater than weight for any possible normal character. - 0xFFFF is greater than any possible weight in the UCA weight table. 
- */ - return 0xFFFF; - } - - scanner->sbeg+= mblen; - if (wc[0] > scanner->level->maxchar) - { - /* Return 0xFFFD as weight for all characters outside BMP */ - scanner->wbeg= nochar; - return 0xFFFD; - } - -#if MY_UCA_COMPILE_CONTRACTIONS - if (my_uca_needs_context_handling(scanner->level, wc[0])) - { - const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc); - if (cnt) - return cnt->weight[0]; - } -#endif - - /* Process single character */ - scanner->page= wc[0] >> 8; - scanner->code= wc[0] & 0xFF; - - /* If weight page for w[0] does not exist, then calculate algoritmically */ - if (!(wpage= scanner->level->weights[scanner->page])) - return my_uca_scanner_next_implicit(scanner); - - /* Calculate pointer to w[0]'s weight, using page and offset */ - scanner->wbeg= wpage + - scanner->code * scanner->level->lengths[scanner->page]; - } while (!scanner->wbeg[0]); /* Skip ignorable characters */ - - return *scanner->wbeg++; -} - - +#include "ctype-uca-scanner_next.inl" +#define SCANNER_NEXT_NCHARS +#include "ctype-uca-scanner_next.inl" /* Compares two strings according to the collation @@ -409,6 +310,173 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs, } +/* + Scan the next weight and perform space padding + or trimming according to "nchars". +*/ +static inline weight_and_nchars_t +MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner, + size_t nchars, + uint *generated) +{ + weight_and_nchars_t res; + if (nchars > 0 || + scanner->wbeg[0] /* Some weights from a previous expansion left */) + { + if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner, + nchars)).weight < 0) + { + /* + We reached the end of the string, but the caller wants more weights. + Perform space padding. + */ + res.weight= my_space_weight(scanner->level); + res.nchars= 1; + (*generated)++; + } + else if (res.nchars > nchars) + { + /* + We scanned the next collation element, but it does not fit into + the "nchars" limit. 
This is possible in case of: + - A contraction, e.g. Czech 'ch' with nchars=1 + - A sequence of ignorable characters followed by non-ignorable ones, + e.g. CONCAT(x'00','a') with nchars=1. + Perform trimming. + */ + res.weight= scanner->cs->state & MY_CS_NOPAD ? + 0 : my_space_weight(scanner->level); + res.nchars= (uint) nchars; + (*generated)++; + } + } + else + { + /* The caller wants nchars==0. Perform trimming. */ + res.weight= scanner->cs->state & MY_CS_NOPAD ? + 0 : my_space_weight(scanner->level); + res.nchars= 0; + (*generated)++; + } + return res; +} + + +static int +MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs, + const MY_UCA_WEIGHT_LEVEL *level, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + size_t nchars) +{ + my_uca_scanner sscanner; + my_uca_scanner tscanner; + size_t s_nchars_left= nchars; + size_t t_nchars_left= nchars; + + my_uca_scanner_init_any(&sscanner, cs, level, s, slen); + my_uca_scanner_init_any(&tscanner, cs, level, t, tlen); + + for ( ; ; ) + { + weight_and_nchars_t s_res; + weight_and_nchars_t t_res; + uint generated= 0; + int diff; + + s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left, + &generated); + t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left, + &generated); + if ((diff= (s_res.weight - t_res.weight))) + return diff; + + if (generated == 2) + { + if (cs->state & MY_CS_NOPAD) + { + /* + Both values are auto-generated. There's no real data any more. + We need to handle the remaining virtual trailing spaces. + The two strings still have s_nchars_left and t_nchars_left imaginary + trailing spaces at the end. If s_nchars_left != t_nchars_left, + the strings will be not equal in case of a NOPAD collation. 
+ + Example: + "B" is German "U+00DF LATIN SMALL LETTER SHARP S" + When we have these values in a + CHAR(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_nopad_ci + column: + 'B ' (one character, two trailing spaces) + 'ss ' (two characters, one trailing space) + The 'B ' is greater than the 'ss '. + They are compared in the following steps: + 1. 'B' == 'ss' + 2. ' ' == ' ' + 3. ' ' > '' + + We need to emulate the same behavior in this function even if + it's called with strings 'B' and 'ss' (with space trimmed). + The side which has more remaining virtual spaces at the end + is greater. + */ + if (s_nchars_left < t_nchars_left) + return -1; + if (s_nchars_left > t_nchars_left) + return +1; + } + return 0; + } + + DBUG_ASSERT(s_nchars_left >= s_res.nchars); + DBUG_ASSERT(t_nchars_left >= t_res.nchars); + s_nchars_left-= s_res.nchars; + t_nchars_left-= t_res.nchars; + } + + return 0; +} + + +/* + One-level collations. +*/ +static int +MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + size_t nchars) +{ + return MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, &cs->uca->level[0], + s, slen, t, tlen, + nchars); +} + + +/* + Multi-level collations. 
+*/ +static int +MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + size_t nchars) +{ + uint num_level= cs->levels_for_order; + uint i; + for (i= 0; i != num_level; i++) + { + int ret= MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, + &cs->uca->level[i], + s, slen, + t, tlen, + nchars); + if (ret) + return ret; + } + return 0; +} + /* Calculates hash value for the given string, @@ -752,6 +820,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)= MY_UCA_COLL_INIT, MY_FUNCTION_NAME(strnncoll), MY_FUNCTION_NAME(strnncollsp), + MY_FUNCTION_NAME(strnncollsp_nchars), MY_FUNCTION_NAME(strnxfrm), my_strnxfrmlen_any_uca, MY_LIKE_RANGE, @@ -773,6 +842,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)= MY_UCA_COLL_INIT, MY_FUNCTION_NAME(strnncoll), MY_FUNCTION_NAME(strnncollsp_nopad), + MY_FUNCTION_NAME(strnncollsp_nchars), MY_FUNCTION_NAME(strnxfrm_nopad), my_strnxfrmlen_any_uca, MY_LIKE_RANGE, /* my_like_range_mb or my_like_range_generic */ @@ -792,6 +862,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)= MY_UCA_COLL_INIT, MY_FUNCTION_NAME(strnncoll_multilevel), MY_FUNCTION_NAME(strnncollsp_multilevel), + MY_FUNCTION_NAME(strnncollsp_nchars_multilevel), MY_FUNCTION_NAME(strnxfrm_multilevel), my_strnxfrmlen_any_uca_multilevel, MY_LIKE_RANGE, @@ -811,6 +882,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)= MY_UCA_COLL_INIT, MY_FUNCTION_NAME(strnncoll_multilevel), MY_FUNCTION_NAME(strnncollsp_nopad_multilevel), + MY_FUNCTION_NAME(strnncollsp_nchars_multilevel), MY_FUNCTION_NAME(strnxfrm_multilevel), my_strnxfrmlen_any_uca_multilevel, MY_LIKE_RANGE, diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 0c153793e8e..36ab6f5c0b1 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1505,6 +1505,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = NULL, /* init */ 
my_strnncoll_utf16_general_ci, my_strnncollsp_utf16_general_ci, + my_strnncollsp_nchars_utf16_general_ci, my_strnxfrm_utf16_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1521,6 +1522,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = NULL, /* init */ my_strnncoll_utf16_bin, my_strnncollsp_utf16_bin, + my_strnncollsp_nchars_utf16_bin, my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -1537,6 +1539,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_utf16_general_ci, my_strnncollsp_utf16_general_nopad_ci, + my_strnncollsp_nchars_utf16_general_nopad_ci, my_strnxfrm_nopad_utf16_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1553,6 +1556,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf16_bin, my_strnncollsp_utf16_nopad_bin, + my_strnncollsp_nchars_utf16_nopad_bin, my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -1845,6 +1849,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler = NULL, /* init */ my_strnncoll_utf16le_general_ci, my_strnncollsp_utf16le_general_ci, + my_strnncollsp_nchars_utf16le_general_ci, my_strnxfrm_utf16le_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1861,6 +1866,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = NULL, /* init */ my_strnncoll_utf16le_bin, my_strnncollsp_utf16le_bin, + my_strnncollsp_nchars_utf16le_bin, my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -1877,6 +1883,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_utf16le_general_ci, my_strnncollsp_utf16le_general_nopad_ci, + my_strnncollsp_nchars_utf16le_general_nopad_ci, my_strnxfrm_nopad_utf16le_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1893,6 +1900,7 @@ static 
MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf16le_bin, my_strnncollsp_utf16le_nopad_bin, + my_strnncollsp_nchars_utf16le_nopad_bin, my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -2671,6 +2679,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler = NULL, /* init */ my_strnncoll_utf32_general_ci, my_strnncollsp_utf32_general_ci, + my_strnncollsp_nchars_utf32_general_ci, my_strnxfrm_utf32_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -2687,6 +2696,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler = NULL, /* init */ my_strnncoll_utf32_bin, my_strnncollsp_utf32_bin, + my_strnncollsp_nchars_utf32_bin, my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -2703,6 +2713,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_utf32_general_ci, my_strnncollsp_utf32_general_nopad_ci, + my_strnncollsp_nchars_utf32_general_nopad_ci, my_strnxfrm_nopad_utf32_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -2719,6 +2730,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf32_bin, my_strnncollsp_utf32_nopad_bin, + my_strnncollsp_nchars_utf32_nopad_bin, my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -3261,6 +3273,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = NULL, /* init */ my_strnncoll_ucs2_general_ci, my_strnncollsp_ucs2_general_ci, + my_strnncollsp_nchars_ucs2_general_ci, my_strnxfrm_ucs2_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -3277,6 +3290,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = NULL, /* init */ my_strnncoll_ucs2_bin, my_strnncollsp_ucs2_bin, + my_strnncollsp_nchars_ucs2_bin, my_strnxfrm_ucs2_bin, my_strnxfrmlen_unicode, my_like_range_generic, @@ -3293,6 +3307,7 
@@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_ucs2_general_ci, my_strnncollsp_ucs2_general_nopad_ci, + my_strnncollsp_nchars_ucs2_general_nopad_ci, my_strnxfrm_nopad_ucs2_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -3309,6 +3324,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler = NULL, /* init */ my_strnncoll_ucs2_bin, my_strnncollsp_ucs2_nopad_bin, + my_strnncollsp_nchars_ucs2_nopad_bin, my_strnxfrm_nopad_ucs2_bin, my_strnxfrmlen_unicode, my_like_range_generic, diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index 949f3aadc36..34600eda1a5 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -67239,6 +67239,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_ci_handler = NULL, /* init */ my_strnncoll_ujis_japanese_ci, my_strnncollsp_ujis_japanese_ci, + my_strnncollsp_nchars_ujis_japanese_ci, my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -67255,6 +67256,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler = NULL, /* init */ my_strnncoll_ujis_bin, my_strnncollsp_ujis_bin, + my_strnncollsp_nchars_ujis_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -67271,6 +67273,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_nopad_ci_handler = NULL, /* init */ my_strnncoll_ujis_japanese_ci, my_strnncollsp_ujis_japanese_nopad_ci, + my_strnncollsp_nchars_ujis_japanese_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -67287,6 +67290,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_nopad_bin_handler = NULL, /* init */ my_strnncoll_ujis_bin, my_strnncollsp_ujis_nopad_bin, + my_strnncollsp_nchars_ujis_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index e579d7b2bc6..7a87dbb7c05 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -5357,6 +5357,7 @@ static 
MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler = NULL, /* init */ my_strnncoll_utf8_general_ci, my_strnncollsp_utf8_general_ci, + my_strnncollsp_nchars_utf8_general_ci, my_strnxfrm_utf8_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5373,6 +5374,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler = NULL, /* init */ my_strnncoll_utf8_general_mysql500_ci, my_strnncollsp_utf8_general_mysql500_ci, + my_strnncollsp_nchars_utf8_general_mysql500_ci, my_strnxfrm_utf8_general_mysql500_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5389,6 +5391,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler = NULL, /* init */ my_strnncoll_utf8_bin, my_strnncollsp_utf8_bin, + my_strnncollsp_nchars_utf8_bin, my_strnxfrm_utf8_bin, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5405,6 +5408,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_utf8_general_ci, my_strnncollsp_utf8_general_nopad_ci, + my_strnncollsp_nchars_utf8_general_nopad_ci, my_strnxfrm_nopad_utf8_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5421,6 +5425,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf8_bin, my_strnncollsp_utf8_nopad_bin, + my_strnncollsp_nchars_utf8_nopad_bin, my_strnxfrm_nopad_utf8_bin, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5750,6 +5755,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler = NULL, /* init */ my_strnncoll_utf8_cs, my_strnncollsp_utf8_cs, + my_strnncollsp_nchars_generic, my_strnxfrm_utf8_general_ci, my_strnxfrmlen_unicode, my_like_range_simple, @@ -7058,6 +7064,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = NULL, /* init */ my_strnncoll_simple, my_strnncollsp_simple, + my_strnncollsp_nchars_generic, my_strnxfrm_filename, my_strnxfrmlen_unicode, my_like_range_mb, @@ -7697,6 +7704,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler= NULL, /* init */ 
my_strnncoll_utf8mb4_general_ci, my_strnncollsp_utf8mb4_general_ci, + my_strnncollsp_nchars_utf8mb4_general_ci, my_strnxfrm_utf8mb4_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -7713,6 +7721,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler = NULL, /* init */ my_strnncoll_utf8mb4_bin, my_strnncollsp_utf8mb4_bin, + my_strnncollsp_nchars_utf8mb4_bin, my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_mb, @@ -7729,6 +7738,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_nopad_ci_handler= NULL, /* init */ my_strnncoll_utf8mb4_general_ci, my_strnncollsp_utf8mb4_general_nopad_ci, + my_strnncollsp_nchars_utf8mb4_general_nopad_ci, my_strnxfrm_nopad_utf8mb4_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -7745,6 +7755,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf8mb4_bin, my_strnncollsp_utf8mb4_nopad_bin, + my_strnncollsp_nchars_utf8mb4_nopad_bin, my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_mb, diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index f33a83294d6..15fa6299e4e 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -674,6 +674,7 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler = NULL, /* init */ my_strnncoll_win1250ch, my_strnncollsp_win1250ch, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_win1250ch, my_strnxfrmlen_simple, my_like_range_win1250ch, diff --git a/strings/ctype.c b/strings/ctype.c index 32c41e6e9e7..0cf1131ab57 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -1210,3 +1210,32 @@ outp: copy_status->m_source_end_pos= from; return to - to_start; } + + +int my_strnncollsp_nchars_generic(CHARSET_INFO *cs, + const uchar *str1, size_t len1, + const uchar *str2, size_t len2, + size_t nchars) +{ + int error; + len1= my_well_formed_length(cs, (const char *) str1, + (const char *) str1 + len1, + nchars, &error); + len2= 
my_well_formed_length(cs, (const char *) str2, + (const char *) str2 + len2, + nchars, &error); + DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0); + return cs->coll->strnncollsp(cs, str1, len1, str2, len2); +} + + +int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs, + const uchar *str1, size_t len1, + const uchar *str2, size_t len2, + size_t nchars) +{ + set_if_smaller(len1, nchars); + set_if_smaller(len2, nchars); + DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0); + return cs->coll->strnncollsp(cs, str1, len1, str2, len2); +} diff --git a/strings/strcoll.ic b/strings/strcoll.ic index 86789fc4189..392a5dac589 100644 --- a/strings/strcoll.ic +++ b/strings/strcoll.ic @@ -287,6 +287,56 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)), } #endif + +/** + Compare two strings according to the collation, + with trailing space padding or trimming, according to "nchars". + + @param cs - the character set and collation + @param a - the left string + @param a_length - the length of the left string + @param b - the right string + @param b_length - the length of the right string + @param nchars - compare this amount of characters only + @return - the comparison result +*/ +static int +MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, size_t a_length, + const uchar *b, size_t b_length, + size_t nchars) +{ + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + for ( ; nchars ; nchars--) + { + int a_weight, b_weight, res; + uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end); + uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end); + + if ((res= (a_weight - b_weight))) + { + /* Got two different weights. See comments in strnncollsp above. */ + return res; + } + if (!a_wlen && !b_wlen) + { + /* Got two auto-generated trailing spaces. 
*/ + DBUG_ASSERT(a == a_end); + DBUG_ASSERT(b == b_end); + return 0; + } + /* + At least one of the strings has not ended yet, continue comparison. + */ + DBUG_ASSERT(a < a_end || b < b_end); + a+= a_wlen; + b+= b_wlen; + } + return 0; +} + + #endif /* DEFINE_STRNNCOLL */ diff --git a/strings/strings_def.h b/strings/strings_def.h index b3727321e19..8bf089ec695 100644 --- a/strings/strings_def.h +++ b/strings/strings_def.h @@ -105,6 +105,16 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len) } +int my_strnncollsp_nchars_generic(CHARSET_INFO *cs, + const uchar *str1, size_t len1, + const uchar *str2, size_t len2, + size_t nchars); + +int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs, + const uchar *str1, size_t len1, + const uchar *str2, size_t len2, + size_t nchars); + uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs); uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs); diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c index 00d49971595..97b9eb1a95e 100644 --- a/unittest/strings/strings-t.c +++ b/unittest/strings/strings-t.c @@ -19,6 +19,30 @@ /* + U+00DF LATIN SMALL LETTER SHARP S = _utf8 x'C39F' = _latin1 x'DF' +*/ + +#define UTF8_sz "\xC3\x9F" +#define LATIN1_sz "\xDF" + +/* + U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE = _utf8 x'C385' +*/ + +#define UTF8_ARING "\xC3\x85" + +/* + U+00E4 LATIN SMALL LETTER A WITH DIAERESIS = _utf8 x'C3A4' +*/ +#define UTF8_auml "\xC3\xA4" +#define LATIN1_auml "\xE4" + +#define UCS2_a "\x00\x61" +#define UCS2_b "\x00\x62" +#define UCS2_sp "\x00\x20" + + +/* Test that like_range() returns well-formed results. */ static int @@ -758,11 +782,483 @@ test_strcollsp() } -int main() +typedef struct +{ + LEX_CSTRING a; + LEX_CSTRING b; + size_t nchars; + int res; +} STRNNCOLLSP_CHAR_PARAM; + + +/* + Some lines in the below test data are marked as follows: + + IF - An ignorable failure. 
The scanner finds an ignorable character + followed by a normal character (or by a contraction), + but the "nchars" limit allows only one character to be scanned. + The whole sequence is ignored and is treated as end-of-line. + CF - A contraction failure. The scanner finds a contraction consisting + of two characters, but the "nchars" limit allows only one character + to be scanned. The whole contraction is ignored and is treated + as end-of-line. +*/ + + +/* + Tests for mbminlen1 character sets, + for both PAD SPACE and NOPAD collations +*/ +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen1_xpad_common[]= +{ + {{CSTR("a")}, {CSTR("a")}, 0, 0}, + {{CSTR("a")}, {CSTR("a")}, 1, 0}, + {{CSTR("a")}, {CSTR("a")}, 2, 0}, + {{CSTR("a")}, {CSTR("a")}, 3, 0}, + {{CSTR("a")}, {CSTR("a")}, 100, 0}, + + {{CSTR("a")}, {CSTR("ab")}, 0, 0}, + {{CSTR("a")}, {CSTR("ab")}, 1, 0}, + {{CSTR("a")}, {CSTR("ab")}, 2, -1}, + {{CSTR("a")}, {CSTR("ab")}, 3, -1}, + {{CSTR("a")}, {CSTR("ab")}, 100, -1}, + + {{CSTR("a")}, {CSTR("a ")}, 0, 0}, + {{CSTR("a")}, {CSTR("a ")}, 1, 0}, + {{CSTR("a")}, {CSTR("a ")}, 2, 0}, + {{CSTR("a")}, {CSTR("a ")}, 3, 0}, + {{CSTR("a")}, {CSTR("a ")}, 100, 0}, + + {{CSTR("a")}, {CSTR("a ")}, 0, 0}, + {{CSTR("a")}, {CSTR("a ")}, 1, 0}, + {{CSTR("a")}, {CSTR("a ")}, 2, 0}, + {{CSTR("a")}, {CSTR("a ")}, 3, 0}, + {{CSTR("a")}, {CSTR("a ")}, 100, 0}, + + {{CSTR("ss")}, {CSTR("ss")}, 0, 0}, + {{CSTR("ss")}, {CSTR("ss")}, 1, 0}, + {{CSTR("ss")}, {CSTR("ss")}, 2, 0}, + {{CSTR("ss")}, {CSTR("ss")}, 3, 0}, + {{CSTR("ss")}, {CSTR("ss")}, 100, 0}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +/* Tests for utf8, for both PAD SPACE and NOPAD collations */ +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_xpad_common[]= +{ + {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 0, 0}, + {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 1, 0}, + {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 2, 0}, + {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 3, 0}, + {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 100, 0}, + + {{NULL, 0}, {NULL, 0}, 0, 
0} +}; + + +/* Tests for latin1, for both PAD and NOPAD collations */ +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_latin1_xpad_common[]= +{ + {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 0, 0}, + {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 1, 0}, + {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 2, 0}, + {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 3, 0}, + {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 100, 0}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +/* Tests for utf8 collations that sort "A WITH DIAERESIS" equal to "A" */ +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_xpad_a_eq_auml[]= +{ + {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 0, 0}, + {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 1, 0}, + {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 2, 0}, + {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 3, 0}, + {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 100, 0}, + + {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 0, 0}, + {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 1, 0}, + {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 2, 0}, + {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 3, 0}, + {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 100, 0}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_ci[]= +{ + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 0, 0}, + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 1, 0}, + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}/*IF*/, 2, 1}, + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 3, 0}, + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 4, 0}, + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 100, 0}, + + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 0, 0}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 1, -1}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 2, 0}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 3, 0}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, 0}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, 0}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_nopad_ci[]= +{ + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 0, 0}, + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 1, 0}, + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}/*IF*/, 2, 1}, + 
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 3, 1}, + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 4, 1}, + {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 100, 1}, + + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 0, 0}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 1, -1}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 2, -1}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 3, -1}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, -1}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, -1}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_danish_ci[]= +{ + {{CSTR("aa")}, {CSTR("")}, 0, 0}, + {{CSTR("aa")}/*CF*/, {CSTR("")}, 1, 1}, + {{CSTR("aa")}, {CSTR("")}, 2, 1}, + {{CSTR("aa")}, {CSTR("")}, 3, 1}, + {{CSTR("aa")}, {CSTR("")}, 100, 1}, + + {{CSTR("aa")}, {CSTR("a")}, 0, 0}, + {{CSTR("aa")}/*CF*/, {CSTR("a")}, 1, 0}, + {{CSTR("aa")}, {CSTR("a")}, 2, 1}, + {{CSTR("aa")}, {CSTR("a")}, 3, 1}, + {{CSTR("aa")}, {CSTR("a")}, 100, 1}, + + {{CSTR("aa")}, {CSTR("aa")}, 0, 0}, + {{CSTR("aa")}/*CF*/, {CSTR("aa")}/*CF*/, 1, 0}, + {{CSTR("aa")}, {CSTR("aa")}, 2, 0}, + {{CSTR("aa")}, {CSTR("aa")}, 3, 0}, + {{CSTR("aa")}, {CSTR("aa")}, 100, 0}, + + {{CSTR("aa")}, {CSTR("\x00" "a")}, 0, 0}, + {{CSTR("aa")}/*CF*/, {CSTR("\x00" "a")}/*IF*/, 1, 1}, + {{CSTR("aa")}, {CSTR("\x00" "a")}, 2, 1}, + {{CSTR("aa")}, {CSTR("\x00" "a")}, 3, 1}, + {{CSTR("aa")}, {CSTR("\x00" "a")}, 100, 1}, + + {{CSTR("aa")}, {CSTR("\x00" "aa")}, 0, 0}, + {{CSTR("aa")}/*CF*/, {CSTR("\x00" "aa")}/*IF*/, 1, 1}, + {{CSTR("aa")}, {CSTR("\x00" "aa")}/*IF*/, 2, 1}, + {{CSTR("aa")}, {CSTR("\x00" "aa")}, 3, 0}, + {{CSTR("aa")}, {CSTR("\x00" "aa")}, 100, 0}, + + {{CSTR("aa")}, {CSTR("a" "\x00" "a")}, 0, 0}, + {{CSTR("aa")}/*CF*/, {CSTR("a" "\x00" "a")}, 1, 0}, + {{CSTR("aa")}, {CSTR("a" "\x00" "a")}/*IF*/, 2, 1}, + {{CSTR("aa")}, {CSTR("a" "\x00" "a")}, 3, 1}, + {{CSTR("aa")}, {CSTR("a" "\x00" "a")}, 100, 1}, + + {{CSTR("aa")}, {CSTR(UTF8_ARING)}, 0, 0}, + {{CSTR("aa")}/*CF*/, {CSTR(UTF8_ARING)}, 1, -1}, + {{CSTR("aa")}, {CSTR(UTF8_ARING)}, 2, 0}, + {{CSTR("aa")}, 
{CSTR(UTF8_ARING)}, 3, 0}, + {{CSTR("aa")}, {CSTR(UTF8_ARING)}, 100, 0}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_latin1_german2_ci[]= +{ + {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 0, 0}, + {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 1, -1}, + {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 2, 0}, + {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 3, 0}, + {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 100, 0}, + + {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 0, 0}, + {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 1, -1}, + {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 2, 0}, + {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 3, 0}, + {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 100, 0}, + + {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 0, 0}, + {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 1, -1}, + {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 2, 0}, + {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 3, 0}, + {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 100, 0}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_german2_ci[]= +{ + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 0, 0}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 1, -1}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 2, 0}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 3, 0}, + {{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, 0}, + + {{CSTR("ae")}, {CSTR(UTF8_auml)}, 0, 0}, + {{CSTR("ae")}, {CSTR(UTF8_auml)}, 1, -1}, + {{CSTR("ae")}, {CSTR(UTF8_auml)}, 2, 0}, + {{CSTR("ae")}, {CSTR(UTF8_auml)}, 3, 0}, + {{CSTR("ae")}, {CSTR(UTF8_auml)}, 100, 0}, + + {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 0, 0}, + {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 1, -1}, + {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 2, 0}, + {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 3, 0}, + {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 100, 0}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen1_xpad_czech[]= +{ + {{CSTR("c")}, {CSTR("ch")}, 0, 0}, + {{CSTR("c")}, {CSTR("ch")}, 1, 0}, + {{CSTR("c")}, {CSTR("ch")}, 2, -1}, + + {{CSTR("h")}, {CSTR("ch")}, 0, 0}, + {{CSTR("h")}, {CSTR("ch")}, 1, 1}, + {{CSTR("h")}, {CSTR("ch")}, 
2, -1}, + + {{CSTR("i")}, {CSTR("ch")}, 0, 0}, + {{CSTR("i")}, {CSTR("ch")}, 1, 1}, + {{CSTR("i")}, {CSTR("ch")}, 2, 1}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen2_xpad_common[]= +{ + {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 0, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 1, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 2, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 3, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 100, 0}, + + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 0, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 1, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 2, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 3, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 100, 0}, + + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 0, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 1, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 2, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 3, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 100, 0}, + + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 0, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 1, 0}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 2, -1}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 3, -1}, + {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 100, -1}, + + {{NULL, 0}, {NULL, 0}, 0, 0} +}; + + +static int +strnncollsp_char_one(CHARSET_INFO *cs, const STRNNCOLLSP_CHAR_PARAM *p) +{ + int failed= 0; + char ahex[64], bhex[64]; + int res= cs->coll->strnncollsp_nchars(cs, + (uchar *) p->a.str, p->a.length, + (uchar *) p->b.str, p->b.length, + p->nchars); + str2hex(ahex, sizeof(ahex), p->a.str, p->a.length); + str2hex(bhex, sizeof(bhex), p->b.str, p->b.length); + diag("%-25s %-12s %-12s %3d %7d %7d%s", + cs->name, ahex, bhex, (int) p->nchars, p->res, res, + eqres(res, p->res) ? 
"" : " FAILED"); + if (!eqres(res, p->res)) + { + failed++; + } + else + { + /* Test in reverse order */ + res= cs->coll->strnncollsp_nchars(cs, + (uchar *) p->b.str, p->b.length, + (uchar *) p->a.str, p->a.length, + p->nchars); + if (!eqres(res, -p->res)) + { + diag("Comparison in reverse order failed. Expected %d, got %d", + -p->res, res); + failed++; + } + } + return failed; +} + + +static int +strnncollsp_char(const char *collation, const STRNNCOLLSP_CHAR_PARAM *param) +{ + int failed= 0; + const STRNNCOLLSP_CHAR_PARAM *p; + CHARSET_INFO *cs= get_charset_by_name(collation, MYF(0)); + + if (!cs) + { + diag("get_charset_by_name() failed"); + return 1; + } + + diag("%-25s %-12s %-12s %-3s %7s %7s", + "Collation", "a", "b", "Nch", "ExpSign", "Actual"); + + for (p= param; p->a.str; p++) + { + failed+= strnncollsp_char_one(cs, p); + } + + return failed; +} + + +static int +strnncollsp_char_mbminlen1(const char *collation, + const STRNNCOLLSP_CHAR_PARAM *specific) +{ + int failed= 0; + failed+= strnncollsp_char(collation, strnncollsp_char_mbminlen1_xpad_common); + if (specific) + failed+= strnncollsp_char(collation, specific); + return failed; +} + + +static int +strnncollsp_char_mbminlen2(const char *collation, + const STRNNCOLLSP_CHAR_PARAM *specific) +{ + int failed= 0; + failed+= strnncollsp_char(collation, strnncollsp_char_mbminlen2_xpad_common); + if (specific) + failed+= strnncollsp_char(collation, specific); + return failed; +} + + +static int +strnncollsp_char_latin1(const char *collation, + const STRNNCOLLSP_CHAR_PARAM *specific) +{ + int failed= 0; + failed+= strnncollsp_char(collation, strnncollsp_char_mbminlen1_xpad_common); + failed+= strnncollsp_char(collation, strnncollsp_char_latin1_xpad_common); + if (specific) + failed+= strnncollsp_char(collation, specific); + return failed; +} + + +static int +strnncollsp_char_utf8mbx(const char *collation, + const STRNNCOLLSP_CHAR_PARAM *specific) +{ + int failed= 0; + failed+= strnncollsp_char(collation, 
strnncollsp_char_mbminlen1_xpad_common); + failed+= strnncollsp_char(collation, strnncollsp_char_utf8mbx_xpad_common); + + if (!strstr(collation, "_bin") && + !strstr(collation, "_german2") && + !strstr(collation, "_danish")) + failed+= strnncollsp_char(collation, + strnncollsp_char_utf8mbx_xpad_a_eq_auml); + if (specific) + failed+= strnncollsp_char(collation, specific); + return failed; +} + + +static int +test_strnncollsp_char() +{ + int failed= 0; + failed+= strnncollsp_char_latin1("latin1_swedish_ci", NULL); + failed+= strnncollsp_char_latin1("latin1_swedish_nopad_ci", NULL); + failed+= strnncollsp_char_latin1("latin1_bin", NULL); + failed+= strnncollsp_char_latin1("latin1_nopad_bin", NULL); + failed+= strnncollsp_char_latin1("latin1_german2_ci", + strnncollsp_char_latin1_german2_ci); + +#ifdef HAVE_CHARSET_cp1250 + failed+= strnncollsp_char_mbminlen1("cp1250_czech_cs", + strnncollsp_char_mbminlen1_xpad_czech); +#endif + +#ifdef HAVE_CHARSET_latin2 + failed+= strnncollsp_char_mbminlen1("latin2_czech_cs", + strnncollsp_char_mbminlen1_xpad_czech); +#endif + +#ifdef HAVE_CHARSET_tis620 + failed+= strnncollsp_char_mbminlen1("tis620_thai_ci", NULL); +#endif + +#ifdef HAVE_CHARSET_big5 + failed+= strnncollsp_char_mbminlen1("big5_chinese_ci", NULL); + failed+= strnncollsp_char_mbminlen1("big5_chinese_nopad_ci", NULL); + failed+= strnncollsp_char_mbminlen1("big5_bin", NULL); + failed+= strnncollsp_char_mbminlen1("big5_nopad_bin", NULL); +#endif + + failed+= strnncollsp_char_utf8mbx("utf8mb3_general_ci", NULL); + failed+= strnncollsp_char_utf8mbx("utf8mb3_general_nopad_ci", NULL); + failed+= strnncollsp_char_utf8mbx("utf8mb3_bin", NULL); + failed+= strnncollsp_char_utf8mbx("utf8mb3_nopad_bin", NULL); + + failed+= strnncollsp_char_utf8mbx("utf8mb3_unicode_ci", + strnncollsp_char_utf8mb3_unicode_ci); + failed+= strnncollsp_char_utf8mbx("utf8mb3_unicode_nopad_ci", + strnncollsp_char_utf8mb3_unicode_nopad_ci); + failed+= strnncollsp_char_utf8mbx("utf8mb3_danish_ci", + 
strnncollsp_char_utf8mb3_danish_ci); + failed+= strnncollsp_char_utf8mbx("utf8mb3_german2_ci", + strnncollsp_char_utf8mbx_german2_ci); + failed+= strnncollsp_char_utf8mbx("utf8mb3_czech_ci", + strnncollsp_char_mbminlen1_xpad_czech); + +#ifdef HAVE_CHARSET_ucs2 + failed+= strnncollsp_char_mbminlen2("ucs2_general_ci", NULL); + failed+= strnncollsp_char_mbminlen2("ucs2_general_nopad_ci", NULL); + failed+= strnncollsp_char_mbminlen2("ucs2_bin", NULL); + failed+= strnncollsp_char_mbminlen2("ucs2_nopad_bin", NULL); + failed+= strnncollsp_char_mbminlen2("ucs2_unicode_ci", NULL); + failed+= strnncollsp_char_mbminlen2("ucs2_unicode_nopad_ci", NULL); +#endif + + return failed; +} + + +int main(int ac, char **av) { size_t i, failed= 0; - - plan(2); + + MY_INIT(av[0]); + + plan(3); diag("Testing my_like_range_xxx() functions"); for (i= 0; i < array_elements(charset_list); i++) @@ -780,5 +1276,11 @@ int main() failed= test_strcollsp(); ok(failed == 0, "Testing cs->coll->strnncollsp()"); + diag("Testing cs->coll->strnncollsp_char()"); + failed= test_strnncollsp_char(); + ok(failed == 0, "Testing cs->coll->strnncollsp_char()"); + + my_end(0); + return exit_status(); } |