summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexander Barkov <bar@mariadb.com>, 2021-09-29 15:13:57 +0400
committer: Alexander Barkov <bar@mariadb.com>, 2022-01-21 12:16:07 +0400
commit: b915f79e4e004fde4f6ac8f341afee980e11792b (patch)
tree: 2568032d75c7af9a72c6669b306fda4418b5ed20
parent: db574173d19731f1e5dc75d325f72398afac8d59 (diff)
download: mariadb-git-bb-10.4-bar-MDEV-25904.tar.gz
MDEV-25904 New collation functions to compare InnoDB style trimmed NO PAD strings [bb-10.4-bar-MDEV-25904]
-rw-r--r-- include/m_ctype.h 54
-rw-r--r-- sql/field.cc 47
-rw-r--r-- strings/ctype-big5.c 4
-rw-r--r-- strings/ctype-bin.c 25
-rw-r--r-- strings/ctype-cp932.c 4
-rw-r--r-- strings/ctype-czech.c 1
-rw-r--r-- strings/ctype-euc_kr.c 4
-rw-r--r-- strings/ctype-eucjpms.c 4
-rw-r--r-- strings/ctype-gb2312.c 4
-rw-r--r-- strings/ctype-gbk.c 4
-rw-r--r-- strings/ctype-latin1.c 1
-rw-r--r-- strings/ctype-simple.c 14
-rw-r--r-- strings/ctype-sjis.c 4
-rw-r--r-- strings/ctype-tis620.c 2
-rw-r--r-- strings/ctype-uca-scanner_next.inl 179
-rw-r--r-- strings/ctype-uca.c 38
-rw-r--r-- strings/ctype-uca.ic 276
-rw-r--r-- strings/ctype-ucs2.c 16
-rw-r--r-- strings/ctype-ujis.c 4
-rw-r--r-- strings/ctype-utf8.c 11
-rw-r--r-- strings/ctype-win1250ch.c 1
-rw-r--r-- strings/ctype.c 29
-rw-r--r-- strings/strcoll.ic 50
-rw-r--r-- strings/strings_def.h 10
-rw-r--r-- unittest/strings/strings-t.c 508
25 files changed, 1150 insertions, 144 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 0f6e6a11666..187c8710929 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -330,6 +330,60 @@ struct my_collation_handler_st
const uchar *, size_t, const uchar *, size_t, my_bool);
int (*strnncollsp)(CHARSET_INFO *,
const uchar *, size_t, const uchar *, size_t);
+ /*
+ strnncollsp_nchars() - similar to strnncollsp() but assumes that both
+ strings were originally CHAR(N) values with the
+ same N, then were optionally space-padded,
+ or optionally space-trimmed.
+
+ In other words, this function compares the two values
+ as if both were inserted into a CHAR(N) column
+ and the two stored column values were then compared.
+
+ It compares the same amount of characters from the two strings.
+ This is especially important for NOPAD collations.
+
+ If the CHAR_LENGTH values of the two strings differ,
+ the shorter string is virtually padded with trailing spaces
+ up to CHAR_LENGTH of the longer string, to guarantee that the
+ same amount of characters are compared.
+ This is important if the two CHAR(N) strings are space-trimmed
+ (e.g. like in InnoDB compact format for CHAR).
+
+ The function compares at most "nchars" characters.
+ This can be useful to compare CHAR(N) space-padded strings
+ (when the exact N is known) without having to truncate them before
+ the comparison.
+
+ For example, Field_string stores a "CHAR(3) CHARACTER SET utf8mb4" value
+ of "aaa" as 12 bytes in a record buffer:
+ - 3 bytes of the actual data, followed by
+ - 9 bytes of spaces (just fillers, not real data)
+ The caller can pass nchars=3 to compare CHAR(3) record values.
+ In such case, the comparator won't go inside the 9 bytes of the fillers.
+
+ If N is not known, the caller can pass max(len1,len2) as the "nchars" value
+ (i.e. the maximum of the OCTET_LENGTH of the two strings).
+
+ Notes on complex collations.
+
+ This function counts contraction parts as individual characters.
+ For example, the Czech letter 'ch' (in Czech collations)
+ is ordinarily counted by the "nchars" limit as TWO characters
+ (although it is only one letter).
+ This corresponds to what CHAR(N) does in INSERT.
+
+ If the "nchars" limit tears apart a contraction, only the part fitting
+ into "nchars" characters is used. For example, in case of a Czech collation,
+ the string "ach" with nchars=2 is compared as 'ac': the contraction
+ 'ch' is torn apart and the letter 'c' acts as an individual character.
+ This gives the same comparison result as inserting 'ach' into
+ a CHAR(2) column and then comparing the stored value.
+ */
+ int (*strnncollsp_nchars)(CHARSET_INFO *,
+ const uchar *str1, size_t len1,
+ const uchar *str2, size_t len2,
+ size_t nchars);
size_t (*strnxfrm)(CHARSET_INFO *,
uchar *dst, size_t dstlen, uint nweights,
const uchar *src, size_t srclen, uint flags);
diff --git a/sql/field.cc b/sql/field.cc
index 2226137b043..e3aa7d149a0 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -7433,23 +7433,10 @@ Field_string::compatible_field_size(uint field_metadata,
int Field_string::cmp(const uchar *a_ptr, const uchar *b_ptr)
{
- size_t a_len, b_len;
-
- if (field_charset->mbmaxlen != 1)
- {
- size_t char_len= field_length/field_charset->mbmaxlen;
- a_len= my_charpos(field_charset, a_ptr, a_ptr + field_length, char_len);
- b_len= my_charpos(field_charset, b_ptr, b_ptr + field_length, char_len);
- }
- else
- a_len= b_len= field_length;
- /*
- We have to remove end space to be able to compare multi-byte-characters
- like in latin_de 'ae' and 0xe4
- */
- return field_charset->coll->strnncollsp(field_charset,
- a_ptr, a_len,
- b_ptr, b_len);
+ return field_charset->coll->strnncollsp_nchars(field_charset,
+ a_ptr, field_length,
+ b_ptr, field_length,
+ Field_string::char_length());
}
@@ -7848,19 +7835,6 @@ int Field_varstring::cmp(const uchar *a_ptr, const uchar *b_ptr)
}
-static int cmp_str_prefix(const uchar *ua, size_t alen, const uchar *ub,
- size_t blen, size_t prefix, CHARSET_INFO *cs)
-{
- const char *a= (char*)ua, *b= (char*)ub;
- MY_STRCOPY_STATUS status;
- prefix/= cs->mbmaxlen;
- alen= cs->cset->well_formed_char_length(cs, a, a + alen, prefix, &status);
- blen= cs->cset->well_formed_char_length(cs, b, b + blen, prefix, &status);
- return cs->coll->strnncollsp(cs, ua, alen, ub, blen);
-}
-
-
-
int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
size_t prefix_len)
{
@@ -7880,8 +7854,12 @@ int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
a_length= uint2korr(a_ptr);
b_length= uint2korr(b_ptr);
}
- return cmp_str_prefix(a_ptr+length_bytes, a_length, b_ptr+length_bytes,
- b_length, prefix_len, field_charset);
+ return field_charset->coll->strnncollsp_nchars(field_charset,
+ a_ptr + length_bytes,
+ a_length,
+ b_ptr + length_bytes,
+ b_length,
+ prefix_len / field_charset->mbmaxlen);
}
@@ -8659,7 +8637,10 @@ int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
memcpy(&blob1, a_ptr+packlength, sizeof(char*));
memcpy(&blob2, b_ptr+packlength, sizeof(char*));
size_t a_len= get_length(a_ptr), b_len= get_length(b_ptr);
- return cmp_str_prefix(blob1, a_len, blob2, b_len, prefix_len, field_charset);
+ return field_charset->coll->strnncollsp_nchars(field_charset,
+ blob1, a_len,
+ blob2, b_len,
+ prefix_len / field_charset->mbmaxlen);
}
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 3991a219ab5..fdaa34eeaf0 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6711,6 +6711,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci=
NULL, /* init */
my_strnncoll_big5_chinese_ci,
my_strnncollsp_big5_chinese_ci,
+ my_strnncollsp_nchars_big5_chinese_ci,
my_strnxfrm_big5_chinese_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -6727,6 +6728,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
NULL, /* init */
my_strnncoll_big5_bin,
my_strnncollsp_big5_bin,
+ my_strnncollsp_nchars_big5_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -6743,6 +6745,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_nopad_ci=
NULL, /* init */
my_strnncoll_big5_chinese_ci,
my_strnncollsp_big5_chinese_nopad_ci,
+ my_strnncollsp_nchars_big5_chinese_nopad_ci,
my_strnxfrm_big5_chinese_nopad_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -6759,6 +6762,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_nopad_bin=
NULL, /* init */
my_strnncoll_big5_bin,
my_strnncollsp_big5_nopad_bin,
+ my_strnncollsp_nchars_big5_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index bc0d794db3d..2893aadd99f 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -125,6 +125,17 @@ static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
}
+static int my_strnncollsp_nchars_binary(CHARSET_INFO * cs __attribute__((unused)),
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ size_t nchars)
+{
+ set_if_smaller(slen, nchars);
+ set_if_smaller(tlen, nchars);
+ return my_strnncoll_binary(cs, s, slen, t, tlen, 0);
+}
+
+
static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
@@ -199,6 +210,17 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
}
+static int my_strnncollsp_nchars_8bit_bin(CHARSET_INFO * cs,
+ const uchar *a, size_t a_length,
+ const uchar *b, size_t b_length,
+ size_t nchars)
+{
+ set_if_smaller(a_length, nchars);
+ set_if_smaller(b_length, nchars);
+ return my_strnncollsp_8bit_bin(cs, a, a_length, b, b_length);
+}
+
+
static int my_strnncollsp_8bit_nopad_bin(CHARSET_INFO * cs
__attribute__((unused)),
const uchar *a, size_t a_length,
@@ -487,6 +509,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
my_coll_init_8bit_bin,
my_strnncoll_8bit_bin,
my_strnncollsp_8bit_bin,
+ my_strnncollsp_nchars_8bit_bin,
my_strnxfrm_8bit_bin,
my_strnxfrmlen_simple,
my_like_range_simple,
@@ -503,6 +526,7 @@ MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler =
my_coll_init_8bit_bin,
my_strnncoll_8bit_bin,
my_strnncollsp_8bit_nopad_bin,
+ my_strnncollsp_nchars_8bit_bin,
my_strnxfrm_8bit_nopad_bin,
my_strnxfrmlen_simple,
my_like_range_simple,
@@ -519,6 +543,7 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
NULL, /* init */
my_strnncoll_binary,
my_strnncollsp_binary,
+ my_strnncollsp_nchars_binary,
my_strnxfrm_8bit_bin,
my_strnxfrmlen_simple,
my_like_range_simple,
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index bf97d1feb83..94450af4b91 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -34667,6 +34667,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci=
NULL, /* init */
my_strnncoll_cp932_japanese_ci,
my_strnncollsp_cp932_japanese_ci,
+ my_strnncollsp_nchars_cp932_japanese_ci,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -34683,6 +34684,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
NULL, /* init */
my_strnncoll_cp932_bin,
my_strnncollsp_cp932_bin,
+ my_strnncollsp_nchars_cp932_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -34699,6 +34701,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_nopad_ci=
NULL, /* init */
my_strnncoll_cp932_japanese_ci,
my_strnncollsp_cp932_japanese_nopad_ci,
+ my_strnncollsp_nchars_cp932_japanese_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -34715,6 +34718,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_nopad_bin=
NULL, /* init */
my_strnncoll_cp932_bin,
my_strnncollsp_cp932_nopad_bin,
+ my_strnncollsp_nchars_cp932_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 17c4c98c24e..33d43d4dd4e 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -610,6 +610,7 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
NULL, /* init */
my_strnncoll_czech,
my_strnncollsp_czech,
+ my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_czech,
my_strnxfrmlen_czech,
my_like_range_czech,
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index deb13957900..22f8c4ec7c0 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -9957,6 +9957,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci=
NULL, /* init */
my_strnncoll_euckr_korean_ci,
my_strnncollsp_euckr_korean_ci,
+ my_strnncollsp_nchars_euckr_korean_ci,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -9973,6 +9974,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
NULL, /* init */
my_strnncoll_euckr_bin,
my_strnncollsp_euckr_bin,
+ my_strnncollsp_nchars_euckr_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -9989,6 +9991,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_nopad_ci=
NULL, /* init */
my_strnncoll_euckr_korean_ci,
my_strnncollsp_euckr_korean_nopad_ci,
+ my_strnncollsp_nchars_euckr_korean_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -10005,6 +10008,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_nopad_bin=
NULL, /* init */
my_strnncoll_euckr_bin,
my_strnncollsp_euckr_nopad_bin,
+ my_strnncollsp_nchars_euckr_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 118e8286703..58ea37d36e6 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -67495,6 +67495,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_ci_handler =
NULL, /* init */
my_strnncoll_eucjpms_japanese_ci,
my_strnncollsp_eucjpms_japanese_ci,
+ my_strnncollsp_nchars_eucjpms_japanese_ci,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@@ -67511,6 +67512,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
NULL, /* init */
my_strnncoll_eucjpms_bin,
my_strnncollsp_eucjpms_bin,
+ my_strnncollsp_nchars_eucjpms_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -67527,6 +67529,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_nopad_ci_handler =
NULL, /* init */
my_strnncoll_eucjpms_japanese_ci,
my_strnncollsp_eucjpms_japanese_nopad_ci,
+ my_strnncollsp_nchars_eucjpms_japanese_nopad_ci,
my_strnxfrm_mb_nopad, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@@ -67543,6 +67546,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_nopad_bin_handler =
NULL, /* init */
my_strnncoll_eucjpms_bin,
my_strnncollsp_eucjpms_nopad_bin,
+ my_strnncollsp_nchars_eucjpms_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 166619bf5cc..84246ad6671 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -6362,6 +6362,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci=
NULL, /* init */
my_strnncoll_gb2312_chinese_ci,
my_strnncollsp_gb2312_chinese_ci,
+ my_strnncollsp_nchars_gb2312_chinese_ci,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@@ -6378,6 +6379,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
NULL, /* init */
my_strnncoll_gb2312_bin,
my_strnncollsp_gb2312_bin,
+ my_strnncollsp_nchars_gb2312_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -6394,6 +6396,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_nopad_ci=
NULL, /* init */
my_strnncoll_gb2312_chinese_ci,
my_strnncollsp_gb2312_chinese_nopad_ci,
+ my_strnncollsp_nchars_gb2312_chinese_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -6410,6 +6413,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_nopad_bin=
NULL, /* init */
my_strnncoll_gb2312_bin,
my_strnncollsp_gb2312_nopad_bin,
+ my_strnncollsp_nchars_gb2312_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index efaa2e5c728..d7ea47c409f 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -10645,6 +10645,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci=
NULL, /* init */
my_strnncoll_gbk_chinese_ci,
my_strnncollsp_gbk_chinese_ci,
+ my_strnncollsp_nchars_gbk_chinese_ci,
my_strnxfrm_gbk_chinese_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -10661,6 +10662,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
NULL, /* init */
my_strnncoll_gbk_bin,
my_strnncollsp_gbk_bin,
+ my_strnncollsp_nchars_gbk_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -10677,6 +10679,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_nopad_ci=
NULL, /* init */
my_strnncoll_gbk_chinese_ci,
my_strnncollsp_gbk_chinese_nopad_ci,
+ my_strnncollsp_nchars_gbk_chinese_nopad_ci,
my_strnxfrm_gbk_chinese_nopad_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -10693,6 +10696,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_nopad_bin=
NULL, /* init */
my_strnncoll_gbk_bin,
my_strnncollsp_gbk_nopad_bin,
+ my_strnncollsp_nchars_gbk_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index f9fa1488aa6..bcf1cc6c9f1 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -726,6 +726,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
NULL, /* init */
my_strnncoll_latin1_de,
my_strnncollsp_latin1_de,
+ my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_latin1_de,
my_strnxfrmlen_simple,
my_like_range_simple,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 9c6cb34137d..d150e457673 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -208,6 +208,18 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
}
+static int
+my_strnncollsp_nchars_simple(CHARSET_INFO * cs,
+ const uchar *a, size_t a_length,
+ const uchar *b, size_t b_length,
+ size_t nchars)
+{
+ set_if_smaller(a_length, nchars);
+ set_if_smaller(b_length, nchars);
+ return my_strnncollsp_simple(cs, a, a_length, b, b_length);
+}
+
+
int my_strnncollsp_simple_nopad(CHARSET_INFO * cs,
const uchar *a, size_t a_length,
const uchar *b, size_t b_length)
@@ -2096,6 +2108,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
my_coll_init_simple, /* init */
my_strnncoll_simple,
my_strnncollsp_simple,
+ my_strnncollsp_nchars_simple,
my_strnxfrm_simple,
my_strnxfrmlen_simple,
my_like_range_simple,
@@ -2112,6 +2125,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler =
my_coll_init_simple, /* init */
my_strnncoll_simple,
my_strnncollsp_simple_nopad,
+ my_strnncollsp_nchars_simple,
my_strnxfrm_simple_nopad,
my_strnxfrmlen_simple,
my_like_range_simple,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 902034b435d..bd2bf432a34 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -34046,6 +34046,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci=
NULL, /* init */
my_strnncoll_sjis_japanese_ci,
my_strnncollsp_sjis_japanese_ci,
+ my_strnncollsp_nchars_sjis_japanese_ci,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -34062,6 +34063,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
NULL, /* init */
my_strnncoll_sjis_bin,
my_strnncollsp_sjis_bin,
+ my_strnncollsp_nchars_sjis_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -34078,6 +34080,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_nopad_ci=
NULL, /* init */
my_strnncoll_sjis_japanese_ci,
my_strnncollsp_sjis_japanese_nopad_ci,
+ my_strnncollsp_nchars_sjis_japanese_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -34094,6 +34097,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_nopad_bin=
NULL, /* init */
my_strnncoll_sjis_bin,
my_strnncollsp_sjis_nopad_bin,
+ my_strnncollsp_nchars_sjis_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 9760ea25162..d5367393c86 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -852,6 +852,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
NULL, /* init */
my_strnncoll_tis620,
my_strnncollsp_tis620,
+ my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_tis620,
my_strnxfrmlen_simple,
my_like_range_simple,
@@ -867,6 +868,7 @@ static MY_COLLATION_HANDLER my_collation_nopad_ci_handler =
NULL, /* init */
my_strnncoll_tis620,
my_strnncollsp_tis620_nopad,
+ my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_tis620_nopad,
my_strnxfrmlen_simple,
my_like_range_simple,
diff --git a/strings/ctype-uca-scanner_next.inl b/strings/ctype-uca-scanner_next.inl
new file mode 100644
index 00000000000..79d25487b42
--- /dev/null
+++ b/strings/ctype-uca-scanner_next.inl
@@ -0,0 +1,179 @@
+/* Copyright (c) 2004, 2013, Oracle and/or its affiliates.
+ Copyright (c) 2009, 2021, MariaDB
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; version 2
+ of the License.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+ MA 02110-1335 USA */
+
+
+#ifdef SCANNER_NEXT_NCHARS
+
+#define SCANNER_NEXT_RETURN(_w,_n) \
+ do { weight_and_nchars_t rc= {_w, _n}; return rc; } while(0)
+
+#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \
+ do { \
+ weight_and_nchars_t rc= { _cnt->weight[0], \
+ _ignorable_nchars + \
+ my_contraction_char_length(_cnt) }; \
+ return rc; \
+ } while(0)
+
+#else
+
+#define SCANNER_NEXT_RETURN(_w,_n) do { return _w; } while (0)
+
+#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \
+ do { return _cnt->weight[0]; } while(0)
+
+#endif
+
+static inline
+#ifdef SCANNER_NEXT_NCHARS
+weight_and_nchars_t
+MY_FUNCTION_NAME(scanner_next_with_nchars)(my_uca_scanner *scanner,
+ size_t nchars)
+#else
+int
+MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
+#endif
+{
+#ifdef SCANNER_NEXT_NCHARS
+ uint ignorable_nchars;
+#define LOCAL_MAX_CONTRACTION_LENGTH nchars
+#else
+#define LOCAL_MAX_CONTRACTION_LENGTH MY_UCA_MAX_CONTRACTION
+#endif
+ /*
+ Check if the weights for the previous character have been
+ already fully scanned. If yes, then get the next character and
+ initialize wbeg and wlength to its weight string.
+ */
+
+ if (scanner->wbeg[0])
+ {
+ /*
+ More weights left from the previous step.
+ Return the next weight from the current expansion.
+ Return "0" as "nchars". The real nchars was set on a previous
+ iteration.
+ */
+ SCANNER_NEXT_RETURN(*scanner->wbeg++, 0);
+ }
+
+#ifdef SCANNER_NEXT_NCHARS
+ for (ignorable_nchars= 0 ; ; ignorable_nchars++)
+#else
+ for ( ; ; )
+#endif
+ {
+ const uint16 *wpage;
+ my_wc_t wc[MY_UCA_MAX_CONTRACTION];
+ int mblen;
+
+ /* Get next character */
+#if MY_UCA_ASCII_OPTIMIZE
+ /* Get next ASCII character */
+ if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
+ {
+ wc[0]= scanner->sbeg[0];
+ scanner->sbeg+= 1;
+
+#if MY_UCA_COMPILE_CONTRACTIONS
+ if (my_uca_needs_context_handling(scanner->level, wc[0]))
+ {
+ const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc,
+ LOCAL_MAX_CONTRACTION_LENGTH);
+ if (cnt)
+ SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
+ }
+#endif
+
+ scanner->page= 0;
+ scanner->code= (int) wc[0];
+ scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
+ if (scanner->wbeg[0])
+ SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1);
+ continue;
+ }
+ else
+#endif
+ /* Get next MB character */
+ if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
+ scanner->send)) <= 0))
+ {
+ if (scanner->sbeg >= scanner->send)
+ {
+ /* No more bytes, end of line reached */
+ SCANNER_NEXT_RETURN(-1, ignorable_nchars);
+ }
+ /*
+ There are some more bytes left. Non-positive mblen means that
+ we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
+ */
+ if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
+ {
+ /* For safety purposes don't go beyond the string range. */
+ scanner->sbeg= scanner->send;
+ }
+ /*
+ Treat every complete or incomplete mbminlen unit as a weight which is
+ greater than weight for any possible normal character.
+ 0xFFFF is greater than any possible weight in the UCA weight table.
+ */
+ SCANNER_NEXT_RETURN(0xFFFF, ignorable_nchars + 1);
+ }
+
+ scanner->sbeg+= mblen;
+ if (wc[0] > scanner->level->maxchar)
+ {
+ /* Return 0xFFFD as weight for all characters outside BMP */
+ scanner->wbeg= nochar;
+ SCANNER_NEXT_RETURN(0xFFFD, ignorable_nchars + 1);
+ }
+
+#if MY_UCA_COMPILE_CONTRACTIONS
+ if (my_uca_needs_context_handling(scanner->level, wc[0]))
+ {
+ const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc,
+ LOCAL_MAX_CONTRACTION_LENGTH);
+ if (cnt)
+ SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
+ }
+#endif
+
+ /* Process single character */
+ scanner->page= wc[0] >> 8;
+ scanner->code= wc[0] & 0xFF;
+
+ /* If weight page for wc[0] does not exist, then calculate algorithmically */
+ if (!(wpage= scanner->level->weights[scanner->page]))
+ SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner),
+ ignorable_nchars + 1);
+
+ /* Calculate pointer to w[0]'s weight, using page and offset */
+ scanner->wbeg= wpage +
+ scanner->code * scanner->level->lengths[scanner->page];
+ if (scanner->wbeg[0])
+ break;
+ /* Skip ignorable character and continue the loop */
+ }
+
+ SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1);
+}
+
+#undef SCANNER_NEXT_NCHARS
+#undef SCANNER_NEXT_RETURN
+#undef SCANNER_NEXT_RETURN_CONTRACTION
+#undef LOCAL_MAX_CONTRACTION_LENGTH
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 161830088a5..551efd8b0be 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -35,6 +35,12 @@
#include "strings_def.h"
#include <m_ctype.h>
+typedef struct
+{
+ int weight;
+ uint nchars;
+} weight_and_nchars_t;
+
#define MY_CS_COMMON_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NON1TO1)
#define MY_UCA_CNT_FLAG_SIZE 4096
@@ -31450,6 +31456,21 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len)
}
+/*
+ Return the number of characters in a contraction.
+*/
+static inline uint my_contraction_char_length(const MY_CONTRACTION *cnt)
+{
+ uint i;
+ for (i= 2; i < array_elements(cnt->ch); i++)
+ {
+ if (cnt->ch[i] == 0)
+ return i;
+ }
+ return array_elements(cnt->ch);
+}
+
+
/**
Check if a string is a contraction,
and return its weight array on success.
@@ -31487,8 +31508,9 @@ my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
a contraction part. Then try to find real contraction among the
candidates, starting from the longest.
- @param scanner Pointer to UCA scanner
- @param[OUT] *wc Where to store the scanned string
+ @param scanner Pointer to UCA scanner
+ @param[OUT] *wc Where to store the scanned string
+ @param max_char_length The longest contraction character length allowed
@return Weight array
@retval NULL - no contraction found
@@ -31496,7 +31518,8 @@ my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
*/
static const MY_CONTRACTION *
-my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
+my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc,
+ size_t max_char_length)
{
size_t clen= 1;
int flag;
@@ -31505,7 +31528,7 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
/* Scan all contraction candidates */
for (s= scanner->sbeg, flag= MY_UCA_CNT_MID1;
- clen < MY_UCA_MAX_CONTRACTION;
+ clen < max_char_length;
flag<<= 1)
{
int mblen;
@@ -31582,11 +31605,14 @@ my_uca_previous_context_find(my_uca_scanner *scanner,
If wc[0] and the previous character make a previous context
pair, then wc[1] is set to the previous character.
+ @param max_char_length - the longest contraction character length allowed.
+
@retval NULL if could not find any contextual weights for wc[0]
@retval non null pointer - the address of MY_CONTRACTION found
*/
static inline const MY_CONTRACTION *
-my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
+my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc,
+ size_t max_char_length)
{
const MY_CONTRACTION *cnt;
DBUG_ASSERT(scanner->level->contractions.nitems);
@@ -31614,7 +31640,7 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
wc[0]))
{
/* Check if w[0] starts a contraction */
- if ((cnt= my_uca_scanner_contraction_find(scanner, wc)))
+ if ((cnt= my_uca_scanner_contraction_find(scanner, wc, max_char_length)))
return cnt;
}
return NULL;
diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic
index bb0eee85886..7c9d34d217e 100644
--- a/strings/ctype-uca.ic
+++ b/strings/ctype-uca.ic
@@ -35,108 +35,9 @@
#error MY_UCA_COLL_INIT is not defined
#endif
-
-static inline int
-MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
-{
- /*
- Check if the weights for the previous character have been
- already fully scanned. If yes, then get the next character and
- initialize wbeg and wlength to its weight string.
- */
-
- if (scanner->wbeg[0]) /* More weights left from the previous step: */
- return *scanner->wbeg++; /* return the next weight from expansion */
-
- do
- {
- const uint16 *wpage;
- my_wc_t wc[MY_UCA_MAX_CONTRACTION];
- int mblen;
-
- /* Get next character */
-#if MY_UCA_ASCII_OPTIMIZE
- /* Get next ASCII character */
- if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
- {
- wc[0]= scanner->sbeg[0];
- scanner->sbeg+= 1;
-
-#if MY_UCA_COMPILE_CONTRACTIONS
- if (my_uca_needs_context_handling(scanner->level, wc[0]))
- {
- const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc);
- if (cnt)
- return cnt->weight[0];
- }
-#endif
-
- scanner->page= 0;
- scanner->code= (int) wc[0];
- scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
- if (scanner->wbeg[0])
- return *scanner->wbeg++;
- continue;
- }
- else
-#endif
- /* Get next MB character */
- if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
- scanner->send)) <= 0))
- {
- if (scanner->sbeg >= scanner->send)
- return -1; /* No more bytes, end of line reached */
- /*
- There are some more bytes left. Non-positive mb_len means that
- we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
- */
- if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
- {
- /* For safety purposes don't go beyond the string range. */
- scanner->sbeg= scanner->send;
- }
- /*
- Treat every complete or incomplete mbminlen unit as a weight which is
- greater than weight for any possible normal character.
- 0xFFFF is greater than any possible weight in the UCA weight table.
- */
- return 0xFFFF;
- }
-
- scanner->sbeg+= mblen;
- if (wc[0] > scanner->level->maxchar)
- {
- /* Return 0xFFFD as weight for all characters outside BMP */
- scanner->wbeg= nochar;
- return 0xFFFD;
- }
-
-#if MY_UCA_COMPILE_CONTRACTIONS
- if (my_uca_needs_context_handling(scanner->level, wc[0]))
- {
- const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc);
- if (cnt)
- return cnt->weight[0];
- }
-#endif
-
- /* Process single character */
- scanner->page= wc[0] >> 8;
- scanner->code= wc[0] & 0xFF;
-
- /* If weight page for w[0] does not exist, then calculate algoritmically */
- if (!(wpage= scanner->level->weights[scanner->page]))
- return my_uca_scanner_next_implicit(scanner);
-
- /* Calculate pointer to w[0]'s weight, using page and offset */
- scanner->wbeg= wpage +
- scanner->code * scanner->level->lengths[scanner->page];
- } while (!scanner->wbeg[0]); /* Skip ignorable characters */
-
- return *scanner->wbeg++;
-}
-
-
+#include "ctype-uca-scanner_next.inl"
+#define SCANNER_NEXT_NCHARS
+#include "ctype-uca-scanner_next.inl"
/*
Compares two strings according to the collation
@@ -409,6 +310,173 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
}
+/*
+ Scan the next weight and perform space padding
+ or trimming according to "nchars".
+
+ @param scanner - the scanner to get the next weight from
+ @param nchars - how many characters the caller still allows
+ to be consumed from the string
+ @param generated - a counter which is incremented every time when
+ the returned weight was synthesized here
+ (by padding or by trimming) rather than
+ taken from the scanner unchanged
+ @return - the weight together with the number of characters
+ accounted for it:
+ - a scanned weight that fits into "nchars"
+ is returned as is
+ - at the end of the string: the space weight, nchars=1
+ - on trimming: 0 for NOPAD collations, the space
+ weight otherwise; nchars is what was left of the limit
+*/
+static inline weight_and_nchars_t
+MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
+ size_t nchars,
+ uint *generated)
+{
+ weight_and_nchars_t res;
+ /*
+ Call the scanner if the limit is not exhausted yet,
+ or if it still has pending weights to return.
+ */
+ if (nchars > 0 ||
+ scanner->wbeg[0] /* Some weights from a previous expansion left */)
+ {
+ if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner,
+ nchars)).weight < 0)
+ {
+ /*
+ We reached the end of the string, but the caller wants more weights.
+ Perform space padding.
+ */
+ res.weight= my_space_weight(scanner->level);
+ res.nchars= 1;
+ (*generated)++;
+ }
+ else if (res.nchars > nchars)
+ {
+ /*
+ We scanned the next collation element, but it does not fit into
+ the "nchars" limit. This is possible in case of:
+ - A contraction, e.g. Czech 'ch' with nchars=1
+ - A sequence of ignorable characters followed by non-ignorable ones,
+ e.g. CONCAT(x'00','a') with nchars=1.
+ Perform trimming.
+ */
+ res.weight= scanner->cs->state & MY_CS_NOPAD ?
+ 0 : my_space_weight(scanner->level);
+ res.nchars= (uint) nchars;
+ (*generated)++;
+ }
+ /* Otherwise the scanned weight fits into the limit: return it as is */
+ }
+ else
+ {
+ /* The caller wants nchars==0. Perform trimming. */
+ res.weight= scanner->cs->state & MY_CS_NOPAD ?
+ 0 : my_space_weight(scanner->level);
+ res.nchars= 0;
+ (*generated)++;
+ }
+ return res;
+}
+
+
+/*
+ Compare two strings on one weight level, taking into account
+ at most "nchars" characters of each string,
+ with space padding or trimming.
+
+ @param cs - the character set and collation
+ @param level - the weight level to compare on
+ @param s - the left string
+ @param slen - the length of the left string
+ @param t - the right string
+ @param tlen - the length of the right string
+ @param nchars - the character limit, applied to both strings
+ @return - the difference of the first pair of unequal weights.
+ Otherwise, when both sides have no real data any more:
+ 0, or for NOPAD collations -1 or +1 if the sides
+ have a different number of characters left.
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ size_t nchars)
+{
+ my_uca_scanner sscanner;
+ my_uca_scanner tscanner;
+ /* How many characters each side is still allowed to consume */
+ size_t s_nchars_left= nchars;
+ size_t t_nchars_left= nchars;
+
+ my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
+ my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
+
+ for ( ; ; )
+ {
+ weight_and_nchars_t s_res;
+ weight_and_nchars_t t_res;
+ /* How many of the two weights below were padded or trimmed: 0, 1 or 2 */
+ uint generated= 0;
+ int diff;
+
+ s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left,
+ &generated);
+ t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left,
+ &generated);
+ if ((diff= (s_res.weight - t_res.weight)))
+ return diff;
+
+ if (generated == 2)
+ {
+ if (cs->state & MY_CS_NOPAD)
+ {
+ /*
+ Both values are auto-generated. There's no real data any more.
+ We need to handle the remaining virtual trailing spaces.
+ The two strings still have s_nchars_left and t_nchars_left imaginary
+ trailing spaces at the end. If s_nchars_left != t_nchars_left,
+ the strings will be not equal in case of a NOPAD collation.
+
+ Example:
+ "B" is German "U+00DF LATIN SMALL LETTER SHARP S"
+ When we have these values in a
+ CHAR(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_nopad_ci
+ column:
+ 'B  ' (one character, two trailing spaces)
+ 'ss ' (two characters, one trailing space)
+ The 'B  ' is greater than the 'ss '.
+ They are compared in the following steps:
+ 1. 'B' == 'ss'
+ 2. ' ' == ' '
+ 3. ' ' > ''
+
+ We need to emulate the same behavior in this function even if
+ it's called with strings 'B' and 'ss' (with space trimmed).
+ The side which has more remaining virtual spaces at the end
+ is greater.
+ */
+ if (s_nchars_left < t_nchars_left)
+ return -1;
+ if (s_nchars_left > t_nchars_left)
+ return +1;
+ }
+ return 0;
+ }
+
+ /* Account for the characters consumed in this iteration */
+ DBUG_ASSERT(s_nchars_left >= s_res.nchars);
+ DBUG_ASSERT(t_nchars_left >= t_res.nchars);
+ s_nchars_left-= s_res.nchars;
+ t_nchars_left-= t_res.nchars;
+ }
+
+ /* Not reached: the loop above can be left only through "return" */
+ return 0;
+}
+
+
+/*
+  strnncollsp_nchars() for one-level collations:
+  the comparison is done on the first weight level only.
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs,
+                                     const uchar *s, size_t slen,
+                                     const uchar *t, size_t tlen,
+                                     size_t nchars)
+{
+  const MY_UCA_WEIGHT_LEVEL *first_level= &cs->uca->level[0];
+  return MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, first_level,
+                                                       s, slen,
+                                                       t, tlen,
+                                                       nchars);
+}
+
+
+/*
+  strnncollsp_nchars() for multi-level collations:
+  the levels are compared one after another, and the first level
+  that gives a non-zero result decides. If all cs->levels_for_order
+  levels compare equal, the strings are equal.
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs,
+                                                const uchar *s, size_t slen,
+                                                const uchar *t, size_t tlen,
+                                                size_t nchars)
+{
+  const uint level_count= cs->levels_for_order;
+  uint level_no= 0;
+  while (level_no < level_count)
+  {
+    const MY_UCA_WEIGHT_LEVEL *level= &cs->uca->level[level_no];
+    int cmp= MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, level,
+                                                           s, slen,
+                                                           t, tlen,
+                                                           nchars);
+    if (cmp != 0)
+      return cmp;
+    level_no++;
+  }
+  return 0;
+}
+
/*
Calculates hash value for the given string,
@@ -752,6 +820,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)=
MY_UCA_COLL_INIT,
MY_FUNCTION_NAME(strnncoll),
MY_FUNCTION_NAME(strnncollsp),
+ MY_FUNCTION_NAME(strnncollsp_nchars),
MY_FUNCTION_NAME(strnxfrm),
my_strnxfrmlen_any_uca,
MY_LIKE_RANGE,
@@ -773,6 +842,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)=
MY_UCA_COLL_INIT,
MY_FUNCTION_NAME(strnncoll),
MY_FUNCTION_NAME(strnncollsp_nopad),
+ MY_FUNCTION_NAME(strnncollsp_nchars),
MY_FUNCTION_NAME(strnxfrm_nopad),
my_strnxfrmlen_any_uca,
MY_LIKE_RANGE, /* my_like_range_mb or my_like_range_generic */
@@ -792,6 +862,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)=
MY_UCA_COLL_INIT,
MY_FUNCTION_NAME(strnncoll_multilevel),
MY_FUNCTION_NAME(strnncollsp_multilevel),
+ MY_FUNCTION_NAME(strnncollsp_nchars_multilevel),
MY_FUNCTION_NAME(strnxfrm_multilevel),
my_strnxfrmlen_any_uca_multilevel,
MY_LIKE_RANGE,
@@ -811,6 +882,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)=
MY_UCA_COLL_INIT,
MY_FUNCTION_NAME(strnncoll_multilevel),
MY_FUNCTION_NAME(strnncollsp_nopad_multilevel),
+ MY_FUNCTION_NAME(strnncollsp_nchars_multilevel),
MY_FUNCTION_NAME(strnxfrm_multilevel),
my_strnxfrmlen_any_uca_multilevel,
MY_LIKE_RANGE,
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 0c153793e8e..36ab6f5c0b1 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1505,6 +1505,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
NULL, /* init */
my_strnncoll_utf16_general_ci,
my_strnncollsp_utf16_general_ci,
+ my_strnncollsp_nchars_utf16_general_ci,
my_strnxfrm_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@@ -1521,6 +1522,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
NULL, /* init */
my_strnncoll_utf16_bin,
my_strnncollsp_utf16_bin,
+ my_strnncollsp_nchars_utf16_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@@ -1537,6 +1539,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf16_general_ci,
my_strnncollsp_utf16_general_nopad_ci,
+ my_strnncollsp_nchars_utf16_general_nopad_ci,
my_strnxfrm_nopad_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@@ -1553,6 +1556,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf16_bin,
my_strnncollsp_utf16_nopad_bin,
+ my_strnncollsp_nchars_utf16_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@@ -1845,6 +1849,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
NULL, /* init */
my_strnncoll_utf16le_general_ci,
my_strnncollsp_utf16le_general_ci,
+ my_strnncollsp_nchars_utf16le_general_ci,
my_strnxfrm_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@@ -1861,6 +1866,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
NULL, /* init */
my_strnncoll_utf16le_bin,
my_strnncollsp_utf16le_bin,
+ my_strnncollsp_nchars_utf16le_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@@ -1877,6 +1883,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf16le_general_ci,
my_strnncollsp_utf16le_general_nopad_ci,
+ my_strnncollsp_nchars_utf16le_general_nopad_ci,
my_strnxfrm_nopad_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@@ -1893,6 +1900,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf16le_bin,
my_strnncollsp_utf16le_nopad_bin,
+ my_strnncollsp_nchars_utf16le_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@@ -2671,6 +2679,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
NULL, /* init */
my_strnncoll_utf32_general_ci,
my_strnncollsp_utf32_general_ci,
+ my_strnncollsp_nchars_utf32_general_ci,
my_strnxfrm_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@@ -2687,6 +2696,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
NULL, /* init */
my_strnncoll_utf32_bin,
my_strnncollsp_utf32_bin,
+ my_strnncollsp_nchars_utf32_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@@ -2703,6 +2713,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf32_general_ci,
my_strnncollsp_utf32_general_nopad_ci,
+ my_strnncollsp_nchars_utf32_general_nopad_ci,
my_strnxfrm_nopad_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@@ -2719,6 +2730,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf32_bin,
my_strnncollsp_utf32_nopad_bin,
+ my_strnncollsp_nchars_utf32_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@@ -3261,6 +3273,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
NULL, /* init */
my_strnncoll_ucs2_general_ci,
my_strnncollsp_ucs2_general_ci,
+ my_strnncollsp_nchars_ucs2_general_ci,
my_strnxfrm_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@@ -3277,6 +3290,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_bin,
+ my_strnncollsp_nchars_ucs2_bin,
my_strnxfrm_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,
@@ -3293,6 +3307,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_ucs2_general_ci,
my_strnncollsp_ucs2_general_nopad_ci,
+ my_strnncollsp_nchars_ucs2_general_nopad_ci,
my_strnxfrm_nopad_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@@ -3309,6 +3324,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_nopad_bin,
+ my_strnncollsp_nchars_ucs2_nopad_bin,
my_strnxfrm_nopad_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 949f3aadc36..34600eda1a5 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -67239,6 +67239,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_ci_handler =
NULL, /* init */
my_strnncoll_ujis_japanese_ci,
my_strnncollsp_ujis_japanese_ci,
+ my_strnncollsp_nchars_ujis_japanese_ci,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@@ -67255,6 +67256,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler =
NULL, /* init */
my_strnncoll_ujis_bin,
my_strnncollsp_ujis_bin,
+ my_strnncollsp_nchars_ujis_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -67271,6 +67273,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_nopad_ci_handler =
NULL, /* init */
my_strnncoll_ujis_japanese_ci,
my_strnncollsp_ujis_japanese_nopad_ci,
+ my_strnncollsp_nchars_ujis_japanese_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@@ -67287,6 +67290,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_nopad_bin_handler =
NULL, /* init */
my_strnncoll_ujis_bin,
my_strnncollsp_ujis_nopad_bin,
+ my_strnncollsp_nchars_ujis_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index e579d7b2bc6..7a87dbb7c05 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -5357,6 +5357,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
NULL, /* init */
my_strnncoll_utf8_general_ci,
my_strnncollsp_utf8_general_ci,
+ my_strnncollsp_nchars_utf8_general_ci,
my_strnxfrm_utf8_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@@ -5373,6 +5374,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler =
NULL, /* init */
my_strnncoll_utf8_general_mysql500_ci,
my_strnncollsp_utf8_general_mysql500_ci,
+ my_strnncollsp_nchars_utf8_general_mysql500_ci,
my_strnxfrm_utf8_general_mysql500_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@@ -5389,6 +5391,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
NULL, /* init */
my_strnncoll_utf8_bin,
my_strnncollsp_utf8_bin,
+ my_strnncollsp_nchars_utf8_bin,
my_strnxfrm_utf8_bin,
my_strnxfrmlen_unicode,
my_like_range_mb,
@@ -5405,6 +5408,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf8_general_ci,
my_strnncollsp_utf8_general_nopad_ci,
+ my_strnncollsp_nchars_utf8_general_nopad_ci,
my_strnxfrm_nopad_utf8_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@@ -5421,6 +5425,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf8_bin,
my_strnncollsp_utf8_nopad_bin,
+ my_strnncollsp_nchars_utf8_nopad_bin,
my_strnxfrm_nopad_utf8_bin,
my_strnxfrmlen_unicode,
my_like_range_mb,
@@ -5750,6 +5755,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler =
NULL, /* init */
my_strnncoll_utf8_cs,
my_strnncollsp_utf8_cs,
+ my_strnncollsp_nchars_generic,
my_strnxfrm_utf8_general_ci,
my_strnxfrmlen_unicode,
my_like_range_simple,
@@ -7058,6 +7064,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
NULL, /* init */
my_strnncoll_simple,
my_strnncollsp_simple,
+ my_strnncollsp_nchars_generic,
my_strnxfrm_filename,
my_strnxfrmlen_unicode,
my_like_range_mb,
@@ -7697,6 +7704,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
NULL, /* init */
my_strnncoll_utf8mb4_general_ci,
my_strnncollsp_utf8mb4_general_ci,
+ my_strnncollsp_nchars_utf8mb4_general_ci,
my_strnxfrm_utf8mb4_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@@ -7713,6 +7721,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler =
NULL, /* init */
my_strnncoll_utf8mb4_bin,
my_strnncollsp_utf8mb4_bin,
+ my_strnncollsp_nchars_utf8mb4_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_mb,
@@ -7729,6 +7738,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_nopad_ci_handler=
NULL, /* init */
my_strnncoll_utf8mb4_general_ci,
my_strnncollsp_utf8mb4_general_nopad_ci,
+ my_strnncollsp_nchars_utf8mb4_general_nopad_ci,
my_strnxfrm_nopad_utf8mb4_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@@ -7745,6 +7755,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf8mb4_bin,
my_strnncollsp_utf8mb4_nopad_bin,
+ my_strnncollsp_nchars_utf8mb4_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_mb,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index f33a83294d6..15fa6299e4e 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -674,6 +674,7 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
NULL, /* init */
my_strnncoll_win1250ch,
my_strnncollsp_win1250ch,
+ my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_win1250ch,
my_strnxfrmlen_simple,
my_like_range_win1250ch,
diff --git a/strings/ctype.c b/strings/ctype.c
index 32c41e6e9e7..0cf1131ab57 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -1210,3 +1210,32 @@ outp:
copy_status->m_source_end_pos= from;
return to - to_start;
}
+
+
+/*
+  A generic strnncollsp_nchars() implementation.
+  Both lengths are limited with my_well_formed_length() using "nchars",
+  then the strings are compared with cs->coll->strnncollsp().
+  Must not be used with NOPAD collations (see the assert).
+*/
+int my_strnncollsp_nchars_generic(CHARSET_INFO *cs,
+                                  const uchar *str1, size_t len1,
+                                  const uchar *str2, size_t len2,
+                                  size_t nchars)
+{
+  int well_formed_error;
+  const char *beg1= (const char *) str1;
+  const char *beg2= (const char *) str2;
+  size_t cut1= my_well_formed_length(cs, beg1, beg1 + len1,
+                                     nchars, &well_formed_error);
+  size_t cut2= my_well_formed_length(cs, beg2, beg2 + len2,
+                                     nchars, &well_formed_error);
+  DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0);
+  return cs->coll->strnncollsp(cs, str1, cut1, str2, cut2);
+}
+
+
+/*
+  A generic strnncollsp_nchars() implementation for 8bit character sets.
+  Both lengths are limited to "nchars" directly,
+  then the strings are compared with cs->coll->strnncollsp().
+  Must not be used with NOPAD collations (see the assert).
+*/
+int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs,
+                                       const uchar *str1, size_t len1,
+                                       const uchar *str2, size_t len2,
+                                       size_t nchars)
+{
+  size_t cut1= len1;
+  size_t cut2= len2;
+  set_if_smaller(cut1, nchars);
+  set_if_smaller(cut2, nchars);
+  DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0);
+  return cs->coll->strnncollsp(cs, str1, cut1, str2, cut2);
+}
diff --git a/strings/strcoll.ic b/strings/strcoll.ic
index 86789fc4189..392a5dac589 100644
--- a/strings/strcoll.ic
+++ b/strings/strcoll.ic
@@ -287,6 +287,56 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
}
#endif
+
+/**
+  Compare two strings according to the collation, looking at
+  no more than "nchars" weights scanned from each string.
+  A string that ends before the limit is reached is continued
+  with the auto-generated trailing space weights given by scan_weight().
+
+  @param cs       - the character set and collation (not used)
+  @param a        - the left string
+  @param a_length - the length of the left string
+  @param b        - the right string
+  @param b_length - the length of the right string
+  @param nchars   - compare this amount of characters only
+  @return         - the difference of the first pair of unequal weights,
+                    or 0 if no difference was found
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs __attribute__((unused)),
+                                     const uchar *a, size_t a_length,
+                                     const uchar *b, size_t b_length,
+                                     size_t nchars)
+{
+  const uchar *a_end= a + a_length;
+  const uchar *b_end= b + b_length;
+  while (nchars > 0)
+  {
+    int a_weight, b_weight;
+    uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
+    uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
+
+    /* Two different weights decide the result. See strnncollsp above. */
+    if (a_weight != b_weight)
+      return a_weight - b_weight;
+
+    if (a_wlen == 0 && b_wlen == 0)
+    {
+      /* Both weights are auto-generated trailing spaces: no data left. */
+      DBUG_ASSERT(a == a_end);
+      DBUG_ASSERT(b == b_end);
+      return 0;
+    }
+
+    /* At least one of the strings still has data: go to the next weights. */
+    DBUG_ASSERT(a < a_end || b < b_end);
+    a+= a_wlen;
+    b+= b_wlen;
+    nchars--;
+  }
+  return 0;
+}
+
+
#endif /* DEFINE_STRNNCOLL */
diff --git a/strings/strings_def.h b/strings/strings_def.h
index b3727321e19..8bf089ec695 100644
--- a/strings/strings_def.h
+++ b/strings/strings_def.h
@@ -105,6 +105,16 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
}
+/*
+ A generic strnncollsp_nchars() implementation: limits both lengths
+ with my_well_formed_length() using "nchars" and compares the results
+ with cs->coll->strnncollsp(). Asserts that the collation is not NOPAD.
+*/
+int my_strnncollsp_nchars_generic(CHARSET_INFO *cs,
+ const uchar *str1, size_t len1,
+ const uchar *str2, size_t len2,
+ size_t nchars);
+
+/*
+ A generic strnncollsp_nchars() implementation for 8bit character sets:
+ limits both lengths to "nchars" directly and compares the results
+ with cs->coll->strnncollsp(). Asserts that the collation is not NOPAD.
+*/
+int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs,
+ const uchar *str1, size_t len1,
+ const uchar *str2, size_t len2,
+ size_t nchars);
+
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c
index 00d49971595..97b9eb1a95e 100644
--- a/unittest/strings/strings-t.c
+++ b/unittest/strings/strings-t.c
@@ -19,6 +19,30 @@
/*
+ U+00DF LATIN SMALL LETTER SHARP S = _utf8 x'C39F' = _latin1 x'DF'
+*/
+
+#define UTF8_sz "\xC3\x9F"
+#define LATIN1_sz "\xDF"
+
+/*
+ U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE = _utf8 x'C385'
+*/
+
+#define UTF8_ARING "\xC3\x85"
+
+/*
+ U+00E4 LATIN SMALL LETTER A WITH DIAERESIS = _utf8 x'C3A4'
+*/
+#define UTF8_auml "\xC3\xA4"
+#define LATIN1_auml "\xE4"
+
+#define UCS2_a "\x00\x61"
+#define UCS2_b "\x00\x62"
+#define UCS2_sp "\x00\x20"
+
+
+/*
Test that like_range() returns well-formed results.
*/
static int
@@ -758,11 +782,483 @@ test_strcollsp()
}
-int main()
+/*
+ One test case for cs->coll->strnncollsp_nchars().
+*/
+typedef struct
+{
+ LEX_CSTRING a; /* The left string */
+ LEX_CSTRING b; /* The right string */
+ size_t nchars; /* The "nchars" argument passed to the function */
+ int res; /* The expected result, checked with eqres() */
+} STRNNCOLLSP_CHAR_PARAM;
+
+
+/*
+ Some lines in the below test data are marked as follows:
+
+ IF - An ignorable failure. The scanner finds an ignorable character
+ followed by a normal character (or by a contraction),
+ but the "nchars" limit allows only one character to be scanned.
+ The whole sequence is ignored and is treated as end-of-line.
+ CF - A contraction failure. The scanner finds a contraction consisting
+ of two characters, but the "nchars" limit allows only one character
+ to be scanned. The whole contraction is ignored and is treated
+ as end-of-line.
+*/
+
+
+/*
+ Tests for mbminlen1 character sets,
+ for both PAD SPACE and NOPAD collations.
+
+ Every row is {a, b, nchars, expected result}.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen1_xpad_common[]=
+{
+ {{CSTR("a")}, {CSTR("a")}, 0, 0},
+ {{CSTR("a")}, {CSTR("a")}, 1, 0},
+ {{CSTR("a")}, {CSTR("a")}, 2, 0},
+ {{CSTR("a")}, {CSTR("a")}, 3, 0},
+ {{CSTR("a")}, {CSTR("a")}, 100, 0},
+
+ {{CSTR("a")}, {CSTR("ab")}, 0, 0},
+ {{CSTR("a")}, {CSTR("ab")}, 1, 0},
+ {{CSTR("a")}, {CSTR("ab")}, 2, -1},
+ {{CSTR("a")}, {CSTR("ab")}, 3, -1},
+ {{CSTR("a")}, {CSTR("ab")}, 100, -1},
+
+ {{CSTR("a")}, {CSTR("a ")}, 0, 0},
+ {{CSTR("a")}, {CSTR("a ")}, 1, 0},
+ {{CSTR("a")}, {CSTR("a ")}, 2, 0},
+ {{CSTR("a")}, {CSTR("a ")}, 3, 0},
+ {{CSTR("a")}, {CSTR("a ")}, 100, 0},
+
+ {{CSTR("a")}, {CSTR("a ")}, 0, 0},
+ {{CSTR("a")}, {CSTR("a ")}, 1, 0},
+ {{CSTR("a")}, {CSTR("a ")}, 2, 0},
+ {{CSTR("a")}, {CSTR("a ")}, 3, 0},
+ {{CSTR("a")}, {CSTR("a ")}, 100, 0},
+
+ {{CSTR("ss")}, {CSTR("ss")}, 0, 0},
+ {{CSTR("ss")}, {CSTR("ss")}, 1, 0},
+ {{CSTR("ss")}, {CSTR("ss")}, 2, 0},
+ {{CSTR("ss")}, {CSTR("ss")}, 3, 0},
+ {{CSTR("ss")}, {CSTR("ss")}, 100, 0},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests for utf8, for both PAD SPACE and NOPAD collations:
+ U+00DF LATIN SMALL LETTER SHARP S compared to itself.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_xpad_common[]=
+{
+ {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 0, 0},
+ {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 1, 0},
+ {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 2, 0},
+ {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 3, 0},
+ {{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 100, 0},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests for latin1, for both PAD SPACE and NOPAD collations:
+ U+00DF LATIN SMALL LETTER SHARP S compared to itself.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_latin1_xpad_common[]=
+{
+ {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 0, 0},
+ {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 1, 0},
+ {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 2, 0},
+ {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 3, 0},
+ {{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 100, 0},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests for utf8 collations that sort "A WITH DIAERESIS" equal to "A".
+ strnncollsp_char_utf8mbx() skips this list for collations
+ with "_bin", "_german2" or "_danish" in the name.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_xpad_a_eq_auml[]=
+{
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 0, 0},
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 1, 0},
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 2, 0},
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 3, 0},
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 100, 0},
+
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 0, 0},
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 1, 0},
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 2, 0},
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 3, 0},
+ {{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 100, 0},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests specific to utf8mb3_unicode_ci (a PAD SPACE collation):
+ "ss" against a string with an embedded x'00' byte,
+ and "ss" against U+00DF LATIN SMALL LETTER SHARP S.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_ci[]=
+{
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 0, 0},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 1, 0},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}/*IF*/, 2, 1},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 3, 0},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 4, 0},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 100, 0},
+
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 0, 0},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 1, -1},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 2, 0},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 3, 0},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, 0},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, 0},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests specific to utf8mb3_unicode_nopad_ci (a NOPAD collation).
+ The same string pairs as for utf8mb3_unicode_ci, but the expected
+ results differ when the two strings have a different number
+ of characters.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_nopad_ci[]=
+{
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 0, 0},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 1, 0},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}/*IF*/, 2, 1},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 3, 1},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 4, 1},
+ {{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 100, 1},
+
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 0, 0},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 1, -1},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 2, -1},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 3, -1},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, -1},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, -1},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests specific to utf8mb3_danish_ci: the two-character sequence "aa"
+ against shorter strings, strings with an embedded x'00' byte,
+ and U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE.
+ Rows are marked with CF (a contraction failure) and
+ IF (an ignorable failure) where the "nchars" limit cuts a sequence.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_danish_ci[]=
+{
+ {{CSTR("aa")}, {CSTR("")}, 0, 0},
+ {{CSTR("aa")}/*CF*/, {CSTR("")}, 1, 1},
+ {{CSTR("aa")}, {CSTR("")}, 2, 1},
+ {{CSTR("aa")}, {CSTR("")}, 3, 1},
+ {{CSTR("aa")}, {CSTR("")}, 100, 1},
+
+ {{CSTR("aa")}, {CSTR("a")}, 0, 0},
+ {{CSTR("aa")}/*CF*/, {CSTR("a")}, 1, 0},
+ {{CSTR("aa")}, {CSTR("a")}, 2, 1},
+ {{CSTR("aa")}, {CSTR("a")}, 3, 1},
+ {{CSTR("aa")}, {CSTR("a")}, 100, 1},
+
+ {{CSTR("aa")}, {CSTR("aa")}, 0, 0},
+ {{CSTR("aa")}/*CF*/, {CSTR("aa")}/*CF*/, 1, 0},
+ {{CSTR("aa")}, {CSTR("aa")}, 2, 0},
+ {{CSTR("aa")}, {CSTR("aa")}, 3, 0},
+ {{CSTR("aa")}, {CSTR("aa")}, 100, 0},
+
+ {{CSTR("aa")}, {CSTR("\x00" "a")}, 0, 0},
+ {{CSTR("aa")}/*CF*/, {CSTR("\x00" "a")}/*IF*/, 1, 1},
+ {{CSTR("aa")}, {CSTR("\x00" "a")}, 2, 1},
+ {{CSTR("aa")}, {CSTR("\x00" "a")}, 3, 1},
+ {{CSTR("aa")}, {CSTR("\x00" "a")}, 100, 1},
+
+ {{CSTR("aa")}, {CSTR("\x00" "aa")}, 0, 0},
+ {{CSTR("aa")}/*CF*/, {CSTR("\x00" "aa")}/*IF*/, 1, 1},
+ {{CSTR("aa")}, {CSTR("\x00" "aa")}/*IF*/, 2, 1},
+ {{CSTR("aa")}, {CSTR("\x00" "aa")}, 3, 0},
+ {{CSTR("aa")}, {CSTR("\x00" "aa")}, 100, 0},
+
+ {{CSTR("aa")}, {CSTR("a" "\x00" "a")}, 0, 0},
+ {{CSTR("aa")}/*CF*/, {CSTR("a" "\x00" "a")}, 1, 0},
+ {{CSTR("aa")}, {CSTR("a" "\x00" "a")}/*IF*/, 2, 1},
+ {{CSTR("aa")}, {CSTR("a" "\x00" "a")}, 3, 1},
+ {{CSTR("aa")}, {CSTR("a" "\x00" "a")}, 100, 1},
+
+ {{CSTR("aa")}, {CSTR(UTF8_ARING)}, 0, 0},
+ {{CSTR("aa")}/*CF*/, {CSTR(UTF8_ARING)}, 1, -1},
+ {{CSTR("aa")}, {CSTR(UTF8_ARING)}, 2, 0},
+ {{CSTR("aa")}, {CSTR(UTF8_ARING)}, 3, 0},
+ {{CSTR("aa")}, {CSTR(UTF8_ARING)}, 100, 0},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests specific to latin1_german2_ci:
+ "ss" against LATIN SMALL LETTER SHARP S, and
+ "ae" against LATIN SMALL LETTER A WITH DIAERESIS.
+ The pairs are expected to be equal starting from nchars=2.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_latin1_german2_ci[]=
+{
+ {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 0, 0},
+ {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 1, -1},
+ {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 2, 0},
+ {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 3, 0},
+ {{CSTR("ss")}, {CSTR(LATIN1_sz)}, 100, 0},
+
+ {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 0, 0},
+ {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 1, -1},
+ {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 2, 0},
+ {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 3, 0},
+ {{CSTR("ae")}, {CSTR(LATIN1_auml)}, 100, 0},
+
+ {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 0, 0},
+ {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 1, -1},
+ {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 2, 0},
+ {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 3, 0},
+ {{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 100, 0},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests specific to utf8 german2 collations (run for utf8mb3_german2_ci):
+ "ss" against LATIN SMALL LETTER SHARP S, and
+ "ae" against LATIN SMALL LETTER A WITH DIAERESIS.
+ The pairs are expected to be equal starting from nchars=2.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_german2_ci[]=
+{
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 0, 0},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 1, -1},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 2, 0},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 3, 0},
+ {{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, 0},
+
+ {{CSTR("ae")}, {CSTR(UTF8_auml)}, 0, 0},
+ {{CSTR("ae")}, {CSTR(UTF8_auml)}, 1, -1},
+ {{CSTR("ae")}, {CSTR(UTF8_auml)}, 2, 0},
+ {{CSTR("ae")}, {CSTR(UTF8_auml)}, 3, 0},
+ {{CSTR("ae")}, {CSTR(UTF8_auml)}, 100, 0},
+
+ {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 0, 0},
+ {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 1, -1},
+ {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 2, 0},
+ {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 3, 0},
+ {{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 100, 0},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests for the Czech 'ch' sequence, run for cp1250_czech_cs,
+ latin2_czech_cs and utf8mb3_czech_ci.
+ With nchars=1 only the 'c' part of 'ch' is expected to take part
+ in the comparison; with nchars=2 'ch' is expected to sort
+ after 'h' and before 'i'.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen1_xpad_czech[]=
+{
+ {{CSTR("c")}, {CSTR("ch")}, 0, 0},
+ {{CSTR("c")}, {CSTR("ch")}, 1, 0},
+ {{CSTR("c")}, {CSTR("ch")}, 2, -1},
+
+ {{CSTR("h")}, {CSTR("ch")}, 0, 0},
+ {{CSTR("h")}, {CSTR("ch")}, 1, 1},
+ {{CSTR("h")}, {CSTR("ch")}, 2, -1},
+
+ {{CSTR("i")}, {CSTR("ch")}, 0, 0},
+ {{CSTR("i")}, {CSTR("ch")}, 1, 1},
+ {{CSTR("i")}, {CSTR("ch")}, 2, 1},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+ Tests for mbminlen2 character sets (run for the ucs2 collations),
+ for both PAD SPACE and NOPAD collations.
+ The strings are built from the two-byte UCS2_xxx literals.
+ The list is terminated by a row with a NULL "a" string.
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen2_xpad_common[]=
+{
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 0, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 1, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 2, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 3, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 100, 0},
+
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 0, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 1, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 2, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 3, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 100, 0},
+
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 0, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 1, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 2, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 3, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 100, 0},
+
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 0, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 1, 0},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 2, -1},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 3, -1},
+ {{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 100, -1},
+
+ {{NULL, 0}, {NULL, 0}, 0, 0}
+};
+
+
+/*
+  Run one test case against the given collation: first as (a,b), then,
+  if that gave the expected result, in the reverse order as (b,a),
+  expecting the negated result.
+
+  @param cs - the collation to test
+  @param p  - the test case
+  @return   - 0 on success, 1 on failure
+*/
+static int
+strnncollsp_char_one(CHARSET_INFO *cs, const STRNNCOLLSP_CHAR_PARAM *p)
+{
+  char a_hex[64], b_hex[64];
+  int actual= cs->coll->strnncollsp_nchars(cs,
+                                           (uchar *) p->a.str, p->a.length,
+                                           (uchar *) p->b.str, p->b.length,
+                                           p->nchars);
+  str2hex(a_hex, sizeof(a_hex), p->a.str, p->a.length);
+  str2hex(b_hex, sizeof(b_hex), p->b.str, p->b.length);
+  diag("%-25s %-12s %-12s %3d %7d %7d%s",
+       cs->name, a_hex, b_hex, (int) p->nchars, p->res, actual,
+       eqres(actual, p->res) ? "" : " FAILED");
+  if (!eqres(actual, p->res))
+    return 1;
+
+  /* The direct comparison passed. Test in reverse order. */
+  actual= cs->coll->strnncollsp_nchars(cs,
+                                       (uchar *) p->b.str, p->b.length,
+                                       (uchar *) p->a.str, p->a.length,
+                                       p->nchars);
+  if (eqres(actual, -p->res))
+    return 0;
+
+  diag("Comparison in reverse order failed. Expected %d, got %d",
+       -p->res, actual);
+  return 1;
+}
+
+
+/*
+  Run a NULL-terminated list of test cases against a collation.
+
+  @param collation - the collation name
+  @param param     - the test cases; the list ends at the first row
+                     with a NULL "a" string
+  @return          - the number of failed test cases,
+                     or 1 if the collation could not be found
+*/
+static int
+strnncollsp_char(const char *collation, const STRNNCOLLSP_CHAR_PARAM *param)
+{
+  int nfailed= 0;
+  const STRNNCOLLSP_CHAR_PARAM *test;
+  CHARSET_INFO *cs= get_charset_by_name(collation, MYF(0));
+
+  if (cs == NULL)
+  {
+    diag("get_charset_by_name() failed");
+    return 1;
+  }
+
+  diag("%-25s %-12s %-12s %-3s %7s %7s",
+       "Collation", "a", "b", "Nch", "ExpSign", "Actual");
+
+  test= param;
+  while (test->a.str != NULL)
+  {
+    nfailed+= strnncollsp_char_one(cs, test);
+    test++;
+  }
+
+  return nfailed;
+}
+
+
+/*
+  Run the tests common for mbminlen1 character sets,
+  then the collation specific tests, if any.
+
+  @param collation - the collation name
+  @param specific  - collation specific test cases, or NULL
+  @return          - the total number of failures
+*/
+static int
+strnncollsp_char_mbminlen1(const char *collation,
+                           const STRNNCOLLSP_CHAR_PARAM *specific)
+{
+  int nfailed= strnncollsp_char(collation,
+                                strnncollsp_char_mbminlen1_xpad_common);
+  if (specific != NULL)
+    nfailed+= strnncollsp_char(collation, specific);
+  return nfailed;
+}
+
+
+/*
+  Run the tests common for mbminlen2 character sets,
+  then the collation specific tests, if any.
+
+  @param collation - the collation name
+  @param specific  - collation specific test cases, or NULL
+  @return          - the total number of failures
+*/
+static int
+strnncollsp_char_mbminlen2(const char *collation,
+                           const STRNNCOLLSP_CHAR_PARAM *specific)
+{
+  int nfailed= strnncollsp_char(collation,
+                                strnncollsp_char_mbminlen2_xpad_common);
+  if (specific != NULL)
+    nfailed+= strnncollsp_char(collation, specific);
+  return nfailed;
+}
+
+
+/*
+  Run the tests for a latin1 collation: the tests common for mbminlen1
+  character sets, the tests common for latin1,
+  then the collation specific tests, if any.
+
+  @param collation - the collation name
+  @param specific  - collation specific test cases, or NULL
+  @return          - the total number of failures
+*/
+static int
+strnncollsp_char_latin1(const char *collation,
+                        const STRNNCOLLSP_CHAR_PARAM *specific)
+{
+  int nfailed= strnncollsp_char(collation,
+                                strnncollsp_char_mbminlen1_xpad_common);
+  nfailed+= strnncollsp_char(collation, strnncollsp_char_latin1_xpad_common);
+  if (specific != NULL)
+    nfailed+= strnncollsp_char(collation, specific);
+  return nfailed;
+}
+
+
+/*
+  Run the tests for a utf8 collation: the tests common for mbminlen1
+  character sets, the tests common for utf8, the "A WITH DIAERESIS
+  is equal to A" tests (skipped for collations with "_bin", "_german2"
+  or "_danish" in the name), then the collation specific tests, if any.
+
+  @param collation - the collation name
+  @param specific  - collation specific test cases, or NULL
+  @return          - the total number of failures
+*/
+static int
+strnncollsp_char_utf8mbx(const char *collation,
+                         const STRNNCOLLSP_CHAR_PARAM *specific)
+{
+  int nfailed= strnncollsp_char(collation,
+                                strnncollsp_char_mbminlen1_xpad_common);
+  nfailed+= strnncollsp_char(collation, strnncollsp_char_utf8mbx_xpad_common);
+
+  if (!(strstr(collation, "_bin") ||
+        strstr(collation, "_german2") ||
+        strstr(collation, "_danish")))
+  {
+    nfailed+= strnncollsp_char(collation,
+                               strnncollsp_char_utf8mbx_xpad_a_eq_auml);
+  }
+
+  if (specific != NULL)
+    nfailed+= strnncollsp_char(collation, specific);
+  return nfailed;
+}
+
+
+/*
+ Run the strnncollsp_nchars() tests for a fixed list of collations.
+ Tests for optional character sets are compiled in only if
+ the corresponding HAVE_CHARSET_xxx macro is defined.
+
+ @return - the total number of failures reported by the helpers
+*/
+static int
+test_strnncollsp_char()
+{
+ int failed= 0;
+ /* latin1 collations */
+ failed+= strnncollsp_char_latin1("latin1_swedish_ci", NULL);
+ failed+= strnncollsp_char_latin1("latin1_swedish_nopad_ci", NULL);
+ failed+= strnncollsp_char_latin1("latin1_bin", NULL);
+ failed+= strnncollsp_char_latin1("latin1_nopad_bin", NULL);
+ failed+= strnncollsp_char_latin1("latin1_german2_ci",
+ strnncollsp_char_latin1_german2_ci);
+
+ /* Czech collations, with the tests for the 'ch' sequence */
+#ifdef HAVE_CHARSET_cp1250
+ failed+= strnncollsp_char_mbminlen1("cp1250_czech_cs",
+ strnncollsp_char_mbminlen1_xpad_czech);
+#endif
+
+#ifdef HAVE_CHARSET_latin2
+ failed+= strnncollsp_char_mbminlen1("latin2_czech_cs",
+ strnncollsp_char_mbminlen1_xpad_czech);
+#endif
+
+ /* tis620 and big5: only the common mbminlen1 tests */
+#ifdef HAVE_CHARSET_tis620
+ failed+= strnncollsp_char_mbminlen1("tis620_thai_ci", NULL);
+#endif
+
+#ifdef HAVE_CHARSET_big5
+ failed+= strnncollsp_char_mbminlen1("big5_chinese_ci", NULL);
+ failed+= strnncollsp_char_mbminlen1("big5_chinese_nopad_ci", NULL);
+ failed+= strnncollsp_char_mbminlen1("big5_bin", NULL);
+ failed+= strnncollsp_char_mbminlen1("big5_nopad_bin", NULL);
+#endif
+
+ /* utf8mb3 collations without collation specific tests */
+ failed+= strnncollsp_char_utf8mbx("utf8mb3_general_ci", NULL);
+ failed+= strnncollsp_char_utf8mbx("utf8mb3_general_nopad_ci", NULL);
+ failed+= strnncollsp_char_utf8mbx("utf8mb3_bin", NULL);
+ failed+= strnncollsp_char_utf8mbx("utf8mb3_nopad_bin", NULL);
+
+ /* utf8mb3 collations with collation specific tests */
+ failed+= strnncollsp_char_utf8mbx("utf8mb3_unicode_ci",
+ strnncollsp_char_utf8mb3_unicode_ci);
+ failed+= strnncollsp_char_utf8mbx("utf8mb3_unicode_nopad_ci",
+ strnncollsp_char_utf8mb3_unicode_nopad_ci);
+ failed+= strnncollsp_char_utf8mbx("utf8mb3_danish_ci",
+ strnncollsp_char_utf8mb3_danish_ci);
+ failed+= strnncollsp_char_utf8mbx("utf8mb3_german2_ci",
+ strnncollsp_char_utf8mbx_german2_ci);
+ failed+= strnncollsp_char_utf8mbx("utf8mb3_czech_ci",
+ strnncollsp_char_mbminlen1_xpad_czech);
+
+ /* ucs2 collations: only the common mbminlen2 tests */
+#ifdef HAVE_CHARSET_ucs2
+ failed+= strnncollsp_char_mbminlen2("ucs2_general_ci", NULL);
+ failed+= strnncollsp_char_mbminlen2("ucs2_general_nopad_ci", NULL);
+ failed+= strnncollsp_char_mbminlen2("ucs2_bin", NULL);
+ failed+= strnncollsp_char_mbminlen2("ucs2_nopad_bin", NULL);
+ failed+= strnncollsp_char_mbminlen2("ucs2_unicode_ci", NULL);
+ failed+= strnncollsp_char_mbminlen2("ucs2_unicode_nopad_ci", NULL);
+#endif
+
+ return failed;
+}
+
+
+int main(int ac, char **av)
{
size_t i, failed= 0;
-
- plan(2);
+
+ MY_INIT(av[0]);
+
+ plan(3);
diag("Testing my_like_range_xxx() functions");
for (i= 0; i < array_elements(charset_list); i++)
@@ -780,5 +1276,11 @@ int main()
failed= test_strcollsp();
ok(failed == 0, "Testing cs->coll->strnncollsp()");
+ diag("Testing cs->coll->strnncollsp_char()");
+ failed= test_strnncollsp_char();
+ ok(failed == 0, "Testing cs->coll->strnncollsp_char()");
+
+ my_end(0);
+
return exit_status();
}