diff options
author | Oleksandr Byelkin <sanja@mariadb.com> | 2022-02-01 20:33:04 +0100 |
---|---|---|
committer | Oleksandr Byelkin <sanja@mariadb.com> | 2022-02-01 20:33:04 +0100 |
commit | cf63eecef44f189ce2d221612dee9dfc1885ba4e (patch) | |
tree | 93b4e6645a1d371bd8012a0aa8e6e3a3d541b2a6 /strings | |
parent | fb40a2fabf8d8cf765c83a0b8e609dd893c75ec3 (diff) | |
parent | c04a203a10e282e1f33fd04d8a1b7ff0b076bce5 (diff) | |
download | mariadb-git-cf63eecef44f189ce2d221612dee9dfc1885ba4e.tar.gz |
Merge branch '10.4' into 10.5
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-big5.c | 14 | ||||
-rw-r--r-- | strings/ctype-bin.c | 25 | ||||
-rw-r--r-- | strings/ctype-cp932.c | 14 | ||||
-rw-r--r-- | strings/ctype-czech.c | 1 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 14 | ||||
-rw-r--r-- | strings/ctype-eucjpms.c | 14 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 14 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 14 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 1 | ||||
-rw-r--r-- | strings/ctype-mb.inl (renamed from strings/ctype-mb.ic) | 0 | ||||
-rw-r--r-- | strings/ctype-simple.c | 14 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 14 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 2 | ||||
-rw-r--r-- | strings/ctype-uca-scanner_next.inl | 179 | ||||
-rw-r--r-- | strings/ctype-uca.c | 82 | ||||
-rw-r--r-- | strings/ctype-uca.ic | 276 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 54 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 14 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 39 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 1 | ||||
-rw-r--r-- | strings/ctype.c | 29 | ||||
-rw-r--r-- | strings/strcoll.inl (renamed from strings/strcoll.ic) | 50 | ||||
-rw-r--r-- | strings/strings_def.h | 10 |
23 files changed, 673 insertions, 202 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index f5dd92f736e..b12da023604 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -52,7 +52,7 @@ const char charset_name_big5[]= "big5"; #define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) #define IS_MB2_CHAR(x,y) (isbig5head(x) && isbig5tail(y)) #define DEFINE_ASIAN_ROUTINES -#include "ctype-mb.ic" +#include "ctype-mb.inl" static const uchar ctype_big5[257] = @@ -6683,13 +6683,13 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)), #define WEIGHT_MB2(x,y) (big5code(x, y)) #define WEIGHT_MB2_FRM(x,y) (big5strokexfrm((uint16) WEIGHT_MB2(x, y))) #define DEFINE_STRNXFRM -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _big5_bin #define WEIGHT_MB1(x) ((uchar) (x)) #define WEIGHT_MB2(x,y) (big5code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -6698,14 +6698,14 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)), #define WEIGHT_MB2(x,y) (big5code(x, y)) #define WEIGHT_MB2_FRM(x,y) (big5strokexfrm((uint16) WEIGHT_MB2(x, y))) #define DEFINE_STRNXFRM -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _big5_nopad_bin #define WEIGHT_MB1(x) ((uchar) (x)) #define WEIGHT_MB2(x,y) (big5code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci= @@ -6713,6 +6713,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci= NULL, /* init */ my_strnncoll_big5_chinese_ci, my_strnncollsp_big5_chinese_ci, + my_strnncollsp_nchars_big5_chinese_ci, my_strnxfrm_big5_chinese_ci, my_strnxfrmlen_simple, my_like_range_mb, @@ -6729,6 +6730,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin= NULL, /* init */ my_strnncoll_big5_bin, my_strnncollsp_big5_bin, + my_strnncollsp_nchars_big5_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -6745,6 +6747,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_nopad_ci= NULL, /* init */ my_strnncoll_big5_chinese_ci, my_strnncollsp_big5_chinese_nopad_ci, + my_strnncollsp_nchars_big5_chinese_nopad_ci, my_strnxfrm_big5_chinese_nopad_ci, my_strnxfrmlen_simple, my_like_range_mb, @@ -6761,6 +6764,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_nopad_bin= NULL, /* init */ my_strnncoll_big5_bin, my_strnncollsp_big5_nopad_bin, + my_strnncollsp_nchars_big5_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index c11be2e5926..a8420722f25 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -127,6 +127,17 @@ static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)), } +static int my_strnncollsp_nchars_binary(CHARSET_INFO * cs __attribute__((unused)), + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + size_t nchars) +{ + set_if_smaller(slen, nchars); + set_if_smaller(tlen, nchars); + return my_strnncoll_binary(cs, s, slen, t, tlen, 0); +} + + static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), const uchar *s, size_t slen, const uchar *t, size_t tlen, @@ -201,6 +212,17 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), } +static int my_strnncollsp_nchars_8bit_bin(CHARSET_INFO * cs, + const uchar *a, size_t a_length, + const uchar *b, size_t b_length, + size_t nchars) +{ + set_if_smaller(a_length, nchars); + set_if_smaller(b_length, nchars); + return my_strnncollsp_8bit_bin(cs, a, a_length, b, b_length); +} + + static int my_strnncollsp_8bit_nopad_bin(CHARSET_INFO * cs __attribute__((unused)), const uchar *a, size_t a_length, @@ -489,6 +511,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler = my_coll_init_8bit_bin, my_strnncoll_8bit_bin, my_strnncollsp_8bit_bin, + my_strnncollsp_nchars_8bit_bin, my_strnxfrm_8bit_bin, my_strnxfrmlen_simple, my_like_range_simple, @@ -505,6 +528,7 @@ MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler = my_coll_init_8bit_bin, my_strnncoll_8bit_bin, my_strnncollsp_8bit_nopad_bin, + my_strnncollsp_nchars_8bit_bin, my_strnxfrm_8bit_nopad_bin, my_strnxfrmlen_simple, my_like_range_simple, @@ -521,6 +545,7 @@ static MY_COLLATION_HANDLER my_collation_binary_handler = NULL, /* init */ my_strnncoll_binary, my_strnncollsp_binary, + my_strnncollsp_nchars_binary, my_strnxfrm_8bit_bin, my_strnxfrmlen_simple, my_like_range_simple, diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index 28fea965bf3..5196a21ebeb 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -189,7 +189,7 @@ static const uchar sort_order_cp932[]= #define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || iscp932kata(x)) #define IS_MB2_CHAR(x,y) (iscp932head(x) && iscp932tail(y)) #define DEFINE_ASIAN_ROUTINES -#include "ctype-mb.ic" +#include "ctype-mb.inl" #define cp932code(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d)) @@ -34637,14 +34637,14 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)), #define WEIGHT_PAD_SPACE (256 * (int) ' ') #define WEIGHT_MB1(x) (256 * (int) sort_order_cp932[(uchar) (x)]) #define WEIGHT_MB2(x,y) (cp932code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _cp932_bin #define WEIGHT_PAD_SPACE (256 * (int) ' ') #define WEIGHT_MB1(x) (256 * (int) (uchar) (x)) #define WEIGHT_MB2(x,y) (cp932code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -34652,7 +34652,7 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)), #define WEIGHT_PAD_SPACE (256 * (int) ' ') #define WEIGHT_MB1(x) (256 * (int) sort_order_cp932[(uchar) (x)]) #define WEIGHT_MB2(x,y) (cp932code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -34660,7 +34660,7 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)), #define WEIGHT_PAD_SPACE (256 * (int) ' ') #define WEIGHT_MB1(x) (256 * (int) (uchar) (x)) #define WEIGHT_MB2(x,y) (cp932code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci= @@ -34668,6 +34668,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci= NULL, /* init */ my_strnncoll_cp932_japanese_ci, my_strnncollsp_cp932_japanese_ci, + my_strnncollsp_nchars_cp932_japanese_ci, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -34684,6 +34685,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin= NULL, /* init */ my_strnncoll_cp932_bin, my_strnncollsp_cp932_bin, + my_strnncollsp_nchars_cp932_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -34700,6 +34702,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_nopad_ci= NULL, /* init */ my_strnncoll_cp932_japanese_ci, my_strnncollsp_cp932_japanese_nopad_ci, + my_strnncollsp_nchars_cp932_japanese_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -34716,6 +34719,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_nopad_bin= NULL, /* init */ my_strnncoll_cp932_bin, my_strnncollsp_cp932_nopad_bin, + my_strnncollsp_nchars_cp932_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index b80fe1ae8ed..74ac4fc1494 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -610,6 +610,7 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = NULL, /* init */ my_strnncoll_czech, my_strnncollsp_czech, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_czech, my_strnxfrmlen_czech, my_like_range_czech, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 61c64dbcc18..54fae4de0c0 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -208,7 +208,7 @@ static const uchar sort_order_euc_kr[]= #define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) #define IS_MB2_CHAR(x,y) (iseuc_kr_head(x) && iseuc_kr_tail(y)) #define DEFINE_ASIAN_ROUTINES -#include "ctype-mb.ic" +#include "ctype-mb.inl" static MY_UNICASE_CHARACTER cA3[256]= @@ -9930,27 +9930,27 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)), #define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_korean_ci #define WEIGHT_MB1(x) (sort_order_euc_kr[(uchar) (x)]) #define WEIGHT_MB2(x,y) (euckrcode(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_bin #define WEIGHT_MB1(x) ((uchar) (x)) #define WEIGHT_MB2(x,y) (euckrcode(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_korean_nopad_ci #define WEIGHT_MB1(x) (sort_order_euc_kr[(uchar) (x)]) #define WEIGHT_MB2(x,y) (euckrcode(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_nopad_bin #define WEIGHT_MB1(x) ((uchar) (x)) #define WEIGHT_MB2(x,y) (euckrcode(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci= @@ -9958,6 +9958,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci= NULL, /* init */ my_strnncoll_euckr_korean_ci, my_strnncollsp_euckr_korean_ci, + my_strnncollsp_nchars_euckr_korean_ci, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -9974,6 +9975,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin= NULL, /* init */ my_strnncoll_euckr_bin, my_strnncollsp_euckr_bin, + my_strnncollsp_nchars_euckr_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -9990,6 +9992,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_nopad_ci= NULL, /* init */ my_strnncoll_euckr_korean_ci, my_strnncollsp_euckr_korean_nopad_ci, + my_strnncollsp_nchars_euckr_korean_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -10006,6 +10009,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_nopad_bin= NULL, /* init */ my_strnncoll_euckr_bin, my_strnncollsp_euckr_nopad_bin, + my_strnncollsp_nchars_euckr_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index 2c24bc5ffda..27e2d1cbae6 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -203,7 +203,7 @@ static const uchar sort_order_eucjpms[]= #define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z)) #define IS_MB_PREFIX2(x,y) (iseucjpms_ss3(x) && iseucjpms(y)) #define DEFINE_ASIAN_ROUTINES -#include "ctype-mb.ic" +#include "ctype-mb.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms_japanese_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) @@ -211,7 +211,7 @@ static const uchar sort_order_eucjpms[]= #define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ (((uint) (uchar) (y)) << 8)) #define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms_bin @@ -220,7 +220,7 @@ static const uchar sort_order_eucjpms[]= #define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ (((uint) (uchar) (y)) << 8)) #define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -230,7 +230,7 @@ static const uchar sort_order_eucjpms[]= #define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ (((uint) (uchar) (y)) << 8)) #define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -240,7 +240,7 @@ static const uchar sort_order_eucjpms[]= #define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ (((uint) (uchar) (y)) << 8)) #define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) -#include "strcoll.ic" +#include "strcoll.inl" /* Case info pages for JIS-X-0208 range */ @@ -67497,6 +67497,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_ci_handler = NULL, /* init */ my_strnncoll_eucjpms_japanese_ci, my_strnncollsp_eucjpms_japanese_ci, + my_strnncollsp_nchars_eucjpms_japanese_ci, my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -67513,6 +67514,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler = NULL, /* init */ my_strnncoll_eucjpms_bin, my_strnncollsp_eucjpms_bin, + my_strnncollsp_nchars_eucjpms_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -67529,6 +67531,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_nopad_ci_handler = NULL, /* init */ my_strnncoll_eucjpms_japanese_ci, my_strnncollsp_eucjpms_japanese_nopad_ci, + my_strnncollsp_nchars_eucjpms_japanese_nopad_ci, my_strnxfrm_mb_nopad, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -67545,6 +67548,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_nopad_bin_handler = NULL, /* init */ my_strnncoll_eucjpms_bin, my_strnncollsp_eucjpms_nopad_bin, + my_strnncollsp_nchars_eucjpms_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 91ee2504b65..a96931e0ba4 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -172,7 +172,7 @@ static const uchar sort_order_gb2312[]= #define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) #define IS_MB2_CHAR(x,y) (isgb2312head(x) && isgb2312tail(y)) #define DEFINE_ASIAN_ROUTINES -#include "ctype-mb.ic" +#include "ctype-mb.inl" static MY_UNICASE_CHARACTER cA2[256]= @@ -6336,27 +6336,27 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)), #define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_chinese_ci #define WEIGHT_MB1(x) (sort_order_gb2312[(uchar) (x)]) #define WEIGHT_MB2(x,y) (gb2312code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_bin #define WEIGHT_MB1(x) ((uchar) (x)) #define WEIGHT_MB2(x,y) (gb2312code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_chinese_nopad_ci #define WEIGHT_MB1(x) (sort_order_gb2312[(uchar) (x)]) #define WEIGHT_MB2(x,y) (gb2312code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_nopad_bin #define WEIGHT_MB1(x) ((uchar) (x)) #define WEIGHT_MB2(x,y) (gb2312code(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci= @@ -6364,6 +6364,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci= NULL, /* init */ my_strnncoll_gb2312_chinese_ci, my_strnncollsp_gb2312_chinese_ci, + my_strnncollsp_nchars_gb2312_chinese_ci, my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -6380,6 +6381,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin= NULL, /* init */ my_strnncoll_gb2312_bin, my_strnncollsp_gb2312_bin, + my_strnncollsp_nchars_gb2312_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -6396,6 +6398,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_nopad_ci= NULL, /* init */ my_strnncoll_gb2312_chinese_ci, my_strnncollsp_gb2312_chinese_nopad_ci, + my_strnncollsp_nchars_gb2312_chinese_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -6412,6 +6415,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_nopad_bin= NULL, /* init */ my_strnncoll_gb2312_bin, my_strnncollsp_gb2312_nopad_bin, + my_strnncollsp_nchars_gb2312_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 041fe318eed..865d3bbc5df 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -48,7 +48,7 @@ const char charset_name_gbk[]= "gbk"; #define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) #define IS_MB2_CHAR(x,y) (isgbkhead(x) && isgbktail(y)) #define DEFINE_ASIAN_ROUTINES -#include "ctype-mb.ic" +#include "ctype-mb.inl" static const uchar ctype_gbk[257] = @@ -10617,13 +10617,13 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)), #define WEIGHT_MB1(x) (sort_order_gbk[(uchar) (x)]) #define WEIGHT_MB2(x,y) (gbksortorder(gbkcode(x,y))) #define DEFINE_STRNXFRM -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_bin #define WEIGHT_MB1(x) ((uchar) (x)) #define WEIGHT_MB2(x,y) (gbkcode(x,y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -10631,14 +10631,14 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)), #define WEIGHT_MB1(x) (sort_order_gbk[(uchar) (x)]) #define WEIGHT_MB2(x,y) (gbksortorder(gbkcode(x,y))) #define DEFINE_STRNXFRM -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_nopad_bin #define WEIGHT_MB1(x) ((uchar) (x)) #define WEIGHT_MB2(x,y) (gbkcode(x,y)) -#include "strcoll.ic" +#include "strcoll.inl" static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci= @@ -10646,6 +10646,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci= NULL, /* init */ my_strnncoll_gbk_chinese_ci, my_strnncollsp_gbk_chinese_ci, + my_strnncollsp_nchars_gbk_chinese_ci, my_strnxfrm_gbk_chinese_ci, my_strnxfrmlen_simple, my_like_range_mb, @@ -10662,6 +10663,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin= NULL, /* init */ my_strnncoll_gbk_bin, my_strnncollsp_gbk_bin, + my_strnncollsp_nchars_gbk_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -10678,6 +10680,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_nopad_ci= NULL, /* init */ my_strnncoll_gbk_chinese_ci, my_strnncollsp_gbk_chinese_nopad_ci, + my_strnncollsp_nchars_gbk_chinese_nopad_ci, my_strnxfrm_gbk_chinese_nopad_ci, my_strnxfrmlen_simple, my_like_range_mb, @@ -10694,6 +10697,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_nopad_bin= NULL, /* init */ my_strnncoll_gbk_bin, my_strnncollsp_gbk_nopad_bin, + my_strnncollsp_nchars_gbk_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 4753ca737a6..eeb00d29592 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -729,6 +729,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler= NULL, /* init */ my_strnncoll_latin1_de, my_strnncollsp_latin1_de, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_latin1_de, my_strnxfrmlen_simple, my_like_range_simple, diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.inl index 6cde31a34ad..6cde31a34ad 100644 --- a/strings/ctype-mb.ic +++ b/strings/ctype-mb.inl diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index f9471f35f79..220cecbabfe 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -208,6 +208,18 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length, } +static int +my_strnncollsp_nchars_simple(CHARSET_INFO * cs, + const uchar *a, size_t a_length, + const uchar *b, size_t b_length, + size_t nchars) +{ + set_if_smaller(a_length, nchars); + set_if_smaller(b_length, nchars); + return my_strnncollsp_simple(cs, a, a_length, b, b_length); +} + + int my_strnncollsp_simple_nopad(CHARSET_INFO * cs, const uchar *a, size_t a_length, const uchar *b, size_t b_length) @@ -2097,6 +2109,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler = my_coll_init_simple, /* init */ my_strnncoll_simple, my_strnncollsp_simple, + my_strnncollsp_nchars_simple, my_strnxfrm_simple, my_strnxfrmlen_simple, my_like_range_simple, @@ -2113,6 +2126,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler = my_coll_init_simple, /* init */ my_strnncoll_simple, my_strnncollsp_simple_nopad, + my_strnncollsp_nchars_simple, my_strnxfrm_simple_nopad, my_strnxfrmlen_simple, my_like_range_simple, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 57458ca3a48..7ecff065883 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -190,7 +190,7 @@ static const uchar sort_order_sjis[]= #define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || issjiskata(x)) #define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y)) #define DEFINE_ASIAN_ROUTINES -#include "ctype-mb.ic" +#include "ctype-mb.inl" #define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d)) @@ -34025,14 +34025,14 @@ my_wc_to_printable_sjis(CHARSET_INFO *cs, my_wc_t wc, #define WEIGHT_PAD_SPACE (256 * (int) ' ') #define WEIGHT_MB1(x) (256 * (int) sort_order_sjis[(uchar) (x)]) #define WEIGHT_MB2(x,y) (sjiscode(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_bin #define WEIGHT_PAD_SPACE (256 * (int) ' ') #define WEIGHT_MB1(x) (256 * (int) (uchar) (x)) #define WEIGHT_MB2(x,y) (sjiscode(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -34040,7 +34040,7 @@ my_wc_to_printable_sjis(CHARSET_INFO *cs, my_wc_t wc, #define WEIGHT_PAD_SPACE (256 * (int) ' ') #define WEIGHT_MB1(x) (256 * (int) sort_order_sjis[(uchar) (x)]) #define WEIGHT_MB2(x,y) (sjiscode(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -34048,7 +34048,7 @@ my_wc_to_printable_sjis(CHARSET_INFO *cs, my_wc_t wc, #define WEIGHT_PAD_SPACE (256 * (int) ' ') #define WEIGHT_MB1(x) (256 * (int) (uchar) (x)) #define WEIGHT_MB2(x,y) (sjiscode(x, y)) -#include "strcoll.ic" +#include "strcoll.inl" static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci= @@ -34056,6 +34056,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci= NULL, /* init */ my_strnncoll_sjis_japanese_ci, my_strnncollsp_sjis_japanese_ci, + my_strnncollsp_nchars_sjis_japanese_ci, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -34072,6 +34073,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin= NULL, /* init */ my_strnncoll_sjis_bin, my_strnncollsp_sjis_bin, + my_strnncollsp_nchars_sjis_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -34088,6 +34090,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_nopad_ci= NULL, /* init */ my_strnncoll_sjis_japanese_ci, my_strnncollsp_sjis_japanese_nopad_ci, + my_strnncollsp_nchars_sjis_japanese_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -34104,6 +34107,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_nopad_bin= NULL, /* init */ my_strnncoll_sjis_bin, my_strnncollsp_sjis_nopad_bin, + my_strnncollsp_nchars_sjis_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 25d900492e3..b6ac880dc9f 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -854,6 +854,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_tis620, my_strnncollsp_tis620, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_tis620, my_strnxfrmlen_simple, my_like_range_simple, @@ -869,6 +870,7 @@ static MY_COLLATION_HANDLER my_collation_nopad_ci_handler = NULL, /* init */ my_strnncoll_tis620, my_strnncollsp_tis620_nopad, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_tis620_nopad, my_strnxfrmlen_simple, my_like_range_simple, diff --git a/strings/ctype-uca-scanner_next.inl b/strings/ctype-uca-scanner_next.inl new file mode 100644 index 00000000000..79d25487b42 --- /dev/null +++ b/strings/ctype-uca-scanner_next.inl @@ -0,0 +1,179 @@ +/* Copyright (c) 2004, 2013, Oracle and/or its affiliates. + Copyright (c) 2009, 2021, MariaDB + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; version 2 + of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + MA 02110-1335 USA */ + + +#ifdef SCANNER_NEXT_NCHARS + +#define SCANNER_NEXT_RETURN(_w,_n) \ + do { weight_and_nchars_t rc= {_w, _n}; return rc; } while(0) + +#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \ + do { \ + weight_and_nchars_t rc= { _cnt->weight[0], \ + _ignorable_nchars + \ + my_contraction_char_length(_cnt) }; \ + return rc; \ + } while(0) + +#else + +#define SCANNER_NEXT_RETURN(_w,_n) do { return _w; } while (0) + +#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \ + do { return _cnt->weight[0]; } while(0) + +#endif + +static inline +#ifdef SCANNER_NEXT_NCHARS +weight_and_nchars_t +MY_FUNCTION_NAME(scanner_next_with_nchars)(my_uca_scanner *scanner, + size_t nchars) +#else +int +MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner) +#endif +{ +#ifdef SCANNER_NEXT_NCHARS + uint ignorable_nchars; +#define LOCAL_MAX_CONTRACTION_LENGTH nchars +#else +#define LOCAL_MAX_CONTRACTION_LENGTH MY_UCA_MAX_CONTRACTION +#endif + /* + Check if the weights for the previous character have been + already fully scanned. If yes, then get the next character and + initialize wbeg and wlength to its weight string. + */ + + if (scanner->wbeg[0]) + { + /* + More weights left from the previous step. + Return the next weight from the current expansion. + Return "0" as "nchars". The real nchars was set on a previous + iteration. + */ + SCANNER_NEXT_RETURN(*scanner->wbeg++, 0); + } + +#ifdef SCANNER_NEXT_NCHARS + for (ignorable_nchars= 0 ; ; ignorable_nchars++) +#else + for ( ; ; ) +#endif + { + const uint16 *wpage; + my_wc_t wc[MY_UCA_MAX_CONTRACTION]; + int mblen; + + /* Get next character */ +#if MY_UCA_ASCII_OPTIMIZE + /* Get next ASCII character */ + if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80) + { + wc[0]= scanner->sbeg[0]; + scanner->sbeg+= 1; + +#if MY_UCA_COMPILE_CONTRACTIONS + if (my_uca_needs_context_handling(scanner->level, wc[0])) + { + const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc, + LOCAL_MAX_CONTRACTION_LENGTH); + if (cnt) + SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars); + } +#endif + + scanner->page= 0; + scanner->code= (int) wc[0]; + scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0]; + if (scanner->wbeg[0]) + SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1); + continue; + } + else +#endif + /* Get next MB character */ + if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg, + scanner->send)) <= 0)) + { + if (scanner->sbeg >= scanner->send) + { + /* No more bytes, end of line reached */ + SCANNER_NEXT_RETURN(-1, ignorable_nchars); + } + /* + There are some more bytes left. Non-positive mb_len means that + we got an incomplete or a bad byte sequence. Consume mbminlen bytes. + */ + if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send) + { + /* For safety purposes don't go beyond the string range. */ + scanner->sbeg= scanner->send; + } + /* + Treat every complete or incomplete mbminlen unit as a weight which is + greater than weight for any possible normal character. + 0xFFFF is greater than any possible weight in the UCA weight table. + */ + SCANNER_NEXT_RETURN(0xFFFF, ignorable_nchars + 1); + } + + scanner->sbeg+= mblen; + if (wc[0] > scanner->level->maxchar) + { + /* Return 0xFFFD as weight for all characters outside BMP */ + scanner->wbeg= nochar; + SCANNER_NEXT_RETURN(0xFFFD, ignorable_nchars + 1); + } + +#if MY_UCA_COMPILE_CONTRACTIONS + if (my_uca_needs_context_handling(scanner->level, wc[0])) + { + const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc, + LOCAL_MAX_CONTRACTION_LENGTH); + if (cnt) + SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars); + } +#endif + + /* Process single character */ + scanner->page= wc[0] >> 8; + scanner->code= wc[0] & 0xFF; + + /* If weight page for w[0] does not exist, then calculate algoritmically */ + if (!(wpage= scanner->level->weights[scanner->page])) + SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner), + ignorable_nchars + 1); + + /* Calculate pointer to w[0]'s weight, using page and offset */ + scanner->wbeg= wpage + + scanner->code * scanner->level->lengths[scanner->page]; + if (scanner->wbeg[0]) + break; + /* Skip ignorable character and continue the loop */ + } + + SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1); +} + +#undef SCANNER_NEXT_NCHARS +#undef SCANNER_NEXT_RETURN +#undef SCANNER_NEXT_RETURN_CONTRACTION +#undef LOCAL_MAX_CONTRACTION_LENGTH diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 72f90ab79c6..e15109bb6ca 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -35,6 +35,12 @@ #include "strings_def.h" #include <m_ctype.h> +typedef struct +{ + int weight; + uint nchars; +} weight_and_nchars_t; + #define MY_CS_COMMON_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NON1TO1) #define MY_UCA_CNT_FLAG_SIZE 4096 @@ -31450,6 +31456,21 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len) } +/* + Return the number of characters in a contraction. +*/ +static inline uint my_contraction_char_length(const MY_CONTRACTION *cnt) +{ + uint i; + for (i= 2; i < array_elements(cnt->ch); i++) + { + if (cnt->ch[i] == 0) + return i; + } + return array_elements(cnt->ch); +} + + /** Check if a string is a contraction, and return its weight array on success. @@ -31460,11 +31481,11 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len) @return Weight array @retval NULL - Input string is not a known contraction - @retval ptr - contraction weight array + @retval ptr - the address of the MY_CONTRACTION found */ -static inline uint16 * -my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len) +static inline const MY_CONTRACTION * +my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len) { MY_CONTRACTION *c, *last; DBUG_ASSERT(len <= MY_UCA_MAX_CONTRACTION); @@ -31474,7 +31495,7 @@ my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len) if ((len >= MY_UCA_MAX_CONTRACTION || c->ch[len] == 0) && !c->with_context && !my_wmemcmp(c->ch, wc, len)) - return c->weight; + return c; } return NULL; } @@ -31487,16 +31508,18 @@ my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len) a contraction part. Then try to find real contraction among the candidates, starting from the longest. - @param scanner Pointer to UCA scanner - @param[OUT] *wc Where to store the scanned string + @param scanner Pointer to UCA scanner + @param[OUT] *wc Where to store the scanned string + @param max_char_length The longest contraction character length allowed @return Weight array @retval NULL - no contraction found - @retval ptr - contraction weight array + @retval ptr - the address of MY_CONTRACTION found */ -static uint16 * -my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc) +static const MY_CONTRACTION * +my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc, + size_t max_char_length) { size_t clen= 1; int flag; @@ -31505,7 +31528,7 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc) /* Scan all contraction candidates */ for (s= scanner->sbeg, flag= MY_UCA_CNT_MID1; - clen < MY_UCA_MAX_CONTRACTION; + clen < max_char_length; flag<<= 1) { int mblen; @@ -31520,15 +31543,15 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc) /* Find among candidates the longest real contraction */ for ( ; clen > 1; clen--) { - uint16 *cweight; + const MY_CONTRACTION *cnt; if (my_uca_can_be_contraction_tail(&scanner->level->contractions, wc[clen - 1]) && - (cweight= my_uca_contraction_weight(&scanner->level->contractions, - wc, clen))) + (cnt= my_uca_contraction_find(&scanner->level->contractions, + wc, clen))) { - scanner->wbeg= cweight + 1; + scanner->wbeg= cnt->weight + 1; scanner->sbeg= beg[clen - 1]; - return cweight; + return cnt; } } @@ -31546,10 +31569,10 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc) @return Weight array @retval NULL - no contraction with context found - @retval ptr - contraction weight array + @retval ptr - the address of MY_CONTRACTION found */ -static uint16 * +static const MY_CONTRACTION * my_uca_previous_context_find(my_uca_scanner *scanner, my_wc_t wc0, my_wc_t wc1) { @@ -31560,7 +31583,7 @@ my_uca_previous_context_find(my_uca_scanner *scanner, if (c->with_context && wc0 == c->ch[0] && wc1 == c->ch[1]) { scanner->wbeg= c->weight + 1; - return c->weight; + return c; } } return NULL; @@ -31581,13 +31604,16 @@ my_uca_previous_context_find(my_uca_scanner *scanner, If wc[0] and the previous character make a previous context pair, then wc[1] is set to the previous character. + @param max_char_length - the longest contraction character length allowed. + @retval NULL if could not find any contextual weights for wc[0] - @retval non null pointer to a zero-terminated weight string otherwise + @retval non null pointer - the address of MY_CONTRACTION found */ -static inline uint16 * -my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc) +static inline const MY_CONTRACTION * +my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc, + size_t max_char_length) { - uint16 *cweight; + const MY_CONTRACTION *cnt; DBUG_ASSERT(scanner->level->contractions.nitems); /* If we have scanned a character which can have previous context, @@ -31604,17 +31630,17 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc) my_uca_can_be_previous_context_head(&scanner->level->contractions, (wc[1]= ((scanner->page << 8) + scanner->code))) && - (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0]))) + (cnt= my_uca_previous_context_find(scanner, wc[1], wc[0]))) { scanner->page= scanner->code= 0; /* Clear for the next character */ - return cweight; + return cnt; } else if (my_uca_can_be_contraction_head(&scanner->level->contractions, wc[0])) { /* Check if w[0] starts a contraction */ - if ((cweight= my_uca_scanner_contraction_find(scanner, wc))) - return cweight; + if ((cnt= my_uca_scanner_contraction_find(scanner, wc, max_char_length))) + return cnt; } return NULL; } @@ -33212,9 +33238,11 @@ my_char_weight_put(MY_UCA_WEIGHT_LEVEL *dst, for (chlen= len; chlen > 1; chlen--) { + const MY_CONTRACTION *cnt; if (chlen <= MY_UCA_MAX_CONTRACTION && - (from= my_uca_contraction_weight(&dst->contractions, str, chlen))) + (cnt= my_uca_contraction_find(&dst->contractions, str, chlen))) { + from= cnt->weight; str+= chlen; len-= chlen; break; diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic index e47f1e1fd82..ddc397ce212 100644 --- a/strings/ctype-uca.ic +++ b/strings/ctype-uca.ic @@ -35,108 +35,9 @@ #error MY_UCA_COLL_INIT is not defined #endif - -static inline int -MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner) -{ - /* - Check if the weights for the previous character have been - already fully scanned. If yes, then get the next character and - initialize wbeg and wlength to its weight string. - */ - - if (scanner->wbeg[0]) /* More weights left from the previous step: */ - return *scanner->wbeg++; /* return the next weight from expansion */ - - do - { - const uint16 *wpage; - my_wc_t wc[MY_UCA_MAX_CONTRACTION]; - int mblen; - - /* Get next character */ -#if MY_UCA_ASCII_OPTIMIZE - /* Get next ASCII character */ - if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80) - { - wc[0]= scanner->sbeg[0]; - scanner->sbeg+= 1; - -#if MY_UCA_COMPILE_CONTRACTIONS - if (my_uca_needs_context_handling(scanner->level, wc[0])) - { - uint16 *cweight= my_uca_context_weight_find(scanner, wc); - if (cweight) - return *cweight; - } -#endif - - scanner->page= 0; - scanner->code= (int) wc[0]; - scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0]; - if (scanner->wbeg[0]) - return *scanner->wbeg++; - continue; - } - else -#endif - /* Get next MB character */ - if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg, - scanner->send)) <= 0)) - { - if (scanner->sbeg >= scanner->send) - return -1; /* No more bytes, end of line reached */ - /* - There are some more bytes left. Non-positive mb_len means that - we got an incomplete or a bad byte sequence. Consume mbminlen bytes. - */ - if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send) - { - /* For safety purposes don't go beyond the string range. */ - scanner->sbeg= scanner->send; - } - /* - Treat every complete or incomplete mbminlen unit as a weight which is - greater than weight for any possible normal character. - 0xFFFF is greater than any possible weight in the UCA weight table. - */ - return 0xFFFF; - } - - scanner->sbeg+= mblen; - if (wc[0] > scanner->level->maxchar) - { - /* Return 0xFFFD as weight for all characters outside BMP */ - scanner->wbeg= nochar; - return 0xFFFD; - } - -#if MY_UCA_COMPILE_CONTRACTIONS - if (my_uca_needs_context_handling(scanner->level, wc[0])) - { - uint16 *cweight= my_uca_context_weight_find(scanner, wc); - if (cweight) - return *cweight; - } -#endif - - /* Process single character */ - scanner->page= wc[0] >> 8; - scanner->code= wc[0] & 0xFF; - - /* If weight page for w[0] does not exist, then calculate algoritmically */ - if (!(wpage= scanner->level->weights[scanner->page])) - return my_uca_scanner_next_implicit(scanner); - - /* Calculate pointer to w[0]'s weight, using page and offset */ - scanner->wbeg= wpage + - scanner->code * scanner->level->lengths[scanner->page]; - } while (!scanner->wbeg[0]); /* Skip ignorable characters */ - - return *scanner->wbeg++; -} - - +#include "ctype-uca-scanner_next.inl" +#define SCANNER_NEXT_NCHARS +#include "ctype-uca-scanner_next.inl" /* Compares two strings according to the collation @@ -409,6 +310,173 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs, } +/* + Scan the next weight and perform space padding + or trimming according to "nchars". +*/ +static inline weight_and_nchars_t +MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner, + size_t nchars, + uint *generated) +{ + weight_and_nchars_t res; + if (nchars > 0 || + scanner->wbeg[0] /* Some weights from a previous expansion left */) + { + if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner, + nchars)).weight < 0) + { + /* + We reached the end of the string, but the caller wants more weights. + Perform space padding. + */ + res.weight= my_space_weight(scanner->level); + res.nchars= 1; + (*generated)++; + } + else if (res.nchars > nchars) + { + /* + We scanned the next collation element, but it does not fit into + the "nchars" limit. This is possible in case of: + - A contraction, e.g. Czech 'ch' with nchars=1 + - A sequence of ignorable characters followed by non-ignorable ones, + e.g. CONCAT(x'00','a') with nchars=1. + Perform trimming. + */ + res.weight= scanner->cs->state & MY_CS_NOPAD ? + 0 : my_space_weight(scanner->level); + res.nchars= (uint) nchars; + (*generated)++; + } + } + else + { + /* The caller wants nchars==0. Perform trimming. */ + res.weight= scanner->cs->state & MY_CS_NOPAD ? + 0 : my_space_weight(scanner->level); + res.nchars= 0; + (*generated)++; + } + return res; +} + + +static int +MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs, + const MY_UCA_WEIGHT_LEVEL *level, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + size_t nchars) +{ + my_uca_scanner sscanner; + my_uca_scanner tscanner; + size_t s_nchars_left= nchars; + size_t t_nchars_left= nchars; + + my_uca_scanner_init_any(&sscanner, cs, level, s, slen); + my_uca_scanner_init_any(&tscanner, cs, level, t, tlen); + + for ( ; ; ) + { + weight_and_nchars_t s_res; + weight_and_nchars_t t_res; + uint generated= 0; + int diff; + + s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left, + &generated); + t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left, + &generated); + if ((diff= (s_res.weight - t_res.weight))) + return diff; + + if (generated == 2) + { + if (cs->state & MY_CS_NOPAD) + { + /* + Both values are auto-generated. There's no real data any more. + We need to handle the remaining virtual trailing spaces. + The two strings still have s_nchars_left and t_nchars_left imaginary + trailing spaces at the end. If s_nchars_left != t_nchars_left, + the strings will be not equal in case of a NOPAD collation. + + Example: + "B" is German "U+00DF LATIN SMALL LETTER SHARP S" + When we have these values in a + CHAR(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_nopad_ci + column: + 'B ' (one character, two trailing spaces) + 'ss ' (two characters, one trailing space) + The 'B ' is greater than the 'ss '. + They are compared in the following steps: + 1. 'B' == 'ss' + 2. ' ' == ' ' + 3. ' ' > '' + + We need to emulate the same behavior in this function even if + it's called with strings 'B' and 'ss' (with space trimmed). + The side which has more remaining virtual spaces at the end + is greater. + */ + if (s_nchars_left < t_nchars_left) + return -1; + if (s_nchars_left > t_nchars_left) + return +1; + } + return 0; + } + + DBUG_ASSERT(s_nchars_left >= s_res.nchars); + DBUG_ASSERT(t_nchars_left >= t_res.nchars); + s_nchars_left-= s_res.nchars; + t_nchars_left-= t_res.nchars; + } + + return 0; +} + + +/* + One-level collations. +*/ +static int +MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + size_t nchars) +{ + return MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, &cs->uca->level[0], + s, slen, t, tlen, + nchars); +} + + +/* + Multi-level collations. +*/ +static int +MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + size_t nchars) +{ + uint num_level= cs->levels_for_order; + uint i; + for (i= 0; i != num_level; i++) + { + int ret= MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, + &cs->uca->level[i], + s, slen, + t, tlen, + nchars); + if (ret) + return ret; + } + return 0; +} + /* Calculates hash value for the given string, @@ -752,6 +820,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)= MY_UCA_COLL_INIT, MY_FUNCTION_NAME(strnncoll), MY_FUNCTION_NAME(strnncollsp), + MY_FUNCTION_NAME(strnncollsp_nchars), MY_FUNCTION_NAME(strnxfrm), my_strnxfrmlen_any_uca, MY_LIKE_RANGE, @@ -773,6 +842,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)= MY_UCA_COLL_INIT, MY_FUNCTION_NAME(strnncoll), MY_FUNCTION_NAME(strnncollsp_nopad), + MY_FUNCTION_NAME(strnncollsp_nchars), MY_FUNCTION_NAME(strnxfrm_nopad), my_strnxfrmlen_any_uca, MY_LIKE_RANGE, /* my_like_range_mb or my_like_range_generic */ @@ -792,6 +862,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)= MY_UCA_COLL_INIT, MY_FUNCTION_NAME(strnncoll_multilevel), MY_FUNCTION_NAME(strnncollsp_multilevel), + MY_FUNCTION_NAME(strnncollsp_nchars_multilevel), MY_FUNCTION_NAME(strnxfrm_multilevel), my_strnxfrmlen_any_uca_multilevel, MY_LIKE_RANGE, @@ -811,6 +882,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)= MY_UCA_COLL_INIT, MY_FUNCTION_NAME(strnncoll_multilevel), MY_FUNCTION_NAME(strnncollsp_nopad_multilevel), + MY_FUNCTION_NAME(strnncollsp_nchars_multilevel), MY_FUNCTION_NAME(strnxfrm_multilevel), my_strnxfrmlen_any_uca_multilevel, MY_LIKE_RANGE, diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 166ec55b9b3..fd79a98e59a 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1210,27 +1210,27 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1) #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1) #define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1)) #define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_nopad_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1) #define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_nopad_bin #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1)) #define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3)) -#include "strcoll.ic" +#include "strcoll.inl" #undef IS_MB2_CHAR #undef IS_MB4_CHAR @@ -1412,7 +1412,7 @@ my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end) #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16 #define CHARLEN(cs,str,end) my_charlen_utf16(cs,str,end) #define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN -#include "ctype-mb.ic" +#include "ctype-mb.inl" #undef MY_FUNCTION_NAME #undef CHARLEN #undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN @@ -1505,6 +1505,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = NULL, /* init */ my_strnncoll_utf16_general_ci, my_strnncollsp_utf16_general_ci, + my_strnncollsp_nchars_utf16_general_ci, my_strnxfrm_utf16_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1521,6 +1522,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = NULL, /* init */ my_strnncoll_utf16_bin, my_strnncollsp_utf16_bin, + my_strnncollsp_nchars_utf16_bin, my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -1537,6 +1539,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_utf16_general_ci, my_strnncollsp_utf16_general_nopad_ci, + my_strnncollsp_nchars_utf16_general_nopad_ci, my_strnxfrm_nopad_utf16_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1553,6 +1556,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf16_bin, my_strnncollsp_utf16_nopad_bin, + my_strnncollsp_nchars_utf16_nopad_bin, my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -1745,27 +1749,27 @@ struct charset_info_st my_charset_utf16_nopad_bin= #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0) #define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0)) #define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_nopad_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0) #define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_nopad_bin #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0)) #define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2)) -#include "strcoll.ic" +#include "strcoll.inl" #undef IS_MB2_CHAR #undef IS_MB4_CHAR @@ -1846,6 +1850,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler = NULL, /* init */ my_strnncoll_utf16le_general_ci, my_strnncollsp_utf16le_general_ci, + my_strnncollsp_nchars_utf16le_general_ci, my_strnxfrm_utf16le_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1862,6 +1867,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = NULL, /* init */ my_strnncoll_utf16le_bin, my_strnncollsp_utf16le_bin, + my_strnncollsp_nchars_utf16le_bin, my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -1878,6 +1884,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_utf16le_general_ci, my_strnncollsp_utf16le_general_nopad_ci, + my_strnncollsp_nchars_utf16le_general_nopad_ci, my_strnxfrm_nopad_utf16le_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1894,6 +1901,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf16le_bin, my_strnncollsp_utf16le_nopad_bin, + my_strnncollsp_nchars_utf16le_nopad_bin, my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -2109,24 +2117,24 @@ static inline int my_weight_utf32_general_ci(uchar b0, uchar b1, #define UNICASE_PAGES my_unicase_default_pages #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3) -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_bin #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF32_WC4(b0, b1, b2, b3)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_nopad_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_nopad_bin #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF32_WC4(b0, b1, b2, b3)) -#include "strcoll.ic" +#include "strcoll.inl" #undef IS_MB2_CHAR #undef IS_MB4_CHAR @@ -2296,7 +2304,7 @@ my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)), #define MY_FUNCTION_NAME(x) my_ ## x ## _utf32 #define CHARLEN(cs,str,end) my_charlen_utf32(cs,str,end) #define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN -#include "ctype-mb.ic" +#include "ctype-mb.inl" #undef MY_FUNCTION_NAME #undef CHARLEN #undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN @@ -2672,6 +2680,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler = NULL, /* init */ my_strnncoll_utf32_general_ci, my_strnncollsp_utf32_general_ci, + my_strnncollsp_nchars_utf32_general_ci, my_strnxfrm_utf32_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -2688,6 +2697,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler = NULL, /* init */ my_strnncoll_utf32_bin, my_strnncollsp_utf32_bin, + my_strnncollsp_nchars_utf32_bin, my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -2704,6 +2714,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_utf32_general_ci, my_strnncollsp_utf32_general_nopad_ci, + my_strnncollsp_nchars_utf32_general_nopad_ci, my_strnxfrm_nopad_utf32_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -2720,6 +2731,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf32_bin, my_strnncollsp_utf32_nopad_bin, + my_strnncollsp_nchars_utf32_nopad_bin, my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, @@ -2964,7 +2976,7 @@ static const uchar to_upper_ucs2[] = { }; -/* Definitions for strcoll.ic */ +/* Definitions for strcoll.inl */ #define IS_MB2_CHAR(x,y) (1) #define UCS2_CODE(b0,b1) (((uchar) b0) << 8 | ((uchar) b1)) @@ -2987,7 +2999,7 @@ static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1) #define UNICASE_PAGES my_unicase_default_pages #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1) -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin @@ -2996,21 +3008,21 @@ static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1) #define OPTIMIZE_ASCII 0 #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_nopad_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD #define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_nopad_bin #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1) -#include "strcoll.ic" +#include "strcoll.inl" static int @@ -3263,6 +3275,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = NULL, /* init */ my_strnncoll_ucs2_general_ci, my_strnncollsp_ucs2_general_ci, + my_strnncollsp_nchars_ucs2_general_ci, my_strnxfrm_ucs2_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -3279,6 +3292,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = NULL, /* init */ my_strnncoll_ucs2_bin, my_strnncollsp_ucs2_bin, + my_strnncollsp_nchars_ucs2_bin, my_strnxfrm_ucs2_bin, my_strnxfrmlen_unicode, my_like_range_generic, @@ -3295,6 +3309,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_ucs2_general_ci, my_strnncollsp_ucs2_general_nopad_ci, + my_strnncollsp_nchars_ucs2_general_nopad_ci, my_strnxfrm_nopad_ucs2_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, @@ -3311,6 +3326,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler = NULL, /* init */ my_strnncoll_ucs2_bin, my_strnncollsp_ucs2_nopad_bin, + my_strnncollsp_nchars_ucs2_nopad_bin, my_strnxfrm_nopad_ucs2_bin, my_strnxfrmlen_unicode, my_like_range_generic, diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index b90b3ef3a6a..42e2a5cdd15 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -201,7 +201,7 @@ static const uchar sort_order_ujis[]= #define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z)) #define IS_MB_PREFIX2(x,y) (isujis_ss3(x) && isujis(y)) #define DEFINE_ASIAN_ROUTINES -#include "ctype-mb.ic" +#include "ctype-mb.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _ujis_japanese_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) @@ -209,7 +209,7 @@ static const uchar sort_order_ujis[]= #define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ (((uint) (uchar) (y)) << 8)) #define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _ujis_bin @@ -218,7 +218,7 @@ static const uchar sort_order_ujis[]= #define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ (((uint) (uchar) (y)) << 8)) #define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -228,7 +228,7 @@ static const uchar sort_order_ujis[]= #define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ (((uint) (uchar) (y)) << 8)) #define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -238,7 +238,7 @@ static const uchar sort_order_ujis[]= #define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ (((uint) (uchar) (y)) << 8)) #define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) -#include "strcoll.ic" +#include "strcoll.inl" static @@ -67240,6 +67240,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_ci_handler = NULL, /* init */ my_strnncoll_ujis_japanese_ci, my_strnncollsp_ujis_japanese_ci, + my_strnncollsp_nchars_ujis_japanese_ci, my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -67256,6 +67257,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler = NULL, /* init */ my_strnncoll_ujis_bin, my_strnncollsp_ujis_bin, + my_strnncollsp_nchars_ujis_bin, my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, @@ -67272,6 +67274,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_nopad_ci_handler = NULL, /* init */ my_strnncoll_ujis_japanese_ci, my_strnncollsp_ujis_japanese_nopad_ci, + my_strnncollsp_nchars_ujis_japanese_nopad_ci, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, @@ -67288,6 +67291,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_nopad_bin_handler = NULL, /* init */ my_strnncoll_ujis_bin, my_strnncollsp_ujis_nopad_bin, + my_strnncollsp_nchars_ujis_nopad_bin, my_strnxfrm_mb_nopad, my_strnxfrmlen_simple, my_like_range_mb, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index cdf89aa03ff..b119826b4cc 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -31,7 +31,7 @@ #include "ctype-unidata.h" -/* Definitions for strcoll.ic */ +/* Definitions for strcoll.inl */ #define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) #define IS_MB1_MBHEAD_UNUSED_GAP(x) ((uchar) (x) < 0xC2) #define IS_MB2_CHAR(x,y) IS_UTF8MB2_STEP2(x,y) @@ -5213,7 +5213,7 @@ int my_charlen_utf8mb3(CHARSET_INFO *cs __attribute__((unused)), #define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3 #define CHARLEN(cs,str,end) my_charlen_utf8mb3(cs,str,end) #define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN -#include "ctype-mb.ic" +#include "ctype-mb.inl" #undef MY_FUNCTION_NAME #undef CHARLEN #undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN @@ -5254,7 +5254,7 @@ static inline int my_weight_mb3_utf8mb3_general_ci(uchar b0, uchar b1, uchar b2) #define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_ci(x) #define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_ci(x,y) #define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_ci(x,y,z) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -5263,7 +5263,7 @@ static inline int my_weight_mb3_utf8mb3_general_ci(uchar b0, uchar b1, uchar b2) #define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_ci(x) #define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_ci(x,y) #define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_ci(x,y,z) -#include "strcoll.ic" +#include "strcoll.inl" static inline int my_weight_mb1_utf8mb3_general_mysql500_ci(uchar b) @@ -5300,7 +5300,7 @@ my_weight_mb3_utf8mb3_general_mysql500_ci(uchar b0, uchar b1, uchar b2) #define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_mysql500_ci(x) #define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_mysql500_ci(x,y) #define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_mysql500_ci(x,y,z) -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_bin @@ -5311,7 +5311,7 @@ my_weight_mb3_utf8mb3_general_mysql500_ci(uchar b0, uchar b1, uchar b2) #define WEIGHT_MB1(x) ((int) (uchar) (x)) #define WEIGHT_MB2(x,y) ((int) UTF8MB2_CODE(x,y)) #define WEIGHT_MB3(x,y,z) ((int) UTF8MB3_CODE(x,y,z)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -5320,7 +5320,7 @@ my_weight_mb3_utf8mb3_general_mysql500_ci(uchar b0, uchar b1, uchar b2) #define WEIGHT_MB1(x) ((int) (uchar) (x)) #define WEIGHT_MB2(x,y) ((int) UTF8MB2_CODE(x,y)) #define WEIGHT_MB3(x,y,z) ((int) UTF8MB3_CODE(x,y,z)) -#include "strcoll.ic" +#include "strcoll.inl" /* TODO-10.2: join this with pad_max_char() in ctype-mb.c @@ -5359,6 +5359,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_general_ci_handler = NULL, /* init */ my_strnncoll_utf8mb3_general_ci, my_strnncollsp_utf8mb3_general_ci, + my_strnncollsp_nchars_utf8mb3_general_ci, my_strnxfrm_utf8mb3_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5375,6 +5376,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_general_mysql500_ci_handler = NULL, /* init */ my_strnncoll_utf8mb3_general_mysql500_ci, my_strnncollsp_utf8mb3_general_mysql500_ci, + my_strnncollsp_nchars_utf8mb3_general_mysql500_ci, my_strnxfrm_utf8mb3_general_mysql500_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5391,6 +5393,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_bin_handler = NULL, /* init */ my_strnncoll_utf8mb3_bin, my_strnncollsp_utf8mb3_bin, + my_strnncollsp_nchars_utf8mb3_bin, my_strnxfrm_utf8mb3_bin, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5407,6 +5410,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_general_nopad_ci_handler = NULL, /* init */ my_strnncoll_utf8mb3_general_ci, my_strnncollsp_utf8mb3_general_nopad_ci, + my_strnncollsp_nchars_utf8mb3_general_nopad_ci, my_strnxfrm_nopad_utf8mb3_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5423,6 +5427,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf8mb3_bin, my_strnncollsp_utf8mb3_nopad_bin, + my_strnncollsp_nchars_utf8mb3_nopad_bin, my_strnxfrm_nopad_utf8mb3_bin, my_strnxfrmlen_unicode, my_like_range_mb, @@ -5753,6 +5758,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler = NULL, /* init */ my_strnncoll_utf8mb3_cs, my_strnncollsp_utf8mb3_cs, + my_strnncollsp_nchars_generic, my_strnxfrm_utf8mb3_general_ci, my_strnxfrmlen_unicode, my_like_range_simple, @@ -7042,7 +7048,7 @@ my_wc_to_printable_filename(CHARSET_INFO *cs, my_wc_t wc, #define MY_FUNCTION_NAME(x) my_ ## x ## _filename #define CHARLEN(cs,str,end) my_charlen_filename(cs,str,end) #define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN -#include "ctype-mb.ic" +#include "ctype-mb.inl" #undef MY_FUNCTION_NAME #undef CHARLEN #undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN @@ -7063,7 +7069,7 @@ my_wc_to_printable_filename(CHARSET_INFO *cs, my_wc_t wc, #define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_ci(x,y) #define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_ci(x,y,z) */ -#include "strcoll.ic" +#include "strcoll.inl" static MY_COLLATION_HANDLER my_collation_filename_handler = @@ -7071,6 +7077,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = NULL, /* init */ my_strnncoll_simple, my_strnncollsp_simple, + my_strnncollsp_nchars_generic, my_strnxfrm_filename, my_strnxfrmlen_unicode, my_like_range_mb, @@ -7646,7 +7653,7 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), #define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4 #define CHARLEN(cs,str,end) my_charlen_utf8mb4(cs,str,end) #define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN -#include "ctype-mb.ic" +#include "ctype-mb.inl" #undef MY_FUNCTION_NAME #undef CHARLEN #undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN @@ -7670,7 +7677,7 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), All non-BMP characters have the same weight. */ #define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER -#include "strcoll.ic" +#include "strcoll.inl" #define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4_bin @@ -7679,7 +7686,7 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), #define WEIGHT_MB2(b0,b1) ((int) UTF8MB2_CODE(b0,b1)) #define WEIGHT_MB3(b0,b1,b2) ((int) UTF8MB3_CODE(b0,b1,b2)) #define WEIGHT_MB4(b0,b1,b2,b3) ((int) UTF8MB4_CODE(b0,b1,b2,b3)) -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -7693,7 +7700,7 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), All non-BMP characters have the same weight. */ #define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER -#include "strcoll.ic" +#include "strcoll.inl" #define DEFINE_STRNNCOLLSP_NOPAD @@ -7703,7 +7710,7 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), #define WEIGHT_MB2(b0,b1) ((int) UTF8MB2_CODE(b0,b1)) #define WEIGHT_MB3(b0,b1,b2) ((int) UTF8MB3_CODE(b0,b1,b2)) #define WEIGHT_MB4(b0,b1,b2,b3) ((int) UTF8MB4_CODE(b0,b1,b2,b3)) -#include "strcoll.ic" +#include "strcoll.inl" static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler= @@ -7711,6 +7718,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler= NULL, /* init */ my_strnncoll_utf8mb4_general_ci, my_strnncollsp_utf8mb4_general_ci, + my_strnncollsp_nchars_utf8mb4_general_ci, my_strnxfrm_utf8mb4_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -7727,6 +7735,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler = NULL, /* init */ my_strnncoll_utf8mb4_bin, my_strnncollsp_utf8mb4_bin, + my_strnncollsp_nchars_utf8mb4_bin, my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_mb, @@ -7743,6 +7752,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_nopad_ci_handler= NULL, /* init */ my_strnncoll_utf8mb4_general_ci, my_strnncollsp_utf8mb4_general_nopad_ci, + my_strnncollsp_nchars_utf8mb4_general_nopad_ci, my_strnxfrm_nopad_utf8mb4_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, @@ -7759,6 +7769,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_nopad_bin_handler = NULL, /* init */ my_strnncoll_utf8mb4_bin, my_strnncollsp_utf8mb4_nopad_bin, + my_strnncollsp_nchars_utf8mb4_nopad_bin, my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_mb, diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index 643c7a2a583..4efdce5eaf7 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -675,6 +675,7 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler = NULL, /* init */ my_strnncoll_win1250ch, my_strnncollsp_win1250ch, + my_strnncollsp_nchars_generic_8bit, my_strnxfrm_win1250ch, my_strnxfrmlen_simple, my_like_range_win1250ch, diff --git a/strings/ctype.c b/strings/ctype.c index 46951c3ae1f..f18d5b61a37 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -1351,3 +1351,32 @@ outp: copy_status->m_source_end_pos= from; return to - to_start; } + + +int my_strnncollsp_nchars_generic(CHARSET_INFO *cs, + const uchar *str1, size_t len1, + const uchar *str2, size_t len2, + size_t nchars) +{ + int error; + len1= my_well_formed_length(cs, (const char *) str1, + (const char *) str1 + len1, + nchars, &error); + len2= my_well_formed_length(cs, (const char *) str2, + (const char *) str2 + len2, + nchars, &error); + DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0); + return cs->coll->strnncollsp(cs, str1, len1, str2, len2); +} + + +int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs, + const uchar *str1, size_t len1, + const uchar *str2, size_t len2, + size_t nchars) +{ + set_if_smaller(len1, nchars); + set_if_smaller(len2, nchars); + DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0); + return cs->coll->strnncollsp(cs, str1, len1, str2, len2); +} diff --git a/strings/strcoll.ic b/strings/strcoll.inl index 6aca0d0c460..50849c06e7d 100644 --- a/strings/strcoll.ic +++ b/strings/strcoll.inl @@ -288,6 +288,56 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)), } #endif + +/** + Compare two strings according to the collation, + with trailing space padding or trimming, according to "nchars". + + @param cs - the character set and collation + @param a - the left string + @param a_length - the length of the left string + @param b - the right string + @param b_length - the length of the right string + @param nchars - compare this amount of characters only + @return - the comparison result +*/ +static int +MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, size_t a_length, + const uchar *b, size_t b_length, + size_t nchars) +{ + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + for ( ; nchars ; nchars--) + { + int a_weight, b_weight, res; + uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end); + uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end); + + if ((res= (a_weight - b_weight))) + { + /* Got two different weights. See comments in strnncollsp above. */ + return res; + } + if (!a_wlen && !b_wlen) + { + /* Got two auto-generated trailing spaces. */ + DBUG_ASSERT(a == a_end); + DBUG_ASSERT(b == b_end); + return 0; + } + /* + At least one of the strings has not ended yet, continue comparison. + */ + DBUG_ASSERT(a < a_end || b < b_end); + a+= a_wlen; + b+= b_wlen; + } + return 0; +} + + #endif /* DEFINE_STRNNCOLL */ diff --git a/strings/strings_def.h b/strings/strings_def.h index 111942a9a1a..ba7601f5886 100644 --- a/strings/strings_def.h +++ b/strings/strings_def.h @@ -105,6 +105,16 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len) } +int my_strnncollsp_nchars_generic(CHARSET_INFO *cs, + const uchar *str1, size_t len1, + const uchar *str2, size_t len2, + size_t nchars); + +int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs, + const uchar *str1, size_t len1, + const uchar *str2, size_t len2, + size_t nchars); + uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs); uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs); |