diff options
author | Alexander Barkov <bar@mariadb.com> | 2021-09-23 18:46:37 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2021-09-27 17:10:22 +0400 |
commit | 0d68b0a2d6e52cbbbd971cd66dab8989dd1e48ac (patch) | |
tree | e81085e05ea6db392ee488cfb809b9a44c4f30cc | |
parent | 76972163711f965402d51055f081ab51ae4a3bb7 (diff) | |
download | mariadb-git-0d68b0a2d6e52cbbbd971cd66dab8989dd1e48ac.tar.gz |
MDEV-26669 Add MY_COLLATION_HANDLER functions min_str() and max_str() bb-10.6-bar-MDEV-26669
-rw-r--r-- | include/m_ctype.h | 37 | ||||
-rw-r--r-- | mysys/charset-def.c | 4 | ||||
-rw-r--r-- | strings/ctype-big5.c | 17 | ||||
-rw-r--r-- | strings/ctype-bin.c | 13 | ||||
-rw-r--r-- | strings/ctype-cp932.c | 17 | ||||
-rw-r--r-- | strings/ctype-czech.c | 13 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 17 | ||||
-rw-r--r-- | strings/ctype-eucjpms.c | 17 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 17 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 17 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 5 | ||||
-rw-r--r-- | strings/ctype-mb.c | 41 | ||||
-rw-r--r-- | strings/ctype-mb.h | 37 | ||||
-rw-r--r-- | strings/ctype-simple.c | 38 | ||||
-rw-r--r-- | strings/ctype-simple.h | 37 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 17 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 9 | ||||
-rw-r--r-- | strings/ctype-uca.ic | 17 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 65 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 17 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 45 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 14 | ||||
-rw-r--r-- | unittest/strings/strings-t.c | 259 |
23 files changed, 683 insertions, 87 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 279e8a74ddc..41523913c10 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -377,6 +377,12 @@ struct my_collation_handler_st void (*hash_sort)(CHARSET_INFO *cs, const uchar *key, size_t len, ulong *nr1, ulong *nr2); my_bool (*propagate)(CHARSET_INFO *cs, const uchar *str, size_t len); + /* + Make minimum and maximum strings for the collation. + Put not more than "nchars" characters. + */ + size_t (*min_str)(CHARSET_INFO *cs, uchar *dst, size_t dstlen, size_t nchars); + size_t (*max_str)(CHARSET_INFO *cs, uchar *dst, size_t dstlen, size_t nchars); }; extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler; @@ -589,8 +595,23 @@ struct charset_info_st uchar casedn_multiply; uint mbminlen; uint mbmaxlen; + /* + min_sort_char and max_sort_char represent the minimum + and the maximum character in the collation respectively. + + For Unicode collations, these numbers are Unicode code points. + For non-Unicode collations these numbers are native character codes. + For example, in all 8bit collations these numbers are + in the range 0x00..0xFF. + + min_sort_char and max_sort_char normally should not be used directly. 
+ They are used internally in the following virtual functions: + - MY_COLLATION_HANDLER::like_range() + - MY_COLLATION_HANDLER::min_str() + - MY_COLLATION_HANDLER::max_str() + */ my_wc_t min_sort_char; - my_wc_t max_sort_char; /* For LIKE optimization */ + my_wc_t max_sort_char; uchar pad_char; my_bool escape_with_backslash_is_dangerous; uchar levels_for_order; @@ -852,6 +873,16 @@ struct charset_info_st return (coll->propagate)(this, str, len); } + size_t min_str(uchar *dst, size_t dstlen, size_t nchars) const + { + return (coll->min_str)(this, dst, dstlen, nchars); + } + + size_t max_str(uchar *dst, size_t dstlen, size_t nchars) const + { + return (coll->max_str)(this, dst, dstlen, nchars); + } + #endif /* __cplusplus */ }; @@ -1110,7 +1141,7 @@ extern struct charset_info_st my_charset_big5_bin; extern struct charset_info_st my_charset_big5_chinese_ci; extern struct charset_info_st my_charset_big5_nopad_bin; extern struct charset_info_st my_charset_big5_chinese_nopad_ci; -extern struct charset_info_st my_charset_cp1250_czech_ci; +extern struct charset_info_st my_charset_cp1250_czech_cs; extern struct charset_info_st my_charset_cp932_bin; extern struct charset_info_st my_charset_cp932_japanese_ci; extern struct charset_info_st my_charset_cp932_nopad_bin; @@ -1134,7 +1165,7 @@ extern struct charset_info_st my_charset_gbk_chinese_nopad_ci; extern struct charset_info_st my_charset_latin1_bin; extern struct charset_info_st my_charset_latin1_nopad_bin; extern struct charset_info_st my_charset_latin1_german2_ci; -extern struct charset_info_st my_charset_latin2_czech_ci; +extern struct charset_info_st my_charset_latin2_czech_cs; extern struct charset_info_st my_charset_sjis_bin; extern struct charset_info_st my_charset_sjis_japanese_ci; extern struct charset_info_st my_charset_sjis_nopad_bin; diff --git a/mysys/charset-def.c b/mysys/charset-def.c index 737a1c88a47..2e23adc5147 100644 --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -202,7 +202,7 @@ my_bool 
init_compiled_charsets(myf flags __attribute__((unused))) #endif #ifdef HAVE_CHARSET_cp1250 - add_compiled_collation(&my_charset_cp1250_czech_ci); + add_compiled_collation(&my_charset_cp1250_czech_cs); #endif #ifdef HAVE_CHARSET_cp932 @@ -213,7 +213,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) #endif #ifdef HAVE_CHARSET_latin2 - add_compiled_collation(&my_charset_latin2_czech_ci); + add_compiled_collation(&my_charset_latin2_czech_cs); #endif #ifdef HAVE_CHARSET_eucjpms diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 73e3d8ea306..989d6b5f1a8 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -29,6 +29,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifdef HAVE_CHARSET_big5 @@ -6721,7 +6722,9 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci= my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -6737,7 +6740,9 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -6753,7 +6758,9 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_nopad_ci= my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -6769,7 +6776,9 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_nopad_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 2902c2edda1..b142cab523a 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -22,6 +22,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-simple.h" const char 
charset_name_binary[]= "binary"; #define charset_name_binary_length (sizeof(charset_name_binary)-1) @@ -497,7 +498,9 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler = my_strcasecmp_bin, my_instr_bin, my_hash_sort_8bit_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_8bit_simple, + my_max_str_8bit_simple }; @@ -513,7 +516,9 @@ MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler = my_strcasecmp_bin, my_instr_bin, my_hash_sort_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_8bit_simple_nopad, + my_max_str_8bit_simple }; @@ -529,7 +534,9 @@ static MY_COLLATION_HANDLER my_collation_binary_handler = my_strcasecmp_bin, my_instr_bin, my_hash_sort_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_8bit_simple_nopad, + my_max_str_8bit_simple }; diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index 859fd029744..e3c29ae1a32 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -19,6 +19,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifdef HAVE_CHARSET_cp932 @@ -34676,7 +34677,9 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci= my_strcasecmp_8bit, my_instr_mb, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -34692,7 +34695,9 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -34708,7 +34713,9 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_nopad_ci= my_strcasecmp_8bit, my_instr_mb, my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -34724,7 +34731,9 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_nopad_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + 
my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index 40baae67dd3..ca331dc667f 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -69,6 +69,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-simple.h" #else @@ -605,7 +606,7 @@ static MY_UNI_IDX idx_uni_8859_2[]={ }; -static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = +static MY_COLLATION_HANDLER my_collation_latin2_czech_cs_handler = { NULL, /* init */ my_strnncoll_czech, @@ -617,10 +618,12 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = my_strcasecmp_8bit, my_instr_simple, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_8bit_simple, + my_max_str_8bit_simple }; -struct charset_info_st my_charset_latin2_czech_ci = +struct charset_info_st my_charset_latin2_czech_cs = { 2,0,0, /* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT| @@ -645,12 +648,12 @@ struct charset_info_st my_charset_latin2_czech_ci = 1, /* mbminlen */ 1, /* mbmaxlen */ 0, /* min_sort_char */ - 0, /* max_sort_char */ + 0xAE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 4, /* levels_for_order */ &my_charset_8bit_handler, - &my_collation_latin2_czech_ci_handler + &my_collation_latin2_czech_cs_handler }; diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index e152862c0ef..3c257acb460 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -28,6 +28,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifdef HAVE_CHARSET_euckr @@ -9966,7 +9967,9 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci= my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -9982,7 +9985,9 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - 
my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -9998,7 +10003,9 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_nopad_ci= my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -10014,7 +10021,9 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_nopad_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index 4c365b4ff6a..ca790291a4a 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -30,6 +30,7 @@ ctype-ujis.c file. #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifdef HAVE_CHARSET_eucjpms @@ -67504,7 +67505,9 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_ci_handler = my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -67520,7 +67523,9 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler = my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -67536,7 +67541,9 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_nopad_ci_handler = my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -67552,7 +67559,9 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_nopad_bin_handler = my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 56709b06bf7..a8e42945259 100644 --- 
a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -26,6 +26,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifdef HAVE_CHARSET_gb2312 @@ -6372,7 +6373,9 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci= my_strcasecmp_mb, /* instr */ my_instr_mb, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -6388,7 +6391,9 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -6404,7 +6409,9 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_nopad_ci= my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -6420,7 +6427,9 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_nopad_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 95f59a18d99..c8aade2eb31 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -28,6 +28,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifdef HAVE_CHARSET_gbk @@ -10654,7 +10655,9 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci= my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -10670,7 +10673,9 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -10686,7 +10691,9 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_nopad_ci= my_strcasecmp_mb, 
my_instr_mb, my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -10702,7 +10709,9 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_nopad_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; static MY_CHARSET_HANDLER my_charset_handler= diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 2a0983ee16f..9412f212420 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -17,6 +17,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-simple.h" const char charset_name_latin1[]= "latin1"; #define charset_name_latin1_length sizeof(charset_name_latin1)-1 @@ -737,7 +738,9 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler= my_strcasecmp_8bit, my_instr_simple, my_hash_sort_latin1_de, - my_propagate_complex + my_propagate_complex, + my_min_str_8bit_simple, + my_max_str_8bit_simple }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index b84c91efe5f..66f82a1d09c 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -16,6 +16,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifdef USE_MB @@ -640,6 +641,46 @@ my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), } +static inline size_t +my_repeat_char_native(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars, + my_wc_t native_code) +{ + uchar *dst0= dst; + uchar *dstend= dst + dst_size; + int chlen= my_ci_native_to_mb(cs, native_code, dst, dstend); + if (chlen < 1 /* Not enough space */ || !nchars) + return 0; + for (dst+= chlen, nchars--; + dst + chlen <= dstend && nchars > 0; + dst+= chlen, nchars--) + memcpy(dst, dst0, chlen); + return dst - dst0; +} + + +size_t my_min_str_mb_simple(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars) +{ + return my_repeat_char_native(cs, dst, dst_size, nchars, 
cs->min_sort_char); +} + + +size_t my_min_str_mb_simple_nopad(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars) +{ + /* For NOPAD collations, the empty string is the smallest possible */ + return 0; +} + + +size_t my_max_str_mb_simple(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars) +{ + return my_repeat_char_native(cs, dst, dst_size, nchars, cs->max_sort_char); +} + + /* Fill the given buffer with 'maximum character' for given charset SYNOPSIS diff --git a/strings/ctype-mb.h b/strings/ctype-mb.h new file mode 100644 index 00000000000..b6f06e8df0d --- /dev/null +++ b/strings/ctype-mb.h @@ -0,0 +1,37 @@ +#ifndef CTYPE_MB_INCLUDED +#define CTYPE_MB_INCLUDED +/* Copyright (C) 2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* This file is to be included first in all files in the string directory */ + +#undef DBUG_ASSERT_AS_PRINTF +#include <my_global.h> /* Define standard vars */ +#include "m_string.h" /* External definitions of string functions */ + + +size_t +my_min_str_mb_simple(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars); + +size_t +my_min_str_mb_simple_nopad(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars); + +size_t +my_max_str_mb_simple(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars); + +#endif /*CTYPE_MB_INCLUDED */ diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index f9471f35f79..a0ba4357d5e 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -16,6 +16,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-simple.h" #include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */ #include <errno.h> @@ -891,6 +892,35 @@ cnv: } +size_t my_min_str_8bit_simple(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, + size_t nchars) +{ + set_if_smaller(dst_size, nchars); + memset(dst, cs->min_sort_char, dst_size); + return dst_size; +} + + +size_t my_min_str_8bit_simple_nopad(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, + size_t nchars) +{ + /* For NOPAD collations, the empty string is always the smallest */ + return 0; +} + + +size_t my_max_str_8bit_simple(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, + size_t nchars) +{ + set_if_smaller(dst_size, nchars); + memset(dst, cs->max_sort_char, dst_size); + return dst_size; +} + + /* ** Compare string against string with wildcard ** 0 if matched @@ -2104,7 +2134,9 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler = my_strcasecmp_8bit, my_instr_simple, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + 
my_min_str_8bit_simple, + my_max_str_8bit_simple }; @@ -2120,5 +2152,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler = my_strcasecmp_8bit, my_instr_simple, my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_8bit_simple_nopad, + my_max_str_8bit_simple }; diff --git a/strings/ctype-simple.h b/strings/ctype-simple.h new file mode 100644 index 00000000000..321c107a61f --- /dev/null +++ b/strings/ctype-simple.h @@ -0,0 +1,37 @@ +#ifndef CTYPE_SIMPLE_INCLUDED +#define CTYPE_SIMPLE_INCLUDED +/* Copyright (C) 2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* This file is to be included first in all files in the string directory */ + +#undef DBUG_ASSERT_AS_PRINTF +#include <my_global.h> /* Define standard vars */ +#include "m_string.h" /* External definitions of string functions */ + + +size_t +my_min_str_8bit_simple(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars); + +size_t +my_min_str_8bit_simple_nopad(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars); + +size_t +my_max_str_8bit_simple(CHARSET_INFO *cs, + uchar *dst, size_t dst_size, size_t nchars); + +#endif /*CTYPE_SIMPLE_INCLUDED */ diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index b6b212f2c0d..d26083202fc 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -19,6 +19,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifdef HAVE_CHARSET_sjis @@ -34064,7 +34065,9 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci= my_strcasecmp_8bit, my_instr_mb, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -34080,7 +34083,9 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -34096,7 +34101,9 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_nopad_ci= my_strcasecmp_8bit, my_instr_mb, my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -34112,7 +34119,9 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_nopad_bin= my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + 
my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 849f4897231..dac44295f3c 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -35,6 +35,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-simple.h" #include "t_ctype.h" #include <my_sys.h> @@ -862,7 +863,9 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_strcasecmp_8bit, my_instr_simple, /* QQ: To be fixed */ my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_8bit_simple, + my_max_str_8bit_simple }; static MY_COLLATION_HANDLER my_collation_nopad_ci_handler = @@ -877,7 +880,9 @@ static MY_COLLATION_HANDLER my_collation_nopad_ci_handler = my_strcasecmp_8bit, my_instr_simple, /* QQ: To be fixed */ my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_8bit_simple_nopad, + my_max_str_8bit_simple }; static MY_CHARSET_HANDLER my_charset_handler= diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic index e47f1e1fd82..cee12cf4d7b 100644 --- a/strings/ctype-uca.ic +++ b/strings/ctype-uca.ic @@ -15,6 +15,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "ctype-mb.h" #ifndef MY_FUNCTION_NAME #error MY_FUNCTION_NAME is not defined @@ -759,7 +760,9 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)= NULL, /* strcasecmp() */ my_instr_mb, MY_FUNCTION_NAME(hash_sort), - my_propagate_complex + my_propagate_complex, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -780,7 +783,9 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)= NULL, /* strcasecmp() */ my_instr_mb, MY_FUNCTION_NAME(hash_sort_nopad), - my_propagate_complex + my_propagate_complex, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -799,7 +804,9 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)= NULL, /* strcasecmp() */ my_instr_mb, MY_FUNCTION_NAME(hash_sort), - my_propagate_complex + 
my_propagate_complex, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -818,7 +825,9 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)= NULL, /* strcasecmp() */ my_instr_mb, MY_FUNCTION_NAME(hash_sort), - my_propagate_complex + my_propagate_complex, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index dbf0d86dc05..c230c65d3d6 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -20,6 +20,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #include <my_sys.h> #include <stdarg.h> @@ -1513,7 +1514,9 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -1529,7 +1532,9 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -1545,7 +1550,9 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -1561,7 +1568,9 @@ static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -1854,7 +1863,9 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -1870,7 +1881,9 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, 
my_hash_sort_utf16_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -1886,7 +1899,9 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -1902,7 +1917,9 @@ static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -2680,7 +2697,9 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -2696,7 +2715,9 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -2712,7 +2733,9 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -2728,7 +2751,9 @@ static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -3271,7 +3296,9 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -3287,7 +3314,9 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = my_strcasecmp_mb2_or_mb4, 
my_instr_mb, my_hash_sort_ucs2_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -3303,7 +3332,9 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -3319,7 +3350,9 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler = my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index fa5856ebc0d..c190496b364 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -29,6 +29,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifdef HAVE_CHARSET_ujis @@ -67248,7 +67249,9 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_ci_handler = my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -67264,7 +67267,9 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler = my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -67280,7 +67285,9 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_nopad_ci_handler = my_strcasecmp_mb, my_instr_mb, my_hash_sort_simple_nopad, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -67296,7 +67303,9 @@ static MY_COLLATION_HANDLER my_collation_ujis_nopad_bin_handler = my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index ee50d53829e..7fdc88352b0 100644 --- 
a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -21,6 +21,7 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-mb.h" #ifndef EILSEQ #define EILSEQ ENOENT @@ -5366,7 +5367,9 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_general_ci_handler = my_strcasecmp_utf8mb3, my_instr_mb, my_hash_sort_utf8mb3, - my_propagate_complex + my_propagate_complex, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -5382,7 +5385,9 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_general_mysql500_ci_handler = my_strcasecmp_utf8mb3, my_instr_mb, my_hash_sort_utf8mb3, - my_propagate_complex + my_propagate_complex, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -5398,7 +5403,9 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_bin_handler = my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -5414,7 +5421,9 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_general_nopad_ci_handler = my_strcasecmp_utf8mb3, my_instr_mb, my_hash_sort_utf8mb3_nopad, - my_propagate_complex + my_propagate_complex, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -5430,7 +5439,9 @@ static MY_COLLATION_HANDLER my_collation_utf8mb3_nopad_bin_handler = my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -5760,7 +5771,9 @@ static MY_COLLATION_HANDLER my_collation_cs_handler = my_strcasecmp_utf8mb3, my_instr_mb, my_hash_sort_utf8mb3, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; struct charset_info_st my_charset_utf8mb3_general_cs= @@ -7078,7 +7091,9 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = my_strcasecmp_utf8mb3, my_instr_mb, my_hash_sort_utf8mb3, - my_propagate_complex + my_propagate_complex, + my_min_str_mb_simple, + my_max_str_mb_simple }; static MY_CHARSET_HANDLER 
my_charset_filename_handler= @@ -7718,7 +7733,9 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler= my_strcasecmp_utf8mb4, my_instr_mb, my_hash_sort_utf8mb4, - my_propagate_complex + my_propagate_complex, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -7734,7 +7751,9 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler = my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple }; @@ -7750,7 +7769,9 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_nopad_ci_handler= my_strcasecmp_utf8mb4, my_instr_mb, my_hash_sort_utf8mb4_nopad, - my_propagate_complex + my_propagate_complex, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; @@ -7766,7 +7787,9 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_nopad_bin_handler = my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_nopad_bin, - my_propagate_simple + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple }; diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index a53a9768fde..689b20b8936 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -42,6 +42,8 @@ #include "strings_def.h" #include <m_ctype.h> +#include "ctype-simple.h" + #else @@ -671,7 +673,7 @@ my_like_range_win1250ch(CHARSET_INFO *cs __attribute__((unused)), } -static MY_COLLATION_HANDLER my_collation_czech_ci_handler = +static MY_COLLATION_HANDLER my_collation_czech_cs_handler = { NULL, /* init */ my_strnncoll_win1250ch, @@ -683,11 +685,13 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler = my_strcasecmp_8bit, my_instr_simple, my_hash_sort_simple, - my_propagate_simple + my_propagate_simple, + my_min_str_8bit_simple, + my_max_str_8bit_simple }; -struct charset_info_st my_charset_cp1250_czech_ci = +struct charset_info_st my_charset_cp1250_czech_cs = { 34,0,0, /* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT| @@ -712,12 +716,12 @@ struct 
charset_info_st my_charset_cp1250_czech_ci = 1, /* mbminlen */ 1, /* mbmaxlen */ 0, /* min_sort_char */ - 0, /* max_sort_char */ + 0xFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 2, /* levels_for_order */ &my_charset_8bit_handler, - &my_collation_czech_ci_handler + &my_collation_czech_cs_handler }; diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c index 39e48a33627..2141d09c949 100644 --- a/unittest/strings/strings-t.c +++ b/unittest/strings/strings-t.c @@ -760,11 +760,264 @@ test_strcollsp() } +typedef struct +{ + size_t size; + size_t nchars; + LEX_CSTRING min; + LEX_CSTRING max; +} MINMAX_PARAM; + + +static MINMAX_PARAM minmax_param_latin1_swedish_ci[]= +{ + {0, 0, {CSTR("")}, {CSTR("")}}, + {0, 1, {CSTR("")}, {CSTR("")}}, + {0, 2, {CSTR("")}, {CSTR("")}}, + {0, 3, {CSTR("")}, {CSTR("")}}, + {1, 0, {CSTR("")}, {CSTR("")}}, + {1, 1, {CSTR("\x00")}, {CSTR("\xFF")}}, + {1, 2, {CSTR("\x00")}, {CSTR("\xFF")}}, + {1, 3, {CSTR("\x00")}, {CSTR("\xFF")}}, + {2, 0, {CSTR("")}, {CSTR("")}}, + {2, 1, {CSTR("\x00")}, {CSTR("\xFF")}}, + {2, 2, {CSTR("\x00\x00")}, {CSTR("\xFF\xFF")}}, + {2, 3, {CSTR("\x00\x00")}, {CSTR("\xFF\xFF")}}, + {3, 0, {CSTR("")}, {CSTR("")}}, + {3, 1, {CSTR("\x00")}, {CSTR("\xFF")}}, + {3, 2, {CSTR("\x00\x00")}, {CSTR("\xFF\xFF")}}, + {3, 3, {CSTR("\x00\x00\x00")}, {CSTR("\xFF\xFF\xFF")}} +}; + + +static MINMAX_PARAM minmax_param_latin1_nopad_bin[]= +{ + {0, 0, {CSTR("")}, {CSTR("")}}, + {0, 1, {CSTR("")}, {CSTR("")}}, + {0, 2, {CSTR("")}, {CSTR("")}}, + {0, 3, {CSTR("")}, {CSTR("")}}, + {1, 0, {CSTR("")}, {CSTR("")}}, + {1, 1, {CSTR("")}, {CSTR("\xFF")}}, + {1, 2, {CSTR("")}, {CSTR("\xFF")}}, + {1, 3, {CSTR("")}, {CSTR("\xFF")}}, + {2, 0, {CSTR("")}, {CSTR("")}}, + {2, 1, {CSTR("")}, {CSTR("\xFF")}}, + {2, 2, {CSTR("")}, {CSTR("\xFF\xFF")}}, + {2, 3, {CSTR("")}, {CSTR("\xFF\xFF")}}, + {3, 0, {CSTR("")}, {CSTR("")}}, + {3, 1, {CSTR("")}, {CSTR("\xFF")}}, + {3, 2, {CSTR("")}, 
{CSTR("\xFF\xFF")}}, + {3, 3, {CSTR("")}, {CSTR("\xFF\xFF\xFF")}} +}; + + +static MINMAX_PARAM minmax_param_utf8mb3_unicode_ci[]= +{ + {0, 0, {CSTR("")}, {CSTR("")}}, + {0, 1, {CSTR("")}, {CSTR("")}}, + {0, 2, {CSTR("")}, {CSTR("")}}, + {0, 3, {CSTR("")}, {CSTR("")}}, + {1, 0, {CSTR("")}, {CSTR("")}}, + {1, 1, {CSTR("\x09")}, {CSTR("")}}, + {1, 2, {CSTR("\x09")}, {CSTR("")}}, + {1, 3, {CSTR("\x09")}, {CSTR("")}}, + {2, 0, {CSTR("")}, {CSTR("")}}, + {2, 1, {CSTR("\x09")}, {CSTR("")}}, + {2, 2, {CSTR("\x09\x09")}, {CSTR("")}}, + {2, 3, {CSTR("\x09\x09")}, {CSTR("")}}, + {3, 0, {CSTR("")}, {CSTR("")}}, + {3, 1, {CSTR("\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {3, 2, {CSTR("\x09\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {3, 3, {CSTR("\x09\x09\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {4, 0, {CSTR("")}, {CSTR("")}}, + {4, 1, {CSTR("\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {4, 2, {CSTR("\x09\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {4, 3, {CSTR("\x09\x09\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {5, 0, {CSTR("")}, {CSTR("")}}, + {5, 1, {CSTR("\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {5, 2, {CSTR("\x09\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {5, 3, {CSTR("\x09\x09\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {6, 0, {CSTR("")}, {CSTR("")}}, + {6, 1, {CSTR("\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {6, 2, {CSTR("\x09\x09")}, {CSTR("\xEF\xBF\xBF\xEF\xBF\xBF")}}, + {6, 3, {CSTR("\x09\x09\x09")}, {CSTR("\xEF\xBF\xBF\xEF\xBF\xBF")}}, + {7, 0, {CSTR("")}, {CSTR("")}}, + {7, 1, {CSTR("\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {7, 2, {CSTR("\x09\x09")}, {CSTR("\xEF\xBF\xBF\xEF\xBF\xBF")}}, + {7, 3, {CSTR("\x09\x09\x09")}, {CSTR("\xEF\xBF\xBF\xEF\xBF\xBF")}}, + {8, 0, {CSTR("")}, {CSTR("")}}, + {8, 1, {CSTR("\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {8, 2, {CSTR("\x09\x09")}, {CSTR("\xEF\xBF\xBF\xEF\xBF\xBF")}}, + {8, 3, {CSTR("\x09\x09\x09")}, {CSTR("\xEF\xBF\xBF\xEF\xBF\xBF")}}, + {9, 0, {CSTR("")}, {CSTR("")}}, + {9, 1, {CSTR("\x09")}, {CSTR("\xEF\xBF\xBF")}}, + {9, 2, {CSTR("\x09\x09")}, {CSTR("\xEF\xBF\xBF\xEF\xBF\xBF")}}, + {9, 3, 
{CSTR("\x09\x09\x09")}, {CSTR("\xEF\xBF\xBF\xEF\xBF\xBF\xEF\xBF\xBF")}}, +}; + + +#ifdef HAVE_CHARSET_big5 +static MINMAX_PARAM minmax_param_big5_chinese_ci[]= +{ + {0, 0, {CSTR("")}, {CSTR("")}}, + {0, 1, {CSTR("")}, {CSTR("")}}, + {0, 2, {CSTR("")}, {CSTR("")}}, + {0, 3, {CSTR("")}, {CSTR("")}}, + {1, 0, {CSTR("")}, {CSTR("")}}, + {1, 1, {CSTR("\x00")}, {CSTR("")}}, + {1, 2, {CSTR("\x00")}, {CSTR("")}}, + {1, 3, {CSTR("\x00")}, {CSTR("")}}, + {2, 0, {CSTR("")}, {CSTR("")}}, + {2, 1, {CSTR("\x00")}, {CSTR("\xF9\xD5")}}, + {2, 2, {CSTR("\x00\x00")}, {CSTR("\xF9\xD5")}}, + {2, 3, {CSTR("\x00\x00")}, {CSTR("\xF9\xD5")}}, + {3, 0, {CSTR("")}, {CSTR("")}}, + {3, 1, {CSTR("\x00")}, {CSTR("\xF9\xD5")}}, + {3, 2, {CSTR("\x00\x00")}, {CSTR("\xF9\xD5")}}, + {3, 3, {CSTR("\x00\x00\x00")}, {CSTR("\xF9\xD5")}}, + {4, 0, {CSTR("")}, {CSTR("")}}, + {4, 1, {CSTR("\x00")}, {CSTR("\xF9\xD5")}}, + {4, 2, {CSTR("\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5")}}, + {4, 3, {CSTR("\x00\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5")}}, + {5, 0, {CSTR("")}, {CSTR("")}}, + {5, 1, {CSTR("\x00")}, {CSTR("\xF9\xD5")}}, + {5, 2, {CSTR("\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5")}}, + {5, 3, {CSTR("\x00\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5")}}, + {6, 0, {CSTR("")}, {CSTR("")}}, + {6, 1, {CSTR("\x00")}, {CSTR("\xF9\xD5")}}, + {6, 2, {CSTR("\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5")}}, + {6, 3, {CSTR("\x00\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5\xF9\xD5")}}, + {7, 0, {CSTR("")}, {CSTR("")}}, + {7, 1, {CSTR("\x00")}, {CSTR("\xF9\xD5")}}, + {7, 2, {CSTR("\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5")}}, + {7, 3, {CSTR("\x00\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5\xF9\xD5")}}, + {8, 0, {CSTR("")}, {CSTR("")}}, + {8, 1, {CSTR("\x00")}, {CSTR("\xF9\xD5")}}, + {8, 2, {CSTR("\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5")}}, + {8, 3, {CSTR("\x00\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5\xF9\xD5")}}, + {9, 0, {CSTR("")}, {CSTR("")}}, + {9, 1, {CSTR("\x00")}, {CSTR("\xF9\xD5")}}, + {9, 2, {CSTR("\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5")}}, + {9, 3, 
{CSTR("\x00\x00\x00")}, {CSTR("\xF9\xD5\xF9\xD5\xF9\xD5")}}, +}; +#endif + +#ifdef HAVE_CHARSET_cp1250 +static MINMAX_PARAM minmax_param_cp1250_czech_cs[]= +{ + {0, 0, {CSTR("")}, {CSTR("")}}, + {0, 1, {CSTR("")}, {CSTR("")}}, + {0, 2, {CSTR("")}, {CSTR("")}}, + {0, 3, {CSTR("")}, {CSTR("")}}, + {1, 0, {CSTR("")}, {CSTR("")}}, + {1, 1, {CSTR("\x00")}, {CSTR("\xFF")}}, + {1, 2, {CSTR("\x00")}, {CSTR("\xFF")}}, + {1, 3, {CSTR("\x00")}, {CSTR("\xFF")}}, + {2, 0, {CSTR("")}, {CSTR("")}}, + {2, 1, {CSTR("\x00")}, {CSTR("\xFF")}}, + {2, 2, {CSTR("\x00\x00")}, {CSTR("\xFF\xFF")}}, + {2, 3, {CSTR("\x00\x00")}, {CSTR("\xFF\xFF")}}, + {3, 0, {CSTR("")}, {CSTR("")}}, + {3, 1, {CSTR("\x00")}, {CSTR("\xFF")}}, + {3, 2, {CSTR("\x00\x00")}, {CSTR("\xFF\xFF")}}, + {3, 3, {CSTR("\x00\x00\x00")}, {CSTR("\xFF\xFF\xFF")}} +}; +#endif + + +#ifdef HAVE_CHARSET_latin2 +static MINMAX_PARAM minmax_param_latin2_czech_cs[]= +{ + {0, 0, {CSTR("")}, {CSTR("")}}, + {0, 1, {CSTR("")}, {CSTR("")}}, + {0, 2, {CSTR("")}, {CSTR("")}}, + {0, 3, {CSTR("")}, {CSTR("")}}, + {1, 0, {CSTR("")}, {CSTR("")}}, + {1, 1, {CSTR("\x00")}, {CSTR("\xAE")}}, + {1, 2, {CSTR("\x00")}, {CSTR("\xAE")}}, + {1, 3, {CSTR("\x00")}, {CSTR("\xAE")}}, + {2, 0, {CSTR("")}, {CSTR("")}}, + {2, 1, {CSTR("\x00")}, {CSTR("\xAE")}}, + {2, 2, {CSTR("\x00\x00")}, {CSTR("\xAE\xAE")}}, + {2, 3, {CSTR("\x00\x00")}, {CSTR("\xAE\xAE")}}, + {3, 0, {CSTR("")}, {CSTR("")}}, + {3, 1, {CSTR("\x00")}, {CSTR("\xAE")}}, + {3, 2, {CSTR("\x00\x00")}, {CSTR("\xAE\xAE")}}, + {3, 3, {CSTR("\x00\x00\x00")}, {CSTR("\xAE\xAE\xAE")}} +}; +#endif + + +static int test_minmax_str_one(CHARSET_INFO *cs, + const MINMAX_PARAM *params, size_t count) +{ + size_t i; + int failed_total= 0; + for (i= 0; i < count; i++) + { + int failed; + char min[32], hmin[64]; + char max[32], hmax[64]; + const MINMAX_PARAM *prm= &params[i]; + size_t minlen= cs->coll->min_str(cs, (uchar *) min, prm->size, + prm->nchars); + size_t maxlen= cs->coll->max_str(cs, (uchar *) max, prm->size, + 
prm->nchars); + failed= minlen != prm->min.length || memcmp(min, prm->min.str, minlen) || + maxlen != prm->max.length || memcmp(max, prm->max.str, maxlen); + + str2hex(hmin, sizeof(hmin), min, minlen); + str2hex(hmax, sizeof(hmax), max, maxlen); + diag("%-32s %2d %2d %-10s %-10s%s", + cs->coll_name.str, (int) prm->size, (int) prm->nchars, hmin, hmax, + failed ? " FAILED" : ""); + if (failed) + { + str2hex(hmin, sizeof(hmin), prm->min.str, prm->min.length); + str2hex(hmax, sizeof(hmax), prm->max.str, prm->max.length); + diag("%-40s %-10s %-10s EXPECTED", cs->coll_name.str, hmin, hmax); + } + failed_total+= failed; + } + return failed_total; +} + + +static int test_minmax_str() +{ + int failed= 0; + failed+= test_minmax_str_one(&my_charset_latin1_nopad_bin, + minmax_param_latin1_nopad_bin, + array_elements(minmax_param_latin1_nopad_bin)); + failed+= test_minmax_str_one(&my_charset_latin1, + minmax_param_latin1_swedish_ci, + array_elements(minmax_param_latin1_swedish_ci)); + failed+= test_minmax_str_one(&my_charset_utf8mb3_unicode_ci, + minmax_param_utf8mb3_unicode_ci, + array_elements(minmax_param_utf8mb3_unicode_ci)); +#ifdef HAVE_CHARSET_big5 + failed+= test_minmax_str_one(&my_charset_big5_chinese_ci, + minmax_param_big5_chinese_ci, + array_elements(minmax_param_big5_chinese_ci)); +#endif +#ifdef HAVE_CHARSET_cp1250 + failed+= test_minmax_str_one(&my_charset_cp1250_czech_cs, + minmax_param_cp1250_czech_cs, + array_elements(minmax_param_cp1250_czech_cs)); +#endif +#ifdef HAVE_CHARSET_latin2 + failed+= test_minmax_str_one(&my_charset_latin2_czech_cs, + minmax_param_latin2_czech_cs, + array_elements(minmax_param_latin2_czech_cs)); +#endif + return failed; +} + int main() { size_t i, failed= 0; - plan(2); + plan(3); diag("Testing my_like_range_xxx() functions"); for (i= 0; i < array_elements(charset_list); i++) @@ -782,5 +1035,9 @@ int main() failed= test_strcollsp(); ok(failed == 0, "Testing my_ci_strnncollsp()"); + diag("Testing min_str() and max_str()"); + failed= 
test_minmax_str(); + ok(failed == 0, "Testing min_str() and max_str() functions"); + return exit_status(); } |