diff options
author | Alexander Barkov <bar@mariadb.org> | 2016-09-03 09:05:56 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2016-09-03 09:05:56 +0400 |
commit | 1ca595fbf7d186bbe9f2f9896869b316d6e9567a (patch) | |
tree | b6751102168660d7eaf923b001cb21d40975603b /strings | |
parent | addb38f4763faa0378cd369106372a0eb0f0ee75 (diff) | |
download | mariadb-git-1ca595fbf7d186bbe9f2f9896869b316d6e9567a.tar.gz |
LDML refactoring for "MDEV-9711 NO PAD collations"
- Moving detection of the MY_CS_CSSORT, MY_CS_PUREASCII, MY_CS_NONASCII
flags of loadable collations from add_collation() in mysys.c
to my_cset_init_8bit() and my_coll_init_simple() in ctype-simple.c.
- Adding tests to verify that these flags are set properly for loadable collations.
- Moving LDML-test-related *.xml files from mysql-test/std_data/
to mysql-test/std_data/ldml/, as there will be more *.xml test files.
Diffstat (limited to 'strings')
-rw-r--r-- | strings/conf_to_src.c | 16 | ||||
-rw-r--r-- | strings/ctype-simple.c | 72 | ||||
-rw-r--r-- | strings/ctype.c | 42 | ||||
-rw-r--r-- | strings/strings_def.h | 5 |
4 files changed, 82 insertions, 53 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c index 5b9793f388d..31093fe4230 100644 --- a/strings/conf_to_src.c +++ b/strings/conf_to_src.c @@ -193,25 +193,19 @@ static int my_read_charset_file(const char *filename) return FALSE; } -static int -is_case_sensitive(CHARSET_INFO *cs) -{ - return (cs->sort_order && - cs->sort_order['A'] < cs->sort_order['a'] && - cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0; -} - void dispcset(FILE *f,CHARSET_INFO *cs) { + uint flags= my_8bit_charset_flags_from_data(cs) | + my_8bit_collation_flags_from_data(cs); fprintf(f,"{\n"); fprintf(f," %d,%d,%d,\n",cs->number,0,0); fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n", cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "", cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "", - is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "", - my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "", - !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": ""); + flags & MY_CS_CSSORT ? "|MY_CS_CSSORT" : "", + flags & MY_CS_PUREASCII ? "|MY_CS_PUREASCII" : "", + flags & MY_CS_NONASCII ? "|MY_CS_NONASCII" : ""); if (cs->name) { diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index f405c4f327b..e6cea06b17b 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1340,9 +1340,80 @@ create_fromuni(struct charset_info_st *cs, return FALSE; } + +/* + Detect if a character set is 8bit, + and it is pure ascii, i.e. doesn't have + characters outside U+0000..U+007F + This functions is shared between "conf_to_src" + and dynamic charsets loader in "mysqld". +*/ +static my_bool +my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs) +{ + size_t code; + if (!cs->tab_to_uni) + return 0; + for (code= 0; code < 256; code++) + { + if (cs->tab_to_uni[code] > 0x7F) + return 0; + } + return 1; +} + + +/* + Shared function between conf_to_src and mysys. + Check if a 8bit character set is compatible with + ascii on the range 0x00..0x7F. 
+*/ +static my_bool +my_charset_is_ascii_compatible(CHARSET_INFO *cs) +{ + uint i; + if (!cs->tab_to_uni) + return 1; + for (i= 0; i < 128; i++) + { + if (cs->tab_to_uni[i] != i) + return 0; + } + return 1; +} + + +uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs) +{ + uint flags= 0; + if (my_charset_is_8bit_pure_ascii(cs)) + flags|= MY_CS_PUREASCII; + if (!my_charset_is_ascii_compatible(cs)) + flags|= MY_CS_NONASCII; + return flags; +} + + +/* + Check if case sensitive sort order: A < a < B. + We need MY_CS_FLAG for regex library, and for + case sensitivity flag for 5.0 client protocol, + to support isCaseSensitive() method in JDBC driver +*/ +uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs) +{ + uint flags= 0; + if (cs->sort_order && cs->sort_order['A'] < cs->sort_order['a'] && + cs->sort_order['a'] < cs->sort_order['B']) + flags|= MY_CS_CSSORT; + return flags; +} + + static my_bool my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader) { + cs->state|= my_8bit_charset_flags_from_data(cs); cs->caseup_multiply= 1; cs->casedn_multiply= 1; cs->pad_char= ' '; @@ -1371,6 +1442,7 @@ static void set_max_sort_char(struct charset_info_st *cs) static my_bool my_coll_init_simple(struct charset_info_st *cs, MY_CHARSET_LOADER *loader __attribute__((unused))) { + cs->state|= my_8bit_collation_flags_from_data(cs); set_max_sort_char(cs); return FALSE; } diff --git a/strings/ctype.c b/strings/ctype.c index 620c7e13503..be8a8cb506e 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -974,48 +974,6 @@ my_charset_is_ascii_based(CHARSET_INFO *cs) /* - Detect if a character set is 8bit, - and it is pure ascii, i.e. doesn't have - characters outside U+0000..U+007F - This functions is shared between "conf_to_src" - and dynamic charsets loader in "mysqld". 
-*/ -my_bool -my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs) -{ - size_t code; - if (!cs->tab_to_uni) - return 0; - for (code= 0; code < 256; code++) - { - if (cs->tab_to_uni[code] > 0x7F) - return 0; - } - return 1; -} - - -/* - Shared function between conf_to_src and mysys. - Check if a 8bit character set is compatible with - ascii on the range 0x00..0x7F. -*/ -my_bool -my_charset_is_ascii_compatible(CHARSET_INFO *cs) -{ - uint i; - if (!cs->tab_to_uni) - return 1; - for (i= 0; i < 128; i++) - { - if (cs->tab_to_uni[i] != i) - return 0; - } - return 1; -} - - -/* Convert a string between two character sets. 'to' must be large enough to store (form_length * to_cs->mbmaxlen) bytes. diff --git a/strings/strings_def.h b/strings/strings_def.h index fb280b6bb6b..36d3d2b2fe9 100644 --- a/strings/strings_def.h +++ b/strings/strings_def.h @@ -101,6 +101,11 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len) return (end); } + +uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs); +uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs); + + /* Macros for hashing characters */ #define MY_HASH_ADD(A, B, value) \ |