summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
author: Alexander Barkov <bar@mariadb.org> 2016-09-03 09:05:56 +0400
committer: Alexander Barkov <bar@mariadb.org> 2016-09-03 09:05:56 +0400
commit: 1ca595fbf7d186bbe9f2f9896869b316d6e9567a (patch)
tree: b6751102168660d7eaf923b001cb21d40975603b /strings
parent: addb38f4763faa0378cd369106372a0eb0f0ee75 (diff)
download: mariadb-git-1ca595fbf7d186bbe9f2f9896869b316d6e9567a.tar.gz
LDML refactoring for "MDEV-9711 NO PAD collations"
- Moving detection of the MY_CS_CSSORT, MY_CS_PUREASCII, MY_CS_NONASCII flags of loadable collations from add_collation() in mysys.c to my_cset_init_8bit() and my_coll_init_simple() in ctype-simple.c.
- Adding tests that these flags are set properly for loadable collations.
- Moving LDML test related *.xml files from mysql-test/std_data/ to mysql-test/std_data/ldml/, as there will be more *.xml test files.
Diffstat (limited to 'strings')
-rw-r--r-- strings/conf_to_src.c 16
-rw-r--r-- strings/ctype-simple.c 72
-rw-r--r-- strings/ctype.c 42
-rw-r--r-- strings/strings_def.h 5
4 files changed, 82 insertions, 53 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c
index 5b9793f388d..31093fe4230 100644
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@@ -193,25 +193,19 @@ static int my_read_charset_file(const char *filename)
return FALSE;
}
-static int
-is_case_sensitive(CHARSET_INFO *cs)
-{
- return (cs->sort_order &&
- cs->sort_order['A'] < cs->sort_order['a'] &&
- cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
-}
-
void dispcset(FILE *f,CHARSET_INFO *cs)
{
+ uint flags= my_8bit_charset_flags_from_data(cs) |
+ my_8bit_collation_flags_from_data(cs);
fprintf(f,"{\n");
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n",
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
- is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
- my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "",
- !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
+ flags & MY_CS_CSSORT ? "|MY_CS_CSSORT" : "",
+ flags & MY_CS_PUREASCII ? "|MY_CS_PUREASCII" : "",
+ flags & MY_CS_NONASCII ? "|MY_CS_NONASCII" : "");
if (cs->name)
{
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index f405c4f327b..e6cea06b17b 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1340,9 +1340,80 @@ create_fromuni(struct charset_info_st *cs,
return FALSE;
}
+
+/*
+ Detect if a character set is 8bit,
+ and it is pure ascii, i.e. doesn't have
+ characters outside U+0000..U+007F
+ This function is shared between "conf_to_src"
+ and dynamic charsets loader in "mysqld".
+*/
+static my_bool
+my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
+{
+ size_t code;
+ if (!cs->tab_to_uni)
+ return 0;
+ for (code= 0; code < 256; code++)
+ {
+ if (cs->tab_to_uni[code] > 0x7F)
+ return 0;
+ }
+ return 1;
+}
+
+
+/*
+ Shared function between conf_to_src and mysys.
+ Check if an 8bit character set is compatible with
+ ascii on the range 0x00..0x7F.
+*/
+static my_bool
+my_charset_is_ascii_compatible(CHARSET_INFO *cs)
+{
+ uint i;
+ if (!cs->tab_to_uni)
+ return 1;
+ for (i= 0; i < 128; i++)
+ {
+ if (cs->tab_to_uni[i] != i)
+ return 0;
+ }
+ return 1;
+}
+
+
+uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs)
+{
+ uint flags= 0;
+ if (my_charset_is_8bit_pure_ascii(cs))
+ flags|= MY_CS_PUREASCII;
+ if (!my_charset_is_ascii_compatible(cs))
+ flags|= MY_CS_NONASCII;
+ return flags;
+}
+
+
+/*
+ Check if case sensitive sort order: A < a < B.
+ We need the MY_CS_CSSORT flag for the regex library, and as
+ the case sensitivity flag for the 5.0 client protocol,
+ to support the isCaseSensitive() method in the JDBC driver.
+*/
+uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs)
+{
+ uint flags= 0;
+ if (cs->sort_order && cs->sort_order['A'] < cs->sort_order['a'] &&
+ cs->sort_order['a'] < cs->sort_order['B'])
+ flags|= MY_CS_CSSORT;
+ return flags;
+}
+
+
static my_bool
my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
{
+ cs->state|= my_8bit_charset_flags_from_data(cs);
cs->caseup_multiply= 1;
cs->casedn_multiply= 1;
cs->pad_char= ' ';
@@ -1371,6 +1442,7 @@ static void set_max_sort_char(struct charset_info_st *cs)
static my_bool my_coll_init_simple(struct charset_info_st *cs,
MY_CHARSET_LOADER *loader __attribute__((unused)))
{
+ cs->state|= my_8bit_collation_flags_from_data(cs);
set_max_sort_char(cs);
return FALSE;
}
diff --git a/strings/ctype.c b/strings/ctype.c
index 620c7e13503..be8a8cb506e 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -974,48 +974,6 @@ my_charset_is_ascii_based(CHARSET_INFO *cs)
/*
- Detect if a character set is 8bit,
- and it is pure ascii, i.e. doesn't have
- characters outside U+0000..U+007F
- This functions is shared between "conf_to_src"
- and dynamic charsets loader in "mysqld".
-*/
-my_bool
-my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
-{
- size_t code;
- if (!cs->tab_to_uni)
- return 0;
- for (code= 0; code < 256; code++)
- {
- if (cs->tab_to_uni[code] > 0x7F)
- return 0;
- }
- return 1;
-}
-
-
-/*
- Shared function between conf_to_src and mysys.
- Check if a 8bit character set is compatible with
- ascii on the range 0x00..0x7F.
-*/
-my_bool
-my_charset_is_ascii_compatible(CHARSET_INFO *cs)
-{
- uint i;
- if (!cs->tab_to_uni)
- return 1;
- for (i= 0; i < 128; i++)
- {
- if (cs->tab_to_uni[i] != i)
- return 0;
- }
- return 1;
-}
-
-
-/*
Convert a string between two character sets.
'to' must be large enough to store (form_length * to_cs->mbmaxlen) bytes.
diff --git a/strings/strings_def.h b/strings/strings_def.h
index fb280b6bb6b..36d3d2b2fe9 100644
--- a/strings/strings_def.h
+++ b/strings/strings_def.h
@@ -101,6 +101,11 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
return (end);
}
+
+uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
+uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
+
+
/* Macros for hashing characters */
#define MY_HASH_ADD(A, B, value) \