diff options
author | bar@bar.myoffice.izhnet.ru <> | 2007-08-03 15:30:31 +0500 |
---|---|---|
committer | bar@bar.myoffice.izhnet.ru <> | 2007-08-03 15:30:31 +0500 |
commit | c01ce7b1e3bda657cf4277df413b49945593360f (patch) | |
tree | c657f898b9581b0e2a19433212ad42eca9646b98 /strings | |
parent | 4c208499a409d9e558ca019e9e101115d801bab7 (diff) | |
parent | 4eebfd09c2b045258615bf07990deb6f0f73f729 (diff) | |
download | mariadb-git-c01ce7b1e3bda657cf4277df413b49945593360f.tar.gz |
Merge mysql.com:/home/bar/mysql-work/mysql-5.0.b28875
into mysql.com:/home/bar/mysql-work/mysql-5.0-rpl
Diffstat (limited to 'strings')
-rw-r--r-- | strings/conf_to_src.c | 10 | ||||
-rw-r--r-- | strings/ctype-extra.c | 4 | ||||
-rw-r--r-- | strings/ctype.c | 86 |
3 files changed, 94 insertions, 6 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c
index 75776d5e6d0..dc2a300a2ec 100644
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
           cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
 }
 
+
 void dispcset(FILE *f,CHARSET_INFO *cs)
 {
   fprintf(f,"{\n");
   fprintf(f," %d,%d,%d,\n",cs->number,0,0);
-  fprintf(f," MY_CS_COMPILED%s%s%s,\n",
-          cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
-          cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
-          is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "");
+  fprintf(f," MY_CS_COMPILED%s%s%s%s,\n",
+          cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
+          cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
+          is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
+          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
 
   if (cs->name)
   {
diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c
index bf45b5b5d75..38aa3a05adf 100644
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@@ -6722,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_ascii
 {
   11,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
   "ascii", /* cset name */
   "ascii_general_ci", /* coll name */
   "", /* comment */
@@ -7811,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_ascii
 {
   65,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
   "ascii", /* cset name */
   "ascii_bin", /* coll name */
   "", /* comment */
diff --git a/strings/ctype.c b/strings/ctype.c
index e7399c5438b..372a1a8a468 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
   my_xml_parser_free(&p);
   return rc;
 }
+
+
+/*
+  Check repertoire: detect pure ascii strings
+*/
+uint
+my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
+{
+  const char *strend= str + length;
+  if (cs->mbminlen == 1)
+  {
+    for ( ; str < strend; str++)
+    {
+      if (((uchar) *str) > 0x7F)
+        return MY_REPERTOIRE_UNICODE30;
+    }
+  }
+  else
+  {
+    my_wc_t wc;
+    int chlen;
+    for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
+    {
+      if (wc > 0x7F)
+        return MY_REPERTOIRE_UNICODE30;
+    }
+  }
+  return MY_REPERTOIRE_ASCII;
+}
+
+
+/*
+  Detect whether a character set is ASCII compatible.
+
+  Returns TRUE for:
+
+  - all 8bit character sets whose Unicode mapping of 0x7B is '{'
+    (ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
+
+  - all multi-byte character sets having mbminlen == 1
+    (ignores ucs2 whose mbminlen is 2)
+
+  TODO:
+
+  When merging to 5.2, this function should be changed
+  to check a new flag MY_CS_NONASCII,
+
+     return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
+
+  This flag was previously added into 5.2 under terms
+  of WL#3759 "Optimize identifier conversion in client-server protocol"
+  especially to mark character sets not compatible with ASCII.
+
+  We won't backport this flag to 5.0 or 5.1.
+  This function is Ok for 5.0 and 5.1, because we're not going
+  to introduce new tricky character sets between 5.0 and 5.2.
+*/
+my_bool
+my_charset_is_ascii_based(CHARSET_INFO *cs)
+{
+  return
+    (cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
+    (cs->mbminlen == 1 && cs->mbmaxlen > 1);
+}
+
+
+/*
+  Detect if a character set is 8bit,
+  and it is pure ascii, i.e. doesn't have
+  characters outside U+0000..U+007F
+  This functions is shared between "conf_to_src"
+  and dynamic charsets loader in "mysqld".
+*/
+my_bool
+my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
+{
+  size_t code;
+  if (!cs->tab_to_uni)
+    return 0;
+  for (code= 0; code < 256; code++)
+  {
+    if (cs->tab_to_uni[code] > 0x7F)
+      return 0;
+  }
+  return 1;
+}