summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
authorbar@bar.myoffice.izhnet.ru <>2007-08-03 15:30:31 +0500
committerbar@bar.myoffice.izhnet.ru <>2007-08-03 15:30:31 +0500
commitc01ce7b1e3bda657cf4277df413b49945593360f (patch)
treec657f898b9581b0e2a19433212ad42eca9646b98 /strings
parent4c208499a409d9e558ca019e9e101115d801bab7 (diff)
parent4eebfd09c2b045258615bf07990deb6f0f73f729 (diff)
downloadmariadb-git-c01ce7b1e3bda657cf4277df413b49945593360f.tar.gz
Merge mysql.com:/home/bar/mysql-work/mysql-5.0.b28875
into mysql.com:/home/bar/mysql-work/mysql-5.0-rpl
Diffstat (limited to 'strings')
-rw-r--r--strings/conf_to_src.c10
-rw-r--r--strings/ctype-extra.c4
-rw-r--r--strings/ctype.c86
3 files changed, 94 insertions, 6 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c
index 75776d5e6d0..dc2a300a2ec 100644
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
}
+
void dispcset(FILE *f,CHARSET_INFO *cs)
{
fprintf(f,"{\n");
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
- fprintf(f," MY_CS_COMPILED%s%s%s,\n",
- cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
- cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
- is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "");
+ fprintf(f," MY_CS_COMPILED%s%s%s%s,\n",
+ cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
+ cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
+ is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
+ my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
if (cs->name)
{
diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c
index bf45b5b5d75..38aa3a05adf 100644
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@@ -6722,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
#ifdef HAVE_CHARSET_ascii
{
11,0,0,
- MY_CS_COMPILED|MY_CS_PRIMARY,
+ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
"ascii", /* cset name */
"ascii_general_ci", /* coll name */
"", /* comment */
@@ -7811,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
#ifdef HAVE_CHARSET_ascii
{
65,0,0,
- MY_CS_COMPILED|MY_CS_BINSORT,
+ MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
"ascii", /* cset name */
"ascii_bin", /* coll name */
"", /* comment */
diff --git a/strings/ctype.c b/strings/ctype.c
index e7399c5438b..372a1a8a468 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
my_xml_parser_free(&p);
return rc;
}
+
+
+/*
+ Check repertoire: detect pure ascii strings
+*/
+uint
+my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
+{
+ const char *strend= str + length;
+ if (cs->mbminlen == 1)
+ {
+ for ( ; str < strend; str++)
+ {
+ if (((uchar) *str) > 0x7F)
+ return MY_REPERTOIRE_UNICODE30;
+ }
+ }
+ else
+ {
+ my_wc_t wc;
+ int chlen;
+ for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
+ {
+ if (wc > 0x7F)
+ return MY_REPERTOIRE_UNICODE30;
+ }
+ }
+ return MY_REPERTOIRE_ASCII;
+}
+
+
+/*
+ Detect whether a character set is ASCII compatible.
+
+ Returns TRUE for:
+
+ - all 8bit character sets whose Unicode mapping of 0x7B is '{'
+ (ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
+
+ - all multi-byte character sets having mbminlen == 1
+ (ignores ucs2 whose mbminlen is 2)
+
+ TODO:
+
+ When merging to 5.2, this function should be changed
+ to check a new flag MY_CS_NONASCII,
+
+ return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
+
+ This flag was previously added into 5.2 under terms
+ of WL#3759 "Optimize identifier conversion in client-server protocol"
+ especially to mark character sets not compatible with ASCII.
+
+ We won't backport this flag to 5.0 or 5.1.
+ This function is Ok for 5.0 and 5.1, because we're not going
+ to introduce new tricky character sets between 5.0 and 5.2.
+*/
+my_bool
+my_charset_is_ascii_based(CHARSET_INFO *cs)
+{
+ return
+ (cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
+ (cs->mbminlen == 1 && cs->mbmaxlen > 1);
+}
+
+
+/*
+ Detect if a character set is 8bit,
+ and it is pure ascii, i.e. doesn't have
+ characters outside U+0000..U+007F
+ This functions is shared between "conf_to_src"
+ and dynamic charsets loader in "mysqld".
+*/
+my_bool
+my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
+{
+ size_t code;
+ if (!cs->tab_to_uni)
+ return 0;
+ for (code= 0; code < 256; code++)
+ {
+ if (cs->tab_to_uni[code] > 0x7F)
+ return 0;
+ }
+ return 1;
+}