diff options
author | Alexander Barkov <bar@mysql.com> | 2009-09-30 10:09:28 +0500 |
---|---|---|
committer | Alexander Barkov <bar@mysql.com> | 2009-09-30 10:09:28 +0500 |
commit | f02800bd9733efbdc1328f866ad0ef2779939f7c (patch) | |
tree | 0b2f97100efd6c5566704a4bcd828c90c91da331 | |
parent | a8edd0aabb5059935b99076fbcc92403079535df (diff) | |
download | mariadb-git-f02800bd9733efbdc1328f866ad0ef2779939f7c.tar.gz |
Backporting WL#3759 Optimize identifier conversion in client-server protocol
This patch provides performance improvements:
- send_fields() when character_set_results = latin1
is now about twice faster for column/table/database
names, consisting on ASCII characters.
Changes:
- Protocol doesn't use "convert" temporary buffer anymore,
and converts strings directly to "packet".
- General conversion optimization: quick conversion
of ASCII strings was added.
modified files:
include/m_ctype.h
- Adding a new flag.
- Adding a new function prototype
libmysqld/lib_sql.cc
- Adding quick conversion method for embedded library:
conversion is now done directly to result buffer,
without using a temporary buffer.
mysys/charset.c
- Mark all dynamic ucs2 character sets as non-ASCII
- Mark some dymamic 7bit and 8bit charsets as non-ASCII
(for example swe7 is not fully ASCII compatible).
sql/protocol.cc
- Adding quick method to convert a string directly
into protocol buffer, without using a temporary buffer.
sql/protocol.h
- Adding a new method prototype
sql/sql_string.cc
Optimization for conversion between two ASCII-compatible charsets:
- quickly convert ASCII strings,
switch to mc_wc->wc_mb method only when a non-ASCII character is met.
- copy four ASCII characters at once on i386
strings/conf_to_src.c
- Marking non-ASCII character sets with a flag.
strings/ctype-extra.c
- Regenerating ctype-extra.c by running "conf_to_src".
strings/ctype-uca.c
- Marking UCS2 character set as non-ASCII.
strings/ctype-ucs2.c
- Marking UCS2 character set as non-ASCII.
strings/ctype.c
- A new function to detect if a 7bit or 8bit character set
is ascii compatible.
-rw-r--r-- | include/m_ctype.h | 2 | ||||
-rw-r--r-- | libmysqld/lib_sql.cc | 24 | ||||
-rw-r--r-- | mysys/charset.c | 3 | ||||
-rw-r--r-- | sql/protocol.cc | 64 | ||||
-rw-r--r-- | sql/protocol.h | 2 | ||||
-rw-r--r-- | sql/sql_string.cc | 68 | ||||
-rw-r--r-- | strings/conf_to_src.c | 5 | ||||
-rw-r--r-- | strings/ctype-extra.c | 72 | ||||
-rw-r--r-- | strings/ctype-uca.c | 76 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 4 | ||||
-rw-r--r-- | strings/ctype.c | 20 |
11 files changed, 222 insertions, 118 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 451c8db549b..f6503a54935 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -87,6 +87,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; #define MY_CS_CSSORT 1024 /* if case sensitive sort order */ #define MY_CS_HIDDEN 2048 /* don't display in SHOW */ #define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */ +#define MY_CS_NONASCII 8192 /* if not ASCII-compatible */ #define MY_CHARSET_UNDEFINED 0 /* Character repertoire flags */ @@ -474,6 +475,7 @@ my_bool my_charset_is_ascii_based(CHARSET_INFO *cs); my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs); uint my_charset_repertoire(CHARSET_INFO *cs); +my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs); #define _MY_U 01 /* Upper case */ #define _MY_L 02 /* Lower case */ diff --git a/libmysqld/lib_sql.cc b/libmysqld/lib_sql.cc index 64822f8fad6..2ac2556d4de 100644 --- a/libmysqld/lib_sql.cc +++ b/libmysqld/lib_sql.cc @@ -1175,3 +1175,27 @@ int vprint_msg_to_log(enum loglevel level __attribute__((unused)), mysql_server_last_errno= CR_UNKNOWN_ERROR; return 0; } + + +bool Protocol::net_store_data(const uchar *from, size_t length, + CHARSET_INFO *from_cs, CHARSET_INFO *to_cs) +{ + uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen; + uint dummy_error; + char *field_buf; + if (!thd->mysql) // bootstrap file handling + return false; + + if (!(field_buf= (char*) alloc_root(alloc, conv_length + sizeof(uint) + 1))) + return true; + *next_field= field_buf + sizeof(uint); + length= copy_and_convert(*next_field, conv_length, to_cs, + (const char*) from, length, from_cs, &dummy_error); + *(uint *) field_buf= length; + (*next_field)[length]= 0; + if (next_mysql_field->max_length < length) + next_mysql_field->max_length= length; + ++next_field; + ++next_mysql_field; + return false; +} diff --git a/mysys/charset.c b/mysys/charset.c index b23ab084e90..214ca170757 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -248,6 +248,7 @@ static int add_collation(CHARSET_INFO *cs) { #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS) copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci); + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; #endif } else if (!strcmp(cs->csname, "utf8")) @@ -280,6 +281,8 @@ static int add_collation(CHARSET_INFO *cs) if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number])) all_charsets[cs->number]->state|= MY_CS_PUREASCII; + if (!my_charset_is_ascii_compatible(cs)) + all_charsets[cs->number]->state|= MY_CS_NONASCII; } } else diff --git a/sql/protocol.cc b/sql/protocol.cc index 54e17ff5c3b..7abc051af68 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -58,6 +58,64 @@ bool Protocol_binary::net_store_data(const uchar *from, size_t length) } + + +/* + net_store_data() - extended version with character set conversion. + + It is optimized for short strings whose length after + conversion is garanteed to be less than 251, which accupies + exactly one byte to store length. It allows not to use + the "convert" member as a temporary buffer, conversion + is done directly to the "packet" member. + The limit 251 is good enough to optimize send_result_set_metadata() + because column, table, database names fit into this limit. +*/ + +#ifndef EMBEDDED_LIBRARY +bool Protocol::net_store_data(const uchar *from, size_t length, + CHARSET_INFO *from_cs, CHARSET_INFO *to_cs) +{ + uint dummy_errors; + /* Calculate maxumum possible result length */ + uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen; + if (conv_length > 250) + { + /* + For strings with conv_length greater than 250 bytes + we don't know how many bytes we will need to store length: one or two, + because we don't know result length until conversion is done. + For example, when converting from utf8 (mbmaxlen=3) to latin1, + conv_length=300 means that the result length can vary between 100 to 300. + length=100 needs one byte, length=300 needs to bytes. + + Thus conversion directly to "packet" is not worthy. + Let's use "convert" as a temporary buffer. + */ + return (convert->copy((const char*) from, length, from_cs, + to_cs, &dummy_errors) || + net_store_data((const uchar*) convert->ptr(), convert->length())); + } + + ulong packet_length= packet->length(); + ulong new_length= packet_length + conv_length + 1; + + if (new_length > packet->alloced_length() && packet->realloc(new_length)) + return 1; + + char *length_pos= (char*) packet->ptr() + packet_length; + char *to= length_pos + 1; + + to+= copy_and_convert(to, conv_length, to_cs, + (const char*) from, length, from_cs, &dummy_errors); + + net_store_length((uchar*) length_pos, to - length_pos - 1); + packet->length((uint) (to - packet->ptr())); + return 0; +} +#endif + + /** Send a error string to client. @@ -827,10 +885,10 @@ bool Protocol::store_string_aux(const char *from, size_t length, fromcs != &my_charset_bin && tocs != &my_charset_bin) { - uint dummy_errors; - return (convert->copy(from, length, fromcs, tocs, &dummy_errors) || - net_store_data((uchar*) convert->ptr(), convert->length())); + /* Store with conversion */ + return net_store_data((uchar*) from, length, fromcs, tocs); } + /* Store without conversion */ return net_store_data((uchar*) from, length); } diff --git a/sql/protocol.h b/sql/protocol.h index 1e584295028..3b6a3bdb863 100644 --- a/sql/protocol.h +++ b/sql/protocol.h @@ -43,6 +43,8 @@ protected: MYSQL_FIELD *next_mysql_field; MEM_ROOT *alloc; #endif + bool net_store_data(const uchar *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs); bool store_string_aux(const char *from, size_t length, CHARSET_INFO *fromcs, CHARSET_INFO *tocs); public: diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 7c9793b273b..593450cacd5 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -794,10 +794,11 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length) */ -uint32 -copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, - const char *from, uint32 from_length, CHARSET_INFO *from_cs, - uint *errors) +static uint32 +copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, + CHARSET_INFO *from_cs, + uint *errors) { int cnvres; my_wc_t wc; @@ -849,6 +850,65 @@ outp: } +/* + Optimized for quick copying of ASCII characters in the range 0x00..0x7F. +*/ +uint32 +copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, CHARSET_INFO *from_cs, + uint *errors) +{ + /* + If any of the character sets is not ASCII compatible, + immediately switch to slow mb_wc->wc_mb method. + */ + if ((to_cs->state | from_cs->state) & MY_CS_NONASCII) + return copy_and_convert_extended(to, to_length, to_cs, + from, from_length, from_cs, errors); + + uint32 length= min(to_length, from_length), length2= length; + +#if defined(__i386__) + /* + Special loop for i386, it allows to refer to a + non-aligned memory block as UINT32, which makes + it possible to copy four bytes at once. This + gives about 10% performance improvement comparing + to byte-by-byte loop. + */ + for ( ; length >= 4; length-= 4, from+= 4, to+= 4) + { + if ((*(uint32*)from) & 0x80808080) + break; + *((uint32*) to)= *((const uint32*) from); + } +#endif + + for (; ; *to++= *from++, length--) + { + if (!length) + { + *errors= 0; + return length2; + } + if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */ + { + uint32 copied_length= length2 - length; + to_length-= copied_length; + from_length-= copied_length; + return copied_length + copy_and_convert_extended(to, to_length, + to_cs, + from, from_length, + from_cs, + errors); + } + } + + DBUG_ASSERT(FALSE); // Should never get to here + return 0; // Make compiler happy +} + + /** Copy string with HEX-encoding of "bad" characters. diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c index 7e742050aa8..9f1ed9b2441 100644 --- a/strings/conf_to_src.c +++ b/strings/conf_to_src.c @@ -184,11 +184,12 @@ void dispcset(FILE *f,CHARSET_INFO *cs) { fprintf(f,"{\n"); fprintf(f," %d,%d,%d,\n",cs->number,0,0); - fprintf(f," MY_CS_COMPILED%s%s%s%s,\n", + fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n", cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "", cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "", is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "", - my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : ""); + my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "", + !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": ""); if (cs->name) { diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index 75244e40435..ba12f3f4267 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -6,7 +6,7 @@ ./conf_to_src ../sql/share/charsets/ > FILE */ -/* Copyright (C) 2000-2007 MySQL AB +/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -6804,7 +6804,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_swe7 { 10,0,0, - MY_CS_COMPILED|MY_CS_PRIMARY, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_NONASCII, "swe7", /* cset name */ "swe7_swedish_ci", /* coll name */ "", /* comment */ @@ -8454,7 +8454,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_swe7 { 82,0,0, - MY_CS_COMPILED|MY_CS_BINSORT, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, "swe7", /* cset name */ "swe7_bin", /* coll name */ "", /* comment */ @@ -8550,72 +8550,6 @@ CHARSET_INFO compiled_charsets[] = { } , #endif -#ifdef HAVE_CHARSET_geostd8 -{ - 92,0,0, - MY_CS_COMPILED|MY_CS_PRIMARY, - "geostd8", /* cset name */ - "geostd8_general_ci", /* coll name */ - "", /* comment */ - NULL, /* tailoring */ - ctype_geostd8_general_ci, /* ctype */ - to_lower_geostd8_general_ci, /* lower */ - to_upper_geostd8_general_ci, /* upper */ - sort_order_geostd8_general_ci, /* sort_order */ - NULL, /* contractions */ - NULL, /* sort_order_big*/ - to_uni_geostd8_general_ci, /* to_uni */ - NULL, /* from_uni */ - my_unicase_default, /* caseinfo */ - NULL, /* state map */ - NULL, /* ident map */ - 1, /* strxfrm_multiply*/ - 1, /* caseup_multiply*/ - 1, /* casedn_multiply*/ - 1, /* mbminlen */ - 1, /* mbmaxlen */ - 0, /* min_sort_char */ - 255, /* max_sort_char */ - ' ', /* pad_char */ - 0, /* escape_with_backslash_is_dangerous */ - &my_charset_8bit_handler, - &my_collation_8bit_simple_ci_handler, -} -, -#endif -#ifdef HAVE_CHARSET_geostd8 -{ - 93,0,0, - MY_CS_COMPILED|MY_CS_BINSORT, - "geostd8", /* cset name */ - "geostd8_bin", /* coll name */ - "", /* comment */ - NULL, /* tailoring */ - ctype_geostd8_bin, /* ctype */ - to_lower_geostd8_bin, /* lower */ - to_upper_geostd8_bin, /* upper */ - NULL, /* sort_order */ - NULL, /* contractions */ - NULL, /* sort_order_big*/ - to_uni_geostd8_bin, /* to_uni */ - NULL, /* from_uni */ - my_unicase_default, /* caseinfo */ - NULL, /* state map */ - NULL, /* ident map */ - 1, /* strxfrm_multiply*/ - 1, /* caseup_multiply*/ - 1, /* casedn_multiply*/ - 1, /* mbminlen */ - 1, /* mbmaxlen */ - 0, /* min_sort_char */ - 255, /* max_sort_char */ - ' ', /* pad_char */ - 0, /* escape_with_backslash_is_dangerous */ - &my_charset_8bit_handler, - &my_collation_8bit_bin_handler, -} -, -#endif #ifdef HAVE_CHARSET_latin1 { 94,0,0, diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 2ea48ddab2f..566cc58ab0a 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -8087,7 +8087,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = CHARSET_INFO my_charset_ucs2_unicode_ci= { 128,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_unicode_ci", /* name */ "", /* comment */ @@ -8119,7 +8119,7 @@ CHARSET_INFO my_charset_ucs2_unicode_ci= CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= { 129,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_icelandic_ci",/* name */ "", /* comment */ @@ -8151,7 +8151,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= CHARSET_INFO my_charset_ucs2_latvian_uca_ci= { 130,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_latvian_ci", /* name */ "", /* comment */ @@ -8183,7 +8183,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci= CHARSET_INFO my_charset_ucs2_romanian_uca_ci= { 131,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_romanian_ci", /* name */ "", /* comment */ @@ -8215,7 +8215,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci= CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= { 132,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_slovenian_ci",/* name */ "", /* comment */ @@ -8247,7 +8247,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= CHARSET_INFO my_charset_ucs2_polish_uca_ci= { 133,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_polish_ci", /* name */ "", /* comment */ @@ -8279,7 +8279,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci= CHARSET_INFO my_charset_ucs2_estonian_uca_ci= { 134,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_estonian_ci", /* name */ "", /* comment */ @@ -8311,7 +8311,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci= CHARSET_INFO my_charset_ucs2_spanish_uca_ci= { 135,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_spanish_ci", /* name */ "", /* comment */ @@ -8343,7 +8343,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci= CHARSET_INFO my_charset_ucs2_swedish_uca_ci= { 136,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_swedish_ci", /* name */ "", /* comment */ @@ -8375,7 +8375,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci= CHARSET_INFO my_charset_ucs2_turkish_uca_ci= { 137,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_turkish_ci", /* name */ "", /* comment */ @@ -8407,7 +8407,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci= CHARSET_INFO my_charset_ucs2_czech_uca_ci= { 138,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_czech_ci", /* name */ "", /* comment */ @@ -8440,7 +8440,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_ci= CHARSET_INFO my_charset_ucs2_danish_uca_ci= { 139,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_danish_ci", /* name */ "", /* comment */ @@ -8472,7 +8472,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_ci= CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= { 140,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_lithuanian_ci",/* name */ "", /* comment */ @@ -8504,7 +8504,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= CHARSET_INFO my_charset_ucs2_slovak_uca_ci= { 141,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_slovak_ci", /* name */ "", /* comment */ @@ -8536,7 +8536,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_ci= CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= { 142,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_spanish2_ci", /* name */ "", /* comment */ @@ -8569,7 +8569,7 @@ CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= CHARSET_INFO my_charset_ucs2_roman_uca_ci= { 143,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_roman_ci", /* name */ "", /* comment */ @@ -8602,7 +8602,7 @@ CHARSET_INFO my_charset_ucs2_roman_uca_ci= CHARSET_INFO my_charset_ucs2_persian_uca_ci= { 144,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_persian_ci", /* name */ "", /* comment */ @@ -8635,7 +8635,7 @@ CHARSET_INFO my_charset_ucs2_persian_uca_ci= CHARSET_INFO my_charset_ucs2_esperanto_uca_ci= { 145,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_esperanto_ci",/* name */ "", /* comment */ @@ -8668,7 +8668,7 @@ CHARSET_INFO my_charset_ucs2_esperanto_uca_ci= CHARSET_INFO my_charset_ucs2_hungarian_uca_ci= { 146,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_hungarian_ci",/* name */ "", /* comment */ @@ -8748,7 +8748,7 @@ extern MY_CHARSET_HANDLER my_charset_utf8_handler; CHARSET_INFO my_charset_utf8_unicode_ci= { 192,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_unicode_ci", /* name */ "", /* comment */ @@ -8781,7 +8781,7 @@ CHARSET_INFO my_charset_utf8_unicode_ci= CHARSET_INFO my_charset_utf8_icelandic_uca_ci= { 193,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_icelandic_ci",/* name */ "", /* comment */ @@ -8813,7 +8813,7 @@ CHARSET_INFO my_charset_utf8_icelandic_uca_ci= CHARSET_INFO my_charset_utf8_latvian_uca_ci= { 194,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_latvian_ci", /* name */ "", /* comment */ @@ -8845,7 +8845,7 @@ CHARSET_INFO my_charset_utf8_latvian_uca_ci= CHARSET_INFO my_charset_utf8_romanian_uca_ci= { 195,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_romanian_ci", /* name */ "", /* comment */ @@ -8877,7 +8877,7 @@ CHARSET_INFO my_charset_utf8_romanian_uca_ci= CHARSET_INFO my_charset_utf8_slovenian_uca_ci= { 196,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_slovenian_ci",/* name */ "", /* comment */ @@ -8909,7 +8909,7 @@ CHARSET_INFO my_charset_utf8_slovenian_uca_ci= CHARSET_INFO my_charset_utf8_polish_uca_ci= { 197,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_polish_ci", /* name */ "", /* comment */ @@ -8941,7 +8941,7 @@ CHARSET_INFO my_charset_utf8_polish_uca_ci= CHARSET_INFO my_charset_utf8_estonian_uca_ci= { 198,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_estonian_ci", /* name */ "", /* comment */ @@ -8973,7 +8973,7 @@ CHARSET_INFO my_charset_utf8_estonian_uca_ci= CHARSET_INFO my_charset_utf8_spanish_uca_ci= { 199,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_spanish_ci", /* name */ "", /* comment */ @@ -9005,7 +9005,7 @@ CHARSET_INFO my_charset_utf8_spanish_uca_ci= CHARSET_INFO my_charset_utf8_swedish_uca_ci= { 200,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_swedish_ci", /* name */ "", /* comment */ @@ -9037,7 +9037,7 @@ CHARSET_INFO my_charset_utf8_swedish_uca_ci= CHARSET_INFO my_charset_utf8_turkish_uca_ci= { 201,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_turkish_ci", /* name */ "", /* comment */ @@ -9069,7 +9069,7 @@ CHARSET_INFO my_charset_utf8_turkish_uca_ci= CHARSET_INFO my_charset_utf8_czech_uca_ci= { 202,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_czech_ci", /* name */ "", /* comment */ @@ -9102,7 +9102,7 @@ CHARSET_INFO my_charset_utf8_czech_uca_ci= CHARSET_INFO my_charset_utf8_danish_uca_ci= { 203,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_danish_ci", /* name */ "", /* comment */ @@ -9134,7 +9134,7 @@ CHARSET_INFO my_charset_utf8_danish_uca_ci= CHARSET_INFO my_charset_utf8_lithuanian_uca_ci= { 204,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_lithuanian_ci",/* name */ "", /* comment */ @@ -9166,7 +9166,7 @@ CHARSET_INFO my_charset_utf8_lithuanian_uca_ci= CHARSET_INFO my_charset_utf8_slovak_uca_ci= { 205,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_slovak_ci", /* name */ "", /* comment */ @@ -9198,7 +9198,7 @@ CHARSET_INFO my_charset_utf8_slovak_uca_ci= CHARSET_INFO my_charset_utf8_spanish2_uca_ci= { 206,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_spanish2_ci", /* name */ "", /* comment */ @@ -9230,7 +9230,7 @@ CHARSET_INFO my_charset_utf8_spanish2_uca_ci= CHARSET_INFO my_charset_utf8_roman_uca_ci= { 207,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_roman_ci", /* name */ "", /* comment */ @@ -9262,7 +9262,7 @@ CHARSET_INFO my_charset_utf8_roman_uca_ci= CHARSET_INFO my_charset_utf8_persian_uca_ci= { 208,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_persian_ci", /* name */ "", /* comment */ @@ -9294,7 +9294,7 @@ CHARSET_INFO my_charset_utf8_persian_uca_ci= CHARSET_INFO my_charset_utf8_esperanto_uca_ci= { 209,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_esperanto_ci",/* name */ "", /* comment */ @@ -9326,7 +9326,7 @@ CHARSET_INFO my_charset_utf8_esperanto_uca_ci= CHARSET_INFO my_charset_utf8_hungarian_uca_ci= { 210,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "utf8", /* cs name */ "utf8_hungarian_ci",/* name */ "", /* comment */ diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index a1c691a462b..f030c08523c 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1712,7 +1712,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= CHARSET_INFO my_charset_ucs2_general_ci= { 35,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_general_ci", /* name */ "", /* comment */ @@ -1744,7 +1744,7 @@ CHARSET_INFO my_charset_ucs2_general_ci= CHARSET_INFO my_charset_ucs2_bin= { 90,0,0, /* number */ - MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_bin", /* name */ "", /* comment */ diff --git a/strings/ctype.c b/strings/ctype.c index 17ad1256e74..75d76aceea3 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -405,3 +405,23 @@ my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs) } return 1; } + + +/* + Shared function between conf_to_src and mysys. + Check if a 8bit character set is compatible with + ascii on the range 0x00..0x7F. +*/ +my_bool +my_charset_is_ascii_compatible(CHARSET_INFO *cs) +{ + uint i; + if (!cs->tab_to_uni) + return 1; + for (i= 0; i < 128; i++) + { + if (cs->tab_to_uni[i] != i) + return 0; + } + return 1; +} |