diff options
author | Michael Widenius <monty@askmonty.org> | 2009-07-02 13:15:33 +0300 |
---|---|---|
committer | Michael Widenius <monty@askmonty.org> | 2009-07-02 13:15:33 +0300 |
commit | 9db357e2bfebf9207a507c4a2244499899a960a2 (patch) | |
tree | ec4503d3142abaf8379c46752cb5a3adcd7c9471 /strings | |
parent | f1183fc160b504af3c976d7e1e95e19128b6ecbe (diff) | |
download | mariadb-git-9db357e2bfebf9207a507c4a2244499899a960a2.tar.gz |
Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters 0x00-0x7f
This allows us to skip and speed up some very common character converts that MySQL is doing when sending data to the client
and this gives us a nice speed increase for most queries that uses only characters in the range 0x00-0x7f.
This code is based on Alexander Barkov's code that he has done in MySQL 6.0
include/m_ctype.h:
Added MY_CS_NONASCII marker
libmysqld/lib_sql.cc:
Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
mysys/charset.c:
Mark character sets with MY_CS_NONASCII
scripts/mysql_install_db.sh:
Fixed messages to refer to MariaDB instead of MySQL
sql/protocol.cc:
Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
sql/protocol.h:
Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
sql/sql_string.cc:
Quicker copy of strings with no characters above 0x7f
strings/conf_to_src.c:
Added printing of MY_CS_NONASCII
strings/ctype-extra.c:
Mark incompatible character sets with MY_CS_NONASCII
Removed duplicated character set geostd
strings/ctype-sjis.c:
Mark incompatible character sets with MY_CS_NONASCII
strings/ctype-uca.c:
Mark incompatible character sets with MY_CS_NONASCII
strings/ctype-ucs2.c:
Mark incompatible character sets with MY_CS_NONASCII
strings/ctype-utf8.c:
Mark incompatible character sets with MY_CS_NONASCII
strings/ctype.c:
Added function to check if character set is compatible with latin1 in ranges 0x00-0x7f
Diffstat (limited to 'strings')
-rw-r--r-- | strings/conf_to_src.c | 5 | ||||
-rw-r--r-- | strings/ctype-extra.c | 70 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 4 | ||||
-rw-r--r-- | strings/ctype-uca.c | 38 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 4 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 2 | ||||
-rw-r--r-- | strings/ctype.c | 20 |
7 files changed, 49 insertions, 94 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c index 7e742050aa8..aa40a269848 100644 --- a/strings/conf_to_src.c +++ b/strings/conf_to_src.c @@ -184,11 +184,12 @@ void dispcset(FILE *f,CHARSET_INFO *cs) { fprintf(f,"{\n"); fprintf(f," %d,%d,%d,\n",cs->number,0,0); - fprintf(f," MY_CS_COMPILED%s%s%s%s,\n", + fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n", cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "", cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "", is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "", - my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : ""); + my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "", + !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": ""); if (cs->name) { diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index 75244e40435..64cf30e3673 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -6804,7 +6804,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_swe7 { 10,0,0, - MY_CS_COMPILED|MY_CS_PRIMARY, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_NONASCII, "swe7", /* cset name */ "swe7_swedish_ci", /* coll name */ "", /* comment */ @@ -8454,7 +8454,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_swe7 { 82,0,0, - MY_CS_COMPILED|MY_CS_BINSORT, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, "swe7", /* cset name */ "swe7_bin", /* coll name */ "", /* comment */ @@ -8550,72 +8550,6 @@ CHARSET_INFO compiled_charsets[] = { } , #endif -#ifdef HAVE_CHARSET_geostd8 -{ - 92,0,0, - MY_CS_COMPILED|MY_CS_PRIMARY, - "geostd8", /* cset name */ - "geostd8_general_ci", /* coll name */ - "", /* comment */ - NULL, /* tailoring */ - ctype_geostd8_general_ci, /* ctype */ - to_lower_geostd8_general_ci, /* lower */ - to_upper_geostd8_general_ci, /* upper */ - sort_order_geostd8_general_ci, /* sort_order */ - NULL, /* contractions */ - NULL, /* sort_order_big*/ - to_uni_geostd8_general_ci, /* to_uni */ - NULL, /* from_uni */ - my_unicase_default, /* caseinfo */ - NULL, /* state map */ - NULL, /* ident map */ - 1, /* strxfrm_multiply*/ - 1, /* caseup_multiply*/ - 1, /* casedn_multiply*/ - 1, /* mbminlen */ - 1, /* mbmaxlen */ - 0, /* min_sort_char */ - 255, /* max_sort_char */ - ' ', /* pad_char */ - 0, /* escape_with_backslash_is_dangerous */ - &my_charset_8bit_handler, - &my_collation_8bit_simple_ci_handler, -} -, -#endif -#ifdef HAVE_CHARSET_geostd8 -{ - 93,0,0, - MY_CS_COMPILED|MY_CS_BINSORT, - "geostd8", /* cset name */ - "geostd8_bin", /* coll name */ - "", /* comment */ - NULL, /* tailoring */ - ctype_geostd8_bin, /* ctype */ - to_lower_geostd8_bin, /* lower */ - to_upper_geostd8_bin, /* upper */ - NULL, /* sort_order */ - NULL, /* contractions */ - NULL, /* sort_order_big*/ - to_uni_geostd8_bin, /* to_uni */ - NULL, /* from_uni */ - my_unicase_default, /* caseinfo */ - NULL, /* state map */ - NULL, /* ident map */ - 1, /* strxfrm_multiply*/ - 1, /* caseup_multiply*/ - 1, /* casedn_multiply*/ - 1, /* mbminlen */ - 1, /* mbmaxlen */ - 0, /* min_sort_char */ - 255, /* max_sort_char */ - ' ', /* pad_char */ - 0, /* escape_with_backslash_is_dangerous */ - &my_charset_8bit_handler, - &my_collation_8bit_bin_handler, -} -, -#endif #ifdef HAVE_CHARSET_latin1 { 94,0,0, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 3925b76869c..15c66d85ad6 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4672,7 +4672,7 @@ static MY_CHARSET_HANDLER my_charset_handler= CHARSET_INFO my_charset_sjis_japanese_ci= { 13,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM, /* state */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_NONASCII, /* state */ "sjis", /* cs name */ "sjis_japanese_ci", /* name */ "", /* comment */ @@ -4704,7 +4704,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci= CHARSET_INFO my_charset_sjis_bin= { 88,0,0, /* number */ - MY_CS_COMPILED|MY_CS_BINSORT, /* state */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, /* state */ "sjis", /* cs name */ "sjis_bin", /* name */ "", /* comment */ diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 1ed758bc105..c4c69e395d9 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -8086,7 +8086,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = CHARSET_INFO my_charset_ucs2_unicode_ci= { 128,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_unicode_ci", /* name */ "", /* comment */ @@ -8118,7 +8118,7 @@ CHARSET_INFO my_charset_ucs2_unicode_ci= CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= { 129,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_icelandic_ci",/* name */ "", /* comment */ @@ -8150,7 +8150,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= CHARSET_INFO my_charset_ucs2_latvian_uca_ci= { 130,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_latvian_ci", /* name */ "", /* comment */ @@ -8182,7 +8182,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci= CHARSET_INFO my_charset_ucs2_romanian_uca_ci= { 131,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_romanian_ci", /* name */ "", /* comment */ @@ -8214,7 +8214,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci= CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= { 132,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_slovenian_ci",/* name */ "", /* comment */ @@ -8246,7 +8246,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= CHARSET_INFO my_charset_ucs2_polish_uca_ci= { 133,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_polish_ci", /* name */ "", /* comment */ @@ -8278,7 +8278,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci= CHARSET_INFO my_charset_ucs2_estonian_uca_ci= { 134,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_estonian_ci", /* name */ "", /* comment */ @@ -8310,7 +8310,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci= CHARSET_INFO my_charset_ucs2_spanish_uca_ci= { 135,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_spanish_ci", /* name */ "", /* comment */ @@ -8342,7 +8342,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci= CHARSET_INFO my_charset_ucs2_swedish_uca_ci= { 136,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_swedish_ci", /* name */ "", /* comment */ @@ -8374,7 +8374,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci= CHARSET_INFO my_charset_ucs2_turkish_uca_ci= { 137,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_turkish_ci", /* name */ "", /* comment */ @@ -8406,7 +8406,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci= CHARSET_INFO my_charset_ucs2_czech_uca_ci= { 138,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_czech_ci", /* name */ "", /* comment */ @@ -8439,7 +8439,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_ci= CHARSET_INFO my_charset_ucs2_danish_uca_ci= { 139,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_danish_ci", /* name */ "", /* comment */ @@ -8471,7 +8471,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_ci= CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= { 140,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_lithuanian_ci",/* name */ "", /* comment */ @@ -8503,7 +8503,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= CHARSET_INFO my_charset_ucs2_slovak_uca_ci= { 141,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_slovak_ci", /* name */ "", /* comment */ @@ -8535,7 +8535,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_ci= CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= { 142,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_spanish2_ci", /* name */ "", /* comment */ @@ -8568,7 +8568,7 @@ CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= CHARSET_INFO my_charset_ucs2_roman_uca_ci= { 143,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_roman_ci", /* name */ "", /* comment */ @@ -8601,7 +8601,7 @@ CHARSET_INFO my_charset_ucs2_roman_uca_ci= CHARSET_INFO my_charset_ucs2_persian_uca_ci= { 144,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_persian_ci", /* name */ "", /* comment */ @@ -8634,7 +8634,7 @@ CHARSET_INFO my_charset_ucs2_persian_uca_ci= CHARSET_INFO my_charset_ucs2_esperanto_uca_ci= { 145,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_esperanto_ci",/* name */ "", /* comment */ @@ -8667,7 +8667,7 @@ CHARSET_INFO my_charset_ucs2_esperanto_uca_ci= CHARSET_INFO my_charset_ucs2_hungarian_uca_ci= { 146,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_hungarian_ci",/* name */ "", /* comment */ diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 421bcd2f3cc..1eccefe7e40 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1717,7 +1717,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= CHARSET_INFO my_charset_ucs2_general_ci= { 35,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_general_ci", /* name */ "", /* comment */ @@ -1749,7 +1749,7 @@ CHARSET_INFO my_charset_ucs2_general_ci= CHARSET_INFO my_charset_ucs2_bin= { 90,0,0, /* number */ - MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_bin", /* name */ "", /* comment */ diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 2afb1930f3d..a6467ffae33 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -4204,7 +4204,7 @@ static MY_CHARSET_HANDLER my_charset_filename_handler= CHARSET_INFO my_charset_filename= { 17,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN|MY_CS_NONASCII, "filename", /* cs name */ "filename", /* name */ "", /* comment */ diff --git a/strings/ctype.c b/strings/ctype.c index 17ad1256e74..75d76aceea3 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -405,3 +405,23 @@ my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs) } return 1; } + + +/* + Shared function between conf_to_src and mysys. + Check if a 8bit character set is compatible with + ascii on the range 0x00..0x7F. +*/ +my_bool +my_charset_is_ascii_compatible(CHARSET_INFO *cs) +{ + uint i; + if (!cs->tab_to_uni) + return 1; + for (i= 0; i < 128; i++) + { + if (cs->tab_to_uni[i] != i) + return 0; + } + return 1; +} |