diff options
author | unknown <bar@mysql.com/bar.myoffice.izhnet.ru> | 2007-08-03 15:25:23 +0500 |
---|---|---|
committer | unknown <bar@mysql.com/bar.myoffice.izhnet.ru> | 2007-08-03 15:25:23 +0500 |
commit | 53df09a9a6a99b82e2a8869eb16737a78772b29e (patch) | |
tree | 01c0ee2f5244a4d68fc6cdfb6555fa1af4589a8c /strings | |
parent | b307fc4d8fd5380cec948c07550b5ae73624e274 (diff) | |
download | mariadb-git-53df09a9a6a99b82e2a8869eb16737a78772b29e.tar.gz |
Bug#28875 Conversion between ASCII and LATIN1 charsets does not function
(Regression, caused by a patch for the bug 22646).
Problem: when result type of date_format() was changed from
binary string to character string, mixing date_format()
with a ascii column in CONCAT() stopped to work.
Fix:
- adding "repertoire" flag into DTCollation class,
to mark items which can return only pure ASCII strings.
- allow character set conversion from pure ASCII to other character sets.
include/m_ctype.h:
Defining new flags.
Adding new function prototypes.
mysql-test/r/ctype_ucs.result:
Adding tests.
mysql-test/r/ctype_utf8.result:
Adding tests.
mysql-test/r/func_time.result:
Adding tests.
mysql-test/t/ctype_ucs.test:
Adding tests.
mysql-test/t/ctype_utf8.test:
Adding tests.
mysql-test/t/func_time.test:
Adding test.
mysys/charset.c:
Adding pure ASCII detection when loading a dynamic character set.
sql/item.cc:
- Moving detection of a Unicode superset into function.
- Adding detection of a ASCII subset.
- Adding creation of to-ASCII character set convertor when
safe_charset_converter() failed and when the argument.
repertoire is know to be pure ASCII.
sql/item.h:
- Adding "repertoire" member into DTCollation class.
- Adding "repertoire" argument to constructors.
- Adding new methods:
set_repertoire_from_charset()
set_repertoire_from_value()
sql/item_func.cc:
Adding "repertoire" argument.
sql/item_strfunc.cc:
Adding "repertoire" argument.
sql/item_timefunc.cc:
Initializing the result repertoire taking into account the "is_ascii"
flag of the current locale.
sql/sql_lex.cc:
Detect 7bit strings, return in Lex->text_string_is_7bit.
sql/sql_lex.h:
Adding new member into LEX structure.
Adding new member into Lex_input_stream
sql/sql_string.cc:
Allow simple copy from pure ASCII to a ASCII-based character set.
sql/sql_yacc.yy:
Depening on Lex->text_string_is_7bit and character set features,
create Item_string with MY_REPERTOIRE_ASCII when it is possible.
strings/conf_to_src.c:
- Adding printing of the "MY_CS_PUREASCII" flag
- Adding printing of copyright
strings/ctype-extra.c:
Recreating ctype-extra.c: ascii_general_ci and ascii_bin
are now marked with MY_CS_PUREASCII flag.
strings/ctype.c:
Adding new functions.
Diffstat (limited to 'strings')
-rw-r--r-- | strings/conf_to_src.c | 33 | ||||
-rw-r--r-- | strings/ctype-extra.c | 7 | ||||
-rw-r--r-- | strings/ctype.c | 86 |
3 files changed, 119 insertions, 7 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c index e2ac9846c85..dc2a300a2ec 100644 --- a/strings/conf_to_src.c +++ b/strings/conf_to_src.c @@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs) cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0; } + void dispcset(FILE *f,CHARSET_INFO *cs) { fprintf(f,"{\n"); fprintf(f," %d,%d,%d,\n",cs->number,0,0); - fprintf(f," MY_CS_COMPILED%s%s%s,\n", - cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "", - cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "", - is_case_sensitive(cs) ? "|MY_CS_CSSORT" : ""); + fprintf(f," MY_CS_COMPILED%s%s%s%s,\n", + cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "", + cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "", + is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "", + my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : ""); if (cs->name) { @@ -243,6 +245,28 @@ void dispcset(FILE *f,CHARSET_INFO *cs) } +static void +fprint_copyright(FILE *file) +{ + fprintf(file, +"/* Copyright (C) 2000-2007 MySQL AB\n" +"\n" +" This program is free software; you can redistribute it and/or modify\n" +" it under the terms of the GNU General Public License as published by\n" +" the Free Software Foundation; version 2 of the License.\n" +"\n" +" This program is distributed in the hope that it will be useful,\n" +" but WITHOUT ANY WARRANTY; without even the implied warranty of\n" +" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" +" GNU General Public License for more details.\n" +"\n" +" You should have received a copy of the GNU General Public License\n" +" along with this program; if not, write to the Free Software\n" +" Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */\n" +"\n"); +} + + int main(int argc, char **argv __attribute__((unused))) { @@ -283,6 +307,7 @@ main(int argc, char **argv __attribute__((unused))) "directory:\n"); fprintf(f, " ./conf_to_src ../sql/share/charsets/ > FILE\n"); fprintf(f, "*/\n\n"); + fprint_copyright(f); fprintf(f,"#include <my_global.h>\n"); fprintf(f,"#include <m_ctype.h>\n\n"); diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index 1c20828ea54..2a7fcbd383e 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -5,7 +5,8 @@ To re-generate, run the following in the strings/ directory: ./conf_to_src ../sql/share/charsets/ > FILE */ -/* Copyright (C) 2000-2003 MySQL AB + +/* Copyright (C) 2000-2007 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -6721,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_ascii { 11,0,0, - MY_CS_COMPILED|MY_CS_PRIMARY, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII, "ascii", /* cset name */ "ascii_general_ci", /* coll name */ "", /* comment */ @@ -7810,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_ascii { 65,0,0, - MY_CS_COMPILED|MY_CS_BINSORT, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII, "ascii", /* cset name */ "ascii_bin", /* coll name */ "", /* comment */ diff --git a/strings/ctype.c b/strings/ctype.c index e7399c5438b..372a1a8a468 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len, my_xml_parser_free(&p); return rc; } + + +/* + Check repertoire: detect pure ascii strings +*/ +uint +my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length) +{ + const char *strend= str + length; + if (cs->mbminlen == 1) + { + for ( ; str < strend; str++) + { + if (((uchar) *str) > 0x7F) + return MY_REPERTOIRE_UNICODE30; + } + } + else + { + my_wc_t wc; + int chlen; + for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen) + { + if (wc > 0x7F) + return MY_REPERTOIRE_UNICODE30; + } + } + return MY_REPERTOIRE_ASCII; +} + + +/* + Detect whether a character set is ASCII compatible. + + Returns TRUE for: + + - all 8bit character sets whose Unicode mapping of 0x7B is '{' + (ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS") + + - all multi-byte character sets having mbminlen == 1 + (ignores ucs2 whose mbminlen is 2) + + TODO: + + When merging to 5.2, this function should be changed + to check a new flag MY_CS_NONASCII, + + return (cs->flag & MY_CS_NONASCII) ? 0 : 1; + + This flag was previously added into 5.2 under terms + of WL#3759 "Optimize identifier conversion in client-server protocol" + especially to mark character sets not compatible with ASCII. + + We won't backport this flag to 5.0 or 5.1. + This function is Ok for 5.0 and 5.1, because we're not going + to introduce new tricky character sets between 5.0 and 5.2. +*/ +my_bool +my_charset_is_ascii_based(CHARSET_INFO *cs) +{ + return + (cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') || + (cs->mbminlen == 1 && cs->mbmaxlen > 1); +} + + +/* + Detect if a character set is 8bit, + and it is pure ascii, i.e. doesn't have + characters outside U+0000..U+007F + This functions is shared between "conf_to_src" + and dynamic charsets loader in "mysqld". +*/ +my_bool +my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs) +{ + size_t code; + if (!cs->tab_to_uni) + return 0; + for (code= 0; code < 256; code++) + { + if (cs->tab_to_uni[code] > 0x7F) + return 0; + } + return 1; +} |