summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
author: unknown <bar@mysql.com/bar.myoffice.izhnet.ru> 2007-08-03 15:25:23 +0500
committer: unknown <bar@mysql.com/bar.myoffice.izhnet.ru> 2007-08-03 15:25:23 +0500
commit: 53df09a9a6a99b82e2a8869eb16737a78772b29e (patch)
tree: 01c0ee2f5244a4d68fc6cdfb6555fa1af4589a8c /strings
parent: b307fc4d8fd5380cec948c07550b5ae73624e274 (diff)
download: mariadb-git-53df09a9a6a99b82e2a8869eb16737a78772b29e.tar.gz
Bug#28875 Conversion between ASCII and LATIN1 charsets does not function
(Regression, caused by a patch for bug 22646). Problem: when the result type of date_format() was changed from binary string to character string, mixing date_format() with an ASCII column in CONCAT() stopped working. Fix: - adding "repertoire" flag into DTCollation class, to mark items which can return only pure ASCII strings. - allow character set conversion from pure ASCII to other character sets. include/m_ctype.h: Defining new flags. Adding new function prototypes. mysql-test/r/ctype_ucs.result: Adding tests. mysql-test/r/ctype_utf8.result: Adding tests. mysql-test/r/func_time.result: Adding tests. mysql-test/t/ctype_ucs.test: Adding tests. mysql-test/t/ctype_utf8.test: Adding tests. mysql-test/t/func_time.test: Adding test. mysys/charset.c: Adding pure ASCII detection when loading a dynamic character set. sql/item.cc: - Moving detection of a Unicode superset into a function. - Adding detection of an ASCII subset. - Adding creation of to-ASCII character set converter when safe_charset_converter() failed and when the argument repertoire is known to be pure ASCII. sql/item.h: - Adding "repertoire" member into DTCollation class. - Adding "repertoire" argument to constructors. - Adding new methods: set_repertoire_from_charset() set_repertoire_from_value() sql/item_func.cc: Adding "repertoire" argument. sql/item_strfunc.cc: Adding "repertoire" argument. sql/item_timefunc.cc: Initializing the result repertoire taking into account the "is_ascii" flag of the current locale. sql/sql_lex.cc: Detect 7bit strings, return in Lex->text_string_is_7bit. sql/sql_lex.h: Adding new member into LEX structure. Adding new member into Lex_input_stream. sql/sql_string.cc: Allow simple copy from pure ASCII to an ASCII-based character set. sql/sql_yacc.yy: Depending on Lex->text_string_is_7bit and character set features, create Item_string with MY_REPERTOIRE_ASCII when it is possible. 
strings/conf_to_src.c: - Adding printing of the "MY_CS_PUREASCII" flag - Adding printing of copyright strings/ctype-extra.c: Recreating ctype-extra.c: ascii_general_ci and ascii_bin are now marked with MY_CS_PUREASCII flag. strings/ctype.c: Adding new functions.
Diffstat (limited to 'strings')
-rw-r--r-- strings/conf_to_src.c 33
-rw-r--r-- strings/ctype-extra.c 7
-rw-r--r-- strings/ctype.c 86
3 files changed, 119 insertions, 7 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c
index e2ac9846c85..dc2a300a2ec 100644
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
}
+
void dispcset(FILE *f,CHARSET_INFO *cs)
{
fprintf(f,"{\n");
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
- fprintf(f," MY_CS_COMPILED%s%s%s,\n",
- cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
- cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
- is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "");
+ fprintf(f," MY_CS_COMPILED%s%s%s%s,\n",
+ cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
+ cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
+ is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
+ my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
if (cs->name)
{
@@ -243,6 +245,28 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
}
+static void
+fprint_copyright(FILE *file)
+{
+ fprintf(file,
+"/* Copyright (C) 2000-2007 MySQL AB\n"
+"\n"
+" This program is free software; you can redistribute it and/or modify\n"
+" it under the terms of the GNU General Public License as published by\n"
+" the Free Software Foundation; version 2 of the License.\n"
+"\n"
+" This program is distributed in the hope that it will be useful,\n"
+" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
+" GNU General Public License for more details.\n"
+"\n"
+" You should have received a copy of the GNU General Public License\n"
+" along with this program; if not, write to the Free Software\n"
+" Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */\n"
+"\n");
+}
+
+
int
main(int argc, char **argv __attribute__((unused)))
{
@@ -283,6 +307,7 @@ main(int argc, char **argv __attribute__((unused)))
"directory:\n");
fprintf(f, " ./conf_to_src ../sql/share/charsets/ > FILE\n");
fprintf(f, "*/\n\n");
+ fprint_copyright(f);
fprintf(f,"#include <my_global.h>\n");
fprintf(f,"#include <m_ctype.h>\n\n");
diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c
index 1c20828ea54..2a7fcbd383e 100644
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@@ -5,7 +5,8 @@
To re-generate, run the following in the strings/ directory:
./conf_to_src ../sql/share/charsets/ > FILE
*/
-/* Copyright (C) 2000-2003 MySQL AB
+
+/* Copyright (C) 2000-2007 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -6721,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
#ifdef HAVE_CHARSET_ascii
{
11,0,0,
- MY_CS_COMPILED|MY_CS_PRIMARY,
+ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
"ascii", /* cset name */
"ascii_general_ci", /* coll name */
"", /* comment */
@@ -7810,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
#ifdef HAVE_CHARSET_ascii
{
65,0,0,
- MY_CS_COMPILED|MY_CS_BINSORT,
+ MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
"ascii", /* cset name */
"ascii_bin", /* coll name */
"", /* comment */
diff --git a/strings/ctype.c b/strings/ctype.c
index e7399c5438b..372a1a8a468 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
my_xml_parser_free(&p);
return rc;
}
+
+
+/*
+ Check repertoire: detect pure ascii strings
+*/
+uint
+my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
+{
+ const char *strend= str + length;
+ if (cs->mbminlen == 1)
+ {
+ for ( ; str < strend; str++)
+ {
+ if (((uchar) *str) > 0x7F)
+ return MY_REPERTOIRE_UNICODE30;
+ }
+ }
+ else
+ {
+ my_wc_t wc;
+ int chlen;
+ for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
+ {
+ if (wc > 0x7F)
+ return MY_REPERTOIRE_UNICODE30;
+ }
+ }
+ return MY_REPERTOIRE_ASCII;
+}
+
+
+/*
+ Detect whether a character set is ASCII compatible.
+
+ Returns TRUE for:
+
+ - all 8bit character sets whose Unicode mapping of 0x7B is '{'
+ (ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
+
+ - all multi-byte character sets having mbminlen == 1
+ (ignores ucs2 whose mbminlen is 2)
+
+ TODO:
+
+ When merging to 5.2, this function should be changed
+ to check a new flag MY_CS_NONASCII,
+
+ return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
+
+ This flag was previously added into 5.2 under terms
+ of WL#3759 "Optimize identifier conversion in client-server protocol"
+ especially to mark character sets not compatible with ASCII.
+
+ We won't backport this flag to 5.0 or 5.1.
+ This function is Ok for 5.0 and 5.1, because we're not going
+ to introduce new tricky character sets between 5.0 and 5.2.
+*/
+my_bool
+my_charset_is_ascii_based(CHARSET_INFO *cs)
+{
+ return
+ (cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
+ (cs->mbminlen == 1 && cs->mbmaxlen > 1);
+}
+
+
+/*
+ Detect if a character set is 8bit,
+ and it is pure ascii, i.e. doesn't have
+ characters outside U+0000..U+007F
+ This function is shared between "conf_to_src"
+ and dynamic charsets loader in "mysqld".
+*/
+my_bool
+my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
+{
+ size_t code;
+ if (!cs->tab_to_uni)
+ return 0;
+ for (code= 0; code < 256; code++)
+ {
+ if (cs->tab_to_uni[code] > 0x7F)
+ return 0;
+ }
+ return 1;
+}