summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.org>2014-09-04 21:58:48 +0400
committerAlexander Barkov <bar@mariadb.org>2014-09-04 21:58:48 +0400
commit9392d0e280c622c56d1b533762d8b577ed5b82c6 (patch)
tree11dc8978b370ead259ae9eaa419e2a2710feaf73 /strings
parentbf4347eba07a7e8f11af07a684381d48d673e028 (diff)
downloadmariadb-git-9392d0e280c622c56d1b533762d8b577ed5b82c6.tar.gz
- MDEV-6695 Bad column name for UCS2 string literals
The Item_string constructors called set_name() on the source string, which was wrong because in the case of UCS2/UTF16/UTF32 the source value might not be a well-formed string (e.g. it may have an incomplete leftmost character). Now set_name() is called on str_value after it has been copied (optionally with left zero-padding) from the source string. - MDEV-6694 Illegal mix of collation with a PS parameter: Item_param::convert_str_value() did not set repertoire. Introducing a new structure MY_STRING_METADATA to collect the character length and repertoire of a string in a single loop, avoiding two separate loops. Adding a new class Item_basic_value::Metadata as a convenience wrapper around MY_STRING_METADATA, to reuse the code between Item_string and Item_param.
Diffstat (limited to 'strings')
-rw-r--r--strings/ctype.c93
1 files changed, 86 insertions, 7 deletions
diff --git a/strings/ctype.c b/strings/ctype.c
index 566dc0331c4..ba130078153 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -818,23 +818,102 @@ my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len)
}
+uint /* Classify a byte string: returns MY_REPERTOIRE_ASCII or MY_REPERTOIRE_UNICODE30 */
+my_string_repertoire_8bit(CHARSET_INFO *cs, const char *str, ulong length)
+{
+ const char *strend;
+ if ((cs->state & MY_CS_NONASCII) && length > 0) /* flagged charset: any non-empty string is reported as non-ASCII */
+ return MY_REPERTOIRE_UNICODE30;
+ for (strend= str + length; str < strend; str++) /* byte-by-byte scan; the str parameter itself is advanced */
+ {
+ if (((uchar) *str) > 0x7F) /* compare as unsigned so high bytes are not seen as negative */
+ return MY_REPERTOIRE_UNICODE30;
+ }
+ return MY_REPERTOIRE_ASCII; /* empty input, or every byte is <= 0x7F */
+}
+
+
+static void /* Reset *metadata to the state of an empty string */
+my_string_metadata_init(MY_STRING_METADATA *metadata)
+{
+ metadata->repertoire= MY_REPERTOIRE_ASCII; /* starting value; callers OR further repertoire bits into it */
+ metadata->char_length= 0; /* no characters counted yet */
+}
+
+
+/** Fill *metadata (character count and repertoire) by decoding str with cs->cset->mb_wc().
+ This should probably eventually go as a virtual function into
+ MY_CHARSET_HANDLER or MY_COLLATION_HANDLER.
+*/
+static void
+my_string_metadata_get_mb(MY_STRING_METADATA *metadata,
+ CHARSET_INFO *cs, const char *str, ulong length)
+{
+ const char *strend= str + length; /* one past the last input byte */
+ for (my_string_metadata_init(metadata) ; /* reset the output once, before the first iteration */
+ str < strend;
+ metadata->char_length++) /* runs after every iteration that does not break, so bad and unassigned sequences count as one character each */
+ {
+ my_wc_t wc;
+ int mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) str,
+ (const uchar *) strend); /* result is interpreted by the branches below */
+ if (mblen > 0) /* Assigned character */
+ {
+ if (wc > 0x7F) /* decoded code point is outside the 7-bit range */
+ metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
+ str+= mblen; /* positive result is used as the number of bytes consumed */
+ }
+ else if (mblen == MY_CS_ILSEQ) /* Bad byte sequence */
+ {
+ metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
+ str++; /* skip a single byte and try decoding again from the next one */
+ }
+ else if (mblen > MY_CS_TOOSMALL) /* Unassigned character */
+ {
+ metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
+ str+= (-mblen); /* presumably mblen is the negated byte length here - TODO confirm against the mb_wc contract */
+ }
+ else /* Incomplete character, premature end-of-line */
+ {
+ metadata->repertoire|= MY_REPERTOIRE_EXTENDED; /* Just in case */
+ break; /* leaves the loop without the increment: the truncated tail is not counted */
+ }
+ }
+}
+
+
+/**
+ Collect string metadata: length in characters and repertoire.
+*/
+void
+my_string_metadata_get(MY_STRING_METADATA *metadata,
+ CHARSET_INFO *cs, const char *str, ulong length)
+{
+ if (cs->mbmaxlen == 1 && !(cs->state & MY_CS_NONASCII)) /* fast path: no decoding, only for charsets without the NONASCII flag */
+ {
+ metadata->char_length= length; /* character count is taken to be the byte count */
+ metadata->repertoire= my_string_repertoire_8bit(cs, str, length); /* its own MY_CS_NONASCII test cannot fire here: the flag is excluded above */
+ }
+ else
+ {
+ my_string_metadata_get_mb(metadata, cs, str, length); /* decode character by character; this call also initializes *metadata */
+ }
+}
+
+
/*
Check repertoire: detect pure ascii strings
*/
uint
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
{
- const char *strend= str + length;
- if (cs->mbminlen == 1)
+ if (cs->mbminlen == 1 && !(cs->state & MY_CS_NONASCII))
{
- for ( ; str < strend; str++)
- {
- if (((uchar) *str) > 0x7F)
- return MY_REPERTOIRE_UNICODE30;
- }
+ return my_string_repertoire_8bit(cs, str, length);
}
else
{
+ const char *strend= str + length;
my_wc_t wc;
int chlen;
for (;