diff options
author | Alexander Barkov <bar@mariadb.org> | 2014-09-04 21:58:48 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2014-09-04 21:58:48 +0400 |
commit | 9392d0e280c622c56d1b533762d8b577ed5b82c6 (patch) | |
tree | 11dc8978b370ead259ae9eaa419e2a2710feaf73 /strings | |
parent | bf4347eba07a7e8f11af07a684381d48d673e028 (diff) | |
download | mariadb-git-9392d0e280c622c56d1b533762d8b577ed5b82c6.tar.gz |
- MDEV-6695 Bad column name for UCS2 string literals
The Item_string constructors called set_name() on the source string,
which was wrong because, in the case of UCS2/UTF16/UTF32, the source value
might not be a well-formed string (e.g. have an incomplete leftmost character).
Now set_name() is called on str_value after it is copied
(with optional left zero-padding) from the source string.
- MDEV-6694 Illegal mix of collation with a PS parameter
Item_param::convert_str_value() did not set the repertoire.
Introducing a new structure MY_STRING_METADATA to collect the
character length and repertoire of a string in a single loop,
to avoid two separate loops. Adding a new class Item_basic_value::Metadata
as a convenience wrapper around MY_STRING_METADATA, to reuse the
code between Item_string and Item_param.
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype.c | 93 |
1 files changed, 86 insertions, 7 deletions
diff --git a/strings/ctype.c b/strings/ctype.c index 566dc0331c4..ba130078153 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -818,23 +818,102 @@ my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len) } +uint +my_string_repertoire_8bit(CHARSET_INFO *cs, const char *str, ulong length) +{ + const char *strend; + if ((cs->state & MY_CS_NONASCII) && length > 0) + return MY_REPERTOIRE_UNICODE30; + for (strend= str + length; str < strend; str++) + { + if (((uchar) *str) > 0x7F) + return MY_REPERTOIRE_UNICODE30; + } + return MY_REPERTOIRE_ASCII; +} + + +static void +my_string_metadata_init(MY_STRING_METADATA *metadata) +{ + metadata->repertoire= MY_REPERTOIRE_ASCII; + metadata->char_length= 0; +} + + +/** + This should probably eventually go as a virtual function into + MY_CHARSET_HANDLER or MY_COLLATION_HANDLER. +*/ +static void +my_string_metadata_get_mb(MY_STRING_METADATA *metadata, + CHARSET_INFO *cs, const char *str, ulong length) +{ + const char *strend= str + length; + for (my_string_metadata_init(metadata) ; + str < strend; + metadata->char_length++) + { + my_wc_t wc; + int mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) str, + (const uchar *) strend); + if (mblen > 0) /* Assigned character */ + { + if (wc > 0x7F) + metadata->repertoire|= MY_REPERTOIRE_EXTENDED; + str+= mblen; + } + else if (mblen == MY_CS_ILSEQ) /* Bad byte sequence */ + { + metadata->repertoire|= MY_REPERTOIRE_EXTENDED; + str++; + } + else if (mblen > MY_CS_TOOSMALL) /* Unassigned character */ + { + metadata->repertoire|= MY_REPERTOIRE_EXTENDED; + str+= (-mblen); + } + else /* Incomplete character, premature end-of-line */ + { + metadata->repertoire|= MY_REPERTOIRE_EXTENDED; /* Just in case */ + break; + } + } +} + + +/** + Collect string metadata: length in characters and repertoire. 
+*/ +void +my_string_metadata_get(MY_STRING_METADATA *metadata, + CHARSET_INFO *cs, const char *str, ulong length) +{ + if (cs->mbmaxlen == 1 && !(cs->state & MY_CS_NONASCII)) + { + metadata->char_length= length; + metadata->repertoire= my_string_repertoire_8bit(cs, str, length); + } + else + { + my_string_metadata_get_mb(metadata, cs, str, length); + } +} + + /* Check repertoire: detect pure ascii strings */ uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length) { - const char *strend= str + length; - if (cs->mbminlen == 1) + if (cs->mbminlen == 1 && !(cs->state & MY_CS_NONASCII)) { - for ( ; str < strend; str++) - { - if (((uchar) *str) > 0x7F) - return MY_REPERTOIRE_UNICODE30; - } + return my_string_repertoire_8bit(cs, str, length); } else { + const char *strend= str + length; my_wc_t wc; int chlen; for (; |