From 8994fad85db18b4ab31fc67e2f8e15f1203d0b1a Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Wed, 24 Feb 2010 13:15:34 +0400 Subject: Backporting WL#1213 config/ac-macros/character_sets.m4: - Adding configure definitions for utf8mb4, utf16, utf32 include/config-win.h: - Enabling utf8mb4, utf16, utf32 in Windows build include/m_ctype.h: - Adding new flags - Adding new shared functions prototypes mysql-test/include/ctype_datetime.inc: - Adding test to check that datetime functions work with "real" multibyte character sets. mysql-test/include/ctype_like.inc: - Adding LIKE tests mysql-test/include/have_utf16.inc: New file mysql-test/include/have_utf32.inc: New file mysql-test/include/have_utf8mb4.inc: New file mysql-test/r/ctype_ldml.result: - Adding tests for utf8mb4, utf16, utf32 mysql-test/r/ctype_many.result: - Adding tests to check superset/subset relations between all Unicode character sets. mysql-test/r/ctype_utf16.result: New file mysql-test/r/ctype_utf16_uca.result: New file mysql-test/r/ctype_utf32.result: New file mysql-test/r/ctype_utf32_uca.result: New file mysql-test/r/ctype_utf8.result: - Adding tests for utf8mb3 alias mysql-test/r/ctype_utf8mb4.result: - Adding tests for utf8mb4 mysql-test/r/have_utf16.require: New file mysql-test/r/have_utf32.require: New file mysql-test/r/have_utf8mb4.require: New file mysql-test/std_data/Index.xml: - Adding tests for loadable utf8mb4, utf16, utf32 collations mysql-test/suite/sys_vars/r/character_set_client_basic.result: - Adding tests for utf16, utf32. 
- Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_connection_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_database_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_results_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/t/character_set_client_basic.test: - Adding tests for new character sets mysql-test/suite/sys_vars/t/character_set_connection_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_database_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_results_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/t/ctype_ldml.test: - Adding tests for dynamic utf8mb4, utf16, utf32 collations mysql-test/t/ctype_many.test: - Adding tests to check superset/subset relations between all Unicode character sets mysql-test/t/ctype_utf16.test: New file mysql-test/t/ctype_utf16_uca.test: New file mysql-test/t/ctype_utf32.test: New file mysql-test/t/ctype_utf32_uca.test: New file mysql-test/t/ctype_utf8.test: - Adding tests for utf8mb4 alias mysql-test/t/ctype_utf8mb4.test: New file mysys/charset-def.c: - Adding initialization of utf8mb4, utf16, utf32 built-in collations mysys/charset.c: - Adding initialization of utf8mb4, utf16, utf32 dynamic collations sql/field.cc: - Fixing "truncated" error with datetime functions: Force conversion in case of non-ascii character sets. 
sql/item.cc: - Adding superset/subset relation check for utf8mb4/utf8 sql/item_strfunc.cc: - Fixing a problem with CHAR(x USING utf32) sql/sql_string.cc: - Fixing problems with zero padding for UTF32 sql/sql_table.cc: - Fixing buffer size, to make utf32 comma fit. strings/ctype-mb.c: - Making handlers for multi-byte binary collations public strings/ctype-uca.c: - Adding definitions for utf8mb4, utf16, utf32 UCA collations strings/ctype-ucs2.c: - Adding functions which are shared between ucs2, utf16, utf32 - Adding utf16 implementation - Adding utf32 implementation strings/ctype-utf8.c: - Adding functions shared between utf8 and utf8mb4 - Adding implementation of utf8mb4 --- include/config-win.h | 3 +++ include/m_ctype.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/config-win.h b/include/config-win.h index 57d4ed26307..269ec0e925a 100644 --- a/include/config-win.h +++ b/include/config-win.h @@ -432,6 +432,9 @@ inline ulonglong double2ulonglong(double d) #define HAVE_CHARSET_ucs2 1 #define HAVE_CHARSET_ujis 1 #define HAVE_CHARSET_utf8 1 +#define HAVE_CHARSET_utf8mb4 1 +#define HAVE_CHARSET_utf16 1 +#define HAVE_CHARSET_utf32 1 #define HAVE_UCA_COLLATIONS 1 #define HAVE_BOOL 1 diff --git a/include/m_ctype.h b/include/m_ctype.h index 7cf5ce113f8..d97c0c87b6e 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -98,13 +98,14 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; #define MY_CS_BINSORT 16 /* if binary sort order */ #define MY_CS_PRIMARY 32 /* if primary collation */ #define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */ -#define MY_CS_UNICODE 128 /* is a charset is full unicode */ +#define MY_CS_UNICODE 128 /* is a charset is BMP Unicode */ #define MY_CS_READY 256 /* if a charset is initialized */ #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/ #define MY_CS_CSSORT 1024 /* if case sensitive sort order */ #define MY_CS_HIDDEN 2048 /* 
don't display in SHOW */ #define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */ #define MY_CS_NONASCII 8192 /* if not ASCII-compatible */ +#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */ #define MY_CHARSET_UNDEFINED 0 /* Character repertoire flags */ @@ -112,7 +113,6 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; #define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */ #define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */ - typedef struct my_uni_idx_st { uint16 from; @@ -304,10 +304,14 @@ typedef struct charset_info_st extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin; +extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1; +extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename; + extern CHARSET_INFO my_charset_big5_chinese_ci; extern CHARSET_INFO my_charset_big5_bin; extern CHARSET_INFO my_charset_cp932_japanese_ci; extern CHARSET_INFO my_charset_cp932_bin; +extern CHARSET_INFO my_charset_cp1250_czech_ci; extern CHARSET_INFO my_charset_eucjpms_japanese_ci; extern CHARSET_INFO my_charset_eucjpms_bin; extern CHARSET_INFO my_charset_euckr_korean_ci; @@ -316,7 +320,6 @@ extern CHARSET_INFO my_charset_gb2312_chinese_ci; extern CHARSET_INFO my_charset_gb2312_bin; extern CHARSET_INFO my_charset_gbk_chinese_ci; extern CHARSET_INFO my_charset_gbk_bin; -extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1; extern CHARSET_INFO my_charset_latin1_german2_ci; extern CHARSET_INFO my_charset_latin1_bin; extern CHARSET_INFO my_charset_latin2_czech_ci; @@ -329,11 +332,22 @@ extern CHARSET_INFO my_charset_ucs2_bin; extern CHARSET_INFO my_charset_ucs2_unicode_ci; extern CHARSET_INFO my_charset_ujis_japanese_ci; extern CHARSET_INFO my_charset_ujis_bin; +extern CHARSET_INFO my_charset_utf16_bin; +extern CHARSET_INFO my_charset_utf16_general_ci; +extern CHARSET_INFO my_charset_utf16_unicode_ci; +extern CHARSET_INFO my_charset_utf32_bin; +extern CHARSET_INFO my_charset_utf32_general_ci; +extern 
CHARSET_INFO my_charset_utf32_unicode_ci; + extern CHARSET_INFO my_charset_utf8_general_ci; extern CHARSET_INFO my_charset_utf8_unicode_ci; extern CHARSET_INFO my_charset_utf8_bin; -extern CHARSET_INFO my_charset_cp1250_czech_ci; -extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename; +extern CHARSET_INFO my_charset_utf8mb4_bin; +extern CHARSET_INFO my_charset_utf8mb4_general_ci; +extern CHARSET_INFO my_charset_utf8mb4_unicode_ci; +#define MY_UTF8MB3 "utf8" +#define MY_UTF8MB4 "utf8mb4" + /* declarations for simple charsets */ extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t, @@ -430,6 +444,19 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, char *min_str, char *max_str, size_t *min_length, size_t *max_length); +my_bool my_like_range_utf16(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str, char *max_str, + size_t *min_length, size_t *max_length); + +my_bool my_like_range_utf32(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str, char *max_str, + size_t *min_length, size_t *max_length); int my_wildcmp_8bit(CHARSET_INFO *, const char *str,const char *str_end, @@ -480,6 +507,31 @@ uint my_instr_mb(struct charset_info_st *, const char *s, size_t s_length, my_match_t *match, uint nmatch); +int my_strnncoll_mb_bin(CHARSET_INFO * cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix); + +int my_strnncollsp_mb_bin(CHARSET_INFO *cs, + const uchar *a, size_t a_length, + const uchar *b, size_t b_length, + my_bool diff_if_only_endspace_difference); + +int my_wildcmp_mb_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many); + +int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const char *s, const char *t); + +void my_hash_sort_mb_bin(CHARSET_INFO *cs 
__attribute__((unused)), + const uchar *key, size_t len,ulong *nr1, ulong *nr2); + +size_t my_strnxfrm_unicode(CHARSET_INFO *, + uchar *dst, size_t dstlen, + const uchar *src, size_t srclen); + int my_wildcmp_unicode(CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, -- cgit v1.2.1