summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.org>2016-05-30 16:56:29 +0400
committerAlexander Barkov <bar@mariadb.org>2016-05-30 16:56:29 +0400
commitbc546225c08d46f33bf0630a7755ef568b9ac3cc (patch)
tree7691ddf2c9b80dc44e54c2ea7b4b3a023cac230b /strings
parentc5733e57e73ddf5211c8cb94137db16a96fca51d (diff)
downloadmariadb-git-bc546225c08d46f33bf0630a7755ef568b9ac3cc.tar.gz
Adding collations (branch: master)
utf8mb4_thai_520_w2, ucs2_thai_520_w2, utf16_thai_520_w2, utf32_thai_520_w2
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-uca.c 163
-rw-r--r-- strings/ctype-utf8.c 36
2 files changed, 181 insertions, 18 deletions
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index c49dff6f6a9..8a092e8f085 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -34167,6 +34167,23 @@ static size_t my_strnxfrmlen_any_uca_multilevel(CHARSET_INFO *cs, size_t len)
return my_strnxfrmlen_any_uca(cs, len) * cs->levels_for_order;
}
+/*
+  Collation handler for multi-level UCA collations. The thai_520_w2
+  charsets added by this patch point at it.
+
+  NOTE(review): compared with the definition this patch removes further
+  down, the only non-comment difference is my_like_range_generic in place
+  of my_like_range_mb.
+  NOTE(review): presumably placed ahead of the HAVE_CHARSET_ucs2 section so
+  that the ucs2 collation can reference it - confirm.
+*/
+MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
+{
+ my_coll_init_uca_multilevel, /* init */
+ my_strnncoll_any_uca_multilevel,
+ my_strnncollsp_any_uca_multilevel,
+ my_strnxfrm_any_uca_multilevel,
+ my_strnxfrmlen_any_uca_multilevel,
+ my_like_range_generic,
+ my_wildcmp_uca,
+ NULL,
+ my_instr_mb,
+ my_hash_sort_any_uca,
+ my_propagate_complex
+};
+
+
#ifdef HAVE_CHARSET_ucs2
/*
UCS2 optimized CHARSET_INFO compatible wrappers.
@@ -35000,6 +35017,38 @@ struct charset_info_st my_charset_ucs2_myanmar_uca_ci=
};
+struct charset_info_st my_charset_ucs2_thai_520_w2= /* "ucs2_thai_520_w2": my_uca_v520_th weights, 2 levels for order */
+{
+ MY_PAGE2_COLLATION_ID_UCS2+2,0,0, /* number */
+ MY_CS_UCS2_UCA_FLAGS,/* flags */
+ "ucs2", /* csname */
+ "ucs2_thai_520_w2", /* name */
+ "", /* comment */
+ "", /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ &my_uca_v520_th, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_unicode520,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 4, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 2, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ 2, /* levels_for_order */
+ &my_charset_ucs2_handler, /* charset handler */
+ &my_collation_any_uca_handler_multilevel /* multi-level UCA collation handler */
+};
+
struct charset_info_st my_charset_ucs2_unicode_520_ci=
{
150,0,0, /* number */
@@ -35085,21 +35134,6 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler =
my_propagate_complex
};
-MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
-{
- my_coll_init_uca_multilevel, /* init */
- my_strnncoll_any_uca_multilevel,
- my_strnncollsp_any_uca_multilevel,
- my_strnxfrm_any_uca_multilevel,
- my_strnxfrmlen_any_uca_multilevel,
- my_like_range_mb,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_any_uca,
- my_propagate_complex
-};
-
/*
We consider bytes with code more than 127 as a letter.
This garantees that word boundaries work fine with regular
@@ -35941,7 +35975,7 @@ struct charset_info_st my_charset_utf8_thai_520_w2=
MY_PAGE2_COLLATION_ID_UTF8+2,0,0, /* number */
MY_CS_UTF8MB3_UCA_FLAGS,/* flags */
MY_UTF8MB3, /* csname */
- MY_UTF8MB3 "_thai_520_w2",/* name */
+ MY_UTF8MB3 "_thai_520_w2",/* name */
"", /* comment */
"", /* tailoring */
ctype_utf8, /* ctype */
@@ -36782,6 +36816,37 @@ struct charset_info_st my_charset_utf8mb4_myanmar_uca_ci=
&my_collation_any_uca_handler
};
+struct charset_info_st my_charset_utf8mb4_thai_520_w2= /* MY_UTF8MB4 "_thai_520_w2": my_uca_v520_th weights, 2 levels for order */
+{
+ MY_PAGE2_COLLATION_ID_UTF8MB4+2,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* flags */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_thai_520_w2", /* name */
+ "", /* comment */
+ "", /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ &my_uca_v520_th, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_unicode520,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 4, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ 2, /* levels_for_order */
+ &my_charset_utf8mb4_handler, /* charset handler */
+ &my_collation_any_uca_handler_multilevel /* multi-level UCA collation handler */
+};
struct charset_info_st my_charset_utf8mb4_unicode_520_ci=
{
@@ -37645,6 +37710,39 @@ struct charset_info_st my_charset_utf32_myanmar_uca_ci=
};
+struct charset_info_st my_charset_utf32_thai_520_w2= /* "utf32_thai_520_w2": my_uca_v520_th weights, 2 levels for order */
+{
+ MY_PAGE2_COLLATION_ID_UTF32+2,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* flags */
+ "utf32", /* csname */
+ "utf32_thai_520_w2",/* name */
+ "", /* comment */
+ "", /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ &my_uca_v520_th, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_unicode520,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 4, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ 2, /* levels_for_order */
+ &my_charset_utf32_handler, /* charset handler */
+ &my_collation_any_uca_handler_multilevel /* multi-level UCA collation handler */
+};
+
+
struct charset_info_st my_charset_utf32_unicode_520_ci=
{
182,0,0, /* number */
@@ -38510,6 +38608,39 @@ struct charset_info_st my_charset_utf16_myanmar_uca_ci=
};
+struct charset_info_st my_charset_utf16_thai_520_w2= /* "utf16_thai_520_w2": my_uca_v520_th weights, 2 levels for order */
+{
+ MY_PAGE2_COLLATION_ID_UTF16+2,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* flags */
+ "utf16", /* csname */
+ "utf16_thai_520_w2",/* name */
+ "", /* comment */
+ "", /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ &my_uca_v520_th, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_unicode520,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 4, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ 2, /* levels_for_order */
+ &my_charset_utf16_handler, /* charset handler */
+ &my_collation_any_uca_handler_multilevel /* multi-level UCA collation handler */
+};
+
+
struct charset_info_st my_charset_utf16_unicode_520_ci=
{
123,0,0, /* number */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index c0865157ad5..e4eb2832dff 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -5447,6 +5447,38 @@ static uint my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
}
+/*
+  Fill a buffer with a repeated multi-byte character.
+
+  Encodes "fill" once through cs->cset->native_to_mb() and copies the
+  encoded sequence into "str" as many whole times as fit into "length"
+  bytes. Trailing bytes that are too few to hold another whole sequence
+  are set to ' '.
+
+  If native_to_mb() does not return a positive length, the whole buffer is
+  padded with ' ' instead. In builds where DBUG_ASSERT is a no-op, a zero
+  length would otherwise keep the copy loop spinning forever, and a
+  negative one would reach memcpy() as a huge size.
+
+  @param cs      character set whose native_to_mb() encodes "fill"
+  @param str     destination buffer
+  @param length  size of the destination buffer, in bytes
+  @param fill    code of the character to fill with
+
+  TODO-10.2: join this with pad_max_char() in ctype-mb.c
+*/
+static void
+my_fill_utf8_mb(CHARSET_INFO *cs, char *str, size_t length, int fill)
+{
+  char *end= str + length;
+  char buf[10];
+  /*
+    Keep the result in an int: plain char has implementation-defined
+    signedness, so a non-positive result could be misread after narrowing.
+  */
+  int buflen= cs->cset->native_to_mb(cs, (my_wc_t) fill, (uchar*) buf,
+                                     (uchar*) buf + sizeof(buf));
+  DBUG_ASSERT(buflen > 0);
+  if (buflen > 0)
+  {
+    /* Compare the remaining space so no pointer past "end" is formed */
+    for ( ; (size_t) (end - str) >= (size_t) buflen ; )
+    {
+      memcpy(str, buf, (size_t) buflen);
+      str+= buflen;
+    }
+  }
+
+  /* Pad whatever is left (possibly the whole buffer) with spaces */
+  for ( ; str < end; )
+    *str++= ' ';
+}
+
+
+/*
+  Fill "length" bytes at "str" with the character "fill".
+
+  Values of 0x80 and above are handed to my_fill_utf8_mb(); everything
+  below 0x80 goes to my_fill_8bit().
+*/
+static void
+my_fill_utf8(CHARSET_INFO *cs, char *str, size_t length, int fill)
+{
+  if (fill >= 0x80)
+  {
+    my_fill_utf8_mb(cs, str, length, fill);
+    return;
+  }
+  my_fill_8bit(cs, str, length, fill);
+}
+
+
static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
{
NULL, /* init */
@@ -5514,7 +5546,7 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
my_snprintf_8bit,
my_long10_to_str_8bit,
my_longlong10_to_str_8bit,
- my_fill_8bit,
+ my_fill_utf8,
my_strntol_8bit,
my_strntoul_8bit,
my_strntoll_8bit,
@@ -7869,7 +7901,7 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
my_snprintf_8bit,
my_long10_to_str_8bit,
my_longlong10_to_str_8bit,
- my_fill_8bit,
+ my_fill_utf8,
my_strntol_8bit,
my_strntoul_8bit,
my_strntoll_8bit,