summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.org>2016-11-08 20:57:19 +0400
committerAlexander Barkov <bar@mariadb.org>2016-11-08 20:57:19 +0400
commit0259b3cbbe47448beb385a33348af56300004aa6 (patch)
tree56ad0b2aa945a60a9a5ff2934a5931bd7eab1a48 /strings
parent90c5b2f505855c13e0bdc2e73e11fa83983712bd (diff)
downloadmariadb-git-0259b3cbbe47448beb385a33348af56300004aa6.tar.gz
MDEV-11255 LDML: allow defining 2-level UCA collations
Diffstat (limited to 'strings')
-rw-r--r--strings/ctype-uca.c187
-rw-r--r--strings/ctype.c2
2 files changed, 106 insertions, 83 deletions
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 73a813feed9..79ad8ca7302 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -6542,6 +6542,17 @@ MY_UCA_INFO my_uca_v400=
},
0 /* levelno */
},
+ {
+ 0,
+ NULL,
+ NULL,
+ {
+ 0,
+ NULL,
+ NULL
+ },
+ 1 /* levelno */
+ },
},
/* Logical positions */
@@ -30134,6 +30145,18 @@ MY_UCA_INFO my_uca_v520=
},
0 /* levelno */
},
+
+ {
+ 0x10FFFF, /* maxchar */
+ (uchar *) uca520_length_w2,
+ (uint16 **) uca520_weight_w2,
+ { /* Contractions: */
+ 0, /* nitems */
+ NULL, /* item */
+ NULL /* flags */
+ },
+ 1 /* levelno */
+ },
},
0x0009, /* first_non_ignorable p != ignore */
@@ -31851,6 +31874,25 @@ static int my_strnncoll_uca_multilevel(CHARSET_INFO *cs,
}
+static int
+my_strnncollsp_generic_uca_nopad_multilevel(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ uint num_level= cs->levels_for_order;
+ uint i;
+ for (i= 0; i != num_level; i++)
+ {
+ int ret= my_strnncoll_uca_onelevel(cs, &my_any_uca_scanner_handler,
+ &cs->uca->level[i],
+ s, slen, t, tlen, FALSE);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+
static inline int
my_space_weight(const MY_UCA_WEIGHT_LEVEL *level)
{
@@ -32181,6 +32223,16 @@ my_strnxfrm_uca_onelevel(CHARSET_INFO *cs,
}
+/*
+ Return the minimum possible weight on a level.
+*/
+static uint min_weight_on_level(MY_UCA_WEIGHT_LEVEL *level)
+{
+ DBUG_ASSERT(level->levelno < 2); /* No 3-level NOPAD collations yet */
+ return level->levelno == 0 ? 0x0200 : 0x0020;
+}
+
+
static uchar *
my_strnxfrm_uca_nopad_onelevel(CHARSET_INFO *cs,
my_uca_scanner_handler *scanner_handler,
@@ -32194,12 +32246,9 @@ my_strnxfrm_uca_nopad_onelevel(CHARSET_INFO *cs,
dst, de, &nweights,
src, srclen);
DBUG_ASSERT(dst <= de);
- /*
- Pad with the minimum possible primary weight 0x0200.
- */
- DBUG_ASSERT(level->levelno == 0); /* No multi-level NOPAD collations yet */
+ /* Pad with the minimum possible weight on this level */
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
- dst= my_strnxfrm_uca_padn(dst, de, nweights, 0x0200);
+ dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
DBUG_ASSERT(dst <= de);
my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
return dst;
@@ -32294,7 +32343,12 @@ my_strnxfrm_uca_multilevel(CHARSET_INFO *cs,
{
if (!(flags & MY_STRXFRM_LEVEL_ALL) ||
(flags & (MY_STRXFRM_LEVEL1 << current_level)))
- dst= my_strnxfrm_uca_onelevel(cs, scanner_handler,
+ dst= cs->state & MY_CS_NOPAD ?
+ my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler,
+ &cs->uca->level[current_level],
+ dst, de, nweights,
+ src, srclen, flags) :
+ my_strnxfrm_uca_onelevel(cs, scanner_handler,
&cs->uca->level[current_level],
dst, de, nweights,
src, srclen, flags);
@@ -32970,6 +33024,7 @@ typedef enum
typedef struct my_coll_rules_st
{
uint version; /* Unicode version, e.g. 400 or 520 */
+ uint strength; /* Number of levels */
MY_UCA_INFO *uca; /* Unicode weight data */
size_t nrules; /* Number of rules in the rule array */
size_t mrules; /* Number of allocated rules */
@@ -33251,6 +33306,10 @@ my_coll_parser_scan_setting(MY_COLL_RULE_PARSER *p)
{
rules->shift_after_method= my_shift_method_simple;
}
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[strength 1]")))
+ rules->strength= 1;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[strength 2]")))
+ rules->strength= 2;
else
{
return 0;
@@ -34189,6 +34248,10 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
}
+MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel;
+MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel;
+
+
/*
This function copies an UCS2 collation from
the default Unicode Collation Algorithm (UCA)
@@ -34213,66 +34276,6 @@ create_tailoring(struct charset_info_st *cs,
MY_COLL_RULES rules;
MY_UCA_INFO new_uca, *src_uca= NULL;
int rc= 0;
-
- *loader->error= '\0';
-
- if (!cs->tailoring)
- return 0; /* Ok to add a collation without tailoring */
-
- memset(&rules, 0, sizeof(rules));
- rules.loader= loader;
- rules.uca= cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
- memset(&new_uca, 0, sizeof(new_uca));
-
- /* Parse ICU Collation Customization expression */
- if ((rc= my_coll_rule_parse(&rules,
- cs->tailoring,
- cs->tailoring + strlen(cs->tailoring))))
- goto ex;
-
- if (rules.version == 520) /* Unicode-5.2.0 requested */
- {
- src_uca= &my_uca_v520;
- cs->caseinfo= &my_unicase_unicode520;
- }
- else if (rules.version == 400) /* Unicode-4.0.0 requested */
- {
- src_uca= &my_uca_v400;
- cs->caseinfo= &my_unicase_default;
- }
- else /* No Unicode version specified */
- {
- src_uca= cs->uca ? cs->uca : &my_uca_v400;
- if (!cs->caseinfo)
- cs->caseinfo= &my_unicase_default;
- }
-
- if ((rc= init_weight_level(loader, &rules,
- &new_uca.level[0], &src_uca->level[0])))
- goto ex;
-
- if (!(cs->uca= (MY_UCA_INFO *) (loader->once_alloc)(sizeof(MY_UCA_INFO))))
- {
- rc= 1;
- goto ex;
- }
- cs->uca[0]= new_uca;
-
-ex:
- (loader->free)(rules.rule);
- if (rc != 0 && loader->error[0])
- loader->reporter(ERROR_LEVEL, "%s", loader->error);
- return rc;
-}
-
-static my_bool
-create_tailoring_multilevel(struct charset_info_st *cs,
- MY_CHARSET_LOADER *loader)
-{
- uint num_level= cs->levels_for_order;
- MY_COLL_RULES rules;
- MY_UCA_INFO new_uca, *src_uca= NULL;
- int rc= 0;
uint i;
*loader->error= '\0';
@@ -34307,9 +34310,17 @@ create_tailoring_multilevel(struct charset_info_st *cs,
if (!cs->caseinfo)
cs->caseinfo= &my_unicase_default;
}
+ cs->levels_for_order= rules.strength ? rules.strength : 1;
- for (i= 0; i != num_level; i++)
+ for (i= 0; i != cs->levels_for_order; i++)
{
+ if ((rc= (src_uca->level[i].maxchar == 0)))
+ {
+ my_snprintf(loader->error, sizeof(loader->error) - 1,
+ "%s: no level #%d data for this Unicode version.",
+ cs->name, i + 1);
+ goto ex;
+ }
if ((rc= init_weight_level(loader, &rules,
&new_uca.level[i], &src_uca->level[i])))
goto ex;
@@ -34321,6 +34332,10 @@ create_tailoring_multilevel(struct charset_info_st *cs,
goto ex;
}
cs->uca[0]= new_uca;
+ if (cs->levels_for_order > 1)
+ cs->coll= (cs->state & MY_CS_NOPAD) ?
+ &my_collation_generic_uca_nopad_handler_multilevel :
+ &my_collation_any_uca_handler_multilevel;
ex:
(loader->free)(rules.rule);
@@ -34345,16 +34360,6 @@ my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
return create_tailoring(cs, loader);
}
-static my_bool
-my_coll_init_uca_multilevel(struct charset_info_st *cs,
- MY_CHARSET_LOADER *loader)
-{
- cs->pad_char= ' ';
- cs->ctype= my_charset_utf8_unicode_ci.ctype;
- if (!cs->caseinfo)
- cs->caseinfo= &my_unicase_default;
- return create_tailoring_multilevel(cs, loader);
-}
static int my_strnncoll_any_uca(CHARSET_INFO *cs,
const uchar *s, size_t slen,
@@ -34489,7 +34494,7 @@ MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler =
MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
{
- my_coll_init_uca_multilevel,
+ my_coll_init_uca,
my_strnncoll_any_uca_multilevel,
my_strnncollsp_any_uca_multilevel,
my_strnxfrm_any_uca_multilevel,
@@ -34503,6 +34508,22 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
};
+MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel =
+{
+ my_coll_init_uca,
+ my_strnncoll_any_uca_multilevel,
+ my_strnncollsp_generic_uca_nopad_multilevel,
+ my_strnxfrm_any_uca_multilevel,
+ my_strnxfrmlen_any_uca_multilevel,
+ my_like_range_generic,
+ my_wildcmp_uca,
+ NULL,
+ my_instr_mb,
+ my_hash_sort_generic_uca_nopad,
+ my_propagate_complex
+};
+
+
#ifdef HAVE_CHARSET_ucs2
/*
UCS2 optimized CHARSET_INFO compatible wrappers.
@@ -35342,7 +35363,7 @@ struct charset_info_st my_charset_ucs2_thai_520_w2=
"ucs2", /* csname */
"ucs2_thai_520_w2", /* name */
"", /* comment */
- "", /* tailoring */
+ "[strength 2]", /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
@@ -36363,7 +36384,7 @@ struct charset_info_st my_charset_utf8_thai_520_w2=
MY_UTF8MB3, /* csname */
MY_UTF8MB3 "_thai_520_w2",/* name */
"", /* comment */
- "", /* tailoring */
+ "[strength 2]", /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
@@ -37275,7 +37296,7 @@ struct charset_info_st my_charset_utf8mb4_thai_520_w2=
MY_UTF8MB4, /* csname */
MY_UTF8MB4 "_thai_520_w2", /* name */
"", /* comment */
- "", /* tailoring */
+ "[strength 2]", /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
@@ -38237,7 +38258,7 @@ struct charset_info_st my_charset_utf32_thai_520_w2=
"utf32", /* csname */
"utf32_thai_520_w2",/* name */
"", /* comment */
- "", /* tailoring */
+ "[strength 2]", /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
@@ -39204,7 +39225,7 @@ struct charset_info_st my_charset_utf16_thai_520_w2=
"utf16", /* cs name */
"utf16_thai_520_w2",/* name */
"", /* comment */
- "", /* tailoring */
+ "[strength 2]", /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
diff --git a/strings/ctype.c b/strings/ctype.c
index 7348cbfa1f9..12d511162d7 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -667,6 +667,8 @@ static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
case _CS_ST_STRENGTH:
/* 1, 2, 3, 4, 5, or primary, secondary, tertiary, quaternary, identical */
rc= tailoring_append(st, "[strength %.*s]", len, attr);
+ if (len && attr[0] >= '1' && attr[0] <= '9')
+ i->cs.levels_for_order= attr[0] - '0';
break;
case _CS_ST_ALTERNATE: