summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.org>2016-09-06 12:37:11 +0400
committerAlexander Barkov <bar@mariadb.org>2016-09-06 12:37:11 +0400
commite4f6fd5e1252f8c68f449fe820bae88c18bca8f5 (patch)
tree18830c670cfe3020e72565220096cdf4290d8858 /strings
parent8ae65920fabddb9157cddb547cf914b4a63539bf (diff)
downloadmariadb-git-e4f6fd5e1252f8c68f449fe820bae88c18bca8f5.tar.gz
MDEV-10743 LDML: a new syntax to reuse sort order from another 8bit simple collation
Diffstat (limited to 'strings')
-rw-r--r--strings/conf_to_src.c128
-rw-r--r--strings/ctype-simple.c4
-rw-r--r--strings/ctype.c8
3 files changed, 118 insertions, 22 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c
index beb62d35693..1382ec69a0a 100644
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@@ -119,7 +119,10 @@ static void simple_cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from)
if (from->name)
to->name= strdup(from->name);
-
+
+ if (from->tailoring)
+ to->tailoring= strdup(from->tailoring);
+
if (from->ctype)
to->ctype= (uchar*) mdup((char*) from->ctype, MY_CS_CTYPE_TABLE_SIZE);
if (from->to_lower)
@@ -144,30 +147,60 @@ static void simple_cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from)
}
-static void inherit_data(struct charset_info_st *cs, CHARSET_INFO *refcs)
+/*
+ cs->xxx arrays can be NULL in case when a collation has an entry only
+ in Index.xml and has no entry in csname.xml (e.g. in case of a binary
+ collation or a collation using <import> command).
+
+ refcs->xxx arrays can be NULL if <import> refers to a collation
+ which is not defined in csname.xml, e.g. an always compiled collation
+ such as latin1_swedish_ci.
+*/
+static void inherit_charset_data(struct charset_info_st *cs,
+ CHARSET_INFO *refcs)
{
- if (refcs->ctype &&
+ cs->state|= (refcs->state & (MY_CS_PUREASCII|MY_CS_NONASCII));
+ if (refcs->ctype && cs->ctype &&
!memcmp(cs->ctype, refcs->ctype, MY_CS_CTYPE_TABLE_SIZE))
cs->ctype= NULL;
- if (refcs->to_lower &&
+ if (refcs->to_lower && cs->to_lower &&
!memcmp(cs->to_lower, refcs->to_lower, MY_CS_TO_LOWER_TABLE_SIZE))
cs->to_lower= NULL;
- if (refcs->to_upper &&
+ if (refcs->to_upper && cs->to_upper &&
!memcmp(cs->to_upper, refcs->to_upper, MY_CS_TO_LOWER_TABLE_SIZE))
cs->to_upper= NULL;
- if (refcs->tab_to_uni &&
+ if (refcs->tab_to_uni && cs->tab_to_uni &&
!memcmp(cs->tab_to_uni, refcs->tab_to_uni,
MY_CS_TO_UNI_TABLE_SIZE * sizeof(uint16)))
cs->tab_to_uni= NULL;
}
+static CHARSET_INFO *find_charset_data_inheritance_source(CHARSET_INFO *cs)
+{
+ CHARSET_INFO *refcs;
+ uint refid= get_charset_number_internal(cs->csname, MY_CS_PRIMARY);
+ return refid && refid != cs->number &&
+ (refcs= &all_charsets[refid]) &&
+ (refcs->state & MY_CS_LOADED) ? refcs : NULL;
+}
+
+
+/**
+ Detect if "cs" needs further loading from csname.xml
+ @param cs - the character set pointer
+ @retval FALSE - if the current data (e.g. loaded from from Index.xml)
+ is not enough to dump the character set and requires
+ further reading from the csname.xml file.
+ @retval TRUE - if the current data is enough to dump,
+ no reading of csname.xml is needed.
+*/
static my_bool simple_cs_is_full(CHARSET_INFO *cs)
{
return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
cs->to_lower) &&
(cs->number && cs->name &&
- (cs->sort_order || (cs->state & MY_CS_BINSORT))));
+ (cs->sort_order || cs->tailoring || (cs->state & MY_CS_BINSORT))));
}
static int add_collation(struct charset_info_st *cs)
@@ -183,6 +216,7 @@ static int add_collation(struct charset_info_st *cs)
cs->number= 0;
cs->name= NULL;
+ cs->tailoring= NULL;
cs->state= 0;
cs->sort_order= NULL;
cs->state= 0;
@@ -255,6 +289,55 @@ void print_arrays(FILE *f, CHARSET_INFO *cs)
}
+/**
+ Print an array member of a CHARSET_INFO.
+ @param f - the file to print into
+ @param cs0 - reference to the CHARSET_INFO to print
+ @param array0 - pointer to the array data (can be NULL)
+ @param cs1 - reference to the CHARSET_INFO that the data
+ can be inherited from (e.g. primary collation)
+ @param array1 - pointer to the array data in cs1 (can be NULL)
+ @param name - name of the member
+
+ If array0 is not null, then the CHARSET_INFO being dumped has its
+ own array (e.g. the default collation for the character set).
+ We print the name of this array using cs0->name and return.
+
+ If array1 is not null, then the CHARSET_INFO being dumpled reuses
+ the array from another collation. We print the name of the array of
+ the referenced collation using cs1->name and return.
+
+ Otherwise (if both array0 and array1 are NULL), we have a collation
+ of a character set whose primary collation is not available now,
+ and which does not have its own entry in csname.xml file.
+
+ For example, Index.xml has this entry:
+ <collation name="latin1_swedish_ci_copy">
+ <rules>
+ <import source="latin1_swedish_ci"/>
+ </rules>
+ </collation>
+ and latin1.xml does not have entries for latin1_swedish_ci_copy.
+
+ In such cases we print NULL as a pointer to the array.
+ It will be set to a not-null data during the first initialization
+ by the inherit_charset_data() call (see mysys/charset.c for details).
+*/
+static void
+print_array_ref(FILE *f,
+ CHARSET_INFO *cs0, const void *array0,
+ CHARSET_INFO *cs1, const void *array1,
+ const char *name)
+{
+ CHARSET_INFO *cs= array0 ? cs0 : array1 ? cs1 : NULL;
+ if (cs)
+ fprintf(f," %s_%s, /* %s */\n",
+ name, cs->name, name);
+ else
+ fprintf(f," NULL, /* %s */\n", name);
+}
+
+
void dispcset(FILE *f,CHARSET_INFO *cs)
{
fprintf(f,"{\n");
@@ -272,21 +355,23 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
fprintf(f," \"%s\", /* cset name */\n",cs->csname);
fprintf(f," \"%s\", /* coll name */\n",cs->name);
fprintf(f," \"\", /* comment */\n");
- fprintf(f," NULL, /* tailoring */\n");
-
- fprintf(f," ctype_%s, /* ctype */\n",
- cs->ctype ? cs->name : srccs->name);
- fprintf(f," to_lower_%s, /* lower */\n",
- cs->to_lower ? cs->name : srccs->name);
- fprintf(f," to_upper_%s, /* upper */\n",
- cs->to_upper ? cs->name : srccs->name);
+ if (cs->tailoring)
+ fprintf(f, " \"%s\", /* tailoring */\n", cs->tailoring);
+ else
+ fprintf(f," NULL, /* tailoring */\n");
+
+ print_array_ref(f, cs, cs->ctype, srccs, srccs->ctype, "ctype");
+ print_array_ref(f, cs, cs->to_lower, srccs, srccs->to_lower, "to_lower");
+ print_array_ref(f, cs, cs->to_upper, srccs, srccs->to_upper, "to_upper");
+
if (cs->sort_order)
fprintf(f," sort_order_%s, /* sort_order */\n",cs->name);
else
fprintf(f," NULL, /* sort_order */\n");
+
fprintf(f," NULL, /* uca */\n");
- fprintf(f," to_uni_%s, /* to_uni */\n",
- cs->tab_to_uni ? cs->name : srccs->name);
+
+ print_array_ref(f, cs, cs->tab_to_uni, srccs, srccs->tab_to_uni, "to_uni");
}
else
{
@@ -403,14 +488,13 @@ main(int argc, char **argv __attribute__((unused)))
{
if (cs->state & MY_CS_LOADED)
{
- uint refid= get_charset_number_internal(cs->csname, MY_CS_PRIMARY);
+ CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs);
cs->state|= my_8bit_charset_flags_from_data(cs) |
my_8bit_collation_flags_from_data(cs);
- if (refid && cs->number != refid)
+ if (refcs)
{
- CHARSET_INFO *refcs= &all_charsets[refid];
- refids[cs->number]= refid;
- inherit_data(cs, refcs);
+ refids[cs->number]= refcs->number;
+ inherit_charset_data(cs, refcs);
}
fprintf(f,"#ifdef HAVE_CHARSET_%s\n",cs->csname);
print_arrays(f, cs);
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index e6cea06b17b..f02d96aa798 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1417,6 +1417,8 @@ my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
cs->caseup_multiply= 1;
cs->casedn_multiply= 1;
cs->pad_char= ' ';
+ if (!cs->to_lower || !cs->to_upper || !cs->ctype || !cs->tab_to_uni)
+ return TRUE;
return create_fromuni(cs, loader);
}
@@ -1442,6 +1444,8 @@ static void set_max_sort_char(struct charset_info_st *cs)
static my_bool my_coll_init_simple(struct charset_info_st *cs,
MY_CHARSET_LOADER *loader __attribute__((unused)))
{
+ if (!cs->sort_order)
+ return TRUE;
cs->state|= my_8bit_collation_flags_from_data(cs);
set_max_sort_char(cs);
return FALSE;
diff --git a/strings/ctype.c b/strings/ctype.c
index be8a8cb506e..2764a327bb9 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -88,6 +88,8 @@ struct my_cs_file_section_st
#define _CS_CL_SUPPRESS_CONTRACTIONS 101
#define _CS_CL_OPTIMIZE 102
#define _CS_CL_SHIFT_AFTER_METHOD 103
+#define _CS_CL_RULES_IMPORT 104
+#define _CS_CL_RULES_IMPORT_SOURCE 105
/* Collation Settings */
@@ -188,6 +190,8 @@ static const struct my_cs_file_section_st sec[] =
{_CS_CL_SUPPRESS_CONTRACTIONS, "charsets/charset/collation/suppress_contractions"},
{_CS_CL_OPTIMIZE, "charsets/charset/collation/optimize"},
{_CS_CL_SHIFT_AFTER_METHOD, "charsets/charset/collation/shift-after-method"},
+ {_CS_CL_RULES_IMPORT, "charsets/charset/collation/rules/import"},
+ {_CS_CL_RULES_IMPORT_SOURCE, "charsets/charset/collation/rules/import/source"},
/* Collation Settings */
{_CS_ST_SETTINGS, "charsets/charset/collation/settings"},
@@ -641,6 +645,10 @@ static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
rc= tailoring_append(st, "[version %.*s]", len, attr);
break;
+ case _CS_CL_RULES_IMPORT_SOURCE:
+ rc= tailoring_append(st, "[import %.*s]", len, attr);
+ break;
+
case _CS_CL_SUPPRESS_CONTRACTIONS:
rc= tailoring_append(st, "[suppress contractions %.*s]", len, attr);
break;