diff options
author | Alexander Barkov <bar@mariadb.org> | 2016-09-06 12:37:11 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2016-09-06 12:37:11 +0400 |
commit | e4f6fd5e1252f8c68f449fe820bae88c18bca8f5 (patch) | |
tree | 18830c670cfe3020e72565220096cdf4290d8858 /strings | |
parent | 8ae65920fabddb9157cddb547cf914b4a63539bf (diff) | |
download | mariadb-git-e4f6fd5e1252f8c68f449fe820bae88c18bca8f5.tar.gz |
MDEV-10743 LDML: a new syntax to reuse sort order from another 8bit simple collation
Diffstat (limited to 'strings')
-rw-r--r-- | strings/conf_to_src.c | 128 | ||||
-rw-r--r-- | strings/ctype-simple.c | 4 | ||||
-rw-r--r-- | strings/ctype.c | 8 |
3 files changed, 118 insertions, 22 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c index beb62d35693..1382ec69a0a 100644 --- a/strings/conf_to_src.c +++ b/strings/conf_to_src.c @@ -119,7 +119,10 @@ static void simple_cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from) if (from->name) to->name= strdup(from->name); - + + if (from->tailoring) + to->tailoring= strdup(from->tailoring); + if (from->ctype) to->ctype= (uchar*) mdup((char*) from->ctype, MY_CS_CTYPE_TABLE_SIZE); if (from->to_lower) @@ -144,30 +147,60 @@ static void simple_cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from) } -static void inherit_data(struct charset_info_st *cs, CHARSET_INFO *refcs) +/* + cs->xxx arrays can be NULL in case when a collation has an entry only + in Index.xml and has no entry in csname.xml (e.g. in case of a binary + collation or a collation using <import> command). + + refcs->xxx arrays can be NULL if <import> refers to a collation + which is not defined in csname.xml, e.g. an always compiled collation + such as latin1_swedish_ci. 
+*/ +static void inherit_charset_data(struct charset_info_st *cs, + CHARSET_INFO *refcs) { - if (refcs->ctype && + cs->state|= (refcs->state & (MY_CS_PUREASCII|MY_CS_NONASCII)); + if (refcs->ctype && cs->ctype && !memcmp(cs->ctype, refcs->ctype, MY_CS_CTYPE_TABLE_SIZE)) cs->ctype= NULL; - if (refcs->to_lower && + if (refcs->to_lower && cs->to_lower && !memcmp(cs->to_lower, refcs->to_lower, MY_CS_TO_LOWER_TABLE_SIZE)) cs->to_lower= NULL; - if (refcs->to_upper && + if (refcs->to_upper && cs->to_upper && !memcmp(cs->to_upper, refcs->to_upper, MY_CS_TO_LOWER_TABLE_SIZE)) cs->to_upper= NULL; - if (refcs->tab_to_uni && + if (refcs->tab_to_uni && cs->tab_to_uni && !memcmp(cs->tab_to_uni, refcs->tab_to_uni, MY_CS_TO_UNI_TABLE_SIZE * sizeof(uint16))) cs->tab_to_uni= NULL; } +static CHARSET_INFO *find_charset_data_inheritance_source(CHARSET_INFO *cs) +{ + CHARSET_INFO *refcs; + uint refid= get_charset_number_internal(cs->csname, MY_CS_PRIMARY); + return refid && refid != cs->number && + (refcs= &all_charsets[refid]) && + (refcs->state & MY_CS_LOADED) ? refcs : NULL; +} + + +/** + Detect if "cs" needs further loading from csname.xml + @param cs - the character set pointer + @retval FALSE - if the current data (e.g. loaded from Index.xml) + is not enough to dump the character set and requires + further reading from the csname.xml file. + @retval TRUE - if the current data is enough to dump, + no reading of csname.xml is needed. 
+*/ static my_bool simple_cs_is_full(CHARSET_INFO *cs) { return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper && cs->to_lower) && (cs->number && cs->name && - (cs->sort_order || (cs->state & MY_CS_BINSORT)))); + (cs->sort_order || cs->tailoring || (cs->state & MY_CS_BINSORT)))); } static int add_collation(struct charset_info_st *cs) @@ -183,6 +216,7 @@ static int add_collation(struct charset_info_st *cs) cs->number= 0; cs->name= NULL; + cs->tailoring= NULL; cs->state= 0; cs->sort_order= NULL; cs->state= 0; @@ -255,6 +289,55 @@ void print_arrays(FILE *f, CHARSET_INFO *cs) } +/** + Print an array member of a CHARSET_INFO. + @param f - the file to print into + @param cs0 - reference to the CHARSET_INFO to print + @param array0 - pointer to the array data (can be NULL) + @param cs1 - reference to the CHARSET_INFO that the data + can be inherited from (e.g. primary collation) + @param array1 - pointer to the array data in cs1 (can be NULL) + @param name - name of the member + + If array0 is not null, then the CHARSET_INFO being dumped has its + own array (e.g. the default collation for the character set). + We print the name of this array using cs0->name and return. + + If array1 is not null, then the CHARSET_INFO being dumped reuses + the array from another collation. We print the name of the array of + the referenced collation using cs1->name and return. + + Otherwise (if both array0 and array1 are NULL), we have a collation + of a character set whose primary collation is not available now, + and which does not have its own entry in csname.xml file. + + For example, Index.xml has this entry: + <collation name="latin1_swedish_ci_copy"> + <rules> + <import source="latin1_swedish_ci"/> + </rules> + </collation> + and latin1.xml does not have entries for latin1_swedish_ci_copy. + + In such cases we print NULL as a pointer to the array. 
+ It will be set to a not-null data during the first initialization + by the inherit_charset_data() call (see mysys/charset.c for details). +*/ +static void +print_array_ref(FILE *f, + CHARSET_INFO *cs0, const void *array0, + CHARSET_INFO *cs1, const void *array1, + const char *name) +{ + CHARSET_INFO *cs= array0 ? cs0 : array1 ? cs1 : NULL; + if (cs) + fprintf(f," %s_%s, /* %s */\n", + name, cs->name, name); + else + fprintf(f," NULL, /* %s */\n", name); +} + + void dispcset(FILE *f,CHARSET_INFO *cs) { fprintf(f,"{\n"); @@ -272,21 +355,23 @@ void dispcset(FILE *f,CHARSET_INFO *cs) fprintf(f," \"%s\", /* cset name */\n",cs->csname); fprintf(f," \"%s\", /* coll name */\n",cs->name); fprintf(f," \"\", /* comment */\n"); - fprintf(f," NULL, /* tailoring */\n"); - - fprintf(f," ctype_%s, /* ctype */\n", - cs->ctype ? cs->name : srccs->name); - fprintf(f," to_lower_%s, /* lower */\n", - cs->to_lower ? cs->name : srccs->name); - fprintf(f," to_upper_%s, /* upper */\n", - cs->to_upper ? cs->name : srccs->name); + if (cs->tailoring) + fprintf(f, " \"%s\", /* tailoring */\n", cs->tailoring); + else + fprintf(f," NULL, /* tailoring */\n"); + + print_array_ref(f, cs, cs->ctype, srccs, srccs->ctype, "ctype"); + print_array_ref(f, cs, cs->to_lower, srccs, srccs->to_lower, "to_lower"); + print_array_ref(f, cs, cs->to_upper, srccs, srccs->to_upper, "to_upper"); + if (cs->sort_order) fprintf(f," sort_order_%s, /* sort_order */\n",cs->name); else fprintf(f," NULL, /* sort_order */\n"); + fprintf(f," NULL, /* uca */\n"); - fprintf(f," to_uni_%s, /* to_uni */\n", - cs->tab_to_uni ? 
cs->name : srccs->name); + + print_array_ref(f, cs, cs->tab_to_uni, srccs, srccs->tab_to_uni, "to_uni"); } else { @@ -403,14 +488,13 @@ main(int argc, char **argv __attribute__((unused))) { if (cs->state & MY_CS_LOADED) { - uint refid= get_charset_number_internal(cs->csname, MY_CS_PRIMARY); + CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs); cs->state|= my_8bit_charset_flags_from_data(cs) | my_8bit_collation_flags_from_data(cs); - if (refid && cs->number != refid) + if (refcs) { - CHARSET_INFO *refcs= &all_charsets[refid]; - refids[cs->number]= refid; - inherit_data(cs, refcs); + refids[cs->number]= refcs->number; + inherit_charset_data(cs, refcs); } fprintf(f,"#ifdef HAVE_CHARSET_%s\n",cs->csname); print_arrays(f, cs); diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index e6cea06b17b..f02d96aa798 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1417,6 +1417,8 @@ my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader) cs->caseup_multiply= 1; cs->casedn_multiply= 1; cs->pad_char= ' '; + if (!cs->to_lower || !cs->to_upper || !cs->ctype || !cs->tab_to_uni) + return TRUE; return create_fromuni(cs, loader); } @@ -1442,6 +1444,8 @@ static void set_max_sort_char(struct charset_info_st *cs) static my_bool my_coll_init_simple(struct charset_info_st *cs, MY_CHARSET_LOADER *loader __attribute__((unused))) { + if (!cs->sort_order) + return TRUE; cs->state|= my_8bit_collation_flags_from_data(cs); set_max_sort_char(cs); return FALSE; diff --git a/strings/ctype.c b/strings/ctype.c index be8a8cb506e..2764a327bb9 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -88,6 +88,8 @@ struct my_cs_file_section_st #define _CS_CL_SUPPRESS_CONTRACTIONS 101 #define _CS_CL_OPTIMIZE 102 #define _CS_CL_SHIFT_AFTER_METHOD 103 +#define _CS_CL_RULES_IMPORT 104 +#define _CS_CL_RULES_IMPORT_SOURCE 105 /* Collation Settings */ @@ -188,6 +190,8 @@ static const struct my_cs_file_section_st sec[] = {_CS_CL_SUPPRESS_CONTRACTIONS, 
"charsets/charset/collation/suppress_contractions"}, {_CS_CL_OPTIMIZE, "charsets/charset/collation/optimize"}, {_CS_CL_SHIFT_AFTER_METHOD, "charsets/charset/collation/shift-after-method"}, + {_CS_CL_RULES_IMPORT, "charsets/charset/collation/rules/import"}, + {_CS_CL_RULES_IMPORT_SOURCE, "charsets/charset/collation/rules/import/source"}, /* Collation Settings */ {_CS_ST_SETTINGS, "charsets/charset/collation/settings"}, @@ -641,6 +645,10 @@ static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len) rc= tailoring_append(st, "[version %.*s]", len, attr); break; + case _CS_CL_RULES_IMPORT_SOURCE: + rc= tailoring_append(st, "[import %.*s]", len, attr); + break; + case _CS_CL_SUPPRESS_CONTRACTIONS: rc= tailoring_append(st, "[suppress contractions %.*s]", len, attr); break; |