summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
authorunknown <bar@mysql.com>2004-06-11 16:29:16 +0500
committerunknown <bar@mysql.com>2004-06-11 16:29:16 +0500
commit5275eb21c25eef0c5b799315da10f26b779fbfe9 (patch)
tree72245220b637b8d5caf3b8267ea6801af59f0bc7 /strings
parent4047b5ade3b02d73c60eac7ddae7e5e928de7fd3 (diff)
downloadmariadb-git-5275eb21c25eef0c5b799315da10f26b779fbfe9.tar.gz
Allocate memory when a character set is requested:
- For simple character sets: from_uni conversion table. - For UCA: alternative weight arrays. Use mbminlen instead of MY_CS_NONTEXT.
Diffstat (limited to 'strings')
-rw-r--r--strings/ctype-big5.c2
-rw-r--r--strings/ctype-bin.c2
-rw-r--r--strings/ctype-czech.c1
-rw-r--r--strings/ctype-euc_kr.c2
-rw-r--r--strings/ctype-gb2312.c2
-rw-r--r--strings/ctype-gbk.c2
-rw-r--r--strings/ctype-latin1.c2
-rw-r--r--strings/ctype-mb.c1
-rw-r--r--strings/ctype-simple.c100
-rw-r--r--strings/ctype-sjis.c2
-rw-r--r--strings/ctype-tis620.c2
-rw-r--r--strings/ctype-uca.c458
-rw-r--r--strings/ctype-ucs2.c7
-rw-r--r--strings/ctype-ujis.c6
-rw-r--r--strings/ctype-utf8.c2
-rw-r--r--strings/ctype-win1250ch.c1
16 files changed, 587 insertions, 5 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index fb72dec7385..3d9cb92bf0d 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6269,6 +6269,7 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
{
+ NULL, /* init */
my_strnncoll_big5,
my_strnncollsp_big5,
my_strnxfrm_big5,
@@ -6281,6 +6282,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
static MY_CHARSET_HANDLER my_charset_big5_handler=
{
+ NULL, /* init */
ismbchar_big5,
mbcharlen_big5,
my_numchars_mb,
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 7b3164bf438..54fe4476ae6 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -333,6 +333,7 @@ skip:
MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
{
+ NULL, /* init */
my_strnncoll_binary,
my_strnncoll_binary,
my_strnxfrm_bin,
@@ -346,6 +347,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
+ NULL, /* init */
NULL, /* ismbchar */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 3218fdee673..8fde9498ed9 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -572,6 +572,7 @@ static MY_UNI_IDX idx_uni_8859_2[]={
static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
{
+ NULL, /* init */
my_strnncoll_czech,
my_strnncollsp_czech,
my_strnxfrm_czech,
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index c387246b4c6..e739339b9e4 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8637,6 +8637,7 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
+ NULL, /* init */
my_strnncoll_simple, /* strnncoll */
my_strnncollsp_simple,
my_strnxfrm_simple, /* strnxfrm */
@@ -8649,6 +8650,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
+ NULL, /* init */
ismbchar_euc_kr,
mbcharlen_euc_kr,
my_numchars_mb,
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index fe1f72e7eda..aee4ed55af6 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5688,6 +5688,7 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
+ NULL, /* init */
my_strnncoll_simple, /* strnncoll */
my_strnncollsp_simple,
my_strnxfrm_simple, /* strnxfrm */
@@ -5700,6 +5701,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
+ NULL, /* init */
ismbchar_gb2312,
mbcharlen_gb2312,
my_numchars_mb,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 8b659cb55f9..d6063e9f80f 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -9918,6 +9918,7 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
+ NULL, /* init */
my_strnncoll_gbk,
my_strnncollsp_gbk,
my_strnxfrm_gbk,
@@ -9930,6 +9931,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
+ NULL, /* init */
ismbchar_gbk,
mbcharlen_gbk,
my_numchars_mb,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 03d4e71377b..86c80ff5a66 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -380,6 +380,7 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
static MY_CHARSET_HANDLER my_charset_handler=
{
+ NULL, /* init */
NULL,
my_mbcharlen_8bit,
my_numchars_8bit,
@@ -674,6 +675,7 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
{
+ NULL, /* init */
my_strnncoll_latin1_de,
my_strnncollsp_latin1_de,
my_strnxfrm_latin1_de,
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 9b02cd3b3da..c143994dbc3 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -512,6 +512,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
{
+ NULL, /* init */
my_strnncoll_mb_bin,
my_strnncoll_mb_bin,
my_strnxfrm_mb_bin,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 5f0a7426db3..c28df91ae86 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1142,8 +1142,107 @@ skip:
}
+typedef struct /* Per-plane statistics collected while building the Unicode -> 8-bit index */
+{
+ int nchars; /* number of 8-bit codes whose Unicode value falls into this plane */
+ MY_UNI_IDX uidx; /* lowest (from) and highest (to) Unicode value seen in the plane, and its lookup table (tab) */
+} uni_idx;
+
+#define PLANE_SIZE 0x100 /* upper bound of the 8-bit code loop that fills the per-plane tables */
+#define PLANE_NUM 0x100 /* number of planes tracked */
+#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM) /* plane index of x: x shifted right by 8 bits, modulo PLANE_NUM */
+
+/*
+  qsort() comparator for uni_idx entries.
+
+  SYNOPSIS
+    pcmp
+    f  First entry  (const uni_idx *)
+    s  Second entry (const uni_idx *)
+
+  USAGE
+    Orders planes by descending number of characters, so the most
+    populated planes come first and empty planes (nchars == 0) come last.
+    Planes with the same number of characters are ordered by ascending
+    first code point.
+
+    The tie-break compares "from" with "from". Comparing one entry's
+    "from" with the other's "to" can give a non-zero result for two
+    identical entries, which breaks the consistency that qsort()
+    requires of its comparison function.
+
+  RETURN VALUES
+    < 0  f sorts before s
+    0    f and s are equivalent
+    > 0  f sorts after s
+*/
+
+static int pcmp(const void * f, const void * s)
+{
+  const uni_idx *lhs= (const uni_idx*) f;
+  const uni_idx *rhs= (const uni_idx*) s;
+  int res;
+
+  /* More populated planes first */
+  if (!(res= rhs->nchars - lhs->nchars))
+    res= lhs->uidx.from - rhs->uidx.from;
+  return res;
+}
+
+/*
+  Build the Unicode -> 8-bit conversion index of a simple character set
+  from its 8-bit -> Unicode table.
+
+  SYNOPSIS
+    create_fromuni
+    cs     Character set: cs->tab_to_uni is read, cs->tab_from_uni is set
+    alloc  Allocator used for the per-plane tables and for the index
+
+  USAGE
+    The Unicode values of all 8-bit codes are grouped by plane.
+    For every non-empty plane a table covering the range from..to
+    is allocated; it maps (wc - from) to the 8-bit code.
+    Planes are stored in the index in descending order of population,
+    followed by an all-zero end-of-list entry.
+
+  RETURN VALUES
+    FALSE - OK
+    TRUE  - cs has no tab_to_uni table, or an allocation failed
+*/
+
+static my_bool create_fromuni(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  uni_idx idx[PLANE_NUM];
+  int i,n;
+
+  /* Without a forward table there is nothing to invert */
+  if (!cs->tab_to_uni)
+    return TRUE;
+
+  /* Clear plane statistics */
+  bzero(idx,sizeof(idx));
+
+  /* Count number of characters in each plane */
+  for (i=0; i < PLANE_SIZE; i++)
+  {
+    uint16 wc=cs->tab_to_uni[i];
+    int pl= PLANE_NUMBER(wc);
+
+    /* Zero entries are skipped, except for code 0 itself */
+    if (wc || !i)
+    {
+      if (!idx[pl].nchars)
+      {
+        idx[pl].uidx.from=wc;
+        idx[pl].uidx.to=wc;
+      }
+      else
+      {
+        idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
+        idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
+      }
+      idx[pl].nchars++;
+    }
+  }
+
+  /* Sort planes in descending order */
+  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
+
+  /* Allocate and fill the lookup table of every non-empty plane */
+  for (i=0; i < PLANE_NUM; i++)
+  {
+    int ch,numchars;
+
+    /* Empty planes are sorted last: the first one ends the list */
+    if (!idx[i].nchars)
+      break;
+
+    numchars=idx[i].uidx.to-idx[i].uidx.from+1;
+    if (!(idx[i].uidx.tab=(uchar*) alloc(numchars * sizeof(*idx[i].uidx.tab))))
+      return TRUE;
+
+    bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));
+
+    /*
+      Code 0 is not stored: the table is zero-filled,
+      so its slot already holds 0.
+    */
+    for (ch=1; ch < PLANE_SIZE; ch++)
+    {
+      uint16 wc=cs->tab_to_uni[ch];
+      if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
+      {
+        int ofs= wc - idx[i].uidx.from;
+        idx[i].uidx.tab[ofs]= ch;
+      }
+    }
+  }
+
+  /* Allocate the index: one entry per non-empty plane plus a terminator */
+  n=i;
+  if (!(cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX)*(n+1))))
+    return TRUE;
+
+  for (i=0; i< n; i++)
+    cs->tab_from_uni[i]= idx[i].uidx;
+
+  /* Set end-of-list marker */
+  bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
+  return FALSE;
+}
+
+/*
+  "init" entry of the 8-bit character set handler:
+  builds the Unicode -> 8-bit index by calling create_fromuni()
+  and passes its result back unchanged.
+*/
+static my_bool my_cset_init_8bit(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  my_bool failed= create_fromuni(cs, alloc);
+
+  return failed;
+}
+
+
+
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
+ my_cset_init_8bit,
NULL, /* ismbchar */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
@@ -1170,6 +1269,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
{
+ NULL, /* init */
my_strnncoll_simple,
my_strnncollsp_simple,
my_strnxfrm_simple,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index b4a131d3410..3744711447a 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4534,6 +4534,7 @@ my_mb_wc_sjis(CHARSET_INFO *cs __attribute__((unused)),
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
+ NULL, /* init */
my_strnncoll_sjis,
my_strnncollsp_sjis,
my_strnxfrm_sjis,
@@ -4547,6 +4548,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
+ NULL, /* init */
ismbchar_sjis,
mbcharlen_sjis,
my_numchars_mb,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 79ac2079720..a0ba1a266ea 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -906,6 +906,7 @@ int my_wc_mb_tis620(CHARSET_INFO *cs __attribute__((unused)),
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
+ NULL, /* init */
my_strnncoll_tis620,
my_strnncollsp_tis620,
my_strnxfrm_tis620,
@@ -918,6 +919,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
+ NULL, /* init */
NULL, /* ismbchar */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index e6b68b8c9b2..846f17982c3 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7036,8 +7036,464 @@ int my_wildcmp_uca(CHARSET_INFO *cs,
}
+/*
+ Collation language is implemented according to
+ subset of ICU Collation Customization (tailorings):
+ http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
+
+ Collation language elements:
+ Delimiters:
+ space - skipped
+
+ <char> := A-Z | a-z | \uXXXX
+
+ Shift command:
+ <shift> := & - reset at this letter.
+
+ Diff command:
+ <d1> := < - Identifies a primary difference.
+ <d2> := << - Identifies a secondary difference.
+ <d3> := <<< - Identifies a tertiary difference.
+
+
+ Collation rules:
+ <ruleset> := <rule> { <ruleset> }
+
+ <rule> := <d1> <string>
+ | <d2> <string>
+ | <d3> <string>
+ | <shift> <char>
+
+ <string> := <char> [ <string> ]
+
+ An example, Polish collation:
+
+ &A < \u0105 <<< \u0104
+ &C < \u0107 <<< \u0106
+ &E < \u0119 <<< \u0118
+ &L < \u0142 <<< \u0141
+ &N < \u0144 <<< \u0143
+ &O < \u00F3 <<< \u00D3
+ &S < \u015B <<< \u015A
+ &Z < \u017A <<< \u017B
+*/
+
+
+typedef enum my_coll_lexem_num_en /* Token kinds returned by my_coll_lexem_next() */
+{
+ MY_COLL_LEXEM_EOF = 0, /* end of input; the parse loop in my_coll_rule_parse() stops on this zero value */
+ MY_COLL_LEXEM_DIFF = 1, /* '<', '<<' or '<<<' */
+ MY_COLL_LEXEM_SHIFT = 4, /* '&' */
+ MY_COLL_LEXEM_CHAR = 5, /* a letter A-Z / a-z, or a \u hex escape */
+ MY_COLL_LEXEM_ERROR = 6 /* anything else that is not whitespace */
+} my_coll_lexem_num;
+
+
+typedef struct my_coll_lexem_st /* State of the collation rule lexical analyzer */
+{
+ const char *beg; /* current scan position */
+ const char *end; /* end of the input: scanning stops when beg reaches it */
+ const char *prev; /* position of the most recently examined character; error messages quote the text from here */
+ int diff; /* level of the last '<' token: 1, 2 or 3 */
+ int code; /* character code of the last character token */
+} MY_COLL_LEXEM;
+
+
+/*
+  Initialize the collation rule lexical analyzer.
+
+  SYNOPSIS
+    my_coll_lexem_init
+    lexem   Lexical analyzer to initialize
+    str     Start of the constant string to parse
+    strend  End of the string
+
+  RETURN VALUES
+    N/A
+*/
+
+static void my_coll_lexem_init(MY_COLL_LEXEM *lexem,
+                               const char *str, const char *strend)
+{
+  /* No token has been scanned yet */
+  lexem->code= 0;
+  lexem->diff= 0;
+
+  /* Both the scan position and the error context start at the beginning */
+  lexem->end= strend;
+  lexem->prev= lexem->beg= str;
+}
+
+
+/*
+  Print collation customization expression parse error, with context.
+
+  SYNOPSIS
+    my_coll_lexem_print_error
+    lexem    Lexical analyzer to take the context from
+    errstr   String to write the error to
+    errsize  Size of errstr
+    txt      Error message
+
+  USAGE
+    The message has the form "<txt> at '<context>'", where the context
+    is at most 29 bytes of the input, starting at the position of the
+    last examined character (lexem->prev).
+
+    If errsize is 0 nothing is written. If errsize is 1 only the
+    terminating zero is written.
+
+  RETURN VALUES
+    N/A
+*/
+
+static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem,
+                                      char *errstr, size_t errsize,
+                                      const char *txt)
+{
+  char tail[30];
+  size_t len;
+
+  /* No room at all: errsize-1 below would wrap around */
+  if (!errsize)
+    return;
+
+  errstr[errsize-1]= '\0';
+
+  /* Only the terminator fits: do not hand a zero size to the formatter */
+  if (errsize == 1)
+    return;
+
+  /*
+    Copy the context into a local buffer.
+    NOTE(review): assumes strmake() copies at most the given number of
+    bytes and zero-terminates the result - confirm against its definition.
+  */
+  len= lexem->end - lexem->prev;
+  strmake (tail, lexem->prev, min(len, sizeof(tail)-1));
+  my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail);
+}
+
+
+/*
+  Convert a hex digit into its numeric value.
+
+  SYNOPSIS
+    ch2x
+    ch  Hex digit to convert: '0'..'9', 'a'..'f' or 'A'..'F'
+
+  RETURN VALUES
+    An integer value in the range 0..15
+    -1 if ch is not a hex digit
+*/
+
+static int ch2x(int ch)
+{
+  int value= -1;
+
+  if ('0' <= ch && ch <= '9')
+    value= ch - '0';
+  else if ('a' <= ch && ch <= 'f')
+    value= ch - 'a' + 10;
+  else if ('A' <= ch && ch <= 'F')
+    value= ch - 'A' + 10;
+
+  return value;
+}
+
+
+/*
+  Collation language lexical parser:
+  Scans the next lexem.
+
+  SYNOPSIS
+    my_coll_lexem_next
+    lexem  Lexical analyzer, previously initialized by
+           my_coll_lexem_init.
+
+  USAGE
+    Call this function in a loop.
+    Space, tab, CR and LF are skipped.
+    For a diff lexem the level (1..3) is stored in lexem->diff.
+    For a char lexem the character code is stored in lexem->code.
+    A \u escape must have at least one hex digit and its value
+    must not exceed 0xFFFF, otherwise an error is returned.
+    lexem->prev is left at the start of the lexem, so that an
+    error message can quote the offending text.
+
+  RETURN VALUES
+    Lexem number: eof, diff, shift, char or error.
+*/
+
+static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
+{
+  for ( ;lexem->beg < lexem->end ; lexem->beg++)
+  {
+    lexem->prev= lexem->beg;
+    if (lexem->beg[0] == ' '  || lexem->beg[0] == '\t' ||
+        lexem->beg[0] == '\r' || lexem->beg[0] == '\n')
+      continue;
+
+    if (lexem->beg[0] == '&')
+    {
+      lexem->beg++;
+      return MY_COLL_LEXEM_SHIFT;
+    }
+
+    if (lexem->beg[0] == '<')
+    {
+      /* Count up to three consecutive '<' characters */
+      for (lexem->beg++, lexem->diff=1;
+           (lexem->beg < lexem->end) &&
+           (lexem->beg[0] == '<') && (lexem->diff<3);
+           lexem->beg++, lexem->diff++);
+      return MY_COLL_LEXEM_DIFF;
+    }
+
+    if ((lexem->beg[0] >= 'a' && lexem->beg[0] <= 'z') ||
+        (lexem->beg[0] >= 'A' && lexem->beg[0] <= 'Z'))
+    {
+      lexem->code= lexem->beg[0];
+      lexem->beg++;
+      return MY_COLL_LEXEM_CHAR;
+    }
+
+    if ((lexem->beg[0] == '\\') &&
+        (lexem->beg+2 < lexem->end) &&
+        (lexem->beg[1] == 'u'))
+    {
+      int ch;
+      int ndigits= 0;
+
+      lexem->code= 0;
+      for (lexem->beg+=2;
+           (lexem->beg < lexem->end) && ((ch= ch2x(lexem->beg[0])) >= 0) ;
+           lexem->beg++)
+      {
+        lexem->code= (lexem->code << 4) + ch;
+        ndigits++;
+        /*
+          Reject codes that do not fit into 16 bits. This also keeps
+          the shift above from ever overflowing the signed code.
+        */
+        if (lexem->code > 0xFFFF)
+          return MY_COLL_LEXEM_ERROR;
+      }
+
+      /* "\u" that is not followed by a hex digit is not a character */
+      if (!ndigits)
+        return MY_COLL_LEXEM_ERROR;
+      return MY_COLL_LEXEM_CHAR;
+    }
+
+    return MY_COLL_LEXEM_ERROR;
+  }
+  return MY_COLL_LEXEM_EOF;
+}
+
+
+/*
+ Collation rule item: one tailored character,
+ as stored by my_coll_rule_parse().
+*/
+
+typedef struct my_coll_rule_item_st
+{
+ uint base; /* Base character: the one given after the last '&' */
+ uint curr; /* Current character: the one given after '<', '<<' or '<<<' */
+ int diff[3]; /* Primary, Secondary and Tertiary difference counted since the last '&' */
+} MY_COLL_RULE;
+
+
+/*
+ Collation language syntax parser.
+ Uses lexical parser.
+
+ SYNOPSIS
+   my_coll_rule_parse
+   rule     Collation rule list to load to.
+   mitems   Maximum number of items that "rule" can hold.
+   str      A string containing collation language expression.
+   strend   End of the string.
+   errstr   Buffer to write an error message to.
+   errsize  Size of errstr.
+ USAGE
+   The parser is a state machine with three states:
+     0 - start of the input: only '&' is accepted
+     1 - after a character: '&' or '<' is accepted
+     2 - after '&' or '<': a character is expected
+   A character after '&' becomes the base character and resets
+   the difference counters. A character after '<', '<<' or '<<<'
+   increments the counter of that level, resets the counters of
+   the lower levels and is stored as one item.
+
+   NOTE: input that ends right after '&' or '<' is not reported
+   as an error; the items collected so far are returned.
+
+ RETURN VALUES
+   >= 0 - OK, the number of items stored in "rule".
+   -1   - ERROR, e.g. too many items; errstr describes the problem.
+*/
+
+static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
+ const char *str, const char *strend,
+ char *errstr, size_t errsize)
+{
+ MY_COLL_LEXEM lexem;
+ my_coll_lexem_num lexnum;
+ my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR; /* no operator seen yet */
+ MY_COLL_RULE item;
+ int state= 0;
+ size_t nitems= 0;
+
+ /* Init all variables */
+ errstr[0]= '\0';
+ bzero(&item, sizeof(item));
+ my_coll_lexem_init(&lexem, str, strend);
+
+ while ((lexnum= my_coll_lexem_next(&lexem))) /* stops at MY_COLL_LEXEM_EOF, which is 0 */
+ {
+ if (lexnum == MY_COLL_LEXEM_ERROR)
+ {
+ my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character");
+ return -1;
+ }
+
+ switch (state) {
+ case 0: /* start of the input */
+ if (lexnum != MY_COLL_LEXEM_SHIFT)
+ {
+ my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected");
+ return -1;
+ }
+ prevlexnum= lexnum;
+ state= 2;
+ continue;
+
+ case 1: /* after a character */
+ if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF)
+ {
+ my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected");
+ return -1;
+ }
+ prevlexnum= lexnum;
+ state= 2;
+ continue;
+
+ case 2: /* after '&' or '<' */
+ if (lexnum != MY_COLL_LEXEM_CHAR)
+ {
+ my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected");
+ return -1;
+ }
+
+ if (prevlexnum == MY_COLL_LEXEM_SHIFT)
+ {
+ /* New base character: start counting differences from zero */
+ item.base= lexem.code;
+ item.diff[0]= 0;
+ item.diff[1]= 0;
+ item.diff[2]= 0;
+ }
+ else if (prevlexnum == MY_COLL_LEXEM_DIFF)
+ {
+ item.curr= lexem.code;
+ if (lexem.diff == 3)
+ {
+ item.diff[2]++;
+ }
+ else if (lexem.diff == 2)
+ {
+ item.diff[1]++;
+ item.diff[2]= 0;
+ }
+ else if (lexem.diff == 1)
+ {
+ item.diff[0]++;
+ item.diff[1]= 0;
+ item.diff[2]= 0;
+ }
+ if (nitems >= mitems)
+ {
+ my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");
+ return -1;
+ }
+ rule[nitems++]= item; /* the item is copied by value, counters included */
+ }
+ else
+ {
+ my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen");
+ return -1;
+ }
+ state= 1;
+ continue;
+ }
+ }
+ return (size_t) nitems; /* converted to the int return type */
+}
+
+#define MY_MAX_COLL_RULE 64 /* capacity of the rule array parsed by create_tailoring() */
+
+/*
+ This function creates a UCS2 collation from
+ the default Unicode Collation Algorithm (UCA)
+ weights, applying tailorings, i.e. a set of
+ alternative weights for some characters.
+
+ The default UCA weights are stored in my_charset_ucs2_general_uca.
+ They consist of 256 pages, 256 characters each.
+
+ If a page is not overwritten by tailoring rules,
+ it is shared as is with the default UCA weights.
+
+ If a page contains some overwritten characters, it is
+ allocated. Untouched characters are copied from the
+ default weights.
+
+ SYNOPSIS
+   create_tailoring
+   cs     Collation: cs->tailoring is parsed, cs->sort_order and
+          cs->sort_order_big are set on success.
+   alloc  Allocator used for the new length and weight arrays.
+
+ RETURN VALUES
+   0 - OK
+   1 - ERROR: cs has no tailoring, the tailoring cannot be parsed
+       or produces no rules, or an allocation failed.
+*/
+
+static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+ MY_COLL_RULE rule[MY_MAX_COLL_RULE];
+ char errstr[128];
+ uchar *newlengths;
+ uint16 **newweights;
+ const uchar *deflengths= my_charset_ucs2_general_uca.sort_order; /* weights per character, one entry per page */
+ uint16 **defweights= my_charset_ucs2_general_uca.sort_order_big; /* default weights, one array per page */
+ int rc, i;
+
+ if (!cs->tailoring)
+ return 1;
+
+ /* Parse ICU Collation Customization expression */
+ if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE,
+ cs->tailoring,
+ cs->tailoring + strlen(cs->tailoring),
+ errstr, sizeof(errstr))) <= 0)
+ {
+ /*
+ TODO: add error message reporting.
+ printf("Error: %d '%s'\n", rc, errstr);
+ */
+ return 1;
+ }
+
+ if (!(newweights= (uint16**) alloc(256*sizeof(uint16*))))
+ return 1;
+ bzero(newweights, 256*sizeof(uint16*));
+
+ if (!(newlengths= (uchar*) alloc(256)))
+ return 1;
+
+ memcpy(newlengths, deflengths, 256);
+
+ /*
+ Calculate maximum lengths for the pages
+ which will be overwritten: a tailored character
+ needs room for all weights of its base character.
+ */
+ for (i=0; i < rc; i++)
+ {
+ uint pageb= (rule[i].base >> 8) & 0xFF;
+ uint pagec= (rule[i].curr >> 8) & 0xFF;
+
+ if (newlengths[pagec] < deflengths[pageb])
+ newlengths[pagec]= deflengths[pageb];
+ }
+
+ for (i=0; i < rc; i++)
+ {
+ uint pageb= (rule[i].base >> 8) & 0xFF;
+ uint pagec= (rule[i].curr >> 8) & 0xFF;
+ uint chb, chc;
+
+ if (!newweights[pagec])
+ {
+ /* Alloc new page and copy the default UCA weights */
+ uint size= 256*newlengths[pagec]*sizeof(uint16);
+
+ if (!(newweights[pagec]= (uint16*) alloc(size)))
+ return 1;
+ bzero((void*) newweights[pagec], size);
+
+ /* The new page may be wider than the default one: copy character by character */
+ for (chc=0 ; chc < 256; chc++)
+ {
+ memcpy(newweights[pagec] + chc*newlengths[pagec],
+ defweights[pagec] + chc*deflengths[pagec],
+ deflengths[pagec]*sizeof(uint16));
+ }
+ }
+
+ /*
+ Apply the alternative rule:
+ shift to the base character and primary difference.
+
+ NOTE(review): only deflengths[pageb] weights are overwritten.
+ If the slot of the tailored character is longer, its remaining
+ default weights are kept - confirm that this is harmless.
+ */
+ chc= rule[i].curr & 0xFF;
+ chb= rule[i].base & 0xFF;
+ memcpy(newweights[pagec] + chc*newlengths[pagec],
+ defweights[pageb] + chb*deflengths[pageb],
+ deflengths[pageb]*sizeof(uint16));
+ /*
+ Apply primary difference.
+ NOTE(review): diff[1] and diff[2] (secondary and tertiary)
+ are not used here - confirm that this is intended.
+ */
+ newweights[pagec][chc*newlengths[pagec]]+= rule[i].diff[0];
+ }
+
+ /* Copy non-overwritten pages from the default UCA weights */
+ for (i= 0; i < 256 ; i++)
+ if (!newweights[i])
+ newweights[i]= defweights[i];
+
+ cs->sort_order= newlengths;
+ cs->sort_order_big= newweights;
+
+ return 0;
+}
+
+/*
+  "init" entry of the UCA collation handler:
+  builds the tailored weight arrays by calling create_tailoring()
+  and passes its result back unchanged.
+*/
+static my_bool my_coll_init_uca(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  my_bool failed= create_tailoring(cs, alloc);
+
+  return failed;
+}
+
MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
{
+ my_coll_init_uca, /* init */
my_strnncoll_uca,
my_strnncollsp_uca,
my_strnxfrm_uca,
@@ -7051,7 +7507,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
CHARSET_INFO my_charset_ucs2_general_uca=
{
45,0,0, /* number */
- MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT,
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */
"ucs2_general_uca", /* name */
"", /* comment */
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 9f1accf841f..f05e85a9d88 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1372,6 +1372,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
{
+ NULL, /* init */
my_strnncoll_ucs2,
my_strnncoll_ucs2,
my_strnxfrm_ucs2,
@@ -1385,6 +1386,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
{
+ NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncoll_ucs2_bin,
my_strnxfrm_ucs2_bin,
@@ -1398,6 +1400,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
MY_CHARSET_HANDLER my_charset_ucs2_handler=
{
+ NULL, /* init */
my_ismbchar_ucs2, /* ismbchar */
my_mbcharlen_ucs2, /* mbcharlen */
my_numchars_ucs2,
@@ -1426,7 +1429,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
CHARSET_INFO my_charset_ucs2_general_ci=
{
35,0,0, /* number */
- MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT,
+ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */
"ucs2_general_ci", /* name */
"", /* comment */
@@ -1452,7 +1455,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
CHARSET_INFO my_charset_ucs2_bin=
{
90,0,0, /* number */
- MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONTEXT,
+ MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE,
"ucs2", /* cs name */
"ucs2_bin", /* name */
"", /* comment */
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index f28ea165f80..fb7946a6b98 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -8423,6 +8423,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
+ NULL, /* init */
my_strnncoll_simple,/* strnncoll */
my_strnncollsp_simple,
my_strnxfrm_simple, /* strnxfrm */
@@ -8435,14 +8436,15 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
+ NULL, /* init */
ismbchar_ujis,
mbcharlen_ujis,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_mb,
my_lengthsp_8bit,
- my_mb_wc_euc_jp, /* mb_wc */
- my_wc_mb_euc_jp, /* wc_mb */
+ my_mb_wc_euc_jp, /* mb_wc */
+ my_wc_mb_euc_jp, /* wc_mb */
my_caseup_str_mb,
my_casedn_str_mb,
my_caseup_mb,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 39e9260ffed..99ac114de9c 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2045,6 +2045,7 @@ static int my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)) , uint c)
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
+ NULL, /* init */
my_strnncoll_utf8,
my_strnncollsp_utf8,
my_strnxfrm_utf8,
@@ -2057,6 +2058,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
+ NULL, /* init */
my_ismbchar_utf8,
my_mbcharlen_utf8,
my_numchars_mb,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index 670318a082e..a2c5768b16c 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -605,6 +605,7 @@ my_like_range_win1250ch(CHARSET_INFO *cs __attribute__((unused)),
static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
{
+ NULL, /* init */
my_strnncoll_win1250ch,
my_strnncollsp_win1250ch,
my_strnxfrm_win1250ch,