Allocate memory when a character set is requested:

- For simple character sets: from_uni conversion table. - For UCA: alternative weight arrays. Use mbminlen instead of MY_CS_NONTEXT
author: unknown <bar@mysql.com> 2004-06-11 16:29:16 +0500
committer: unknown <bar@mysql.com> 2004-06-11 16:29:16 +0500
commit: 5275eb21c25eef0c5b799315da10f26b779fbfe9 (patch)
tree: 72245220b637b8d5caf3b8267ea6801af59f0bc7 /strings
parent: 4047b5ade3b02d73c60eac7ddae7e5e928de7fd3 (diff)
download: mariadb-git-5275eb21c25eef0c5b799315da10f26b779fbfe9.tar.gz
16 files changed, 587 insertions, 5 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index fb72dec7385..3d9cb92bf0d 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6269,6 +6269,7 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_big5,
   my_strnncollsp_big5,
   my_strnxfrm_big5,
@@ -6281,6 +6282,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_big5_handler=
 {
+  NULL,			/* init */
   ismbchar_big5,
   mbcharlen_big5,
   my_numchars_mb,
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 7b3164bf438..54fe4476ae6 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -333,6 +333,7 @@ skip:
 
 MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
 {
+    NULL,			/* init */
     my_strnncoll_binary,
     my_strnncoll_binary,
     my_strnxfrm_bin,
@@ -346,6 +347,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,			/* init */
     NULL,			/* ismbchar      */
     my_mbcharlen_8bit,		/* mbcharlen     */
     my_numchars_8bit,
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 3218fdee673..8fde9498ed9 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -572,6 +572,7 @@ static MY_UNI_IDX idx_uni_8859_2[]={
 
 static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_czech,
   my_strnncollsp_czech,
   my_strnxfrm_czech,
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index c387246b4c6..e739339b9e4 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8637,6 +8637,7 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_simple,  /* strnncoll  */
   my_strnncollsp_simple,
   my_strnxfrm_simple,	/* strnxfrm   */
@@ -8649,6 +8650,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+  NULL,			/* init */
   ismbchar_euc_kr,
   mbcharlen_euc_kr,
   my_numchars_mb,
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index fe1f72e7eda..aee4ed55af6 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5688,6 +5688,7 @@ my_mb_wc_gb2312(CHARSET_INFO *cs  __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_simple,  /* strnncoll  */
   my_strnncollsp_simple,
   my_strnxfrm_simple,	/* strnxfrm   */
@@ -5700,6 +5701,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+  NULL,			/* init */
   ismbchar_gb2312,
   mbcharlen_gb2312,
   my_numchars_mb,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 8b659cb55f9..d6063e9f80f 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -9918,6 +9918,7 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_gbk,
   my_strnncollsp_gbk,
   my_strnxfrm_gbk,
@@ -9930,6 +9931,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+  NULL,			/* init */
   ismbchar_gbk,
   mbcharlen_gbk,
   my_numchars_mb,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 03d4e71377b..86c80ff5a66 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -380,6 +380,7 @@ int my_wc_mb_latin1(CHARSET_INFO *cs  __attribute__((unused)),
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,			/* init */
     NULL,
     my_mbcharlen_8bit,
     my_numchars_8bit,
@@ -674,6 +675,7 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
 {
+  NULL,			/* init */
   my_strnncoll_latin1_de,
   my_strnncollsp_latin1_de,
   my_strnxfrm_latin1_de,
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 9b02cd3b3da..c143994dbc3 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -512,6 +512,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
 
 MY_COLLATION_HANDLER my_collation_mb_bin_handler =
 {
+    NULL,		/* init */
     my_strnncoll_mb_bin,
     my_strnncoll_mb_bin,
     my_strnxfrm_mb_bin,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 5f0a7426db3..c28df91ae86 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1142,8 +1142,107 @@ skip:
 }
 
 
+/* Per-plane statistics collected while building the Unicode -> 8bit index */
+typedef struct
+{
+  int		nchars;	/* Number of characters counted in this plane     */
+  MY_UNI_IDX	uidx;	/* Lowest/highest code seen and the lookup table  */
+} uni_idx;
+
+#define PLANE_SIZE	0x100
+#define PLANE_NUM	0x100
+#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)
+
+/*
+  qsort() comparator for plane statistics.
+
+  Planes are ordered by descending number of characters. Planes holding
+  the same number of characters are ordered by ascending lowest code,
+  so that the comparison is consistent in both directions, as qsort()
+  requires.
+
+  RETURN VALUES
+    <0, 0 or >0 when f sorts before, equal to or after s.
+*/
+static int pcmp(const void * f, const void * s)
+{
+  const uni_idx *F= (const uni_idx*) f;
+  const uni_idx *S= (const uni_idx*) s;
+  int res;
+
+  if (!(res=((S->nchars)-(F->nchars))))
+    res=((F->uidx.from)-(S->uidx.from));
+  return res;
+}
+
+/*
+  Build the Unicode -> 8bit conversion index (cs->tab_from_uni)
+  from the 8bit -> Unicode table (cs->tab_to_uni).
+
+  SYNOPSIS
+    create_fromuni
+    cs                   Character set to build the index for
+    alloc                Allocator used for the index and its tables
+
+  RETURN VALUES
+    FALSE - OK
+    TRUE  - tab_to_uni is not set, or an allocation failed
+*/
+static my_bool create_fromuni(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  uni_idx	idx[PLANE_NUM];
+  int		i,n;
+
+  /* Without the 8bit -> Unicode table there is nothing to build from */
+  if (!cs->tab_to_uni)
+    return TRUE;
+
+  /* Clear plane statistics */
+  bzero(idx,sizeof(idx));
+
+  /* Count number of characters in each plane */
+  for (i=0; i < PLANE_SIZE; i++)
+  {
+    uint16 wc=cs->tab_to_uni[i];
+    int pl= PLANE_NUMBER(wc);
+
+    /* A zero code is counted for character 0 only */
+    if (wc || !i)
+    {
+      if (!idx[pl].nchars)
+      {
+        idx[pl].uidx.from=wc;
+        idx[pl].uidx.to=wc;
+      }else
+      {
+        idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
+        idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
+      }
+      idx[pl].nchars++;
+    }
+  }
+
+  /* Sort planes in descending order */
+  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
+
+  /* Allocate and fill reverse table for each plane */
+  for (i=0; i < PLANE_NUM; i++)
+  {
+    int ch,numchars;
+
+    /* Planes are sorted, so the first empty one ends the list */
+    if (!idx[i].nchars)
+      break;
+
+    numchars=idx[i].uidx.to-idx[i].uidx.from+1;
+    if (!(idx[i].uidx.tab=(uchar*) alloc(numchars * sizeof(*idx[i].uidx.tab))))
+      return TRUE;
+
+    bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));
+
+    /* Character 0 is skipped: the zero-filled table already yields 0 */
+    for (ch=1; ch < PLANE_SIZE; ch++)
+    {
+      uint16 wc=cs->tab_to_uni[ch];
+      if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
+      {
+        int ofs= wc - idx[i].uidx.from;
+        idx[i].uidx.tab[ofs]= ch;
+      }
+    }
+  }
+
+  /* Allocate the index: one entry per non-empty plane plus the marker */
+  n=i;
+  if (!(cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX)*(n+1))))
+    return TRUE;
+
+  for (i=0; i< n; i++)
+    cs->tab_from_uni[i]= idx[i].uidx;
+
+  /* Set end-of-list marker */
+  bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
+  return FALSE;
+}
+
+/*
+  Character set "init" handler for 8bit character sets:
+  forwards to create_fromuni() and returns its result.
+*/
+static my_bool my_cset_init_8bit(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  return create_fromuni(cs, alloc);
+}
+
+
+
 MY_CHARSET_HANDLER my_charset_8bit_handler=
 {
+    my_cset_init_8bit,
     NULL,			/* ismbchar      */
     my_mbcharlen_8bit,		/* mbcharlen     */
     my_numchars_8bit,
@@ -1170,6 +1269,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
 
 MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_simple,
     my_strnncollsp_simple,
     my_strnxfrm_simple,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index b4a131d3410..3744711447a 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4534,6 +4534,7 @@ my_mb_wc_sjis(CHARSET_INFO *cs  __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_sjis,
   my_strnncollsp_sjis,
   my_strnxfrm_sjis,
@@ -4547,6 +4548,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+  NULL,			/* init */
   ismbchar_sjis,
   mbcharlen_sjis,
   my_numchars_mb,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 79ac2079720..a0ba1a266ea 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -906,6 +906,7 @@ int my_wc_mb_tis620(CHARSET_INFO *cs  __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_tis620,
     my_strnncollsp_tis620,
     my_strnxfrm_tis620,
@@ -918,6 +919,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,		/* init */
     NULL,		/* ismbchar  */
     my_mbcharlen_8bit,	/* mbcharlen */
     my_numchars_8bit,
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index e6b68b8c9b2..846f17982c3 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7036,8 +7036,464 @@ int my_wildcmp_uca(CHARSET_INFO *cs,
 }
 
 
+/*
+  Collation language is implemented according to
+  subset of ICU Collation Customization (tailorings):
+  http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
+  
+  Collation language elements:
+  Delimiters:
+    space   - skipped
+  
+  <char> :=  A-Z | a-z | \uXXXX
+  
+  Shift command:
+    <shift>  := &       - reset at this letter. 
+  
+  Diff command:
+    <d1> :=  <     - Identifies a primary difference.
+    <d2> :=  <<    - Identifies a secondary difference.
+    <d3> := <<<    - Identifies a tertiary difference.
+  
+  
+  Collation rules:
+    <ruleset> :=  <rule>  { <ruleset> }
+    
+    <rule> :=   <d1>    <string>
+              | <d2>    <string>
+              | <d3>    <string>
+              | <shift> <char>
+    
+    <string> := <char> [ <string> ]
+
+  An example, Polish collation:
+  
+    &A < \u0105 <<< \u0104
+    &C < \u0107 <<< \u0106
+    &E < \u0119 <<< \u0118
+    &L < \u0142 <<< \u0141
+    &N < \u0144 <<< \u0143
+    &O < \u00F3 <<< \u00D3
+    &S < \u015B <<< \u015A
+    &Z < \u017A <<< \u017B    
+*/
+
+
+/* Lexem types returned by my_coll_lexem_next() */
+typedef enum my_coll_lexem_num_en
+{
+  MY_COLL_LEXEM_EOF	= 0,	/* End of input; 0 ends the parser loop  */
+  MY_COLL_LEXEM_DIFF	= 1, 	/* "<", "<<" or "<<<"                    */
+  MY_COLL_LEXEM_SHIFT	= 4,	/* "&"                                   */
+  MY_COLL_LEXEM_CHAR	= 5,	/* A letter or a \u escape               */
+  MY_COLL_LEXEM_ERROR	= 6	/* Anything else                         */
+} my_coll_lexem_num;
+
+
+/* State of the collation rule lexical analyzer */
+typedef struct my_coll_lexem_st
+{
+  const char *beg;   /* Current scan position                          */
+  const char *end;   /* End of the string being scanned                */
+  const char *prev;  /* Start of the last scanned lexem, error context */
+  int   diff;        /* Number of '<' in the last diff lexem, 1..3     */
+  int   code;        /* Character code of the last char lexem          */
+} MY_COLL_LEXEM;
+
+
+/*
+  Initialize collation rule lexical analyzer
+
+  SYNOPSIS
+    my_coll_lexem_init
+    lexem                Lex analyzer to init
+    str                  Const string to parse
+    strend               End of the string
+  USAGE
+    Call once before the first my_coll_lexem_next() call.
+
+  RETURN VALUES
+    N/A
+*/
+
+static void my_coll_lexem_init(MY_COLL_LEXEM *lexem,
+                               const char *str, const char *strend)
+{
+  /* Nothing has been scanned yet */
+  lexem->code= 0;
+  lexem->diff= 0;
+
+  /* Both the scan position and the error context start at str */
+  lexem->end= strend;
+  lexem->beg= lexem->prev= str;
+}
+
+
+/*
+  Print collation customization expression parse error, with context.
+
+  SYNOPSIS
+    my_coll_lexem_print_error
+    lexem                Lex analyzer to take context from
+    errstr               string to write error to
+    errsize              errstr size
+    txt                  error message
+  USAGE
+    The context is the input starting at the last scanned lexem,
+    cut to at most 29 characters.
+    Nothing is written when errsize is 0.
+
+  RETURN VALUES
+    N/A
+*/
+
+static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem,
+                                      char *errstr, size_t errsize,
+                                      const char *txt)
+{
+  char tail[30];
+  size_t len;
+
+  /* errsize-1 below would wrap around and write out of bounds */
+  if (!errsize)
+    return;
+
+  len= lexem->end - lexem->prev;
+  strmake (tail, lexem->prev, min(len, sizeof(tail)-1));
+  errstr[errsize-1]= '\0';
+  my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail);
+}
+
+
+/*
+  Convert a hex digit into its numeric value
+
+  SYNOPSIS
+    ch2x
+    ch                   hex digit to convert, in either letter case
+  USAGE
+
+  RETURN VALUES
+    an integer value in the range 0..15
+    -1 if ch is not a hex digit
+*/
+
+static int ch2x(int ch)
+{
+  int val= -1;
+
+  if ('0' <= ch && ch <= '9')
+    val= ch - '0';
+  else if ('a' <= ch && ch <= 'f')
+    val= ch - 'a' + 10;
+  else if ('A' <= ch && ch <= 'F')
+    val= ch - 'A' + 10;
+
+  return val;
+}
+
+
+/*
+  Collation language lexical parser:
+  Scans the next lexem.
+
+  SYNOPSIS
+    my_coll_lexem_next
+    lexem                Lex analyzer, previously initialized by
+                         my_coll_lexem_init.
+  USAGE
+    Call this function in a loop.
+    After a diff lexem, lexem->diff holds its level (1..3).
+    After a char lexem, lexem->code holds the character code.
+    A \u escape needs at least one hex digit and a value
+    that fits into 16 bits, otherwise it is an error.
+
+  RETURN VALUES
+    Lexem number: eof, diff, shift, char or error.
+*/
+
+static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
+{
+  for ( ;lexem->beg < lexem->end ; lexem->beg++)
+  {
+    /* Remember where this lexem starts, for error messages */
+    lexem->prev= lexem->beg;
+    if (lexem->beg[0] == ' '  || lexem->beg[0] == '\t' ||
+        lexem->beg[0] == '\r' || lexem->beg[0] == '\n')
+      continue;
+
+    if (lexem->beg[0] == '&')
+    {
+      lexem->beg++;
+      return MY_COLL_LEXEM_SHIFT;
+    }
+
+    if (lexem->beg[0] == '<')
+    {
+      /* Count up to three consecutive '<' to get the difference level */
+      for (lexem->beg++, lexem->diff=1;
+           (lexem->beg < lexem->end) &&
+           (lexem->beg[0] == '<') && (lexem->diff<3);
+           lexem->beg++, lexem->diff++)
+        /* no-op */;
+      return MY_COLL_LEXEM_DIFF;
+    }
+
+    if ((lexem->beg[0] >= 'a' && lexem->beg[0] <= 'z') ||
+        (lexem->beg[0] >= 'A' && lexem->beg[0] <= 'Z'))
+    {
+      lexem->code= lexem->beg[0];
+      lexem->beg++;
+      return MY_COLL_LEXEM_CHAR;
+    }
+
+    if ((lexem->beg[0] == '\\') &&
+        (lexem->beg+2 < lexem->end) &&
+        (lexem->beg[1] == 'u'))
+    {
+      int ch;
+      const char *digits= lexem->beg + 2;
+
+      lexem->code= 0;
+      for (lexem->beg= digits;
+           (lexem->beg < lexem->end) && ((ch= ch2x(lexem->beg[0])) >= 0) ;
+           lexem->beg++)
+      {
+        /* One more digit would not fit into 16 bits */
+        if (lexem->code > 0xFFF)
+          return MY_COLL_LEXEM_ERROR;
+        lexem->code= (lexem->code << 4) + ch;
+      }
+
+      /* "\u" without any hex digit is not a character */
+      if (lexem->beg == digits)
+        return MY_COLL_LEXEM_ERROR;
+      return MY_COLL_LEXEM_CHAR;
+    }
+
+    return MY_COLL_LEXEM_ERROR;
+  }
+  return MY_COLL_LEXEM_EOF;
+}
+
+
+/*
+  Collation rule item:
+  one character (curr) placed relative to the base character
+  of the preceding "&" command. diff[] counts the "<", "<<" and "<<<"
+  steps taken since that "&" command.
+*/
+
+typedef struct my_coll_rule_item_st
+{
+  uint base;     /* Base character                             */
+  uint curr;     /* Current character                          */
+  int diff[3];   /* Primary, Secondary and Tertiary difference */
+} MY_COLL_RULE;
+
+
+/*
+  Collation language syntax parser.
+  Uses lexical parser.
+
+  SYNOPSIS
+    my_coll_rule_parse
+    rule                 Collation rule list to load to.
+    mitems               Maximum number of items the rule list can hold.
+    str                  A string containing collation language expression.
+    strend               End of the string.
+    errstr               Buffer to write an error message to.
+    errsize              Size of errstr, must be at least 2.
+  USAGE
+    Only "<" items are stored in the rule list; an "&" item just sets
+    the base character for the items that follow it.
+    An expression that ends right after "&" or "<" is not reported
+    as an error.
+
+  RETURN VALUES
+    >= 0 - OK, the number of items stored in the rule list.
+    -1   - ERROR, e.g. too many items; errstr holds the message.
+*/
+
+static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
+                              const char *str, const char *strend,
+                              char *errstr, size_t errsize)
+{
+  MY_COLL_LEXEM lexem;
+  my_coll_lexem_num lexnum;
+  my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR;
+  MY_COLL_RULE item;
+  int state= 0;
+  size_t nitems= 0;
+
+  /* Init all variables */
+  errstr[0]= '\0';
+  bzero(&item, sizeof(item));
+  my_coll_lexem_init(&lexem, str, strend);
+
+  /* MY_COLL_LEXEM_EOF is 0 and ends the loop */
+  while ((lexnum= my_coll_lexem_next(&lexem)))
+  {
+    if (lexnum == MY_COLL_LEXEM_ERROR)
+    {
+      my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character");
+      return -1;
+    }
+
+    switch (state) {
+    case 0:
+      /* Start of the expression: only a shift is allowed */
+      if (lexnum != MY_COLL_LEXEM_SHIFT)
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected");
+        return -1;
+      }
+      prevlexnum= lexnum;
+      state= 2;
+      continue;
+
+    case 1:
+      /* After a character: a new shift or a diff must follow */
+      if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF)
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected");
+        return -1;
+      }
+      prevlexnum= lexnum;
+      state= 2;
+      continue;
+
+    case 2:
+      /* After a shift or a diff: a character must follow */
+      if (lexnum != MY_COLL_LEXEM_CHAR)
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected");
+        return -1;
+      }
+
+      if (prevlexnum == MY_COLL_LEXEM_SHIFT)
+      {
+        /* New base character: restart all difference counters */
+        item.base= lexem.code;
+        item.diff[0]= 0;
+        item.diff[1]= 0;
+        item.diff[2]= 0;
+      }
+      else if (prevlexnum == MY_COLL_LEXEM_DIFF)
+      {
+        item.curr= lexem.code;
+        /* A difference on one level resets all the weaker levels */
+        if (lexem.diff == 3)
+        {
+          item.diff[2]++;
+        }
+        else if (lexem.diff == 2)
+        {
+          item.diff[1]++;
+          item.diff[2]= 0;
+        }
+        else if (lexem.diff == 1)
+        {
+          item.diff[0]++;
+          item.diff[1]= 0;
+          item.diff[2]= 0;
+        }
+        if (nitems >= mitems)
+        {
+          my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");
+          return -1;
+        }
+        rule[nitems++]= item;
+      }
+      else
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen");
+        return -1;
+      }
+      state= 1;
+      continue;
+    }
+  }
+  return (int) nitems;
+}
+
+#define MY_MAX_COLL_RULE 64
+
+/*
+  This function copies an UCS2 collation from
+  the default Unicode Collation Algorithm (UCA)
+  weights applying tailorings, i.e. a set of
+  alternative weights for some characters.
+
+  The default UCA weights are stored in my_charset_ucs2_general_uca.
+  They consist of 256 pages, 256 characters each.
+
+  If a page is not overwritten by tailoring rules,
+  it is taken from the default UCA weights as is.
+
+  If a page contains some overwritten characters, it is
+  allocated. Untouched characters are copied from the
+  default weights.
+
+  Only the primary difference of a rule is applied to the weights.
+
+  SYNOPSIS
+    create_tailoring
+    cs                   Collation to create the weights for;
+                         cs->tailoring holds the rules
+    alloc                Allocator used for the new lengths and pages
+
+  RETURN VALUES
+    0 - OK, cs->sort_order and cs->sort_order_big are set
+    1 - ERROR: no tailoring, parse error, no rules,
+        or an allocation failed
+*/
+
+static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  MY_COLL_RULE rule[MY_MAX_COLL_RULE];
+  char errstr[128];
+  uchar   *newlengths;
+  uint16 **newweights;
+  const uchar *deflengths= my_charset_ucs2_general_uca.sort_order;
+  uint16     **defweights= my_charset_ucs2_general_uca.sort_order_big;
+  int rc, i;
+
+  if (!cs->tailoring)
+    return 1;
+
+  /* Parse ICU Collation Customization expression */
+  if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE,
+                              cs->tailoring,
+                              cs->tailoring + strlen(cs->tailoring),
+                              errstr, sizeof(errstr))) <= 0)
+  {
+    /*
+      TODO: add error message reporting.
+      printf("Error: %d '%s'\n", rc, errstr);
+    */
+    return 1;
+  }
+
+  if (!(newweights= (uint16**) alloc(256*sizeof(uint16*))))
+    return 1;
+  bzero(newweights, 256*sizeof(uint16*));
+
+  if (!(newlengths= (uchar*) alloc(256)))
+    return 1;
+
+  memcpy(newlengths, deflengths, 256);
+
+  /*
+    Calculate maximum lengths for the pages
+    which will be overwritten.
+  */
+  for (i=0; i < rc; i++)
+  {
+    uint pageb= (rule[i].base >> 8) & 0xFF;
+    uint pagec= (rule[i].curr >> 8) & 0xFF;
+
+    if (newlengths[pagec] < deflengths[pageb])
+      newlengths[pagec]= deflengths[pageb];
+  }
+
+  for (i=0; i < rc;  i++)
+  {
+    uint pageb= (rule[i].base >> 8) & 0xFF;
+    uint pagec= (rule[i].curr >> 8) & 0xFF;
+    uint chb, chc;
+    uint16 *slot;
+
+    /*
+      A zero width page has no room for any weight:
+      writing the primary difference would go out of bounds.
+    */
+    if (!newlengths[pagec])
+      continue;
+
+    if (!newweights[pagec])
+    {
+      /* Alloc new page and copy the default UCA weights */
+      uint size= 256*newlengths[pagec]*sizeof(uint16);
+
+      if (!(newweights[pagec]= (uint16*) alloc(size)))
+        return 1;
+      bzero((void*) newweights[pagec], size);
+
+      /* Nothing to copy from a page without default weights */
+      if (deflengths[pagec])
+      {
+        for (chc=0 ; chc < 256; chc++)
+        {
+          memcpy(newweights[pagec] + chc*newlengths[pagec],
+                 defweights[pagec] + chc*deflengths[pagec],
+                 deflengths[pagec]*sizeof(uint16));
+        }
+      }
+    }
+
+    /*
+      Apply the alternative rule:
+      shift to the base character and primary difference.
+    */
+    chc= rule[i].curr & 0xFF;
+    chb= rule[i].base & 0xFF;
+    slot= newweights[pagec] + chc*newlengths[pagec];
+    /*
+      The slot can be wider than the base weights:
+      clear it first so no weights of the old character are left.
+    */
+    bzero((void*) slot, newlengths[pagec]*sizeof(uint16));
+    memcpy(slot,
+           defweights[pageb] + chb*deflengths[pageb],
+           deflengths[pageb]*sizeof(uint16));
+    /* Apply primary difference */
+    slot[0]+= rule[i].diff[0];
+  }
+
+  /* Share non-overwritten pages with the default UCA weights */
+  for (i= 0; i < 256 ; i++)
+    if (!newweights[i])
+      newweights[i]= defweights[i];
+
+  cs->sort_order= newlengths;
+  cs->sort_order_big= newweights;
+
+  return 0;
+}
+
+/*
+  Collation "init" handler for UCA collations:
+  forwards to create_tailoring() and returns its result.
+*/
+static my_bool my_coll_init_uca(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  return create_tailoring(cs, alloc);
+}
+
 MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
 {
+    my_coll_init_uca,	/* init */
     my_strnncoll_uca,
     my_strnncollsp_uca,
     my_strnxfrm_uca,
@@ -7051,7 +7507,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
 CHARSET_INFO my_charset_ucs2_general_uca=
 {
     45,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
     "ucs2",		/* cs name    */
     "ucs2_general_uca",	/* name         */
     "",			/* comment      */
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 9f1accf841f..f05e85a9d88 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1372,6 +1372,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
 
 static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_ucs2,
     my_strnncoll_ucs2,
     my_strnxfrm_ucs2,
@@ -1385,6 +1386,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
 
 static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
 {
+    NULL,		/* init */
     my_strnncoll_ucs2_bin,
     my_strnncoll_ucs2_bin,
     my_strnxfrm_ucs2_bin,
@@ -1398,6 +1400,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
 
 MY_CHARSET_HANDLER my_charset_ucs2_handler=
 {
+    NULL,		/* init */
     my_ismbchar_ucs2,	/* ismbchar     */
     my_mbcharlen_ucs2,	/* mbcharlen    */
     my_numchars_ucs2,
@@ -1426,7 +1429,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
 CHARSET_INFO my_charset_ucs2_general_ci=
 {
     35,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT,
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE,
     "ucs2",		/* cs name    */
     "ucs2_general_ci",	/* name         */
     "",			/* comment      */
@@ -1452,7 +1455,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
 CHARSET_INFO my_charset_ucs2_bin=
 {
     90,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONTEXT,
+    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE,
     "ucs2",		/* cs name    */
     "ucs2_bin",		/* name         */
     "",			/* comment      */
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index f28ea165f80..fb7946a6b98 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -8423,6 +8423,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_simple,/* strnncoll    */
     my_strnncollsp_simple,
     my_strnxfrm_simple,	/* strnxfrm     */
@@ -8435,14 +8436,15 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,		/* init */
     ismbchar_ujis,
     mbcharlen_ujis,
     my_numchars_mb,
     my_charpos_mb,
     my_well_formed_len_mb,
     my_lengthsp_8bit,
-    my_mb_wc_euc_jp,	 /* mb_wc       */
-    my_wc_mb_euc_jp,	 /* wc_mb       */
+    my_mb_wc_euc_jp,	/* mb_wc       */
+    my_wc_mb_euc_jp,	/* wc_mb       */
     my_caseup_str_mb,
     my_casedn_str_mb,
     my_caseup_mb,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 39e9260ffed..99ac114de9c 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2045,6 +2045,7 @@ static int my_mbcharlen_utf8(CHARSET_INFO *cs  __attribute__((unused)) , uint c)
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_utf8,
     my_strnncollsp_utf8,
     my_strnxfrm_utf8,
@@ -2057,6 +2058,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,		/* init */
     my_ismbchar_utf8,
     my_mbcharlen_utf8,
     my_numchars_mb,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index 670318a082e..a2c5768b16c 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -605,6 +605,7 @@ my_like_range_win1250ch(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
 {
+  NULL,				/* init */
   my_strnncoll_win1250ch,
   my_strnncollsp_win1250ch,
   my_strnxfrm_win1250ch,
author	unknown <bar@mysql.com>	2004-06-11 16:29:16 +0500
committer	unknown <bar@mysql.com>	2004-06-11 16:29:16 +0500
commit	5275eb21c25eef0c5b799315da10f26b779fbfe9 (patch)
tree	72245220b637b8d5caf3b8267ea6801af59f0bc7 /strings
parent	4047b5ade3b02d73c60eac7ddae7e5e928de7fd3 (diff)
download	mariadb-git-5275eb21c25eef0c5b799315da10f26b779fbfe9.tar.gz