Allocate memory when a character set is requested:

- For simple character sets: the from_uni conversion table.
- For UCA: alternative weight arrays.

Use mbminlen instead of MY_CS_NONTEXT.
author: bar@mysql.com <> 2004-06-11 16:29:16 +0500
committer: bar@mysql.com <> 2004-06-11 16:29:16 +0500
commit: c64d93b27403dc9d154eb601b88d95964f9fc05b (patch)
tree: 72245220b637b8d5caf3b8267ea6801af59f0bc7
parent: 21c524e712875f8b66ad95c76da1d4305d0538d8 (diff)
download: mariadb-git-c64d93b27403dc9d154eb601b88d95964f9fc05b.tar.gz
22 files changed, 608 insertions, 560 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 002b77b5310..9be5538b48a 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -53,7 +53,6 @@ typedef struct unicase_info_st
 #define MY_SEQ_SPACES	2
 
         /* My charsets_list flags */
-#define MY_NO_SETS       0
 #define MY_CS_COMPILED  1      /* compiled-in sets               */
 #define MY_CS_CONFIG    2      /* sets that have a *.conf file   */
 #define MY_CS_INDEX     4      /* sets listed in the Index file  */
@@ -62,7 +61,7 @@ typedef struct unicase_info_st
 #define MY_CS_PRIMARY	32     /* if primary collation           */
 #define MY_CS_STRNXFRM	64     /* if strnxfrm is used for sort   */
 #define MY_CS_UNICODE	128    /* is a charset is full unicode   */
-#define MY_CS_NONTEXT	256    /* if a charset is not sprintf() compatible */
+#define MY_CS_READY	256    /* if a charset is initialized    */
 #define MY_CS_AVAILABLE	512    /* If either compiled-in or loaded*/
 
 #define MY_CHARSET_UNDEFINED 0
@@ -102,6 +101,7 @@ struct charset_info_st;
 
 typedef struct my_collation_handler_st
 {
+  my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
   /* Collation routines */
   int     (*strnncoll)(struct charset_info_st *,
 		       const uchar *, uint, const uchar *, uint);
@@ -140,6 +140,7 @@ extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
 
 typedef struct my_charset_handler_st
 {
+  my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
   /* Multibyte routines */
   int     (*ismbchar)(struct charset_info_st *, const char *, const char *);
   int     (*mbcharlen)(struct charset_info_st *, uint);
diff --git a/mysys/charset.c b/mysys/charset.c
index d2d71689d7b..165fa19e3d5 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -23,354 +23,6 @@
 
 
 /*
-  Collation language is implemented according to
-  subset of ICU Collation Customization (tailorings):
-  http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
-  
-  Collation language elements:
-  Delimiters:
-    space   - skipped
-  
-  <char> :=  A-Z | a-z | \uXXXX
-  
-  Shift command:
-    <shift>  := &       - reset at this letter. 
-  
-  Diff command:
-    <d1> :=  <     - Identifies a primary difference.
-    <d2> :=  <<    - Identifies a secondary difference.
-    <d3> := <<<    - Idenfifies a tertiary difference.
-  
-  
-  Collation rules:
-    <ruleset> :=  <rule>  { <ruleset> }
-    
-    <rule> :=   <d1>    <string>
-              | <d2>    <string>
-              | <d3>    <string>
-              | <shift> <char>
-    
-    <string> := <char> [ <string> ]
-
-  An example, Polish collation:
-  
-    &A < \u0105 <<< \u0104
-    &C < \u0107 <<< \u0106
-    &E < \u0119 <<< \u0118
-    &L < \u0142 <<< \u0141
-    &N < \u0144 <<< \u0143
-    &O < \u00F3 <<< \u00D3
-    &S < \u015B <<< \u015A
-    &Z < \u017A <<< \u017B    
-*/
-
-
-typedef enum my_coll_lexem_num_en
-{
-  MY_COLL_LEXEM_EOF	= 0,
-  MY_COLL_LEXEM_DIFF	= 1, 
-  MY_COLL_LEXEM_SHIFT	= 4,
-  MY_COLL_LEXEM_CHAR	= 5,
-  MY_COLL_LEXEM_ERROR	= 6
-} my_coll_lexem_num;
-
-
-typedef struct my_coll_lexem_st
-{
-  const char *beg;
-  const char *end;
-  const char *prev;
-  int   diff;
-  int   code;
-} MY_COLL_LEXEM;
-
-
-/*
-  Initialize collation rule lexical anilizer
-  
-  SYNOPSIS
-    my_coll_lexem_init
-    lexem                Lex analizer to init
-    str                  Const string to parse
-    strend               End of the string
-  USAGE
-  
-  RETURN VALUES
-    N/A
-*/
-
-static void my_coll_lexem_init(MY_COLL_LEXEM *lexem,
-                               const char *str, const char *strend)
-{
-  lexem->beg= str;
-  lexem->prev= str;
-  lexem->end= strend;
-  lexem->diff= 0;
-  lexem->code= 0;
-}
-
-
-/*
-  Print collation customization expression parse error, with context.
-  
-  SYNOPSIS
-    my_coll_lexem_print_error
-    lexem                Lex analizer to take context from
-    errstr               sting to write error to
-    errsize              errstr size
-    txt                  error message
-  USAGE
-  
-  RETURN VALUES
-    N/A
-*/
-
-static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem,
-                                      char *errstr, size_t errsize,
-                                      const char *txt)
-{
-  char tail[30];
-  size_t len= lexem->end - lexem->prev;
-  strmake (tail, lexem->prev, min(len, sizeof(tail)-1));
-  errstr[errsize-1]= '\0';
-  my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail);
-}
-
-
-/*
-  Convert a hex digit into its numeric value
-  
-  SYNOPSIS
-    ch2x
-    ch                   hex digit to convert
-  USAGE
-  
-  RETURN VALUES
-    an integer value in the range 0..15
-    -1 on error
-*/
-
-static int ch2x(int ch)
-{
-  if (ch >= '0' && ch <= '9')
-    return ch - '0';
-  
-  if (ch >= 'a' && ch <= 'f')
-    return 10 + ch - 'a';
-  
-  if (ch >= 'A' && ch <= 'F')
-    return 10 + ch - 'A';
-  
-  return -1;
-}
-
-
-/*
-  Collation language lexical parser:
-  Scans the next lexem.
-  
-  SYNOPSIS
-    my_coll_lexem_next
-    lexem                Lex analizer, previously initialized by 
-                         my_coll_lexem_init.
-  USAGE
-    Call this function in a loop
-    
-  RETURN VALUES
-    Lexem number: eof, diff, shift, char or error.
-*/
-
-static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
-{
-  for ( ;lexem->beg < lexem->end ; lexem->beg++)
-  {
-    lexem->prev= lexem->beg;
-    if (lexem->beg[0] == ' '  || lexem->beg[0] == '\t' || 
-        lexem->beg[0] == '\r' || lexem->beg[0] == '\n')
-      continue;
-    
-    if (lexem->beg[0] == '&')
-    {
-      lexem->beg++;
-      return MY_COLL_LEXEM_SHIFT;
-    }
-    
-    if (lexem->beg[0] == '<')
-    {
-      for (lexem->beg++, lexem->diff=1; 
-           (lexem->beg < lexem->end) && 
-           (lexem->beg[0] == '<') && (lexem->diff<3);
-           lexem->beg++, lexem->diff++);
-        return MY_COLL_LEXEM_DIFF;
-    }
-    
-    if ((lexem->beg[0] >= 'a' && lexem->beg[0] <= 'z') ||
-        (lexem->beg[0] >= 'A' && lexem->beg[0] <= 'Z'))
-    {
-      lexem->code= lexem->beg[0];
-      lexem->beg++;
-      return MY_COLL_LEXEM_CHAR;
-    }
-    
-    if ((lexem->beg[0] == '\\') && 
-        (lexem->beg+2 < lexem->end) && 
-        (lexem->beg[1] == 'u'))
-    {
-      int ch;
-      
-      lexem->code= 0;
-      for (lexem->beg+=2; 
-           (lexem->beg < lexem->end) && ((ch= ch2x(lexem->beg[0])) >= 0) ; 
-           lexem->beg++)
-      {
-        lexem->code= (lexem->code << 4) + ch;
-      }
-      return MY_COLL_LEXEM_CHAR;
-    }
-    
-    return MY_COLL_LEXEM_ERROR;
-  }
-  return MY_COLL_LEXEM_EOF;
-}
-
-
-/*
-  Collation rule item
-*/
-
-typedef struct my_coll_rule_item_st
-{
-  uint base;     /* Base character                             */
-  uint curr;     /* Current character                          */
-  int diff[3];   /* Primary, Secondary and Tertiary difference */
-} MY_COLL_RULE;
-
-
-/*
-  Collation language syntax parser.
-  Uses lexical parser.
-  
-  SYNOPSIS
-    my_coll_rule_parse
-    rule                 Collation rule list to load to.
-    str                  A string containin collation language expression.
-    strend               End of the string.
-  USAGE
-    
-  RETURN VALUES
-    0 - OK
-    1 - ERROR, e.g. too many items.
-*/
-
-static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
-                              const char *str, const char *strend,
-                              char *errstr, size_t errsize)
-{
-  MY_COLL_LEXEM lexem;
-  my_coll_lexem_num lexnum;
-  my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR;
-  MY_COLL_RULE item; 
-  int state= 0;
-  size_t nitems= 0;
-  
-  /* Init all variables */
-  errstr[0]= '\0';
-  bzero(&item, sizeof(item));
-  my_coll_lexem_init(&lexem, str, strend);
-  
-  while ((lexnum= my_coll_lexem_next(&lexem)))
-  {
-    if (lexnum == MY_COLL_LEXEM_ERROR)
-    {
-      my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character");
-      return -1;
-    }
-    
-    switch (state) {
-    case 0:
-      if (lexnum != MY_COLL_LEXEM_SHIFT)
-      {
-        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected");
-        return -1;
-      }
-      prevlexnum= lexnum;
-      state= 2;
-      continue;
-      
-    case 1:
-      if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF)
-      {
-        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected");
-        return -1;
-      }
-      prevlexnum= lexnum;
-      state= 2;
-      continue;
-      
-    case 2:
-      if (lexnum != MY_COLL_LEXEM_CHAR)
-      {
-        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected");
-        return -1;
-      }
-      
-      if (prevlexnum == MY_COLL_LEXEM_SHIFT)
-      {
-        item.base= lexem.code;
-        item.diff[0]= 0;
-        item.diff[1]= 0;
-        item.diff[2]= 0;
-      }
-      else if (prevlexnum == MY_COLL_LEXEM_DIFF)
-      {
-        item.curr= lexem.code;
-        if (lexem.diff == 3)
-        {
-          item.diff[2]++;
-        }
-        else if (lexem.diff == 2)
-        {
-          item.diff[1]++;
-          item.diff[2]= 0;
-        }
-        else if (lexem.diff == 1)
-        {
-          item.diff[0]++;
-          item.diff[1]= 0;
-          item.diff[2]= 0;
-        }
-        if (nitems >= mitems)
-        {
-          my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");
-          return -1;
-        }
-        rule[nitems++]= item;
-      }
-      else
-      {
-        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen");
-        return -1;
-      }
-      state= 1;
-      continue;
-    }
-  }
-  return (size_t) nitems;
-}
-
-
-typedef struct
-{
-  int		nchars;
-  MY_UNI_IDX	uidx;
-} uni_idx;
-
-#define PLANE_SIZE	0x100
-#define PLANE_NUM	0x100
-#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)
-
-
-/*
   The code below implements this functionality:
   
     - Initializing charset related structures
@@ -484,91 +136,6 @@ static void simple_cs_init_functions(CHARSET_INFO *cs)
 }
 
 
-static int pcmp(const void * f, const void * s)
-{
-  const uni_idx *F= (const uni_idx*) f;
-  const uni_idx *S= (const uni_idx*) s;
-  int res;
-
-  if (!(res=((S->nchars)-(F->nchars))))
-    res=((F->uidx.from)-(S->uidx.to));
-  return res;
-}
-
-
-static my_bool create_fromuni(CHARSET_INFO *cs)
-{
-  uni_idx	idx[PLANE_NUM];
-  int		i,n;
-  
-  /* Clear plane statistics */
-  bzero(idx,sizeof(idx));
-  
-  /* Count number of characters in each plane */
-  for (i=0; i< 0x100; i++)
-  {
-    uint16 wc=cs->tab_to_uni[i];
-    int pl= PLANE_NUMBER(wc);
-    
-    if (wc || !i)
-    {
-      if (!idx[pl].nchars)
-      {
-        idx[pl].uidx.from=wc;
-        idx[pl].uidx.to=wc;
-      }else
-      {
-        idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
-        idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
-      }
-      idx[pl].nchars++;
-    }
-  }
-  
-  /* Sort planes in descending order */
-  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
-  
-  for (i=0; i < PLANE_NUM; i++)
-  {
-    int ch,numchars;
-    
-    /* Skip empty plane */
-    if (!idx[i].nchars)
-      break;
-    
-    numchars=idx[i].uidx.to-idx[i].uidx.from+1;
-    if (!(idx[i].uidx.tab=(uchar*) my_once_alloc(numchars *
-						 sizeof(*idx[i].uidx.tab),
-						 MYF(MY_WME))))
-      return TRUE;
-
-    bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));
-    
-    for (ch=1; ch < PLANE_SIZE; ch++)
-    {
-      uint16 wc=cs->tab_to_uni[ch];
-      if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
-      {
-        int ofs= wc - idx[i].uidx.from;
-        idx[i].uidx.tab[ofs]= ch;
-      }
-    }
-  }
-  
-  /* Allocate and fill reverse table for each plane */
-  n=i;
-  if (!(cs->tab_from_uni= (MY_UNI_IDX*) my_once_alloc(sizeof(MY_UNI_IDX)*(n+1),
-						      MYF(MY_WME))))
-    return TRUE;
-
-  for (i=0; i< n; i++)
-    cs->tab_from_uni[i]= idx[i].uidx;
-  
-  /* Set end-of-list marker */
-  bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
-  return FALSE;
-}
-
 
 static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
 {
@@ -622,8 +189,6 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
     if (!(to->tab_to_uni= (uint16*)  my_once_memdup((char*)from->tab_to_uni,
 						    sz, MYF(MY_WME))))
       goto err;
-    if (create_fromuni(to))
-      goto err;
   }
   to->mbminlen= 1;
   to->mbmaxlen= 1;
@@ -754,117 +319,6 @@ static my_tailoring tailoring[]=
   }
 };
 
-#define MY_MAX_COLL_RULE 64
-
-/*
-  This function copies an UCS2 collation from
-  the default Unicode Collation Algorithm (UCA)
-  weights applying tailorings, i.e. a set of
-  alternative weights for some characters. 
-  
-  The default UCA weights are stored in my_charset_ucs2_general_uca.
-  They consist of 256 pages, 256 character each.
-  
-  If a page is not overwritten by tailoring rules,
-  it is copies as is from UCA as is.
-  
-  If a page contains some overwritten characters, it is
-  allocated. Untouched characters are copied from the
-  default weights.
-*/
-
-static my_bool create_tailoring(CHARSET_INFO *cs)
-{
-  MY_COLL_RULE rule[MY_MAX_COLL_RULE];
-  char errstr[128];
-  uchar   *newlengths;
-  uint16 **newweights;
-  const uchar *deflengths= my_charset_ucs2_general_uca.sort_order;
-  uint16     **defweights= my_charset_ucs2_general_uca.sort_order_big;
-  int rc, i;
-
-  if (!cs->tailoring)
-    return 1;
-  
-  /* Parse ICU Collation Customization expression */
-  if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE,
-                              cs->tailoring,
-                              cs->tailoring + strlen(cs->tailoring),
-                              errstr, sizeof(errstr))) <= 0)
-  {
-    /* 
-      TODO: add error message reporting.
-      printf("Error: %d '%s'\n", rc, errstr);
-    */
-    return 1;
-  }
-  
-  if (!(newweights= (uint16**) my_once_alloc(256*sizeof(uint16*),MYF(MY_WME))))
-    return 1;
-  bzero(newweights, 256*sizeof(uint16*));
-  
-  if (!(newlengths= (uchar*) my_once_memdup(deflengths,256,MYF(MY_WME))))
-    return 1;
-  
-  /*
-    Calculate maximum lenghts for the pages
-    which will be overwritten.
-  */
-  for (i=0; i < rc; i++)
-  {
-    uint pageb= (rule[i].base >> 8) & 0xFF;
-    uint pagec= (rule[i].curr >> 8) & 0xFF;
-    
-    if (newlengths[pagec] < deflengths[pageb])
-      newlengths[pagec]= deflengths[pageb];
-  }
-  
-  for (i=0; i < rc;  i++)
-  {
-    uint pageb= (rule[i].base >> 8) & 0xFF;
-    uint pagec= (rule[i].curr >> 8) & 0xFF;
-    uint chb, chc;
-    
-    if (!newweights[pagec])
-    {
-      /* Alloc new page and copy the default UCA weights */
-      uint size= 256*newlengths[pagec]*sizeof(uint16);
-      
-      if (!(newweights[pagec]= (uint16*) my_once_alloc(size,MYF(MY_WME))))
-        return 1;
-      bzero((void*) newweights[pagec], size);
-      
-      for (chc=0 ; chc < 256; chc++)
-      {
-        memcpy(newweights[pagec] + chc*newlengths[pagec],
-               defweights[pagec] + chc*deflengths[pagec],
-               deflengths[pagec]*sizeof(uint16));
-      }
-    }
-    
-    /* 
-      Aply the alternative rule:
-      shift to the base character and primary difference.
-    */
-    chc= rule[i].curr & 0xFF;
-    chb= rule[i].base & 0xFF;
-    memcpy(newweights[pagec] + chc*newlengths[pagec],
-           defweights[pageb] + chb*deflengths[pageb],
-           deflengths[pageb]*sizeof(uint16));
-    /* Apply primary difference */
-    newweights[pagec][chc*newlengths[pagec]]+= rule[i].diff[0];
-  }
-  
-  /* Copy non-overwritten pages from the default UCA weights */
-  for (i= 0; i < 256 ; i++)
-    if (!newweights[i])
-      newweights[i]= defweights[i];
-  
-  cs->sort_order= newlengths;
-  cs->sort_order_big= newweights;
-  
-  return 0;
-}
 
 
 static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
@@ -894,7 +348,7 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
   to->mbminlen= 2;
   to->mbmaxlen= 2;
   
-  return create_tailoring(to);
+  return 0;
   
 err:
   return 1;
@@ -997,7 +451,7 @@ static my_bool init_uca_charsets()
   CHARSET_INFO cs= my_charset_ucs2_general_uca;
   char name[64];
   
-  cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT;
+  cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE;
   for (t= tailoring; t->tailoring; t++)
   {
     cs.number= 128 + t->number;
@@ -1083,6 +537,10 @@ void add_compiled_collation(CHARSET_INFO *cs)
   cs->state|= MY_CS_AVAILABLE;
 }
 
+static void *cs_alloc(uint size) /* allocator handed to the cset/coll init() hooks */
+{ /* thin wrapper: forwards to my_once_alloc() with the MY_WME flag */
+  return my_once_alloc(size, MYF(MY_WME));
+}
 
 
 #ifdef __NETWARE__
@@ -1207,6 +665,14 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
     cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL;
   }
   pthread_mutex_unlock(&THR_LOCK_charset);
+  if (cs && !(cs->state & MY_CS_READY))
+  {
+    if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
+        (cs->coll->init && cs->coll->init(cs, cs_alloc)))
+      cs= NULL;
+    else
+      cs->state|= MY_CS_READY;
+  }
   return cs;
 }
 
diff --git a/sql/item_create.cc b/sql/item_create.cc
index 74f36de11ac..53d4f14d1ee 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -373,7 +373,7 @@ Item *create_func_space(Item *a)
   CHARSET_INFO *cs= current_thd->variables.collation_connection;
   Item *sp;
   
-  if (cs->state & MY_CS_NONTEXT)
+  if (cs->mbminlen > 1)
   {
     sp= new Item_string("",0,cs);
     if (sp)
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 4e9847e99cc..841898ac505 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -329,7 +329,7 @@ char log_error_file[FN_REFLEN], glob_hostname[FN_REFLEN];
 char* log_error_file_ptr= log_error_file;
 char mysql_real_data_home[FN_REFLEN],
      language[LIBLEN],reg_ext[FN_EXTLEN], mysql_charsets_dir[FN_REFLEN],
-     max_sort_char,*mysqld_user,*mysqld_chroot, *opt_init_file,
+     *mysqld_user,*mysqld_chroot, *opt_init_file,
      *opt_init_connect, *opt_init_slave,
      def_ft_boolean_syntax[sizeof(ft_boolean_syntax)];
 
@@ -5249,7 +5249,6 @@ static void mysql_init_variables(void)
   specialflag= opened_tables= created_tmp_tables= created_tmp_disk_tables= 0;
   binlog_cache_use=  binlog_cache_disk_use= 0;
   max_used_connections= slow_launch_threads = 0;
-  max_sort_char= 0;
   mysqld_user= mysqld_chroot= opt_init_file= opt_bin_logname = 0;
   errmesg= 0;
   mysqld_unix_port= opt_mysql_tmpdir= my_bind_addr_str= NullS;
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index f7e4e436495..cf4f94ba966 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -370,7 +370,7 @@ bool String::copy(const char *str, uint32 arg_length,
 
 bool String::set_ascii(const char *str, uint32 arg_length)
 {
-  if (!(str_charset->state & MY_CS_NONTEXT))
+  if (!(str_charset->mbminlen > 1))
   {
     set(str, arg_length, str_charset);
     return 0;
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index aea6140226c..dc838f0e685 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -858,7 +858,7 @@ int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info,
 	     sql_field->sql_type != FIELD_TYPE_VAR_STRING &&
 	     !f_is_blob(sql_field->pack_flag)) ||
 	    sql_field->charset == &my_charset_bin ||
-	    sql_field->charset->state & MY_CS_NONTEXT || // ucs2 doesn't work yet
+	    sql_field->charset->mbminlen > 1 || // ucs2 doesn't work yet
 	    (ft_key_charset && sql_field->charset != ft_key_charset))
 	{
 	    my_printf_error(ER_BAD_FT_COLUMN,ER(ER_BAD_FT_COLUMN),MYF(0),
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index fb72dec7385..3d9cb92bf0d 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6269,6 +6269,7 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_big5,
   my_strnncollsp_big5,
   my_strnxfrm_big5,
@@ -6281,6 +6282,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_big5_handler=
 {
+  NULL,			/* init */
   ismbchar_big5,
   mbcharlen_big5,
   my_numchars_mb,
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 7b3164bf438..54fe4476ae6 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -333,6 +333,7 @@ skip:
 
 MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
 {
+    NULL,			/* init */
     my_strnncoll_binary,
     my_strnncoll_binary,
     my_strnxfrm_bin,
@@ -346,6 +347,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,			/* init */
     NULL,			/* ismbchar      */
     my_mbcharlen_8bit,		/* mbcharlen     */
     my_numchars_8bit,
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 3218fdee673..8fde9498ed9 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -572,6 +572,7 @@ static MY_UNI_IDX idx_uni_8859_2[]={
 
 static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_czech,
   my_strnncollsp_czech,
   my_strnxfrm_czech,
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index c387246b4c6..e739339b9e4 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8637,6 +8637,7 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_simple,  /* strnncoll  */
   my_strnncollsp_simple,
   my_strnxfrm_simple,	/* strnxfrm   */
@@ -8649,6 +8650,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+  NULL,			/* init */
   ismbchar_euc_kr,
   mbcharlen_euc_kr,
   my_numchars_mb,
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index fe1f72e7eda..aee4ed55af6 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5688,6 +5688,7 @@ my_mb_wc_gb2312(CHARSET_INFO *cs  __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_simple,  /* strnncoll  */
   my_strnncollsp_simple,
   my_strnxfrm_simple,	/* strnxfrm   */
@@ -5700,6 +5701,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+  NULL,			/* init */
   ismbchar_gb2312,
   mbcharlen_gb2312,
   my_numchars_mb,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 8b659cb55f9..d6063e9f80f 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -9918,6 +9918,7 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_gbk,
   my_strnncollsp_gbk,
   my_strnxfrm_gbk,
@@ -9930,6 +9931,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+  NULL,			/* init */
   ismbchar_gbk,
   mbcharlen_gbk,
   my_numchars_mb,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 03d4e71377b..86c80ff5a66 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -380,6 +380,7 @@ int my_wc_mb_latin1(CHARSET_INFO *cs  __attribute__((unused)),
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,			/* init */
     NULL,
     my_mbcharlen_8bit,
     my_numchars_8bit,
@@ -674,6 +675,7 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
 {
+  NULL,			/* init */
   my_strnncoll_latin1_de,
   my_strnncollsp_latin1_de,
   my_strnxfrm_latin1_de,
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 9b02cd3b3da..c143994dbc3 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -512,6 +512,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
 
 MY_COLLATION_HANDLER my_collation_mb_bin_handler =
 {
+    NULL,		/* init */
     my_strnncoll_mb_bin,
     my_strnncoll_mb_bin,
     my_strnxfrm_mb_bin,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 5f0a7426db3..c28df91ae86 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1142,8 +1142,107 @@ skip:
 }
 
 
+typedef struct
+{ /* per-plane working record used while building tab_from_uni */
+  int		nchars;	/* how many charset bytes map into this plane */
+  MY_UNI_IDX	uidx;	/* lowest/highest code seen in the plane and its table */
+} uni_idx;
+
+#define PLANE_SIZE	0x100	/* bound of the byte loop in create_fromuni() */
+#define PLANE_NUM	0x100	/* number of planes tracked (size of idx[]) */
+#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)	/* plane of a code: (x >> 8) modulo PLANE_NUM */
+
+static int pcmp(const void * f, const void * s) /* qsort() comparator for uni_idx */
+{
+  const uni_idx *F= (const uni_idx*) f;
+  const uni_idx *S= (const uni_idx*) s;
+  int res;
+  /* More populated planes first; ties are ordered by the plane's first code */
+  if (!(res=((S->nchars)-(F->nchars))))
+    res=((F->uidx.from)-(S->uidx.from)); /* from vs from, so pcmp(x,x) == 0 */
+  return res;
+}
+
+static my_bool create_fromuni(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  /* Build cs->tab_from_uni (per-plane reverse tables) from cs->tab_to_uni,
+     taking all memory from alloc(). Returns FALSE on success, TRUE if the
+     Unicode map is missing or an allocation fails. */
+  uni_idx	idx[PLANE_NUM];
+  int		i,n;
+  /* Nothing to build from if the Unicode map is not loaded */
+  if (!cs->tab_to_uni)
+    return TRUE;
+
+  /* Clear plane statistics */
+  bzero(idx,sizeof(idx));
+
+  /* Count number of characters in each plane */
+  for (i=0; i< 0x100; i++)
+  {
+    uint16 wc=cs->tab_to_uni[i];
+    int pl= PLANE_NUMBER(wc);
+    /* A zero code is counted only for byte 0 */
+    if (wc || !i)
+    {
+      if (!idx[pl].nchars)
+      {
+        idx[pl].uidx.from=wc;
+        idx[pl].uidx.to=wc;
+      }else
+      {
+        idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
+        idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
+      }
+      idx[pl].nchars++;
+    }
+  }
+
+  /* Sort planes by character count, most populated first */
+  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
+  for (i=0; i < PLANE_NUM; i++)
+  {
+    int ch,numchars;
+    /* Empty planes sort last, so the first one ends the scan */
+    if (!idx[i].nchars)
+      break;
+    /* One zero-filled table slot per code in the plane's from..to range */
+    numchars=idx[i].uidx.to-idx[i].uidx.from+1;
+    if (!(idx[i].uidx.tab=(uchar*) alloc(numchars * sizeof(*idx[i].uidx.tab))))
+      return TRUE;
+    bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));
+    for (ch=1; ch < PLANE_SIZE; ch++)
+    {
+      uint16 wc=cs->tab_to_uni[ch];
+      if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
+      {
+        int ofs= wc - idx[i].uidx.from;
+        idx[i].uidx.tab[ofs]= ch;
+      }
+    }
+  }
+
+  /* Allocate the index: one entry per non-empty plane plus a terminator */
+  n=i;
+  if (!(cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX)*(n+1))))
+    return TRUE;
+  for (i=0; i< n; i++)
+    cs->tab_from_uni[i]= idx[i].uidx;
+  /* Set end-of-list marker */
+  bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
+  return FALSE;
+}
+
+static my_bool my_cset_init_8bit(CHARSET_INFO *cs, void *(*alloc)(uint))
+{ /* init hook installed in my_charset_8bit_handler */
+  return create_fromuni(cs, alloc); /* result of create_fromuni() is passed through */
+}
+
+
+
 MY_CHARSET_HANDLER my_charset_8bit_handler=
 {
+    my_cset_init_8bit,
     NULL,			/* ismbchar      */
     my_mbcharlen_8bit,		/* mbcharlen     */
     my_numchars_8bit,
@@ -1170,6 +1269,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
 
 MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_simple,
     my_strnncollsp_simple,
     my_strnxfrm_simple,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index b4a131d3410..3744711447a 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4534,6 +4534,7 @@ my_mb_wc_sjis(CHARSET_INFO *cs  __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+  NULL,			/* init */
   my_strnncoll_sjis,
   my_strnncollsp_sjis,
   my_strnxfrm_sjis,
@@ -4547,6 +4548,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+  NULL,			/* init */
   ismbchar_sjis,
   mbcharlen_sjis,
   my_numchars_mb,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 79ac2079720..a0ba1a266ea 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -906,6 +906,7 @@ int my_wc_mb_tis620(CHARSET_INFO *cs  __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_tis620,
     my_strnncollsp_tis620,
     my_strnxfrm_tis620,
@@ -918,6 +919,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,		/* init */
     NULL,		/* ismbchar  */
     my_mbcharlen_8bit,	/* mbcharlen */
     my_numchars_8bit,
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index e6b68b8c9b2..846f17982c3 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7036,8 +7036,464 @@ int my_wildcmp_uca(CHARSET_INFO *cs,
 }
 
 
+/*
+  Collation language is implemented according to
+  subset of ICU Collation Customization (tailorings):
+  http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
+  
+  Collation language elements:
+  Delimiters:
+    space   - skipped
+  
+  <char> :=  A-Z | a-z | \uXXXX
+  
+  Shift command:
+    <shift>  := &       - reset at this letter. 
+  
+  Diff command:
+    <d1> :=  <     - Identifies a primary difference.
+    <d2> :=  <<    - Identifies a secondary difference.
+    <d3> := <<<    - Identifies a tertiary difference.
+  
+  
+  Collation rules:
+    <ruleset> :=  <rule>  { <ruleset> }
+    
+    <rule> :=   <d1>    <string>
+              | <d2>    <string>
+              | <d3>    <string>
+              | <shift> <char>
+    
+    <string> := <char> [ <string> ]
+
+  An example, Polish collation:
+  
+    &A < \u0105 <<< \u0104
+    &C < \u0107 <<< \u0106
+    &E < \u0119 <<< \u0118
+    &L < \u0142 <<< \u0141
+    &N < \u0144 <<< \u0143
+    &O < \u00F3 <<< \u00D3
+    &S < \u015B <<< \u015A
+    &Z < \u017A <<< \u017B    
+*/
+
+
+typedef enum my_coll_lexem_num_en
+{
+  MY_COLL_LEXEM_EOF	= 0,	/* end of input; 0 so that it ends a while() loop */
+  MY_COLL_LEXEM_DIFF	= 1, 	/* '<', '<<' or '<<<'                            */
+  MY_COLL_LEXEM_SHIFT	= 4,	/* '&'                                           */
+  MY_COLL_LEXEM_CHAR	= 5,	/* A-Z, a-z or \uXXXX                            */
+  MY_COLL_LEXEM_ERROR	= 6	/* any other non-space character                 */
+} my_coll_lexem_num;
+
+
+typedef struct my_coll_lexem_st
+{
+  const char *beg;   /* current scan position, advanced by my_coll_lexem_next  */
+  const char *end;   /* end of the string being parsed                         */
+  const char *prev;  /* where the last scanned lexem starts (error context)    */
+  int   diff;        /* number of '<' in the last diff lexem, 1..3             */
+  int   code;        /* character code of the last char lexem                  */
+} MY_COLL_LEXEM;
+
+
+/*
+  Initialize collation rule lexical analyzer
+  
+  SYNOPSIS
+    my_coll_lexem_init
+    lexem                Lex analyzer to init
+    str                  Const string to parse
+    strend               End of the string
+  USAGE
+    Call once before the first my_coll_lexem_next() call.
+  RETURN VALUES
+    N/A
+*/
+
+static void my_coll_lexem_init(MY_COLL_LEXEM *lexem,
+                               const char *str, const char *strend)
+{
+  lexem->beg= str;
+  lexem->prev= str;
+  lexem->end= strend;
+  lexem->diff= 0;
+  lexem->code= 0;
+}
+
+
+/*
+  Print collation customization expression parse error, with context.
+  
+  SYNOPSIS
+    my_coll_lexem_print_error
+    lexem                Lex analyzer to take context from
+    errstr               string to write error to
+    errsize              errstr size
+    txt                  error message
+  USAGE
+    The message is formatted as "<txt> at '<input starting at lexem->prev>'".
+  RETURN VALUES
+    N/A
+*/
+
+static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem,
+                                      char *errstr, size_t errsize,
+                                      const char *txt)
+{
+  char tail[30];                         /* excerpt of the input shown in the message */
+  size_t len= lexem->end - lexem->prev;  /* bytes left from the start of the last lexem */
+  strmake (tail, lexem->prev, min(len, sizeof(tail)-1)); /* at most 29 bytes requested */
+  errstr[errsize-1]= '\0';
+  my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail);
+}
+
+
+/*
+  Convert a hex digit into its numeric value
+  
+  SYNOPSIS
+    ch2x
+    ch                   hex digit to convert
+  USAGE
+    Accepts '0'..'9', 'a'..'f' and 'A'..'F'.
+  RETURN VALUES
+    an integer value in the range 0..15
+    -1 on error
+*/
+
+static int ch2x(int ch)
+{
+  if (ch >= '0' && ch <= '9')
+    return ch - '0';
+  
+  if (ch >= 'a' && ch <= 'f')
+    return 10 + ch - 'a';
+  
+  if (ch >= 'A' && ch <= 'F')
+    return 10 + ch - 'A';
+  
+  return -1;
+}
+
+
+/*
+  Collation language lexical parser:
+  Scans the next lexem.
+  
+  SYNOPSIS
+    my_coll_lexem_next
+    lexem                Lex analyzer, previously initialized by 
+                         my_coll_lexem_init.
+  USAGE
+    Call this function in a loop
+    A diff lexem sets lexem->diff to 1..3, a char lexem sets lexem->code.
+  RETURN VALUES
+    Lexem number: eof, diff, shift, char or error.
+*/
+
+static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
+{
+  for ( ;lexem->beg < lexem->end ; lexem->beg++)
+  {
+    lexem->prev= lexem->beg;    /* remembered for error reporting */
+    if (lexem->beg[0] == ' '  || lexem->beg[0] == '\t' || 
+        lexem->beg[0] == '\r' || lexem->beg[0] == '\n')
+      continue;                 /* skip delimiters */
+    
+    if (lexem->beg[0] == '&')
+    {
+      lexem->beg++;
+      return MY_COLL_LEXEM_SHIFT;
+    }
+    
+    if (lexem->beg[0] == '<')
+    {
+      for (lexem->beg++, lexem->diff=1; 
+           (lexem->beg < lexem->end) && 
+           (lexem->beg[0] == '<') && (lexem->diff<3);
+           lexem->beg++, lexem->diff++);   /* empty body: counts up to three '<' */
+        return MY_COLL_LEXEM_DIFF;         /* executed after the loop, not inside it */
+    }
+    
+    if ((lexem->beg[0] >= 'a' && lexem->beg[0] <= 'z') ||
+        (lexem->beg[0] >= 'A' && lexem->beg[0] <= 'Z'))
+    {
+      lexem->code= lexem->beg[0];
+      lexem->beg++;
+      return MY_COLL_LEXEM_CHAR;
+    }
+    
+    if ((lexem->beg[0] == '\\') && 
+        (lexem->beg+2 < lexem->end) && 
+        (lexem->beg[1] == 'u'))
+    {
+      int ch;
+      
+      lexem->code= 0;
+      for (lexem->beg+=2; 
+           (lexem->beg < lexem->end) && ((ch= ch2x(lexem->beg[0])) >= 0) ; 
+           lexem->beg++)
+      {
+        lexem->code= (lexem->code << 4) + ch;  /* any number of hex digits is consumed */
+      }
+      return MY_COLL_LEXEM_CHAR;  /* also returned, with code 0, when no hex digit follows the 'u' */
+    }
+    
+    return MY_COLL_LEXEM_ERROR;   /* none of the known lexems starts here */
+  }
+  return MY_COLL_LEXEM_EOF;
+}
+
+
+/*
+  Collation rule item: one tailored character relative to a base character
+*/
+
+typedef struct my_coll_rule_item_st
+{
+  uint base;     /* Base character, the one given after '&'              */
+  uint curr;     /* Current character, the one given after a diff        */
+  int diff[3];   /* Primary, Secondary and Tertiary difference from base */
+} MY_COLL_RULE;
+
+
+/*
+  Collation language syntax parser. Uses the lexical parser.
+  
+  SYNOPSIS
+    my_coll_rule_parse
+    rule                 Collation rule list to load to.
+    mitems               Maximum number of items the rule list can hold.
+    str                  A string containing collation language expression.
+    strend               End of the string.
+    errstr, errsize      Buffer, and its size, that receives an error message.
+    
+  RETURN VALUES
+    >= 0 - OK, the number of items stored into the rule list.
+    -1   - ERROR, e.g. too many items; the message is written to errstr.
+*/
+
+static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
+                              const char *str, const char *strend,
+                              char *errstr, size_t errsize)
+{
+  MY_COLL_LEXEM lexem;
+  my_coll_lexem_num lexnum;
+  my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR;
+  MY_COLL_RULE item; 
+  int state= 0;
+  size_t nitems= 0;
+  
+  /* Init all variables */
+  errstr[0]= '\0';
+  bzero(&item, sizeof(item));
+  my_coll_lexem_init(&lexem, str, strend);
+  
+  while ((lexnum= my_coll_lexem_next(&lexem)))   /* MY_COLL_LEXEM_EOF is 0 and ends the loop */
+  {
+    if (lexnum == MY_COLL_LEXEM_ERROR)
+    {
+      my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character");
+      return -1;
+    }
+    
+    switch (state) {
+    case 0:                     /* very first lexem: only '&' is accepted */
+      if (lexnum != MY_COLL_LEXEM_SHIFT)
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected");
+        return -1;
+      }
+      prevlexnum= lexnum;
+      state= 2;
+      continue;
+      
+    case 1:                     /* after a character: '&' or a diff is accepted */
+      if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF)
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected");
+        return -1;
+      }
+      prevlexnum= lexnum;
+      state= 2;
+      continue;
+      
+    case 2:                     /* after '&' or a diff: a character is required */
+      if (lexnum != MY_COLL_LEXEM_CHAR)
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected");
+        return -1;
+      }
+      
+      if (prevlexnum == MY_COLL_LEXEM_SHIFT)
+      {
+        item.base= lexem.code;  /* new base: nothing is stored, differences restart */
+        item.diff[0]= 0;
+        item.diff[1]= 0;
+        item.diff[2]= 0;
+      }
+      else if (prevlexnum == MY_COLL_LEXEM_DIFF)
+      {
+        item.curr= lexem.code;
+        if (lexem.diff == 3)
+        {
+          item.diff[2]++;       /* tertiary step */
+        }
+        else if (lexem.diff == 2)
+        {
+          item.diff[1]++;       /* secondary step resets the tertiary counter */
+          item.diff[2]= 0;
+        }
+        else if (lexem.diff == 1)
+        {
+          item.diff[0]++;       /* primary step resets the lower levels */
+          item.diff[1]= 0;
+          item.diff[2]= 0;
+        }
+        if (nitems >= mitems)
+        {
+          my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");
+          return -1;
+        }
+        rule[nitems++]= item;   /* store a copy; item keeps accumulating for the next diff */
+      }
+      else
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen");
+        return -1;
+      }
+      state= 1;
+      continue;
+    }
+  }
+  return (size_t) nitems;       /* NOTE(review): input ending right after '&' or a diff is not reported */
+}
+
+#define MY_MAX_COLL_RULE 64
+
+/*
+  This function copies an UCS2 collation from
+  the default Unicode Collation Algorithm (UCA)
+  weights applying tailorings, i.e. a set of
+  alternative weights for some characters. 
+  
+  The default UCA weights are stored in my_charset_ucs2_general_uca.
+  They consist of 256 pages, 256 characters each.
+  
+  If a page is not overwritten by tailoring rules,
+  its pointer is taken from UCA as is, without copying.
+  
+  If a page contains some overwritten characters, it is
+  allocated. Untouched characters are copied from the
+  default weights.
+*/
+
+static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  MY_COLL_RULE rule[MY_MAX_COLL_RULE];
+  char errstr[128];
+  uchar   *newlengths;          /* per page: number of uint16 weights per character */
+  uint16 **newweights;          /* 256 page pointers */
+  const uchar *deflengths= my_charset_ucs2_general_uca.sort_order;
+  uint16     **defweights= my_charset_ucs2_general_uca.sort_order_big;
+  int rc, i;
+
+  if (!cs->tailoring)
+    return 1;                   /* no tailoring string: reported as failure */
+  
+  /* Parse ICU Collation Customization expression */
+  if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE,
+                              cs->tailoring,
+                              cs->tailoring + strlen(cs->tailoring),
+                              errstr, sizeof(errstr))) <= 0) /* parse error or no rules */
+  {
+    /* 
+      TODO: add error message reporting.
+      printf("Error: %d '%s'\n", rc, errstr);
+    */
+    return 1;
+  }
+  
+  if (!(newweights= (uint16**) alloc(256*sizeof(uint16*))))
+    return 1;
+  bzero(newweights, 256*sizeof(uint16*));
+  
+  if (!(newlengths= (uchar*) alloc(256)))
+    return 1;
+  
+  memcpy(newlengths, deflengths, 256);
+  
+  /*
+    Calculate maximum lengths for the pages
+    which will be overwritten.
+  */
+  for (i=0; i < rc; i++)
+  {
+    uint pageb= (rule[i].base >> 8) & 0xFF;  /* only the low 16 bits of a code are used */
+    uint pagec= (rule[i].curr >> 8) & 0xFF;
+    
+    if (newlengths[pagec] < deflengths[pageb])
+      newlengths[pagec]= deflengths[pageb];
+  }
+  
+  for (i=0; i < rc;  i++)
+  {
+    uint pageb= (rule[i].base >> 8) & 0xFF;
+    uint pagec= (rule[i].curr >> 8) & 0xFF;
+    uint chb, chc;
+    
+    if (!newweights[pagec])
+    {
+      /* Alloc new page and copy the default UCA weights */
+      uint size= 256*newlengths[pagec]*sizeof(uint16);
+      
+      if (!(newweights[pagec]= (uint16*) alloc(size)))
+        return 1;
+      bzero((void*) newweights[pagec], size);
+      
+      for (chc=0 ; chc < 256; chc++)  /* NOTE(review): assumes defweights[pagec] is not NULL - confirm */
+      {
+        memcpy(newweights[pagec] + chc*newlengths[pagec],
+               defweights[pagec] + chc*deflengths[pagec],
+               deflengths[pagec]*sizeof(uint16));
+      }
+    }
+    
+    /* 
+      Apply the alternative rule: shift to the base character and primary
+      difference. Weights past deflengths[pageb] keep their previous content.
+    */
+    chc= rule[i].curr & 0xFF;
+    chb= rule[i].base & 0xFF;
+    memcpy(newweights[pagec] + chc*newlengths[pagec],
+           defweights[pageb] + chb*deflengths[pageb],
+           deflengths[pageb]*sizeof(uint16));
+    /* Apply primary difference; diff[1] and diff[2] are not used here */
+    newweights[pagec][chc*newlengths[pagec]]+= rule[i].diff[0];
+  }
+  
+  /* Copy non-overwritten pages from the default UCA weights */
+  for (i= 0; i < 256 ; i++)
+    if (!newweights[i])
+      newweights[i]= defweights[i];
+  
+  cs->sort_order= newlengths;
+  cs->sort_order_big= newweights;
+  
+  return 0;
+}
+
+static my_bool my_coll_init_uca(CHARSET_INFO *cs, void *(*alloc)(uint))
+{
+  return create_tailoring(cs, alloc); /* init hook of the UCA handler: 0 on success, 1 on failure */
+}
+
 MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
 {
+    my_coll_init_uca,	/* init */
     my_strnncoll_uca,
     my_strnncollsp_uca,
     my_strnxfrm_uca,
@@ -7051,7 +7507,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
 CHARSET_INFO my_charset_ucs2_general_uca=
 {
     45,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
     "ucs2",		/* cs name    */
     "ucs2_general_uca",	/* name         */
     "",			/* comment      */
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 9f1accf841f..f05e85a9d88 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1372,6 +1372,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
 
 static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_ucs2,
     my_strnncoll_ucs2,
     my_strnxfrm_ucs2,
@@ -1385,6 +1386,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
 
 static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
 {
+    NULL,		/* init */
     my_strnncoll_ucs2_bin,
     my_strnncoll_ucs2_bin,
     my_strnxfrm_ucs2_bin,
@@ -1398,6 +1400,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
 
 MY_CHARSET_HANDLER my_charset_ucs2_handler=
 {
+    NULL,		/* init */
     my_ismbchar_ucs2,	/* ismbchar     */
     my_mbcharlen_ucs2,	/* mbcharlen    */
     my_numchars_ucs2,
@@ -1426,7 +1429,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
 CHARSET_INFO my_charset_ucs2_general_ci=
 {
     35,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT,
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE,
     "ucs2",		/* cs name    */
     "ucs2_general_ci",	/* name         */
     "",			/* comment      */
@@ -1452,7 +1455,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
 CHARSET_INFO my_charset_ucs2_bin=
 {
     90,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONTEXT,
+    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE,
     "ucs2",		/* cs name    */
     "ucs2_bin",		/* name         */
     "",			/* comment      */
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index f28ea165f80..fb7946a6b98 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -8423,6 +8423,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_simple,/* strnncoll    */
     my_strnncollsp_simple,
     my_strnxfrm_simple,	/* strnxfrm     */
@@ -8435,14 +8436,15 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,		/* init */
     ismbchar_ujis,
     mbcharlen_ujis,
     my_numchars_mb,
     my_charpos_mb,
     my_well_formed_len_mb,
     my_lengthsp_8bit,
-    my_mb_wc_euc_jp,	 /* mb_wc       */
-    my_wc_mb_euc_jp,	 /* wc_mb       */
+    my_mb_wc_euc_jp,	/* mb_wc       */
+    my_wc_mb_euc_jp,	/* wc_mb       */
     my_caseup_str_mb,
     my_casedn_str_mb,
     my_caseup_mb,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 39e9260ffed..99ac114de9c 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2045,6 +2045,7 @@ static int my_mbcharlen_utf8(CHARSET_INFO *cs  __attribute__((unused)) , uint c)
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
+    NULL,		/* init */
     my_strnncoll_utf8,
     my_strnncollsp_utf8,
     my_strnxfrm_utf8,
@@ -2057,6 +2058,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 
 static MY_CHARSET_HANDLER my_charset_handler=
 {
+    NULL,		/* init */
     my_ismbchar_utf8,
     my_mbcharlen_utf8,
     my_numchars_mb,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index 670318a082e..a2c5768b16c 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -605,6 +605,7 @@ my_like_range_win1250ch(CHARSET_INFO *cs __attribute__((unused)),
 
 static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
 {
+  NULL,				/* init */
   my_strnncoll_win1250ch,
   my_strnncollsp_win1250ch,
   my_strnxfrm_win1250ch,
author	bar@mysql.com <>	2004-06-11 16:29:16 +0500
committer	bar@mysql.com <>	2004-06-11 16:29:16 +0500
commit	c64d93b27403dc9d154eb601b88d95964f9fc05b (patch)
tree	72245220b637b8d5caf3b8267ea6801af59f0bc7
parent	21c524e712875f8b66ad95c76da1d4305d0538d8 (diff)
download	mariadb-git-c64d93b27403dc9d154eb601b88d95964f9fc05b.tar.gz