From fb17025b3ea4983feec4498ab1f807b22a95d099 Mon Sep 17 00:00:00 2001
From: "sergefp@mysql.com" <>
Date: Fri, 21 May 2004 04:27:50 +0400
Subject: WL#1622 "SQL Syntax for Prepared Statements": Post-review fixes (1 of
 2)

---
 mysys/my_error.c | 55 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 10 deletions(-)

(limited to 'mysys')

diff --git a/mysys/my_error.c b/mysys/my_error.c
index 6fd346c89f7..7ca7dbae8de 100644
--- a/mysys/my_error.c
+++ b/mysys/my_error.c
@@ -33,6 +33,12 @@ char NEAR errbuff[NRERRBUFFS][ERRMSGSIZE];
        nr	Errno
        MyFlags	Flags
        ...	variable list
+   NOTE
+    The following subset of printf format is supported:
+    "%[0-9.-]*l?[sdu]", where all length flags are parsed but ignored.
+
+    Additionally "%.*s" is supported and "%.*[ud]" is correctly parsed but 
+    length value is ignored.
 */
 
 int my_error(int nr,myf MyFlags, ...)
@@ -43,7 +49,10 @@ int my_error(int nr,myf MyFlags, ...)
   reg2 char	*endpos;
   char		* par;
   char		ebuff[ERRMSGSIZE+20];
+  int           prec_chars;
+  my_bool       prec_supplied;
   DBUG_ENTER("my_error");
+  LINT_INIT(prec_chars); /* protected by prec_supplied */
 
   va_start(ap,MyFlags);
   DBUG_PRINT("my", ("nr: %d  MyFlags: %d  errno: %d", nr, MyFlags, errno));
@@ -59,7 +68,6 @@ int my_error(int nr,myf MyFlags, ...)
     if (tpos[0] != '%')
     {
       *endpos++= *tpos++;	/* Copy ordinary char */
-      olen++;
       continue;
     }
     if (*++tpos == '%')		/* test if %% */
@@ -68,21 +76,48 @@ int my_error(int nr,myf MyFlags, ...)
     }
     else
     {
-      /* Skipp if max size is used (to be compatible with printf) */
-      while (my_isdigit(&my_charset_latin1, *tpos) || *tpos == '.' || *tpos == '-')
-	tpos++;
-      if (*tpos == 'l')				/* Skipp 'l' argument */
-	tpos++;
+      /* 
+        Skip size/precision flags to be compatible with printf. 
+        The only size/precision flag supported is "%.*s". 
+        "%.*u" and "%.*d" are parsed, but the length argument is ignored.
+      */
+      prec_supplied= 0;
+      if (*tpos== '.')
+      {
+        tpos++;
+        olen--;
+        if (*tpos == '*')
+        {
+          tpos++;
+          olen--;
+          prec_chars= va_arg(ap, int); /* get length parameter */
+          prec_supplied= 1;
+        }
+      }
+       
+      if (!prec_supplied)
+      {
+        while (my_isdigit(&my_charset_latin1, *tpos) || *tpos == '.' || 
+               *tpos == '-')
+	 tpos++;
+        
+        if (*tpos == 'l')				/* Skipp 'l' argument */
+	  tpos++;
+      }
+
       if (*tpos == 's')				/* String parameter */
       {
 	par = va_arg(ap, char *);
 	plen = (uint) strlen(par);
+        if (prec_supplied && prec_chars > 0)
+          plen= min((uint)prec_chars, plen);
 	if (olen + plen < ERRMSGSIZE+2)		/* Replace if possible */
 	{
-	  endpos=strmov(endpos,par);
-	  tpos++;
-	  olen+=plen-2;
-	  continue;
+          memcpy(endpos,par, plen);
+          endpos += plen;
+          tpos++;
+          olen+=plen-2;
+          continue;
 	}
       }
       else if (*tpos == 'd' || *tpos == 'u')	/* Integer parameter */
-- 
cgit v1.2.1


From 7d8f8bc77bd58b0a527a3874cda55311a068d05b Mon Sep 17 00:00:00 2001
From: "monty@mysql.com" <>
Date: Wed, 26 May 2004 19:12:49 +0300
Subject: Changed prototype of killed_ptr() to make it more portable Applied
 patches for Netware

---
 mysys/my_pthread.c | 34 ++++++++++++++++------------------
 mysys/my_static.h  |  2 ++
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'mysys')

diff --git a/mysys/my_pthread.c b/mysys/my_pthread.c
index 2667c0670d8..d721418ffa1 100644
--- a/mysys/my_pthread.c
+++ b/mysys/my_pthread.c
@@ -98,25 +98,23 @@ void *my_pthread_getspecific_imp(pthread_key_t key)
 #undef pthread_exit
 void my_pthread_exit(void *status)
 {
-  NXThreadId_t tid = NXThreadGetId();
+  NXThreadId_t tid;
   NXContext_t ctx;
-  char name[PATH_MAX] = "";
-
-  /* Do not call pthread_exit if it is not a LibC thread */
-  if (tid != 0)
-  {
-    NXThreadGetContext(tid, &ctx);
-    NXContextGetName(ctx, name, PATH_MAX);
-
-    /*
-      "MYSQLD.NLM's LibC Reaper" or "MYSQLD.NLM's main thread"
-      with a debug build of LibC the reaper can have different names
-    */
-    if (!strindex(name, "\'s"))
-    {
-      pthread_exit(status);
-    }
-  }
+  char name[NX_MAX_OBJECT_NAME_LEN+1] = "";
+
+  tid= NXThreadGetId();
+  if (tid == NX_INVALID_THREAD_ID || !tid)
+    return;
+  if (NXThreadGetContext(tid, &ctx) ||
+      NXContextGetName(ctx, name, sizeof(name)-1))
+    return;
+
+  /*
+    "MYSQLD.NLM's LibC Reaper" or "MYSQLD.NLM's main thread"
+    with a debug build of LibC the reaper can have different names
+  */
+  if (!strindex(name, "\'s"))
+    pthread_exit(status);
 }
 #endif
 
diff --git a/mysys/my_static.h b/mysys/my_static.h
index bb408aa808d..51f9fbc922f 100644
--- a/mysys/my_static.h
+++ b/mysys/my_static.h
@@ -19,6 +19,7 @@
   a shared library
 */
 
+C_MODE_START
 #include <signal.h>
 
 #define MAX_SIGNALS	10		/* Max signals under a dont-allow */
@@ -73,3 +74,4 @@ extern struct st_my_file_info my_file_info_default[MY_NFILE];
 #if defined(THREAD) && !defined(__WIN__)
 extern sigset_t my_signals;		/* signals blocked by mf_brkhant */
 #endif
+C_MODE_END
-- 
cgit v1.2.1


From 03b705ff4408f011eebdadffeb249e9ef533c3ea Mon Sep 17 00:00:00 2001
From: "dlenev@brandersnatch.localdomain" <>
Date: Thu, 27 May 2004 17:54:40 +0400
Subject: Made my_snprintf() behavior snprintf() compatible when printing %x
 arguments (it should produce hex digits in lower case). (fixed version)

Replaced _dig_vec array with two _dig_vec_upper/_dig_vec_lower arrays.
Added extra argument to int2str function which controls case of digits you get.
Replaced lot of invocations of int2str for decimal radix with more optimized int10_to_str()
function.
Removed unused my_itoa/my_ltoa functions.
---
 mysys/mf_tempfile.c | 2 +-
 mysys/my_error.c    | 4 ++--
 mysys/my_tempnam.c  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'mysys')

diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c
index 14b8fdc430c..e2ad71654dc 100644
--- a/mysys/mf_tempfile.c
+++ b/mysys/mf_tempfile.c
@@ -181,7 +181,7 @@ File create_temp_file(char *to, const char *dir, const char *prefix,
 
       for (length=0 ; length < 8 && uniq ; length++)
       {
-	*end_pos++= _dig_vec[(int) (uniq & 31)];
+	*end_pos++= _dig_vec_upper[(int) (uniq & 31)];
 	uniq >>= 5;
       }
       (void) strmov(end_pos,TMP_EXT);
diff --git a/mysys/my_error.c b/mysys/my_error.c
index 6fd346c89f7..33d79bbc5e6 100644
--- a/mysys/my_error.c
+++ b/mysys/my_error.c
@@ -90,9 +90,9 @@ int my_error(int nr,myf MyFlags, ...)
 	register int iarg;
 	iarg = va_arg(ap, int);
 	if (*tpos == 'd')
-	  plen= (uint) (int2str((long) iarg,endpos, -10) - endpos);
+	  plen= (uint) (int10_to_str((long) iarg, endpos, -10) - endpos);
 	else
-	  plen= (uint) (int2str((long) (uint) iarg,endpos,10)- endpos);
+	  plen= (uint) (int10_to_str((long) (uint) iarg, endpos, 10) - endpos);
 	if (olen + plen < ERRMSGSIZE+2) /* Replace parameter if possible */
 	{
 	  endpos+=plen;
diff --git a/mysys/my_tempnam.c b/mysys/my_tempnam.c
index b4f76727ee0..9f765298fb6 100644
--- a/mysys/my_tempnam.c
+++ b/mysys/my_tempnam.c
@@ -161,7 +161,7 @@ my_string my_tempnam(const char *dir, const char *pfx,
 
   for (length=0 ; length < 8 && uniq ; length++)
   {
-    *end_pos++= _dig_vec[(int) (uniq & 31)];
+    *end_pos++= _dig_vec_upper[(int) (uniq & 31)];
     uniq >>= 5;
   }
   VOID(strmov(end_pos,TMP_EXT));
-- 
cgit v1.2.1


From 2e1ded2fe12ae42dfe41b4d536ddb85578ed9f8a Mon Sep 17 00:00:00 2001
From: "serg@serg.mylan" <>
Date: Sat, 29 May 2004 17:52:20 +0200
Subject: backport wild_compare fix from 4.1 - bug#3924

---
 mysys/mf_wcomp.c | 67 +++++++++++++++++++++++++++++++++++++-------------------
 mysys/mf_wfile.c |  4 ++--
 2 files changed, 47 insertions(+), 24 deletions(-)

(limited to 'mysys')

diff --git a/mysys/mf_wcomp.c b/mysys/mf_wcomp.c
index bdcfb0501d8..1a01388a3db 100644
--- a/mysys/mf_wcomp.c
+++ b/mysys/mf_wcomp.c
@@ -23,11 +23,12 @@
 
 char wild_many='*';
 char wild_one='?';
-char wild_prefix=0;
+char wild_prefix=0; /* QQ this can potentially cause a SIGSEGV */
 
-int wild_compare(register const char *str, register const char *wildstr)
+int wild_compare(register const char *str, register const char *wildstr,
+                 pbool str_is_pattern)
 {
-  reg3 int flag;
+  char cmp;
   DBUG_ENTER("wild_compare");
 
   while (*wildstr)
@@ -35,33 +36,55 @@ int wild_compare(register const char *str, register const char *wildstr)
     while (*wildstr && *wildstr != wild_many && *wildstr != wild_one)
     {
       if (*wildstr == wild_prefix && wildstr[1])
+      {
 	wildstr++;
-      if (*wildstr++ != *str++) DBUG_RETURN(1);
+        if (str_is_pattern && *str++ != wild_prefix)
+          DBUG_RETURN(1);
+      }
+      if (*wildstr++ != *str++)
+        DBUG_RETURN(1);
     }
-    if (! *wildstr ) DBUG_RETURN (*str != 0);
+    if (! *wildstr )
+      DBUG_RETURN(*str != 0);
     if (*wildstr++ == wild_one)
     {
-      if (! *str++) DBUG_RETURN (1);	/* One char; skipp */
+      if (! *str || (str_is_pattern && *str == wild_many))
+        DBUG_RETURN(1);                     /* One char; skip */
+      if (*str++ == wild_prefix && str_is_pattern && *str)
+        str++;
     }
     else
     {						/* Found '*' */
-      if (!*wildstr) DBUG_RETURN(0);		/* '*' as last char: OK */
-      flag=(*wildstr != wild_many && *wildstr != wild_one);
-      do
+      while (str_is_pattern && *str == wild_many)
+        str++;
+      for (; *wildstr ==  wild_many || *wildstr == wild_one; wildstr++)
+        if (*wildstr == wild_many)
+        {
+          while (str_is_pattern && *str == wild_many)
+            str++;
+        }
+        else
+        {
+          if (str_is_pattern && *str == wild_prefix && str[1])
+            str+=2;
+          else if (! *str++)
+            DBUG_RETURN (1);
+        }
+      if (!*wildstr)
+        DBUG_RETURN(0);		/* '*' as last char: OK */
+      if ((cmp= *wildstr) == wild_prefix && wildstr[1] && !str_is_pattern)
+        cmp=wildstr[1];
+      for (;;str++)
       {
-	if (flag)
-	{
-	  char cmp;
-	  if ((cmp= *wildstr) == wild_prefix && wildstr[1])
-	    cmp=wildstr[1];
-	  while (*str && *str != cmp)
-	    str++;
-	  if (!*str) DBUG_RETURN (1);
-	}
-	if (wild_compare(str,wildstr) == 0) DBUG_RETURN (0);
-      } while (*str++ && wildstr[0] != wild_many);
-      DBUG_RETURN(1);
+        while (*str && *str != cmp)
+          str++;
+        if (!*str)
+          DBUG_RETURN (1);
+	if (wild_compare(str,wildstr,str_is_pattern) == 0)
+          DBUG_RETURN (0);
+      }
+      /* We will never come here */
     }
   }
-  DBUG_RETURN (*str != '\0');
+  DBUG_RETURN (*str != 0);
 } /* wild_compare */
diff --git a/mysys/mf_wfile.c b/mysys/mf_wfile.c
index e9e12c72755..067e4b7acc5 100644
--- a/mysys/mf_wfile.c
+++ b/mysys/mf_wfile.c
@@ -106,7 +106,7 @@ int wf_test(register WF_PACK *wf_pack, register const char *name)
 
   not_pos=wf_pack->not_pos;
   for (i=0 ; i < not_pos; i++)
-    if (wild_compare(name,wf_pack->wild[i]) == 0)
+    if (wild_compare(name,wf_pack->wild[i],0) == 0)
       goto found;
   if (i)
     DBUG_RETURN(1);			/* No-match */
@@ -115,7 +115,7 @@ found:
 /* Test that it isn't in not-list */
 
   for (i=not_pos ; i < wf_pack->wilds; i++)
-    if (wild_compare(name,wf_pack->wild[i]) == 0)
+    if (wild_compare(name,wf_pack->wild[i],0) == 0)
       DBUG_RETURN(1);
   DBUG_RETURN(0);
 } /* wf_test */
-- 
cgit v1.2.1


From 23aee5621db30068669c1632d00890079b5ee02c Mon Sep 17 00:00:00 2001
From: "bar@bar.intranet.mysql.r18.ru" <>
Date: Thu, 3 Jun 2004 17:45:53 +0500
Subject: Unicode collations: WL#916 XML and "collation customization" language
 parsers.

---
 mysys/charset.c | 502 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 496 insertions(+), 6 deletions(-)

(limited to 'mysys')

diff --git a/mysys/charset.c b/mysys/charset.c
index d801fcdbd76..62068beccae 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -21,6 +21,344 @@
 #include <my_dir.h>
 #include <my_xml.h>
 
+
+/*
+  Collation language is implemented according to
+  subset of ICU Collation Customization (tailorings):
+  http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
+  
+  Collation language elements:
+  Delimiters:
+    space   - skipped
+  
+  <char> :=  A-Z | a-z | \uXXXX
+  
+  Shift command:
+    <shift>  := &       - reset at this letter. 
+  
+  Diff command:
+    <d1> :=  <     - Identifies a primary difference.
+    <d2> :=  <<    - Identifies a secondary difference.
+    <d3> := <<<    - Identifies a tertiary difference.
+  
+  
+  Collation rules:
+    <ruleset> :=  <rule>  { <ruleset> }
+    
+    <rule> :=   <d1>    <string>
+              | <d2>    <string>
+              | <d3>    <string>
+              | <shift> <char>
+    
+    <string> := <char> [ <string> ]
+
+  An example, Polish collation:
+  
+    &A < \u0105 <<< \u0104
+    &C < \u0107 <<< \u0106
+    &E < \u0119 <<< \u0118
+    &L < \u0142 <<< \u0141
+    &N < \u0144 <<< \u0143
+    &O < \u00F3 <<< \u00D3
+    &S < \u015B <<< \u015A
+    &Z < \u017A <<< \u017B    
+*/
+
+
+typedef enum my_coll_lexem_num_en
+{
+  MY_COLL_LEXEM_EOF	= 0,
+  MY_COLL_LEXEM_DIFF	= 1, 
+  MY_COLL_LEXEM_SHIFT	= 4,
+  MY_COLL_LEXEM_CHAR	= 5,
+  MY_COLL_LEXEM_ERROR	= 6
+} my_coll_lexem_num;
+
+
+typedef struct my_coll_lexem_st
+{
+  const char *beg;
+  const char *end;
+  const char *prev;
+  int   diff;
+  int   code;
+} MY_COLL_LEXEM;
+
+
+/*
+  Prepare a lexical analyzer for scanning a collation rule string
+  
+  SYNOPSIS
+    my_coll_lexem_init
+    lexem                Lexical analyzer state to initialize
+    str                  Start of the text to scan
+    strend               End of the text
+  USAGE
+    Call once before the first my_coll_lexem_next() on this state.
+  RETURN VALUES
+    N/A
+*/
+
+static void my_coll_lexem_init(MY_COLL_LEXEM *lexem,
+                               const char *str, const char *strend)
+{
+  /* Input window: both cursors start at the first character */
+  lexem->prev= lexem->beg= str;
+  lexem->end= strend;
+  /* No lexem has been recognized yet */
+  lexem->code= lexem->diff= 0;
+}
+
+
+/*
+  Format a parse error message together with the input it occurred at.
+  
+  SYNOPSIS
+    my_coll_lexem_print_error
+    lexem                Lexical analyzer whose position gives the context
+    errstr               Buffer that receives the message
+    errsize              Size of errstr in bytes
+    txt                  Description of the error
+  USAGE
+    Message layout is "<txt> at '<input>'"; input is cut to a 30 byte buffer.
+  RETURN VALUES
+    N/A
+*/
+
+static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem,
+                                      char *errstr, size_t errsize,
+                                      const char *txt)
+{
+  char   context[30];
+  size_t shown= min((size_t) (lexem->end - lexem->prev), sizeof(context)-1);
+  strmake(context, lexem->prev, shown);
+  errstr[errsize-1]= '\0';              /* make sure the buffer is terminated */
+  my_snprintf(errstr, errsize-1, "%s at '%s'", txt, context);
+}
+
+
+/*
+  Convert a hex digit into its numeric value
+  
+  SYNOPSIS
+    ch2x
+    ch                   hex digit to convert ('0'-'9', 'a'-'f', 'A'-'F')
+  USAGE
+    Used by the lexer to decode the digits of a \uXXXX escape.
+  RETURN VALUES
+    an integer value in the range 0..15
+    -1 if ch is not a hex digit
+*/
+
+static int ch2x(int ch)
+{
+  if (ch >= '0' && ch <= '9')
+    return ch - '0';
+  
+  if (ch >= 'a' && ch <= 'f')
+    return 10 + ch - 'a';
+  
+  if (ch >= 'A' && ch <= 'F')             /* not 'Z': 'G'..'Z' are not hex */
+    return 10 + ch - 'A';
+  
+  return -1;
+}
+
+
+/*
+  Lexical scanner of the collation customization language.
+  
+  SYNOPSIS
+    my_coll_lexem_next
+    lexem                Scanner state set up by my_coll_lexem_init().
+                         beg is moved past every lexem returned (but not
+                         past an offending character); prev marks the start.
+  USAGE
+    Call repeatedly until MY_COLL_LEXEM_EOF or MY_COLL_LEXEM_ERROR.
+    DIFF stores its level (1..3) in diff, CHAR stores its code in code.
+  RETURN VALUES
+    Lexem number: eof, diff, shift, char or error.
+*/
+
+static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
+{
+  while (lexem->beg < lexem->end)
+  {
+    const char *cur= lexem->beg;
+    lexem->prev= cur;
+    if (*cur == ' ' || *cur == '\t' || *cur == '\r' || *cur == '\n')
+    {
+      lexem->beg++;                           /* Delimiter: step over it */
+      continue;
+    }
+
+    if (*cur == '&')                          /* Reset command */
+    {
+      lexem->beg= cur + 1;
+      return MY_COLL_LEXEM_SHIFT;
+    }
+    if (*cur == '<')                          /* "<", "<<" or "<<<" */
+    {
+      lexem->diff= 1;
+      lexem->beg= cur + 1;
+      while (lexem->beg < lexem->end && *lexem->beg == '<' && lexem->diff < 3)
+      {
+        lexem->beg++;
+        lexem->diff++;
+      }
+      return MY_COLL_LEXEM_DIFF;
+    }
+    if ((*cur >= 'a' && *cur <= 'z') || (*cur >= 'A' && *cur <= 'Z'))
+    {
+      lexem->code= *cur;                      /* Plain latin letter */
+      lexem->beg= cur + 1;
+      return MY_COLL_LEXEM_CHAR;
+    }
+    if (*cur == '\\' && cur + 2 < lexem->end && cur[1] == 'u')
+    {
+      int digit;
+      /* \uXXXX escape: accumulate hex digits until a non-hex character */
+      lexem->code= 0;
+      lexem->beg= cur + 2;
+      while (lexem->beg < lexem->end && (digit= ch2x(*lexem->beg)) >= 0)
+      {
+        lexem->code= (lexem->code << 4) + digit;
+        lexem->beg++;
+      }
+      return MY_COLL_LEXEM_CHAR;
+    }
+
+    return MY_COLL_LEXEM_ERROR;               /* beg stays on the bad char */
+  }
+  return MY_COLL_LEXEM_EOF;
+}
+
+
+/*
+  Collation rule item
+*/
+
+typedef struct my_coll_rule_item_st
+{
+  uint base;     /* Base character                             */
+  uint curr;     /* Current character                          */
+  int diff[3];   /* Primary, Secondary and Tertiary difference */
+} MY_COLL_RULE;
+
+
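+/*
+  Collation language syntax parser built on the lexical parser.
+  
+  SYNOPSIS
+    my_coll_rule_parse
+    rule, mitems         Collation rule list to load to, and its capacity.
+    str                  A string containing collation language expression.
+    strend               End of the string.
+    errstr, errsize      Buffer for the error message, and its size.
+  USAGE
+    The expression must begin with a '&' reset.
+  RETURN VALUES
+    >= 0 - OK, the number of rules stored in the rule list.
+    -1   - ERROR, e.g. too many items; errstr describes the problem.
+*/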
+
+static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
+                              const char *str, const char *strend,
+                              char *errstr, size_t errsize)
+{
+  MY_COLL_LEXEM lexem;
+  my_coll_lexem_num lexnum;
+  my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR;
+  MY_COLL_RULE item; 
+  int state= 0;
+  size_t nitems= 0;
+  
+  /* Init all variables */
+  errstr[0]= '\0';
+  bzero(&item, sizeof(item));
+  my_coll_lexem_init(&lexem, str, strend);
+  
+  while ((lexnum= my_coll_lexem_next(&lexem)))
+  {
+    if (lexnum == MY_COLL_LEXEM_ERROR)
+    {
+      my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character");
+      return -1;
+    }
+    
+    switch (state) {
+    case 0:
+      if (lexnum != MY_COLL_LEXEM_SHIFT)
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected");
+        return -1;
+      }
+      prevlexnum= lexnum;
+      state= 2;
+      continue;
+      
+    case 1:
+      if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF)
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected");
+        return -1;
+      }
+      prevlexnum= lexnum;
+      state= 2;
+      continue;
+      
+    case 2:
+      if (lexnum != MY_COLL_LEXEM_CHAR)
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected");
+        return -1;
+      }
+      
+      if (prevlexnum == MY_COLL_LEXEM_SHIFT)
+      {
+        item.base= lexem.code;
+        item.diff[0]= 0;
+        item.diff[1]= 0;
+        item.diff[2]= 0;
+      }
+      else if (prevlexnum == MY_COLL_LEXEM_DIFF)
+      {
+        item.curr= lexem.code;
+        if (lexem.diff == 3)
+        {
+          item.diff[2]++;
+        }
+        else if (lexem.diff == 2)
+        {
+          item.diff[1]++;
+          item.diff[2]= 0;
+        }
+        else if (lexem.diff == 1)
+        {
+          item.diff[0]++;
+          item.diff[1]= 0;
+          item.diff[2]= 0;
+        }
+        if (nitems >= mitems)
+        {
+          my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");
+          return -1;
+        }
+        rule[nitems++]= item;
+      }
+      else
+      {
+        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen");
+        return -1;
+      }
+      state= 1;
+      continue;
+    }
+  }
+  return (size_t) nitems;
+}
+
+
 typedef struct
 {
   int		nchars;
@@ -284,6 +622,144 @@ err:
 }
 
 
+#ifdef HAVE_CHARSET_ucs2
+
+#define MY_MAX_COLL_RULE 64
+
+/*
+  This function copies an UCS2 collation from
+  the default Unicode Collation Algorithm (UCA)
+  weights applying tailorings, i.e. a set of
+  alternative weights for some characters. 
+  
+  The default UCA weights are stored in my_charset_ucs2_general_uca.
+  They consist of 256 pages, 256 character each.
+  
+  If a page is not overwritten by tailoring rules,
+  it is copied from UCA as is.
+  
+  If a page contains some overwritten characters, it is
+  allocated. Untouched characters are copied from the
+  default weights.
+*/
+
+static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
+{
+  MY_COLL_RULE rule[MY_MAX_COLL_RULE];
+  char errstr[128];
+  uchar   *newlengths;
+  uint16 **newweights;
+  const uchar *deflengths= my_charset_ucs2_general_uca.sort_order;
+  uint16     **defweights= my_charset_ucs2_general_uca.sort_order_big;
+  int rc, i;
+  
+  to->number= from->number ? from->number : to->number;
+  
+  if (from->csname)
+    if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
+      goto err;
+  
+  if (from->name)
+    if (!(to->name= my_once_strdup(from->name,MYF(MY_WME))))
+      goto err;
+  
+  if (from->comment)
+    if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
+      goto err;
+  
+  to->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply;
+  to->min_sort_char= my_charset_ucs2_general_uca.min_sort_char;
+  to->max_sort_char= my_charset_ucs2_general_uca.max_sort_char;
+  to->mbminlen= 2;
+  to->mbmaxlen= 2;
+  
+  
+  /* Parse ICU Collation Customization expression */
+  if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE,
+                              from->sort_order,
+                              from->sort_order + strlen(from->sort_order),
+                              errstr, sizeof(errstr))) <= 0)
+  {
+    /* 
+      TODO: add error message reporting.
+      printf("Error: %d '%s'\n", rc, errstr);
+    */
+    return 1;
+  }
+  
+  
+  if (!(newweights= (uint16**) my_once_alloc(256*sizeof(uint16*),MYF(MY_WME))))
+    goto err;
+  bzero(newweights, 256*sizeof(uint16*));
+  
+  if (!(newlengths= (uchar*) my_once_memdup(deflengths,256,MYF(MY_WME))))
+    goto err;
+  
+  /*
+    Calculate maximum lengths for the pages
+    which will be overwritten.
+  */
+  for (i=0; i < rc; i++)
+  {
+    uint pageb= (rule[i].base >> 8) & 0xFF;
+    uint pagec= (rule[i].curr >> 8) & 0xFF;
+    
+    if (newlengths[pagec] < deflengths[pageb])
+      newlengths[pagec]= deflengths[pageb];
+  }
+  
+  for (i=0; i < rc;  i++)
+  {
+    uint pageb= (rule[i].base >> 8) & 0xFF;
+    uint pagec= (rule[i].curr >> 8) & 0xFF;
+    uint chb, chc;
+    
+    if (!newweights[pagec])
+    {
+      /* Alloc new page and copy the default UCA weights */
+      uint size= 256*newlengths[pagec]*sizeof(uint16);
+      
+      if (!(newweights[pagec]= (uint16*) my_once_alloc(size,MYF(MY_WME))))
+        goto err;
+      bzero((void*) newweights[pagec], size);
+      
+      for (chc=0 ; chc < 256; chc++)
+      {
+        memcpy(newweights[pagec] + chc*newlengths[pagec],
+               defweights[pagec] + chc*deflengths[pagec],
+               deflengths[pagec]*sizeof(uint16));
+      }
+    }
+    
+    /* 
+      Apply the alternative rule:
+      shift to the base character and primary difference.
+    */
+    chc= rule[i].curr & 0xFF;
+    chb= rule[i].base & 0xFF;
+    memcpy(newweights[pagec] + chc*newlengths[pagec],
+           defweights[pageb] + chb*deflengths[pageb],
+           deflengths[pageb]*sizeof(uint16));
+    /* Apply primary difference */
+    newweights[pagec][chc*newlengths[pagec]]+= rule[i].diff[0];
+  }
+  
+  /* Copy non-overwritten pages from the default UCA weights */
+  for (i= 0; i < 256 ; i++)
+    if (!newweights[i])
+      newweights[i]= defweights[i];
+  
+  to->sort_order= newlengths;
+  to->sort_order_big= newweights;
+  
+  return 0;
+  
+err:
+  return 1;
+}
+#endif
+
+
 static my_bool simple_cs_is_full(CHARSET_INFO *cs)
 {
   return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
@@ -315,14 +791,28 @@ static int add_collation(CHARSET_INFO *cs)
     
     if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
     {
-      simple_cs_init_functions(all_charsets[cs->number]);
-      if (simple_cs_copy_data(all_charsets[cs->number],cs))
-	return MY_XML_ERROR;
-      if (simple_cs_is_full(all_charsets[cs->number]))
+      if (!strcmp(cs->csname,"ucs2") )
       {
-        all_charsets[cs->number]->state |= MY_CS_LOADED;
+#ifdef HAVE_CHARSET_ucs2
+        CHARSET_INFO *new= all_charsets[cs->number];
+        new->cset= my_charset_ucs2_general_uca.cset;
+        new->coll= my_charset_ucs2_general_uca.coll;
+        if (ucs2_copy_data(new, cs))
+          return MY_XML_ERROR;
+        new->state |= MY_CS_AVAILABLE | MY_CS_LOADED;
+#endif        
+      }
+      else
+      {
+        simple_cs_init_functions(all_charsets[cs->number]);
+        if (simple_cs_copy_data(all_charsets[cs->number],cs))
+	  return MY_XML_ERROR;
+        if (simple_cs_is_full(all_charsets[cs->number]))
+        {
+          all_charsets[cs->number]->state |= MY_CS_LOADED;
+        }
+        all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
       }
-      all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
     }
     else
     {
-- 
cgit v1.2.1


From 2f0ca1ce424a26f71d4b6ec85d890060ecb73c68 Mon Sep 17 00:00:00 2001
From: "paul@kite-hub.kitebird.com" <>
Date: Thu, 3 Jun 2004 11:52:54 -0500
Subject: Fix skipp -> skip once and for all. (Note: This affects only
 comments, not variable names.)

---
 mysys/ChangeLog     |  4 ++--
 mysys/default.c     |  2 +-
 mysys/mf_iocache.c  |  4 ++--
 mysys/mf_iocache2.c |  2 +-
 mysys/mf_pack.c     | 12 ++++++------
 mysys/mf_soundex.c  |  4 ++--
 mysys/mf_wfile.c    |  2 +-
 mysys/my_error.c    |  4 ++--
 mysys/my_getwd.c    |  2 +-
 9 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'mysys')

diff --git a/mysys/ChangeLog b/mysys/ChangeLog
index e24fc00b493..7a426106667 100644
--- a/mysys/ChangeLog
+++ b/mysys/ChangeLog
@@ -91,7 +91,7 @@ Tue Mar 26 15:09:45 1991  Mikael WIDENIUS  (monty at panther)
 
 Sat Mar 23 10:49:49 1991  Michael Widenius  (monty at LYNX)
 
-	* Added init of alarm variables to skipp some warnings from gcc.
+	* Added init of alarm variables to skip some warnings from gcc.
 
 Tue Mar  5 16:50:34 1991  Michael Widenius  (monty at LYNX)
 
@@ -124,7 +124,7 @@ Mon Aug 27 22:20:38 1990  Michael Widenius  (monty at lynx)
 Sun Apr  1 23:29:47 1990  Monty  (monty at monty)
 
 	* Changed mf_keydisk.c to have separate functions for read and write.
-	  Read can now return pointer to intern key-buffer to skipp
+	  Read can now return pointer to intern key-buffer to skip
 	  unessessary memcpy-s.
 
 Fri Mar 23 23:03:39 1990  Monty  (monty at monty)
diff --git a/mysys/default.c b/mysys/default.c
index 056f686e16f..792233ed10d 100644
--- a/mysys/default.c
+++ b/mysys/default.c
@@ -222,7 +222,7 @@ int load_defaults(const char *conf_file, const char **groups,
   /* copy name + found arguments + command line arguments to new array */
   res[0]= argv[0][0];  /* Name MUST be set, even by embedded library */
   memcpy((gptr) (res+1), args.buffer, args.elements*sizeof(char*));
-  /* Skipp --defaults-file and --defaults-extra-file */
+  /* Skip --defaults-file and --defaults-extra-file */
   (*argc)-= args_used;
   (*argv)+= args_used;
 
diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c
index 530721a79ad..7b5371c4289 100644
--- a/mysys/mf_iocache.c
+++ b/mysys/mf_iocache.c
@@ -800,7 +800,7 @@ int _my_b_async_read(register IO_CACHE *info, byte *Buffer, uint Count)
     {						/* Fix if skipped bytes */
       if (info->aio_read_pos + read_length < info->pos_in_file)
       {
-	read_length=0;				/* Skipp block */
+	read_length=0;				/* Skip block */
 	next_pos_in_file=info->pos_in_file;
       }
       else
@@ -894,7 +894,7 @@ int _my_b_async_read(register IO_CACHE *info, byte *Buffer, uint Count)
     if (aioread(info->file,read_buffer,(int) max_length,
 		(my_off_t) next_pos_in_file,MY_SEEK_SET,
 		&info->aio_result.result))
-    {						/* Skipp async io */
+    {						/* Skip async io */
       my_errno=errno;
       DBUG_PRINT("error",("got error: %d, aio_result: %d from aioread, async skipped",
 			  errno, info->aio_result.result.aio_errno));
diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c
index bce08b9795b..70b2f288538 100644
--- a/mysys/mf_iocache2.c
+++ b/mysys/mf_iocache2.c
@@ -266,7 +266,7 @@ uint my_b_vprintf(IO_CACHE *info, const char* fmt, va_list args)
       fmt++;
       /* Found one '%' */
     }
-    /* Skipp if max size is used (to be compatible with printf) */
+    /* Skip if max size is used (to be compatible with printf) */
     while (my_isdigit(&my_charset_latin1, *fmt) || *fmt == '.' || *fmt == '-')
       fmt++;
     if (*fmt == 's')				/* String parameter */
diff --git a/mysys/mf_pack.c b/mysys/mf_pack.c
index 2d0a5ea282b..9193238708d 100644
--- a/mysys/mf_pack.c
+++ b/mysys/mf_pack.c
@@ -43,7 +43,7 @@ void pack_dirname(my_string to, const char *from)
   (void) intern_filename(to,from);		/* Change to intern name */
 
 #ifdef FN_DEVCHAR
-  if ((start=strrchr(to,FN_DEVCHAR)) != 0)	/* Skipp device part */
+  if ((start=strrchr(to,FN_DEVCHAR)) != 0)	/* Skip device part */
     start++;
   else
 #endif
@@ -131,7 +131,7 @@ uint cleanup_dirname(register my_string to, const char *from)
   from_ptr=(my_string) from;
 #ifdef FN_DEVCHAR
   if ((pos=strrchr(from_ptr,FN_DEVCHAR)) != 0)
-  {						/* Skipp device part */
+  {						/* Skip device part */
     length=(uint) (pos-from_ptr)+1;
     start=strnmov(buff,from_ptr,length); from_ptr+=length;
   }
@@ -195,7 +195,7 @@ uint cleanup_dirname(register my_string to, const char *from)
 	  pos--;			/* Remove dupplicate '/' */
       }
       else if (pos-start > 1 && pos[-1] == FN_CURLIB && pos[-2] == FN_LIBCHAR)
-	pos-=2;					/* Skipp /./ */
+	pos-=2;					/* Skip /./ */
       else if (pos > buff+1 && pos[-1] == FN_HOMELIB && pos[-2] == FN_LIBCHAR)
       {					/* Found ..../~/  */
 	buff[0]=FN_HOMELIB;
@@ -409,7 +409,7 @@ uint system_filename(my_string to, const char *from)
   libchar_found=0;
   (void) strmov(buff,from);			 /* If to == from */
   from_pos= buff;
-  if ((pos=strrchr(from_pos,FN_DEVCHAR)))	/* Skipp device part */
+  if ((pos=strrchr(from_pos,FN_DEVCHAR)))	/* Skip device part */
   {
     pos++;
     to_pos=strnmov(to,from_pos,(size_s) (pos-from_pos));
@@ -419,7 +419,7 @@ uint system_filename(my_string to, const char *from)
     to_pos=to;
 
   if (from_pos[0] == FN_CURLIB && from_pos[1] == FN_LIBCHAR)
-    from_pos+=2;				/* Skipp './' */
+    from_pos+=2;				/* Skip './' */
   if (strchr(from_pos,FN_LIBCHAR))
   {
     *(to_pos++) = FN_C_BEFORE_DIR;
@@ -487,7 +487,7 @@ my_string intern_filename(my_string to, const char *from)
 
   convert_dirname(buff,from,NullS);		/* change '<>' to '[]' */
   from_pos=buff;
-  if ((pos=strrchr(from_pos,FN_DEVCHAR)))	/* Skipp device part */
+  if ((pos=strrchr(from_pos,FN_DEVCHAR)))	/* Skip device part */
   {
     pos++;
     to_pos=strnmov(to,from_pos,(size_s) (pos-from_pos));
diff --git a/mysys/mf_soundex.c b/mysys/mf_soundex.c
index 27ab4892c57..c0c6105a6eb 100644
--- a/mysys/mf_soundex.c
+++ b/mysys/mf_soundex.c
@@ -52,7 +52,7 @@ void soundex(CHARSET_INFO * cs,register my_string out_pntr, my_string in_pntr,
 
   if (remove_garbage)
   {
-    while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skipp pre-space */
+    while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */
       in_pntr++;
   }
   *out_pntr++ = map[(uchar)*in_pntr];	/* Copy first letter		 */
@@ -82,7 +82,7 @@ void soundex(CHARSET_INFO * cs,register my_string out_pntr, my_string in_pntr,
 
   /*
     If alpha, map input letter to soundex code.
-    If not alpha and remove_garbage is set then skipp to next char
+    If not alpha and remove_garbage is set then skip to next char
     else return 0
     */
 
diff --git a/mysys/mf_wfile.c b/mysys/mf_wfile.c
index b964d7ee494..7d537eaa06a 100644
--- a/mysys/mf_wfile.c
+++ b/mysys/mf_wfile.c
@@ -39,7 +39,7 @@ WF_PACK *wf_comp(my_string str)
   WF_PACK *ret;
   DBUG_ENTER("wf_comp");
 
-  not_pos= -1;			/* Skipp space and '!' in front */
+  not_pos= -1;			/* Skip space and '!' in front */
   while (*str == ' ')
     str++;
   if (*str == '!')
diff --git a/mysys/my_error.c b/mysys/my_error.c
index 33d79bbc5e6..9789de9d58a 100644
--- a/mysys/my_error.c
+++ b/mysys/my_error.c
@@ -68,10 +68,10 @@ int my_error(int nr,myf MyFlags, ...)
     }
     else
     {
-      /* Skipp if max size is used (to be compatible with printf) */
+      /* Skip if max size is used (to be compatible with printf) */
       while (my_isdigit(&my_charset_latin1, *tpos) || *tpos == '.' || *tpos == '-')
 	tpos++;
-      if (*tpos == 'l')				/* Skipp 'l' argument */
+      if (*tpos == 'l')				/* Skip 'l' argument */
 	tpos++;
       if (*tpos == 's')				/* String parameter */
       {
diff --git a/mysys/my_getwd.c b/mysys/my_getwd.c
index a08d28d8545..fd47c532cff 100644
--- a/mysys/my_getwd.c
+++ b/mysys/my_getwd.c
@@ -108,7 +108,7 @@ int my_setwd(const char *dir, myf MyFlags)
   {
     uint drive,drives;
 
-    pos++;				/* Skipp FN_DEVCHAR */
+    pos++;				/* Skip FN_DEVCHAR */
     drive=(uint) (my_toupper(&my_charset_latin1,dir[0])-'A'+1);
     drives= (uint) -1;
     if ((pos-(byte*) dir) == 2 && drive > 0 && drive < 32)
-- 
cgit v1.2.1


From 3f556025a7536099c9d257a80cb469ece4686d71 Mon Sep 17 00:00:00 2001
From: "bar@bar.intranet.mysql.r18.ru" <>
Date: Fri, 4 Jun 2004 09:07:46 +0500
Subject: charset.c:   Typo fix.   Thanks Vladimir Kolpakov who noticed it.

---
 mysys/charset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mysys')

diff --git a/mysys/charset.c b/mysys/charset.c
index 62068beccae..7eccf2dab68 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -158,7 +158,7 @@ static int ch2x(int ch)
   if (ch >= 'a' && ch <= 'f')
     return 10 + ch - 'a';
   
-  if (ch >= 'A' && ch <= 'Z')
+  if (ch >= 'A' && ch <= 'F')
     return 10 + ch - 'A';
   
   return -1;
-- 
cgit v1.2.1


From 1e05e6cb82a28e18f80ae807d16751eedafaa074 Mon Sep 17 00:00:00 2001
From: "sergefp@mysql.com" <>
Date: Mon, 7 Jun 2004 12:09:10 +0400
Subject: Post review fixes for "SQL Syntax for Prepared Statements".

---
 mysys/my_error.c | 45 +++++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

(limited to 'mysys')

diff --git a/mysys/my_error.c b/mysys/my_error.c
index b16c39085fd..8a377f63c7e 100644
--- a/mysys/my_error.c
+++ b/mysys/my_error.c
@@ -37,8 +37,8 @@ char NEAR errbuff[NRERRBUFFS][ERRMSGSIZE];
     The following subset of printf format is supported:
     "%[0-9.-]*l?[sdu]", where all length flags are parsed but ignored.
 
-    Additionally "%.*s" is supported and "%.*[ud]" is correctly parsed but 
-    length value is ignored.
+    Additionally "%.*s" is supported and "%.*[ud]" is correctly parsed but
+    the length value is ignored.
 */
 
 int my_error(int nr,myf MyFlags, ...)
@@ -49,7 +49,7 @@ int my_error(int nr,myf MyFlags, ...)
   reg2 char	*endpos;
   char		* par;
   char		ebuff[ERRMSGSIZE+20];
-  int           prec_chars;
+  int           prec_chars; /* output precision */
   my_bool       prec_supplied;
   DBUG_ENTER("my_error");
   LINT_INIT(prec_chars); /* protected by prec_supplied */
@@ -76,10 +76,11 @@ int my_error(int nr,myf MyFlags, ...)
     }
     else
     {
-      /* 
-        Skip size/precision flags to be compatible with printf. 
-        The only size/precision flag supported is "%.*s". 
-        "%.*u" and "%.*d" cause 
+      /*
+        Skip size/precision flags to be compatible with printf.
+        The only size/precision flag supported is "%.*s".
+        If "%.*u" or "%.*d" are encountered, the precision number is read
+        from the variable argument list but its value is ignored.
       */
       prec_supplied= 0;
       if (*tpos== '.')
@@ -94,52 +95,52 @@ int my_error(int nr,myf MyFlags, ...)
           prec_supplied= 1;
         }
       }
-       
+
       if (!prec_supplied)
       {
-        while (my_isdigit(&my_charset_latin1, *tpos) || *tpos == '.' || 
+        while (my_isdigit(&my_charset_latin1, *tpos) || *tpos == '.' ||
                *tpos == '-')
-	 tpos++;
-        
-        if (*tpos == 'l')				/* Skipp 'l' argument */
+	  tpos++;
+
+        if (*tpos == 'l')				/* Skip 'l' argument */
 	  tpos++;
       }
 
       if (*tpos == 's')				/* String parameter */
       {
-	par = va_arg(ap, char *);
-	plen = (uint) strlen(par);
+	par= va_arg(ap, char *);
+	plen= (uint) strlen(par);
         if (prec_supplied && prec_chars > 0)
           plen= min((uint)prec_chars, plen);
 	if (olen + plen < ERRMSGSIZE+2)		/* Replace if possible */
 	{
-          memcpy(endpos,par, plen);
-          endpos += plen;
+          strmake(endpos, par, plen);
+          endpos+= plen;
           tpos++;
-          olen+=plen-2;
+          olen+= plen-2;
           continue;
 	}
       }
       else if (*tpos == 'd' || *tpos == 'u')	/* Integer parameter */
       {
 	register int iarg;
-	iarg = va_arg(ap, int);
+	iarg= va_arg(ap, int);
 	if (*tpos == 'd')
 	  plen= (uint) (int10_to_str((long) iarg, endpos, -10) - endpos);
 	else
 	  plen= (uint) (int10_to_str((long) (uint) iarg, endpos, 10) - endpos);
 	if (olen + plen < ERRMSGSIZE+2) /* Replace parameter if possible */
 	{
-	  endpos+=plen;
+	  endpos+= plen;
 	  tpos++;
-	  olen+=plen-2;
+	  olen+= plen-2;
 	  continue;
 	}
       }
     }
-    *endpos++='%';		/* % used as % or unknown code */
+    *endpos++= '%';		/* % used as % or unknown code */
   }
-  *endpos='\0';			/* End of errmessage */
+  *endpos= '\0';			/* End of errmessage */
   va_end(ap);
   DBUG_RETURN((*error_handler_hook)(nr, ebuff, MyFlags));
 }
-- 
cgit v1.2.1


From 8962ed3c7ddd271b08e195f8be864a1cac9804ff Mon Sep 17 00:00:00 2001
From: "bar@bar.intranet.mysql.r18.ru" <>
Date: Tue, 8 Jun 2004 17:56:15 +0500
Subject: WL#916: Unicode collations for some languages

---
 mysys/charset.c | 210 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 180 insertions(+), 30 deletions(-)

(limited to 'mysys')

diff --git a/mysys/charset.c b/mysys/charset.c
index 7eccf2dab68..ea07708963d 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -624,6 +624,123 @@ err:
 
 #ifdef HAVE_CHARSET_ucs2
 
+typedef struct my_tailoring_st
+{
+  uint  number;
+  const char *name;
+  const char *tailoring;
+} my_tailoring;
+
+static my_tailoring tailoring[]=
+{
+  {
+    0, "icelandic",
+    /*
+      Some sources treat LETTER A WITH DIARESIS (00E4,00C4)
+      secondary greater than LETTER AE (00E6,00C6).
+      http://www.evertype.com/alphabets/icelandic.pdf
+      http://developer.mimer.com/collations/charts/icelandic.htm
+
+      Other sources do not provide any special rules
+      for LETTER A WITH DIARESIS:
+      http://www.omniglot.com/writing/icelandic.htm
+      http://en.wikipedia.org/wiki/Icelandic_alphabet
+      http://oss.software.ibm.com/icu/charts/collation/is.html
+
+      Let's go the first way.
+    */
+    "& A < \\u00E1 <<< \\u00C1 "
+    "& D < \\u00F0 <<< \\u00D0 "
+    "& E < \\u00E9 <<< \\u00C9 "
+    "& I < \\u00ED <<< \\u00CD "
+    "& O < \\u00F3 <<< \\u00D3 "
+    "& U < \\u00FA <<< \\u00DA "
+    "& Y < \\u00FD <<< \\u00DD "
+    "& Z < \\u00FE <<< \\u00DE "
+        "< \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
+        "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
+        "< \\u00E5 <<< \\u00C5 "
+  },
+  {
+    1, "latvian",
+    /*
+      Some sources treat I and Y primary different.
+      Other sources treat I and Y the same on primary level.
+      We'll go the first way.
+    */
+    "& C < \\u010D <<< \\u010C "
+    "& G < \\u0123 <<< \\u0122 "
+    "& I < \\u0079 <<< \\u0059 "
+    "& K < \\u0137 <<< \\u0136 "
+    "& L < \\u013C <<< \\u013B "
+    "& N < \\u0146 <<< \\u0145 "
+    "& R < \\u0157 <<< \\u0156 "
+    "& S < \\u0161 <<< \\u0160 "
+    "& Z < \\u017E <<< \\u017D "
+  },
+  {
+    2, "romanian",
+    "& A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 "
+    "& I < \\u00EE <<< \\u00CE "
+    "& S < \\u0219 <<< \\u0218 << \\u015F <<< \\u015E "
+    "& T < \\u021B <<< \\u021A << \\u0163 <<< \\u0162 "
+  },
+  {
+    3, "slovenian",
+    "& C < \\u010D <<< \\u010C "
+    "& S < \\u0161 <<< \\u0160 "
+    "& Z < \\u017E <<< \\u017D "
+  },
+  {
+    4, "polish",
+    "& A < \\u0105 <<< \\u0104 "
+    "& C < \\u0107 <<< \\u0106 "
+    "& E < \\u0119 <<< \\u0118 "
+    "& L < \\u0142 <<< \\u0141 "
+    "& N < \\u0144 <<< \\u0143 "
+    "& O < \\u00F3 <<< \\u00D3 "
+    "& S < \\u015B <<< \\u015A "
+    "& Z < \\u017A <<< \\u017B "
+  },
+  {
+    5, "estonian",
+    "& S < \\u0161 <<< \\u0160 "
+       " < \\u007A <<< \\u005A "
+       " < \\u017E <<< \\u017D "
+    "& W < \\u00F5 <<< \\u00D5 "
+        "< \\u00E4 <<< \\u00C4 "
+        "< \\u00F6 <<< \\u00D6 "
+        "< \\u00FC <<< \\u00DC "
+  },
+  {
+    6, "spanish",
+    "& N < \\u00F1 <<< \\u00D1 "
+  },
+  {
+    7, "swedish",
+    /*
+      Some sources treat V and W as similar on primary level.
+      We'll treat V and W as different on primary level.
+    */
+    "& Y <<\\u00FC <<< \\u00DC "
+    "& Z < \\u00E5 <<< \\u00C5 "
+        "< \\u00E4 <<< \\u00C4 << \\u00E6 <<< \\u00C6 "
+        "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
+  },
+  {
+    8, "turkish",
+    "& C < \\u00E7 <<< \\u00C7 "
+    "& G < \\u011F <<< \\u011E "
+    "& H < \\u0131 <<< \\u0049 "
+    "& O < \\u00F6 <<< \\u00D6 "
+    "& S < \\u015F <<< \\u015E "
+    "& U < \\u00FC <<< \\u00DC "
+  },
+  {
+    0, NULL, NULL
+  }
+};
+
 #define MY_MAX_COLL_RULE 64
 
 /*
@@ -643,7 +760,7 @@ err:
   default weights.
 */
 
-static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
+static my_bool create_tailoring(CHARSET_INFO *cs)
 {
   MY_COLL_RULE rule[MY_MAX_COLL_RULE];
   char errstr[128];
@@ -652,32 +769,14 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
   const uchar *deflengths= my_charset_ucs2_general_uca.sort_order;
   uint16     **defweights= my_charset_ucs2_general_uca.sort_order_big;
   int rc, i;
-  
-  to->number= from->number ? from->number : to->number;
-  
-  if (from->csname)
-    if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
-      goto err;
-  
-  if (from->name)
-    if (!(to->name= my_once_strdup(from->name,MYF(MY_WME))))
-      goto err;
-  
-  if (from->comment)
-    if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
-      goto err;
-  
-  to->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply;
-  to->min_sort_char= my_charset_ucs2_general_uca.min_sort_char;
-  to->max_sort_char= my_charset_ucs2_general_uca.max_sort_char;
-  to->mbminlen= 2;
-  to->mbmaxlen= 2;
-  
+
+  if (!cs->tailoring)
+    return 1;
   
   /* Parse ICU Collation Customization expression */
   if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE,
-                              from->sort_order,
-                              from->sort_order + strlen(from->sort_order),
+                              cs->tailoring,
+                              cs->tailoring + strlen(cs->tailoring),
                               errstr, sizeof(errstr))) <= 0)
   {
     /* 
@@ -687,13 +786,12 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
     return 1;
   }
   
-  
   if (!(newweights= (uint16**) my_once_alloc(256*sizeof(uint16*),MYF(MY_WME))))
-    goto err;
+    return 1;
   bzero(newweights, 256*sizeof(uint16*));
   
   if (!(newlengths= (uchar*) my_once_memdup(deflengths,256,MYF(MY_WME))))
-    goto err;
+    return 1;
   
   /*
     Calculate maximum lenghts for the pages
@@ -720,7 +818,7 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
       uint size= 256*newlengths[pagec]*sizeof(uint16);
       
       if (!(newweights[pagec]= (uint16*) my_once_alloc(size,MYF(MY_WME))))
-        goto err;
+        return 1;
       bzero((void*) newweights[pagec], size);
       
       for (chc=0 ; chc < 256; chc++)
@@ -749,10 +847,41 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
     if (!newweights[i])
       newweights[i]= defweights[i];
   
-  to->sort_order= newlengths;
-  to->sort_order_big= newweights;
+  cs->sort_order= newlengths;
+  cs->sort_order_big= newweights;
   
   return 0;
+}
+
+
+static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
+{
+  
+  to->number= from->number ? from->number : to->number;
+  
+  if (from->csname)
+    if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
+      goto err;
+  
+  if (from->name)
+    if (!(to->name= my_once_strdup(from->name,MYF(MY_WME))))
+      goto err;
+  
+  if (from->comment)
+    if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
+      goto err;
+  
+  if (from->tailoring)
+    if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
+      goto err;
+  
+  to->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply;
+  to->min_sort_char= my_charset_ucs2_general_uca.min_sort_char;
+  to->max_sort_char= my_charset_ucs2_general_uca.max_sort_char;
+  to->mbminlen= 2;
+  to->mbmaxlen= 2;
+  
+  return create_tailoring(to);
   
 err:
   return 1;
@@ -848,6 +977,24 @@ static int add_collation(CHARSET_INFO *cs)
   return MY_XML_OK;
 }
 
+#ifdef HAVE_CHARSET_ucs2
+static my_bool init_uca_charsets()
+{
+  my_tailoring *t;
+  CHARSET_INFO cs= my_charset_ucs2_general_uca;
+  cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT;
+  char name[64];
+  for (t= tailoring; t->tailoring; t++)
+  {
+    cs.number= 128 + t->number;
+    cs.tailoring= t->tailoring;
+    cs.name= name;
+    sprintf(name, "ucs2_%s_ci", t->name);
+    add_collation(&cs);
+  }
+  return 0;
+}
+#endif
 
 #define MY_MAX_ALLOWED_BUF 1024*1024
 #define MY_CHARSET_INDEX "Index.xml"
@@ -947,6 +1094,9 @@ static my_bool init_available_charsets(myf myflags)
 
     bzero(&all_charsets,sizeof(all_charsets));
     init_compiled_charsets(myflags);
+#ifdef HAVE_CHARSET_ucs2
+    init_uca_charsets();
+#endif
     
     /* Copy compiled charsets */
     for (cs=all_charsets;
-- 
cgit v1.2.1


From 1b76aa077f260104b9710300e696a3ea6f388fd5 Mon Sep 17 00:00:00 2001
From: "bar@mysql.com" <>
Date: Wed, 9 Jun 2004 12:33:30 +0500
Subject: charset.c:   Fix to be ANSI C compliant

---
 mysys/charset.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mysys')

diff --git a/mysys/charset.c b/mysys/charset.c
index ea07708963d..a9c733e25cf 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -982,8 +982,9 @@ static my_bool init_uca_charsets()
 {
   my_tailoring *t;
   CHARSET_INFO cs= my_charset_ucs2_general_uca;
-  cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT;
   char name[64];
+  
+  cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT;
   for (t= tailoring; t->tailoring; t++)
   {
     cs.number= 128 + t->number;
-- 
cgit v1.2.1


From 34d413a6a0459f770f3c10e38f9e5820ac69bd9a Mon Sep 17 00:00:00 2001
From: "bar@mysql.com" <>
Date: Thu, 10 Jun 2004 19:10:21 +0500
Subject: Optimization to use less memory.

---
 mysys/charset.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'mysys')

diff --git a/mysys/charset.c b/mysys/charset.c
index a9c733e25cf..d2d71689d7b 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -406,12 +406,21 @@ static void set_max_sort_char(CHARSET_INFO *cs)
 }
 
 
-static void init_state_maps(CHARSET_INFO *cs)
+static my_bool init_state_maps(CHARSET_INFO *cs)
 {
   uint i;
-  uchar *state_map= cs->state_map;
-  uchar *ident_map= cs->ident_map;
+  uchar *state_map;
+  uchar *ident_map;
 
+  if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
+    return 1;
+    
+  if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
+    return 1;
+
+  state_map= cs->state_map;
+  ident_map= cs->ident_map;
+  
   /* Fill state_map with states to get a faster parser */
   for (i=0; i < 256 ; i++)
   {
@@ -458,6 +467,7 @@ static void init_state_maps(CHARSET_INFO *cs)
   state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
   state_map[(uchar)'b']= state_map[(uchar)'b']= (uchar) MY_LEX_IDENT_OR_BIN;
   state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
+  return 0;
 }
 
 
@@ -582,7 +592,8 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
 					     MY_CS_CTYPE_TABLE_SIZE,
 					     MYF(MY_WME))))
       goto err;
-    init_state_maps(to);
+    if (init_state_maps(to))
+      goto err;
   }
   if (from->to_lower)
     if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
@@ -601,6 +612,8 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
 						  MY_CS_SORT_ORDER_TABLE_SIZE,
 						  MYF(MY_WME))))
       goto err;
+
+    
     set_max_sort_char(to);
   }
   if (from->tab_to_uni)
@@ -1108,7 +1121,8 @@ static my_bool init_available_charsets(myf myflags)
       {
         set_max_sort_char(*cs);
         if (cs[0]->ctype)
-          init_state_maps(*cs);
+          if (init_state_maps(*cs))
+            *cs= NULL;
       }
     }
     
-- 
cgit v1.2.1


From 78fa465b924836a402e58eebb3e2b7936d712a2d Mon Sep 17 00:00:00 2001
From: "serg@serg.mylan" <>
Date: Thu, 10 Jun 2004 21:18:57 +0200
Subject: bug#3964 and related issues: FTB problems with charsets where one
 byte can match many correct prefix compare with my_strnncoll

---
 mysys/my_handler.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'mysys')

diff --git a/mysys/my_handler.c b/mysys/my_handler.c
index de0fba56d21..6003808df25 100644
--- a/mysys/my_handler.c
+++ b/mysys/my_handler.c
@@ -21,13 +21,11 @@ int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
 		    uchar *b, uint b_length, my_bool part_key,
 		    my_bool skip_end_space)
 {
-  if (part_key && b_length < a_length)
-    a_length=b_length;
   if (skip_end_space)
     return charset_info->coll->strnncollsp(charset_info, a, a_length,
 					   b, b_length);
   return charset_info->coll->strnncoll(charset_info, a, a_length,
-				       b, b_length);
+				       b, b_length, part_key);
 }
 
 
-- 
cgit v1.2.1


From f08bbd1f1260becb5e537932527eef52b1584776 Mon Sep 17 00:00:00 2001
From: "konstantin@mysql.com" <>
Date: Thu, 10 Jun 2004 23:58:39 +0400
Subject: assert.h needed for my_dbug.h now is included in my_dbug.h, where it
 for some reason wasn't included before. A lot of files cleaned up from
 #include <assert.h>

---
 mysys/mf_iocache.c       | 1 -
 mysys/mf_iocache2.c      | 1 -
 mysys/mf_keycache.c      | 1 -
 mysys/my_bitmap.c        | 1 -
 mysys/my_gethostbyname.c | 1 -
 mysys/my_getopt.c        | 1 -
 mysys/my_pthread.c       | 1 -
 mysys/my_seek.c          | 1 -
 mysys/rijndael.c         | 1 -
 mysys/thr_alarm.c        | 1 -
 10 files changed, 10 deletions(-)

(limited to 'mysys')

diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c
index 7b5371c4289..f16f2b7ab72 100644
--- a/mysys/mf_iocache.c
+++ b/mysys/mf_iocache.c
@@ -55,7 +55,6 @@ TODO:
 #include "mysys_err.h"
 static void my_aiowait(my_aio_result *result);
 #endif
-#include <assert.h>
 #include <errno.h>
 
 #ifdef THREAD
diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c
index 70b2f288538..3755bcdb53d 100644
--- a/mysys/mf_iocache2.c
+++ b/mysys/mf_iocache2.c
@@ -23,7 +23,6 @@
 #include <m_string.h>
 #include <stdarg.h>
 #include <m_ctype.h>
-#include <assert.h>
 
 my_off_t my_b_append_tell(IO_CACHE* info)
 {
diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c
index 168483f276b..32b3154b8ed 100644
--- a/mysys/mf_keycache.c
+++ b/mysys/mf_keycache.c
@@ -44,7 +44,6 @@
 #include "my_static.h"
 #include <m_string.h>
 #include <errno.h>
-#include <assert.h>
 #include <stdarg.h>
 
 /*
diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c
index 0f8984e6b3d..3a09255b0b0 100644
--- a/mysys/my_bitmap.c
+++ b/mysys/my_bitmap.c
@@ -35,7 +35,6 @@
 
 #include "mysys_priv.h"
 #include <my_bitmap.h>
-#include <assert.h>
 #include <m_string.h>
 
 
diff --git a/mysys/my_gethostbyname.c b/mysys/my_gethostbyname.c
index 5044a505054..27281f3489d 100644
--- a/mysys/my_gethostbyname.c
+++ b/mysys/my_gethostbyname.c
@@ -18,7 +18,6 @@
 /* Thread safe version of gethostbyname_r() */
 
 #include "mysys_priv.h"
-#include <assert.h>
 #if !defined(MSDOS) && !defined(__WIN__)
 #include <netdb.h>
 #endif
diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c
index 8d0b05d55f7..d7a9babe5e7 100644
--- a/mysys/my_getopt.c
+++ b/mysys/my_getopt.c
@@ -18,7 +18,6 @@
 #include <m_string.h>
 #include <stdlib.h>
 #include <my_getopt.h>
-#include <assert.h>
 #include <my_sys.h>
 #include <mysys_err.h>
 
diff --git a/mysys/my_pthread.c b/mysys/my_pthread.c
index d721418ffa1..37517fb8327 100644
--- a/mysys/my_pthread.c
+++ b/mysys/my_pthread.c
@@ -23,7 +23,6 @@
 #include <signal.h>
 #include <m_string.h>
 #include <thr_alarm.h>
-#include <assert.h>
 
 #if (defined(__BSD__) || defined(_BSDI_VERSION)) && !defined(HAVE_mit_thread)
 #define SCHED_POLICY SCHED_RR
diff --git a/mysys/my_seek.c b/mysys/my_seek.c
index ec24a26b3d9..6af65d70fd0 100644
--- a/mysys/my_seek.c
+++ b/mysys/my_seek.c
@@ -15,7 +15,6 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
 #include "mysys_priv.h"
-#include <assert.h>
 
 	/* Seek to position in file */
 	/*ARGSUSED*/
diff --git a/mysys/rijndael.c b/mysys/rijndael.c
index dd0c45445d5..43cd14101ca 100644
--- a/mysys/rijndael.c
+++ b/mysys/rijndael.c
@@ -26,7 +26,6 @@
 */
 
 #include <my_global.h>
-#include <assert.h>
 #include "rijndael.h"
 
 /*
diff --git a/mysys/thr_alarm.c b/mysys/thr_alarm.c
index 54aa4d421f6..84a8e779ae1 100644
--- a/mysys/thr_alarm.c
+++ b/mysys/thr_alarm.c
@@ -27,7 +27,6 @@
 #include <m_string.h>
 #include <queues.h>
 #include "thr_alarm.h"
-#include <assert.h>
 
 #ifdef HAVE_SYS_SELECT_H
 #include <sys/select.h>				/* AIX needs this for fd_set */
-- 
cgit v1.2.1


From c64d93b27403dc9d154eb601b88d95964f9fc05b Mon Sep 17 00:00:00 2001
From: "bar@mysql.com" <>
Date: Fri, 11 Jun 2004 16:29:16 +0500
Subject: Allocate memory when a character set is requested: - For simple
 character sets: from_uni conversion table. - For UCA: alternative weight
 arrays. Use mbminlen instead of MY_CS_NONTEXT

---
 mysys/charset.c | 562 ++------------------------------------------------------
 1 file changed, 14 insertions(+), 548 deletions(-)

(limited to 'mysys')

diff --git a/mysys/charset.c b/mysys/charset.c
index d2d71689d7b..165fa19e3d5 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -22,354 +22,6 @@
 #include <my_xml.h>
 
 
-/*
-  Collation language is implemented according to
-  subset of ICU Collation Customization (tailorings):
-  http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
-  
-  Collation language elements:
-  Delimiters:
-    space   - skipped
-  
-  <char> :=  A-Z | a-z | \uXXXX
-  
-  Shift command:
-    <shift>  := &       - reset at this letter. 
-  
-  Diff command:
-    <d1> :=  <     - Identifies a primary difference.
-    <d2> :=  <<    - Identifies a secondary difference.
-    <d3> := <<<    - Idenfifies a tertiary difference.
-  
-  
-  Collation rules:
-    <ruleset> :=  <rule>  { <ruleset> }
-    
-    <rule> :=   <d1>    <string>
-              | <d2>    <string>
-              | <d3>    <string>
-              | <shift> <char>
-    
-    <string> := <char> [ <string> ]
-
-  An example, Polish collation:
-  
-    &A < \u0105 <<< \u0104
-    &C < \u0107 <<< \u0106
-    &E < \u0119 <<< \u0118
-    &L < \u0142 <<< \u0141
-    &N < \u0144 <<< \u0143
-    &O < \u00F3 <<< \u00D3
-    &S < \u015B <<< \u015A
-    &Z < \u017A <<< \u017B    
-*/
-
-
-typedef enum my_coll_lexem_num_en
-{
-  MY_COLL_LEXEM_EOF	= 0,
-  MY_COLL_LEXEM_DIFF	= 1, 
-  MY_COLL_LEXEM_SHIFT	= 4,
-  MY_COLL_LEXEM_CHAR	= 5,
-  MY_COLL_LEXEM_ERROR	= 6
-} my_coll_lexem_num;
-
-
-typedef struct my_coll_lexem_st
-{
-  const char *beg;
-  const char *end;
-  const char *prev;
-  int   diff;
-  int   code;
-} MY_COLL_LEXEM;
-
-
-/*
-  Initialize collation rule lexical anilizer
-  
-  SYNOPSIS
-    my_coll_lexem_init
-    lexem                Lex analizer to init
-    str                  Const string to parse
-    strend               End of the string
-  USAGE
-  
-  RETURN VALUES
-    N/A
-*/
-
-static void my_coll_lexem_init(MY_COLL_LEXEM *lexem,
-                               const char *str, const char *strend)
-{
-  lexem->beg= str;
-  lexem->prev= str;
-  lexem->end= strend;
-  lexem->diff= 0;
-  lexem->code= 0;
-}
-
-
-/*
-  Print collation customization expression parse error, with context.
-  
-  SYNOPSIS
-    my_coll_lexem_print_error
-    lexem                Lex analizer to take context from
-    errstr               sting to write error to
-    errsize              errstr size
-    txt                  error message
-  USAGE
-  
-  RETURN VALUES
-    N/A
-*/
-
-static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem,
-                                      char *errstr, size_t errsize,
-                                      const char *txt)
-{
-  char tail[30];
-  size_t len= lexem->end - lexem->prev;
-  strmake (tail, lexem->prev, min(len, sizeof(tail)-1));
-  errstr[errsize-1]= '\0';
-  my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail);
-}
-
-
-/*
-  Convert a hex digit into its numeric value
-  
-  SYNOPSIS
-    ch2x
-    ch                   hex digit to convert
-  USAGE
-  
-  RETURN VALUES
-    an integer value in the range 0..15
-    -1 on error
-*/
-
-static int ch2x(int ch)
-{
-  if (ch >= '0' && ch <= '9')
-    return ch - '0';
-  
-  if (ch >= 'a' && ch <= 'f')
-    return 10 + ch - 'a';
-  
-  if (ch >= 'A' && ch <= 'F')
-    return 10 + ch - 'A';
-  
-  return -1;
-}
-
-
-/*
-  Collation language lexical parser:
-  Scans the next lexem.
-  
-  SYNOPSIS
-    my_coll_lexem_next
-    lexem                Lex analizer, previously initialized by 
-                         my_coll_lexem_init.
-  USAGE
-    Call this function in a loop
-    
-  RETURN VALUES
-    Lexem number: eof, diff, shift, char or error.
-*/
-
-static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
-{
-  for ( ;lexem->beg < lexem->end ; lexem->beg++)
-  {
-    lexem->prev= lexem->beg;
-    if (lexem->beg[0] == ' '  || lexem->beg[0] == '\t' || 
-        lexem->beg[0] == '\r' || lexem->beg[0] == '\n')
-      continue;
-    
-    if (lexem->beg[0] == '&')
-    {
-      lexem->beg++;
-      return MY_COLL_LEXEM_SHIFT;
-    }
-    
-    if (lexem->beg[0] == '<')
-    {
-      for (lexem->beg++, lexem->diff=1; 
-           (lexem->beg < lexem->end) && 
-           (lexem->beg[0] == '<') && (lexem->diff<3);
-           lexem->beg++, lexem->diff++);
-        return MY_COLL_LEXEM_DIFF;
-    }
-    
-    if ((lexem->beg[0] >= 'a' && lexem->beg[0] <= 'z') ||
-        (lexem->beg[0] >= 'A' && lexem->beg[0] <= 'Z'))
-    {
-      lexem->code= lexem->beg[0];
-      lexem->beg++;
-      return MY_COLL_LEXEM_CHAR;
-    }
-    
-    if ((lexem->beg[0] == '\\') && 
-        (lexem->beg+2 < lexem->end) && 
-        (lexem->beg[1] == 'u'))
-    {
-      int ch;
-      
-      lexem->code= 0;
-      for (lexem->beg+=2; 
-           (lexem->beg < lexem->end) && ((ch= ch2x(lexem->beg[0])) >= 0) ; 
-           lexem->beg++)
-      {
-        lexem->code= (lexem->code << 4) + ch;
-      }
-      return MY_COLL_LEXEM_CHAR;
-    }
-    
-    return MY_COLL_LEXEM_ERROR;
-  }
-  return MY_COLL_LEXEM_EOF;
-}
-
-
-/*
-  Collation rule item
-*/
-
-typedef struct my_coll_rule_item_st
-{
-  uint base;     /* Base character                             */
-  uint curr;     /* Current character                          */
-  int diff[3];   /* Primary, Secondary and Tertiary difference */
-} MY_COLL_RULE;
-
-
-/*
-  Collation language syntax parser.
-  Uses lexical parser.
-  
-  SYNOPSIS
-    my_coll_rule_parse
-    rule                 Collation rule list to load to.
-    str                  A string containin collation language expression.
-    strend               End of the string.
-  USAGE
-    
-  RETURN VALUES
-    0 - OK
-    1 - ERROR, e.g. too many items.
-*/
-
-static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
-                              const char *str, const char *strend,
-                              char *errstr, size_t errsize)
-{
-  MY_COLL_LEXEM lexem;
-  my_coll_lexem_num lexnum;
-  my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR;
-  MY_COLL_RULE item; 
-  int state= 0;
-  size_t nitems= 0;
-  
-  /* Init all variables */
-  errstr[0]= '\0';
-  bzero(&item, sizeof(item));
-  my_coll_lexem_init(&lexem, str, strend);
-  
-  while ((lexnum= my_coll_lexem_next(&lexem)))
-  {
-    if (lexnum == MY_COLL_LEXEM_ERROR)
-    {
-      my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character");
-      return -1;
-    }
-    
-    switch (state) {
-    case 0:
-      if (lexnum != MY_COLL_LEXEM_SHIFT)
-      {
-        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected");
-        return -1;
-      }
-      prevlexnum= lexnum;
-      state= 2;
-      continue;
-      
-    case 1:
-      if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF)
-      {
-        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected");
-        return -1;
-      }
-      prevlexnum= lexnum;
-      state= 2;
-      continue;
-      
-    case 2:
-      if (lexnum != MY_COLL_LEXEM_CHAR)
-      {
-        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected");
-        return -1;
-      }
-      
-      if (prevlexnum == MY_COLL_LEXEM_SHIFT)
-      {
-        item.base= lexem.code;
-        item.diff[0]= 0;
-        item.diff[1]= 0;
-        item.diff[2]= 0;
-      }
-      else if (prevlexnum == MY_COLL_LEXEM_DIFF)
-      {
-        item.curr= lexem.code;
-        if (lexem.diff == 3)
-        {
-          item.diff[2]++;
-        }
-        else if (lexem.diff == 2)
-        {
-          item.diff[1]++;
-          item.diff[2]= 0;
-        }
-        else if (lexem.diff == 1)
-        {
-          item.diff[0]++;
-          item.diff[1]= 0;
-          item.diff[2]= 0;
-        }
-        if (nitems >= mitems)
-        {
-          my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");
-          return -1;
-        }
-        rule[nitems++]= item;
-      }
-      else
-      {
-        my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen");
-        return -1;
-      }
-      state= 1;
-      continue;
-    }
-  }
-  return (size_t) nitems;
-}
-
-
-typedef struct
-{
-  int		nchars;
-  MY_UNI_IDX	uidx;
-} uni_idx;
-
-#define PLANE_SIZE	0x100
-#define PLANE_NUM	0x100
-#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)
-
-
 /*
   The code below implements this functionality:
   
@@ -484,91 +136,6 @@ static void simple_cs_init_functions(CHARSET_INFO *cs)
 }
 
 
-static int pcmp(const void * f, const void * s)
-{
-  const uni_idx *F= (const uni_idx*) f;
-  const uni_idx *S= (const uni_idx*) s;
-  int res;
-
-  if (!(res=((S->nchars)-(F->nchars))))
-    res=((F->uidx.from)-(S->uidx.to));
-  return res;
-}
-
-
-static my_bool create_fromuni(CHARSET_INFO *cs)
-{
-  uni_idx	idx[PLANE_NUM];
-  int		i,n;
-  
-  /* Clear plane statistics */
-  bzero(idx,sizeof(idx));
-  
-  /* Count number of characters in each plane */
-  for (i=0; i< 0x100; i++)
-  {
-    uint16 wc=cs->tab_to_uni[i];
-    int pl= PLANE_NUMBER(wc);
-    
-    if (wc || !i)
-    {
-      if (!idx[pl].nchars)
-      {
-        idx[pl].uidx.from=wc;
-        idx[pl].uidx.to=wc;
-      }else
-      {
-        idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
-        idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
-      }
-      idx[pl].nchars++;
-    }
-  }
-  
-  /* Sort planes in descending order */
-  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
-  
-  for (i=0; i < PLANE_NUM; i++)
-  {
-    int ch,numchars;
-    
-    /* Skip empty plane */
-    if (!idx[i].nchars)
-      break;
-    
-    numchars=idx[i].uidx.to-idx[i].uidx.from+1;
-    if (!(idx[i].uidx.tab=(uchar*) my_once_alloc(numchars *
-						 sizeof(*idx[i].uidx.tab),
-						 MYF(MY_WME))))
-      return TRUE;
-
-    bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));
-    
-    for (ch=1; ch < PLANE_SIZE; ch++)
-    {
-      uint16 wc=cs->tab_to_uni[ch];
-      if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
-      {
-        int ofs= wc - idx[i].uidx.from;
-        idx[i].uidx.tab[ofs]= ch;
-      }
-    }
-  }
-  
-  /* Allocate and fill reverse table for each plane */
-  n=i;
-  if (!(cs->tab_from_uni= (MY_UNI_IDX*) my_once_alloc(sizeof(MY_UNI_IDX)*(n+1),
-						      MYF(MY_WME))))
-    return TRUE;
-
-  for (i=0; i< n; i++)
-    cs->tab_from_uni[i]= idx[i].uidx;
-  
-  /* Set end-of-list marker */
-  bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
-  return FALSE;
-}
-
 
 static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
 {
@@ -622,8 +189,6 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
     if (!(to->tab_to_uni= (uint16*)  my_once_memdup((char*)from->tab_to_uni,
 						    sz, MYF(MY_WME))))
       goto err;
-    if (create_fromuni(to))
-      goto err;
   }
   to->mbminlen= 1;
   to->mbmaxlen= 1;
@@ -754,117 +319,6 @@ static my_tailoring tailoring[]=
   }
 };
 
-#define MY_MAX_COLL_RULE 64
-
-/*
-  This function copies an UCS2 collation from
-  the default Unicode Collation Algorithm (UCA)
-  weights applying tailorings, i.e. a set of
-  alternative weights for some characters. 
-  
-  The default UCA weights are stored in my_charset_ucs2_general_uca.
-  They consist of 256 pages, 256 character each.
-  
-  If a page is not overwritten by tailoring rules,
-  it is copies as is from UCA as is.
-  
-  If a page contains some overwritten characters, it is
-  allocated. Untouched characters are copied from the
-  default weights.
-*/
-
-static my_bool create_tailoring(CHARSET_INFO *cs)
-{
-  MY_COLL_RULE rule[MY_MAX_COLL_RULE];
-  char errstr[128];
-  uchar   *newlengths;
-  uint16 **newweights;
-  const uchar *deflengths= my_charset_ucs2_general_uca.sort_order;
-  uint16     **defweights= my_charset_ucs2_general_uca.sort_order_big;
-  int rc, i;
-
-  if (!cs->tailoring)
-    return 1;
-  
-  /* Parse ICU Collation Customization expression */
-  if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE,
-                              cs->tailoring,
-                              cs->tailoring + strlen(cs->tailoring),
-                              errstr, sizeof(errstr))) <= 0)
-  {
-    /* 
-      TODO: add error message reporting.
-      printf("Error: %d '%s'\n", rc, errstr);
-    */
-    return 1;
-  }
-  
-  if (!(newweights= (uint16**) my_once_alloc(256*sizeof(uint16*),MYF(MY_WME))))
-    return 1;
-  bzero(newweights, 256*sizeof(uint16*));
-  
-  if (!(newlengths= (uchar*) my_once_memdup(deflengths,256,MYF(MY_WME))))
-    return 1;
-  
-  /*
-    Calculate maximum lenghts for the pages
-    which will be overwritten.
-  */
-  for (i=0; i < rc; i++)
-  {
-    uint pageb= (rule[i].base >> 8) & 0xFF;
-    uint pagec= (rule[i].curr >> 8) & 0xFF;
-    
-    if (newlengths[pagec] < deflengths[pageb])
-      newlengths[pagec]= deflengths[pageb];
-  }
-  
-  for (i=0; i < rc;  i++)
-  {
-    uint pageb= (rule[i].base >> 8) & 0xFF;
-    uint pagec= (rule[i].curr >> 8) & 0xFF;
-    uint chb, chc;
-    
-    if (!newweights[pagec])
-    {
-      /* Alloc new page and copy the default UCA weights */
-      uint size= 256*newlengths[pagec]*sizeof(uint16);
-      
-      if (!(newweights[pagec]= (uint16*) my_once_alloc(size,MYF(MY_WME))))
-        return 1;
-      bzero((void*) newweights[pagec], size);
-      
-      for (chc=0 ; chc < 256; chc++)
-      {
-        memcpy(newweights[pagec] + chc*newlengths[pagec],
-               defweights[pagec] + chc*deflengths[pagec],
-               deflengths[pagec]*sizeof(uint16));
-      }
-    }
-    
-    /* 
-      Aply the alternative rule:
-      shift to the base character and primary difference.
-    */
-    chc= rule[i].curr & 0xFF;
-    chb= rule[i].base & 0xFF;
-    memcpy(newweights[pagec] + chc*newlengths[pagec],
-           defweights[pageb] + chb*deflengths[pageb],
-           deflengths[pageb]*sizeof(uint16));
-    /* Apply primary difference */
-    newweights[pagec][chc*newlengths[pagec]]+= rule[i].diff[0];
-  }
-  
-  /* Copy non-overwritten pages from the default UCA weights */
-  for (i= 0; i < 256 ; i++)
-    if (!newweights[i])
-      newweights[i]= defweights[i];
-  
-  cs->sort_order= newlengths;
-  cs->sort_order_big= newweights;
-  
-  return 0;
-}
 
 
 static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
@@ -894,7 +348,7 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
   to->mbminlen= 2;
   to->mbmaxlen= 2;
   
-  return create_tailoring(to);
+  return 0;
   
 err:
   return 1;
@@ -997,7 +451,7 @@ static my_bool init_uca_charsets()
   CHARSET_INFO cs= my_charset_ucs2_general_uca;
   char name[64];
   
-  cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT;
+  cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE;
   for (t= tailoring; t->tailoring; t++)
   {
     cs.number= 128 + t->number;
@@ -1083,6 +537,10 @@ void add_compiled_collation(CHARSET_INFO *cs)
   cs->state|= MY_CS_AVAILABLE;
 }
 
+static void *cs_alloc(uint size)
+{
+  return my_once_alloc(size, MYF(MY_WME));
+}
 
 
 #ifdef __NETWARE__
@@ -1207,6 +665,14 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
     cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL;
   }
   pthread_mutex_unlock(&THR_LOCK_charset);
+  if (cs && !(cs->state & MY_CS_READY))
+  {
+    if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
+        (cs->coll->init && cs->coll->init(cs, cs_alloc)))
+      cs= NULL;
+    else
+      cs->state|= MY_CS_READY;
+  }
   return cs;
 }
 
-- 
cgit v1.2.1


From f8b15e8bb6a87a3663d7703b40fbc7f0a1f8691a Mon Sep 17 00:00:00 2001
From: "bar@mysql.com" <>
Date: Fri, 11 Jun 2004 17:50:20 +0500
Subject: Initialize max_sort_char only if a character set is requested.

---
 mysys/charset.c | 23 -----------------------
 1 file changed, 23 deletions(-)

(limited to 'mysys')

diff --git a/mysys/charset.c b/mysys/charset.c
index 165fa19e3d5..d7aabee68f3 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -38,26 +38,6 @@ my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
 }
 
 
-static void set_max_sort_char(CHARSET_INFO *cs)
-{
-  uchar max_char;
-  uint  i;
-  
-  if (!cs->sort_order)
-    return;
-  
-  max_char=cs->sort_order[(uchar) cs->max_sort_char];
-  for (i= 0; i < 256; i++)
-  {
-    if ((uchar) cs->sort_order[i] > max_char)
-    {
-      max_char=(uchar) cs->sort_order[i];
-      cs->max_sort_char= i;
-    }
-  }
-}
-
-
 static my_bool init_state_maps(CHARSET_INFO *cs)
 {
   uint i;
@@ -180,8 +160,6 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
 						  MYF(MY_WME))))
       goto err;
 
-    
-    set_max_sort_char(to);
   }
   if (from->tab_to_uni)
   {
@@ -577,7 +555,6 @@ static my_bool init_available_charsets(myf myflags)
     {
       if (*cs)
       {
-        set_max_sort_char(*cs);
         if (cs[0]->ctype)
           if (init_state_maps(*cs))
             *cs= NULL;
-- 
cgit v1.2.1


From 53a7bd5931c1e4b023eb272ea1d15ef48800258c Mon Sep 17 00:00:00 2001
From: "bar@mysql.com" <>
Date: Fri, 11 Jun 2004 18:25:50 +0500
Subject: charset.c:   Reuse some code between simple and UCA collations.

---
 mysys/charset.c | 59 +++++++++++++++------------------------------------------
 1 file changed, 15 insertions(+), 44 deletions(-)

(limited to 'mysys')

diff --git a/mysys/charset.c b/mysys/charset.c
index d7aabee68f3..4fcf5dffcdc 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -111,13 +111,11 @@ static void simple_cs_init_functions(CHARSET_INFO *cs)
     cs->coll= &my_collation_8bit_simple_ci_handler;
   
   cs->cset= &my_charset_8bit_handler;
-  cs->mbminlen= 1;
-  cs->mbmaxlen= 1;
 }
 
 
-static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
+static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
 {
   to->number= from->number ? from->number : to->number;
 
@@ -168,8 +166,9 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
 						    sz, MYF(MY_WME))))
       goto err;
   }
-  to->mbminlen= 1;
-  to->mbmaxlen= 1;
+  if (from->tailoring)
+    if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
+      goto err;
 
   return 0;
 
@@ -297,40 +296,6 @@ static my_tailoring tailoring[]=
   }
 };
 
-
-
-static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
-{
-  
-  to->number= from->number ? from->number : to->number;
-  
-  if (from->csname)
-    if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
-      goto err;
-  
-  if (from->name)
-    if (!(to->name= my_once_strdup(from->name,MYF(MY_WME))))
-      goto err;
-  
-  if (from->comment)
-    if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
-      goto err;
-  
-  if (from->tailoring)
-    if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
-      goto err;
-  
-  to->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply;
-  to->min_sort_char= my_charset_ucs2_general_uca.min_sort_char;
-  to->max_sort_char= my_charset_ucs2_general_uca.max_sort_char;
-  to->mbminlen= 2;
-  to->mbmaxlen= 2;
-  
-  return 0;
-  
-err:
-  return 1;
-}
 #endif
 
 
@@ -365,22 +330,28 @@ static int add_collation(CHARSET_INFO *cs)
     
     if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
     {
+      CHARSET_INFO *new= all_charsets[cs->number];
+      if (cs_copy_data(all_charsets[cs->number],cs))
+        return MY_XML_ERROR;
+
       if (!strcmp(cs->csname,"ucs2") )
       {
 #ifdef HAVE_CHARSET_ucs2
-        CHARSET_INFO *new= all_charsets[cs->number];
         new->cset= my_charset_ucs2_general_uca.cset;
         new->coll= my_charset_ucs2_general_uca.coll;
-        if (ucs2_copy_data(new, cs))
-          return MY_XML_ERROR;
+        new->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply;
+        new->min_sort_char= my_charset_ucs2_general_uca.min_sort_char;
+        new->max_sort_char= my_charset_ucs2_general_uca.max_sort_char;
+        new->mbminlen= 2;
+        new->mbmaxlen= 2;
         new->state |= MY_CS_AVAILABLE | MY_CS_LOADED;
 #endif        
       }
       else
       {
         simple_cs_init_functions(all_charsets[cs->number]);
-        if (simple_cs_copy_data(all_charsets[cs->number],cs))
-	  return MY_XML_ERROR;
+        new->mbminlen= 1;
+        new->mbmaxlen= 1;
         if (simple_cs_is_full(all_charsets[cs->number]))
         {
           all_charsets[cs->number]->state |= MY_CS_LOADED;
-- 
cgit v1.2.1


From fc7ba0885e5f15cd49ee710e906d9010951d22d3 Mon Sep 17 00:00:00 2001
From: "bar@mysql.com" <>
Date: Fri, 11 Jun 2004 19:16:06 +0500
Subject: Move UCA language specific definitions into ctype-ucs.c.

---
 mysys/charset-def.c |  23 +++++++++
 mysys/charset.c     | 143 ----------------------------------------------------
 2 files changed, 23 insertions(+), 143 deletions(-)

(limited to 'mysys')

diff --git a/mysys/charset-def.c b/mysys/charset-def.c
index a89cf866933..4f988608d13 100644
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@@ -22,6 +22,19 @@
   init_compiled_charsets() that only adds those that he wants
 */
 
+#ifdef HAVE_CHARSET_ucs2
+extern CHARSET_INFO my_charset_ucs2_general_uca;
+extern CHARSET_INFO my_charset_ucs2_icelandic_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_latvian_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_romanian_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_slovenian_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_polish_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_estonian_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_spanish_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_swedish_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_turkish_uca_ci;
+#endif
+
 my_bool init_compiled_charsets(myf flags __attribute__((unused)))
 {
   CHARSET_INFO *cs;
@@ -74,6 +87,16 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
   add_compiled_collation(&my_charset_ucs2_general_ci);
   add_compiled_collation(&my_charset_ucs2_bin);
   add_compiled_collation(&my_charset_ucs2_general_uca);
+  add_compiled_collation(&my_charset_ucs2_general_uca);
+  add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_latvian_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_romanian_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_slovenian_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_polish_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_estonian_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_spanish_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_swedish_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_turkish_uca_ci);
 #endif
 
 #ifdef HAVE_CHARSET_ujis
diff --git a/mysys/charset.c b/mysys/charset.c
index 4fcf5dffcdc..72f102a2296 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -177,127 +177,6 @@ err:
 }
 
 
-#ifdef HAVE_CHARSET_ucs2
-
-typedef struct my_tailoring_st
-{
-  uint  number;
-  const char *name;
-  const char *tailoring;
-} my_tailoring;
-
-static my_tailoring tailoring[]=
-{
-  {
-    0, "icelandic",
-    /*
-      Some sources treat LETTER A WITH DIARESIS (00E4,00C4)
-      secondary greater than LETTER AE (00E6,00C6).
-      http://www.evertype.com/alphabets/icelandic.pdf
-      http://developer.mimer.com/collations/charts/icelandic.htm
-
-      Other sources do not provide any special rules
-      for LETTER A WITH DIARESIS:
-      http://www.omniglot.com/writing/icelandic.htm
-      http://en.wikipedia.org/wiki/Icelandic_alphabet
-      http://oss.software.ibm.com/icu/charts/collation/is.html
-
-      Let's go the first way.
-    */
-    "& A < \\u00E1 <<< \\u00C1 "
-    "& D < \\u00F0 <<< \\u00D0 "
-    "& E < \\u00E9 <<< \\u00C9 "
-    "& I < \\u00ED <<< \\u00CD "
-    "& O < \\u00F3 <<< \\u00D3 "
-    "& U < \\u00FA <<< \\u00DA "
-    "& Y < \\u00FD <<< \\u00DD "
-    "& Z < \\u00FE <<< \\u00DE "
-        "< \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
-        "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
-        "< \\u00E5 <<< \\u00C5 "
-  },
-  {
-    1, "latvian",
-    /*
-      Some sources treat I and Y primary different.
-      Other sources treat I and Y the same on primary level.
-      We'll go the first way.
-    */
-    "& C < \\u010D <<< \\u010C "
-    "& G < \\u0123 <<< \\u0122 "
-    "& I < \\u0079 <<< \\u0059 "
-    "& K < \\u0137 <<< \\u0136 "
-    "& L < \\u013C <<< \\u013B "
-    "& N < \\u0146 <<< \\u0145 "
-    "& R < \\u0157 <<< \\u0156 "
-    "& S < \\u0161 <<< \\u0160 "
-    "& Z < \\u017E <<< \\u017D "
-  },
-  {
-    2, "romanian",
-    "& A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 "
-    "& I < \\u00EE <<< \\u00CE "
-    "& S < \\u0219 <<< \\u0218 << \\u015F <<< \\u015E "
-    "& T < \\u021B <<< \\u021A << \\u0163 <<< \\u0162 "
-  },
-  {
-    3, "slovenian",
-    "& C < \\u010D <<< \\u010C "
-    "& S < \\u0161 <<< \\u0160 "
-    "& Z < \\u017E <<< \\u017D "
-  },
-  {
-    4, "polish",
-    "& A < \\u0105 <<< \\u0104 "
-    "& C < \\u0107 <<< \\u0106 "
-    "& E < \\u0119 <<< \\u0118 "
-    "& L < \\u0142 <<< \\u0141 "
-    "& N < \\u0144 <<< \\u0143 "
-    "& O < \\u00F3 <<< \\u00D3 "
-    "& S < \\u015B <<< \\u015A "
-    "& Z < \\u017A <<< \\u017B "
-  },
-  {
-    5, "estonian",
-    "& S < \\u0161 <<< \\u0160 "
-       " < \\u007A <<< \\u005A "
-       " < \\u017E <<< \\u017D "
-    "& W < \\u00F5 <<< \\u00D5 "
-        "< \\u00E4 <<< \\u00C4 "
-        "< \\u00F6 <<< \\u00D6 "
-        "< \\u00FC <<< \\u00DC "
-  },
-  {
-    6, "spanish",
-    "& N < \\u00F1 <<< \\u00D1 "
-  },
-  {
-    7, "swedish",
-    /*
-      Some sources treat V and W as similar on primary level.
-      We'll treat V and W as different on primary level.
-    */
-    "& Y <<\\u00FC <<< \\u00DC "
-    "& Z < \\u00E5 <<< \\u00C5 "
-        "< \\u00E4 <<< \\u00C4 << \\u00E6 <<< \\u00C6 "
-        "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
-  },
-  {
-    8, "turkish",
-    "& C < \\u00E7 <<< \\u00C7 "
-    "& G < \\u011F <<< \\u011E "
-    "& H < \\u0131 <<< \\u0049 "
-    "& O < \\u00F6 <<< \\u00D6 "
-    "& S < \\u015F <<< \\u015E "
-    "& U < \\u00FC <<< \\u00DC "
-  },
-  {
-    0, NULL, NULL
-  }
-};
-
-#endif
-
 
 static my_bool simple_cs_is_full(CHARSET_INFO *cs)
 {
@@ -393,25 +272,6 @@ static int add_collation(CHARSET_INFO *cs)
   return MY_XML_OK;
 }
 
-#ifdef HAVE_CHARSET_ucs2
-static my_bool init_uca_charsets()
-{
-  my_tailoring *t;
-  CHARSET_INFO cs= my_charset_ucs2_general_uca;
-  char name[64];
-  
-  cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE;
-  for (t= tailoring; t->tailoring; t++)
-  {
-    cs.number= 128 + t->number;
-    cs.tailoring= t->tailoring;
-    cs.name= name;
-    sprintf(name, "ucs2_%s_ci", t->name);
-    add_collation(&cs);
-  }
-  return 0;
-}
-#endif
 
 #define MY_MAX_ALLOWED_BUF 1024*1024
 #define MY_CHARSET_INDEX "Index.xml"
@@ -515,9 +375,6 @@ static my_bool init_available_charsets(myf myflags)
 
     bzero(&all_charsets,sizeof(all_charsets));
     init_compiled_charsets(myflags);
-#ifdef HAVE_CHARSET_ucs2
-    init_uca_charsets();
-#endif
     
     /* Copy compiled charsets */
     for (cs=all_charsets;
-- 
cgit v1.2.1


From cbd3e61c8d41555dfb33bddcb762a9c49ec7ec3e Mon Sep 17 00:00:00 2001
From: "bar@mysql.com" <>
Date: Sat, 12 Jun 2004 20:36:58 +0500
Subject: Unicode collation algorithm: contraction support. E.g. 'Ch' is
 treated as a separate letter in Czech, not as a combination of C+h.

---
 mysys/charset-def.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'mysys')

diff --git a/mysys/charset-def.c b/mysys/charset-def.c
index 4f988608d13..a573581a8ea 100644
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@@ -33,6 +33,11 @@ extern CHARSET_INFO my_charset_ucs2_estonian_uca_ci;
 extern CHARSET_INFO my_charset_ucs2_spanish_uca_ci;
 extern CHARSET_INFO my_charset_ucs2_swedish_uca_ci;
 extern CHARSET_INFO my_charset_ucs2_turkish_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_czech_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_danish_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_slovak_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_spanish2_uca_ci;
 #endif
 
 my_bool init_compiled_charsets(myf flags __attribute__((unused)))
@@ -97,6 +102,11 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
   add_compiled_collation(&my_charset_ucs2_spanish_uca_ci);
   add_compiled_collation(&my_charset_ucs2_swedish_uca_ci);
   add_compiled_collation(&my_charset_ucs2_turkish_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_czech_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_danish_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_lithuanian_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_slovak_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_spanish2_uca_ci);
 #endif
 
 #ifdef HAVE_CHARSET_ujis
-- 
cgit v1.2.1


From 5dd2881f542e06a4ba236d15c19d0da5a8799c71 Mon Sep 17 00:00:00 2001
From: "bar@mysql.com" <>
Date: Mon, 14 Jun 2004 16:29:51 +0500
Subject: UTF8 UCA based collations.

---
 mysys/charset-def.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

(limited to 'mysys')

diff --git a/mysys/charset-def.c b/mysys/charset-def.c
index a573581a8ea..73e36688594 100644
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@@ -40,6 +40,24 @@ extern CHARSET_INFO my_charset_ucs2_slovak_uca_ci;
 extern CHARSET_INFO my_charset_ucs2_spanish2_uca_ci;
 #endif
 
+#ifdef HAVE_CHARSET_utf8
+extern CHARSET_INFO my_charset_utf8_general_uca_ci;
+extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci;
+extern CHARSET_INFO my_charset_utf8_latvian_uca_ci;
+extern CHARSET_INFO my_charset_utf8_romanian_uca_ci;
+extern CHARSET_INFO my_charset_utf8_slovenian_uca_ci;
+extern CHARSET_INFO my_charset_utf8_polish_uca_ci;
+extern CHARSET_INFO my_charset_utf8_estonian_uca_ci;
+extern CHARSET_INFO my_charset_utf8_spanish_uca_ci;
+extern CHARSET_INFO my_charset_utf8_swedish_uca_ci;
+extern CHARSET_INFO my_charset_utf8_turkish_uca_ci;
+extern CHARSET_INFO my_charset_utf8_czech_uca_ci;
+extern CHARSET_INFO my_charset_utf8_danish_uca_ci;
+extern CHARSET_INFO my_charset_utf8_lithuanian_uca_ci;
+extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
+extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci;
+#endif
+
 my_bool init_compiled_charsets(myf flags __attribute__((unused)))
 {
   CHARSET_INFO *cs;
@@ -92,7 +110,6 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
   add_compiled_collation(&my_charset_ucs2_general_ci);
   add_compiled_collation(&my_charset_ucs2_bin);
   add_compiled_collation(&my_charset_ucs2_general_uca);
-  add_compiled_collation(&my_charset_ucs2_general_uca);
   add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci);
   add_compiled_collation(&my_charset_ucs2_latvian_uca_ci);
   add_compiled_collation(&my_charset_ucs2_romanian_uca_ci);
@@ -117,6 +134,21 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
 #ifdef HAVE_CHARSET_utf8
   add_compiled_collation(&my_charset_utf8_general_ci);
   add_compiled_collation(&my_charset_utf8_bin);
+  add_compiled_collation(&my_charset_utf8_general_uca_ci);
+  add_compiled_collation(&my_charset_utf8_icelandic_uca_ci);
+  add_compiled_collation(&my_charset_utf8_latvian_uca_ci);
+  add_compiled_collation(&my_charset_utf8_romanian_uca_ci);
+  add_compiled_collation(&my_charset_utf8_slovenian_uca_ci);
+  add_compiled_collation(&my_charset_utf8_polish_uca_ci);
+  add_compiled_collation(&my_charset_utf8_estonian_uca_ci);
+  add_compiled_collation(&my_charset_utf8_spanish_uca_ci);
+  add_compiled_collation(&my_charset_utf8_swedish_uca_ci);
+  add_compiled_collation(&my_charset_utf8_turkish_uca_ci);
+  add_compiled_collation(&my_charset_utf8_czech_uca_ci);
+  add_compiled_collation(&my_charset_utf8_danish_uca_ci);
+  add_compiled_collation(&my_charset_utf8_lithuanian_uca_ci);
+  add_compiled_collation(&my_charset_utf8_slovak_uca_ci);
+  add_compiled_collation(&my_charset_utf8_spanish2_uca_ci);
 #endif
 
   /* Copy compiled charsets */
-- 
cgit v1.2.1


From f814d224f72063e5acc4084e46db485e7d4b9b02 Mon Sep 17 00:00:00 2001
From: "bar@mysql.com" <>
Date: Wed, 16 Jun 2004 19:06:45 +0500
Subject: Critical fixes after review: - mutex was unlocked before the end of
 the critical section, - Portability issue: It's better to use (*alloc)(x)
 instead of alloc(x),   if alloc is a function passed as an argument. - Use {}
 around if() block, to avoid possible problems with some Windows compilers.

---
 mysys/charset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mysys')

diff --git a/mysys/charset.c b/mysys/charset.c
index 72f102a2296..1388fc40c6d 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -469,7 +469,6 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
     }
     cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL;
   }
-  pthread_mutex_unlock(&THR_LOCK_charset);
   if (cs && !(cs->state & MY_CS_READY))
   {
     if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
@@ -478,6 +477,7 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
     else
       cs->state|= MY_CS_READY;
   }
+  pthread_mutex_unlock(&THR_LOCK_charset);
   return cs;
 }
 
-- 
cgit v1.2.1


From cdddea14e4286db78f7860d5ba46b75da36db440 Mon Sep 17 00:00:00 2001
From: "monty@mysql.com" <>
Date: Fri, 18 Jun 2004 03:23:08 +0300
Subject: Applied patch from Novell (2004-06-03)

---
 mysys/mf_tempfile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mysys')

diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c
index 14b8fdc430c..ea2bec076d4 100644
--- a/mysys/mf_tempfile.c
+++ b/mysys/mf_tempfile.c
@@ -83,7 +83,7 @@ File create_temp_file(char *to, const char *dir, const char *prefix,
     (*free)(res);
     file=my_create(to, 0, mode, MyFlags);
   }
-#elif defined(HAVE_MKSTEMP)
+#elif defined(HAVE_MKSTEMP) && !defined(__NETWARE__)
   {
     char prefix_buff[30];
     uint pfx_len;
-- 
cgit v1.2.1


From 4740e8b2b7ad157389adfcddf2d17338160c9ae2 Mon Sep 17 00:00:00 2001
From: "lenz@mysql.com" <>
Date: Tue, 22 Jun 2004 17:35:34 +0200
Subject:  - Applied some portability fixes for SGI IRIX/MipsPro compiler   
 (e.g. a fix for BUG#3507 and some modifications recommended    by Andrea
 Suatoni and Joerg Behrens - thank you!)

---
 mysys/hash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mysys')

diff --git a/mysys/hash.c b/mysys/hash.c
index 3afd31a079b..12389e3cf1c 100644
--- a/mysys/hash.c
+++ b/mysys/hash.c
@@ -182,7 +182,7 @@ uint calc_hashnr_caseup(const byte *key, uint len)
 #endif
 
 
-#ifndef __SUNPRO_C				/* SUNPRO can't handle this */
+#if !defined(__SUNPRO_C) && !defined(__USLC__) && !defined(__sgi) /* broken compilers */
 inline
 #endif
 unsigned int rec_hashnr(HASH *hash,const byte *record)
-- 
cgit v1.2.1


From 4d92924807002f301a5b7c443579537cde847f81 Mon Sep 17 00:00:00 2001
From: "lenz@mysql.com" <>
Date: Tue, 22 Jun 2004 17:54:38 +0200
Subject:  - rephrased comment

---
 mysys/hash.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mysys')

diff --git a/mysys/hash.c b/mysys/hash.c
index 12389e3cf1c..973f6f7cefa 100644
--- a/mysys/hash.c
+++ b/mysys/hash.c
@@ -182,7 +182,8 @@ uint calc_hashnr_caseup(const byte *key, uint len)
 #endif
 
 
-#if !defined(__SUNPRO_C) && !defined(__USLC__) && !defined(__sgi) /* broken compilers */
+/* for compilers which can not handle inline */
+#if !defined(__SUNPRO_C) && !defined(__USLC__) && !defined(__sgi)
 inline
 #endif
 unsigned int rec_hashnr(HASH *hash,const byte *record)
-- 
cgit v1.2.1