Merge neptunus.(none):/home/msvensson/mysql/mysql-5.1

into neptunus.(none):/home/msvensson/mysql/mysql-5.1-new-maint

BitKeeper/etc/collapsed: auto-union
configure.in: Auto merged
mysql-test/Makefile.am: Auto merged
mysql-test/extra/rpl_tests/rpl_deadlock.test: Auto merged
mysql-test/mysql-test-run-shell.sh: Auto merged
mysql-test/r/func_time.result: Auto merged
mysql-test/r/ps.result: Auto merged
mysql-test/r/windows.result: Auto merged
mysql-test/t/disabled.def: Auto merged
mysql-test/t/func_time.test: Auto merged
mysql-test/t/ps.test: Auto merged
mysql-test/t/windows.test: Auto merged
sql/item.cc: Auto merged
sql/item_timefunc.cc: Auto merged
sql/mysqld.cc: Auto merged
sql-common/my_time.c: Auto merged
sql/sql_parse.cc: Auto merged
BitKeeper/deleted/.del-ps_not_windows.result: Delete: mysql-test/r/ps_not_windows.result
BitKeeper/deleted/.del-ps_not_windows.test: Delete: mysql-test/t/ps_not_windows.test
mysql-test/mysql-test-run.pl: Manual merge
mysql-test/r/rpl_deadlock_innodb.result: Manual merge
author: unknown <msvensson@neptunus.(none)> 2006-11-23 18:38:27 +0100
committer: unknown <msvensson@neptunus.(none)> 2006-11-23 18:38:27 +0100
commit: 32f349ed757a265b03b30e3635775ead21bdb54e (patch)
tree: 4de70d98bf438e372e1b73c5e179846ca768e6c8 /strings
parent: 5c9d6a68e314cb8492417eb50bb2e8ea8e5c7200 (diff)
parent: 07d18b0f6761208f1f24e040bfea723274240e3f (diff)
download: mariadb-git-32f349ed757a265b03b30e3635775ead21bdb54e.tar.gz
5 files changed, 163 insertions, 26 deletions
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 5758960ef6c..289b76c9e6e 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -211,9 +211,10 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
 
 /* This function is used for all conversion functions */
 
-static void my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
+static uint my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
 			    char *str __attribute__((unused)))
 {
+  return 0;
 }
 
 static uint my_case_bin(CHARSET_INFO *cs __attribute__((unused)),
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index c945164ac9c..c3848c64219 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -21,40 +21,44 @@
 #ifdef USE_MB
 
 
-void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
+uint my_caseup_str_mb(CHARSET_INFO * cs, char *str)
 {
   register uint32 l;
-  register uchar *map=cs->to_upper;
+  register uchar *map= cs->to_upper;
+  char *str_orig= str;
   
   while (*str)
   {
     /* Pointing after the '\0' is safe here. */
-    if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen)))
-      str+=l;
+    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
+      str+= l;
     else
     { 
-      *str=(char) map[(uchar)*str];
+      *str= (char) map[(uchar)*str];
       str++;
     }
   }
+  return str - str_orig;
 }
 
-void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
+uint my_casedn_str_mb(CHARSET_INFO * cs, char *str)
 {
   register uint32 l;
-  register uchar *map=cs->to_lower;
+  register uchar *map= cs->to_lower;
+  char *str_orig= str;
   
   while (*str)
   {
     /* Pointing after the '\0' is safe here. */
-    if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen)))
-      str+=l;
+    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
+      str+= l;
     else
     {
-      *str=(char) map[(uchar)*str];
+      *str= (char) map[(uchar)*str];
       str++;
     }
   }
+  return str - str_orig;
 }
 
 uint my_caseup_mb(CHARSET_INFO * cs, char *src, uint srclen,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 9b45d5a03b7..9d10ba82114 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -188,20 +188,26 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
 }
 
 
-void my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
+uint my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
 {
-  register uchar *map=cs->to_upper;
-  while ((*str = (char) map[(uchar) *str]) != 0)
+  register uchar *map= cs->to_upper;
+  char *str_orig= str;
+  while ((*str= (char) map[(uchar) *str]) != 0)
     str++;
+  return str - str_orig;
 }
 
-void my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
+
+uint my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
 {
-  register uchar *map=cs->to_lower;
-  while ((*str = (char) map[(uchar)*str]) != 0)
+  register uchar *map= cs->to_lower;
+  char *str_orig= str;
+  while ((*str= (char) map[(uchar) *str]) != 0)
     str++;
+  return str - str_orig;
 }
 
+
 uint my_caseup_8bit(CHARSET_INFO * cs, char *src, uint srclen,
                     char *dst __attribute__((unused)),
                     uint dstlen __attribute__((unused)))
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index df43eff3d73..5089db6bf48 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -159,13 +159,13 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen,
 }
 
 
-static void my_caseup_str_ucs2(CHARSET_INFO * cs  __attribute__((unused)), 
+static uint my_caseup_str_ucs2(CHARSET_INFO * cs  __attribute__((unused)), 
 			       char * s __attribute__((unused)))
 {
+  return 0;
 }
 
 
-
 static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
                            char *dst __attribute__((unused)),
                            uint dstlen __attribute__((unused)))
@@ -188,9 +188,11 @@ static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
   return srclen;
 }
 
-static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), 
+
+static uint my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), 
 			       char * s __attribute__((unused)))
 {
+  return 0;
 }
 
 
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 6c3ceaf868b..8a4ed48bef5 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2047,6 +2047,52 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   return MY_CS_ILSEQ;
 }
 
+
+/*
+  The same as above, but without a range check on the input;
+  usable, for example, on a null-terminated string.
+*/
+static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)),
+                                my_wc_t * pwc, const uchar *s)
+{
+  unsigned char c;
+
+  c= s[0];
+  if (c < 0x80)
+  {
+    *pwc = c;
+    return 1;
+  }
+
+  if (c < 0xc2)
+    return MY_CS_ILSEQ;
+
+  if (c < 0xe0)
+  {
+    if (!((s[1] ^ 0x80) < 0x40))
+      return MY_CS_ILSEQ;
+
+    *pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80);
+    return 2;
+  }
+  
+  if (c < 0xf0)
+  {
+    if (!((s[1] ^ 0x80) < 0x40 &&
+          (s[2] ^ 0x80) < 0x40 &&
+          (c >= 0xe1 || s[1] >= 0xa0)))
+      return MY_CS_ILSEQ;
+
+    *pwc= ((my_wc_t) (c & 0x0f) << 12)   |
+          ((my_wc_t) (s[1] ^ 0x80) << 6) |
+           (my_wc_t) (s[2] ^ 0x80);
+
+    return 3;
+  }
+  return MY_CS_ILSEQ;
+}
+
+
 static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
                  my_wc_t wc, uchar *r, uchar *e)
 {
@@ -2093,6 +2139,34 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
 }
 
 
+/*
+  The same as above, but without a range check on the output buffer.
+*/
+static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)),
+                                my_wc_t wc, uchar *r)
+{
+  int count;
+
+  if (wc < 0x80)
+    count= 1;
+  else if (wc < 0x800)
+    count= 2;
+  else if (wc < 0x10000)
+    count= 3;
+  else
+    return MY_CS_ILUNI;
+
+  switch (count)
+  {
+    /* Fall through all cases!!! */
+    case 3: r[2]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x800;
+    case 2: r[1]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0xc0;
+    case 1: r[0]= (uchar) wc;
+  }
+  return count;
+}
+
+
 static uint my_caseup_utf8(CHARSET_INFO *cs, char *src, uint srclen,
                                              char *dst, uint dstlen)
 {
@@ -2143,10 +2217,26 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, uint slen,
 }
 
 
-static void my_caseup_str_utf8(CHARSET_INFO * cs, char * s)
+static uint my_caseup_str_utf8(CHARSET_INFO *cs, char *src)
 {
-  uint len= (uint) strlen(s);
-  my_caseup_utf8(cs, s, len, s, len);
+  my_wc_t wc;
+  int srcres, dstres;
+  char *dst= src, *dst0= src;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  DBUG_ASSERT(cs->caseup_multiply == 1);
+
+  while (*src &&
+         (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
+  {
+    int plane= (wc>>8) & 0xFF;
+    wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
+    if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
+      break;
+    src+= srcres;
+    dst+= dstres;
+  }
+  *dst= '\0';
+  return (uint) (dst - dst0);
 }
 
 
@@ -2172,10 +2262,43 @@ static uint my_casedn_utf8(CHARSET_INFO *cs, char *src, uint srclen,
   return (uint) (dst - dst0);
 }
 
-static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
+
+static uint my_casedn_str_utf8(CHARSET_INFO *cs, char *src)
 {
-  uint len= (uint) strlen(s);
-  my_casedn_utf8(cs, s, len, s, len);
+  my_wc_t wc;
+  int srcres, dstres;
+  char *dst= src, *dst0= src;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  DBUG_ASSERT(cs->casedn_multiply == 1);
+
+  while (*src &&
+         (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
+  {
+    int plane= (wc>>8) & 0xFF;
+    wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
+    if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
+      break;
+    src+= srcres;
+    dst+= dstres;
+  }
+
+  /*
+   In rare cases the lowercased string can be shorter than
+   the original string, for example:
+
+   "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE"
+   (which is 0xC4B0 in utf8, i.e. two bytes)
+
+   is converted into
+
+   "U+0069 LATIN SMALL LETTER I"
+   (which is 0x69 in utf8, i.e. one byte)
+
+   So we need to write the '\0' terminator after converting.
+  */
+
+  *dst= '\0';
+  return (uint) (dst - dst0);
 }
 
 
@@ -4051,6 +4174,7 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
     my_strntoull_8bit,
     my_strntod_8bit,
     my_strtoll10_8bit,
+    my_strntoull10rnd_8bit,
     my_scan_8bit
 };
author	unknown <msvensson@neptunus.(none)>	2006-11-23 18:38:27 +0100
committer	unknown <msvensson@neptunus.(none)>	2006-11-23 18:38:27 +0100
commit	32f349ed757a265b03b30e3635775ead21bdb54e (patch)
tree	4de70d98bf438e372e1b73c5e179846ca768e6c8 /strings
parent	5c9d6a68e314cb8492417eb50bb2e8ea8e5c7200 (diff)
parent	07d18b0f6761208f1f24e040bfea723274240e3f (diff)
download	mariadb-git-32f349ed757a265b03b30e3635775ead21bdb54e.tar.gz