summary | refs | log | tree | commit | diff
path: root/strings
diff options
context:
space:
mode:
author unknown <msvensson@neptunus.(none)> 2006-11-28 20:59:57 +0100
committer unknown <msvensson@neptunus.(none)> 2006-11-28 20:59:57 +0100
commit c789301fcd3877a21d328b8e3c4ab97148d879f3 (patch)
tree abd103946cab0eae83e36388b799bb0e86227ee3 /strings
parent db875c0c070d34c525f650b15912273d74d4f8aa (diff)
parent 5be953f896d0c28395605b103a60182b648e78c7 (diff)
download mariadb-git-c789301fcd3877a21d328b8e3c4ab97148d879f3.tar.gz
Merge neptunus.(none):/home/msvensson/mysql/mysql-5.0
into neptunus.(none):/home/msvensson/mysql/mysql-5.0-maint

BitKeeper/etc/collapsed: auto-union
BitKeeper/etc/ignore: auto-union
Makefile.am: Auto merged
client/mysqltest.c: Auto merged
mysql-test/lib/mtr_report.pl: Auto merged
mysql-test/mysql-test-run.pl: Auto merged
mysql-test/r/rpl_deadlock.result: Auto merged
mysql-test/r/udf.result: Auto merged
mysql-test/t/limit.test: Auto merged
mysql-test/t/mysql.test: Auto merged
mysql-test/t/rpl_deadlock.test: Auto merged
mysql-test/t/udf.test: Auto merged
mysql-test/mysql-test-run-shell.sh: Auto merged
mysql-test/t/view_grant.test: Auto merged
sql/item.cc: Auto merged
sql/item_func.cc: Auto merged
sql/item_func.h: Auto merged
sql/item_timefunc.cc: Auto merged
sql/mysql_priv.h: Auto merged
sql/mysqld.cc: Auto merged
sql-common/my_time.c: Auto merged
sql/sql_base.cc: Auto merged
sql/sql_parse.cc: Auto merged
sql/sql_table.cc: Auto merged
include/my_sys.h: Manual merge, my_getpagesize broken out of "#ifdef HAVE_SYS_MMAN_H"
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-bin.c 3
-rw-r--r-- strings/ctype-mb.c 24
-rw-r--r-- strings/ctype-simple.c 18
-rw-r--r-- strings/ctype-ucs2.c 8
-rw-r--r-- strings/ctype-utf8.c 135
-rw-r--r-- strings/decimal.c 4
6 files changed, 164 insertions, 28 deletions
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 0bd5a1fda76..3e8b05580f6 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -211,9 +211,10 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
/* This function is used for all conversion functions */
-static void my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
+static uint my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
char *str __attribute__((unused)))
{
+ return 0;
}
static uint my_case_bin(CHARSET_INFO *cs __attribute__((unused)),
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 394111be3bc..3ef245015d7 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -21,40 +21,44 @@
#ifdef USE_MB
-void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
+uint my_caseup_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
- register uchar *map=cs->to_upper;
+ register uchar *map= cs->to_upper;
+ char *str_orig= str;
while (*str)
{
/* Pointing after the '\0' is safe here. */
- if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen)))
- str+=l;
+ if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
+ str+= l;
else
{
- *str=(char) map[(uchar)*str];
+ *str= (char) map[(uchar)*str];
str++;
}
}
+ return str - str_orig;
}
-void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
+uint my_casedn_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
- register uchar *map=cs->to_lower;
+ register uchar *map= cs->to_lower;
+ char *str_orig= str;
while (*str)
{
/* Pointing after the '\0' is safe here. */
- if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen)))
- str+=l;
+ if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
+ str+= l;
else
{
- *str=(char) map[(uchar)*str];
+ *str= (char) map[(uchar)*str];
str++;
}
}
+ return str - str_orig;
}
uint my_caseup_mb(CHARSET_INFO * cs, char *src, uint srclen,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index e40a1948dcf..7484f3c0d92 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -188,20 +188,26 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
}
-void my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
+uint my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
{
- register uchar *map=cs->to_upper;
- while ((*str = (char) map[(uchar) *str]) != 0)
+ register uchar *map= cs->to_upper;
+ char *str_orig= str;
+ while ((*str= (char) map[(uchar) *str]) != 0)
str++;
+ return str - str_orig;
}
-void my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
+
+uint my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
{
- register uchar *map=cs->to_lower;
- while ((*str = (char) map[(uchar)*str]) != 0)
+ register uchar *map= cs->to_lower;
+ char *str_orig= str;
+ while ((*str= (char) map[(uchar) *str]) != 0)
str++;
+ return str - str_orig;
}
+
uint my_caseup_8bit(CHARSET_INFO * cs, char *src, uint srclen,
char *dst __attribute__((unused)),
uint dstlen __attribute__((unused)))
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 4a60220f73e..3c69a314b45 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -159,13 +159,13 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen,
}
-static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
+static uint my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
char * s __attribute__((unused)))
{
+ return 0;
}
-
static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
char *dst __attribute__((unused)),
uint dstlen __attribute__((unused)))
@@ -188,9 +188,11 @@ static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
return srclen;
}
-static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+
+static uint my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
char * s __attribute__((unused)))
{
+ return 0;
}
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index ae2c04fb068..a21fe4961ec 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2045,6 +2045,52 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
return MY_CS_ILSEQ;
}
+
+/*
+ The same as above, but without range check
+ for example, for a null-terminated string
+*/
+static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t * pwc, const uchar *s)
+{
+ unsigned char c;
+
+ c= s[0];
+ if (c < 0x80)
+ {
+ *pwc = c;
+ return 1;
+ }
+
+ if (c < 0xc2)
+ return MY_CS_ILSEQ;
+
+ if (c < 0xe0)
+ {
+ if (!((s[1] ^ 0x80) < 0x40))
+ return MY_CS_ILSEQ;
+
+ *pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80);
+ return 2;
+ }
+
+ if (c < 0xf0)
+ {
+ if (!((s[1] ^ 0x80) < 0x40 &&
+ (s[2] ^ 0x80) < 0x40 &&
+ (c >= 0xe1 || s[1] >= 0xa0)))
+ return MY_CS_ILSEQ;
+
+ *pwc= ((my_wc_t) (c & 0x0f) << 12) |
+ ((my_wc_t) (s[1] ^ 0x80) << 6) |
+ (my_wc_t) (s[2] ^ 0x80);
+
+ return 3;
+ }
+ return MY_CS_ILSEQ;
+}
+
+
static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
my_wc_t wc, uchar *r, uchar *e)
{
@@ -2091,6 +2137,34 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
}
+/*
+ The same as above, but without range check.
+*/
+static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t wc, uchar *r)
+{
+ int count;
+
+ if (wc < 0x80)
+ count= 1;
+ else if (wc < 0x800)
+ count= 2;
+ else if (wc < 0x10000)
+ count= 3;
+ else
+ return MY_CS_ILUNI;
+
+ switch (count)
+ {
+ /* Fall through all cases!!! */
+ case 3: r[2]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x800;
+ case 2: r[1]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0xc0;
+ case 1: r[0]= (uchar) wc;
+ }
+ return count;
+}
+
+
static uint my_caseup_utf8(CHARSET_INFO *cs, char *src, uint srclen,
char *dst, uint dstlen)
{
@@ -2141,10 +2215,26 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, uint slen,
}
-static void my_caseup_str_utf8(CHARSET_INFO * cs, char * s)
+static uint my_caseup_str_utf8(CHARSET_INFO *cs, char *src)
{
- uint len= (uint) strlen(s);
- my_caseup_utf8(cs, s, len, s, len);
+ my_wc_t wc;
+ int srcres, dstres;
+ char *dst= src, *dst0= src;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ DBUG_ASSERT(cs->caseup_multiply == 1);
+
+ while (*src &&
+ (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
+ {
+ int plane= (wc>>8) & 0xFF;
+ wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
+ if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
+ break;
+ src+= srcres;
+ dst+= dstres;
+ }
+ *dst= '\0';
+ return (uint) (dst - dst0);
}
@@ -2170,10 +2260,43 @@ static uint my_casedn_utf8(CHARSET_INFO *cs, char *src, uint srclen,
return (uint) (dst - dst0);
}
-static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
+
+static uint my_casedn_str_utf8(CHARSET_INFO *cs, char *src)
{
- uint len= (uint) strlen(s);
- my_casedn_utf8(cs, s, len, s, len);
+ my_wc_t wc;
+ int srcres, dstres;
+ char *dst= src, *dst0= src;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ DBUG_ASSERT(cs->casedn_multiply == 1);
+
+ while (*src &&
+ (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
+ {
+ int plane= (wc>>8) & 0xFF;
+ wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
+ if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
+ break;
+ src+= srcres;
+ dst+= dstres;
+ }
+
+ /*
+ In rare cases lower string can be shorter than
+ the original string, for example:
+
+ "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE"
+ (which is 0xC4B0 in utf8, i.e. two bytes)
+
+ is converted into
+
+ "U+0069 LATIN SMALL LETTER I"
+ (which is 0x69 in utf8, i.e. one byte)
+
+ So, we need to put '\0' terminator after converting.
+ */
+
+ *dst= '\0';
+ return (uint) (dst - dst0);
}
diff --git a/strings/decimal.c b/strings/decimal.c
index 5a0bc0968b6..6b7a2266194 100644
--- a/strings/decimal.c
+++ b/strings/decimal.c
@@ -1036,7 +1036,7 @@ int decimal2ulonglong(decimal_t *from, ulonglong *to)
x=x*DIG_BASE + *buf++;
if (unlikely(y > ((ulonglong) ULONGLONG_MAX/DIG_BASE) || x < y))
{
- *to=y;
+ *to=ULONGLONG_MAX;
return E_DEC_OVERFLOW;
}
}
@@ -1348,7 +1348,7 @@ int bin2decimal(char *from, decimal_t *to, int precision, int scale)
}
from+=i;
*buf=x ^ mask;
- if (((uint32)*buf) >= powers10[intg0x+1])
+ if (((ulonglong)*buf) >= (ulonglong) powers10[intg0x+1])
goto err;
if (buf > to->buf || *buf != 0)
buf++;