summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
author: unknown <bar@mysql.com> 2004-10-21 12:17:59 +0500
committer: unknown <bar@mysql.com> 2004-10-21 12:17:59 +0500
commit: 8f8236008193394ea2785c0095adf02839cde83a (patch)
tree: da7ea10d906d7cb39018bfc8fe3fc10f160850d1 /strings
parent: ffd77c4679615ba3e4dd7614dadc735af5d4aa9c (diff)
download: mariadb-git-8f8236008193394ea2785c0095adf02839cde83a.tar.gz
Allow cp932 characters to be stored in a SJIS column
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-sjis.c 36
-rw-r--r-- strings/ctype-utf8.c 9
2 files changed, 42 insertions, 3 deletions
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 55ff8ac28fe..4176ff2e538 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4563,6 +4563,40 @@ uint my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
return clen;
}
+/*
+  Returns a well formed length of a SJIS string.
+  CP932 additional characters are also accepted.
+
+  SYNOPSIS
+    my_well_formed_len_sjis()
+    cs    Character set (unused)
+    b     Start of the string
+    e     End of the string (one past the last byte)
+    pos   Maximum number of characters to scan
+
+  RETURN
+    Length in bytes of the leading part of [b, e) that consists of
+    at most "pos" well formed characters. Scanning stops early at
+    the first wrong byte sequence.
+*/
+static
+uint my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)),
+                             const char *b, const char *e, uint pos)
+{
+  const char *b0= b;
+  /*
+    Every iteration consumes exactly one character, so "pos"
+    must be decremented each time; otherwise the character
+    limit would never take effect.
+  */
+  while (pos-- && b < e)
+  {
+    /*
+      Cast to int8 for extra safety.
+      "char" can be unsigned by default
+      on some platforms.
+    */
+    if (((int8)b[0]) >= 0)
+    {
+      /* Single byte character */
+      b+= 1;
+    }
+    else if (issjishead((uchar)*b) && (e-b)>1 && issjistail((uchar)b[1]))
+    {
+      /* Double byte character */
+      b+= 2;
+    }
+    else if (((uchar)*b) >= 0xA1 && ((uchar)*b) <= 0xDF)
+    {
+      /*
+        Single byte half width katakana.
+        NOTE(review): assumes issjishead() does not cover
+        0xA1..0xDF - confirm against the macro definition.
+      */
+      b+= 1;
+    }
+    else
+    {
+      /* Wrong byte sequence */
+      break;
+    }
+  }
+  return (uint) (b - b0);
+}
static MY_COLLATION_HANDLER my_collation_ci_handler =
@@ -4586,7 +4620,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_sjis,
my_numchars_mb,
my_charpos_mb,
- my_well_formed_len_mb,
+ my_well_formed_len_sjis,
my_lengthsp_8bit,
my_numcells_sjis,
my_mb_wc_sjis, /* mb_wc */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index c08a1c0acfb..3ca6c5d279f 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2126,8 +2126,13 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
while (s[0] && t[0])
{
my_wc_t s_wc,t_wc;
-
- if (s[0] >= 0)
+
+ /*
+ Cast to int8 for extra safety.
+ char can be unsigned by default
+ on some platforms.
+ */
+ if (((int8)s[0]) >= 0)
{
/*
s[0] is between 0 and 127.