summary | refs | log | tree | commit | diff
path: root/strings
diff options
context:
space:
mode:
author unknown <serg@serg.mylan> 2003-10-20 15:53:48 +0200
committer unknown <serg@serg.mylan> 2003-10-20 15:53:48 +0200
commit 228f4a43a353e9e7c56e1a617749fc9c0e875f6d (patch)
tree ff0f9b71c43cfc4ed4e2fa91a8398677dae7377a /strings
parent 4a253d2af04869a3e28831b71ddbc5d78279fe5e (diff)
download mariadb-git-228f4a43a353e9e7c56e1a617749fc9c0e875f6d.tar.gz
FULLTEXT: correct charset support (UTF included, UCS2 - not)
code cleanup
include/m_ctype.h: my_mbcharlen_8bit() { return 1 }
mysql-test/r/fulltext.result: fulltext on UTF
mysql-test/t/fulltext.test: fulltext on UTF
sql/item_cmpfunc.h: cleanup
sql/sql_table.cc: FULLTEXT: UCS2 is not allowed
sql/sql_yacc.yy: FULLTEXT: code cleanup
strings/ctype-bin.c: my_mbcharlen_8bit() { return 1 }
strings/ctype-latin1.c: my_mbcharlen_8bit() { return 1 }
strings/ctype-simple.c: my_mbcharlen_8bit() { return 1 }
strings/ctype-tis620.c: my_mbcharlen_8bit() { return 1 }
strings/ctype-utf8.c: hack (to be fixed properly later): all multi-byte sequences are considered isalpha() now
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-bin.c 26
-rw-r--r-- strings/ctype-latin1.c 4
-rw-r--r-- strings/ctype-simple.c 2
-rw-r--r-- strings/ctype-tis620.c 2
-rw-r--r-- strings/ctype-utf8.c 8
5 files changed, 24 insertions, 18 deletions
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 340084ad848..cd1b1399506 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -118,6 +118,12 @@ static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)),
return strcmp(s,t);
}
+int my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
+ uint c __attribute__((unused)))
+{
+ return 1;
+}
+
static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *wc,
const unsigned char *str,
@@ -264,12 +270,12 @@ static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
static
uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
- const char *big, uint b_length,
+ const char *big, uint b_length,
const char *small, uint s_length,
my_match_t *match, uint nmatch)
{
register const uchar *str, *search, *end, *search_end;
-
+
if (s_length <= b_length)
{
if (!s_length)
@@ -282,32 +288,32 @@ uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
}
return 1; /* Empty string is always found */
}
-
+
str= (const uchar*) big;
search= (const uchar*) small;
end= (const uchar*) big+b_length-s_length+1;
search_end= (const uchar*) small + s_length;
-
+
skipp:
while (str != end)
{
if ( (*str++) == (*search))
{
register const uchar *i,*j;
-
- i= str;
+
+ i= str;
j= search+1;
-
+
while (j != search_end)
if ((*i++) != (*j++))
goto skipp;
-
+
if (nmatch > 0)
{
match[0].beg= 0;
match[0].end= str- (const uchar*)big-1;
match[0].mblen= match[0].end;
-
+
if (nmatch > 1)
{
match[1].beg= match[0].end;
@@ -338,7 +344,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* ismbchar */
- NULL, /* mbcharlen */
+ my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_lengthsp_8bit,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index a8a5329f844..15798abb85b 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -170,14 +170,14 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
{
if (str >= end)
return MY_CS_TOOSMALL;
-
+
return ((wc < 256) && ((str[0]=uni_latin1[wc]) || (!wc))) ? 1 : MY_CS_ILUNI;
}
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL,
- NULL,
+ my_mbcharlen_8bit,
my_numchars_8bit,
my_charpos_8bit,
my_lengthsp_8bit,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index f85ce5e7a2b..ed1d2c77049 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1093,7 +1093,7 @@ skipp:
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
NULL, /* ismbchar */
- NULL, /* mbcharlen */
+ my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_lengthsp_8bit,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index a4d8a7d1f79..fd5e58ad8a7 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -717,7 +717,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* ismbchar */
- NULL, /* mbcharlen */
+ my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_lengthsp_8bit,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 3ede1aa26f6..b5716c53ea2 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1540,10 +1540,10 @@ static uchar ctype_utf8[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0
};
static uchar to_lower_utf8[] = {