diff options
author | unknown <serg@serg.mylan> | 2003-10-20 15:53:48 +0200 |
---|---|---|
committer | unknown <serg@serg.mylan> | 2003-10-20 15:53:48 +0200 |
commit | 228f4a43a353e9e7c56e1a617749fc9c0e875f6d (patch) | |
tree | ff0f9b71c43cfc4ed4e2fa91a8398677dae7377a /strings | |
parent | 4a253d2af04869a3e28831b71ddbc5d78279fe5e (diff) | |
download | mariadb-git-228f4a43a353e9e7c56e1a617749fc9c0e875f6d.tar.gz |
FULLTEXT: correct charset support (UTF is now supported; UCS2 is not)
Code cleanup.
include/m_ctype.h:
my_mbcharlen_8bit() { return 1 }
mysql-test/r/fulltext.result:
fulltext on UTF
mysql-test/t/fulltext.test:
fulltext on UTF
sql/item_cmpfunc.h:
cleanup
sql/sql_table.cc:
FULLTEXT: UCS2 is not allowed
sql/sql_yacc.yy:
FULLTEXT: code cleanup
strings/ctype-bin.c:
my_mbcharlen_8bit() { return 1 }
strings/ctype-latin1.c:
my_mbcharlen_8bit() { return 1 }
strings/ctype-simple.c:
my_mbcharlen_8bit() { return 1 }
strings/ctype-tis620.c:
my_mbcharlen_8bit() { return 1 }
strings/ctype-utf8.c:
Hack (to be fixed properly later): all multi-byte sequences are now considered isalpha().
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-bin.c | 26 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 4 | ||||
-rw-r--r-- | strings/ctype-simple.c | 2 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 2 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 8 |
5 files changed, 24 insertions, 18 deletions
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 340084ad848..cd1b1399506 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -118,6 +118,12 @@ static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)), return strcmp(s,t); } +int my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)), + uint c __attribute__((unused))) +{ + return 1; +} + static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)), my_wc_t *wc, const unsigned char *str, @@ -264,12 +270,12 @@ static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)), static uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)), - const char *big, uint b_length, + const char *big, uint b_length, const char *small, uint s_length, my_match_t *match, uint nmatch) { register const uchar *str, *search, *end, *search_end; - + if (s_length <= b_length) { if (!s_length) @@ -282,32 +288,32 @@ uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)), } return 1; /* Empty string is always found */ } - + str= (const uchar*) big; search= (const uchar*) small; end= (const uchar*) big+b_length-s_length+1; search_end= (const uchar*) small + s_length; - + skipp: while (str != end) { if ( (*str++) == (*search)) { register const uchar *i,*j; - - i= str; + + i= str; j= search+1; - + while (j != search_end) if ((*i++) != (*j++)) goto skipp; - + if (nmatch > 0) { match[0].beg= 0; match[0].end= str- (const uchar*)big-1; match[0].mblen= match[0].end; - + if (nmatch > 1) { match[1].beg= match[0].end; @@ -338,7 +344,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* ismbchar */ - NULL, /* mbcharlen */ + my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, my_lengthsp_8bit, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index a8a5329f844..15798abb85b 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -170,14 +170,14 @@ int my_wc_mb_latin1(CHARSET_INFO *cs 
__attribute__((unused)), { if (str >= end) return MY_CS_TOOSMALL; - + return ((wc < 256) && ((str[0]=uni_latin1[wc]) || (!wc))) ? 1 : MY_CS_ILUNI; } static MY_CHARSET_HANDLER my_charset_handler= { NULL, - NULL, + my_mbcharlen_8bit, my_numchars_8bit, my_charpos_8bit, my_lengthsp_8bit, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index f85ce5e7a2b..ed1d2c77049 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1093,7 +1093,7 @@ skipp: MY_CHARSET_HANDLER my_charset_8bit_handler= { NULL, /* ismbchar */ - NULL, /* mbcharlen */ + my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, my_lengthsp_8bit, diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index a4d8a7d1f79..fd5e58ad8a7 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -717,7 +717,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* ismbchar */ - NULL, /* mbcharlen */ + my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, my_lengthsp_8bit, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 3ede1aa26f6..b5716c53ea2 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1540,10 +1540,10 @@ static uchar ctype_utf8[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0 }; static uchar to_lower_utf8[] = { |