diff options
-rw-r--r-- | client/mysqltest.cc | 56 | ||||
-rw-r--r-- | include/m_ctype.h | 21 | ||||
-rw-r--r-- | mysys/charset.c | 20 | ||||
-rw-r--r-- | sql/create_options.cc | 2 | ||||
-rw-r--r-- | sql/debug_sync.cc | 57 | ||||
-rw-r--r-- | sql/debug_sync.h | 3 | ||||
-rw-r--r-- | sql/sql_class.cc | 2 | ||||
-rw-r--r-- | sql/sys_vars.ic | 16 | ||||
-rw-r--r-- | storage/federated/ha_federated.cc | 3 | ||||
-rw-r--r-- | storage/federatedx/ha_federatedx.cc | 3 | ||||
-rw-r--r-- | storage/spider/spd_db_conn.cc | 2 | ||||
-rw-r--r-- | strings/ctype-big5.c | 7 | ||||
-rw-r--r-- | strings/ctype-bin.c | 8 | ||||
-rw-r--r-- | strings/ctype-cp932.c | 7 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 7 | ||||
-rw-r--r-- | strings/ctype-eucjpms.c | 7 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 7 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 6 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 1 | ||||
-rw-r--r-- | strings/ctype-mb.c | 2 | ||||
-rw-r--r-- | strings/ctype-simple.c | 8 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 7 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 1 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 34 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 7 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 86 | ||||
-rw-r--r-- | strings/my_strchr.c | 5 | ||||
-rw-r--r-- | strings/my_vsnprintf.c | 3 |
28 files changed, 118 insertions, 270 deletions
diff --git a/client/mysqltest.cc b/client/mysqltest.cc index f09ad3107cc..afb746cd62f 100644 --- a/client/mysqltest.cc +++ b/client/mysqltest.cc @@ -6571,37 +6571,35 @@ int read_line(char *buf, int size) if (!skip_char) { - /* Could be a multibyte character */ - /* This code is based on the code in "sql_load.cc" */ -#ifdef USE_MB - int charlen = my_mbcharlen(charset_info, (unsigned char) c); - /* We give up if multibyte character is started but not */ - /* completed before we pass buf_end */ - if ((charlen > 1) && (p + charlen) <= buf_end) + *p++= c; + if (use_mb(charset_info)) { - int i; - char* mb_start = p; - - *p++ = c; - - for (i= 1; i < charlen; i++) - { - c= my_getc(cur_file->file); - if (feof(cur_file->file)) - goto found_eof; - *p++ = c; - } - if (! my_ismbchar(charset_info, mb_start, p)) - { - /* It was not a multiline char, push back the characters */ - /* We leave first 'c', i.e. pretend it was a normal char */ - while (p-1 > mb_start) - my_ungetc(*--p); - } + const char *mb_start= p - 1; + /* Could be a multibyte character */ + /* See a similar code in "sql_load.cc" */ + for ( ; p < buf_end; ) + { + int charlen= my_charlen(charset_info, mb_start, p); + if (charlen > 0) + break; /* Full character */ + if (MY_CS_IS_TOOSMALL(charlen)) + { + /* We give up if multibyte character is started but not */ + /* completed before we pass buf_end */ + c= my_getc(cur_file->file); + if (feof(cur_file->file)) + goto found_eof; + *p++ = c; + continue; + } + DBUG_ASSERT(charlen == MY_CS_ILSEQ); + /* It was not a multiline char, push back the characters */ + /* We leave first 'c', i.e. pretend it was a normal char */ + while (p - 1 > mb_start) + my_ungetc(*--p); + break; + } } - else -#endif - *p++= c; } } die("The input buffer is too small for this query.x\n" \ diff --git a/include/m_ctype.h b/include/m_ctype.h index c892d576102..536d7fb4629 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -186,6 +186,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; #define MY_SEQ_INTTAIL 1 #define MY_SEQ_SPACES 2 +#define MY_SEQ_NONSPACES 3 /* Skip non-space characters, including bad bytes */ /* My charsets_list flags */ #define MY_CS_COMPILED 1 /* compiled-in sets */ @@ -403,7 +404,6 @@ struct my_charset_handler_st { my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader); /* Multibyte routines */ - uint (*mbcharlen)(CHARSET_INFO *, uint c); size_t (*numchars)(CHARSET_INFO *, const char *b, const char *e); size_t (*charpos)(CHARSET_INFO *, const char *b, const char *e, size_t pos); @@ -779,7 +779,6 @@ size_t my_well_formed_char_length_8bit(CHARSET_INFO *cs, size_t nchars, MY_STRCOPY_STATUS *status); int my_charlen_8bit(CHARSET_INFO *, const uchar *str, const uchar *end); -uint my_mbcharlen_8bit(CHARSET_INFO *, uint c); /* Functions for multibyte charsets */ @@ -1010,11 +1009,19 @@ int my_charlen(CHARSET_INFO *cs, const char *str, const char *end) return (cs->cset->charlen)(cs, (const uchar *) str, (const uchar *) end); } -#ifdef USE_MB -#define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a))) -#else -#define my_mbcharlen(s, a) 1 -#endif + + +/** + Convert broken and incomplete byte sequences to 1 byte. +*/ +static inline +uint my_charlen_fix(CHARSET_INFO *cs, const char *str, const char *end) +{ + int char_length= my_charlen(cs, str, end); + DBUG_ASSERT(str < end); + return char_length > 0 ? (uint) char_length : (uint) 1U; +} + #define my_caseup_str(s, a) ((s)->cset->caseup_str((s), (a))) #define my_casedn_str(s, a) ((s)->cset->casedn_str((s), (a))) diff --git a/mysys/charset.c b/mysys/charset.c index 3c134dc388e..e46fd16a5fb 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -54,6 +54,12 @@ get_collation_number_internal(const char *name) } +static my_bool is_multi_byte_ident(CHARSET_INFO *cs, uchar ch) +{ + int chlen= my_charlen(cs, (const char *) &ch, (const char *) &ch + 1); + return MY_CS_IS_TOOSMALL(chlen) ? TRUE : FALSE; +} + static my_bool init_state_maps(struct charset_info_st *cs) { uint i; @@ -73,10 +79,8 @@ static my_bool init_state_maps(struct charset_info_st *cs) state_map[i]=(uchar) MY_LEX_IDENT; else if (my_isdigit(cs,i)) state_map[i]=(uchar) MY_LEX_NUMBER_IDENT; -#if defined(USE_MB) && defined(USE_MB_IDENT) - else if (my_mbcharlen(cs, i)>1) + else if (is_multi_byte_ident(cs, i)) state_map[i]=(uchar) MY_LEX_IDENT; -#endif else if (my_isspace(cs,i)) state_map[i]=(uchar) MY_LEX_SKIP; else @@ -902,15 +906,13 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info, const char *to_start= to; const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); my_bool overflow= FALSE; -#ifdef USE_MB - my_bool use_mb_flag= use_mb(charset_info); -#endif for (end= from + length; from < end; from++) { char escape= 0; #ifdef USE_MB - int tmp_length; - if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) + int tmp_length= use_mb(charset_info) ? my_charlen(charset_info, from, end) : + 1; + if (tmp_length > 1) { if (to + tmp_length > to_end) { @@ -933,7 +935,7 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info, multi-byte character into a valid one. For example, 0xbf27 is not a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \) */ - if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1) + if (tmp_length < 1) /* Bad byte sequence */ escape= *from; else #endif diff --git a/sql/create_options.cc b/sql/create_options.cc index 66515be05b8..3011c4b2d7e 100644 --- a/sql/create_options.cc +++ b/sql/create_options.cc @@ -184,7 +184,7 @@ static bool set_one_value(ha_create_table_option *opt, { for (end=start; *end && *end != ','; - end+= my_mbcharlen(system_charset_info, *end)) /* no-op */; + end++) /* no-op */; if (!my_strnncoll(system_charset_info, (uchar*)start, end-start, (uchar*)value->str, value->length)) diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc index 8b3412eb732..b2a187b07cb 100644 --- a/sql/debug_sync.cc +++ b/sql/debug_sync.cc @@ -847,16 +847,16 @@ static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action) to the string terminator ASCII NUL ('\0'). */ -static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr) +static char *debug_sync_token(char **token_p, uint *token_length_p, + char *ptr, char *ptrend) { DBUG_ASSERT(token_p); DBUG_ASSERT(token_length_p); DBUG_ASSERT(ptr); /* Skip leading space */ - while (my_isspace(system_charset_info, *ptr)) - ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr); - + ptr+= system_charset_info->cset->scan(system_charset_info, + ptr, ptrend, MY_SEQ_SPACES); if (!*ptr) { ptr= NULL; @@ -867,8 +867,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr) *token_p= ptr; /* Find token end. */ - while (*ptr && !my_isspace(system_charset_info, *ptr)) - ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr); + ptr+= system_charset_info->cset->scan(system_charset_info, + ptr, ptrend, MY_SEQ_NONSPACES); /* Get token length. */ *token_length_p= ptr - *token_p; @@ -876,8 +876,9 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr) /* If necessary, terminate token. */ if (*ptr) { + DBUG_ASSERT(ptr < ptrend); /* Get terminator character length. */ - uint mbspacelen= my_mbcharlen(system_charset_info, (uchar) *ptr); + uint mbspacelen= my_charlen_fix(system_charset_info, ptr, ptrend); /* Terminate token. */ *ptr= '\0'; @@ -886,8 +887,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr) ptr+= mbspacelen; /* Skip trailing space */ - while (my_isspace(system_charset_info, *ptr)) - ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr); + ptr+= system_charset_info->cset->scan(system_charset_info, + ptr, ptrend, MY_SEQ_SPACES); } end: @@ -917,7 +918,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr) undefined in this case. */ -static char *debug_sync_number(ulong *number_p, char *actstrptr) +static char *debug_sync_number(ulong *number_p, char *actstrptr, + char *actstrend) { char *ptr; char *ept; @@ -927,7 +929,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr) DBUG_ASSERT(actstrptr); /* Get token from string. */ - if (!(ptr= debug_sync_token(&token, &token_length, actstrptr))) + if (!(ptr= debug_sync_token(&token, &token_length, actstrptr, actstrend))) goto end; *number_p= strtoul(token, &ept, 10); @@ -971,7 +973,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr) for the string. */ -static bool debug_sync_eval_action(THD *thd, char *action_str) +static bool debug_sync_eval_action(THD *thd, char *action_str, char *action_end) { st_debug_sync_action *action= NULL; const char *errmsg; @@ -986,7 +988,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) /* Get debug sync point name. Or a special command. */ - if (!(ptr= debug_sync_token(&token, &token_length, action_str))) + if (!(ptr= debug_sync_token(&token, &token_length, action_str, action_end))) { errmsg= "Missing synchronization point name"; goto err; @@ -1009,7 +1011,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) /* Get kind of action to be taken at sync point. */ - if (!(ptr= debug_sync_token(&token, &token_length, ptr))) + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) { /* No action present. Try special commands. Token unchanged. */ @@ -1090,7 +1092,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) if (!my_strcasecmp(system_charset_info, token, "SIGNAL")) { /* It is SIGNAL. Signal name must follow. */ - if (!(ptr= debug_sync_token(&token, &token_length, ptr))) + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) { errmsg= "Missing signal name after action SIGNAL"; goto err; @@ -1108,7 +1110,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) action->execute= 1; /* Get next token. If none follows, set action. */ - if (!(ptr= debug_sync_token(&token, &token_length, ptr))) + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) goto set_action; } @@ -1118,7 +1120,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) if (!my_strcasecmp(system_charset_info, token, "WAIT_FOR")) { /* It is WAIT_FOR. Wait_for signal name must follow. */ - if (!(ptr= debug_sync_token(&token, &token_length, ptr))) + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) { errmsg= "Missing signal name after action WAIT_FOR"; goto err; @@ -1137,7 +1139,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) action->timeout= opt_debug_sync_timeout; /* Get next token. If none follows, set action. */ - if (!(ptr= debug_sync_token(&token, &token_length, ptr))) + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) goto set_action; /* @@ -1146,14 +1148,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) if (!my_strcasecmp(system_charset_info, token, "TIMEOUT")) { /* It is TIMEOUT. Number must follow. */ - if (!(ptr= debug_sync_number(&action->timeout, ptr))) + if (!(ptr= debug_sync_number(&action->timeout, ptr, action_end))) { errmsg= "Missing valid number after TIMEOUT"; goto err; } /* Get next token. If none follows, set action. */ - if (!(ptr= debug_sync_token(&token, &token_length, ptr))) + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) goto set_action; } } @@ -1174,14 +1176,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) } /* Number must follow. */ - if (!(ptr= debug_sync_number(&action->execute, ptr))) + if (!(ptr= debug_sync_number(&action->execute, ptr, action_end))) { errmsg= "Missing valid number after EXECUTE"; goto err; } /* Get next token. If none follows, set action. */ - if (!(ptr= debug_sync_token(&token, &token_length, ptr))) + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) goto set_action; } @@ -1191,14 +1193,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) if (!my_strcasecmp(system_charset_info, token, "HIT_LIMIT")) { /* Number must follow. */ - if (!(ptr= debug_sync_number(&action->hit_limit, ptr))) + if (!(ptr= debug_sync_number(&action->hit_limit, ptr, action_end))) { errmsg= "Missing valid number after HIT_LIMIT"; goto err; } /* Get next token. If none follows, set action. */ - if (!(ptr= debug_sync_token(&token, &token_length, ptr))) + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) goto set_action; } @@ -1246,7 +1248,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str) terminators in the string. So we need to take a copy here. */ -bool debug_sync_update(THD *thd, char *val_str) +bool debug_sync_update(THD *thd, char *val_str, size_t len) { DBUG_ENTER("debug_sync_update"); DBUG_PRINT("debug_sync", ("set action: '%s'", val_str)); @@ -1255,8 +1257,9 @@ bool debug_sync_update(THD *thd, char *val_str) debug_sync_eval_action() places '\0' in the string, which itself must be '\0' terminated. */ + DBUG_ASSERT(val_str[len] == '\0'); DBUG_RETURN(opt_debug_sync_timeout ? - debug_sync_eval_action(thd, val_str) : + debug_sync_eval_action(thd, val_str, val_str + len) : FALSE); } @@ -1592,7 +1595,7 @@ bool debug_sync_set_action(THD *thd, const char *action_str, size_t len) DBUG_ASSERT(action_str); value= strmake_root(thd->mem_root, action_str, len); - rc= debug_sync_eval_action(thd, value); + rc= debug_sync_eval_action(thd, value, value + len); DBUG_RETURN(rc); } diff --git a/sql/debug_sync.h b/sql/debug_sync.h index bf1b3167dbc..339a21139b1 100644 --- a/sql/debug_sync.h +++ b/sql/debug_sync.h @@ -45,6 +45,9 @@ extern void debug_sync_init_thread(THD *thd); extern void debug_sync_end_thread(THD *thd); extern bool debug_sync_set_action(THD *thd, const char *action_str, size_t len); +extern bool debug_sync_update(THD *thd, char *val_str, size_t len); +extern uchar *debug_sync_value_ptr(THD *thd); + #endif /* defined(ENABLED_DEBUG_SYNC) */ #endif /* DEBUG_SYNC_INCLUDED */ diff --git a/sql/sql_class.cc b/sql/sql_class.cc index b1217cb1f9f..be950627f08 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -3235,7 +3235,7 @@ int select_export::send_data(List<Item> &items) if ((NEED_ESCAPING(*pos) || (check_second_byte && - my_mbcharlen(character_set_client, (uchar) *pos) == 2 && + ((uchar) *pos) > 0x7F /* a potential MB2HEAD */ && pos + 1 < end && NEED_ESCAPING(pos[1]))) && /* diff --git a/sql/sys_vars.ic b/sql/sys_vars.ic index 373f5834838..2488e804310 100644 --- a/sql/sys_vars.ic +++ b/sql/sys_vars.ic @@ -1434,6 +1434,9 @@ public: }; #if defined(ENABLED_DEBUG_SYNC) + +#include "debug_sync.h" + /** The class for @@debug_sync session-only variable */ @@ -1462,15 +1465,19 @@ public: String str(buff, sizeof(buff), system_charset_info), *res; if (!(res=var->value->val_str(&str))) - var->save_result.string_value.str= const_cast<char*>(""); + var->save_result.string_value= empty_lex_str; else - var->save_result.string_value.str= thd->strmake(res->ptr(), res->length()); + { + if (!thd->make_lex_string(&var->save_result.string_value, + res->ptr(), res->length())) + return true; + } return false; } bool session_update(THD *thd, set_var *var) { - extern bool debug_sync_update(THD *thd, char *val_str); - return debug_sync_update(thd, var->save_result.string_value.str); + return debug_sync_update(thd, var->save_result.string_value.str, + var->save_result.string_value.length); } bool global_update(THD *thd, set_var *var) { @@ -1488,7 +1495,6 @@ public: } uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { - extern uchar *debug_sync_value_ptr(THD *thd); return debug_sync_value_ptr(thd); } uchar *global_value_ptr(THD *thd, const LEX_STRING *base) diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc index 478a8f1cfaa..2334848a50b 100644 --- a/storage/federated/ha_federated.cc +++ b/storage/federated/ha_federated.cc @@ -561,8 +561,7 @@ static bool append_ident(String *string, const char *name, size_t length, for (name_end= name+length; name < name_end; name+= clen) { uchar c= *(uchar *) name; - if (!(clen= my_mbcharlen(system_charset_info, c))) - clen= 1; + clen= my_charlen_fix(system_charset_info, name, name_end); if (clen == 1 && c == (uchar) quote_char && (result= string->append("e_char, 1, system_charset_info))) goto err; diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc index 890d1bff292..56d900c6720 100644 --- a/storage/federatedx/ha_federatedx.cc +++ b/storage/federatedx/ha_federatedx.cc @@ -500,8 +500,7 @@ bool append_ident(String *string, const char *name, uint length, for (name_end= name+length; name < name_end; name+= clen) { uchar c= *(uchar *) name; - if (!(clen= my_mbcharlen(system_charset_info, c))) - clen= 1; + clen= my_charlen_fix(system_charset_info, name, name_end); if (clen == 1 && c == (uchar) quote_char && (result= string->append("e_char, 1, system_charset_info))) goto err; diff --git a/storage/spider/spd_db_conn.cc b/storage/spider/spd_db_conn.cc index 69a05dc94fb..a65338c4910 100644 --- a/storage/spider/spd_db_conn.cc +++ b/storage/spider/spd_db_conn.cc @@ -1370,7 +1370,7 @@ int spider_db_append_name_with_quote_str( for (name_end = name + length; name < name_end; name += length) { head_code = *name; - if (!(length = my_mbcharlen(system_charset_info, (uchar) head_code))) + if ((length= my_charlen(system_charset_info, name, name_end)) < 1) { my_message(ER_SPIDER_WRONG_CHARACTER_IN_NAME_NUM, ER_SPIDER_WRONG_CHARACTER_IN_NAME_STR, MYF(0)); diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 962931913a2..9ae394eeedc 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -848,12 +848,6 @@ static uint16 big5strokexfrm(uint16 i) } -static uint mbcharlen_big5(CHARSET_INFO *cs __attribute__((unused)), uint c) -{ - return (isbig5head(c)? 2 : 1); -} - - /* page 0 0xA140-0xC7FC */ static const uint16 tab_big5_uni0[]={ 0x3000,0xFF0C,0x3001,0x3002,0xFF0E,0x2022,0xFF1B,0xFF1A, @@ -6731,7 +6725,6 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin= static MY_CHARSET_HANDLER my_charset_big5_handler= { NULL, /* init */ - mbcharlen_big5, my_numchars_mb, my_charpos_mb, my_well_formed_len_big5, diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 8331de34ee4..aab7f2b7448 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -225,13 +225,6 @@ static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)), } -uint my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)), - uint c __attribute__((unused))) -{ - return 1; -} - - static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)), my_wc_t *wc, const uchar *str, @@ -510,7 +503,6 @@ static MY_COLLATION_HANDLER my_collation_binary_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, my_well_formed_len_8bit, diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index 2163662269d..151fac8a052 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -191,12 +191,6 @@ static const uchar sort_order_cp932[]= #include "ctype-mb.ic" -static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c) -{ - return (iscp932head((uchar) c) ? 2 : 1); -} - - #define cp932code(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d)) @@ -34687,7 +34681,6 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - mbcharlen_cp932, my_numchars_mb, my_charpos_mb, my_well_formed_len_cp932, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 19ed586ea49..d2389136c49 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -210,12 +210,6 @@ static const uchar sort_order_euc_kr[]= #include "ctype-mb.ic" -static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c) -{ - return (iseuc_kr_head(c) ? 2 : 1); -} - - static MY_UNICASE_CHARACTER cA3[256]= { {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */ @@ -9979,7 +9973,6 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - mbcharlen_euc_kr, my_numchars_mb, my_charpos_mb, my_well_formed_len_euckr, diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index 469d3a5be6c..caafd1d13b4 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -221,12 +221,6 @@ static const uchar sort_order_eucjpms[]= #include "strcoll.ic" -static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c) -{ - return (iseucjpms(c)? 2: iseucjpms_ss2(c)? 2: iseucjpms_ss3(c)? 3: 1); -} - - /* Case info pages for JIS-X-0208 range */ static MY_UNICASE_CHARACTER cA2[256]= @@ -67511,7 +67505,6 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - mbcharlen_eucjpms, my_numchars_mb, my_charpos_mb, my_well_formed_len_eucjpms, diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index a77237c1791..dbb92fabc4b 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -173,12 +173,6 @@ static const uchar sort_order_gb2312[]= #include "ctype-mb.ic" -static uint mbcharlen_gb2312(CHARSET_INFO *cs __attribute__((unused)),uint c) -{ - return (isgb2312head(c)? 2 : 1); -} - - static MY_UNICASE_CHARACTER cA2[256]= { {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */ @@ -6385,7 +6379,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - mbcharlen_gb2312, my_numchars_mb, my_charpos_mb, my_well_formed_len_gb2312, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index e4e015a59d2..617d72dc4d9 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -3451,11 +3451,6 @@ static uint16 gbksortorder(uint16 i) } -static uint mbcharlen_gbk(CHARSET_INFO *cs __attribute__((unused)),uint c) -{ - return (isgbkhead(c)? 2 : 1); -} - /* page 0 0x8140-0xFE4F */ static const uint16 tab_gbk_uni0[]={ 0x4E02,0x4E04,0x4E05,0x4E06,0x4E0F,0x4E12,0x4E17,0x4E1F, @@ -10666,7 +10661,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - mbcharlen_gbk, my_numchars_mb, my_charpos_mb, my_well_formed_len_gbk, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index aba63d97abb..fe255349530 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -396,7 +396,6 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)), static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - my_mbcharlen_8bit, my_numchars_8bit, my_charpos_8bit, my_well_formed_len_8bit, diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 3fa66cb0b2f..56b3309ca10 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -230,7 +230,7 @@ int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t) if (*s++ != *t++) return 1; } - else if (my_mbcharlen(cs, *t) > 1) + else if (my_charlen(cs, t, t + cs->mbmaxlen) > 1) return 1; else if (map[(uchar) *s++] != map[(uchar) *t++]) return 1; diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 5e5a345a638..f405c4f327b 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1059,6 +1059,13 @@ size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq) break; } return (size_t) (str - str0); + case MY_SEQ_NONSPACES: + for ( ; str < end ; str++) + { + if (my_isspace(cs, *str)) + break; + } + return (size_t) (str - str0); default: return 0; } @@ -1916,7 +1923,6 @@ my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs, MY_CHARSET_HANDLER my_charset_8bit_handler= { my_cset_init_8bit, - my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, my_well_formed_len_8bit, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index ebcea22d242..e0546146008 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -192,12 +192,6 @@ static const uchar sort_order_sjis[]= #include "ctype-mb.ic" -static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c) -{ - return (issjishead((uchar) c) ? 2 : 1); -} - - #define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d)) @@ -34066,7 +34060,6 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - mbcharlen_sjis, my_numchars_mb, my_charpos_mb, my_well_formed_len_sjis, diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 711bb21773e..82fd864b05d 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -834,7 +834,6 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, my_well_formed_len_8bit, diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 74e474cc28c..06dab08a772 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1049,6 +1049,9 @@ my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)), { } return (size_t) (str - str0); + case MY_SEQ_NONSPACES: + DBUG_ASSERT(0); /* Not implemented */ + /* pass through */ default: return 0; } @@ -1431,15 +1434,6 @@ my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end) /* Defines my_well_formed_char_length_utf16 */ -static uint -my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)), - uint c __attribute__((unused))) -{ - DBUG_ASSERT(0); - return MY_UTF16_HIGH_HEAD(c) ? 4 : 2; -} - - static size_t my_numchars_utf16(CHARSET_INFO *cs, const char *b, const char *e) @@ -1567,7 +1561,6 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = MY_CHARSET_HANDLER my_charset_utf16_handler= { NULL, /* init */ - my_mbcharlen_utf16, /* mbcharlen */ my_numchars_utf16, my_charpos_utf16, my_well_formed_len_utf16, @@ -1789,7 +1782,6 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = static MY_CHARSET_HANDLER my_charset_utf16le_handler= { NULL, /* init */ - my_mbcharlen_utf16, my_numchars_utf16, my_charpos_utf16, my_well_formed_len_utf16, @@ -2083,14 +2075,6 @@ my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)), /* Defines my_well_formed_char_length_utf32 */ -static uint -my_mbcharlen_utf32(CHARSET_INFO *cs __attribute__((unused)) , - uint c __attribute__((unused))) -{ - return 4; -} - - static int my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap) { @@ -2484,6 +2468,9 @@ my_scan_utf32(CHARSET_INFO *cs, str+= res; } return (size_t) (str - str0); + case MY_SEQ_NONSPACES: + DBUG_ASSERT(0); /* Not implemented */ + /* pass through */ default: return 0; } @@ -2525,7 +2512,6 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler = MY_CHARSET_HANDLER my_charset_utf32_handler= { NULL, /* init */ - my_mbcharlen_utf32, my_numchars_utf32, my_charpos_utf32, my_well_formed_len_utf32, @@ -2862,13 +2848,6 @@ my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)), } -static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , - uint c __attribute__((unused))) -{ - return 2; -} - - static size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)), const char *b, const char *e) @@ -3003,7 +2982,6 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = MY_CHARSET_HANDLER my_charset_ucs2_handler= { NULL, /* init */ - my_mbcharlen_ucs2, /* mbcharlen */ my_numchars_ucs2, my_charpos_ucs2, my_well_formed_len_ucs2, diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index b24fdb3075f..786ae991aef 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -220,12 +220,6 @@ static const uchar sort_order_ujis[]= #include "strcoll.ic" -static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c) -{ - return (isujis(c)? 2: isujis_ss2(c)? 2: isujis_ss3(c)? 3: 1); -} - - static size_t my_numcells_eucjp(CHARSET_INFO *cs __attribute__((unused)), const char *str, const char *str_end) @@ -67255,7 +67249,6 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - mbcharlen_ujis, my_numchars_mb, my_charpos_mb, my_well_formed_len_ujis, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 3a5616b7323..b6a7a0d0347 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -5426,21 +5426,6 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) #include "strcoll.ic" -static uint my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)), - uint c) -{ - if (c < 0x80) - return 1; - else if (c < 0xc2) - return 0; /* Illegal mb head */ - else if (c < 0xe0) - return 2; - else if (c < 0xf0) - return 3; - return 0; /* Illegal mb head */; -} - - static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler = { NULL, /* init */ @@ -5491,7 +5476,6 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler = MY_CHARSET_HANDLER my_charset_utf8_handler= { NULL, /* init */ - my_mbcharlen_utf8, my_numchars_mb, my_charpos_mb, my_well_formed_len_utf8, @@ -7045,7 +7029,6 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = static MY_CHARSET_HANDLER my_charset_filename_handler= { NULL, /* init */ - my_mbcharlen_utf8, my_numchars_mb, my_charpos_mb, my_well_formed_len_mb, @@ -7111,57 +7094,6 @@ struct charset_info_st my_charset_filename= }; -#ifdef MY_TEST_UTF8 -#include <stdio.h> - -static void test_mb(CHARSET_INFO *cs, uchar *s) -{ - while(*s) - { - if (my_ismbhead_utf8(cs,*s)) - { - uint len=my_mbcharlen_utf8(cs,*s); - while(len--) - { - printf("%c",*s); - s++; - } - printf("\n"); - } - else - { - printf("%c\n",*s); - s++; - } - } -} - -int main() -{ - char str[1024]=" utf8 test проба ПЕРА по-РУССКИ"; - CHARSET_INFO *cs; - - test_mb(cs,(uchar*)str); - - printf("orig :'%s'\n",str); - - my_caseup_utf8(cs,str,15); - printf("caseup :'%s'\n",str); - - my_caseup_str_utf8(cs,str); - printf("caseup_str:'%s'\n",str); - - my_casedn_utf8(cs,str,15); - printf("casedn :'%s'\n",str); - - my_casedn_str_utf8(cs,str); - printf("casedn_str:'%s'\n",str); - - return 0; -} - -#endif - #endif /* HAVE_CHARSET_UTF8 */ @@ -7755,23 +7687,6 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs, #include "strcoll.ic" -static uint -my_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), uint c) -{ - if (c < 0x80) - return 1; - if (c < 0xc2) - return 0; /* Illegal mb head */ - if (c < 0xe0) - return 2; - if (c < 0xf0) - return 3; - if (c < 0xf8) - return 4; - return 0; /* Illegal mb head */; -} - - static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler= { NULL, /* init */ @@ -7807,7 +7722,6 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler = MY_CHARSET_HANDLER my_charset_utf8mb4_handler= { NULL, /* init */ - my_mbcharlen_utf8mb4, my_numchars_mb, my_charpos_mb, my_well_formed_len_utf8mb4, diff --git a/strings/my_strchr.c b/strings/my_strchr.c index 0305ef89542..2365731fa03 100644 --- a/strings/my_strchr.c +++ b/strings/my_strchr.c @@ -38,7 +38,7 @@ const char *acc_end= (ACC) + (LEN); \ for (ptr_str= (STR) ; ptr_str < (END) ; ptr_str+= mbl) \ { \ - mbl= my_mbcharlen((CS), *(uchar*)ptr_str); \ + mbl= my_charlen_fix((CS), ptr_str, (END)); \ if (mbl < 2) \ { \ DBUG_ASSERT(mbl == 1); \ @@ -63,10 +63,9 @@ end: \ char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end, pchar c) { - uint mbl; while (str < end) { - mbl= my_mbcharlen(cs, *(uchar *)str); + uint mbl= my_ismbchar(cs, str, end); if (mbl < 2) { if (*str == c) diff --git a/strings/my_vsnprintf.c b/strings/my_vsnprintf.c index 4178b20789d..75514a90925 100644 --- a/strings/my_vsnprintf.c +++ b/strings/my_vsnprintf.c @@ -168,8 +168,7 @@ static char *backtick_string(CHARSET_INFO *cs, char *to, const char *end, for ( ; par < par_end; par+= char_len) { uchar c= *(uchar *) par; - if (!(char_len= my_mbcharlen(cs, c))) - char_len= 1; + char_len= my_charlen_fix(cs, par, par_end); if (char_len == 1 && c == (uchar) quote_char ) { if (start + 1 >= end) |