diff options
author | unknown <bar@bar.mysql.r18.ru> | 2003-03-14 18:08:12 +0400 |
---|---|---|
committer | unknown <bar@bar.mysql.r18.ru> | 2003-03-14 18:08:12 +0400 |
commit | aeb47edbbc10addaf7b551b8f2de17142f3ad269 (patch) | |
tree | da8de3c08ddcdfb12acc6f34445b02a681926a01 /mysys/charset.c | |
parent | 13d28097e7174546409f8757cbed937d9c1ab5a6 (diff) | |
download | mariadb-git-aeb47edbbc10addaf7b551b8f2de17142f3ad269.tar.gz |
Every charset now has its own parser state arrays
Diffstat (limited to 'mysys/charset.c')
-rw-r--r-- | mysys/charset.c | 64 |
1 files changed, 63 insertions, 1 deletions
diff --git a/mysys/charset.c b/mysys/charset.c
index 8bc250a3f07..3ad27469c03 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -29,7 +29,7 @@
     - Initializing charset related structures
     - Loading dynamic charsets
     - Searching for a proper CHARSET_INFO
-      using charset name, collation name or collatio ID
+      using charset name, collation name or collation ID
     - Setting server default character set
 */
 
@@ -54,6 +54,62 @@ static void set_max_sort_char(CHARSET_INFO *cs)
 }
 
 
+/*
+  Fill cs->state_map[] with the MY_LEX_* state for each of the 256 byte
+  values, and cs->ident_map[] with 1 for MY_LEX_IDENT/MY_LEX_NUMBER_IDENT.
+*/
+static void init_state_maps(CHARSET_INFO *cs)
+{
+  uint i;
+  uchar *state_map= cs->state_map;
+  uchar *ident_map= cs->ident_map;
+
+  /* Fill state_map with states to get a faster parser */
+  for (i=0; i < 256 ; i++)
+  {
+    if (my_isalpha(cs,i))
+      state_map[i]=(uchar) MY_LEX_IDENT;
+    else if (my_isdigit(cs,i))
+      state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
+#if defined(USE_MB) && defined(USE_MB_IDENT)
+    else if (use_mb(cs) && my_ismbhead(cs, i))
+      state_map[i]=(uchar) MY_LEX_IDENT;
+#endif
+    else if (!my_isgraph(cs,i))
+      state_map[i]=(uchar) MY_LEX_SKIP;
+    else
+      state_map[i]=(uchar) MY_LEX_CHAR;
+  }
+  state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT;
+  state_map[(uchar)'\'']=(uchar) MY_LEX_STRING;
+  state_map[(uchar)'-']=state_map[(uchar)'+']=(uchar) MY_LEX_SIGNED_NUMBER;
+  state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT;
+  state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP;
+  state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP;
+  state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL;
+  state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT;
+  state_map[(uchar)';']=(uchar) MY_LEX_COLON;
+  state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR;
+  state_map[0]=(uchar) MY_LEX_EOL;
+  state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE;
+  state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT;
+  state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT;
+  state_map[(uchar)'@']= (uchar) MY_LEX_USER_END;
+  state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
+  state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER;
+
+  /* ident_map: fast identifier test; must be built before x/X/b/B overrides */
+  for (i=0; i < 256 ; i++)
+  {
+    ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT ||
+                           state_map[i] == MY_LEX_NUMBER_IDENT);
+  }
+
+  /* Special handling of hex and binary strings */
+  state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
+  state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
+}
+
 static void simple_cs_init_functions(CHARSET_INFO *cs)
 {
 
@@ -211,8 +267,11 @@ static void simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
     to->name= my_once_strdup(from->name,MYF(MY_WME));
 
   if (from->ctype)
+  {
     to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
                                        MY_CS_CTYPE_TABLE_SIZE, MYF(MY_WME));
+    init_state_maps(to);
+  }
   if (from->to_lower)
     to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
                                           MY_CS_TO_LOWER_TABLE_SIZE, MYF(MY_WME));
@@ -447,7 +506,10 @@ static my_bool init_available_charsets(myf myflags)
     for (cs=all_charsets; cs < all_charsets+255 ; cs++)
     {
       if (*cs)
+      {
         set_max_sort_char(*cs);
+        init_state_maps(*cs);
+      }
     }
 
     strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);