summary refs log tree commit diff
path: root/mysys/charset.c
diff options
context:
space:
mode:
author unknown <bar@bar.mysql.r18.ru> 2003-03-14 18:08:12 +0400
committer unknown <bar@bar.mysql.r18.ru> 2003-03-14 18:08:12 +0400
commit aeb47edbbc10addaf7b551b8f2de17142f3ad269 (patch)
tree da8de3c08ddcdfb12acc6f34445b02a681926a01 /mysys/charset.c
parent 13d28097e7174546409f8757cbed937d9c1ab5a6 (diff)
download mariadb-git-aeb47edbbc10addaf7b551b8f2de17142f3ad269.tar.gz
Every charset now has its own parser state arrays
Diffstat (limited to 'mysys/charset.c')
-rw-r--r-- mysys/charset.c 64
1 files changed, 63 insertions, 1 deletions
diff --git a/mysys/charset.c b/mysys/charset.c
index 8bc250a3f07..3ad27469c03 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -29,7 +29,7 @@
- Initializing charset related structures
- Loading dynamic charsets
- Searching for a proper CHARSET_INFO
- using charset name, collation name or collatio ID
+ using charset name, collation name or collation ID
- Setting server default character set
*/
@@ -54,6 +54,62 @@ static void set_max_sort_char(CHARSET_INFO *cs)
}
+/*
+  Fill cs->state_map with a parser state for every byte value 0..255 and
+  cs->ident_map with 1 for bytes whose state is MY_LEX_IDENT or
+  MY_LEX_NUMBER_IDENT (0 otherwise), based on the character classes of cs.
+*/
+static void init_state_maps(CHARSET_INFO *cs)
+{
+  uint i;
+  uchar *state_map= cs->state_map;
+  uchar *ident_map= cs->ident_map;
+
+  /* Fill state_map with states to get a faster parser */
+  for (i=0; i < 256 ; i++)
+  {
+    if (my_isalpha(cs,i))
+      state_map[i]=(uchar) MY_LEX_IDENT;
+    else if (my_isdigit(cs,i))
+      state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
+#if defined(USE_MB) && defined(USE_MB_IDENT)
+    else if (use_mb(cs) && my_ismbhead(cs, i))
+      state_map[i]=(uchar) MY_LEX_IDENT;
+#endif
+    else if (!my_isgraph(cs,i))
+      state_map[i]=(uchar) MY_LEX_SKIP;
+    else
+      state_map[i]=(uchar) MY_LEX_CHAR;
+  }
+  state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT;
+  state_map[(uchar)'\'']=(uchar) MY_LEX_STRING;
+  state_map[(uchar)'-']=state_map[(uchar)'+']=(uchar) MY_LEX_SIGNED_NUMBER;
+  state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT;
+  state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP;
+  state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP;
+  state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL;
+  state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT;
+  state_map[(uchar)';']=(uchar) MY_LEX_COLON;
+  state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR;
+  state_map[0]=(uchar) MY_LEX_EOL;
+  state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE;
+  state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT;
+  state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT;
+  state_map[(uchar)'@']= (uchar) MY_LEX_USER_END;
+  state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
+  state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER;
+
+  /* Create a second map to make it faster to find identifiers */
+  for (i=0; i < 256 ; i++)
+    ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT ||
+                           state_map[i] == MY_LEX_NUMBER_IDENT);
+
+  /* Special handling of hex and binary strings. Set after ident_map is
+     built, so ident_map keeps the earlier classification of these letters. */
+  state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
+  state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
+}
+
static void simple_cs_init_functions(CHARSET_INFO *cs)
{
@@ -211,8 +267,11 @@ static void simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
to->name= my_once_strdup(from->name,MYF(MY_WME));
if (from->ctype)
+ {
to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
MY_CS_CTYPE_TABLE_SIZE, MYF(MY_WME));
+ init_state_maps(to);
+ }
if (from->to_lower)
to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
MY_CS_TO_LOWER_TABLE_SIZE, MYF(MY_WME));
@@ -447,7 +506,10 @@ static my_bool init_available_charsets(myf myflags)
for (cs=all_charsets; cs < all_charsets+255 ; cs++)
{
if (*cs)
+ {
set_max_sort_char(*cs);
+ init_state_maps(*cs);
+ }
}
strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);