summaryrefslogtreecommitdiff
path: root/sql/sql_lex.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_lex.cc')
-rw-r--r--sql/sql_lex.cc810
1 files changed, 810 insertions, 0 deletions
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
new file mode 100644
index 00000000000..50c9ab852c1
--- /dev/null
+++ b/sql/sql_lex.cc
@@ -0,0 +1,810 @@
+/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+
+/* A lexical scanner on a temporary buffer with a yacc interface */
+
+#include "mysql_priv.h"
+#include "item_create.h"
+#include <m_ctype.h>
+#include <hash.h>
+
+/*
+  Name/length pair built from the literal "tmp-table". The literal has nine
+  characters; the length used to be 8, which silently dropped the final 'e'
+  for every consumer that honours the length field.
+*/
+LEX_STRING tmp_table_alias= {(char*) "tmp-table",9};
+
+/*
+  Macros to look like lex.
+  All of them operate on a variable named 'lex' that must be in scope at the
+  point of use; lex->ptr is the read position and lex->tok_start the start
+  of the token being scanned.
+*/
+
+/* Read the character at the read position and advance past it */
+#define yyGet() *(lex->ptr++)
+/* The character that was read most recently (the one just before ptr) */
+#define yyGetLast() lex->ptr[-1]
+/* Look at the next character without consuming it */
+#define yyPeek() lex->ptr[0]
+/* Look at the character after the next one without consuming anything */
+#define yyPeek2() lex->ptr[1]
+/* Step the read position back by one character */
+#define yyUnget() lex->ptr--
+/* Advance the read position by one character, ignoring its value */
+#define yySkip() lex->ptr++
+/*
+  Number of characters between tok_start and ptr, minus one: the token
+  length when the last character read is the terminator, not part of the token
+*/
+#define yyLength() ((uint) (lex->ptr - lex->tok_start)-1)
+
+/* For versions before 3.23.0, FLOAT_NUM is just another name for REAL_NUM */
+#if MYSQL_VERSION_ID < 32300
+#define FLOAT_NUM REAL_NUM
+#endif
+
+/* Thread-specific key for a LEX pointer; the key is created in lex_init() */
+pthread_key(LEX*,THR_LEX);
+
+/* NOTE(review): not referenced in the visible part of this file */
+#define TOCK_NAME_LENGTH 24
+
+/*
+ The following is based on the latin1 character set, and is only
+ used when comparing keywords.
+
+ The table is indexed by byte value and yields the upper-case form:
+ 97..122 ('a'..'z') map to 65..90 and 224..254 map to 192..222, except
+ 247 which maps to itself. Every other value, including 255, maps to itself.
+*/
+
+uchar to_upper_lex[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255
+};
+
+/*
+  Compare the first 'len' bytes of s and t, folding case through
+  to_upper_lex.
+
+  Returns 0 when all 'len' bytes match. Otherwise returns the number of
+  bytes that were still to be compared, counting the mismatching one.
+*/
+inline int lex_casecmp(const char *s, const char *t, uint len)
+{
+  for ( ; len != 0 ; len--, s++, t++)
+  {
+    if (to_upper_lex[(uchar) *s] != to_upper_lex[(uchar) *t])
+      return (int) len;                         // Mismatch at this byte
+  }
+  return 0;                                     // Everything matched
+}
+
+#include "lex_hash.h"
+
+/*
+  Initial scanner state for every possible byte value.
+  Filled in by lex_init() and read by yylex().
+*/
+static uchar state_map[256];
+
+
+/*
+  One-time initialisation of the scanner:
+  - stores the name length of every entry in 'symbols' and 'sql_functions'
+  - creates the THR_LEX thread key
+  - fills state_map[] so that yylex() can classify a byte with one lookup
+*/
+void lex_init(void)
+{
+  uint idx;
+  DBUG_ENTER("lex_init");
+
+  for (idx=0 ; idx < array_elements(symbols) ; idx++)
+    symbols[idx].length=(uchar) strlen(symbols[idx].name);
+  for (idx=0 ; idx < array_elements(sql_functions) ; idx++)
+    sql_functions[idx].length=(uchar) strlen(sql_functions[idx].name);
+
+  VOID(pthread_key_create(&THR_LEX,NULL));
+
+  /* Generic classification first; special characters are overridden below */
+  for (idx=0; idx < 256 ; idx++)
+  {
+    uchar initial;
+    if (isalpha(idx))
+      initial=(uchar) STATE_IDENT;
+    else if (isdigit(idx))
+      initial=(uchar) STATE_NUMBER_IDENT;
+#if defined(USE_MB) && defined(USE_MB_IDENT)
+    else if (use_mb(default_charset_info) && my_ismbhead(default_charset_info, idx))
+      initial=(uchar) STATE_IDENT;
+#endif
+    else if (!isgraph(idx))
+      initial=(uchar) STATE_SKIP;
+    else
+      initial=(uchar) STATE_CHAR;
+    state_map[idx]=initial;
+  }
+
+  /* Characters that may be part of an identifier */
+  state_map[(uchar)'$']=(uchar) STATE_IDENT;
+  state_map[(uchar)'_']=(uchar) STATE_IDENT;
+
+  /* String quotes */
+  state_map[(uchar)'"']=(uchar) STATE_STRING;
+  state_map[(uchar)'\'']=(uchar) STATE_STRING;
+
+  /* Numbers */
+  state_map[(uchar)'+']=(uchar) STATE_SIGNED_NUMBER;
+  state_map[(uchar)'-']=(uchar) STATE_SIGNED_NUMBER;
+  state_map[(uchar)'.']=(uchar) STATE_REAL_OR_POINT;
+
+  /* Comparison and boolean operators */
+  state_map[(uchar)'!']=(uchar) STATE_CMP_OP;
+  state_map[(uchar)'=']=(uchar) STATE_CMP_OP;
+  state_map[(uchar)'>']=(uchar) STATE_CMP_OP;
+  state_map[(uchar)'<']=(uchar) STATE_LONG_CMP_OP;
+  state_map[(uchar)'|']=(uchar) STATE_BOOL;
+  state_map[(uchar)'&']=(uchar) STATE_BOOL;
+
+  /* Comments, terminators and other special characters */
+  state_map[(uchar)'#']=(uchar) STATE_COMMENT;
+  state_map[(uchar)';']=(uchar) STATE_COLON;
+  state_map[(uchar)':']=(uchar) STATE_SET_VAR;
+  state_map[0]=(uchar) STATE_EOL;
+  state_map[(uchar)'\\']=(uchar) STATE_ESCAPE;
+  state_map[(uchar)'/']=(uchar) STATE_LONG_COMMENT;
+  state_map[(uchar)'*']=(uchar) STATE_END_LONG_COMMENT;
+  state_map[(uchar)'@']=(uchar) STATE_USER_END;
+  state_map[(uchar)'`']=(uchar) STATE_USER_VARIABLE_DELIMITER;
+
+  /* With OPTION_ANSI_MODE a double quote delimits a name, not a string */
+  if (thd_startup_options & OPTION_ANSI_MODE)
+    state_map[(uchar)'"']=(uchar) STATE_USER_VARIABLE_DELIMITER;
+
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Counterpart of lex_init(); call this when the daemon ends.
+  Currently it has nothing to release.
+*/
+void lex_free(void)
+{ // Call this when daemon ends
+ DBUG_ENTER("lex_free");
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+  Prepare the LEX embedded in 'thd' for scanning a new query.
+
+  buf     start of the query text
+  length  number of bytes in the query; end_of_query is set to buf+length
+
+  Returns a pointer to thd->lex.
+*/
+LEX *lex_start(THD *thd, uchar *buf,uint length)
+{
+  LEX *lex= &thd->lex;
+
+  /* Input buffer and scanner position */
+  lex->ptr= buf;
+  lex->end_of_query= buf+length;
+  lex->next_state= STATE_START;
+  lex->yylineno= 1;
+
+  /* Per-query flags and counters */
+  lex->in_comment= 0;
+  lex->create_refs= 0;
+  lex->length= 0;
+  lex->in_sum_expr= 0;
+  lex->expr_list.empty();
+  lex->ftfunc_list.empty();
+
+  /* Connection-dependent settings */
+  lex->thd= thd;
+  lex->convert_set= thd->convert_set;
+  lex->ignore_space= test(thd->client_capabilities & CLIENT_IGNORE_SPACE);
+
+  /* No yacc stacks yet; lex_end() frees these */
+  lex->yacc_yyvs= 0;
+  lex->yacc_yyss= 0;
+  return lex;
+}
+
+/*
+  Release what a parse left behind in 'lex': the elements still held in
+  expr_list and the two yacc stack pointers that lex_start() reset to 0.
+*/
+void lex_end(LEX *lex)
+{
+ lex->expr_list.delete_elements(); // If error when parsing sql-varargs
+ x_free(lex->yacc_yyss);
+ x_free(lex->yacc_yyvs);
+}
+
+
+/*
+  Look up the 'len' bytes starting at lex->tok_start as a keyword.
+
+  function  passed on to get_hash_symbol(); when it is set (and HAVE_DLOPEN
+            is defined) user defined functions are searched as well
+
+  On a symbol hit lex->yylval->symbol is filled in and the symbol's token
+  is returned. On a UDF hit lex->yylval->udf is set and a UDF_... or
+  UDA_... token matching the result type is returned.
+  Returns 0 if nothing was found.
+*/
+static int find_keyword(LEX *lex, uint len, bool function)
+{
+  uchar *name= lex->tok_start;
+  SYMBOL *found= get_hash_symbol((const char *) name,len,function);
+
+  if (found)
+  {
+    lex->yylval->symbol.symbol= found;
+    lex->yylval->symbol.str= (char*) name;
+    lex->yylval->symbol.length= len;
+    return found->tok;
+  }
+#ifdef HAVE_DLOPEN
+  if (function && using_udf_functions)
+  {
+    udf_func *udf= find_udf((char*) name, len);
+    if (udf)
+    {
+      const bool is_plain= (udf->type == UDFTYPE_FUNCTION);
+      switch (udf->returns) {
+      case STRING_RESULT:
+        lex->yylval->udf= udf;
+        return is_plain ? UDF_CHAR_FUNC : UDA_CHAR_SUM;
+      case REAL_RESULT:
+        lex->yylval->udf= udf;
+        return is_plain ? UDF_FLOAT_FUNC : UDA_FLOAT_SUM;
+      case INT_RESULT:
+        lex->yylval->udf= udf;
+        return is_plain ? UDF_INT_FUNC : UDA_INT_SUM;
+      }
+    }
+  }
+#endif
+  return 0;
+}
+
+
+/*
+  Step back over the terminating character and return a copy, made with
+  sql_strmake(), of the 'length' bytes that start at lex->tok_start.
+  lex->yytoklen is set to 'length' as well.
+*/
+
+static inline LEX_STRING get_token(LEX *lex,uint length)
+{
+  LEX_STRING token;
+
+  lex->ptr--;                                   // ptr is now just after the token
+  lex->yytoklen= length;
+  token.length= lex->yytoklen;
+  token.str= (char*) sql_strmake((char*) lex->tok_start,token.length);
+  return token;
+}
+
+/*
+  Return an unescaped text literal without quotes.
+
+  On entry the opening quote is the character just before lex->ptr (and is
+  expected at lex->tok_start); the same character must close the string.
+  The function first scans forward to the closing quote and then copies the
+  text between the quotes into a buffer from sql_alloc(), expanding
+  backslash escapes and collapsing doubled quotes. "\%" and "\_" keep
+  their backslash so that the wildcard prefix is remembered.
+
+  Returns the NUL-terminated copy, with its length in lex->yytoklen, or 0
+  when the query ends before the closing quote or the buffer cannot be
+  allocated.
+
+  TODO: do this with a single scan of the string.
+*/
+
+static char *get_text(LEX *lex)
+{
+  reg1 uchar c,sep;
+  uint found_escape=0;
+
+  sep= yyGetLast();                     // String should end with this
+  //lex->tok_start=lex->ptr-1;          // Remember '
+  while (lex->ptr != lex->end_of_query)
+  {
+    c = yyGet();
+#ifdef USE_MB
+    int l;
+    if (use_mb(default_charset_info) &&
+        (l = my_ismbchar(default_charset_info,
+                         (const char *)lex->ptr-1,
+                         (const char *)lex->end_of_query))) {
+      lex->ptr += l-1;                  // Skip the rest of the multi-byte char
+      continue;
+    }
+#endif
+    if (c == '\\')
+    {                                   // Escaped character
+      found_escape=1;
+      if (lex->ptr == lex->end_of_query)
+        return 0;
+      yySkip();
+    }
+    else if (c == sep)
+    {
+      if (c == yyGet())                 // Check if two separators in a row
+      {
+        found_escape=1;                 // Duplicate. Remember for delete
+        continue;
+      }
+      else
+        yyUnget();
+
+      /* Found end. Unescape and return string */
+      uchar *str,*end,*start;
+
+      str=lex->tok_start+1;             // First char after the opening quote
+      end=lex->ptr-1;                   // The closing quote
+      start=(uchar*) sql_alloc((uint) (end-str)+1);
+      /*
+        NOTE(review): assumes sql_alloc() reports failure by returning 0.
+        Without this check the copy below would write through a null pointer.
+      */
+      if (!start)
+        return 0;
+      if (!found_escape)
+      {                                 // Nothing to unescape: plain copy
+        lex->yytoklen=(uint) (end-str);
+        memcpy(start,str,lex->yytoklen);
+        start[lex->yytoklen]=0;
+      }
+      else
+      {
+        uchar *to;
+        for (to=start ; str != end ; str++)
+        {
+#ifdef USE_MB
+          int l;
+          if (use_mb(default_charset_info) &&
+              (l = my_ismbchar(default_charset_info,
+                               (const char *)str, (const char *)end))) {
+            while (l--)
+              *to++ = *str++;
+            str--;                      // The for loop steps str again
+            continue;
+          }
+#endif
+          if (*str == '\\' && str+1 != end)
+          {
+            switch(*++str) {
+            case 'n':
+              *to++='\n';
+              break;
+            case 't':
+              *to++= '\t';
+              break;
+            case 'r':
+              *to++ = '\r';
+              break;
+            case 'b':
+              *to++ = '\b';
+              break;
+            case '0':
+              *to++= 0;                 // Ascii null
+              break;
+            case 'Z':                   // ^Z must be escaped on Win32
+              *to++='\032';
+              break;
+            case '_':
+            case '%':
+              *to++= '\\';              // remember prefix for wildcard
+              /* Fall through */
+            default:
+              *to++ = *str;
+              break;
+            }
+          }
+          else if (*str == sep)
+            *to++= *str++;              // Two ' or "; keep only one
+          else
+            *to++ = *str;
+
+        }
+        *to=0;
+        lex->yytoklen=(uint) (to-start);
+      }
+      if (lex->convert_set)
+        lex->convert_set->convert((char*) start,lex->yytoklen);
+      return (char*) start;
+    }
+  }
+  return 0;                             // unexpected end of query
+}
+
+
+/*
+** Calculate the type of an integer: long integer, longlong integer or real.
+** Returns the smallest type that can hold the value in the string.
+** Values that do not fit in a signed longlong are returned as real,
+** because otherwise there would be unexpected sign changes, as all
+** calculation is done with longlong or double.
+**
+** The strings below are the limits that int_token() compares against.
+** The *_len constants count digits only; a leading sign is not included.
+*/
+
+static const char *long_str="2147483647";
+static const uint long_len=10;
+static const char *signed_long_str="-2147483648";
+static const char *longlong_str="9223372036854775807";
+static const uint longlong_len=19;
+static const char *signed_longlong_str="-9223372036854775808";
+static const uint signed_longlong_len=19;
+
+
+/*
+  Classify an integer literal by magnitude.
+
+  str     the literal, optionally starting with '+' or '-'
+  length  number of bytes in str, including the sign
+
+  Returns NUM, LONG_NUM or REAL_NUM: the first of these whose limit string
+  (long_str, longlong_str or their signed counterparts) is not exceeded.
+*/
+inline static uint int_token(const char *str,uint length)
+{
+  if (length < long_len)                        // quick normal case
+    return NUM;
+
+  /* Remove sign and leading zeros */
+  const bool negative= (*str == '-');
+  if (negative || *str == '+')
+  {
+    str++;
+    length--;
+  }
+  for ( ; *str == '0' && length ; str++)
+    length--;
+
+  if (length < long_len)
+    return NUM;
+
+  const uint wide_len= negative ? signed_longlong_len : longlong_len;
+  const char *limit;                            // Digits to compare against
+  uint within,beyond;                           // Result if <= limit / > limit
+
+  if (length == long_len)
+  {
+    limit= negative ? signed_long_str+1 : long_str;
+    within= NUM;
+    beyond= LONG_NUM;
+  }
+  else if (length < wide_len)
+    return LONG_NUM;
+  else if (length > wide_len)
+    return REAL_NUM;
+  else
+  {
+    limit= negative ? signed_longlong_str+1 : longlong_str;
+    within= LONG_NUM;
+    beyond= REAL_NUM;
+  }
+
+  /* Same number of digits: the first differing digit decides */
+  while (*limit && *limit++ == *str++) ;
+  if ((uchar) str[-1] <= (uchar) limit[-1])
+    return within;
+  return beyond;
+}
+
+
+/*
+  yylex() - return the next token of the query being scanned.
+
+  arg  points to the YYSTYPE that receives the semantic value of the token.
+
+  Returns the token code: a keyword or operator token from find_keyword(),
+  IDENT, NUM / LONG_NUM / REAL_NUM (from int_token()), REAL_NUM, FLOAT_NUM,
+  HEX_NUM, TEXT_STRING, NULL_SYM, SET_VAR, LEX_HOSTNAME, the character
+  itself for single character tokens, END_OF_INPUT at the end of the query
+  and 0 on every call after that.
+
+  The scanner is a state machine; the first state for a character comes
+  from state_map[]. lex->next_state is remembered between calls:
+    STATE_OPERATOR_OR_IDENT ; last token was an ident, text or number
+                              (which can't be followed by a signed number)
+    STATE_START             ; a signed number is allowed next
+    STATE_IDENT_SEP         ; an ident was returned and a '.' follows
+    STATE_IDENT_START       ; next is an ident, not a keyword
+    STATE_HOSTNAME          ; an '@' was returned, a host name follows
+    STATE_END               ; END_OF_INPUT has been returned
+*/
+
+int yylex(void *arg)
+{
+  reg1 uchar c;
+  int tokval;
+  uint length;
+  enum lex_states state,prev_state;
+  LEX *lex=current_lex;
+  YYSTYPE *yylval=(YYSTYPE*) arg;
+
+  lex->yylval=yylval;                   // The global state
+  lex->tok_start=lex->tok_end=lex->ptr;
+  prev_state=state=lex->next_state;
+  lex->next_state=STATE_OPERATOR_OR_IDENT;
+  LINT_INIT(c);
+  for (;;)
+  {
+    switch(state) {
+    case STATE_OPERATOR_OR_IDENT:       // Next is operator or keyword
+    case STATE_START:                   // Start of token
+      // Skip leading space
+      for (c=yyGet() ; (state_map[c] == STATE_SKIP) ; c= yyGet())
+      {
+        if (c == '\n')
+          lex->yylineno++;
+      }
+      lex->tok_start=lex->ptr-1;        // Start of real token
+      state= (enum lex_states) state_map[c];
+      break;
+    case STATE_ESCAPE:
+      if (yyGet() == 'N')
+      {                                 // Allow \N as shortcut for NULL
+        yylval->lex_str.str=(char*) "\\N";
+        yylval->lex_str.length=2;
+        return NULL_SYM;
+      }
+      // fall through: return the '\' as a single char
+    case STATE_CHAR:                    // Unknown or single char token
+    case STATE_SKIP:                    // This should not happen
+      yylval->lex_str.str=(char*) (lex->ptr=lex->tok_start);// Set to first char
+      yylval->lex_str.length=1;
+      c=yyGet();
+      if (c != ')')
+        lex->next_state= STATE_START;   // Allow signed numbers
+      if (c == ',')
+        lex->tok_start=lex->ptr;        // Let tok_start point at next item
+      return((int) c);
+
+    case STATE_IDENT:                   // Incomplete keyword or ident
+#if defined(USE_MB) && defined(USE_MB_IDENT)
+      if (use_mb(default_charset_info))
+      {
+        if (my_ismbhead(default_charset_info, yyGetLast()))
+        {
+          int l = my_ismbchar(default_charset_info,
+                              (const char *)lex->ptr-1,
+                              (const char *)lex->end_of_query);
+          if (l == 0) {
+            state = STATE_CHAR;
+            continue;
+          }
+          lex->ptr += l - 1;
+        }
+        while (state_map[c=yyGet()] == STATE_IDENT ||
+               state_map[c] == STATE_NUMBER_IDENT)
+        {
+          if (my_ismbhead(default_charset_info, c))
+          {
+            int l;
+            if ((l = my_ismbchar(default_charset_info,
+                                 (const char *)lex->ptr-1,
+                                 (const char *)lex->end_of_query)) == 0)
+              break;
+            lex->ptr += l-1;
+          }
+        }
+      }
+      else
+#endif
+        while (state_map[c=yyGet()] == STATE_IDENT ||
+               state_map[c] == STATE_NUMBER_IDENT) ;
+      length= (uint) (lex->ptr - lex->tok_start)-1;
+      if (lex->ignore_space)
+      {
+        for ( ; state_map[c] == STATE_SKIP ; c= yyGet());
+      }
+      if (c == '.' && (state_map[yyPeek()] == STATE_IDENT ||
+                       state_map[yyPeek()] == STATE_NUMBER_IDENT))
+        lex->next_state=STATE_IDENT_SEP;
+      else
+      {                                 // '(' must follow directly if function
+        yyUnget();
+        if ((tokval = find_keyword(lex,length,c == '(')))
+        {
+          lex->next_state= STATE_START; // Allow signed numbers
+          return(tokval);               // Was keyword
+        }
+        yySkip();                       // next state does a unget
+      }
+      yylval->lex_str=get_token(lex,length);
+      return(IDENT);
+
+    case STATE_IDENT_SEP:               // Found ident and now '.'
+      lex->next_state=STATE_IDENT_START;// Next is an ident (not a keyword)
+      yylval->lex_str.str=(char*) lex->ptr;
+      yylval->lex_str.length=1;
+      c=yyGet();                        // should be '.'
+      return((int) c);
+
+    case STATE_NUMBER_IDENT:            // number or ident which starts with num
+      while (isdigit((c = yyGet()))) ;
+      if (state_map[c] != STATE_IDENT)
+      {                                 // Can't be identifier
+        state=STATE_INT_OR_REAL;
+        break;
+      }
+      if (c == 'e' || c == 'E')
+      {
+        if ((c=(yyGet())) == '+' || c == '-')
+        {                               // Allow 1E+10
+          if (isdigit(yyPeek()))        // Number must have digit after sign
+          {
+            yySkip();
+            while (isdigit(yyGet())) ;
+            yylval->lex_str=get_token(lex,yyLength());
+            return(FLOAT_NUM);
+          }
+        }
+        yyUnget(); /* purecov: inspected */
+      }
+      else if (c == 'x' && (lex->ptr - lex->tok_start) == 2 &&
+               lex->tok_start[0] == '0' )
+      {                                 // Varbinary
+        while (isxdigit((c = yyGet()))) ;
+        if ((lex->ptr - lex->tok_start) >= 4)
+        {
+          yylval->lex_str=get_token(lex,yyLength());
+          yylval->lex_str.str+=2;       // Skip 0x
+          yylval->lex_str.length-=2;
+          lex->yytoklen-=2;
+          return (HEX_NUM);
+        }
+        yyUnget();
+      }
+      // fall through
+    case STATE_IDENT_START:             // Incomplete ident
+#if defined(USE_MB) && defined(USE_MB_IDENT)
+      if (use_mb(default_charset_info))
+      {
+        if (my_ismbhead(default_charset_info, yyGetLast()))
+        {
+          int l = my_ismbchar(default_charset_info,
+                              (const char *)lex->ptr-1,
+                              (const char *)lex->end_of_query);
+          if (l == 0)
+          {
+            state = STATE_CHAR;
+            continue;
+          }
+          lex->ptr += l - 1;
+        }
+        while (state_map[c=yyGet()] == STATE_IDENT ||
+               state_map[c] == STATE_NUMBER_IDENT)
+        {
+          if (my_ismbhead(default_charset_info, c))
+          {
+            int l;
+            if ((l = my_ismbchar(default_charset_info,
+                                 (const char *)lex->ptr-1,
+                                 (const char *)lex->end_of_query)) == 0)
+              break;
+            lex->ptr += l-1;
+          }
+        }
+      }
+      else
+#endif
+        while (state_map[c = yyGet()] == STATE_IDENT ||
+               state_map[c] == STATE_NUMBER_IDENT) ;
+
+      if (c == '.' && (state_map[yyPeek()] == STATE_IDENT ||
+                       state_map[yyPeek()] == STATE_NUMBER_IDENT))
+        lex->next_state=STATE_IDENT_SEP;// Next is '.'
+      // fall through
+
+    case STATE_FOUND_IDENT:             // Complete ident
+      yylval->lex_str=get_token(lex,yyLength());
+      return(IDENT);
+
+    case STATE_USER_VARIABLE_DELIMITER:
+      lex->tok_start=lex->ptr;          // Skip first `
+      while ((c=yyGet()) && state_map[c] != STATE_USER_VARIABLE_DELIMITER &&
+             c != (uchar) NAMES_SEP_CHAR) ;
+      yylval->lex_str=get_token(lex,yyLength());
+      if (state_map[c] == STATE_USER_VARIABLE_DELIMITER)
+        yySkip();                       // Skip end `
+      return(IDENT);
+
+    case STATE_SIGNED_NUMBER:           // Incomplete signed number
+      if (prev_state == STATE_OPERATOR_OR_IDENT)
+      {
+        if (c == '-' && yyPeek() == '-' && isspace(yyPeek2()))
+          state=STATE_COMMENT;
+        else
+          state= STATE_CHAR;            // Must be operator
+        break;
+      }
+      if (!isdigit(c=yyGet()) || yyPeek() == 'x')
+      {
+        if (c != '.')
+        {
+          /*
+            c now holds the character after the sign, so the sign itself
+            has to be checked through tok_start: only "-- " starts a
+            comment, "+- " must not.
+          */
+          if (lex->tok_start[0] == '-' && c == '-' && isspace(yyPeek()))
+            state=STATE_COMMENT;
+          else
+            state = STATE_CHAR;         // Return sign as single char
+          break;
+        }
+        yyUnget();                      // Fix for next loop
+      }
+      while (isdigit(c=yyGet())) ;      // Incomplete real or int number
+      if ((c == 'e' || c == 'E') && (yyPeek() == '+' || yyPeek() == '-'))
+      {                                 // Real number
+        yyUnget();
+        c= '.';                         // Fool next test
+      }
+      // fall through
+    case STATE_INT_OR_REAL:             // Complete int or incomplete real
+      if (c != '.')
+      {                                 // Found complete integer number.
+        yylval->lex_str=get_token(lex,yyLength());
+        return int_token(yylval->lex_str.str,yylval->lex_str.length);
+      }
+      // fall through
+    case STATE_REAL:                    // Incomplete real number
+      while (isdigit(c = yyGet())) ;
+
+      if (c == 'e' || c == 'E')
+      {
+        c = yyGet();
+        if (c != '-' && c != '+')
+        {                               // No exp sign found
+          state= STATE_CHAR;
+          break;
+        }
+        if (!isdigit(yyGet()))
+        {                               // No digit after sign
+          state= STATE_CHAR;
+          break;
+        }
+        while (isdigit(yyGet())) ;
+        yylval->lex_str=get_token(lex,yyLength());
+        return(FLOAT_NUM);
+      }
+      yylval->lex_str=get_token(lex,yyLength());
+      return(REAL_NUM);
+
+    case STATE_CMP_OP:                  // Incomplete comparison operator
+      if (state_map[yyPeek()] == STATE_CMP_OP ||
+          state_map[yyPeek()] == STATE_LONG_CMP_OP)
+        yySkip();
+      if ((tokval = find_keyword(lex,(uint) (lex->ptr - lex->tok_start),0)))
+      {
+        lex->next_state= STATE_START;   // Allow signed numbers
+        return(tokval);
+      }
+      state = STATE_CHAR;               // Something fishy found
+      break;
+
+    case STATE_LONG_CMP_OP:             // Incomplete comparison operator
+      if (state_map[yyPeek()] == STATE_CMP_OP ||
+          state_map[yyPeek()] == STATE_LONG_CMP_OP)
+      {
+        yySkip();
+        if (state_map[yyPeek()] == STATE_CMP_OP)
+          yySkip();
+      }
+      if ((tokval = find_keyword(lex,(uint) (lex->ptr - lex->tok_start),0)))
+      {
+        lex->next_state= STATE_START;   // Found long op
+        return(tokval);
+      }
+      state = STATE_CHAR;               // Something fishy found
+      break;
+
+    case STATE_BOOL:
+      if (c != yyPeek())
+      {
+        state=STATE_CHAR;
+        break;
+      }
+      yySkip();
+      tokval = find_keyword(lex,2,0);   // Is a bool operator
+      lex->next_state= STATE_START;     // Allow signed numbers
+      return(tokval);
+
+    case STATE_STRING:                  // Incomplete text string
+      if (!(yylval->lex_str.str = get_text(lex)))
+      {
+        state= STATE_CHAR;              // Read char by char
+        break;
+      }
+      yylval->lex_str.length=lex->yytoklen;
+      return(TEXT_STRING);
+
+    case STATE_COMMENT:                 // Comment
+      while ((c = yyGet()) != '\n' && c) ;
+      yyUnget();                        // Safety against eof
+      state = STATE_START;              // Try again
+      break;
+    case STATE_LONG_COMMENT:            /* Long C comment? */
+      if (yyPeek() != '*')
+      {
+        state=STATE_CHAR;               // Probable division
+        break;
+      }
+      yySkip();                         // Skip '*'
+      if (yyPeek() == '!')              // MySQL command in comment
+      {
+        ulong version=MYSQL_VERSION_ID;
+        yySkip();
+        state=STATE_START;
+        if (isdigit(yyPeek()))
+        {                               // Version number
+          version=strtol((char*) lex->ptr,(char**) &lex->ptr,10);
+        }
+        if (version <= MYSQL_VERSION_ID)
+        {
+          lex->in_comment=1;            // Scan the comment body as SQL
+          break;
+        }
+      }
+      while (lex->ptr != lex->end_of_query &&
+             ((c=yyGet()) != '*' || yyPeek() != '/'))
+      {
+        if (c == '\n')
+          lex->yylineno++;
+      }
+      if (lex->ptr != lex->end_of_query)
+        yySkip();                       // remove last '/'
+      state = STATE_START;              // Try again
+      break;
+    case STATE_END_LONG_COMMENT:
+      if (lex->in_comment && yyPeek() == '/')
+      {
+        yySkip();
+        lex->in_comment=0;
+        state=STATE_START;
+      }
+      else
+        state=STATE_CHAR;               // Return '*'
+      break;
+    case STATE_SET_VAR:                 // Check if ':='
+      if (yyPeek() != '=')
+      {
+        state=STATE_CHAR;               // Return ':'
+        break;
+      }
+      yySkip();
+      return (SET_VAR);
+    case STATE_COLON:                   // optional line terminator
+      if (yyPeek())
+      {
+        state=STATE_CHAR;               // Return ';'
+        break;
+      }
+      /* fall through */
+    case STATE_EOL:
+      lex->next_state=STATE_END;        // Mark for next loop
+      return(END_OF_INPUT);
+    case STATE_END:
+      lex->next_state=STATE_END;
+      return(0);                        // We found end of input last time
+
+      // Actually real shouldn't start
+      // with . but allow them anyhow
+    case STATE_REAL_OR_POINT:
+      if (isdigit(yyPeek()))
+        state = STATE_REAL;             // Real
+      else
+      {
+        state = STATE_CHAR;             // return '.'
+        lex->next_state=STATE_IDENT_START;// Next is an ident (not a keyword)
+      }
+      break;
+    case STATE_USER_END:                // end '@' of user@hostname
+      if (state_map[yyPeek()] != STATE_STRING &&
+          state_map[yyPeek()] != STATE_USER_VARIABLE_DELIMITER)
+        lex->next_state=STATE_HOSTNAME; // Mark for next loop
+      yylval->lex_str.str=(char*) lex->ptr;
+      yylval->lex_str.length=1;
+      return((int) '@');
+    case STATE_HOSTNAME:                // host name after the '@' of user@hostname
+      for (c=yyGet() ;
+           isalnum(c) || c == '.' || c == '_' || c == '$';
+           c= yyGet()) ;
+      yylval->lex_str=get_token(lex,yyLength());
+      return(LEX_HOSTNAME);
+    }
+  }
+}