diff options
Diffstat (limited to 'sql/gen_lex_hash.cc')
-rw-r--r-- | sql/gen_lex_hash.cc | 560 |
1 files changed, 560 insertions, 0 deletions
diff --git a/sql/gen_lex_hash.cc b/sql/gen_lex_hash.cc new file mode 100644 index 00000000000..3e1f6f15a6b --- /dev/null +++ b/sql/gen_lex_hash.cc @@ -0,0 +1,560 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#define NO_YACC_SYMBOLS +#include <global.h> +#include <my_sys.h> +#include <m_string.h> +#ifndef __GNU_LIBRARY__ +#define __GNU_LIBRARY__ // Skipp warnings in getopt.h +#endif +#include <getopt.h> +#include "mysql_version.h" +#include "lex.h" + +bool opt_search=0,opt_verbose=0; + +#define max_allowed_array 8000 // Don't generate bigger arrays than this +#define max_symbol 32767 // Use this for 'not found' +#define how_much_for_plus 8 // 2-8 +#define type_count 1 // 1-5 +#define char_table_count 5 +#define total_symbols (sizeof(symbols)/sizeof(SYMBOL) +\ + sizeof(sql_functions)/sizeof(SYMBOL)) + +#define how_much_and INT_MAX24 + +/* + The following only have to work with characters in the set + used by SQL commands +*/ + +#undef tolower +#define tolower(a) ((a) >= 'A' && (a) <= 'Z') ? ((a)- 'A' + 'a') : (a) + +static uint how_long_symbols,function_plus,function_mod,function_type; +static uint char_table[256]; +static uchar unique_length[256]; +static uchar bits[how_much_and/8+1]; +static uint primes[max_allowed_array+1]; +static ulong hash_results[type_count][how_much_for_plus+1][total_symbols]; +static ulong start_value=0; + +struct rand_struct { + unsigned long seed1,seed2,max_value; + double max_value_dbl; +}; + +void randominit(struct rand_struct *rand_st,ulong seed1, ulong seed2) +{ /* For mysql 3.21.# */ + rand_st->max_value= 0x3FFFFFFFL; + rand_st->max_value_dbl=(double) rand_st->max_value; + rand_st->seed1=seed1%rand_st->max_value ; + rand_st->seed2=seed2%rand_st->max_value; +} + +double rnd(struct rand_struct *rand_st) +{ + rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value; + rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value; + return (((double) rand_st->seed1)/rand_st->max_value_dbl); +} + + +static void make_char_table(ulong t1,ulong t2,int type) +{ + uint i; + struct rand_struct rand_st; + randominit(&rand_st,t1,t2); + + for (i=0 ; i < 256 ; i++) + { + switch (type) { + case 0: char_table[i]= i + (i << 8); break; + case 1: char_table[i]= i + ((i ^255 ) << 8); break; + case 2: char_table[i]= i; break; + case 3: char_table[i]= i + ((uint) (rnd(&rand_st)*255) << 8); break; + case 4: char_table[i]= (uint) (rnd(&rand_st)*255) + (i << 8); break; + } + } + char_table[0]|=1+257; // Avoid problems with 0 + for (i=0 ; i < 256 ; i++) + { + uint tmp=(uint) (rnd(&rand_st)*255); + swap(uint,char_table[i],char_table[tmp]); + } + /* lower characters should be mapped to upper */ + for (i= 'a' ; i <= 'z' ; i++) + { + /* This loop is coded with extra variables to avoid a bug in gcc 2.96 */ + uchar tmp= (uchar) (i - 'a'); // Assume ascii + tmp+='A'; + char_table[i]=char_table[tmp]; + } +} + +/* Fill array primes with primes between start and 'max_allowed_array' */ + +static void make_prime_array(uint start) +{ + uint i,j,*to; + uint max_index=(uint) sqrt((double) max_allowed_array); + + bzero((char*) primes,sizeof(primes[0])*max_allowed_array); + + i=2; + while (i < max_index) + { + for (j=i+i ; j <= max_allowed_array ; j+=i) + primes[j]=1; + while (primes[++i]) ; + } + + to=primes; + for (i=start ; i <= max_allowed_array ; i++) + if (!primes[i]) + *to++=i; + *to=0; // end marker +} + +#define USE_char_table + +static ulong tab_index_function(const char *s,uint add, uint type) +{ + register ulong nr=start_value+char_table[(uchar) *s]; // Nice value + ulong pos=3; + uint tmp_length=unique_length[(uchar) *s]-1; + while (*++s && tmp_length-- > 0) + { + switch (type) { + case 0: + nr= (nr ^ (char_table[(uchar) *s] + (nr << add))); + break; + case 1: + nr= (nr + (char_table[(uchar) *s] + (nr << add))); + break; + case 2: + nr= (nr ^ (char_table[(uchar) *s] ^ (nr << add))); + break; + case 3: + nr= (char_table[(uchar) *s] ^ (nr << add)); + break; + case 4: + nr+= nr+nr+((nr & 63)+pos)*((ulong) char_table[(uchar) *s]); + pos+=add; + break; + } + } + return nr & INT_MAX24; +} + +static int search(bool write_warning) +{ + uint size_symbols = sizeof(symbols)/sizeof(SYMBOL); + uint size_functions = sizeof(sql_functions)/sizeof(SYMBOL); + uint size=size_symbols + size_functions; + uint i=0,found,*prime,type; + int igra[max_allowed_array],test_count=INT_MAX; + uint possible_plus[how_much_for_plus*type_count+type_count]; + + how_long_symbols = sizeof(symbols)/sizeof(SYMBOL); + + bzero((char*) possible_plus,sizeof(possible_plus)); + found=0; + + /* Check first which function_plus are possible */ + for (type=0 ; type < type_count ; type ++) + { + for (function_plus = 1; + function_plus <= how_much_for_plus; + function_plus++) + { + bzero((char*) bits,sizeof(bits)); + for (i=0; i < size; i++) + { + ulong order= tab_index_function ((i < how_long_symbols) ? + symbols[i].name : + sql_functions[i-how_long_symbols].name, + function_plus, type); + hash_results[type][function_plus][i]=order; + uint pos=order/8; + uint bit=order & 7; + if (bits[pos] & (1 << bit)) + break; + bits[pos]|=1 << bit; + } + if (i == size) + { + possible_plus[found++]=function_plus; + } + } + possible_plus[found++]=0; // End marker + } + if (found == type_count) + { + if (write_warning) + fprintf(stderr,"\ +The hash function didn't return a unique value for any parameter\n\ +You have to change gen_lex_code.cc, function 'tab_index_function' to\n\ +generate unique values for some parameter. When you have succeeded in this,\n\ +you have to change 'main' to print out the new function\n"); + return(1); + } + + if (opt_verbose) + fprintf (stderr,"Info: Possible add values: %d\n",found-type_count); + + for (prime=primes; (function_mod=*prime) ; prime++) + { + uint *plus_ptr=possible_plus; + for (type=0 ; type < type_count ; type++ ) + { + while ((function_plus= *plus_ptr++)) + { + ulong *order_pos= &hash_results[type][function_plus][0]; + if (test_count++ == INT_MAX) + { + test_count=1; + bzero((char*) igra,sizeof(igra)); + } + for (i=0; i<size ;i++) + { + ulong order; + order = *order_pos++ % function_mod; + if (igra[order] == test_count) + break; + igra[order] = test_count; + } + if (i == size) + { + *prime=0; // Mark this used + function_type=type; + return 0; // Found ok value + } + } + } + } + + function_mod=max_allowed_array; + if (write_warning) + fprintf (stderr,"Fatal error when generating hash for symbols\n\ +Didn't find suitable values for perfect hashing:\n\ +You have to edit gen_lex_hase.cc to generate a new hashing function.\n\ +You can try running gen_lex_hash with --search to find a suitable value\n\ +Symbol array size = %d\n",function_mod); + return -1; +} + + +void print_arrays() +{ + uint size_symbols = sizeof(symbols)/sizeof(SYMBOL); + uint size_functions = sizeof(sql_functions)/sizeof(SYMBOL); + uint size=size_symbols + size_functions; + uint i; + + fprintf(stderr,"Symbols: %d Functions: %d; Total: %d\nShifts per char: %d, Array size: %d\n", + size_symbols,size_functions,size_symbols+size_functions, + function_plus,function_mod); + + int *prva= (int*) my_alloca(sizeof(int)*function_mod); + for (i=0 ; i <= function_mod; i++) + prva[i]= max_symbol; + + for (i=0;i<size;i++) + { + ulong order = tab_index_function ((i < how_long_symbols) ? symbols[i].name : sql_functions[i - how_long_symbols].name,function_plus,function_type); + order %= function_mod; + prva [order] = i; + } + +#ifdef USE_char_table + printf("static uint16 char_table[] = {\n"); + for (i=0; i < 255 ;i++) // < 255 is correct + { + printf("%u,",char_table[i]); + if (((i+1) & 15) == 0) + puts(""); + } + printf("%d\n};\n\n\n",char_table[i]); +#endif + + printf("static uchar unique_length[] = {\n"); + for (i=0; i < 255 ;i++) // < 255 is correct + { + printf("%u,",unique_length[i]); + if (((i+1) & 15) == 0) + puts(""); + } + printf("%d\n};\n\n\n",unique_length[i]); + + printf("static uint16 my_function_table[] = {\n"); + for (i=0; i < function_mod-1 ;i++) + { + printf("%d,",prva[i]); + if (((i+1) % 12) == 0) + puts(""); + } + printf("%d\n};\n\n\n",prva[i]); + my_afree((gptr) prva); +} + + +static struct option long_options[] = +{ + {"search", no_argument, 0, 'S'}, + {"verbose", no_argument, 0, 'v'}, + {"version", no_argument, 0, 'V'}, + {"rnd1", required_argument, 0, 'r'}, + {"rnd2", required_argument, 0, 'R'}, + {"type", required_argument, 0, 't'}, + {0, 0, 0, 0} +}; + + +static void usage(int version) +{ + printf("%s Ver 3.0 Distrib %s, for %s (%s)\n", + my_progname, MYSQL_SERVER_VERSION, SYSTEM_TYPE, MACHINE_TYPE); + if (version) + return; + puts("Copyright (C) 2000 MySQL AB & MySQL Finland AB, by Sinisa and Monty"); + puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n"); + puts("This program generates a perfect hashing function for the sql_lex.cc"); + printf("Usage: %s [OPTIONS]\n", my_progname); + printf("\n\ +-r, --rnd1=# Set 1 part of rnd value for hash generator\n\ +-R, --rnd2=# Set 2 part of rnd value for hash generator\n\ +-t, --type=# Set type of char table to generate\n\ +-S, --search Search after good rnd1 and rnd2 values\n\ +-v, --verbose Write some information while the program executes\n\ +-V, --version Output version information and exit\n"); + +} + +static uint best_type; +static ulong best_t1,best_t2; + +static int get_options(int argc, char **argv) +{ + int c,option_index=0; + + while ((c=getopt_long(argc,argv,"?SvVr:R:t:", + long_options, &option_index)) != EOF) + { + switch(c) { + case 'r': + best_t1=atol(optarg); + break; + case 'R': + best_t2=atol(optarg); + break; + case 't': + best_type=atoi(optarg); + break; + case 'S': + opt_search=1; + break; + case 'v': + opt_verbose=1; + break; + case 'V': usage(1); exit(0); + case 'I': + case '?': + usage(0); + exit(0); + default: + fprintf(stderr,"illegal option: -%c\n",opterr); + usage(0); + exit(1); + } + } + argc-=optind; + argv+=optind; + if (argc >= 1) + { + usage(0); + exit(1); + } + return(0); +} + +static uint max_prefix(const char *name) +{ + uint i; + uint max_length=1; + for (i=0 ; i < sizeof(symbols)/sizeof(SYMBOL) ; i++) + { + const char *str=symbols[i].name; + if (str != name) + { + const char *str2=name; + uint length; + while (*str && *str == *str2) + { + str++; + str2++; + } + length=(uint) (str2 - name)+1; + if (length > max_length) + max_length=length; + } + } + for (i=0 ; i < sizeof(sql_functions)/sizeof(SYMBOL) ; i++) + { + const char *str=sql_functions[i].name; + if (str != name) + { + const char *str2=name; + uint length; + while (*str && *str == *str2) + { + str++; + str2++; + } + length=(uint) (str2 - name)+1; + if (length > max_length) + max_length=length; + } + } + return max_length; +} + + +static void make_max_length_table(void) +{ + uint i; + for (i=0 ; i < sizeof(symbols)/sizeof(SYMBOL) ; i++) + { + uint length=max_prefix(symbols[i].name); + if (length > unique_length[(uchar) symbols[i].name[0]]) + { + unique_length[(uchar) symbols[i].name[0]]=length; + unique_length[(uchar) tolower(symbols[i].name[0])]=length; + } + } + for (i=0 ; i < sizeof(sql_functions)/sizeof(SYMBOL) ; i++) + { + uint length=max_prefix(sql_functions[i].name); + if (length > unique_length[(uchar) sql_functions[i].name[0]]) + { + unique_length[(uchar) sql_functions[i].name[0]]=length; + unique_length[(uchar) tolower(sql_functions[i].name[0])]=length; + } + } +} + + +int main(int argc,char **argv) +{ + struct rand_struct rand_st; + static uint best_mod,best_add,best_functype; + int error; + + MY_INIT(argv[0]); + start_value=1277803L; best_t1=331678L; best_t2=4097229L; best_type=1; + /* mode=5791 add=6 func_type: 0 */ + if (get_options(argc,(char **) argv)) + exit(1); + + make_max_length_table(); + make_char_table(best_t1,best_t2,best_type); + make_prime_array(sizeof(symbols)/sizeof(SYMBOL) + + sizeof(sql_functions)/sizeof(SYMBOL)); + + if ((error=search(1)) > 0 || error && !opt_search) + exit(1); // This should work + best_mod=function_mod; best_add=function_plus; best_functype=function_type; + + if (opt_search) + { + time_t start_time=time((time_t*) 0); + randominit(&rand_st,start_time,start_time/2); // Some random values + printf("start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */\n", + start_value, best_t1,best_t2,best_type,best_mod,best_add, + best_functype); + + for (uint i=1 ; i <= 100000 ; i++) + { + if (i % 10 == 0) + { + putchar('.'); + fflush(stdout); + } + ulong t1=(ulong) (rnd(&rand_st)*INT_MAX24); + ulong t2=(ulong) (rnd(&rand_st)*INT_MAX24); + uint type=(int) (rnd(&rand_st)*char_table_count); + start_value=(ulong) (rnd(&rand_st)*INT_MAX24); + make_char_table(t1,t2,type); + if (!search(0)) + { + best_mod=function_mod; best_add=function_plus; + best_functype=function_type; + best_t1=t1; best_t2=t2; best_type=type; + printf("\nstart_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d func_type: %d */\n", + start_value,best_t1,best_t2,best_type,best_mod,best_add,best_functype); + } + } + } + + function_mod=best_mod; function_plus=best_add; + make_char_table(best_t1,best_t2,best_type); + + printf("/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB\n\ + This program is free software; you can redistribute it and/or modify\n\ + it under the terms of the GNU General Public License as published by\n\ + the Free Software Foundation; either version 2 of the License, or\n\ + (at your option) any later version.\n\n\ + This program is distributed in the hope that it will be useful,\n\ + but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ + GNU General Public License for more details.\n\n\ + You should have received a copy of the GNU General Public License\n\ + along with this program; if not, write to the Free Software\n\ + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */\n\n"); + +printf("/* This code is generated by gen_lex_hash.cc that seeks for a perfect\nhash function */\n\n"); + printf("#include \"lex.h\"\n\n"); + + print_arrays(); + + printf("/* t1= %lu t2=%lu type= %d */\n\n",best_t1,best_t2,best_type); + + printf("inline SYMBOL *get_hash_symbol(const char *s,unsigned int length,bool function)\n\ +{\n\ + ulong idx = %lu+char_table[(uchar) *s];\n\ + SYMBOL *sim;\n\ + const char *start=s;\n\ + int i=unique_length[(uchar) *s++];\n\ + if (i > (int) length) i=(int) length;\n\ + while (--i > 0)\n\ + idx= (idx ^ (char_table[(uchar) *s++] + (idx << %d)));\n\ + idx=my_function_table[(idx & %d) %% %d];\n\ + if (idx >= %d)\n\ + {\n\ + if (!function || idx >= %d) return (SYMBOL*) 0;\n\ + sim=sql_functions + (idx - %d);\n\ + }\n\ + else\n\ + sim=symbols + idx;\n\ + if ((length != sim->length) || lex_casecmp(start,sim->name,length))\n\ + return (SYMBOL *)0;\n\ + return sim;\n\ +}\n",(ulong) start_value,(int) function_plus,(int) how_much_and,function_mod,how_long_symbols,max_symbol,how_long_symbols); + exit(0); + return 0; +} |