From 7dd4eb71fe7576e428fe1ecaaad214d3a39ff4dc Mon Sep 17 00:00:00 2001 From: "monty@tik.mysql.fi" <> Date: Sat, 12 Jan 2002 15:42:54 +0200 Subject: Added support of null keys in HEAP tables Added ORDER BY optimization --- Docs/manual.texi | 189 ++- heap/_check.c | 6 +- heap/heapdef.h | 1 + heap/hp_hash.c | 88 +- heap/hp_open.c | 4 + heap/hp_test2.c | 39 +- heap/hp_write.c | 4 +- include/heap.h | 4 +- include/my_base.h | 3 +- isam/static.c | 2 +- myisam/mi_static.c | 2 +- sql/Makefile.am | 4 +- sql/ha_berkeley.cc | 31 + sql/ha_berkeley.h | 3 +- sql/ha_heap.cc | 13 +- sql/ha_heap.h | 2 +- sql/ha_innobase.cc | 3402 -------------------------------------------------- sql/ha_innobase.h | 192 --- sql/ha_innodb.cc | 3420 +++++++++++++++++++++++++++++++++++++++++++++++++++ sql/ha_innodb.h | 194 +++ sql/ha_isam.cc | 9 + sql/ha_isam.h | 3 +- sql/ha_isammrg.h | 5 +- sql/ha_myisam.cc | 49 +- sql/ha_myisam.h | 15 +- sql/ha_myisammrg.cc | 9 + sql/ha_myisammrg.h | 3 +- sql/handler.cc | 2 +- sql/handler.h | 7 +- sql/mysqld.cc | 2 +- sql/sql_delete.cc | 2 +- sql/sql_select.cc | 139 ++- sql/sql_select.h | 5 +- 33 files changed, 4126 insertions(+), 3727 deletions(-) delete mode 100644 sql/ha_innobase.cc delete mode 100644 sql/ha_innobase.h create mode 100644 sql/ha_innodb.cc create mode 100644 sql/ha_innodb.h diff --git a/Docs/manual.texi b/Docs/manual.texi index 67791aac9f4..0724e5180c2 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -750,7 +750,7 @@ Large server clusters using replication are in production use, with good results. Work on enhanced replication features is continuing in MySQL 4.0. -@item @code{InnoDB} tables -- Gamma +@item @code{InnoDB} tables -- Stable While the @code{InnoDB} transactional table handler is a fairly recent addition to @code{MySQL}, it appears to work well and is already being used in some large, heavy load production systems. @@ -3692,9 +3692,6 @@ Allow users to change startup options without taking down the server. 
@item Fail safe replication. @item -More functions for full-text search. -@xref{Fulltext TODO}. -@item New key cache @item New table definition file format (@code{.frm} files) This will enable us @@ -3727,9 +3724,6 @@ When using @code{SET CHARACTER SET} we should translate the whole query at once and not only strings. This will enable users to use the translated characters in database, table and column names. @item -Add a portable interface over @code{gethostbyaddr_r()} so that we can change -@code{ip_to_hostname()} to not block other threads while doing DNS lookups. -@item Add @code{record_in_range()} method to @code{MERGE} tables to be able to choose the right index when there is many to choose from. We should also extend the info interface to get the key distribution for each index, @@ -3861,15 +3855,6 @@ Don't add automatic @code{DEFAULT} values to columns. Give an error when using an @code{INSERT} that doesn't contain a column that doesn't have a @code{DEFAULT}. @item -Caching of queries and results. This should be done as a separated -module that examines each query and if this is query is in the cache -the cached result should be returned. When one updates a table one -should remove as few queries as possible from the cache. -This should give a big speed bost on machines with much RAM where -queries are often repeated (like WWW applications). -One idea would be to only cache queries of type: -@code{SELECT CACHED ...} -@item Fix @file{libmysql.c} to allow two @code{mysql_query()} commands in a row without reading results or give a nice error message when one does this. @item @@ -3934,10 +3919,7 @@ ADD_TO_SET(value,set) and REMOVE_FROM_SET(value,set) Add use of @code{t1 JOIN t2 ON ...} and @code{t1 JOIN t2 USING ...} Currently, you can only use this syntax with @code{LEFT JOIN}. @item -Add full support for @code{unsigned long long} type. -@item -Many more variables for @code{show status}. Counts for: -@code{INSERT}/@code{DELETE}/@code{UPDATE} statements. 
Records reads and +Many more variables for @code{show status}. Records reads and updated. Selects on 1 table and selects with joins. Mean number of tables in select. Number of @code{ORDER BY} and @code{GROUP BY} queries. @item @@ -3952,7 +3934,7 @@ should be implemented. @item Add support for UNICODE. @item -@code{NATURAL JOIN} and @code{UNION JOIN} +@code{NATURAL JOIN}. @item Allow @code{select a from crash_me left join crash_me2 using (a)}; In this case @code{a} is assumed to come from the @code{crash_me} table. @@ -4069,8 +4051,6 @@ Use of full calculation names in the order part. (For ACCESS97) @code{MINUS}, @code{INTERSECT} and @code{FULL OUTER JOIN}. (Currently @code{UNION} (in 4.0) and @code{LEFT OUTER JOIN} are supported) @item -Allow @code{UNIQUE} on fields that can be @code{NULL}. -@item @code{SQL_OPTION MAX_SELECT_TIME=#} to put a time limit on a query. @item Make the update log to a database. @@ -24578,6 +24558,7 @@ great tool to find out if this is a problem with your query. * Where optimisations:: How MySQL optimises @code{WHERE} clauses * DISTINCT optimisation:: How MySQL Optimises @code{DISTINCT} * LEFT JOIN optimisation:: How MySQL optimises @code{LEFT JOIN} +* ORDER BY optimisation:: * LIMIT optimisation:: How MySQL optimises @code{LIMIT} * Insert speed:: Speed of @code{INSERT} queries * Update speed:: Speed of @code{UPDATE} queries @@ -25171,7 +25152,7 @@ MySQL will stop reading from t2 (for that particular row in t1) when the first row in t2 is found. 
-@node LEFT JOIN optimisation, LIMIT optimisation, DISTINCT optimisation, Query Speed +@node LEFT JOIN optimisation, ORDER BY optimisation, DISTINCT optimisation, Query Speed @subsection How MySQL Optimises @code{LEFT JOIN} and @code{RIGHT JOIN} @findex LEFT JOIN @@ -25237,7 +25218,119 @@ SELECT * FROM b,a LEFT JOIN c ON (c.key=a.key) LEFT JOIN d (d.key=a.key) WHERE b @end example -@node LIMIT optimisation, Insert speed, LEFT JOIN optimisation, Query Speed +@node ORDER BY optimisation, LIMIT optimisation, LEFT JOIN optimisation, Query Speed +@subsection How MySQL Optimises @code{ORDER BY} + +In some cases MySQL can use an index to satisfy an @code{ORDER BY} or +@code{GROUP BY} request without doing any extra sorting. + +The index can also be used even if the @code{ORDER BY} doesn't match the +index exactly, as long as all the unused index parts and all the extra +@code{ORDER BY} columns are constants in the @code{WHERE} +clause. The following queries will use the index to resolve the +@code{ORDER BY} / @code{GROUP BY} part: + +@example +SELECT * FROM t1 ORDER BY key_part1,key_part2,... +SELECT * FROM t1 WHERE key_part1=constant ORDER BY key_part2 +SELECT * FROM t1 WHERE key_part1=constant GROUP BY key_part2 +SELECT * FROM t1 ORDER BY key_part1 DESC,key_part2 DESC +SELECT * FROM t1 WHERE key_part1=1 ORDER BY key_part1 DESC,key_part2 DESC +@end example + +Some cases where MySQL can NOT use indexes to resolve the @code{ORDER +BY}: (Note that MySQL will still use indexes to find the rows that +match the @code{WHERE} clause): + +@itemize @bullet +@item +You are doing an @code{ORDER BY} on different keys: + +@code{SELECT * FROM t1 ORDER BY key1,key2} +@item +You are doing an @code{ORDER BY} on non-consecutive key parts. + +@code{SELECT * FROM t1 WHERE key2=constant ORDER BY key_part2} + +@item +You are mixing @code{ASC} and @code{DESC}.
+ +@code{SELECT * FROM t1 ORDER BY key_part1 DESC,key_part2 ASC} + +@item +The key used to fetch the rows is not the same one that is used to +do the @code{ORDER BY}: + +@code{SELECT * FROM t1 WHERE key2=constant ORDER BY key1} + +@item +You are joining many tables and the columns you are doing an @code{ORDER +BY} on are not all from the first non-const table that is used to +retrieve rows (This is the first table in the @code{EXPLAIN} output which +doesn't use a @code{const} row fetch method). + +@item +You have different @code{ORDER BY} and @code{GROUP BY} expressions. + +@item +The used table index is an index type that doesn't store rows in order. +(Like the indexes in @code{HEAP} tables). +@end itemize + + +In the cases where MySQL has to sort the result, it uses the following +algorithm: + +@itemize @bullet +@item +Read all rows according to key or by table scanning. +Rows that don't match the WHERE clause are skipped. +@item +Store the sort-key in a buffer (of size @code{sort_buffer}). +@item +When the buffer gets full, run a qsort on it and store the result +in a temporary file. Save a pointer to the sorted block. +(In the case where all rows fit into the sort buffer, no temporary +file is created) +@item +Repeat the above until all rows have been read. +@item +Do a multi-merge of up to @code{MERGEBUFF} (7) regions to one block in +another temporary file. Repeat until all blocks from the first file +are in the second file. +@item +Repeat the following until there are fewer than @code{MERGEBUFF2} (15) +blocks left. +@item +On the last multi-merge, only the pointer to the row (last part of +the sort-key) is written to a result file. +@item +Now the code in @file{sql/records.cc} will be used to read through them +in sorted order by using the row pointers in the result file. To +optimize this, we read in a big block of row pointers, sort these and +then we read the rows in the sorted order into a row buffer +(@code{record_rnd_buffer}).
+@end itemize + +With @code{EXPLAIN SELECT ... ORDER BY} you can check whether MySQL can use +indexes to resolve the query. If you get @code{Using filesort} in the +@code{extra} column, then MySQL can't use indexes to resolve the +@code{ORDER BY}. @xref{EXPLAIN}. + +If you want to have a higher @code{ORDER BY} speed, you should first +see if you can get MySQL to use indexes instead of having to do an extra +sorting phase. If this is not possible, then you can do the following: + +@itemize @bullet +@item +Increase the size of the @code{sort_buffer} variable. +@item +Increase the size of the @code{record_rnd_buffer} variable. +@item +Change @code{tmpdir} to point to a dedicated disk with lots of empty space. +@end itemize + +@node LIMIT optimisation, Insert speed, ORDER BY optimisation, Query Speed @subsection How MySQL Optimises @code{LIMIT} @findex LIMIT @@ -25986,19 +26079,9 @@ SELECT MIN(key_part2),MAX(key_part2) FROM table_name where key_part1=10 @item Sort or group a table if the sorting or grouping is done on a leftmost -prefix of a usable key (for example, @code{ORDER BY key_part_1,key_part_2 }). The -key is read in reverse order if all key parts are followed by @code{DESC}. - -The index can also be used even if the @code{ORDER BY} doesn't match the index -exactly, as long as all the unused index parts and all the extra -are @code{ORDER BY} columns are constants in the @code{WHERE} clause. The -following queries will use the index to resolve the @code{ORDER BY} part: - -@example -SELECT * FROM foo ORDER BY key_part1,key_part2,key_part3; -SELECT * FROM foo WHERE column=constant ORDER BY column, key_part1; -SELECT * FROM foo WHERE key_part1=const GROUP BY key_part2; -@end example +prefix of a usable key (for example, @code{ORDER BY +key_part_1,key_part_2 }). The key is read in reverse order if all key +parts are followed by @code{DESC}. @xref{ORDER BY optimisation}.
@item In some cases a query can be optimised to retrieve values without @@ -33355,7 +33438,12 @@ DELETE [LOW_PRIORITY | QUICK] FROM table_name or -DELETE [LOW_PRIORITY | QUICK] table_name[.*] [table_name[.*] ...] FROM +DELETE [LOW_PRIORITY | QUICK] table_name[.*] [,table_name[.*] ...] FROM +table-references [WHERE where_definition] + +or + +DELETE [LOW_PRIORITY | QUICK] FROM table_name[.*], [table_name[.*] ...] USING table-references [WHERE where_definition] @end example @@ -33392,18 +33480,23 @@ TABLE} statement or the @code{myisamchk} utility to reorganise tables. @code{OPTIMIZE TABLE} is easier, but @code{myisamchk} is faster. See @ref{OPTIMIZE TABLE, , @code{OPTIMIZE TABLE}} and @ref{Optimisation}. -The multi table delete format is supported starting from MySQL 4.0.0. +The first multi table delete format is supported starting from MySQL 4.0.0. +The second multi table delete format is supported starting from MySQL 4.0.2. -The idea is that only matching rows from the tables listed @strong{before} the -@code{FROM} clause is deleted. The effect is that you can delete rows -from many tables at the same time and also have additional tables that -are used for searching. +The idea is that only matching rows from the tables listed +@strong{before} the @code{FROM} or before the @code{USING} clause is +deleted. The effect is that you can delete rows from many tables at the +same time and also have additional tables that are used for searching. The @code{.*} after the table names is there just to be compatible with @code{Access}: @example DELETE t1,t2 FROM t1,t2,t3 WHERE t1.id=t2.id AND t2.id=t3.id + +or + +DELETE FROM t1,t2 USING t1,t2,t3 WHERE t1.id=t2.id AND t2.id=t3.id @end example In the above case we delete matching rows just from tables @code{t1} and @@ -48028,10 +48121,18 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. @itemize @bullet @item +Added support for @code{NULL} keys in HEAP tables. 
+@item +Use index for @code{ORDER BY} in queries of type: +@code{SELECT * FROM t1 WHERE key_part1=1 ORDER BY key_part1 DESC,key_part2 DESC} +@item Fixed bug in @code{FLUSH QUERY CACHE}. @item Added @code{CAST()} and @code{CONVERT()} functions. @item +@code{CREATE ... SELECT} on @code{DATE} and @code{TIME} functions now +create columns of the expected type. +@item Changed order of how keys are created in tables. @item Added a new columns @code{Null} and @code{Index_type} to @code{SHOW INDEX}. diff --git a/heap/_check.c b/heap/_check.c index 404cf22a542..03fb664cba9 100644 --- a/heap/_check.c +++ b/heap/_check.c @@ -79,9 +79,11 @@ static int check_one_key(HP_KEYDEF *keydef, uint keynr, ulong records, } DBUG_PRINT("info", ("records: %ld seeks: %d max links: %d hitrate: %.2f", - records,seek,max_links,(float) seek / (float) (records ? records : 1))); + records,seek,max_links, + (float) seek / (float) (records ? records : 1))); if (print_status) printf("Key: %d records: %ld seeks: %d max links: %d hitrate: %.2f\n", - keynr, records, seek, max_links, (float) seek / (float) records); + keynr, records, seek, max_links, + (float) seek / (float) (records ? 
records : 1)); return error; } diff --git a/heap/heapdef.h b/heap/heapdef.h index 40985336791..bdd7de45370 100644 --- a/heap/heapdef.h +++ b/heap/heapdef.h @@ -70,6 +70,7 @@ extern int _hp_rec_key_cmp(HP_KEYDEF *keydef,const byte *rec1, extern int _hp_key_cmp(HP_KEYDEF *keydef,const byte *rec, const byte *key); extern void _hp_make_key(HP_KEYDEF *keydef,byte *key,const byte *rec); +extern my_bool hp_if_null_in_key(HP_KEYDEF *keyinfo, const byte *record); extern int _hp_close(register HP_INFO *info); extern void _hp_clear(HP_SHARE *info); diff --git a/heap/hp_hash.c b/heap/hp_hash.c index 0adbe64a070..e29e51d2b75 100644 --- a/heap/hp_hash.c +++ b/heap/hp_hash.c @@ -158,11 +158,22 @@ ulong _hp_hashnr(register HP_KEYDEF *keydef, register const byte *key) { uchar *pos=(uchar*) key; key+=seg->length; + if (seg->null_bit) + { + key++; /* Skipp null byte */ + if (*pos) /* Found null */ + { + nr^= (nr << 1) | 1; + continue; + } + pos++; + } if (seg->type == HA_KEYTYPE_TEXT) { for (; pos < (uchar*) key ; pos++) { - nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) my_sort_order[(uint) *pos]))+ (nr << 8); + nr^=(ulong) ((((uint) nr & 63)+nr2) * + ((uint) my_sort_order[(uint) *pos])) + (nr << 8); nr2+=3; } } @@ -170,7 +181,7 @@ ulong _hp_hashnr(register HP_KEYDEF *keydef, register const byte *key) { for (; pos < (uchar*) key ; pos++) { - nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8); + nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos)) + (nr << 8); nr2+=3; } } @@ -188,11 +199,20 @@ ulong _hp_rec_hashnr(register HP_KEYDEF *keydef, register const byte *rec) for (seg=keydef->seg,endseg=seg+keydef->keysegs ; seg < endseg ; seg++) { uchar *pos=(uchar*) rec+seg->start,*end=pos+seg->length; + if (seg->null_bit) + { + if (rec[seg->null_pos] & seg->null_bit) + { + nr^= (nr << 1) | 1; + continue; + } + } if (seg->type == HA_KEYTYPE_TEXT) { for (; pos < end ; pos++) { - nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) my_sort_order[(uint) *pos]))+ (nr << 8); + nr^=(ulong) ((((uint) 
nr & 63)+nr2)* + ((uint) my_sort_order[(uint) *pos]))+ (nr << 8); nr2+=3; } } @@ -234,6 +254,16 @@ ulong _hp_hashnr(register HP_KEYDEF *keydef, register const byte *key) { uchar *pos=(uchar*) key; key+=seg->length; + if (seg->null_bit) + { + key++; + if (*pos) + { + nr^= (nr << 1) | 1; + continue; + } + pos++; + } if (seg->type == HA_KEYTYPE_TEXT) { for (; pos < (uchar*) key ; pos++) @@ -264,6 +294,14 @@ ulong _hp_rec_hashnr(register HP_KEYDEF *keydef, register const byte *rec) for (seg=keydef->seg,endseg=seg+keydef->keysegs ; seg < endseg ; seg++) { uchar *pos=(uchar*) rec+seg->start,*end=pos+seg->length; + if (seg->null_bit) + { + if (rec[seg->null_pos] & seg->null_bit) + { + nr^= (nr << 1) | 1; + continue; + } + } if (seg->type == HA_KEYTYPE_TEXT) { for ( ; pos < end ; pos++) @@ -295,6 +333,14 @@ int _hp_rec_key_cmp(HP_KEYDEF *keydef, const byte *rec1, const byte *rec2) for (seg=keydef->seg,endseg=seg+keydef->keysegs ; seg < endseg ; seg++) { + if (seg->null_bit) + { + if ((rec1[seg->null_pos] & seg->null_bit) != + (rec2[seg->null_pos] & seg->null_bit)) + return 1; + if (rec1[seg->null_pos] & seg->null_bit) + continue; + } if (seg->type == HA_KEYTYPE_TEXT) { if (my_sortcmp(rec1+seg->start,rec2+seg->start,seg->length)) @@ -309,14 +355,24 @@ int _hp_rec_key_cmp(HP_KEYDEF *keydef, const byte *rec1, const byte *rec2) return 0; } - /* Compare a key in a record to a hole key */ + /* Compare a key in a record to a whole key */ int _hp_key_cmp(HP_KEYDEF *keydef, const byte *rec, const byte *key) { HP_KEYSEG *seg,*endseg; - for (seg=keydef->seg,endseg=seg+keydef->keysegs ; seg < endseg ; seg++) + for (seg=keydef->seg,endseg=seg+keydef->keysegs ; + seg < endseg ; + key+= (seg++)->length) { + if (seg->null_bit) + { + int found_null=test(rec[seg->null_pos] & seg->null_bit); + if (found_null != (int) *key++) + return 1; + if (found_null) + continue; + } if (seg->type == HA_KEYTYPE_TEXT) { if (my_sortcmp(rec+seg->start,key,seg->length)) @@ -327,7 +383,6 @@ int 
_hp_key_cmp(HP_KEYDEF *keydef, const byte *rec, const byte *key) if (bcmp(rec+seg->start,key,seg->length)) return 1; } - key+=seg->length; } return 0; } @@ -341,7 +396,28 @@ void _hp_make_key(HP_KEYDEF *keydef, byte *key, const byte *rec) for (seg=keydef->seg,endseg=seg+keydef->keysegs ; seg < endseg ; seg++) { + if (seg->null_bit) + *key++= test(rec[seg->null_pos] & seg->null_bit); memcpy(key,rec+seg->start,(size_t) seg->length); key+=seg->length; } } + + +/* + Test if any of the key parts are NULL. + Return: + 1 if any of the key parts was NULL + 0 otherwise +*/ + +my_bool hp_if_null_in_key(HP_KEYDEF *keydef, const byte *record) +{ + HP_KEYSEG *seg,*endseg; + for (seg=keydef->seg,endseg=seg+keydef->keysegs ; seg < endseg ; seg++) + { + if (seg->null_bit && (record[seg->null_pos] & seg->null_bit)) + return 1; + } + return 0; +} diff --git a/heap/hp_open.c b/heap/hp_open.c index 511a63e95da..69e02945253 100644 --- a/heap/hp_open.c +++ b/heap/hp_open.c @@ -44,7 +44,11 @@ HP_INFO *heap_open(const char *name, int mode, uint keys, HP_KEYDEF *keydef, key_segs+= keydef[i].keysegs; bzero((char*) &keydef[i].block,sizeof(keydef[i].block)); for (j=length=0 ; j < keydef[i].keysegs; j++) + { length+=keydef[i].seg[j].length; + if (keydef[i].seg[j].null_bit) + keydef[i].flag |= HA_NULL_PART_KEY; + } keydef[i].length=length; if (length > max_length) max_length=length; diff --git a/heap/hp_test2.c b/heap/hp_test2.c index 458d933016f..e2570893519 100644 --- a/heap/hp_test2.c +++ b/heap/hp_test2.c @@ -30,7 +30,7 @@ #include #define MAX_RECORDS 100000 -#define MAX_KEYS 3 +#define MAX_KEYS 4 static int get_options(int argc, char *argv[]); static int rnd(int max_value); @@ -40,16 +40,20 @@ static uint flag=0,verbose=0,testflag=0,recant=10000,silent=0; static uint keys=MAX_KEYS; static uint16 key1[1001]; static my_bool key3[MAX_RECORDS]; +static int reclength=39; + static int calc_check(byte *buf,uint length); +static void make_record(char *record, uint n1, uint n2, uint n3, + const 
char *mark, uint count); - /* Huvudprogrammet */ +/* Main program */ int main(int argc, char *argv[]) { register uint i,j; uint ant,n1,n2,n3; - uint reclength,write_count,update,opt_delete,check2,dupp_keys,found_key; + uint write_count,update,opt_delete,check2,dupp_keys,found_key; int error; ulong pos; unsigned long key_check; @@ -66,7 +70,6 @@ int main(int argc, char *argv[]) filename2= "test2_2"; file=file2=0; get_options(argc,argv); - reclength=37; write_count=update=opt_delete=0; key_check=0; @@ -77,21 +80,33 @@ int main(int argc, char *argv[]) keyinfo[0].seg[0].type=HA_KEYTYPE_BINARY; keyinfo[0].seg[0].start=0; keyinfo[0].seg[0].length=6; + keyinfo[0].seg[0].null_bit=0; keyinfo[1].seg=keyseg+1; keyinfo[1].keysegs=2; keyinfo[1].flag=0; keyinfo[1].seg[0].type=HA_KEYTYPE_BINARY; keyinfo[1].seg[0].start=7; keyinfo[1].seg[0].length=6; + keyinfo[1].seg[0].null_bit=0; keyinfo[1].seg[1].type=HA_KEYTYPE_TEXT; - keyinfo[1].seg[1].start=0; /* Tv}delad nyckel */ + keyinfo[1].seg[1].start=0; /* key in two parts */ keyinfo[1].seg[1].length=6; + keyinfo[1].seg[1].null_bit=0; keyinfo[2].seg=keyseg+3; keyinfo[2].keysegs=1; keyinfo[2].flag=HA_NOSAME; keyinfo[2].seg[0].type=HA_KEYTYPE_BINARY; keyinfo[2].seg[0].start=12; keyinfo[2].seg[0].length=8; + keyinfo[2].seg[0].null_bit=0; + keyinfo[3].keysegs=1; + keyinfo[3].flag=HA_NOSAME; + keyinfo[3].seg=keyseg+4; + keyinfo[3].seg[0].type=HA_KEYTYPE_BINARY; + keyinfo[3].seg[0].start=37; + keyinfo[3].seg[0].length=1; + keyinfo[3].seg[0].null_bit=1; + keyinfo[3].seg[0].null_pos=38; bzero((char*) key1,sizeof(key1)); bzero((char*) key3,sizeof(key3)); @@ -110,7 +125,7 @@ int main(int argc, char *argv[]) for (i=0 ; i < recant ; i++) { n1=rnd(1000); n2=rnd(100); n3=rnd(min(recant*5,MAX_RECORDS)); - sprintf(record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count); + make_record(record,n1,n2,n3,"Pos",write_count); if (heap_write(file,record)) { @@ -191,7 +206,7 @@ int main(int argc, char *argv[]) for (i=0 ; i < write_count/10 ; i++) { n1=rnd(1000); 
n2=rnd(100); n3=rnd(min(recant*2,MAX_RECORDS)); - sprintf(record2,"%6d:%4d:%8d:XXX: %4d ",n1,n2,n3,update); + make_record(record2, n1, n2, n3, "XXX", update); if (rnd(2) == 1) { if (heap_scan_init(file)) @@ -654,3 +669,13 @@ static int calc_check(byte *buf, uint length) check+= (int) (uchar) *(buf++); return check; } + +static void make_record(char *record, uint n1, uint n2, uint n3, + const char *mark, uint count) +{ + bfill(record,reclength,' '); + sprintf(record,"%6d:%4d:%8d:%3.3s: %4d", + n1,n2,n3,mark,count); + record[37]='A'; /* Store A in null key */ + record[38]=1; /* set as null */ +} diff --git a/heap/hp_write.c b/heap/hp_write.c index 0d68bb96580..a219c8be23a 100644 --- a/heap/hp_write.c +++ b/heap/hp_write.c @@ -239,7 +239,9 @@ int _hp_write_key(register HP_SHARE *info, HP_KEYDEF *keyinfo, } /* Check if dupplicated keys */ - if ((keyinfo->flag & HA_NOSAME) && pos == gpos) + if ((keyinfo->flag & HA_NOSAME) && pos == gpos && + (!(keyinfo->flag & HA_NULL_PART_KEY) || + !hp_if_null_in_key(keyinfo, record))) { pos=empty; do diff --git a/include/heap.h b/include/heap.h index 9efb873ec93..02b04e2b3ec 100644 --- a/include/heap.h +++ b/include/heap.h @@ -78,11 +78,13 @@ typedef struct st_hp_keyseg /* Key-portion */ uint start; /* Start of key in record (from 0) */ uint length; /* Keylength */ uint type; + uint null_bit; /* bit set in row+null_pos */ + uint null_pos; } HP_KEYSEG; typedef struct st_hp_keydef /* Key definition with open */ { - uint flag; /* NOSAME */ + uint flag; /* HA_NOSAME | HA_NULL_PART_KEY */ uint keysegs; /* Number of key-segment */ uint length; /* Length of key (automatic) */ HP_KEYSEG *seg; diff --git a/include/my_base.h b/include/my_base.h index dc88e248469..6494fa93ab9 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -68,7 +68,8 @@ enum ha_rkey_function { enum ha_key_alg { HA_KEY_ALG_BTREE=0, /* B-tree, default one */ - HA_KEY_ALG_RTREE=1 /* R-tree, for spatial searches */ + HA_KEY_ALG_RTREE=1, /* R-tree, for spatial searches */ 
+ HA_KEY_ALG_HASH=2 /* HASH keys (HEAP tables) */ }; /* The following is parameter to ha_extra() */ diff --git a/isam/static.c b/isam/static.c index fbc98ed68fa..9c68a0cfdba 100644 --- a/isam/static.c +++ b/isam/static.c @@ -15,7 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* - Static variables for pisam library. All definied here for easy making of + Static variables for ISAM library. All definied here for easy making of a shared library */ diff --git a/myisam/mi_static.c b/myisam/mi_static.c index 55efb727309..86d7fc38f25 100644 --- a/myisam/mi_static.c +++ b/myisam/mi_static.c @@ -15,7 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* - Static variables for pisam library. All definied here for easy making of + Static variables for MyISAM library. All definied here for easy making of a shared library */ diff --git a/sql/Makefile.am b/sql/Makefile.am index 88306098107..e1ed9ad8915 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -51,7 +51,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ sql_manager.h sql_map.h sql_string.h unireg.h \ field.h handler.h \ ha_isammrg.h ha_isam.h ha_myisammrg.h\ - ha_heap.h ha_myisam.h ha_berkeley.h ha_innobase.h \ + ha_heap.h ha_myisam.h ha_berkeley.h ha_innodb.h \ opt_range.h opt_ft.h \ sql_select.h structs.h table.h sql_udf.h hash_filo.h\ lex.h lex_symbol.h sql_acl.h sql_crypt.h \ @@ -74,7 +74,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \ time.cc opt_range.cc opt_sum.cc opt_ft.cc \ records.cc filesort.cc handler.cc \ ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ - ha_berkeley.cc ha_innobase.cc \ + ha_berkeley.cc ha_innodb.cc \ ha_isam.cc ha_isammrg.cc \ sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \ sql_load.cc mf_iocache.cc field_conv.cc sql_show.cc \ diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index e0a802b499a..6a9187a7cb2 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -1454,6 +1454,37 @@ int 
ha_berkeley::index_read(byte * buf, const byte * key, DBUG_RETURN(error); } +/* + Read last key is solved by reading the next key and then reading + the previous key +*/ + +int ha_berkeley::index_read_last(byte * buf, const byte * key, uint key_len) +{ + DBT row; + int error; + KEY *key_info= &table->key_info[active_index]; + DBUG_ENTER("ha_berkeley::index_read"); + + statistic_increment(ha_read_key_count,&LOCK_status); + bzero((char*) &row,sizeof(row)); + + /* read of partial key */ + pack_key(&last_key, active_index, key_buff, key, key_len); + /* Store for compare */ + memcpy(key_buff2, key_buff, (key_len=last_key.size)); + key_info->handler.bdb_return_if_eq= 1; + error=read_row(cursor->c_get(cursor, &last_key, &row, DB_SET_RANGE), + (char*) buf, active_index, &row, (DBT*) 0, 0); + key_info->handler.bdb_return_if_eq= 0; + bzero((char*) &row,sizeof(row)); + if (read_row(cursor->c_get(cursor, &last_key, &row, DB_PREV), + (char*) buf, active_index, &row, &last_key, 1) || + berkeley_key_cmp(table, key_info, key_buff2, key_len)) + error=HA_ERR_KEY_NOT_FOUND; + DBUG_RETURN(error); +} + int ha_berkeley::index_next(byte * buf) { diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index fbc858b5996..587d70265fa 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -89,7 +89,7 @@ class ha_berkeley: public handler int_option_flag(HA_READ_NEXT | HA_READ_PREV | HA_REC_NOT_IN_SEQ | HA_KEYPOS_TO_RNDPOS | HA_READ_ORDER | HA_LASTKEY_ORDER | - HA_LONGLONG_KEYS | HA_NULL_KEY | HA_HAVE_KEY_READ_ONLY | + HA_NULL_KEY | HA_HAVE_KEY_READ_ONLY | HA_BLOB_KEY | HA_NOT_EXACT_COUNT | HA_PRIMARY_KEY_IN_READ_INDEX | HA_DROP_BEFORE_CREATE | HA_AUTO_PART_KEY), @@ -123,6 +123,7 @@ class ha_berkeley: public handler uint key_len, enum ha_rkey_function find_flag); int index_read_idx(byte * buf, uint index, const byte * key, uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); int index_next(byte * buf); int index_next_same(byte * buf, 
const byte *key, uint keylen); int index_prev(byte * buf); diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc index 079fba05f0a..518a9c38d82 100644 --- a/sql/ha_heap.cc +++ b/sql/ha_heap.cc @@ -57,6 +57,7 @@ int ha_heap::open(const char *name, int mode, uint test_if_locked) for (part=0 ; part < pos->key_parts ; part++) { uint flag=pos->key_part[part].key_type; + Field *field=pos->key_part[part].field; if (!f_is_packed(flag) && f_packtype(flag) == (int) FIELD_TYPE_DECIMAL && !(flag & FIELDFLAG_BINARY)) @@ -65,7 +66,19 @@ int ha_heap::open(const char *name, int mode, uint test_if_locked) seg->type= (int) HA_KEYTYPE_BINARY; seg->start=(uint) pos->key_part[part].offset; seg->length=(uint) pos->key_part[part].length; - seg++; + /* Nullable key parts store the bit and row offset of their NULL flag */ + if (field->null_ptr) + { + seg->null_bit=field->null_bit; + seg->null_pos= (uint) (field->null_ptr- + (uchar*) table->record[0]); + } + else + { + seg->null_bit=0; + seg->null_pos=0; + } + seg++; /* each key part fills its own segment */ } } mem_per_row += MY_ALIGN(table->reclength+1, sizeof(char*)); diff --git a/sql/ha_heap.h b/sql/ha_heap.h index fa077cef60a..c8f29dea53c 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -36,7 +36,7 @@ class ha_heap: public handler ulong option_flag() const { return (HA_READ_RND_SAME | HA_NO_INDEX | HA_ONLY_WHOLE_INDEX | HA_WRONG_ASCII_ORDER | HA_KEYPOS_TO_RNDPOS | HA_NO_BLOBS | - HA_REC_NOT_IN_SEQ); } + HA_NULL_KEY | HA_REC_NOT_IN_SEQ | HA_NOT_READ_PREFIX_LAST); } uint max_record_length() const { return HA_MAX_REC_LENGTH; } uint max_keys() const { return MAX_KEY; } uint max_key_parts() const { return MAX_REF_PARTS; } diff --git a/sql/ha_innobase.cc b/sql/ha_innobase.cc deleted file mode 100644 index 683e76a19ad..00000000000 --- a/sql/ha_innobase.cc +++ /dev/null @@ -1,3402 +0,0 @@ -/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & InnoDB Oy - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your 
option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* This file defines the InnoDB handler: the interface between MySQL and -InnoDB */ - -/* TODO list for the InnoDB handler: - - Ask Monty if strings of different languages can exist in the same - database. Answer: in near future yes, but not yet. -*/ - -#ifdef __GNUC__ -#pragma implementation // gcc: Class implementation -#endif - -#include "mysql_priv.h" -#ifdef HAVE_INNOBASE_DB -#include -#include -#include -#include - -#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1)) - -#include "ha_innobase.h" - -/* We must declare this here because we undef SAFE_MUTEX below */ -pthread_mutex_t innobase_mutex; - -/* Store MySQL definition of 'byte': in Linux it is char while InnoDB -uses unsigned char */ -typedef byte mysql_byte; - -#ifdef SAFE_MUTEX -#undef pthread_mutex_t -#endif - -#define INSIDE_HA_INNOBASE_CC - -/* Include necessary InnoDB headers */ -extern "C" { -#include "../innobase/include/univ.i" -#include "../innobase/include/srv0start.h" -#include "../innobase/include/srv0srv.h" -#include "../innobase/include/trx0roll.h" -#include "../innobase/include/trx0trx.h" -#include "../innobase/include/row0ins.h" -#include "../innobase/include/row0mysql.h" -#include "../innobase/include/row0sel.h" -#include "../innobase/include/row0upd.h" -#include "../innobase/include/log0log.h" -#include "../innobase/include/lock0lock.h" -#include "../innobase/include/dict0crea.h" -#include "../innobase/include/btr0cur.h" -#include "../innobase/include/btr0btr.h" -#include "../innobase/include/fsp0fsp.h" -} - 
-#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */ -#define HA_INNOBASE_RANGE_COUNT 100 - -bool innodb_skip = 0; -uint innobase_init_flags = 0; -ulong innobase_cache_size = 0; - -long innobase_mirrored_log_groups, innobase_log_files_in_group, - innobase_log_file_size, innobase_log_buffer_size, - innobase_buffer_pool_size, innobase_additional_mem_pool_size, - innobase_file_io_threads, innobase_lock_wait_timeout, - innobase_thread_concurrency, innobase_force_recovery; - -char *innobase_data_home_dir; -char *innobase_log_group_home_dir, *innobase_log_arch_dir; -char *innobase_unix_file_flush_method; -bool innobase_flush_log_at_trx_commit, innobase_log_archive, - innobase_use_native_aio, innobase_fast_shutdown; - -/* - Set default InnoDB size to 64M, to let users use InnoDB without having - to specify any startup options. -*/ - -char *innobase_data_file_path= (char*) "ibdata1:64M"; -char *internal_innobase_data_file_path=0; - -/* The following counter is used to convey information to InnoDB -about server activity: in selects it is not sensible to call -srv_active_wake_master_thread after each fetch or search, we only do -it every INNOBASE_WAKE_INTERVAL'th step. */ - -#define INNOBASE_WAKE_INTERVAL 32 -ulong innobase_active_counter = 0; - -char* innobase_home = NULL; - -char innodb_dummy_stmt_trx_handle = 'D'; - -static HASH innobase_open_tables; - -static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length, - my_bool not_used __attribute__((unused))); -static INNOBASE_SHARE *get_share(const char *table_name); -static void free_share(INNOBASE_SHARE *share); -static void innobase_print_error(const char* db_errpfx, char* buffer); - -/* General functions */ - -/************************************************************************ -Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth -time calls srv_active_wake_master_thread. 
This function should be used -when a single database operation may introduce a small need for -server utility activity, like checkpointing. */ -inline -void -innobase_active_small(void) -/*=======================*/ -{ - innobase_active_counter++; - - if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) { - srv_active_wake_master_thread(); - } -} - -/************************************************************************ -Converts an InnoDB error code to a MySQL error code. */ -static -int -convert_error_code_to_mysql( -/*========================*/ - /* out: MySQL error code */ - int error) /* in: InnoDB error code */ -{ - if (error == DB_SUCCESS) { - - return(0); - - } else if (error == (int) DB_DUPLICATE_KEY) { - - return(HA_ERR_FOUND_DUPP_KEY); - - } else if (error == (int) DB_RECORD_NOT_FOUND) { - - return(HA_ERR_NO_ACTIVE_RECORD); - - } else if (error == (int) DB_ERROR) { - - return(HA_ERR_NO_ACTIVE_RECORD); - - } else if (error == (int) DB_DEADLOCK) { - - return(HA_ERR_LOCK_DEADLOCK); - - } else if (error == (int) DB_LOCK_WAIT_TIMEOUT) { - - return(HA_ERR_LOCK_WAIT_TIMEOUT); - - } else if (error == (int) DB_NO_REFERENCED_ROW) { - - return(HA_ERR_NO_REFERENCED_ROW); - - } else if (error == (int) DB_ROW_IS_REFERENCED) { - - return(HA_ERR_ROW_IS_REFERENCED); - - } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) { - - return(HA_ERR_CANNOT_ADD_FOREIGN); - - } else if (error == (int) DB_OUT_OF_FILE_SPACE) { - - return(HA_ERR_RECORD_FILE_FULL); - - } else if (error == (int) DB_TABLE_IS_BEING_USED) { - - return(HA_ERR_WRONG_COMMAND); - - } else if (error == (int) DB_TABLE_NOT_FOUND) { - - return(HA_ERR_KEY_NOT_FOUND); - - } else if (error == (int) DB_TOO_BIG_RECORD) { - - return(HA_ERR_TO_BIG_ROW); - } else { - DBUG_ASSERT(0); - - return(-1); // Unknown error - } -} - -extern "C" { -/***************************************************************** -Prints info of a THD object (== user session thread) to the -standatd output. 
NOTE that mysql/innobase/trx/trx0trx.c must contain -the prototype for this function! */ - -void -innobase_mysql_print_thd( -/*=====================*/ - void* input_thd)/* in: pointer to a MySQL THD object */ -{ - THD* thd; - - thd = (THD*) input_thd; - - printf("MySQL thread id %lu, query id %lu", - thd->thread_id, thd->query_id); - if (thd->host) { - printf(" %s", thd->host); - } - - if (thd->ip) { - printf(" %s", thd->ip); - } - - if (thd->user) { - printf(" %s", thd->user); - } - - if (thd->proc_info) { - printf(" %s", thd->proc_info); - } - - if (thd->query) { - printf("\n%-.100s", thd->query); - } - - printf("\n"); -} -} - -/************************************************************************* -Gets the InnoDB transaction handle for a MySQL handler object, creates -an InnoDB transaction struct if the corresponding MySQL thread struct still -lacks one. */ -static -trx_t* -check_trx_exists( -/*=============*/ - /* out: InnoDB transaction handle */ - THD* thd) /* in: user thread handle */ -{ - trx_t* trx; - - trx = (trx_t*) thd->transaction.all.innobase_tid; - - if (trx == NULL) { - DBUG_ASSERT(thd != NULL); - trx = trx_allocate_for_mysql(); - - trx->mysql_thd = thd; - - thd->transaction.all.innobase_tid = trx; - - /* The execution of a single SQL statement is denoted by - a 'transaction' handle which is a dummy pointer: InnoDB - remembers internally where the latest SQL statement - started, and if error handling requires rolling back the - latest statement, InnoDB does a rollback to a savepoint. */ - - thd->transaction.stmt.innobase_tid = - (void*)&innodb_dummy_stmt_trx_handle; - } - - return(trx); -} - -/************************************************************************* -Updates the user_thd field in a handle and also allocates a new InnoDB -transaction handle if needed, and updates the transaction fields in the -prebuilt struct. 
*/ -inline -int -ha_innobase::update_thd( -/*====================*/ - /* out: 0 or error code */ - THD* thd) /* in: thd to use the handle */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - trx_t* trx; - - trx = check_trx_exists(thd); - - if (prebuilt->trx != trx) { - - row_update_prebuilt_trx(prebuilt, trx); - } - - user_thd = thd; - - return(0); -} - -/************************************************************************* -Reads the data files and their sizes from a character string given in -the .cnf file. */ -static -bool -innobase_parse_data_file_paths_and_sizes(void) -/*==========================================*/ - /* out: TRUE if ok, FALSE if parsing - error */ -{ - char* str; - char* endp; - char* path; - ulint size; - ulint i = 0; - - str = internal_innobase_data_file_path; - - /* First calculate the number of data files and check syntax: - path:size[M];path:size[M]... . Note that a Windows path may - contain a drive name and a ':'. */ - - while (*str != '\0') { - path = str; - - while ((*str != ':' && *str != '\0') - || (*str == ':' - && (*(str + 1) == '\\' || *(str + 1) == '/'))) { - str++; - } - - if (*str == '\0') { - return(FALSE); - } - - str++; - - size = strtoul(str, &endp, 10); - - str = endp; - - if ((*str != 'M') && (*str != 'G')) { - size = size / (1024 * 1024); - } else if (*str == 'G') { - size = size * 1024; - str++; - } else { - str++; - } - - if (strlen(str) >= 6 - && *str == 'n' - && *(str + 1) == 'e' - && *(str + 2) == 'w') { - str += 3; - } - - if (strlen(str) >= 3 - && *str == 'r' - && *(str + 1) == 'a' - && *(str + 2) == 'w') { - str += 3; - } - - if (size == 0) { - return(FALSE); - } - - i++; - - if (*str == ';') { - str++; - } else if (*str != '\0') { - - return(FALSE); - } - } - - srv_data_file_names = (char**)ut_malloc(i * sizeof(void*)); - srv_data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint)); - srv_data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint)); - - srv_n_data_files = i; - - /* 
Then store the actual values to our arrays */ - - str = internal_innobase_data_file_path; - i = 0; - - while (*str != '\0') { - path = str; - - /* Note that we must ignore the ':' in a Windows path */ - - while ((*str != ':' && *str != '\0') - || (*str == ':' - && (*(str + 1) == '\\' || *(str + 1) == '/'))) { - str++; - } - - if (*str == ':') { - /* Make path a null-terminated string */ - *str = '\0'; - str++; - } - - size = strtoul(str, &endp, 10); - - str = endp; - - if ((*str != 'M') && (*str != 'G')) { - size = size / (1024 * 1024); - } else if (*str == 'G') { - size = size * 1024; - str++; - } else { - str++; - } - - srv_data_file_is_raw_partition[i] = 0; - - if (strlen(str) >= 6 - && *str == 'n' - && *(str + 1) == 'e' - && *(str + 2) == 'w') { - str += 3; - srv_data_file_is_raw_partition[i] = SRV_NEW_RAW; - } - - if (strlen(str) >= 3 - && *str == 'r' - && *(str + 1) == 'a' - && *(str + 2) == 'w') { - str += 3; - - if (srv_data_file_is_raw_partition[i] == 0) { - srv_data_file_is_raw_partition[i] = SRV_OLD_RAW; - } - } - - srv_data_file_names[i] = path; - srv_data_file_sizes[i] = size; - - i++; - - if (*str == ';') { - str++; - } - } - - return(TRUE); -} - -/************************************************************************* -Reads log group home directories from a character string given in -the .cnf file. */ -static -bool -innobase_parse_log_group_home_dirs(void) -/*====================================*/ - /* out: TRUE if ok, FALSE if parsing - error */ -{ - char* str; - char* path; - ulint i = 0; - - str = innobase_log_group_home_dir; - - /* First calculate the number of directories and check syntax: - path;path;... 
*/ - - while (*str != '\0') { - path = str; - - while (*str != ';' && *str != '\0') { - str++; - } - - i++; - - if (*str == ';') { - str++; - } else if (*str != '\0') { - - return(FALSE); - } - } - - if (i != (ulint) innobase_mirrored_log_groups) { - - return(FALSE); - } - - srv_log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*)); - - /* Then store the actual values to our array */ - - str = innobase_log_group_home_dir; - i = 0; - - while (*str != '\0') { - path = str; - - while (*str != ';' && *str != '\0') { - str++; - } - - if (*str == ';') { - *str = '\0'; - str++; - } - - srv_log_group_home_dirs[i] = path; - - i++; - } - - return(TRUE); -} - -/************************************************************************* -Opens an InnoDB database. */ - -bool -innobase_init(void) -/*===============*/ - /* out: TRUE if error */ -{ - int err; - bool ret; - char current_lib[3], *default_path; - - DBUG_ENTER("innobase_init"); - - /* - When using the embedded server, the datadirectory is not - in the current directory. - */ - if (mysql_embedded) - default_path=mysql_real_data_home; - else - { - /* It's better to use current lib, to keep path's short */ - current_lib[0]=FN_CURLIB; - current_lib[1]=FN_LIBCHAR; - current_lib[2]=0; - default_path=current_lib; - } - - if (specialflag & SPECIAL_NO_PRIOR) { - srv_set_thread_priorities = FALSE; - } else { - srv_set_thread_priorities = TRUE; - srv_query_thread_priority = QUERY_PRIOR; - } - - /* - Set InnoDB initialization parameters according to the values - read from MySQL .cnf file - */ - - // Make a copy of innobase_data_file_path to not modify the original - internal_innobase_data_file_path=my_strdup(innobase_data_file_path, - MYF(MY_WME)); - - srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir : - default_path); - srv_logs_home = (char*) ""; - srv_arch_dir = (innobase_log_arch_dir ? 
innobase_log_arch_dir : - default_path); - - ret = innobase_parse_data_file_paths_and_sizes(); - - if (ret == FALSE) { - sql_print_error("InnoDB: syntax error in innodb_data_file_path"); - DBUG_RETURN(TRUE); - } - - if (!innobase_log_group_home_dir) - innobase_log_group_home_dir= default_path; - ret = innobase_parse_log_group_home_dirs(); - - if (ret == FALSE) { - DBUG_RETURN(TRUE); - } - - srv_unix_file_flush_method_str = (innobase_unix_file_flush_method ? - innobase_unix_file_flush_method : - (char*)"fdatasync"); - - srv_n_log_groups = (ulint) innobase_mirrored_log_groups; - srv_n_log_files = (ulint) innobase_log_files_in_group; - srv_log_file_size = (ulint) innobase_log_file_size; - - srv_log_archive_on = (ulint) innobase_log_archive; - srv_log_buffer_size = (ulint) innobase_log_buffer_size; - srv_flush_log_at_trx_commit = (ibool) innobase_flush_log_at_trx_commit; - - srv_use_native_aio = 0; - - srv_pool_size = (ulint) innobase_buffer_pool_size; - srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; - - srv_n_file_io_threads = (ulint) innobase_file_io_threads; - - srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout; - srv_thread_concurrency = (ulint) innobase_thread_concurrency; - srv_force_recovery = (ulint) innobase_force_recovery; - - srv_fast_shutdown = (ibool) innobase_fast_shutdown; - - srv_print_verbose_log = mysql_embedded ? 0 : 1; - if (strcmp(default_charset_info->name, "latin1") == 0) { - /* Store the character ordering table to InnoDB. - For non-latin1 charsets we use the MySQL comparison - functions, and consequently we do not need to know - the ordering internally in InnoDB. 
*/ - - memcpy(srv_latin1_ordering, - default_charset_info->sort_order, 256); - } - - err = innobase_start_or_create_for_mysql(); - - if (err != DB_SUCCESS) { - - DBUG_RETURN(1); - } - (void) hash_init(&innobase_open_tables,32,0,0, - (hash_get_key) innobase_get_key,0,0); - pthread_mutex_init(&innobase_mutex,MY_MUTEX_INIT_FAST); - DBUG_RETURN(0); -} - -/*********************************************************************** -Closes an InnoDB database. */ - -bool -innobase_end(void) -/*==============*/ - /* out: TRUE if error */ -{ - int err; - - DBUG_ENTER("innobase_end"); - - err = innobase_shutdown_for_mysql(); - hash_free(&innobase_open_tables); - my_free(internal_innobase_data_file_path,MYF(MY_ALLOW_ZERO_PTR)); - - if (err != DB_SUCCESS) { - - DBUG_RETURN(1); - } - - DBUG_RETURN(0); -} - -/******************************************************************** -Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit -flushes logs, and the name of this function should be innobase_checkpoint. */ - -bool -innobase_flush_logs(void) -/*=====================*/ - /* out: TRUE if error */ -{ - bool result = 0; - - DBUG_ENTER("innobase_flush_logs"); - - log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); - - DBUG_RETURN(result); -} - -/************************************************************************* -Gets the free space in an InnoDB database: returned in units of kB. */ - -uint -innobase_get_free_space(void) -/*=========================*/ - /* out: free space in kB */ -{ - return((uint) fsp_get_available_space_in_free_extents(0)); -} - -/********************************************************************* -Commits a transaction in an InnoDB database. 
*/ - -int -innobase_commit( -/*============*/ - /* out: 0 or error number */ - THD* thd, /* in: MySQL thread handle of the user for whom - the transaction should be committed */ - void* trx_handle)/* in: InnoDB trx handle or NULL: NULL means - that the current SQL statement ended, and we should - mark the start of a new statement with a savepoint */ -{ - int error = 0; - trx_t* trx; - - DBUG_ENTER("innobase_commit"); - DBUG_PRINT("trans", ("ending transaction")); - - trx = check_trx_exists(thd); - - if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { - srv_conc_enter_innodb(trx); - - trx_commit_for_mysql(trx); - - srv_conc_exit_innodb(); - } - - trx_mark_sql_stat_end(trx); - -#ifndef DBUG_OFF - if (error) { - DBUG_PRINT("error", ("error: %d", error)); - } -#endif - /* Tell InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - DBUG_RETURN(error); -} - -/********************************************************************* -Rolls back a transaction in an InnoDB database. */ - -int -innobase_rollback( -/*==============*/ - /* out: 0 or error number */ - THD* thd, /* in: handle to the MySQL thread of the user - whose transaction should be rolled back */ - void* trx_handle)/* in: InnoDB trx handle or a dummy stmt handle */ -{ - int error = 0; - trx_t* trx; - - DBUG_ENTER("innobase_rollback"); - DBUG_PRINT("trans", ("aborting transaction")); - - trx = check_trx_exists(thd); - - srv_conc_enter_innodb(trx); - - if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { - error = trx_rollback_for_mysql(trx); - } else { - error = trx_rollback_last_sql_stat_for_mysql(trx); - } - - srv_conc_exit_innodb(); - - trx_mark_sql_stat_end(trx); - - DBUG_RETURN(convert_error_code_to_mysql(error)); -} - -/********************************************************************* -Frees a possible InnoDB trx object associated with the current -THD. 
*/ - -int -innobase_close_connection( -/*======================*/ - /* out: 0 or error number */ - THD* thd) /* in: handle to the MySQL thread of the user - whose transaction should be rolled back */ -{ - if (NULL != thd->transaction.all.innobase_tid) { - trx_rollback_for_mysql((trx_t*) - (thd->transaction.all.innobase_tid)); - trx_free_for_mysql((trx_t*) - (thd->transaction.all.innobase_tid)); - } - - return(0); -} - -/********************************************************************** -Prints an error message. */ -static -void -innobase_print_error( -/*=================*/ - const char* db_errpfx, /* in: error prefix text */ - char* buffer) /* in: error text */ -{ - sql_print_error("%s: %s", db_errpfx, buffer); -} - - -/***************************************************************************** -** InnoDB database tables -*****************************************************************************/ - -/******************************************************************** -This function is not relevant since we store the tables and indexes -into our own tablespace, not as files, whose extension this function would -give. */ - -const char** -ha_innobase::bas_ext() const -/*========================*/ - /* out: file extension strings, currently not - used */ -{ - static const char* ext[] = {".InnoDB", NullS}; - - return(ext); -} - -/********************************************************************* -Normalizes a table name string. A normalized name consists of the -database name catenated to '/' and table name. An example: -test/mytable. On Windows normalization puts both the database name and the -table name always to lower case. 
*/ -static -void -normalize_table_name( -/*=================*/ - char* norm_name, /* out: normalized name as a - null-terminated string */ - const char* name) /* in: table name string */ -{ - char* name_ptr; - char* db_ptr; - char* ptr; - - /* Scan name from the end */ - - ptr = strend(name)-1; - - while (ptr >= name && *ptr != '\\' && *ptr != '/') { - ptr--; - } - - name_ptr = ptr + 1; - - DBUG_ASSERT(ptr > name); - - ptr--; - - while (ptr >= name && *ptr != '\\' && *ptr != '/') { - ptr--; - } - - db_ptr = ptr + 1; - - memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name)); - - norm_name[name_ptr - db_ptr - 1] = '/'; - -#ifdef __WIN__ - /* Put to lower case */ - - ptr = norm_name; - - while (*ptr != '\0') { - *ptr = tolower(*ptr); - ptr++; - } -#endif -} - -/********************************************************************* -Creates and opens a handle to a table which already exists in an Innobase -database. */ - -int -ha_innobase::open( -/*==============*/ - /* out: 1 if error, 0 if success */ - const char* name, /* in: table name */ - int mode, /* in: not used */ - uint test_if_locked) /* in: not used */ -{ - dict_table_t* ib_table; - int error = 0; - uint buff_len; - char norm_name[1000]; - - DBUG_ENTER("ha_innobase::open"); - - UT_NOT_USED(mode); - UT_NOT_USED(test_if_locked); - - normalize_table_name(norm_name, name); - - user_thd = NULL; - - last_query_id = (ulong)-1; - - if (!(share=get_share(name))) - DBUG_RETURN(1); - - /* Create buffers for packing the fields of a record. Why - table->reclength did not work here? Obviously, because char - fields when packed actually became 1 byte longer, when we also - stored the string length as the first byte. 
*/ - - buff_len = table->reclength + table->max_key_length - + MAX_REF_PARTS * 3; - if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME), - &upd_buff, buff_len, - &key_val_buff, buff_len, - NullS)) { - free_share(share); - DBUG_RETURN(1); - } - - /* Get pointer to a table object in InnoDB dictionary cache */ - - if (NULL == (ib_table = dict_table_get(norm_name, NULL))) { - - sql_print_error("InnoDB error:\n\ -Cannot find table %s from the internal data dictionary\n\ -of InnoDB though the .frm file for the table exists. Maybe you\n\ -have deleted and recreated InnoDB data files but have forgotten\n\ -to delete the corresponding .frm files of InnoDB tables, or you\n\ -have moved .frm files to another database?", - norm_name); - - free_share(share); - my_free((char*) upd_buff, MYF(0)); - my_errno = ENOENT; - DBUG_RETURN(1); - } - - innobase_prebuilt = row_create_prebuilt(ib_table); - - ((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len = table->reclength; - - primary_key = MAX_KEY; - - if (!row_table_got_default_clust_index(ib_table)) { - - /* If we automatically created the clustered index, - then MySQL does not know about it and it must not be aware - of the index used on scan, to avoid checking if we update - the column of the index. The column is the row id in - the automatical case, and it will not be updated. 
*/ - - ((row_prebuilt_t*)innobase_prebuilt) - ->clust_index_was_generated = FALSE; - - primary_key = 0; - key_used_on_scan = 0; - - /* MySQL allocates the buffer for ref */ - - ref_length = table->key_info->key_length - + table->key_info->key_parts + 10; - - /* One byte per key field is consumed to the SQL NULL - info of the field; we add also 10 bytes of safety margin */ - } else { - ((row_prebuilt_t*)innobase_prebuilt) - ->clust_index_was_generated = TRUE; - - ref_length = DATA_ROW_ID_LEN + 10; - - DBUG_ASSERT(key_used_on_scan == MAX_KEY); - } - - auto_inc_counter_for_this_stat = 0; - - /* Init table lock structure */ - thr_lock_data_init(&share->lock,&lock,(void*) 0); - - info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); - - DBUG_RETURN(0); -} - -/********************************************************************* -Does nothing. */ - -void -ha_innobase::initialize(void) -/*=========================*/ -{ -} - -/********************************************************************** -Closes a handle to an InnoDB table. */ - -int -ha_innobase::close(void) -/*====================*/ - /* out: error number */ -{ - DBUG_ENTER("ha_innobase::close"); - - row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt); - - my_free((char*) upd_buff, MYF(0)); - free_share(share); - - /* Tell InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - DBUG_RETURN(0); -} - -/* The following accessor functions should really be inside MySQL code! */ - -/****************************************************************** -Gets field offset for a field in a table. */ -inline -uint -get_field_offset( -/*=============*/ - /* out: offset */ - TABLE* table, /* in: MySQL table object */ - Field* field) /* in: MySQL field object */ -{ - return((uint) (field->ptr - (char*) table->record[0])); -} - -/****************************************************************** -Checks if a field in a record is SQL NULL. 
Uses the record format -information in table to track the null bit in record. */ -inline -uint -field_in_record_is_null( -/*====================*/ - /* out: 1 if NULL, 0 otherwise */ - TABLE* table, /* in: MySQL table object */ - Field* field, /* in: MySQL field object */ - char* record) /* in: a row in MySQL format */ -{ - int null_offset; - - if (!field->null_ptr) { - - return(0); - } - - null_offset = (uint) ((char*) field->null_ptr - - (char*) table->record[0]); - - if (record[null_offset] & field->null_bit) { - - return(1); - } - - return(0); -} - -/****************************************************************** -Sets a field in a record to SQL NULL. Uses the record format -information in table to track the null bit in record. */ -inline -void -set_field_in_record_to_null( -/*========================*/ - TABLE* table, /* in: MySQL table object */ - Field* field, /* in: MySQL field object */ - char* record) /* in: a row in MySQL format */ -{ - int null_offset; - - null_offset = (uint) ((char*) field->null_ptr - - (char*) table->record[0]); - - record[null_offset] = record[null_offset] | field->null_bit; -} - -/****************************************************************** -Resets SQL NULL bits in a record to zero. */ -inline -void -reset_null_bits( -/*============*/ - TABLE* table, /* in: MySQL table object */ - char* record) /* in: a row in MySQL format */ -{ - bzero(record, table->null_bytes); -} - -extern "C" { -/***************************************************************** -InnoDB uses this function is to compare two data fields for which the -data type is such that we must use MySQL code to compare them. NOTE that the -prototype of this function is in rem0cmp.c in InnoDB source code! -If you change this function, remember to update the prototype there! 
*/ - -int -innobase_mysql_cmp( -/*===============*/ - /* out: 1, 0, -1, if a is greater, - equal, less than b, respectively */ - int mysql_type, /* in: MySQL type */ - unsigned char* a, /* in: data field */ - unsigned int a_length, /* in: data field length, - not UNIV_SQL_NULL */ - unsigned char* b, /* in: data field */ - unsigned int b_length) /* in: data field length, - not UNIV_SQL_NULL */ -{ - enum_field_types mysql_tp; - int ret; - - DBUG_ASSERT(a_length != UNIV_SQL_NULL); - DBUG_ASSERT(b_length != UNIV_SQL_NULL); - - mysql_tp = (enum_field_types) mysql_type; - - switch (mysql_tp) { - - case FIELD_TYPE_STRING: - case FIELD_TYPE_VAR_STRING: - ret = my_sortncmp((const char*) a, a_length, - (const char*) b, b_length); - if (ret < 0) { - return(-1); - } else if (ret > 0) { - return(1); - } else { - return(0); - } - default: - assert(0); - } - - return(0); -} -} - -/****************************************************************** -Converts a MySQL type to an InnoDB type. */ -inline -ulint -get_innobase_type_from_mysql_type( -/*==============================*/ - /* out: DATA_BINARY, DATA_VARCHAR, ... 
*/ - Field* field) /* in: MySQL field */ -{ - /* The following asserts check that MySQL type code fits in - 8 bits: this is used in ibuf and also when DATA_NOT_NULL is - ORed to the type */ - - DBUG_ASSERT((ulint)FIELD_TYPE_STRING < 256); - DBUG_ASSERT((ulint)FIELD_TYPE_VAR_STRING < 256); - DBUG_ASSERT((ulint)FIELD_TYPE_DOUBLE < 256); - DBUG_ASSERT((ulint)FIELD_TYPE_FLOAT < 256); - DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256); - - switch (field->type()) { - case FIELD_TYPE_VAR_STRING: if (field->flags & BINARY_FLAG) { - - return(DATA_BINARY); - } else if (strcmp( - default_charset_info->name, - "latin1") == 0) { - return(DATA_VARCHAR); - } else { - return(DATA_VARMYSQL); - } - case FIELD_TYPE_STRING: if (field->flags & BINARY_FLAG) { - - return(DATA_FIXBINARY); - } else if (strcmp( - default_charset_info->name, - "latin1") == 0) { - return(DATA_CHAR); - } else { - return(DATA_MYSQL); - } - case FIELD_TYPE_LONG: - case FIELD_TYPE_LONGLONG: - case FIELD_TYPE_TINY: - case FIELD_TYPE_SHORT: - case FIELD_TYPE_INT24: - case FIELD_TYPE_DATE: - case FIELD_TYPE_DATETIME: - case FIELD_TYPE_YEAR: - case FIELD_TYPE_NEWDATE: - case FIELD_TYPE_ENUM: - case FIELD_TYPE_SET: - case FIELD_TYPE_TIME: - case FIELD_TYPE_TIMESTAMP: - return(DATA_INT); - case FIELD_TYPE_FLOAT: - return(DATA_FLOAT); - case FIELD_TYPE_DOUBLE: - return(DATA_DOUBLE); - case FIELD_TYPE_DECIMAL: - return(DATA_DECIMAL); - case FIELD_TYPE_TINY_BLOB: - case FIELD_TYPE_MEDIUM_BLOB: - case FIELD_TYPE_BLOB: - case FIELD_TYPE_LONG_BLOB: - return(DATA_BLOB); - default: - assert(0); - } - - return(0); -} - -/*********************************************************************** -Stores a key value for a row to a buffer. 
*/ - -uint -ha_innobase::store_key_val_for_row( -/*===============================*/ - /* out: key value length as stored in buff */ - uint keynr, /* in: key number */ - char* buff, /* in/out: buffer for the key value (in MySQL - format) */ - const mysql_byte* record)/* in: row in MySQL format */ -{ - KEY* key_info = table->key_info + keynr; - KEY_PART_INFO* key_part = key_info->key_part; - KEY_PART_INFO* end = key_part + key_info->key_parts; - char* buff_start = buff; - - DBUG_ENTER("store_key_val_for_row"); - - for (; key_part != end; key_part++) { - - if (key_part->null_bit) { - /* Store 0 if the key part is a NULL part */ - - if (record[key_part->null_offset] - & key_part->null_bit) { - *buff++ = 1; - continue; - } - - *buff++ = 0; - } - - memcpy(buff, record + key_part->offset, key_part->length); - buff += key_part->length; - } - - DBUG_RETURN(buff - buff_start); -} - -/****************************************************************** -Builds a template to the prebuilt struct. */ -static -void -build_template( -/*===========*/ - row_prebuilt_t* prebuilt, /* in: prebuilt struct */ - THD* thd, /* in: current user thread, used - only if templ_type is - ROW_MYSQL_REC_FIELDS */ - TABLE* table, /* in: MySQL table */ - ulint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or - ROW_MYSQL_REC_FIELDS */ -{ - dict_index_t* index; - dict_index_t* clust_index; - mysql_row_templ_t* templ; - Field* field; - ulint n_fields; - ulint n_requested_fields = 0; - ibool fetch_all_in_key = FALSE; - ulint i; - - clust_index = dict_table_get_first_index_noninline(prebuilt->table); - - if (!prebuilt->in_update_remember_pos) { - if (prebuilt->read_just_key) { - fetch_all_in_key = TRUE; - } else { - /* We are building a temporary table: fetch all - columns */ - - templ_type = ROW_MYSQL_WHOLE_ROW; - } - } - - if (prebuilt->select_lock_type == LOCK_X) { - /* TODO: should fix the code in sql_update so that we could do - with fetching only the needed columns */ - - templ_type = ROW_MYSQL_WHOLE_ROW; - 
} - - if (templ_type == ROW_MYSQL_REC_FIELDS) { - - if (prebuilt->select_lock_type != LOCK_NONE) { - /* Let index be the clustered index */ - - index = clust_index; - } else { - index = prebuilt->index; - } - } else { - index = clust_index; - } - - if (index == clust_index) { - prebuilt->need_to_access_clustered = TRUE; - } else { - prebuilt->need_to_access_clustered = FALSE; - /* Below we check column by column if we need to access - the clustered index */ - } - - n_fields = (ulint)table->fields; - - if (!prebuilt->mysql_template) { - prebuilt->mysql_template = (mysql_row_templ_t*) - mem_alloc_noninline( - n_fields * sizeof(mysql_row_templ_t)); - } - - prebuilt->template_type = templ_type; - prebuilt->null_bitmap_len = table->null_bytes; - - prebuilt->templ_contains_blob = FALSE; - - for (i = 0; i < n_fields; i++) { - templ = prebuilt->mysql_template + n_requested_fields; - field = table->field[i]; - - if (templ_type == ROW_MYSQL_REC_FIELDS - && !(fetch_all_in_key && - ULINT_UNDEFINED != dict_index_get_nth_col_pos( - index, i)) - && thd->query_id != field->query_id - && thd->query_id != (field->query_id ^ MAX_ULONG_BIT) - && thd->query_id != - (field->query_id ^ (MAX_ULONG_BIT >> 1))) { - - /* This field is not needed in the query, skip it */ - - goto skip_field; - } - - n_requested_fields++; - - templ->col_no = i; - - if (index == clust_index) { - templ->rec_field_no = (index->table->cols + i) - ->clust_pos; - } else { - templ->rec_field_no = dict_index_get_nth_col_pos( - index, i); - } - - if (templ->rec_field_no == ULINT_UNDEFINED) { - prebuilt->need_to_access_clustered = TRUE; - } - - if (field->null_ptr) { - templ->mysql_null_byte_offset = - (ulint) ((char*) field->null_ptr - - (char*) table->record[0]); - - templ->mysql_null_bit_mask = (ulint) field->null_bit; - } else { - templ->mysql_null_bit_mask = 0; - } - - templ->mysql_col_offset = (ulint) - get_field_offset(table, field); - - templ->mysql_col_len = (ulint) field->pack_length(); - templ->type = 
get_innobase_type_from_mysql_type(field); - templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG); - - if (templ->type == DATA_BLOB) { - prebuilt->templ_contains_blob = TRUE; - } -skip_field: - ; - } - - prebuilt->n_template = n_requested_fields; - - if (prebuilt->need_to_access_clustered) { - /* Change rec_field_no's to correspond to the clustered index - record */ - for (i = 0; i < n_requested_fields; i++) { - templ = prebuilt->mysql_template + i; - - templ->rec_field_no = - (index->table->cols + templ->col_no)->clust_pos; - } - } - - if (templ_type == ROW_MYSQL_REC_FIELDS - && prebuilt->select_lock_type != LOCK_NONE) { - - prebuilt->need_to_access_clustered = TRUE; - } -} - -/************************************************************************ -Stores a row in an InnoDB database, to the table specified in this -handle. */ - -int -ha_innobase::write_row( -/*===================*/ - /* out: error code */ - mysql_byte* record) /* in: a row in MySQL format */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt; - int error; - longlong auto_inc; - - DBUG_ENTER("ha_innobase::write_row"); - - statistic_increment(ha_write_count, &LOCK_status); - - if (table->time_stamp) { - update_timestamp(record + table->time_stamp - 1); - } - - if (last_query_id != user_thd->query_id) { - prebuilt->sql_stat_start = TRUE; - last_query_id = user_thd->query_id; - } - - if (table->next_number_field && record == table->record[0]) { - - /* Fetch the value the user possibly has set in the - autoincrement field */ - - auto_inc = table->next_number_field->val_int(); - - /* In replication and also otherwise the auto-inc column - can be set with SET INSERT_ID. Then we must look at - user_thd->next_insert_id. If it is nonzero and the user - has not supplied a value, we must use it, and use values - incremented by 1 in all subsequent inserts within the - same SQL statement! 
*/ - - if (auto_inc == 0 && user_thd->next_insert_id != 0) { - auto_inc = user_thd->next_insert_id; - auto_inc_counter_for_this_stat = auto_inc; - } - - if (auto_inc == 0 && auto_inc_counter_for_this_stat) { - /* The user set the auto-inc counter for - this SQL statement with SET INSERT_ID. We must - assign sequential values from the counter. */ - - auto_inc_counter_for_this_stat++; - - auto_inc = auto_inc_counter_for_this_stat; - - /* We give MySQL a new value to place in the - auto-inc column */ - user_thd->next_insert_id = auto_inc; - } - - if (auto_inc != 0) { - /* This call will calculate the max of the - current value and the value supplied by the user, if - the auto_inc counter is already initialized - for the table */ - - /* We have to use the transactional lock mechanism - on the auto-inc counter of the table to ensure - that replication and roll-forward of the binlog - exactly imitates also the given auto-inc values. - The lock is released at each SQL statement's - end. */ - - srv_conc_enter_innodb(prebuilt->trx); - error = row_lock_table_autoinc_for_mysql(prebuilt); - srv_conc_exit_innodb(); - - if (error != DB_SUCCESS) { - - error = convert_error_code_to_mysql(error); - goto func_exit; - } - - dict_table_autoinc_update(prebuilt->table, auto_inc); - } else { - srv_conc_enter_innodb(prebuilt->trx); - - if (!prebuilt->trx->auto_inc_lock) { - - error = row_lock_table_autoinc_for_mysql( - prebuilt); - if (error != DB_SUCCESS) { - srv_conc_exit_innodb(); - - error = convert_error_code_to_mysql( - error); - goto func_exit; - } - } - - auto_inc = dict_table_autoinc_get(prebuilt->table); - srv_conc_exit_innodb(); - - /* If auto_inc is now != 0 the autoinc counter - was already initialized for the table: we can give - the new value for MySQL to place in the field */ - - if (auto_inc != 0) { - user_thd->next_insert_id = auto_inc; - } - } - - /* Set the 'in_update_remember_pos' flag to FALSE to - make sure all columns are fetched in the select done by - 
update_auto_increment */ - - prebuilt->in_update_remember_pos = FALSE; - - update_auto_increment(); - - if (auto_inc == 0) { - /* The autoinc counter for our table was not yet - initialized, initialize it now */ - - auto_inc = table->next_number_field->val_int(); - - srv_conc_enter_innodb(prebuilt->trx); - error = row_lock_table_autoinc_for_mysql(prebuilt); - srv_conc_exit_innodb(); - - if (error != DB_SUCCESS) { - - error = convert_error_code_to_mysql(error); - goto func_exit; - } - - dict_table_autoinc_initialize(prebuilt->table, - auto_inc); - } - - /* We have to set sql_stat_start to TRUE because - update_auto_increment has called a select, and - has reset that flag; row_insert_for_mysql has to - know to set the IX intention lock on the table, something - it only does at the start of each statement */ - - prebuilt->sql_stat_start = TRUE; - } - - if (prebuilt->mysql_template == NULL - || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) { - /* Build the template used in converting quickly between - the two database formats */ - - build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW); - } - - if (user_thd->lex.sql_command == SQLCOM_INSERT - && user_thd->lex.duplicates == DUP_IGNORE) { - prebuilt->trx->ignore_duplicates_in_insert = TRUE; - } else { - prebuilt->trx->ignore_duplicates_in_insert = FALSE; - } - - srv_conc_enter_innodb(prebuilt->trx); - - error = row_insert_for_mysql((byte*) record, prebuilt); - - srv_conc_exit_innodb(); - - prebuilt->trx->ignore_duplicates_in_insert = FALSE; - - error = convert_error_code_to_mysql(error); - - /* Tell InnoDB server that there might be work for - utility threads: */ -func_exit: - innobase_active_small(); - - DBUG_RETURN(error); -} - -/****************************************************************** -Converts field data for storage in an InnoDB update vector. 
*/ -inline -mysql_byte* -innobase_convert_and_store_changed_col( -/*===================================*/ - /* out: pointer to the end of the converted - data in the buffer */ - upd_field_t* ufield, /* in/out: field in the update vector */ - mysql_byte* buf, /* in: buffer we can use in conversion */ - mysql_byte* data, /* in: column data to store */ - ulint len, /* in: data len */ - ulint col_type,/* in: data type in InnoDB type numbers */ - ulint is_unsigned)/* in: != 0 if an unsigned integer type */ -{ - uint i; - - if (len == UNIV_SQL_NULL) { - data = NULL; - } else if (col_type == DATA_VARCHAR || col_type == DATA_BINARY - || col_type == DATA_VARMYSQL) { - /* Remove trailing spaces */ - while (len > 0 && data[len - 1] == ' ') { - len--; - } - - } else if (col_type == DATA_INT) { - /* Store integer data in InnoDB in a big-endian - format, sign bit negated, if signed */ - - for (i = 0; i < len; i++) { - buf[len - 1 - i] = data[i]; - } - - if (!is_unsigned) { - buf[0] = buf[0] ^ 128; - } - - data = buf; - - buf += len; - } - - ufield->new_val.data = data; - ufield->new_val.len = len; - - return(buf); -} - -/************************************************************************** -Checks which fields have changed in a row and stores information -of them to an update vector. 
*/ -static -int -calc_row_difference( -/*================*/ - /* out: error number or 0 */ - upd_t* uvect, /* in/out: update vector */ - mysql_byte* old_row, /* in: old row in MySQL format */ - mysql_byte* new_row, /* in: new row in MySQL format */ - struct st_table* table, /* in: table in MySQL data dictionary */ - mysql_byte* upd_buff, /* in: buffer to use */ - row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */ - THD* thd) /* in: user thread */ -{ - Field* field; - uint n_fields; - ulint o_len; - ulint n_len; - byte* o_ptr; - byte* n_ptr; - byte* buf; - upd_field_t* ufield; - ulint col_type; - ulint is_unsigned; - ulint n_changed = 0; - uint i; - - n_fields = table->fields; - - /* We use upd_buff to convert changed fields */ - buf = (byte*) upd_buff; - - for (i = 0; i < n_fields; i++) { - field = table->field[i]; - - /* if (thd->query_id != field->query_id) { */ - /* TODO: check that these fields cannot have - changed! */ - - /* goto skip_field; - }*/ - - o_ptr = (byte*) old_row + get_field_offset(table, field); - n_ptr = (byte*) new_row + get_field_offset(table, field); - o_len = field->pack_length(); - n_len = field->pack_length(); - - col_type = get_innobase_type_from_mysql_type(field); - is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG); - - switch (col_type) { - - case DATA_BLOB: - o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len); - n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len); - break; - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_VARMYSQL: - o_ptr = row_mysql_read_var_ref_noninline(&o_len, o_ptr); - n_ptr = row_mysql_read_var_ref_noninline(&n_len, n_ptr); - default: - ; - } - - if (field->null_ptr) { - if (field_in_record_is_null(table, field, - (char*) old_row)) { - o_len = UNIV_SQL_NULL; - } - - if (field_in_record_is_null(table, field, - (char*) new_row)) { - n_len = UNIV_SQL_NULL; - } - } - - if (o_len != n_len || (o_len != UNIV_SQL_NULL && - 0 != memcmp(o_ptr, n_ptr, o_len))) { - /* The field has changed */ - - ufield = 
uvect->fields + n_changed; - - buf = (byte*) - innobase_convert_and_store_changed_col(ufield, - (mysql_byte*)buf, - (mysql_byte*)n_ptr, n_len, col_type, - is_unsigned); - ufield->exp = NULL; - ufield->field_no = - (prebuilt->table->cols + i)->clust_pos; - n_changed++; - } - ; - } - - uvect->n_fields = n_changed; - uvect->info_bits = 0; - - return(0); -} - -/************************************************************************** -Updates a row given as a parameter to a new value. Note that we are given -whole rows, not just the fields which are updated: this incurs some -overhead for CPU when we check which fields are actually updated. -TODO: currently InnoDB does not prevent the 'Halloween problem': -in a searched update a single row can get updated several times -if its index columns are updated! */ - -int -ha_innobase::update_row( -/*====================*/ - /* out: error number or 0 */ - const mysql_byte* old_row,/* in: old row in MySQL format */ - mysql_byte* new_row)/* in: new row in MySQL format */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - upd_t* uvect; - int error = 0; - - DBUG_ENTER("ha_innobase::update_row"); - - if (table->time_stamp) { - update_timestamp(new_row + table->time_stamp - 1); - } - - if (last_query_id != user_thd->query_id) { - prebuilt->sql_stat_start = TRUE; - last_query_id = user_thd->query_id; - } - - if (prebuilt->upd_node) { - uvect = prebuilt->upd_node->update; - } else { - uvect = row_get_prebuilt_update_vector(prebuilt); - } - - /* Build an update vector from the modified fields in the rows - (uses upd_buff of the handle) */ - - calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table, - upd_buff, prebuilt, user_thd); - /* This is not a delete */ - prebuilt->upd_node->is_delete = FALSE; - - if (!prebuilt->in_update_remember_pos) { - assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); - } - - srv_conc_enter_innodb(prebuilt->trx); - - error = row_update_for_mysql((byte*) old_row, prebuilt); - 
- srv_conc_exit_innodb(); - - error = convert_error_code_to_mysql(error); - - /* Tell InnoDB server that there might be work for - utility threads: */ - - innobase_active_small(); - - DBUG_RETURN(error); -} - -/************************************************************************** -Deletes a row given as the parameter. */ - -int -ha_innobase::delete_row( -/*====================*/ - /* out: error number or 0 */ - const mysql_byte* record) /* in: a row in MySQL format */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - int error = 0; - - DBUG_ENTER("ha_innobase::delete_row"); - - if (last_query_id != user_thd->query_id) { - prebuilt->sql_stat_start = TRUE; - last_query_id = user_thd->query_id; - } - - if (!prebuilt->upd_node) { - row_get_prebuilt_update_vector(prebuilt); - } - - /* This is a delete */ - - prebuilt->upd_node->is_delete = TRUE; - prebuilt->in_update_remember_pos = TRUE; - - srv_conc_enter_innodb(prebuilt->trx); - - error = row_update_for_mysql((byte*) record, prebuilt); - - srv_conc_exit_innodb(); - - error = convert_error_code_to_mysql(error); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - innobase_active_small(); - - DBUG_RETURN(error); -} - -/********************************************************************** -Initializes a handle to use an index. */ - -int -ha_innobase::index_init( -/*====================*/ - /* out: 0 or error number */ - uint keynr) /* in: key (index) number */ -{ - int error = 0; - DBUG_ENTER("index_init"); - - error = change_active_index(keynr); - - DBUG_RETURN(error); -} - -/********************************************************************** -Currently does nothing. 
*/ - -int -ha_innobase::index_end(void) -/*========================*/ -{ - int error = 0; - DBUG_ENTER("index_end"); - - DBUG_RETURN(error); -} - -/************************************************************************* -Converts a search mode flag understood by MySQL to a flag understood -by InnoDB. */ -inline -ulint -convert_search_mode_to_innobase( -/*============================*/ - enum ha_rkey_function find_flag) -{ - switch (find_flag) { - case HA_READ_KEY_EXACT: return(PAGE_CUR_GE); - /* the above does not require the index to be UNIQUE */ - case HA_READ_KEY_OR_NEXT: return(PAGE_CUR_GE); - case HA_READ_KEY_OR_PREV: return(PAGE_CUR_LE); - case HA_READ_AFTER_KEY: return(PAGE_CUR_G); - case HA_READ_BEFORE_KEY: return(PAGE_CUR_L); - case HA_READ_PREFIX: return(PAGE_CUR_GE); - case HA_READ_PREFIX_LAST: return(PAGE_CUR_LE); - /* the above PREFIX flags mean that the last - field in the key value may just be a prefix - of the complete fixed length field */ - default: assert(0); - } - - return(0); -} - -/************************************************************************** -Positions an index cursor to the index specified in the handle. Fetches the -row if any. 
*/ - -int -ha_innobase::index_read( -/*====================*/ - /* out: 0, HA_ERR_KEY_NOT_FOUND, - or error number */ - mysql_byte* buf, /* in/out: buffer for the returned - row */ - const mysql_byte* key_ptr,/* in: key value; if this is NULL - we position the cursor at the - start or end of index */ - uint key_len,/* in: key value length */ - enum ha_rkey_function find_flag)/* in: search flags from my_base.h */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - ulint mode; - dict_index_t* index; - ulint match_mode = 0; - int error; - ulint ret; - - DBUG_ENTER("index_read"); - statistic_increment(ha_read_key_count, &LOCK_status); - - if (last_query_id != user_thd->query_id) { - prebuilt->sql_stat_start = TRUE; - last_query_id = user_thd->query_id; - } - - index = prebuilt->index; - - /* Note that if the select is used for an update, we always - fetch the clustered index record: therefore the index for which the - template is built is not necessarily prebuilt->index, but can also - be the clustered index */ - - if (prebuilt->sql_stat_start) { - build_template(prebuilt, user_thd, table, - ROW_MYSQL_REC_FIELDS); - } - - if (key_ptr) { - row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple, - (byte*) key_val_buff, - index, - (byte*) key_ptr, - (ulint) key_len); - } else { - /* We position the cursor to the last or the first entry - in the index */ - - dtuple_set_n_fields(prebuilt->search_tuple, 0); - } - - mode = convert_search_mode_to_innobase(find_flag); - - match_mode = 0; - - if (find_flag == HA_READ_KEY_EXACT) { - match_mode = ROW_SEL_EXACT; - - } else if (find_flag == HA_READ_PREFIX - || find_flag == HA_READ_PREFIX_LAST) { - match_mode = ROW_SEL_EXACT_PREFIX; - } - - last_match_mode = match_mode; - - srv_conc_enter_innodb(prebuilt->trx); - - ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0); - - srv_conc_exit_innodb(); - - if (ret == DB_SUCCESS) { - error = 0; - table->status = 0; - - } else if (ret == 
DB_RECORD_NOT_FOUND) { - error = HA_ERR_KEY_NOT_FOUND; - table->status = STATUS_NOT_FOUND; - - } else if (ret == DB_END_OF_INDEX) { - error = HA_ERR_KEY_NOT_FOUND; - table->status = STATUS_NOT_FOUND; - } else { - error = convert_error_code_to_mysql(ret); - table->status = STATUS_NOT_FOUND; - } - - DBUG_RETURN(error); -} - -/************************************************************************ -Changes the active index of a handle. */ - -int -ha_innobase::change_active_index( -/*=============================*/ - /* out: 0 or error code */ - uint keynr) /* in: use this index; MAX_KEY means always clustered - index, even if it was internally generated by - InnoDB */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - KEY* key; - - statistic_increment(ha_read_key_count, &LOCK_status); - DBUG_ENTER("index_read_idx"); - - active_index = keynr; - - if (keynr != MAX_KEY && table->keys > 0) - { - key = table->key_info + active_index; - - prebuilt->index=dict_table_get_index_noninline(prebuilt->table, key->name); - if (!prebuilt->index) - { - sql_print_error("Innodb could not find key n:o %u with name %s from dict cache for table %s", keynr, key->name, prebuilt->table->name); - return(1); - } - } - else - prebuilt->index = dict_table_get_first_index_noninline(prebuilt->table); - - assert(prebuilt->search_tuple != 0); - - dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields); - - dict_index_copy_types(prebuilt->search_tuple, prebuilt->index, - prebuilt->index->n_fields); - - /* Maybe MySQL changes the active index for a handle also - during some queries, we do not know: then it is safest to build - the template such that all columns will be fetched */ - - build_template(prebuilt, user_thd, table, ROW_MYSQL_WHOLE_ROW); - - DBUG_RETURN(0); -} - -/************************************************************************** -Positions an index cursor to the index specified in keynr. Fetches the -row if any. */ -/* ??? 
This is only used to read whole keys ??? */ - -int -ha_innobase::index_read_idx( -/*========================*/ - /* out: error number or 0 */ - mysql_byte* buf, /* in/out: buffer for the returned - row */ - uint keynr, /* in: use this index */ - const mysql_byte* key, /* in: key value; if this is NULL - we position the cursor at the - start or end of index */ - uint key_len, /* in: key value length */ - enum ha_rkey_function find_flag)/* in: search flags from my_base.h */ -{ - if (change_active_index(keynr)) { - - return(1); - } - - return(index_read(buf, key, key_len, find_flag)); -} - -/*************************************************************************** -Reads the next or previous row from a cursor, which must have previously been -positioned using index_read. */ - -int -ha_innobase::general_fetch( -/*=======================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error - number */ - mysql_byte* buf, /* in/out: buffer for next row in MySQL - format */ - uint direction, /* in: ROW_SEL_NEXT or ROW_SEL_PREV */ - uint match_mode) /* in: 0, ROW_SEL_EXACT, or - ROW_SEL_EXACT_PREFIX */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - ulint ret; - int error = 0; - - DBUG_ENTER("general_fetch"); - - srv_conc_enter_innodb(prebuilt->trx); - - ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode, - direction); - srv_conc_exit_innodb(); - - if (ret == DB_SUCCESS) { - error = 0; - table->status = 0; - - } else if (ret == DB_RECORD_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - table->status = STATUS_NOT_FOUND; - - } else if (ret == DB_END_OF_INDEX) { - error = HA_ERR_END_OF_FILE; - table->status = STATUS_NOT_FOUND; - } else { - error = convert_error_code_to_mysql(ret); - table->status = STATUS_NOT_FOUND; - } - - DBUG_RETURN(error); -} - -/*************************************************************************** -Reads the next row from a cursor, which must have previously been -positioned using index_read. 
*/ - -int -ha_innobase::index_next( -/*====================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error - number */ - mysql_byte* buf) /* in/out: buffer for next row in MySQL - format */ -{ - statistic_increment(ha_read_next_count, &LOCK_status); - - return(general_fetch(buf, ROW_SEL_NEXT, 0)); -} - -/*********************************************************************** -Reads the next row matching to the key value given as the parameter. */ - -int -ha_innobase::index_next_same( -/*=========================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error - number */ - mysql_byte* buf, /* in/out: buffer for the row */ - const mysql_byte* key, /* in: key value */ - uint keylen) /* in: key value length */ -{ - statistic_increment(ha_read_next_count, &LOCK_status); - - return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode)); -} - -/*************************************************************************** -Reads the previous row from a cursor, which must have previously been -positioned using index_read. */ - -int -ha_innobase::index_prev( -/*====================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error - number */ - mysql_byte* buf) /* in/out: buffer for previous row in MySQL - format */ -{ - return(general_fetch(buf, ROW_SEL_PREV, 0)); -} - -/************************************************************************ -Positions a cursor on the first record in an index and reads the -corresponding row to buf. 
*/ - -int -ha_innobase::index_first( -/*=====================*/ - /* out: 0, HA_ERR_END_OF_FILE, - or error code */ - mysql_byte* buf) /* in/out: buffer for the row */ -{ - int error; - - DBUG_ENTER("index_first"); - statistic_increment(ha_read_first_count, &LOCK_status); - - error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY); - - /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */ - - if (error == HA_ERR_KEY_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - } - - DBUG_RETURN(error); -} - -/************************************************************************ -Positions a cursor on the last record in an index and reads the -corresponding row to buf. */ - -int -ha_innobase::index_last( -/*====================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error code */ - mysql_byte* buf) /* in/out: buffer for the row */ -{ - int error; - - DBUG_ENTER("index_first"); - statistic_increment(ha_read_last_count, &LOCK_status); - - error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY); - - /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */ - - if (error == HA_ERR_KEY_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - } - - DBUG_RETURN(error); -} - -/******************************************************************** -Initialize a table scan. */ - -int -ha_innobase::rnd_init( -/*==================*/ - /* out: 0 or error number */ - bool scan) /* in: ???????? */ -{ - int err; - - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - - if (prebuilt->clust_index_was_generated) { - err = change_active_index(MAX_KEY); - } else { - err = change_active_index(primary_key); - } - - start_of_scan = 1; - - return(err); -} - -/********************************************************************* -Ends a table scan ???????????????? 
*/ - -int -ha_innobase::rnd_end(void) -/*======================*/ - /* out: 0 or error number */ -{ - return(index_end()); -} - -/********************************************************************* -Reads the next row in a table scan (also used to read the FIRST row -in a table scan). */ - -int -ha_innobase::rnd_next( -/*==================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error number */ - mysql_byte* buf)/* in/out: returns the row in this buffer, - in MySQL format */ -{ - int error; - - DBUG_ENTER("rnd_next"); - statistic_increment(ha_read_rnd_next_count, &LOCK_status); - - if (start_of_scan) { - error = index_first(buf); - if (error == HA_ERR_KEY_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - } - start_of_scan = 0; - } else { - error = general_fetch(buf, ROW_SEL_NEXT, 0); - } - - DBUG_RETURN(error); -} - -/************************************************************************** -Fetches a row from the table based on a reference. TODO: currently we use -'ref_stored_len' of the handle as the key length. This may change. 
*/ - -int -ha_innobase::rnd_pos( -/*=================*/ - /* out: 0, HA_ERR_KEY_NOT_FOUND, - or error code */ - mysql_byte* buf, /* in/out: buffer for the row */ - mysql_byte* pos) /* in: primary key value in MySQL format */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - int error; - uint keynr = active_index; - - DBUG_ENTER("rnd_pos"); - statistic_increment(ha_read_rnd_count, &LOCK_status); - - if (prebuilt->clust_index_was_generated) { - /* No primary key was defined for the table and we - generated the clustered index from the row id: the - row reference is the row id, not any key value - that MySQL knows */ - - error = change_active_index(MAX_KEY); - } else { - error = change_active_index(primary_key); - } - - if (error) { - DBUG_RETURN(error); - } - - error = index_read(buf, pos, ref_stored_len, HA_READ_KEY_EXACT); - - change_active_index(keynr); - - DBUG_RETURN(error); -} - -/************************************************************************* -Stores a reference to the current row to 'ref' field of the handle. Note -that the function parameter is illogical: we must assume that 'record' -is the current 'position' of the handle, because if row ref is actually -the row id internally generated in InnoDB, then 'record' does not contain -it. We just guess that the row id must be for the record where the handle -was positioned the last time. 
*/ - -void -ha_innobase::position( -/*==================*/ - const mysql_byte* record) /* in: row in MySQL format */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - uint len; - - if (prebuilt->clust_index_was_generated) { - /* No primary key was defined for the table and we - generated the clustered index from row id: the - row reference will be the row id, not any key value - that MySQL knows */ - - len = DATA_ROW_ID_LEN; - - memcpy(ref, prebuilt->row_id, len); - } else { - len = store_key_val_for_row(primary_key, (char*) ref, record); - } - - DBUG_ASSERT(len <= ref_length); - - ref_stored_len = len; -} - -/*********************************************************************** -Tells something additional to the handler about how to do things. */ - -int -ha_innobase::extra( -/*===============*/ - /* out: 0 or error number */ - enum ha_extra_function operation) - /* in: HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - - switch (operation) { - case HA_EXTRA_RESET: - case HA_EXTRA_RESET_STATE: - prebuilt->read_just_key = 0; - break; - case HA_EXTRA_NO_KEYREAD: - prebuilt->read_just_key = 0; - break; - case HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE: - prebuilt->in_update_remember_pos = FALSE; - break; - case HA_EXTRA_KEYREAD: - prebuilt->read_just_key = 1; - break; - default:/* Do nothing */ - ; - } - - return(0); -} - -int ha_innobase::reset(void) -{ - return(0); -} - -/********************************************************************** -As MySQL will execute an external lock for every new table it uses when it -starts to process an SQL statement, we can use this function to store the -pointer to the THD in the handle. We will also use this function to communicate -to InnoDB that a new SQL statement has started and that we must store a -savepoint to our transaction handle, so that we are able to roll back -the SQL statement in case of an error. 
*/ - -int -ha_innobase::external_lock( -/*=======================*/ - THD* thd, /* in: handle to the user thread */ - int lock_type) /* in: lock type */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - int error = 0; - trx_t* trx; - - DBUG_ENTER("ha_innobase::external_lock"); - - update_thd(thd); - - trx = prebuilt->trx; - - prebuilt->sql_stat_start = TRUE; - prebuilt->in_update_remember_pos = TRUE; - - prebuilt->read_just_key = 0; - - if (lock_type == F_WRLCK) { - - /* If this is a SELECT, then it is in UPDATE TABLE ... - or SELECT ... FOR UPDATE */ - prebuilt->select_lock_type = LOCK_X; - } - - if (lock_type != F_UNLCK) { - if (trx->n_mysql_tables_in_use == 0) { - trx_mark_sql_stat_end(trx); - } - - thd->transaction.all.innodb_active_trans = 1; - trx->n_mysql_tables_in_use++; - - if (prebuilt->select_lock_type != LOCK_NONE) { - - trx->mysql_n_tables_locked++; - } - } else { - trx->n_mysql_tables_in_use--; - auto_inc_counter_for_this_stat = 0; - - if (trx->n_mysql_tables_in_use == 0) { - - trx->mysql_n_tables_locked = 0; - - if (trx->has_search_latch) { - - trx_search_latch_release_if_reserved(trx); - } - - if (trx->auto_inc_lock) { - - /* If we had reserved the auto-inc lock for - some table in this SQL statement, we release - it now */ - - srv_conc_enter_innodb(trx); - row_unlock_table_autoinc_for_mysql(trx); - srv_conc_exit_innodb(); - } - - if (!(thd->options - & (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) { - - innobase_commit(thd, trx); - } - } - } - - DBUG_RETURN(error); -} - -/********************************************************************* -Creates a table definition to an InnoDB database. 
*/ -static -int -create_table_def( -/*=============*/ - trx_t* trx, /* in: InnoDB transaction handle */ - TABLE* form, /* in: information on table - columns and indexes */ - const char* table_name) /* in: table name */ -{ - Field* field; - dict_table_t* table; - ulint n_cols; - int error; - ulint col_type; - ulint nulls_allowed; - ulint unsigned_type; - ulint i; - - DBUG_ENTER("create_table_def"); - DBUG_PRINT("enter", ("table_name: %s", table_name)); - - n_cols = form->fields; - - /* The '0' below specifies that everything is currently - created in tablespace 0 */ - - table = dict_mem_table_create((char*) table_name, 0, n_cols); - - for (i = 0; i < n_cols; i++) { - field = form->field[i]; - - col_type = get_innobase_type_from_mysql_type(field); - if (field->null_ptr) { - nulls_allowed = 0; - } else { - nulls_allowed = DATA_NOT_NULL; - } - - if (field->flags & UNSIGNED_FLAG) { - unsigned_type = DATA_UNSIGNED; - } else { - unsigned_type = 0; - } - - dict_mem_table_add_col(table, (char*) field->field_name, - col_type, (ulint)field->type() - | nulls_allowed | unsigned_type, - field->pack_length(), 0); - } - - error = row_create_table_for_mysql(table, trx); - - error = convert_error_code_to_mysql(error); - - DBUG_RETURN(error); -} - -/********************************************************************* -Creates an index in an InnoDB database. 
*/ -static -int -create_index( -/*=========*/ - trx_t* trx, /* in: InnoDB transaction handle */ - TABLE* form, /* in: information on table - columns and indexes */ - const char* table_name, /* in: table name */ - uint key_num) /* in: index number */ -{ - dict_index_t* index; - int error; - ulint n_fields; - KEY* key; - KEY_PART_INFO* key_part; - ulint ind_type; - ulint i; - - DBUG_ENTER("create_index"); - - key = form->key_info + key_num; - - n_fields = key->key_parts; - - ind_type = 0; - - if (strcmp(key->name, "PRIMARY") == 0) { - ind_type = ind_type | DICT_CLUSTERED; - } - - if (key->flags & HA_NOSAME ) { - ind_type = ind_type | DICT_UNIQUE; - } - - /* The '0' below specifies that everything in InnoDB is currently - created in tablespace 0 */ - - index = dict_mem_index_create((char*) table_name, key->name, 0, - ind_type, n_fields); - for (i = 0; i < n_fields; i++) { - key_part = key->key_part + i; - - /* We assume all fields should be sorted in ascending - order, hence the '0': */ - dict_mem_index_add_field(index, - (char*) key_part->field->field_name, 0); - } - - error = row_create_index_for_mysql(index, trx); - - error = convert_error_code_to_mysql(error); - - DBUG_RETURN(error); -} - -/********************************************************************* -Creates an index to an InnoDB table when the user has defined no -primary index. 
*/ -static -int -create_clustered_index_when_no_primary( -/*===================================*/ - trx_t* trx, /* in: InnoDB transaction handle */ - const char* table_name) /* in: table name */ -{ - dict_index_t* index; - int error; - - /* The first '0' below specifies that everything in InnoDB is - currently created in file space 0 */ - - index = dict_mem_index_create((char*) table_name, - (char*) "GEN_CLUST_INDEX", - 0, DICT_CLUSTERED, 0); - error = row_create_index_for_mysql(index, trx); - - error = convert_error_code_to_mysql(error); - - return(error); -} - -/********************************************************************* -Creates a new table to an InnoDB database. */ - -int -ha_innobase::create( -/*================*/ - /* out: error number */ - const char* name, /* in: table name */ - TABLE* form, /* in: information on table - columns and indexes */ - HA_CREATE_INFO* create_info) /* in: more information of the - created table, contains also the - create statement string */ -{ - int error; - dict_table_t* innobase_table; - trx_t* trx; - int primary_key_no = -1; - KEY* key; - uint i; - char name2[FN_REFLEN]; - char norm_name[FN_REFLEN]; - - DBUG_ENTER("ha_innobase::create"); - - trx = trx_allocate_for_mysql(); - - fn_format(name2, name, "", "",2); // Remove the .frm extension - - normalize_table_name(norm_name, name2); - - /* Create the table definition in InnoDB */ - - if ((error = create_table_def(trx, form, norm_name))) { - - trx_commit_for_mysql(trx); - - trx_free_for_mysql(trx); - - DBUG_RETURN(error); - } - - /* Look for a primary key */ - - for (i = 0; i < form->keys; i++) { - key = form->key_info + i; - - if (strcmp(key->name, "PRIMARY") == 0) { - primary_key_no = (int) i; - } - } - - /* Our function row_get_mysql_key_number_for_index assumes - the primary key is always number 0, if it exists */ - - assert(primary_key_no == -1 || primary_key_no == 0); - - /* Create the keys */ - - if (form->keys == 0 || primary_key_no == -1) { - /* Create an index 
which is used as the clustered index; - order the rows by their row id which is internally generated - by InnoDB */ - - error = create_clustered_index_when_no_primary(trx, - norm_name); - if (error) { - trx_commit_for_mysql(trx); - - trx_free_for_mysql(trx); - - DBUG_RETURN(error); - } - } - - if (primary_key_no != -1) { - /* In InnoDB the clustered index must always be created - first */ - if ((error = create_index(trx, form, norm_name, - (uint) primary_key_no))) { - trx_commit_for_mysql(trx); - - trx_free_for_mysql(trx); - - DBUG_RETURN(error); - } - } - - for (i = 0; i < form->keys; i++) { - - if (i != (uint) primary_key_no) { - - if ((error = create_index(trx, form, norm_name, i))) { - - trx_commit_for_mysql(trx); - - trx_free_for_mysql(trx); - - DBUG_RETURN(error); - } - } - } - - error = row_table_add_foreign_constraints(trx, - create_info->create_statement, norm_name); - - error = convert_error_code_to_mysql(error); - - if (error) { - trx_commit_for_mysql(trx); - - trx_free_for_mysql(trx); - - DBUG_RETURN(error); - } - - trx_commit_for_mysql(trx); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); - - innobase_table = dict_table_get(norm_name, NULL); - - assert(innobase_table != 0); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - trx_free_for_mysql(trx); - - DBUG_RETURN(0); -} - -/********************************************************************* -Drops a table from an InnoDB database. Before calling this function, -MySQL calls innobase_commit to commit the transaction of the current user. -Then the current user cannot have locks set on the table. Drop table -operation inside InnoDB will remove all locks any user has on the table -inside InnoDB. 
*/ - -int -ha_innobase::delete_table( -/*======================*/ - /* out: error number */ - const char* name) /* in: table name */ -{ - ulint name_len; - int error; - trx_t* trx; - char norm_name[1000]; - - DBUG_ENTER("ha_innobase::delete_table"); - - trx = trx_allocate_for_mysql(); - - name_len = strlen(name); - - assert(name_len < 1000); - - /* Strangely, MySQL passes the table name without the '.frm' - extension, in contrast to ::create */ - - normalize_table_name(norm_name, name); - - /* Drop the table in InnoDB */ - - error = row_drop_table_for_mysql(norm_name, trx, FALSE); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - trx_commit_for_mysql(trx); - - trx_free_for_mysql(trx); - - error = convert_error_code_to_mysql(error); - - DBUG_RETURN(error); -} - -/********************************************************************* -Removes all tables in the named database inside InnoDB. 
*/ - -int -innobase_drop_database( -/*===================*/ - /* out: error number */ - char* path) /* in: database path; inside InnoDB the name - of the last directory in the path is used as - the database name: for example, in 'mysql/data/test' - the database name is 'test' */ -{ - ulint len = 0; - trx_t* trx; - char* ptr; - int error; - char namebuf[10000]; - - ptr = strend(path) - 2; - - while (ptr >= path && *ptr != '\\' && *ptr != '/') { - ptr--; - len++; - } - - ptr++; - - memcpy(namebuf, ptr, len); - namebuf[len] = '/'; - namebuf[len + 1] = '\0'; - - trx = trx_allocate_for_mysql(); - - error = row_drop_database_for_mysql(namebuf, trx); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - trx_commit_for_mysql(trx); - trx_free_for_mysql(trx); - - error = convert_error_code_to_mysql(error); - - return(error); -} - -/************************************************************************* -Renames an InnoDB table. 
*/ - -int -ha_innobase::rename_table( -/*======================*/ - /* out: 0 or error code */ - const char* from, /* in: old name of the table */ - const char* to) /* in: new name of the table */ -{ - ulint name_len1; - ulint name_len2; - int error; - trx_t* trx; - char norm_from[1000]; - char norm_to[1000]; - - DBUG_ENTER("ha_innobase::rename_table"); - - trx = trx_allocate_for_mysql(); - - name_len1 = strlen(from); - name_len2 = strlen(to); - - assert(name_len1 < 1000); - assert(name_len2 < 1000); - - normalize_table_name(norm_from, from); - normalize_table_name(norm_to, to); - - /* Rename the table in InnoDB */ - - error = row_rename_table_for_mysql(norm_from, norm_to, trx); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - trx_commit_for_mysql(trx); - trx_free_for_mysql(trx); - - error = convert_error_code_to_mysql(error); - - DBUG_RETURN(error); -} - -/************************************************************************* -Estimates the number of index records in a range. 
*/ - -ha_rows -ha_innobase::records_in_range( -/*==========================*/ - /* out: estimated number of rows, - currently 32-bit int or uint */ - int keynr, /* in: index number */ - const mysql_byte* start_key, /* in: start key value of the - range, may also be empty */ - uint start_key_len, /* in: start key val len, may - also be 0 */ - enum ha_rkey_function start_search_flag,/* in: start search condition - e.g., 'greater than' */ - const mysql_byte* end_key, /* in: range end key val, may - also be empty */ - uint end_key_len, /* in: range end key val len, - may also be 0 */ - enum ha_rkey_function end_search_flag)/* in: range end search cond */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - KEY* key; - dict_index_t* index; - mysql_byte* key_val_buff2 = (mysql_byte*) my_malloc( - table->reclength - + table->max_key_length + 100, - MYF(MY_WME)); - dtuple_t* range_start; - dtuple_t* range_end; - ulint n_rows; - ulint mode1; - ulint mode2; - void* heap1; - void* heap2; - - DBUG_ENTER("records_in_range"); - - if (prebuilt->trx) { - prebuilt->trx->op_info = (char*) "estimating range size"; - } - - active_index = keynr; - - key = table->key_info + active_index; - - index = dict_table_get_index_noninline(prebuilt->table, key->name); - - range_start = dtuple_create_for_mysql(&heap1, key->key_parts); - dict_index_copy_types(range_start, index, key->key_parts); - - range_end = dtuple_create_for_mysql(&heap2, key->key_parts); - dict_index_copy_types(range_end, index, key->key_parts); - - row_sel_convert_mysql_key_to_innobase( - range_start, (byte*) key_val_buff, index, - (byte*) start_key, - (ulint) start_key_len); - - row_sel_convert_mysql_key_to_innobase( - range_end, (byte*) key_val_buff2, index, - (byte*) end_key, - (ulint) end_key_len); - - mode1 = convert_search_mode_to_innobase(start_search_flag); - mode2 = convert_search_mode_to_innobase(end_search_flag); - - n_rows = btr_estimate_n_rows_in_range(index, range_start, - mode1, range_end, 
mode2); - dtuple_free_for_mysql(heap1); - dtuple_free_for_mysql(heap2); - - my_free((char*) key_val_buff2, MYF(0)); - - if (prebuilt->trx) { - prebuilt->trx->op_info = (char*) ""; - } - - DBUG_RETURN((ha_rows) n_rows); -} - -/************************************************************************* -Gives an UPPER BOUND to the number of rows in a table. This is used in -filesort.cc and the upper bound must hold. TODO: Since the number of -rows in a table may change after this function is called, we still may -get a 'Sort aborted' error in filesort.cc of MySQL. The ultimate fix is to -improve the algorithm of filesort.cc. */ - -ha_rows -ha_innobase::estimate_number_of_rows(void) -/*======================================*/ - /* out: upper bound of rows */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - dict_index_t* index; - ulonglong estimate; - ulonglong data_file_length; - - if (prebuilt->trx) { - prebuilt->trx->op_info = - (char*) "estimating upper bound of table size"; - } - - DBUG_ENTER("info"); - - index = dict_table_get_first_index_noninline(prebuilt->table); - - data_file_length = ((ulonglong) index->stat_n_leaf_pages) - * UNIV_PAGE_SIZE; - - /* Calculate a minimum length for a clustered index record and from - that an upper bound for the number of rows. Since we only calculate - new statistics in row0mysql.c when a tablehas grown - by a threshold factor, we must add a safety factor 2 in front - of the formula below. */ - - estimate = 2 * data_file_length / dict_index_calc_min_rec_len(index); - - if (prebuilt->trx) { - prebuilt->trx->op_info = (char*) ""; - } - - return((ha_rows) estimate); -} - -/************************************************************************* -How many seeks it will take to read through the table. This is to be -comparable to the number returned by records_in_range so that we can -decide if we should scan the table or use keys. 
*/ - -double -ha_innobase::scan_time() -/*====================*/ - /* out: estimated time measured in disk seeks */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - - /* In the following formula we assume that scanning 10 pages - takes the same time as a disk seek: */ - - return((double) (prebuilt->table->stat_clustered_index_size / 10)); -} - -/************************************************************************* -Returns statistics information of the table to the MySQL interpreter, -in various fields of the handle object. */ - -void -ha_innobase::info( -/*==============*/ - uint flag) /* in: what information MySQL requests */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - dict_table_t* ib_table; - dict_index_t* index; - ulong rec_per_key; - ulong j; - ulong i; - - DBUG_ENTER("info"); - - if (prebuilt->trx) { - prebuilt->trx->op_info = (char*) "calculating table stats"; - } - - ib_table = prebuilt->table; - - if (flag & HA_STATUS_TIME) { - /* In sql_show we call with this flag: update then statistics - so that they are up-to-date */ - - dict_update_statistics(ib_table); - } - - if (flag & HA_STATUS_VARIABLE) { - records = (ha_rows)ib_table->stat_n_rows; - deleted = 0; - data_file_length = ((ulonglong) - ib_table->stat_clustered_index_size) - * UNIV_PAGE_SIZE; - index_file_length = ((ulonglong) - ib_table->stat_sum_of_other_index_sizes) - * UNIV_PAGE_SIZE; - delete_length = 0; - check_time = 0; - - if (records == 0) { - mean_rec_length = 0; - } else { - mean_rec_length = (ulong) (data_file_length / records); - } - } - - if (flag & HA_STATUS_CONST) { - index = dict_table_get_first_index_noninline(ib_table); - - if (prebuilt->clust_index_was_generated) { - index = dict_table_get_next_index_noninline(index); - } - - for (i = 0; i < table->keys; i++) { - for (j = 0; j < table->key_info[i].key_parts; j++) { - - if (index->stat_n_diff_key_vals[j + 1] == 0) { - - rec_per_key = records; - } else { - rec_per_key = 
(ulong)(records / - index->stat_n_diff_key_vals[j + 1]); - } - - if (rec_per_key == 0) { - rec_per_key = 1; - } - - table->key_info[i].rec_per_key[j] - = rec_per_key; - } - - index = dict_table_get_next_index_noninline(index); - } - } - - /* The trx struct in InnoDB contains a pthread mutex embedded: - in the debug version of MySQL that it replaced by a 'safe mutex' - which is of a different size. We have to use a function to access - trx fields. Otherwise trx->error_info will be a random - pointer and cause a seg fault. */ - - if (flag & HA_STATUS_ERRKEY) { - errkey = (unsigned int) row_get_mysql_key_number_for_index( - (dict_index_t*) - trx_get_error_info(prebuilt->trx)); - } - - if (prebuilt->trx) { - prebuilt->trx->op_info = (char*) ""; - } - - DBUG_VOID_RETURN; -} - -/*********************************************************************** -Tries to check that an InnoDB table is not corrupted. If corruption is -noticed, prints to stderr information about it. In case of corruption -may also assert a failure and crash the server. */ - -int -ha_innobase::check( -/*===============*/ - /* out: HA_ADMIN_CORRUPT or - HA_ADMIN_OK */ - THD* thd, /* in: user thread handle */ - HA_CHECK_OPT* check_opt) /* in: check options, currently - ignored */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - ulint ret; - - if (prebuilt->mysql_template == NULL) { - /* Build the template; we will use a dummy template - in index scans done in checking */ - - build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW); - } - - ret = row_check_table_for_mysql(prebuilt); - - if (ret == DB_SUCCESS) { - return(HA_ADMIN_OK); - } - - return(HA_ADMIN_CORRUPT); -} - -/***************************************************************** -Adds information about free space in the InnoDB tablespace to a table comment -which is printed out when a user calls SHOW TABLE STATUS. Adds also info on -foreign keys. 
*/ - -char* -ha_innobase::update_table_comment( -/*==============================*/ - /* out: table comment + InnoDB free space + - info on foreign keys */ - const char* comment)/* in: table comment defined by user */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt; - uint length = strlen(comment); - char* str = my_malloc(length + 550, MYF(0)); - char* pos; - - if (!str) { - return((char*)comment); - } - - pos = str; - if (length) { - pos=strmov(str, comment); - *pos++=';'; - *pos++=' '; - } - - pos += sprintf(pos, "InnoDB free: %lu kB", - (ulong) innobase_get_free_space()); - - /* We assume 150 bytes of space to print info */ - - dict_print_info_on_foreign_keys(pos, 500, prebuilt->table); - - return(str); -} - -/**************************************************************************** - Handling the shared INNOBASE_SHARE structure that is needed to provide table - locking. -****************************************************************************/ - -static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length, - my_bool not_used __attribute__((unused))) -{ - *length=share->table_name_length; - return (mysql_byte*) share->table_name; -} - -static INNOBASE_SHARE *get_share(const char *table_name) -{ - INNOBASE_SHARE *share; - pthread_mutex_lock(&innobase_mutex); - uint length=(uint) strlen(table_name); - if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables, - (mysql_byte*) table_name, - length))) - { - if ((share=(INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1, - MYF(MY_WME | MY_ZEROFILL)))) - { - share->table_name_length=length; - share->table_name=(char*) (share+1); - strmov(share->table_name,table_name); - if (hash_insert(&innobase_open_tables, (mysql_byte*) share)) - { - pthread_mutex_unlock(&innobase_mutex); - my_free((gptr) share,0); - return 0; - } - thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); - } - } - share->use_count++; - pthread_mutex_unlock(&innobase_mutex); - 
return share; -} - -static void free_share(INNOBASE_SHARE *share) -{ - pthread_mutex_lock(&innobase_mutex); - if (!--share->use_count) - { - hash_delete(&innobase_open_tables, (mysql_byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); - } - pthread_mutex_unlock(&innobase_mutex); -} - -/********************************************************************* -Stores a MySQL lock into a 'lock' field in a handle. */ - -THR_LOCK_DATA** -ha_innobase::store_lock( -/*====================*/ - /* out: pointer to the next - element in the 'to' array */ - THD* thd, /* in: user thread handle */ - THR_LOCK_DATA** to, /* in: pointer to an array - of pointers to lock structs; - pointer to the 'lock' field - of current handle is stored - next to this array */ - enum thr_lock_type lock_type) /* in: lock type to store in - 'lock' */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - - if (lock_type == TL_READ_WITH_SHARED_LOCKS || - lock_type == TL_READ_NO_INSERT) { - /* This is a SELECT ... IN SHARE MODE, or - we are doing a complex SQL statement like - INSERT INTO ... SELECT ... and the logical logging - requires the use of a locking read */ - - prebuilt->select_lock_type = LOCK_S; - } else { - /* We set possible LOCK_X value in external_lock, not yet - here even if this would be SELECT ... FOR UPDATE */ - - prebuilt->select_lock_type = LOCK_NONE; - } - - if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) { - - /* If we are not doing a LOCK TABLE, then allow multiple - writers */ - - if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && - lock_type <= TL_WRITE) && !thd->in_lock_tables) { - - lock_type = TL_WRITE_ALLOW_WRITE; - } - - lock.type=lock_type; - } - - *to++= &lock; - - return(to); -} - -/*********************************************************************** -Returns the next auto-increment column value for the table. 
write_row -normally fetches the value from the cache in the data dictionary. This -function in used by SHOW TABLE STATUS and when the first insert to the table -is done after database startup. */ - -longlong -ha_innobase::get_auto_increment() -/*=============================*/ - /* out: the next auto-increment column value */ -{ - row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - longlong nr; - int error; - - (void) extra(HA_EXTRA_KEYREAD); - index_init(table->next_number_index); - - /* We use an exclusive lock when we read the max key value from the - auto-increment column index. This is because then build_template will - advise InnoDB to fetch all columns. In SHOW TABLE STATUS the query - id of the auto-increment column is not changed, and previously InnoDB - did not fetch it, causing SHOW TABLE STATUS to show wrong values - for the autoinc column. */ - - prebuilt->select_lock_type = LOCK_X; - prebuilt->trx->mysql_n_tables_locked += 1; - - error=index_last(table->record[1]); - - if (error) { - nr = 1; - } else { - nr = (longlong) table->next_number_field-> - val_int_offset(table->rec_buff_length) + 1; - } - - (void) extra(HA_EXTRA_NO_KEYREAD); - - index_end(); - - return(nr); -} - - -#endif /* HAVE_INNOBASE_DB */ diff --git a/sql/ha_innobase.h b/sql/ha_innobase.h deleted file mode 100644 index 3c3025c39c1..00000000000 --- a/sql/ha_innobase.h +++ /dev/null @@ -1,192 +0,0 @@ -/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB - && Innobase Oy - - -This file is modified from ha_berkeley.h of MySQL distribution- - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#ifdef __GNUC__ -#pragma interface /* gcc class implementation */ -#endif - -/* This file defines the Innobase handler: the interface between MySQL and -Innobase */ - -typedef struct st_innobase_share { - THR_LOCK lock; - pthread_mutex_t mutex; - char *table_name; - uint table_name_length,use_count; -} INNOBASE_SHARE; - - -/* The class defining a handle to an Innobase table */ -class ha_innobase: public handler -{ - void* innobase_prebuilt; /* (row_prebuilt_t*) prebuilt - struct in Innobase, used to save - CPU */ - THD* user_thd; /* the thread handle of the user - currently using the handle; this is - set in external_lock function */ - ulong last_query_id; /* the latest query id where the - handle was used */ - THR_LOCK_DATA lock; - INNOBASE_SHARE *share; - - gptr alloc_ptr; - byte* upd_buff; /* buffer used in updates */ - byte* key_val_buff; /* buffer used in converting - search key values from MySQL format - to Innobase format */ - uint ref_stored_len; /* length of the key value stored to - 'ref' buffer of the handle, if any */ - ulong int_option_flag; - uint primary_key; - uint last_dup_key; - ulong start_of_scan; /* this is set to 1 when we are - starting a table scan but have not - yet fetched any row, else 0 */ - - uint last_match_mode;/* match mode of the latest search: - ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX, - or undefined */ - longlong auto_inc_counter_for_this_stat; - ulong max_row_length(const byte *buf); - - uint store_key_val_for_row(uint keynr, char* buff, const byte* record); - int update_thd(THD* thd); - 
int change_active_index(uint keynr); - int general_fetch(byte* buf, uint direction, uint match_mode); - - /* Init values for the class: */ - public: - ha_innobase(TABLE *table): handler(table), - int_option_flag(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | - HA_REC_NOT_IN_SEQ | - HA_KEYPOS_TO_RNDPOS | HA_LASTKEY_ORDER | - HA_HAVE_KEY_READ_ONLY | HA_READ_NOT_EXACT_KEY | - HA_LONGLONG_KEYS | HA_NULL_KEY | - HA_NOT_EXACT_COUNT | - HA_NO_WRITE_DELAYED | - HA_PRIMARY_KEY_IN_READ_INDEX | - HA_DROP_BEFORE_CREATE | - HA_NO_PREFIX_CHAR_KEYS), - last_dup_key((uint) -1), - start_of_scan(0) - { - } - ~ha_innobase() {} - - const char* table_type() const { return("InnoDB");} - const char *index_type(uint key_number) { return "BTREE"; } - const char** bas_ext() const; - ulong option_flag() const { return int_option_flag; } - uint max_record_length() const { return HA_MAX_REC_LENGTH; } - uint max_keys() const { return MAX_KEY; } - uint max_key_parts() const { return MAX_REF_PARTS; } - /* An InnoDB page must store >= 2 keys; - a secondary key record must also contain the - primary key value: - max key length is therefore set to slightly - less than 1 / 4 of page size which is 16 kB; - but currently MySQL does not work with keys - whose size is > MAX_KEY_LENGTH */ - uint max_key_length() const { return((MAX_KEY_LENGTH <= 3500) ? 
- MAX_KEY_LENGTH : 3500);} - bool fast_key_read() { return 1;} - key_map keys_to_use_for_scanning() { return ~(key_map) 0; } - bool has_transactions() { return 1;} - - int open(const char *name, int mode, uint test_if_locked); - void initialize(void); - int close(void); - double scan_time(); - - int write_row(byte * buf); - int update_row(const byte * old_data, byte * new_data); - int delete_row(const byte * buf); - - int index_init(uint index); - int index_end(); - int index_read(byte * buf, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_idx(byte * buf, uint index, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_next(byte * buf); - int index_next_same(byte * buf, const byte *key, uint keylen); - int index_prev(byte * buf); - int index_first(byte * buf); - int index_last(byte * buf); - - int rnd_init(bool scan=1); - int rnd_end(); - int rnd_next(byte *buf); - int rnd_pos(byte * buf, byte *pos); - - void position(const byte *record); - void info(uint); - int extra(enum ha_extra_function operation); - int reset(void); - int external_lock(THD *thd, int lock_type); - void position(byte *record); - ha_rows records_in_range(int inx, - const byte *start_key,uint start_key_len, - enum ha_rkey_function start_search_flag, - const byte *end_key,uint end_key_len, - enum ha_rkey_function end_search_flag); - ha_rows estimate_number_of_rows(); - - int create(const char *name, register TABLE *form, - HA_CREATE_INFO *create_info); - int delete_table(const char *name); - int rename_table(const char* from, const char* to); - int check(THD* thd, HA_CHECK_OPT* check_opt); - char* update_table_comment(const char* comment); - - THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type); - longlong get_auto_increment(); -}; - -extern bool innodb_skip; -extern SHOW_COMP_OPTION have_innodb; -extern uint innobase_init_flags, innobase_lock_type; -extern ulong innobase_cache_size; -extern char 
*innobase_home, *innobase_tmpdir, *innobase_logdir; -extern long innobase_lock_scan_time; -extern long innobase_mirrored_log_groups, innobase_log_files_in_group; -extern long innobase_log_file_size, innobase_log_buffer_size; -extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size; -extern long innobase_file_io_threads, innobase_lock_wait_timeout; -extern long innobase_force_recovery, innobase_thread_concurrency; -extern char *innobase_data_home_dir, *innobase_data_file_path; -extern char *innobase_log_group_home_dir, *innobase_log_arch_dir; -extern char *innobase_unix_file_flush_method; -extern bool innobase_flush_log_at_trx_commit, innobase_log_archive, - innobase_use_native_aio, innobase_fast_shutdown; - -extern TYPELIB innobase_lock_typelib; - -bool innobase_init(void); -bool innobase_end(void); -bool innobase_flush_logs(void); -uint innobase_get_free_space(void); - -int innobase_commit(THD *thd, void* trx_handle); -int innobase_rollback(THD *thd, void* trx_handle); -int innobase_close_connection(THD *thd); -int innobase_drop_database(char *path); - diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc new file mode 100644 index 00000000000..8941481a95f --- /dev/null +++ b/sql/ha_innodb.cc @@ -0,0 +1,3420 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & InnoDB Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* This file defines the InnoDB handler: the interface between MySQL and +InnoDB */ + +/* TODO list for the InnoDB handler: + - Ask Monty if strings of different languages can exist in the same + database. Answer: in near future yes, but not yet. +*/ + +#ifdef __GNUC__ +#pragma implementation // gcc: Class implementation +#endif + +#include "mysql_priv.h" +#ifdef HAVE_INNOBASE_DB +#include +#include +#include +#include + +#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1)) + +#include "ha_innodb.h" + +/* We must declare this here because we undef SAFE_MUTEX below */ +pthread_mutex_t innobase_mutex; + +/* Store MySQL definition of 'byte': in Linux it is char while InnoDB +uses unsigned char */ +typedef byte mysql_byte; + +#ifdef SAFE_MUTEX +#undef pthread_mutex_t +#endif + +#define INSIDE_HA_INNOBASE_CC + +/* Include necessary InnoDB headers */ +extern "C" { +#include "../innobase/include/univ.i" +#include "../innobase/include/srv0start.h" +#include "../innobase/include/srv0srv.h" +#include "../innobase/include/trx0roll.h" +#include "../innobase/include/trx0trx.h" +#include "../innobase/include/row0ins.h" +#include "../innobase/include/row0mysql.h" +#include "../innobase/include/row0sel.h" +#include "../innobase/include/row0upd.h" +#include "../innobase/include/log0log.h" +#include "../innobase/include/lock0lock.h" +#include "../innobase/include/dict0crea.h" +#include "../innobase/include/btr0cur.h" +#include "../innobase/include/btr0btr.h" +#include "../innobase/include/fsp0fsp.h" +} + +#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */ +#define HA_INNOBASE_RANGE_COUNT 100 + +bool innodb_skip = 0; +uint innobase_init_flags = 0; +ulong innobase_cache_size = 0; + +long innobase_mirrored_log_groups, innobase_log_files_in_group, + 
innobase_log_file_size, innobase_log_buffer_size, + innobase_buffer_pool_size, innobase_additional_mem_pool_size, + innobase_file_io_threads, innobase_lock_wait_timeout, + innobase_thread_concurrency, innobase_force_recovery; + +char *innobase_data_home_dir; +char *innobase_log_group_home_dir, *innobase_log_arch_dir; +char *innobase_unix_file_flush_method; +bool innobase_flush_log_at_trx_commit, innobase_log_archive, + innobase_use_native_aio, innobase_fast_shutdown; + +/* + Set default InnoDB size to 64M, to let users use InnoDB without having + to specify any startup options. +*/ + +char *innobase_data_file_path= (char*) "ibdata1:64M"; +char *internal_innobase_data_file_path=0; + +/* The following counter is used to convey information to InnoDB +about server activity: in selects it is not sensible to call +srv_active_wake_master_thread after each fetch or search, we only do +it every INNOBASE_WAKE_INTERVAL'th step. */ + +#define INNOBASE_WAKE_INTERVAL 32 +ulong innobase_active_counter = 0; + +char* innobase_home = NULL; + +char innodb_dummy_stmt_trx_handle = 'D'; + +static HASH innobase_open_tables; + +static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length, + my_bool not_used __attribute__((unused))); +static INNOBASE_SHARE *get_share(const char *table_name); +static void free_share(INNOBASE_SHARE *share); +static void innobase_print_error(const char* db_errpfx, char* buffer); + +/* General functions */ + +/************************************************************************ +Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth +time calls srv_active_wake_master_thread. This function should be used +when a single database operation may introduce a small need for +server utility activity, like checkpointing. 
*/ +inline +void +innobase_active_small(void) +/*=======================*/ +{ + innobase_active_counter++; + + if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) { + srv_active_wake_master_thread(); + } +} + +/************************************************************************ +Converts an InnoDB error code to a MySQL error code. */ +static +int +convert_error_code_to_mysql( +/*========================*/ + /* out: MySQL error code */ + int error) /* in: InnoDB error code */ +{ + if (error == DB_SUCCESS) { + + return(0); + + } else if (error == (int) DB_DUPLICATE_KEY) { + + return(HA_ERR_FOUND_DUPP_KEY); + + } else if (error == (int) DB_RECORD_NOT_FOUND) { + + return(HA_ERR_NO_ACTIVE_RECORD); + + } else if (error == (int) DB_ERROR) { + + return(HA_ERR_NO_ACTIVE_RECORD); + + } else if (error == (int) DB_DEADLOCK) { + + return(HA_ERR_LOCK_DEADLOCK); + + } else if (error == (int) DB_LOCK_WAIT_TIMEOUT) { + + return(HA_ERR_LOCK_WAIT_TIMEOUT); + + } else if (error == (int) DB_NO_REFERENCED_ROW) { + + return(HA_ERR_NO_REFERENCED_ROW); + + } else if (error == (int) DB_ROW_IS_REFERENCED) { + + return(HA_ERR_ROW_IS_REFERENCED); + + } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) { + + return(HA_ERR_CANNOT_ADD_FOREIGN); + + } else if (error == (int) DB_OUT_OF_FILE_SPACE) { + + return(HA_ERR_RECORD_FILE_FULL); + + } else if (error == (int) DB_TABLE_IS_BEING_USED) { + + return(HA_ERR_WRONG_COMMAND); + + } else if (error == (int) DB_TABLE_NOT_FOUND) { + + return(HA_ERR_KEY_NOT_FOUND); + + } else if (error == (int) DB_TOO_BIG_RECORD) { + + return(HA_ERR_TO_BIG_ROW); + } else { + DBUG_ASSERT(0); + + return(-1); // Unknown error + } +} + +extern "C" { +/***************************************************************** +Prints info of a THD object (== user session thread) to the +standatd output. NOTE that mysql/innobase/trx/trx0trx.c must contain +the prototype for this function! 
*/ + +void +innobase_mysql_print_thd( +/*=====================*/ + void* input_thd)/* in: pointer to a MySQL THD object */ +{ + THD* thd; + + thd = (THD*) input_thd; + + printf("MySQL thread id %lu, query id %lu", + thd->thread_id, thd->query_id); + if (thd->host) { + printf(" %s", thd->host); + } + + if (thd->ip) { + printf(" %s", thd->ip); + } + + if (thd->user) { + printf(" %s", thd->user); + } + + if (thd->proc_info) { + printf(" %s", thd->proc_info); + } + + if (thd->query) { + printf("\n%-.100s", thd->query); + } + + printf("\n"); +} +} + +/************************************************************************* +Gets the InnoDB transaction handle for a MySQL handler object, creates +an InnoDB transaction struct if the corresponding MySQL thread struct still +lacks one. */ +static +trx_t* +check_trx_exists( +/*=============*/ + /* out: InnoDB transaction handle */ + THD* thd) /* in: user thread handle */ +{ + trx_t* trx; + + trx = (trx_t*) thd->transaction.all.innobase_tid; + + if (trx == NULL) { + DBUG_ASSERT(thd != NULL); + trx = trx_allocate_for_mysql(); + + trx->mysql_thd = thd; + + thd->transaction.all.innobase_tid = trx; + + /* The execution of a single SQL statement is denoted by + a 'transaction' handle which is a dummy pointer: InnoDB + remembers internally where the latest SQL statement + started, and if error handling requires rolling back the + latest statement, InnoDB does a rollback to a savepoint. */ + + thd->transaction.stmt.innobase_tid = + (void*)&innodb_dummy_stmt_trx_handle; + } + + return(trx); +} + +/************************************************************************* +Updates the user_thd field in a handle and also allocates a new InnoDB +transaction handle if needed, and updates the transaction fields in the +prebuilt struct. 
*/ +inline +int +ha_innobase::update_thd( +/*====================*/ + /* out: 0 or error code */ + THD* thd) /* in: thd to use the handle */ +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + trx_t* trx; + + trx = check_trx_exists(thd); + + if (prebuilt->trx != trx) { + + row_update_prebuilt_trx(prebuilt, trx); + } + + user_thd = thd; + + return(0); +} + +/************************************************************************* +Reads the data files and their sizes from a character string given in +the .cnf file. */ +static +bool +innobase_parse_data_file_paths_and_sizes(void) +/*==========================================*/ + /* out: TRUE if ok, FALSE if parsing + error */ +{ + char* str; + char* endp; + char* path; + ulint size; + ulint i = 0; + + str = internal_innobase_data_file_path; + + /* First calculate the number of data files and check syntax: + path:size[M];path:size[M]... . Note that a Windows path may + contain a drive name and a ':'. */ + + while (*str != '\0') { + path = str; + + while ((*str != ':' && *str != '\0') + || (*str == ':' + && (*(str + 1) == '\\' || *(str + 1) == '/'))) { + str++; + } + + if (*str == '\0') { + return(FALSE); + } + + str++; + + size = strtoul(str, &endp, 10); + + str = endp; + + if ((*str != 'M') && (*str != 'G')) { + size = size / (1024 * 1024); + } else if (*str == 'G') { + size = size * 1024; + str++; + } else { + str++; + } + + if (strlen(str) >= 6 + && *str == 'n' + && *(str + 1) == 'e' + && *(str + 2) == 'w') { + str += 3; + } + + if (strlen(str) >= 3 + && *str == 'r' + && *(str + 1) == 'a' + && *(str + 2) == 'w') { + str += 3; + } + + if (size == 0) { + return(FALSE); + } + + i++; + + if (*str == ';') { + str++; + } else if (*str != '\0') { + + return(FALSE); + } + } + + srv_data_file_names = (char**)ut_malloc(i * sizeof(void*)); + srv_data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint)); + srv_data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint)); + + srv_n_data_files = i; + + /* 
Then store the actual values to our arrays */ + + str = internal_innobase_data_file_path; + i = 0; + + while (*str != '\0') { + path = str; + + /* Note that we must ignore the ':' in a Windows path */ + + while ((*str != ':' && *str != '\0') + || (*str == ':' + && (*(str + 1) == '\\' || *(str + 1) == '/'))) { + str++; + } + + if (*str == ':') { + /* Make path a null-terminated string */ + *str = '\0'; + str++; + } + + size = strtoul(str, &endp, 10); + + str = endp; + + if ((*str != 'M') && (*str != 'G')) { + size = size / (1024 * 1024); + } else if (*str == 'G') { + size = size * 1024; + str++; + } else { + str++; + } + + srv_data_file_is_raw_partition[i] = 0; + + if (strlen(str) >= 6 + && *str == 'n' + && *(str + 1) == 'e' + && *(str + 2) == 'w') { + str += 3; + srv_data_file_is_raw_partition[i] = SRV_NEW_RAW; + } + + if (strlen(str) >= 3 + && *str == 'r' + && *(str + 1) == 'a' + && *(str + 2) == 'w') { + str += 3; + + if (srv_data_file_is_raw_partition[i] == 0) { + srv_data_file_is_raw_partition[i] = SRV_OLD_RAW; + } + } + + srv_data_file_names[i] = path; + srv_data_file_sizes[i] = size; + + i++; + + if (*str == ';') { + str++; + } + } + + return(TRUE); +} + +/************************************************************************* +Reads log group home directories from a character string given in +the .cnf file. */ +static +bool +innobase_parse_log_group_home_dirs(void) +/*====================================*/ + /* out: TRUE if ok, FALSE if parsing + error */ +{ + char* str; + char* path; + ulint i = 0; + + str = innobase_log_group_home_dir; + + /* First calculate the number of directories and check syntax: + path;path;... 
*/ + + while (*str != '\0') { + path = str; + + while (*str != ';' && *str != '\0') { + str++; + } + + i++; + + if (*str == ';') { + str++; + } else if (*str != '\0') { + + return(FALSE); + } + } + + if (i != (ulint) innobase_mirrored_log_groups) { + + return(FALSE); + } + + srv_log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*)); + + /* Then store the actual values to our array */ + + str = innobase_log_group_home_dir; + i = 0; + + while (*str != '\0') { + path = str; + + while (*str != ';' && *str != '\0') { + str++; + } + + if (*str == ';') { + *str = '\0'; + str++; + } + + srv_log_group_home_dirs[i] = path; + + i++; + } + + return(TRUE); +} + +/************************************************************************* +Opens an InnoDB database. */ + +bool +innobase_init(void) +/*===============*/ + /* out: TRUE if error */ +{ + int err; + bool ret; + char current_lib[3], *default_path; + + DBUG_ENTER("innobase_init"); + + /* + When using the embedded server, the datadirectory is not + in the current directory. + */ + if (mysql_embedded) + default_path=mysql_real_data_home; + else + { + /* It's better to use current lib, to keep path's short */ + current_lib[0]=FN_CURLIB; + current_lib[1]=FN_LIBCHAR; + current_lib[2]=0; + default_path=current_lib; + } + + if (specialflag & SPECIAL_NO_PRIOR) { + srv_set_thread_priorities = FALSE; + } else { + srv_set_thread_priorities = TRUE; + srv_query_thread_priority = QUERY_PRIOR; + } + + /* + Set InnoDB initialization parameters according to the values + read from MySQL .cnf file + */ + + // Make a copy of innobase_data_file_path to not modify the original + internal_innobase_data_file_path=my_strdup(innobase_data_file_path, + MYF(MY_WME)); + + srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir : + default_path); + srv_logs_home = (char*) ""; + srv_arch_dir = (innobase_log_arch_dir ? 
innobase_log_arch_dir : + default_path); + + ret = innobase_parse_data_file_paths_and_sizes(); + + if (ret == FALSE) { + sql_print_error("InnoDB: syntax error in innodb_data_file_path"); + DBUG_RETURN(TRUE); + } + + if (!innobase_log_group_home_dir) + innobase_log_group_home_dir= default_path; + ret = innobase_parse_log_group_home_dirs(); + + if (ret == FALSE) { + DBUG_RETURN(TRUE); + } + + srv_unix_file_flush_method_str = (innobase_unix_file_flush_method ? + innobase_unix_file_flush_method : + (char*)"fdatasync"); + + srv_n_log_groups = (ulint) innobase_mirrored_log_groups; + srv_n_log_files = (ulint) innobase_log_files_in_group; + srv_log_file_size = (ulint) innobase_log_file_size; + + srv_log_archive_on = (ulint) innobase_log_archive; + srv_log_buffer_size = (ulint) innobase_log_buffer_size; + srv_flush_log_at_trx_commit = (ibool) innobase_flush_log_at_trx_commit; + + srv_use_native_aio = 0; + + srv_pool_size = (ulint) innobase_buffer_pool_size; + srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; + + srv_n_file_io_threads = (ulint) innobase_file_io_threads; + + srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout; + srv_thread_concurrency = (ulint) innobase_thread_concurrency; + srv_force_recovery = (ulint) innobase_force_recovery; + + srv_fast_shutdown = (ibool) innobase_fast_shutdown; + + srv_print_verbose_log = mysql_embedded ? 0 : 1; + if (strcmp(default_charset_info->name, "latin1") == 0) { + /* Store the character ordering table to InnoDB. + For non-latin1 charsets we use the MySQL comparison + functions, and consequently we do not need to know + the ordering internally in InnoDB. 
*/ + + memcpy(srv_latin1_ordering, + default_charset_info->sort_order, 256); + } + + err = innobase_start_or_create_for_mysql(); + + if (err != DB_SUCCESS) { + + DBUG_RETURN(1); + } + (void) hash_init(&innobase_open_tables,32,0,0, + (hash_get_key) innobase_get_key,0,0); + pthread_mutex_init(&innobase_mutex,MY_MUTEX_INIT_FAST); + DBUG_RETURN(0); +} + +/*********************************************************************** +Closes an InnoDB database. */ + +bool +innobase_end(void) +/*==============*/ + /* out: TRUE if error */ +{ + int err; + + DBUG_ENTER("innobase_end"); + + err = innobase_shutdown_for_mysql(); + hash_free(&innobase_open_tables); + my_free(internal_innobase_data_file_path,MYF(MY_ALLOW_ZERO_PTR)); + + if (err != DB_SUCCESS) { + + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + +/******************************************************************** +Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit +flushes logs, and the name of this function should be innobase_checkpoint. */ + +bool +innobase_flush_logs(void) +/*=====================*/ + /* out: TRUE if error */ +{ + bool result = 0; + + DBUG_ENTER("innobase_flush_logs"); + + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); + + DBUG_RETURN(result); +} + +/************************************************************************* +Gets the free space in an InnoDB database: returned in units of kB. */ + +uint +innobase_get_free_space(void) +/*=========================*/ + /* out: free space in kB */ +{ + return((uint) fsp_get_available_space_in_free_extents(0)); +} + +/********************************************************************* +Commits a transaction in an InnoDB database. 
*/ + +int +innobase_commit( +/*============*/ + /* out: 0 or error number */ + THD* thd, /* in: MySQL thread handle of the user for whom + the transaction should be committed */ + void* trx_handle)/* in: InnoDB trx handle or NULL: NULL means + that the current SQL statement ended, and we should + mark the start of a new statement with a savepoint */ +{ + int error = 0; + trx_t* trx; + + DBUG_ENTER("innobase_commit"); + DBUG_PRINT("trans", ("ending transaction")); + + trx = check_trx_exists(thd); + + if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { + srv_conc_enter_innodb(trx); + + trx_commit_for_mysql(trx); + + srv_conc_exit_innodb(); + } + + trx_mark_sql_stat_end(trx); + +#ifndef DBUG_OFF + if (error) { + DBUG_PRINT("error", ("error: %d", error)); + } +#endif + /* Tell InnoDB server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + + DBUG_RETURN(error); +} + +/********************************************************************* +Rolls back a transaction in an InnoDB database. */ + +int +innobase_rollback( +/*==============*/ + /* out: 0 or error number */ + THD* thd, /* in: handle to the MySQL thread of the user + whose transaction should be rolled back */ + void* trx_handle)/* in: InnoDB trx handle or a dummy stmt handle */ +{ + int error = 0; + trx_t* trx; + + DBUG_ENTER("innobase_rollback"); + DBUG_PRINT("trans", ("aborting transaction")); + + trx = check_trx_exists(thd); + + srv_conc_enter_innodb(trx); + + if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { + error = trx_rollback_for_mysql(trx); + } else { + error = trx_rollback_last_sql_stat_for_mysql(trx); + } + + srv_conc_exit_innodb(); + + trx_mark_sql_stat_end(trx); + + DBUG_RETURN(convert_error_code_to_mysql(error)); +} + +/********************************************************************* +Frees a possible InnoDB trx object associated with the current +THD. 
*/ + +int +innobase_close_connection( +/*======================*/ + /* out: 0 or error number */ + THD* thd) /* in: handle to the MySQL thread of the user + whose transaction should be rolled back */ +{ + if (NULL != thd->transaction.all.innobase_tid) { + trx_rollback_for_mysql((trx_t*) + (thd->transaction.all.innobase_tid)); + trx_free_for_mysql((trx_t*) + (thd->transaction.all.innobase_tid)); + } + + return(0); +} + +/********************************************************************** +Prints an error message. */ +static +void +innobase_print_error( +/*=================*/ + const char* db_errpfx, /* in: error prefix text */ + char* buffer) /* in: error text */ +{ + sql_print_error("%s: %s", db_errpfx, buffer); +} + + +/***************************************************************************** +** InnoDB database tables +*****************************************************************************/ + +/******************************************************************** +This function is not relevant since we store the tables and indexes +into our own tablespace, not as files, whose extension this function would +give. */ + +const char** +ha_innobase::bas_ext() const +/*========================*/ + /* out: file extension strings, currently not + used */ +{ + static const char* ext[] = {".InnoDB", NullS}; + + return(ext); +} + +/********************************************************************* +Normalizes a table name string. A normalized name consists of the +database name catenated to '/' and table name. An example: +test/mytable. On Windows normalization puts both the database name and the +table name always to lower case. 
*/
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string; the caller
					must supply a buffer that can hold
					the last two path components of
					name plus the terminating null */
	const char*	name)		/* in: table name string */
{
	char*	name_ptr;
	char*	db_ptr;
	char*	ptr;

	/* Scan name from the end */

	ptr = strend(name)-1;

	/* Step back to the separator that precedes the table name */

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	name_ptr = ptr + 1;

	/* There must be a database name component before the separator */

	DBUG_ASSERT(ptr > name);

	ptr--;

	/* Step back to the separator that precedes the database name,
	or to the start of the string */

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	db_ptr = ptr + 1;

	/* Copy "database<sep>table" including the terminating null */

	memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));

	/* Whatever separator was used, the normalized form uses '/' */

	norm_name[name_ptr - db_ptr - 1] = '/';

#ifdef __WIN__
	/* Put to lower case */

	ptr = norm_name;

	while (*ptr != '\0') {
		*ptr = tolower(*ptr);
		ptr++;
	}
#endif
}

/*********************************************************************
Creates and opens a handle to a table which already exists in an Innobase
database. On every error path the resources acquired so far (the share,
the packing buffers) are released before returning. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
	int 		mode,		/* in: not used */
	uint 		test_if_locked)	/* in: not used */
{
	dict_table_t*	ib_table;
	int 		error	= 0;
	uint		buff_len;
	char		norm_name[1000];

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

	normalize_table_name(norm_name, name);

	user_thd = NULL;

	/* No statement has used this handle yet */
	last_query_id = (ulong)-1;

	if (!(share=get_share(name)))
		DBUG_RETURN(1);

	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */

	buff_len = table->reclength + table->max_key_length
					+ MAX_REF_PARTS * 3;
	/* NOTE(review): upd_buff and key_val_buff are obtained with one
	my_multi_malloc call and only upd_buff is ever passed to my_free
	in this file; presumably both live in a single allocation that
	starts at upd_buff -- confirm against my_multi_malloc */
	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
				     &upd_buff, buff_len,
				     &key_val_buff, buff_len,
				     NullS)) {
		free_share(share);
		DBUG_RETURN(1);
	}

	/* Get pointer to a table object in InnoDB dictionary cache */

	if (NULL == (ib_table = dict_table_get(norm_name, NULL))) {

		sql_print_error("InnoDB error:\n\
Cannot find table %s from the internal data dictionary\n\
of InnoDB though the .frm file for the table exists. Maybe you\n\
have deleted and recreated InnoDB data files but have forgotten\n\
to delete the corresponding .frm files of InnoDB tables, or you\n\
have moved .frm files to another database?",
			  norm_name);

		free_share(share);
		my_free((char*) upd_buff, MYF(0));
		my_errno = ENOENT;
		DBUG_RETURN(1);
	}

	innobase_prebuilt = row_create_prebuilt(ib_table);

	((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len = table->reclength;

	primary_key = MAX_KEY;

	if (!row_table_got_default_clust_index(ib_table)) {

		/* The table has a user-defined clustered index, which
		MySQL sees as key number 0.

		(In the other branch below: if we automatically created the
		clustered index, then MySQL does not know about it and it
		must not be aware of the index used on scan, to avoid
		checking if we update the column of the index. The column
		is the row id in the automatical case, and it will not be
		updated.) */

		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = FALSE;

		primary_key = 0;
		key_used_on_scan = 0;

		/* MySQL allocates the buffer for ref */

		ref_length = table->key_info->key_length
				+ table->key_info->key_parts + 10;

		/* One byte per key field is consumed to the SQL NULL
		info of the field; we add also 10 bytes of safety margin */
	} else {
		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = TRUE;

		/* The row reference is the generated row id, plus the same
		10 bytes of safety margin */

		ref_length = DATA_ROW_ID_LEN + 10;

		DBUG_ASSERT(key_used_on_scan == MAX_KEY);
	}

	auto_inc_counter_for_this_stat = 0;

	/* Init table lock structure */
	thr_lock_data_init(&share->lock,&lock,(void*) 0);

	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);

	DBUG_RETURN(0);
}

/*********************************************************************
Does nothing. */

void
ha_innobase::initialize(void)
/*=========================*/
{
}

/**********************************************************************
Closes a handle to an InnoDB table: frees the prebuilt struct, the
packing buffers and the share acquired in open(). */

int
ha_innobase::close(void)
/*====================*/
				/* out: error number; the current
				implementation always returns 0 */
{
	DBUG_ENTER("ha_innobase::close");

	row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);

	my_free((char*) upd_buff, MYF(0));
	free_share(share);

	/* Tell InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

	DBUG_RETURN(0);
}

/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets field offset for a field in a table: the distance in bytes of the
field data from the start of table->record[0]. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	return((uint) (field->ptr - (char*) table->record[0]));
}

/******************************************************************
Checks if a field in a record is SQL NULL.
Uses the record format +information in table to track the null bit in record. */ +inline +uint +field_in_record_is_null( +/*====================*/ + /* out: 1 if NULL, 0 otherwise */ + TABLE* table, /* in: MySQL table object */ + Field* field, /* in: MySQL field object */ + char* record) /* in: a row in MySQL format */ +{ + int null_offset; + + if (!field->null_ptr) { + + return(0); + } + + null_offset = (uint) ((char*) field->null_ptr + - (char*) table->record[0]); + + if (record[null_offset] & field->null_bit) { + + return(1); + } + + return(0); +} + +/****************************************************************** +Sets a field in a record to SQL NULL. Uses the record format +information in table to track the null bit in record. */ +inline +void +set_field_in_record_to_null( +/*========================*/ + TABLE* table, /* in: MySQL table object */ + Field* field, /* in: MySQL field object */ + char* record) /* in: a row in MySQL format */ +{ + int null_offset; + + null_offset = (uint) ((char*) field->null_ptr + - (char*) table->record[0]); + + record[null_offset] = record[null_offset] | field->null_bit; +} + +/****************************************************************** +Resets SQL NULL bits in a record to zero. */ +inline +void +reset_null_bits( +/*============*/ + TABLE* table, /* in: MySQL table object */ + char* record) /* in: a row in MySQL format */ +{ + bzero(record, table->null_bytes); +} + +extern "C" { +/***************************************************************** +InnoDB uses this function is to compare two data fields for which the +data type is such that we must use MySQL code to compare them. NOTE that the +prototype of this function is in rem0cmp.c in InnoDB source code! +If you change this function, remember to update the prototype there! 
*/ + +int +innobase_mysql_cmp( +/*===============*/ + /* out: 1, 0, -1, if a is greater, + equal, less than b, respectively */ + int mysql_type, /* in: MySQL type */ + unsigned char* a, /* in: data field */ + unsigned int a_length, /* in: data field length, + not UNIV_SQL_NULL */ + unsigned char* b, /* in: data field */ + unsigned int b_length) /* in: data field length, + not UNIV_SQL_NULL */ +{ + enum_field_types mysql_tp; + int ret; + + DBUG_ASSERT(a_length != UNIV_SQL_NULL); + DBUG_ASSERT(b_length != UNIV_SQL_NULL); + + mysql_tp = (enum_field_types) mysql_type; + + switch (mysql_tp) { + + case FIELD_TYPE_STRING: + case FIELD_TYPE_VAR_STRING: + ret = my_sortncmp((const char*) a, a_length, + (const char*) b, b_length); + if (ret < 0) { + return(-1); + } else if (ret > 0) { + return(1); + } else { + return(0); + } + default: + assert(0); + } + + return(0); +} +} + +/****************************************************************** +Converts a MySQL type to an InnoDB type. */ +inline +ulint +get_innobase_type_from_mysql_type( +/*==============================*/ + /* out: DATA_BINARY, DATA_VARCHAR, ... 
*/ + Field* field) /* in: MySQL field */ +{ + /* The following asserts check that MySQL type code fits in + 8 bits: this is used in ibuf and also when DATA_NOT_NULL is + ORed to the type */ + + DBUG_ASSERT((ulint)FIELD_TYPE_STRING < 256); + DBUG_ASSERT((ulint)FIELD_TYPE_VAR_STRING < 256); + DBUG_ASSERT((ulint)FIELD_TYPE_DOUBLE < 256); + DBUG_ASSERT((ulint)FIELD_TYPE_FLOAT < 256); + DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256); + + switch (field->type()) { + case FIELD_TYPE_VAR_STRING: if (field->flags & BINARY_FLAG) { + + return(DATA_BINARY); + } else if (strcmp( + default_charset_info->name, + "latin1") == 0) { + return(DATA_VARCHAR); + } else { + return(DATA_VARMYSQL); + } + case FIELD_TYPE_STRING: if (field->flags & BINARY_FLAG) { + + return(DATA_FIXBINARY); + } else if (strcmp( + default_charset_info->name, + "latin1") == 0) { + return(DATA_CHAR); + } else { + return(DATA_MYSQL); + } + case FIELD_TYPE_LONG: + case FIELD_TYPE_LONGLONG: + case FIELD_TYPE_TINY: + case FIELD_TYPE_SHORT: + case FIELD_TYPE_INT24: + case FIELD_TYPE_DATE: + case FIELD_TYPE_DATETIME: + case FIELD_TYPE_YEAR: + case FIELD_TYPE_NEWDATE: + case FIELD_TYPE_ENUM: + case FIELD_TYPE_SET: + case FIELD_TYPE_TIME: + case FIELD_TYPE_TIMESTAMP: + return(DATA_INT); + case FIELD_TYPE_FLOAT: + return(DATA_FLOAT); + case FIELD_TYPE_DOUBLE: + return(DATA_DOUBLE); + case FIELD_TYPE_DECIMAL: + return(DATA_DECIMAL); + case FIELD_TYPE_TINY_BLOB: + case FIELD_TYPE_MEDIUM_BLOB: + case FIELD_TYPE_BLOB: + case FIELD_TYPE_LONG_BLOB: + return(DATA_BLOB); + default: + assert(0); + } + + return(0); +} + +/*********************************************************************** +Stores a key value for a row to a buffer. 
*/

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
	uint 		keynr,	/* in: key number */
	char*		buff,	/* in/out: buffer for the key value (in MySQL
				format) */
	const mysql_byte* record)/* in: row in MySQL format */
{
	KEY*		key_info 	= table->key_info + keynr;
	KEY_PART_INFO*	key_part	= key_info->key_part;
	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
	char*		buff_start	= buff;

	DBUG_ENTER("store_key_val_for_row");

	for (; key_part != end; key_part++) {

		if (key_part->null_bit) {
			/* A nullable key part is prefixed with one flag
			byte: 1 if the value in the row is SQL NULL, in
			which case no data bytes are stored for this key
			part, and 0 otherwise */

			if (record[key_part->null_offset]
						& key_part->null_bit) {
				*buff++ = 1;
				continue;
			}

			*buff++ = 0;
		}

		memcpy(buff, record + key_part->offset, key_part->length);
		buff += key_part->length;
	}

	DBUG_RETURN(buff - buff_start);
}

/******************************************************************
Builds a template to the prebuilt struct. The template lists, for each
column that is to be fetched, where the column lies in the MySQL row
format and in the InnoDB index record. */
static
void
build_template(
/*===========*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
	ulint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
					ROW_MYSQL_REC_FIELDS */
{
	dict_index_t*	index;
	dict_index_t*	clust_index;
	mysql_row_templ_t* templ;
	Field*		field;
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
	ibool		fetch_all_in_key	= FALSE;
	ulint		i;

	clust_index = dict_table_get_first_index_noninline(prebuilt->table);

	if (!prebuilt->in_update_remember_pos) {
		if (prebuilt->read_just_key) {
			fetch_all_in_key = TRUE;
		} else {
			/* We are building a temporary table: fetch all
			columns */

			templ_type = ROW_MYSQL_WHOLE_ROW;
		}
	}

	if (prebuilt->select_lock_type == LOCK_X) {
		/* TODO: should fix the code in sql_update so that we could do
		with fetching only the needed columns */

		templ_type = ROW_MYSQL_WHOLE_ROW;
	}

	if (templ_type == ROW_MYSQL_REC_FIELDS) {

		if (prebuilt->select_lock_type != LOCK_NONE) {
			/* Let index be the clustered index */

			index = clust_index;
		} else {
			index = prebuilt->index;
		}
	} else {
		index = clust_index;
	}

	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}

	n_fields = (ulint)table->fields;

	/* The template array is allocated once, with room for every
	column of the table, and reused on later calls */

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}

	prebuilt->template_type = templ_type;
	prebuilt->null_bitmap_len = table->null_bytes;

	prebuilt->templ_contains_blob = FALSE;

	for (i = 0; i < n_fields; i++) {
		templ = prebuilt->mysql_template + n_requested_fields;
		field = table->field[i];

		/* thd is dereferenced only when templ_type is
		ROW_MYSQL_REC_FIELDS, because && short-circuits. A column
		is kept if it belongs to the index while whole keys are
		fetched, or if its query_id matches the current query id
		directly or with one of the two highest bits flipped. */

		if (templ_type == ROW_MYSQL_REC_FIELDS
		    && !(fetch_all_in_key &&
				ULINT_UNDEFINED != dict_index_get_nth_col_pos(
								index, i))
		    && thd->query_id != field->query_id
		    && thd->query_id != (field->query_id ^ MAX_ULONG_BIT)
		    && thd->query_id !=
				(field->query_id ^ (MAX_ULONG_BIT >> 1))) {

			/* This field is not needed in the query, skip it */

			goto skip_field;
		}

		n_requested_fields++;

		templ->col_no = i;

		if (index == clust_index) {
			templ->rec_field_no = (index->table->cols + i)
								->clust_pos;
		} else {
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
		}

		/* The column is not in the chosen secondary index: the
		clustered index record has to be read */

		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);

			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}

		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);

		templ->mysql_col_len = (ulint) field->pack_length();
		templ->type = get_innobase_type_from_mysql_type(field);
		templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);

		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
		}
skip_field:
		;
	}

	prebuilt->n_template = n_requested_fields;

	if (prebuilt->need_to_access_clustered) {
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;

			templ->rec_field_no =
			    (index->table->cols + templ->col_no)->clust_pos;
		}
	}

	/* This is set only after the loop above, so the template
	positions of a locking read still refer to the index chosen
	earlier */

	if (templ_type == ROW_MYSQL_REC_FIELDS
				&& prebuilt->select_lock_type != LOCK_NONE) {

		prebuilt->need_to_access_clustered = TRUE;
	}
}

/************************************************************************
Stores a row in an InnoDB database, to the table specified in this
handle. If the table has an auto-increment column and record is
table->record[0], the auto-inc value is resolved first, under the
auto-inc lock of the table. */

int
ha_innobase::write_row(
/*===================*/
				/* out: error code */
	mysql_byte* 	record)	/* in: a row in MySQL format */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
	int 		error;
	longlong	auto_inc;

	DBUG_ENTER("ha_innobase::write_row");

	statistic_increment(ha_write_count, &LOCK_status);

	if (table->time_stamp) {
		update_timestamp(record + table->time_stamp - 1);
	}

	/* The first row operation of a new SQL statement on this handle */

	if (last_query_id != user_thd->query_id) {
		prebuilt->sql_stat_start = TRUE;
		last_query_id = user_thd->query_id;
	}

	if (table->next_number_field && record == table->record[0]) {

		/* Fetch the value the user possibly has set in the
		autoincrement field */

		auto_inc = table->next_number_field->val_int();

		/* In replication and also otherwise the auto-inc column
		can be set with SET INSERT_ID. Then we must look at
		user_thd->next_insert_id. If it is nonzero and the user
		has not supplied a value, we must use it, and use values
		incremented by 1 in all subsequent inserts within the
		same SQL statement! */

		if (auto_inc == 0 && user_thd->next_insert_id != 0) {
			auto_inc = user_thd->next_insert_id;
			auto_inc_counter_for_this_stat = auto_inc;
		}

		if (auto_inc == 0 && auto_inc_counter_for_this_stat) {
			/* The user set the auto-inc counter for
			this SQL statement with SET INSERT_ID. We must
			assign sequential values from the counter. */

			auto_inc_counter_for_this_stat++;

			auto_inc = auto_inc_counter_for_this_stat;

			/* We give MySQL a new value to place in the
			auto-inc column */
			user_thd->next_insert_id = auto_inc;
		}

		if (auto_inc != 0) {
			/* This call will calculate the max of the
			current value and the value supplied by the user, if
			the auto_inc counter is already initialized
			for the table */

			/* We have to use the transactional lock mechanism
			on the auto-inc counter of the table to ensure
			that replication and roll-forward of the binlog
			exactly imitates also the given auto-inc values.
			The lock is released at each SQL statement's
			end. */

			srv_conc_enter_innodb(prebuilt->trx);
			error = row_lock_table_autoinc_for_mysql(prebuilt);
			srv_conc_exit_innodb();

			if (error != DB_SUCCESS) {

				error = convert_error_code_to_mysql(error);
				goto func_exit;
			}

			dict_table_autoinc_update(prebuilt->table, auto_inc);
		} else {
			/* No explicit value: read the next value of the
			table counter, taking the auto-inc lock first unless
			this transaction already holds it */

			srv_conc_enter_innodb(prebuilt->trx);

			if (!prebuilt->trx->auto_inc_lock) {

				error = row_lock_table_autoinc_for_mysql(
								prebuilt);
				if (error != DB_SUCCESS) {
					srv_conc_exit_innodb();

					error = convert_error_code_to_mysql(
									error);
					goto func_exit;
				}
			}

			auto_inc = dict_table_autoinc_get(prebuilt->table);
			srv_conc_exit_innodb();

			/* If auto_inc is now != 0 the autoinc counter
			was already initialized for the table: we can give
			the new value for MySQL to place in the field */

			if (auto_inc != 0) {
				user_thd->next_insert_id = auto_inc;
			}
		}

		/* Set the 'in_update_remember_pos' flag to FALSE to
		make sure all columns are fetched in the select done by
		update_auto_increment */

		prebuilt->in_update_remember_pos = FALSE;

		update_auto_increment();

		if (auto_inc == 0) {
			/* The autoinc counter for our table was not yet
			initialized, initialize it now */

			auto_inc = table->next_number_field->val_int();

			srv_conc_enter_innodb(prebuilt->trx);
			error = row_lock_table_autoinc_for_mysql(prebuilt);
			srv_conc_exit_innodb();

			if (error != DB_SUCCESS) {

				error = convert_error_code_to_mysql(error);
				goto func_exit;
			}

			dict_table_autoinc_initialize(prebuilt->table,
								auto_inc);
		}

		/* We have to set sql_stat_start to TRUE because
		update_auto_increment has called a select, and
		has reset that flag; row_insert_for_mysql has to
		know to set the IX intention lock on the table, something
		it only does at the start of each statement */

		prebuilt->sql_stat_start = TRUE;
	}

	if (prebuilt->mysql_template == NULL
			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
		/* Build the template used in converting quickly between
		the two database formats. The thd argument may be NULL
		here because build_template uses it only for
		ROW_MYSQL_REC_FIELDS templates. */

		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	/* The flag is raised only for the duration of the insert call
	below and is lowered again right after it */

	if (user_thd->lex.sql_command == SQLCOM_INSERT
	    && user_thd->lex.duplicates == DUP_IGNORE) {
		prebuilt->trx->ignore_duplicates_in_insert = TRUE;
	} else {
		prebuilt->trx->ignore_duplicates_in_insert = FALSE;
	}

	srv_conc_enter_innodb(prebuilt->trx);

	error = row_insert_for_mysql((byte*) record, prebuilt);

	srv_conc_exit_innodb();

	prebuilt->trx->ignore_duplicates_in_insert = FALSE;

	error = convert_error_code_to_mysql(error);

	/* Tell InnoDB server that there might be work for
	utility threads: */
func_exit:
	innobase_active_small();

	DBUG_RETURN(error);
}

/******************************************************************
Converts field data for storage in an InnoDB update vector.
*/
+inline
+mysql_byte*
+innobase_convert_and_store_changed_col(
+/*===================================*/
+				/* out: first byte of the conversion
+				buffer that is still unused after
+				this call */
+	upd_field_t*	ufield,	/* in/out: update vector field which
+				receives the new value */
+	mysql_byte*	buf,	/* in: scratch buffer for converted data */
+	mysql_byte*	data,	/* in: new column value */
+	ulint		len,	/* in: value length, or UNIV_SQL_NULL */
+	ulint		col_type,/* in: InnoDB type code of the column */
+	ulint		is_unsigned)/* in: != 0 for an unsigned integer */
+{
+	if (len == UNIV_SQL_NULL) {
+		/* An SQL NULL has no data to point to */
+		data = NULL;
+	} else {
+		switch (col_type) {
+		case DATA_VARCHAR:
+		case DATA_BINARY:
+		case DATA_VARMYSQL:
+			/* Trailing spaces are not stored */
+			while (len > 0 && data[len - 1] == ' ') {
+				len--;
+			}
+			break;
+
+		case DATA_INT: {
+			/* InnoDB keeps integers big-endian, with the
+			sign bit negated for signed types: copy the bytes
+			into buf in reverse order */
+			mysql_byte*	dst = buf + len;
+			uint		pos;
+
+			for (pos = 0; pos < len; pos++) {
+				*--dst = data[pos];
+			}
+
+			if (!is_unsigned) {
+				buf[0] ^= 128;
+			}
+
+			/* The value now lives in the scratch buffer;
+			advance past the bytes we consumed */
+			data = buf;
+			buf += len;
+			break;
+		}
+
+		default:
+			/* All other types are stored unchanged */
+			break;
+		}
+	}
+
+	ufield->new_val.data = data;
+	ufield->new_val.len = len;
+
+	return(buf);
+}
+
+/**************************************************************************
+Checks which fields have changed in a row and stores information
+of them to an update vector. 
*/
+static
+int
+calc_row_difference(
+/*================*/
+					/* out: always 0 in the current
+					implementation */
+	upd_t*		uvect,		/* in/out: update vector; its fields
+					array, n_fields and info_bits are
+					filled in here */
+	mysql_byte* 	old_row,	/* in: old row in MySQL format */
+	mysql_byte* 	new_row,	/* in: new row in MySQL format */
+	struct st_table* table,		/* in: table in MySQL data dictionary */
+	mysql_byte*	upd_buff,	/* in: scratch buffer used when a
+					changed value must be converted */
+	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
+	THD*		thd)		/* in: user thread; only referenced
+					by the disabled code below */
+{
+	Field*		field;
+	uint		n_fields;
+	ulint		o_len;
+	ulint		n_len;
+	byte*	        o_ptr;
+        byte*	        n_ptr;
+        byte*	        buf;
+	upd_field_t*	ufield;
+	ulint		col_type;
+	ulint		is_unsigned;
+	ulint		n_changed = 0;
+	uint		i;
+
+	n_fields = table->fields;
+
+	/* We use upd_buff to convert changed fields */
+	buf = (byte*) upd_buff;
+
+	for (i = 0; i < n_fields; i++) {
+		field = table->field[i];
+
+		/* if (thd->query_id != field->query_id) { */
+			/* TODO: check that these fields cannot have
+			changed! */
+
+		/*	goto skip_field;
+		}*/
+
+		o_ptr = (byte*) old_row + get_field_offset(table, field);
+		n_ptr = (byte*) new_row + get_field_offset(table, field);
+		o_len = field->pack_length();
+		n_len = field->pack_length();
+
+		col_type = get_innobase_type_from_mysql_type(field);
+		is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
+
+		switch (col_type) {
+
+		case DATA_BLOB:
+			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
+			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
+			break;
+		case DATA_VARCHAR:
+		case DATA_BINARY:
+		case DATA_VARMYSQL:
+			o_ptr = row_mysql_read_var_ref_noninline(&o_len, o_ptr);
+			n_ptr = row_mysql_read_var_ref_noninline(&n_len, n_ptr);
+			/* no break: falls through to the empty default */
+		default:
+			;
+		}
+
+		if (field->null_ptr) {
+			/* The column is nullable: an SQL NULL is
+			represented by the special length UNIV_SQL_NULL */
+			if (field_in_record_is_null(table, field,
+							(char*) old_row)) {
+				o_len = UNIV_SQL_NULL;
+			}
+
+			if (field_in_record_is_null(table, field,
+							(char*) new_row)) {
+				n_len = UNIV_SQL_NULL;
+			}
+		}
+
+		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
+					0 != memcmp(o_ptr, n_ptr, o_len))) {
+			/* The field has changed: append it to the update
+			vector. The bytes are compared only when both
+			values are non-NULL and of equal length. */
+
+			ufield = uvect->fields + n_changed;
+
+			buf = (byte*)
+                          innobase_convert_and_store_changed_col(ufield,
+					  (mysql_byte*)buf,
+					  (mysql_byte*)n_ptr, n_len, col_type,
+						is_unsigned);
+			ufield->exp = NULL;
+			ufield->field_no =
+					(prebuilt->table->cols + i)->clust_pos;
+			n_changed++;
+		}
+		;
+	}
+
+	uvect->n_fields = n_changed;
+	uvect->info_bits = 0;
+
+	return(0);
+}
+
+/**************************************************************************
+Updates a row given as a parameter to a new value. Note that we are given
+whole rows, not just the fields which are updated: this incurs some
+overhead for CPU when we check which fields are actually updated.
+The changed fields are collected with calc_row_difference() into the update
+vector of the prebuilt struct, and the update itself is done by
+row_update_for_mysql().
+TODO: currently InnoDB does not prevent the 'Halloween problem':
+in a searched update a single row can get updated several times
+if its index columns are updated! */
+
+int
+ha_innobase::update_row(
+/*====================*/
+					/* out: error number or 0 */
+	const mysql_byte* 	old_row,/* in: old row in MySQL format */
+	mysql_byte* 		new_row)/* in: new row in MySQL format */
+{
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	upd_t*		uvect;
+	int		error = 0;
+
+	DBUG_ENTER("ha_innobase::update_row");
+
+        if (table->time_stamp) {
+                update_timestamp(new_row + table->time_stamp - 1);
+	}
+
+	/* A changed query id means that a new SQL statement has started */
+	if (last_query_id != user_thd->query_id) {
+	        prebuilt->sql_stat_start = TRUE;
+                last_query_id = user_thd->query_id;
+	}
+
+	if (prebuilt->upd_node) {
+		uvect = prebuilt->upd_node->update;
+	} else {
+		/* NOTE(review): presumably this call also creates
+		prebuilt->upd_node, which is dereferenced below; confirm */
+		uvect = row_get_prebuilt_update_vector(prebuilt);
+	}
+
+	/* Build an update vector from the modified fields in the rows
+	(uses upd_buff of the handle) */
+
+	calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
+						upd_buff, prebuilt, user_thd);
+	/* This is not a delete */
+	prebuilt->upd_node->is_delete = FALSE;
+
+	if (!prebuilt->in_update_remember_pos) {
+		assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
+	}
+
+	srv_conc_enter_innodb(prebuilt->trx);
+
+	error = row_update_for_mysql((byte*) old_row, prebuilt);
+
+	srv_conc_exit_innodb();
+
+	error = convert_error_code_to_mysql(error);
+
+	/* Tell InnoDB server that there might be work for
+	utility threads: */
+
+	innobase_active_small();
+
+	DBUG_RETURN(error);
+}
+
+/**************************************************************************
+Deletes a row given as the parameter. The delete is executed through
+row_update_for_mysql() with the is_delete flag of the update node set. */
+
+int
+ha_innobase::delete_row(
+/*====================*/
+					/* out: error number or 0 */
+	const mysql_byte* record)	/* in: a row in MySQL format */
+{
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	int		error = 0;
+
+	DBUG_ENTER("ha_innobase::delete_row");
+
+	/* A changed query id means that a new SQL statement has started */
+	if (last_query_id != user_thd->query_id) {
+	        prebuilt->sql_stat_start = TRUE;
+                last_query_id = user_thd->query_id;
+	}
+
+	if (!prebuilt->upd_node) {
+		/* NOTE(review): called only for its side effect, which is
+		presumably to create prebuilt->upd_node; confirm */
+		row_get_prebuilt_update_vector(prebuilt);
+	}
+
+	/* This is a delete */
+
+	prebuilt->upd_node->is_delete = TRUE;
+	prebuilt->in_update_remember_pos = TRUE;
+
+	srv_conc_enter_innodb(prebuilt->trx);
+
+	error = row_update_for_mysql((byte*) record, prebuilt);
+
+	srv_conc_exit_innodb();
+
+	error = convert_error_code_to_mysql(error);
+
+	/* Tell the InnoDB server that there might be work for
+	utility threads: */
+
+	innobase_active_small();
+
+	DBUG_RETURN(error);
+}
+
+/**********************************************************************
+Initializes a handle to use an index: makes keynr the active index by
+calling change_active_index(). */
+
+int
+ha_innobase::index_init(
+/*====================*/
+			/* out: 0 or error number */
+	uint 	keynr)	/* in: key (index) number */
+{
+	int 	error	= 0;
+  	DBUG_ENTER("index_init");
+
+	error = change_active_index(keynr);
+
+  	DBUG_RETURN(error);
+}
+
+/**********************************************************************
+Currently does nothing. 
*/
+
+int
+ha_innobase::index_end(void)
+/*========================*/
+				/* out: always 0 */
+{
+  	DBUG_ENTER("index_end");
+
+  	DBUG_RETURN(0);
+}
+
+/*************************************************************************
+Maps a MySQL key search flag to the InnoDB page cursor mode that is used
+to position the cursor. */
+inline
+ulint
+convert_search_mode_to_innobase(
+/*============================*/
+					/* out: PAGE_CUR_... search mode;
+					an unknown flag trips the assertion,
+					and 0 is returned if execution
+					continues past it */
+	enum ha_rkey_function	find_flag)	/* in: MySQL search flag */
+{
+	switch (find_flag) {
+	case HA_READ_KEY_EXACT:
+		/* an exact match does not require the index to be UNIQUE */
+	case HA_READ_KEY_OR_NEXT:
+	case HA_READ_PREFIX:
+		return(PAGE_CUR_GE);
+
+	case HA_READ_KEY_OR_PREV:
+	case HA_READ_PREFIX_LAST:
+		return(PAGE_CUR_LE);
+
+	case HA_READ_AFTER_KEY:
+		return(PAGE_CUR_G);
+
+	case HA_READ_BEFORE_KEY:
+		return(PAGE_CUR_L);
+
+		/* the PREFIX flags above mean that the last field in the
+		key value may just be a prefix of the complete fixed
+		length field */
+	default:
+		assert(0);
+	}
+
+	return(0);
+}
+
+/**************************************************************************
+Positions an index cursor to the index specified in the handle. Fetches the
+row if any. 
*/
+
+int
+ha_innobase::index_read(
+/*====================*/
+					/* out: 0, HA_ERR_KEY_NOT_FOUND,
+					or error number */
+	mysql_byte*		buf,	/* in/out: buffer for the returned
+					row */
+	const mysql_byte* 	key_ptr,/* in: key value; if this is NULL
+					we position the cursor at the
+					start or end of index */
+	uint			key_len,/* in: key value length */
+	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
+{
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	ulint		mode;
+	dict_index_t*	index;
+	ulint		match_mode 	= 0;
+	int 		error;
+	ulint		ret;
+
+  	DBUG_ENTER("index_read");
+  	statistic_increment(ha_read_key_count, &LOCK_status);
+
+	/* A changed query id means that a new SQL statement has started */
+	if (last_query_id != user_thd->query_id) {
+	        prebuilt->sql_stat_start = TRUE;
+                last_query_id = user_thd->query_id;
+	}
+
+	index = prebuilt->index;
+
+	/* Note that if the select is used for an update, we always
+	fetch the clustered index record: therefore the index for which the
+	template is built is not necessarily prebuilt->index, but can also
+	be the clustered index */
+
+	if (prebuilt->sql_stat_start) {
+		build_template(prebuilt, user_thd, table,
+							ROW_MYSQL_REC_FIELDS);
+	}
+
+	if (key_ptr) {
+		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
+						(byte*) key_val_buff,
+						index,
+						(byte*) key_ptr,
+						(ulint) key_len);
+	} else {
+		/* We position the cursor to the last or the first entry
+		in the index */
+
+ 		dtuple_set_n_fields(prebuilt->search_tuple, 0);
+	}
+
+	mode = convert_search_mode_to_innobase(find_flag);
+
+	if (find_flag == HA_READ_KEY_EXACT) {
+		match_mode = ROW_SEL_EXACT;
+
+	} else if (find_flag == HA_READ_PREFIX
+				|| find_flag == HA_READ_PREFIX_LAST) {
+		match_mode = ROW_SEL_EXACT_PREFIX;
+	}
+
+	/* index_next_same() continues the scan with the same match mode */
+	last_match_mode = match_mode;
+
+	srv_conc_enter_innodb(prebuilt->trx);
+
+	ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
+
+	srv_conc_exit_innodb();
+
+	if (ret == DB_SUCCESS) {
+		error = 0;
+		table->status = 0;
+
+	} else if (ret == DB_RECORD_NOT_FOUND || ret == DB_END_OF_INDEX) {
+		/* Both conditions are reported to MySQL as a missing key */
+		error = HA_ERR_KEY_NOT_FOUND;
+		table->status = STATUS_NOT_FOUND;
+
+	} else {
+		error = convert_error_code_to_mysql(ret);
+		table->status = STATUS_NOT_FOUND;
+	}
+
+	DBUG_RETURN(error);
+}
+
+
+/*
+  The following function works like index_read, but it finds the last
+  row with the current index prefix.
+  This code is disabled until Heikki has verified that InnoDB supports the
+  HA_READ_PREFIX_LAST flag and has removed the HA_NOT_READ_PREFIX_LAST
+  flag from ha_innodb.h
+*/
+
+int
+ha_innobase::index_read_last(mysql_byte *buf,
+                             const mysql_byte *key_ptr,
+			     uint key_len)
+{
+  return index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST);
+}
+
+
+/************************************************************************
+Changes the active index of a handle. The search tuple of the prebuilt
+struct is resized and typed for the new index, and the row template is
+rebuilt so that whole rows are fetched. */
+
+int
+ha_innobase::change_active_index(
+/*=============================*/
+			/* out: 0 on success, 1 if the index is not found
+			in the InnoDB dictionary cache */
+	uint 	keynr)	/* in: use this index; MAX_KEY means always clustered
+			index, even if it was internally generated by
+			InnoDB */
+{
+  row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+  KEY*		  key;
+
+  DBUG_ENTER("ha_innobase::change_active_index");
+  statistic_increment(ha_read_key_count, &LOCK_status);
+
+  active_index = keynr;
+
+  if (keynr != MAX_KEY && table->keys > 0)
+  {
+    key = table->key_info + active_index;
+
+    prebuilt->index=dict_table_get_index_noninline(prebuilt->table, key->name);
+    if (!prebuilt->index)
+    {
+      sql_print_error("Innodb could not find key n:o %u with name %s from dict cache for table %s", keynr, key->name, prebuilt->table->name);
+      /* Every exit after DBUG_ENTER must go through DBUG_RETURN, or the
+      DBUG call stack is left unbalanced */
+      DBUG_RETURN(1);
+    }
+  }
+  else
+    prebuilt->index = dict_table_get_first_index_noninline(prebuilt->table);
+
+  assert(prebuilt->search_tuple != 0);
+
+  dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
+
+  dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
+			prebuilt->index->n_fields);
+
+  /* Maybe MySQL changes the active index for a handle also
+     during some queries, we do not know: then it is safest to build
+     the template such that all columns will be fetched */
+
+  build_template(prebuilt, user_thd, table, ROW_MYSQL_WHOLE_ROW);
+
+  DBUG_RETURN(0);
+}
+
+/**************************************************************************
+Positions an index cursor to the index specified in keynr. Fetches the
+row if any. */
+/* ??? This is only used to read whole keys ??? */
+
+int
+ha_innobase::index_read_idx(
+/*========================*/
+					/* out: 0, 1 if the index could not
+					be activated, or the error number
+					from index_read */
+	mysql_byte* 	buf,		/* in/out: buffer for the returned
+					row */
+	uint 		keynr,		/* in: use this index */
+	const mysql_byte* key,		/* in: key value; if this is NULL
+					we position the cursor at the
+					start or end of index */
+	uint		key_len,	/* in: key value length */
+	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
+{
+	if (change_active_index(keynr)) {
+
+		return(1);
+	}
+
+	return(index_read(buf, key, key_len, find_flag));
+}
+
+/***************************************************************************
+Reads the next or previous row from a cursor, which must have previously been
+positioned using index_read. 
*/
+
+int
+ha_innobase::general_fetch(
+/*=======================*/
+				/* out: 0, HA_ERR_END_OF_FILE, or error
+				number */
+	mysql_byte* 	buf,	/* in/out: buffer that receives the fetched
+				row in MySQL format */
+	uint 	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
+	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
+				ROW_SEL_EXACT_PREFIX */
+{
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	ulint		search_result;
+	int 		error;
+
+	DBUG_ENTER("general_fetch");
+
+	srv_conc_enter_innodb(prebuilt->trx);
+	search_result = row_search_for_mysql((byte*)buf, 0, prebuilt,
+						match_mode, direction);
+	srv_conc_exit_innodb();
+
+	if (search_result == DB_SUCCESS) {
+		table->status = 0;
+
+		DBUG_RETURN(0);
+	}
+
+	/* Running out of records is reported to MySQL as end of file;
+	anything else is a real error */
+	if (search_result == DB_RECORD_NOT_FOUND
+	    || search_result == DB_END_OF_INDEX) {
+		error = HA_ERR_END_OF_FILE;
+	} else {
+		error = convert_error_code_to_mysql(search_result);
+	}
+
+	table->status = STATUS_NOT_FOUND;
+
+	DBUG_RETURN(error);
+}
+
+/***************************************************************************
+Moves the cursor one step forward and reads that row. The cursor must
+have been positioned with index_read before this call. */
+
+int
+ha_innobase::index_next(
+/*====================*/
+				/* out: 0, HA_ERR_END_OF_FILE, or error
+				number */
+	mysql_byte* 	buf)	/* in/out: buffer for next row in MySQL
+				format */
+{
+	int	error;
+
+  	statistic_increment(ha_read_next_count, &LOCK_status);
+
+	error = general_fetch(buf, ROW_SEL_NEXT, 0);
+
+	return(error);
+}
+
+/***********************************************************************
+Reads the next row matching to the key value given as the parameter. 
*/
+
+int
+ha_innobase::index_next_same(
+/*=========================*/
+				/* out: 0, HA_ERR_END_OF_FILE, or error
+				number */
+	mysql_byte* 	buf,	/* in/out: buffer for the row */
+	const mysql_byte* key,	/* in: key value; not used here, the match
+				mode saved by index_read decides */
+	uint 		keylen)	/* in: key value length; not used here */
+{
+	int	error;
+
+  	statistic_increment(ha_read_next_count, &LOCK_status);
+
+	error = general_fetch(buf, ROW_SEL_NEXT, last_match_mode);
+
+	return(error);
+}
+
+/***************************************************************************
+Moves the cursor one step backward and reads that row. The cursor must
+have been positioned with index_read before this call. */
+
+int
+ha_innobase::index_prev(
+/*====================*/
+				/* out: 0, HA_ERR_END_OF_FILE, or error
+				number */
+	mysql_byte* 	buf)	/* in/out: buffer for previous row in MySQL
+				format */
+{
+	int	error = general_fetch(buf, ROW_SEL_PREV, 0);
+
+	return(error);
+}
+
+/************************************************************************
+Places the cursor on the first record of the active index and reads that
+row to buf. */
+
+int
+ha_innobase::index_first(
+/*=====================*/
+				/* out: 0, HA_ERR_END_OF_FILE,
+				or error code */
+	mysql_byte*	buf)	/* in/out: buffer for the row */
+{
+	int	result;
+
+  	DBUG_ENTER("index_first");
+  	statistic_increment(ha_read_first_count, &LOCK_status);
+
+	/* A NULL key with HA_READ_AFTER_KEY starts from the beginning
+	of the index */
+  	result = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
+
+        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
+
+  	DBUG_RETURN(result == HA_ERR_KEY_NOT_FOUND
+		    ? HA_ERR_END_OF_FILE : result);
+}
+
+/************************************************************************
+Positions a cursor on the last record in an index and reads the
+corresponding row to buf. 
*/
+
+int
+ha_innobase::index_last(
+/*====================*/
+				/* out: 0, HA_ERR_END_OF_FILE, or error code */
+	mysql_byte*	buf)	/* in/out: buffer for the row */
+{
+	int	error;
+
+	/* The trace label must name this function, not index_first */
+  	DBUG_ENTER("index_last");
+  	statistic_increment(ha_read_last_count, &LOCK_status);
+
+	/* A NULL key with HA_READ_BEFORE_KEY starts from the end of
+	the index */
+  	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
+
+        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
+
+  	if (error == HA_ERR_KEY_NOT_FOUND) {
+  		error = HA_ERR_END_OF_FILE;
+  	}
+
+  	DBUG_RETURN(error);
+}
+
+/********************************************************************
+Initialize a table scan. The scan runs over the clustered index: the
+primary key, or the index InnoDB generated if the table has none. */
+
+int
+ha_innobase::rnd_init(
+/*==================*/
+			/* out: 0 or error number */
+	bool	scan)	/* in: not used by this implementation */
+{
+	int	err;
+
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+
+	if (prebuilt->clust_index_was_generated) {
+		err = change_active_index(MAX_KEY);
+	} else {
+		err = change_active_index(primary_key);
+	}
+
+	/* Tells rnd_next to start from the first record */
+  	start_of_scan = 1;
+
+ 	return(err);
+}
+
+/*********************************************************************
+Ends a table scan. Simply delegates to index_end(). */
+
+int
+ha_innobase::rnd_end(void)
+/*======================*/
+				/* out: 0 or error number */
+{
+  	return(index_end());
+}
+
+/*********************************************************************
+Reads the next row in a table scan (also used to read the FIRST row
+in a table scan). 
*/
+
+int
+ha_innobase::rnd_next(
+/*==================*/
+			/* out: 0, HA_ERR_END_OF_FILE, or error number */
+	mysql_byte* buf)/* in/out: returns the row in this buffer,
+			in MySQL format */
+{
+	int	result;
+
+  	DBUG_ENTER("rnd_next");
+  	statistic_increment(ha_read_rnd_next_count, &LOCK_status);
+
+	if (!start_of_scan) {
+		/* The scan is already under way: just step forward */
+		DBUG_RETURN(general_fetch(buf, ROW_SEL_NEXT, 0));
+	}
+
+	/* First call after rnd_init: begin from the first record */
+	result = index_first(buf);
+
+	if (result == HA_ERR_KEY_NOT_FOUND) {
+		result = HA_ERR_END_OF_FILE;
+	}
+
+	start_of_scan = 0;
+
+  	DBUG_RETURN(result);
+}
+
+/**************************************************************************
+Reads the row that a stored reference points to. The clustered index is
+made active for the lookup and the previously active index is restored
+afterwards. TODO: currently we use 'ref_stored_len' of the handle as the
+key length. This may change. */
+
+int
+ha_innobase::rnd_pos(
+/*=================*/
+				/* out: 0, HA_ERR_KEY_NOT_FOUND,
+				or error code */
+	mysql_byte* 	buf,	/* in/out: buffer for the row */
+	mysql_byte*	pos)	/* in: primary key value in MySQL format */
+{
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	uint		saved_index = active_index;
+	int		result;
+
+	DBUG_ENTER("rnd_pos");
+	statistic_increment(ha_read_rnd_count, &LOCK_status);
+
+	/* If no primary key was defined for the table, InnoDB generated
+	the clustered index from the row id: the row reference is then
+	the row id, not any key value that MySQL knows */
+
+	result = change_active_index(prebuilt->clust_index_was_generated
+				     ? MAX_KEY : primary_key);
+
+	if (result == 0) {
+		result = index_read(buf, pos, ref_stored_len,
+							HA_READ_KEY_EXACT);
+
+		/* Switch back to the index that was active on entry */
+		change_active_index(saved_index);
+	}
+
+  	DBUG_RETURN(result);
+}
+
+/*************************************************************************
+Stores a reference to the current row to 'ref' field of the handle. 
Note
+that the function parameter is illogical: we must assume that 'record'
+is the current 'position' of the handle, because if row ref is actually
+the row id internally generated in InnoDB, then 'record' does not contain
+it. We just guess that the row id must be for the record where the handle
+was positioned the last time. */
+
+void
+ha_innobase::position(
+/*==================*/
+	const mysql_byte*	record)	/* in: row in MySQL format */
+{
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	uint		stored_len;
+
+	if (!prebuilt->clust_index_was_generated) {
+		/* The reference is the primary key value of the row */
+
+		stored_len = store_key_val_for_row(primary_key, (char*) ref,
+								record);
+	} else {
+		/* The table has no primary key, so the clustered index
+		was generated from the row id: the reference is the row
+		id, which is not a key value that MySQL knows */
+
+		stored_len = DATA_ROW_ID_LEN;
+
+		memcpy(ref, prebuilt->row_id, stored_len);
+	}
+
+	DBUG_ASSERT(stored_len <= ref_length);
+
+	/* rnd_pos uses this as the key length of the reference */
+	ref_stored_len = stored_len;
+}
+
+/***********************************************************************
+Tells something additional to the handler about how to do things. 
*/
+
+int
+ha_innobase::extra(
+/*===============*/
+			   /* out: always 0 in this implementation */
+	enum ha_extra_function operation)
+                           /* in: hint from MySQL; only the RESET,
+			   RESET_STATE, NO_KEYREAD, KEYREAD and
+			   HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE hints have
+			   an effect here */
+{
+	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+
+	switch (operation) {
+ 		case HA_EXTRA_RESET:
+ 		case HA_EXTRA_RESET_STATE:
+	        	prebuilt->read_just_key = 0;
+	        	break;
+		case HA_EXTRA_NO_KEYREAD:
+    			prebuilt->read_just_key = 0;
+    			break;
+	        case HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE:
+			prebuilt->in_update_remember_pos = FALSE;
+			break;
+	        case HA_EXTRA_KEYREAD:
+	        	prebuilt->read_just_key = 1;
+	        	break;
+		default:/* All other hints are ignored */
+			;
+	}
+
+	return(0);
+}
+
+int ha_innobase::reset(void)	/* out: always 0; does nothing here */
+{
+  return(0);
+}
+
+/**********************************************************************
+As MySQL will execute an external lock for every new table it uses when it
+starts to process an SQL statement, we can use this function to store the
+pointer to the THD in the handle. We will also use this function to communicate
+to InnoDB that a new SQL statement has started and that we must store a
+savepoint to our transaction handle, so that we are able to roll back
+the SQL statement in case of an error.
+When the lock is released with F_UNLCK and no table of the transaction
+remains in use, the search latch and the auto-inc lock are released if they
+are held, and the transaction is committed unless OPTION_NOT_AUTO_COMMIT or
+OPTION_BEGIN is set in thd->options. */
+
+int
+ha_innobase::external_lock(
+/*=======================*/
+				/* out: always 0 in this implementation */
+	THD*	thd,		/* in: handle to the user thread */
+	int 	lock_type)	/* in: lock type: F_WRLCK and F_UNLCK are
+				treated specially below */
+{
+	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	int 		error = 0;
+	trx_t*		trx;
+
+  	DBUG_ENTER("ha_innobase::external_lock");
+
+	update_thd(thd);
+
+	trx = prebuilt->trx;
+
+	/* Reset the per-statement state of the handle */
+	prebuilt->sql_stat_start = TRUE;
+	prebuilt->in_update_remember_pos = TRUE;
+
+	prebuilt->read_just_key = 0;
+
+	if (lock_type == F_WRLCK) {
+
+		/* If this is a SELECT, then it is in UPDATE TABLE ...
+		or SELECT ... FOR UPDATE */
+		prebuilt->select_lock_type = LOCK_X;
+	}
+
+	if (lock_type != F_UNLCK) {
+		/* The first table of a statement: mark the end of the
+		previous SQL statement in the transaction */
+		if (trx->n_mysql_tables_in_use == 0) {
+			trx_mark_sql_stat_end(trx);
+		}
+
+		thd->transaction.all.innodb_active_trans = 1;
+		trx->n_mysql_tables_in_use++;
+
+		if (prebuilt->select_lock_type != LOCK_NONE) {
+
+			trx->mysql_n_tables_locked++;
+		}
+	} else {
+		trx->n_mysql_tables_in_use--;
+		auto_inc_counter_for_this_stat = 0;
+
+		if (trx->n_mysql_tables_in_use == 0) {
+
+			/* The last table of the statement was released */
+			trx->mysql_n_tables_locked = 0;
+
+			if (trx->has_search_latch) {
+
+				trx_search_latch_release_if_reserved(trx);
+			}
+
+			if (trx->auto_inc_lock) {
+
+				/* If we had reserved the auto-inc lock for
+				some table in this SQL statement, we release
+				it now */
+
+				srv_conc_enter_innodb(trx);
+				row_unlock_table_autoinc_for_mysql(trx);
+				srv_conc_exit_innodb();
+			}
+
+			if (!(thd->options
+				 & (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) {
+
+				innobase_commit(thd, trx);
+			}
+		}
+	}
+
+	DBUG_RETURN(error);
+}
+
+/*********************************************************************
+Creates a table definition to an InnoDB database. 
*/
+static
+int
+create_table_def(
+/*=============*/
+					/* out: error number, as converted
+					by convert_error_code_to_mysql */
+	trx_t*		trx,		/* in: InnoDB transaction handle */
+	TABLE*		form,		/* in: MySQL description of the
+					columns and indexes of the table */
+	const char*	table_name)	/* in: table name */
+{
+	dict_table_t*	new_table;
+	ulint		n_cols;
+	ulint		col_no;
+  	int 		error;
+
+  	DBUG_ENTER("create_table_def");
+  	DBUG_PRINT("enter", ("table_name: %s", table_name));
+
+	n_cols = form->fields;
+
+	/* The '0' below specifies that everything is currently
+	created in tablespace 0 */
+
+	new_table = dict_mem_table_create((char*) table_name, 0, n_cols);
+
+	for (col_no = 0; col_no < n_cols; col_no++) {
+		Field*	field = form->field[col_no];
+		ulint	col_type = get_innobase_type_from_mysql_type(field);
+
+		/* The MySQL type code is or'ed with the InnoDB flags for
+		NOT NULL and UNSIGNED */
+		ulint	nulls_allowed = field->null_ptr ? 0 : DATA_NOT_NULL;
+		ulint	unsigned_type = (field->flags & UNSIGNED_FLAG)
+						? DATA_UNSIGNED : 0;
+
+		dict_mem_table_add_col(new_table, (char*) field->field_name,
+					col_type, (ulint)field->type()
+					| nulls_allowed | unsigned_type,
+					field->pack_length(), 0);
+	}
+
+	error = convert_error_code_to_mysql(
+			row_create_table_for_mysql(new_table, trx));
+
+	DBUG_RETURN(error);
+}
+
+/*********************************************************************
+Creates an index in an InnoDB database. 
*/
+static
+int
+create_index(
+/*=========*/
+					/* out: error number, as converted
+					by convert_error_code_to_mysql */
+	trx_t*		trx,		/* in: InnoDB transaction handle */
+	TABLE*		form,		/* in: MySQL description of the
+					columns and indexes of the table */
+	const char*	table_name,	/* in: table name */
+	uint		key_num)	/* in: position of the key in
+					form->key_info */
+{
+	KEY*		key = &form->key_info[key_num];
+	ulint		n_fields = key->key_parts;
+	ulint		ind_type = 0;
+	dict_index_t*	new_index;
+	ulint		field_no;
+  	int 		error;
+
+  	DBUG_ENTER("create_index");
+
+	/* The key named PRIMARY becomes the clustered index */
+    	if (!strcmp(key->name, "PRIMARY")) {
+		ind_type |= DICT_CLUSTERED;
+	}
+
+	if (key->flags & HA_NOSAME) {
+		ind_type |= DICT_UNIQUE;
+	}
+
+	/* The '0' below specifies that everything in InnoDB is currently
+	created in tablespace 0 */
+
+	new_index = dict_mem_index_create((char*) table_name, key->name, 0,
+						ind_type, n_fields);
+
+	for (field_no = 0; field_no < n_fields; field_no++) {
+		/* We assume all fields should be sorted in ascending
+		order, hence the '0': */
+		dict_mem_index_add_field(new_index,
+			(char*) key->key_part[field_no].field->field_name,
+			0);
+	}
+
+	error = convert_error_code_to_mysql(
+			row_create_index_for_mysql(new_index, trx));
+
+	DBUG_RETURN(error);
+}
+
+/*********************************************************************
+Creates an index to an InnoDB table when the user has defined no
+primary index. 
*/
+static
+int
+create_clustered_index_when_no_primary(
+/*===================================*/
+					/* out: error number, as converted
+					by convert_error_code_to_mysql */
+	trx_t*		trx,		/* in: InnoDB transaction handle */
+	const char*	table_name)	/* in: table name */
+{
+	dict_index_t*	clust_index;
+
+	/* The first '0' below specifies that everything in InnoDB is
+	currently created in file space 0; the index itself has no
+	user-defined fields */
+
+	clust_index = dict_mem_index_create((char*) table_name,
+					    (char*) "GEN_CLUST_INDEX",
+					    0, DICT_CLUSTERED, 0);
+
+	return(convert_error_code_to_mysql(
+			row_create_index_for_mysql(clust_index, trx)));
+}
+
+/*********************************************************************
+Creates a new table to an InnoDB database: first the table definition,
+then the clustered index, then the remaining indexes and finally the
+foreign key constraints found in the create statement. */
+
+int
+ha_innobase::create(
+/*================*/
+					/* out: error number */
+	const char*	name,		/* in: table name */
+	TABLE*		form,		/* in: information on table
+					columns and indexes */
+	HA_CREATE_INFO*	create_info)	/* in: more information of the
+					created table, contains also the
+					create statement string */
+{
+	int		error;
+	dict_table_t*	innobase_table;
+	trx_t*		trx;
+	int		primary_key_no = -1;
+	uint		key_no;
+	char		name2[FN_REFLEN];
+	char		norm_name[FN_REFLEN];
+
+  	DBUG_ENTER("ha_innobase::create");
+
+	trx = trx_allocate_for_mysql();
+
+	fn_format(name2, name, "", "",2);	// Remove the .frm extension
+
+	normalize_table_name(norm_name, name2);
+
+	/* Create the table definition in InnoDB */
+
+	error = create_table_def(trx, form, norm_name);
+
+	if (error) {
+		goto err_exit;
+	}
+
+	/* Look for a primary key */
+
+	for (key_no = 0; key_no < form->keys; key_no++) {
+		if (!strcmp(form->key_info[key_no].name, "PRIMARY")) {
+			primary_key_no = (int) key_no;
+		}
+	}
+
+	/* Our function row_get_mysql_key_number_for_index assumes
+	the primary key is always number 0, if it exists */
+
+	assert(primary_key_no == -1 || primary_key_no == 0);
+
+	/* In InnoDB the clustered index must always be created first */
+
+	if (primary_key_no == -1) {
+		/* No primary key (this also covers a table without any
+		keys): create an index which is used as the clustered
+		index; order the rows by their row id which is internally
+		generated by InnoDB */
+
+		error = create_clustered_index_when_no_primary(trx,
+							norm_name);
+	} else {
+		error = create_index(trx, form, norm_name,
+						(uint) primary_key_no);
+	}
+
+	if (error) {
+		goto err_exit;
+	}
+
+	/* Create the remaining keys */
+
+	for (key_no = 0; key_no < form->keys; key_no++) {
+
+		if (key_no == (uint) primary_key_no) {
+			/* Already created above */
+			continue;
+		}
+
+		error = create_index(trx, form, norm_name, key_no);
+
+		if (error) {
+			goto err_exit;
+		}
+	}
+
+	error = convert_error_code_to_mysql(
+			row_table_add_foreign_constraints(trx,
+				create_info->create_statement, norm_name));
+
+	if (error) {
+		goto err_exit;
+	}
+
+  	trx_commit_for_mysql(trx);
+
+	/* Flush the log to reduce probability that the .frm files and
+	the InnoDB data dictionary get out-of-sync if the user runs
+	with innodb_flush_log_at_trx_commit = 0 */
+
+	log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+
+	innobase_table = dict_table_get(norm_name, NULL);
+
+	assert(innobase_table != 0);
+
+	/* Tell the InnoDB server that there might be work for
+	utility threads: */
+
+	srv_active_wake_master_thread();
+
+  	trx_free_for_mysql(trx);
+
+	DBUG_RETURN(0);
+
+err_exit:
+	/* Common exit for every failure: commit and free the transaction
+	and hand the error to the caller */
+
+	trx_commit_for_mysql(trx);
+
+	trx_free_for_mysql(trx);
+
+	DBUG_RETURN(error);
+}
+
+/*********************************************************************
+Drops a table from an InnoDB database. Before calling this function,
+MySQL calls innobase_commit to commit the transaction of the current user.
+Then the current user cannot have locks set on the table. Drop table
+operation inside InnoDB will remove all locks any user has on the table
+inside InnoDB. 
*/
+
+int
+ha_innobase::delete_table(
+/*======================*/
+				/* out: error number */
+	const char*	name)	/* in: table name */
+{
+	char	norm_name[1000];
+	trx_t*	trx;
+	int	error;
+
+	DBUG_ENTER("ha_innobase::delete_table");
+
+	trx = trx_allocate_for_mysql();
+
+	/* The normalized name is written to a fixed size buffer */
+
+	assert(strlen(name) < sizeof(norm_name));
+
+	/* Unlike in ::create, MySQL hands over the table name here
+	without the '.frm' extension, so it can be normalized directly */
+
+	normalize_table_name(norm_name, name);
+
+	/* The actual drop inside InnoDB */
+
+	error = row_drop_table_for_mysql(norm_name, trx, FALSE);
+
+	/* Flushing the log makes it less likely that the .frm files and
+	the InnoDB data dictionary get out-of-sync when the user runs
+	with innodb_flush_log_at_trx_commit = 0 */
+
+	log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+
+	/* Let the InnoDB server know that the utility threads may have
+	work to do */
+
+	srv_active_wake_master_thread();
+
+	trx_commit_for_mysql(trx);
+
+	trx_free_for_mysql(trx);
+
+	/* Translate the InnoDB error code for MySQL only after the
+	transaction has been finished */
+
+	error = convert_error_code_to_mysql(error);
+
+	DBUG_RETURN(error);
+}
+
+/*********************************************************************
+Removes all tables in the named database inside InnoDB.
*/
+
+int
+innobase_drop_database(
+/*===================*/
+			/* out: error number */
+	char*	path)	/* in: database path; inside InnoDB the name
+			of the last directory in the path is used as
+			the database name: for example, in 'mysql/data/test'
+			the database name is 'test' */
+{
+	ulint	len		= 0;
+	trx_t*	trx;
+	char*	ptr;
+	int	error;
+	char	namebuf[10000];
+
+	/* Scan backwards for the directory separator that precedes the
+	last path component, counting the characters of that component.
+	NOTE(review): the scan starts at strend(path) - 2, i.e. it skips
+	the final character of path, so path presumably ends in a
+	directory separator - confirm against the caller */
+
+	ptr = strend(path) - 2;
+
+	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
+		ptr--;
+		len++;
+	}
+
+	ptr++;
+
+	/* namebuf has to hold the database name, the '/' appended below
+	and the terminating '\0': refuse anything longer instead of
+	writing past the end of the buffer, in the same way as the table
+	name buffers are guarded in delete_table and rename_table */
+
+	assert(len + 2 <= sizeof(namebuf));
+
+	/* Inside InnoDB the database is identified by the prefix
+	'<database name>/' */
+
+	memcpy(namebuf, ptr, len);
+	namebuf[len] = '/';
+	namebuf[len + 1] = '\0';
+
+	trx = trx_allocate_for_mysql();
+
+	error = row_drop_database_for_mysql(namebuf, trx);
+
+	/* Flush the log to reduce probability that the .frm files and
+	the InnoDB data dictionary get out-of-sync if the user runs
+	with innodb_flush_log_at_trx_commit = 0 */
+
+	log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+
+	/* Tell the InnoDB server that there might be work for
+	utility threads: */
+
+	srv_active_wake_master_thread();
+
+	trx_commit_for_mysql(trx);
+	trx_free_for_mysql(trx);
+
+	error = convert_error_code_to_mysql(error);
+
+	return(error);
+}
+
+/*************************************************************************
+Renames an InnoDB table.
*/
+
+int
+ha_innobase::rename_table(
+/*======================*/
+				/* out: 0 or error code */
+	const char*	from,	/* in: old name of the table */
+	const char*	to)	/* in: new name of the table */
+{
+	char	norm_from[1000];
+	char	norm_to[1000];
+	trx_t*	trx;
+	int	error;
+
+	DBUG_ENTER("ha_innobase::rename_table");
+
+	trx = trx_allocate_for_mysql();
+
+	/* Both normalized names are written to fixed size buffers */
+
+	assert(strlen(from) < sizeof(norm_from));
+	assert(strlen(to) < sizeof(norm_to));
+
+	normalize_table_name(norm_from, from);
+	normalize_table_name(norm_to, to);
+
+	/* The actual rename inside InnoDB */
+
+	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
+
+	/* Flushing the log makes it less likely that the .frm files and
+	the InnoDB data dictionary get out-of-sync when the user runs
+	with innodb_flush_log_at_trx_commit = 0 */
+
+	log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+
+	/* Let the InnoDB server know that the utility threads may have
+	work to do */
+
+	srv_active_wake_master_thread();
+
+	trx_commit_for_mysql(trx);
+	trx_free_for_mysql(trx);
+
+	/* Translate the InnoDB error code for MySQL only after the
+	transaction has been finished */
+
+	error = convert_error_code_to_mysql(error);
+
+	DBUG_RETURN(error);
+}
+
+/*************************************************************************
+Estimates the number of index records in a range.
*/
+
+ha_rows
+ha_innobase::records_in_range(
+/*==========================*/
+						/* out: estimated number of rows,
+						currently 32-bit int or uint */
+	int 			keynr,		/* in: index number */
+	const mysql_byte*	start_key,	/* in: start key value of the
+						range, may also be empty */
+	uint 			start_key_len,	/* in: start key val len, may
+						also be 0 */
+	enum ha_rkey_function 	start_search_flag,/* in: start search condition
+						e.g., 'greater than' */
+	const mysql_byte*	end_key,	/* in: range end key val, may
+						also be empty */
+	uint 			end_key_len,	/* in: range end key val len,
+						may also be 0 */
+	enum ha_rkey_function 	end_search_flag)/* in: range end search cond */
+{
+	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
+	KEY*		key;
+	dict_index_t*	index;
+	/* Conversion buffer for the range end key; the range start key
+	is converted in the key_val_buff of the handle.
+	NOTE(review): the result of my_malloc is not checked before the
+	buffer is passed on below - confirm how an out-of-memory
+	condition should be reported from here */
+	mysql_byte*	key_val_buff2 	= (mysql_byte*) my_malloc(
+						  table->reclength
+					+ table->max_key_length + 100,
+								MYF(MY_WME));
+	dtuple_t*	range_start;
+	dtuple_t*	range_end;
+	ulint		n_rows;
+	ulint		mode1;
+	ulint		mode2;
+	void*           heap1;
+	void*           heap2;
+
+	DBUG_ENTER("records_in_range");
+
+	if (prebuilt->trx) {
+		prebuilt->trx->op_info = (char*) "estimating range size";
+	}
+
+	/* Note that this changes the active index of the handle */
+
+	active_index = keynr;
+
+	key = table->key_info + active_index;
+
+	index = dict_table_get_index_noninline(prebuilt->table, key->name);
+
+	/* Build one InnoDB tuple for each end of the range; each tuple
+	gets a heap of its own, which is freed below */
+
+	range_start = dtuple_create_for_mysql(&heap1, key->key_parts);
+	dict_index_copy_types(range_start, index, key->key_parts);
+
+	range_end = dtuple_create_for_mysql(&heap2, key->key_parts);
+	dict_index_copy_types(range_end, index, key->key_parts);
+
+	row_sel_convert_mysql_key_to_innobase(
+				range_start, (byte*) key_val_buff, index,
+				(byte*) start_key,
+				(ulint) start_key_len);
+
+	row_sel_convert_mysql_key_to_innobase(
+				range_end, (byte*) key_val_buff2, index,
+				(byte*) end_key,
+				(ulint) end_key_len);
+
+	mode1 = convert_search_mode_to_innobase(start_search_flag);
+	mode2 = convert_search_mode_to_innobase(end_search_flag);
+
+	n_rows = btr_estimate_n_rows_in_range(index, range_start,
+						mode1, range_end,
+						mode2);
+	dtuple_free_for_mysql(heap1);
+	dtuple_free_for_mysql(heap2);
+
+	my_free((char*) key_val_buff2, MYF(0));
+
+	if (prebuilt->trx) {
+		prebuilt->trx->op_info = (char*) "";
+	}
+
+	DBUG_RETURN((ha_rows) n_rows);
+}
+
+/*************************************************************************
+Gives an UPPER BOUND to the number of rows in a table. This is used in
+filesort.cc and the upper bound must hold. TODO: Since the number of
+rows in a table may change after this function is called, we still may
+get a 'Sort aborted' error in filesort.cc of MySQL. The ultimate fix is to
+improve the algorithm of filesort.cc. */
+
+ha_rows
+ha_innobase::estimate_number_of_rows(void)
+/*======================================*/
+			/* out: upper bound of rows */
+{
+	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
+	dict_index_t*	index;
+	ulonglong	estimate;
+	/* Named so that it does not hide the data_file_length field of
+	the handle, which info() maintains */
+	ulonglong	local_data_file_length;
+
+	/* DBUG_ENTER carries the name of this function and is paired with
+	the DBUG_RETURN at the end, so that the DBUG call stack stays
+	balanced */
+
+	DBUG_ENTER("estimate_number_of_rows");
+
+	if (prebuilt->trx) {
+		prebuilt->trx->op_info =
+			(char*) "estimating upper bound of table size";
+	}
+
+	/* The estimate is based on the first index of the table */
+
+	index = dict_table_get_first_index_noninline(prebuilt->table);
+
+	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
+							* UNIV_PAGE_SIZE;
+
+	/* Calculate a minimum length for a clustered index record and from
+	that an upper bound for the number of rows. Since we only calculate
+	new statistics in row0mysql.c when a table has grown
+	by a threshold factor, we must add a safety factor 2 in front
+	of the formula below. */
+
+	estimate = 2 * local_data_file_length
+				/ dict_index_calc_min_rec_len(index);
+
+	if (prebuilt->trx) {
+		prebuilt->trx->op_info = (char*) "";
+	}
+
+	DBUG_RETURN((ha_rows) estimate);
+}
+
+/*************************************************************************
+How many seeks it will take to read through the table. This is to be
+comparable to the number returned by records_in_range so that we can
+decide if we should scan the table or use keys.
*/
+
+double
+ha_innobase::scan_time()
+/*====================*/
+			/* out: estimated time measured in disk seeks */
+{
+	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	dict_table_t*	ib_table = prebuilt->table;
+
+	/* The formula assumes that one disk seek costs as much as
+	scanning 10 pages; the division is done on the integer page
+	count before the conversion to double */
+
+	return((double) (ib_table->stat_clustered_index_size / 10));
+}
+
+/*************************************************************************
+Returns statistics information of the table to the MySQL interpreter,
+in various fields of the handle object. */
+
+void
+ha_innobase::info(
+/*==============*/
+	uint flag)	/* in: what information MySQL requests */
+{
+	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	dict_table_t*	ib_table;
+	dict_index_t*	index;
+	KEY*		key;
+	ulong		rec_per_key;
+	ulong		part_no;
+	ulong		key_no;
+
+	DBUG_ENTER("info");
+
+	if (prebuilt->trx) {
+		prebuilt->trx->op_info = (char*) "calculating table stats";
+	}
+
+	ib_table = prebuilt->table;
+
+	if (flag & HA_STATUS_TIME) {
+		/* sql_show calls with this flag: bring the statistics
+		up-to-date first */
+
+		dict_update_statistics(ib_table);
+	}
+
+	if (flag & HA_STATUS_VARIABLE) {
+		/* Fields for which InnoDB always reports zero */
+		deleted = 0;
+		delete_length = 0;
+		check_time = 0;
+
+		records = (ha_rows)ib_table->stat_n_rows;
+
+		/* The sizes are kept as page counts in the InnoDB table
+		object; convert them to bytes */
+		data_file_length = ((ulonglong)
+				ib_table->stat_clustered_index_size)
+					* UNIV_PAGE_SIZE;
+		index_file_length = ((ulonglong)
+				ib_table->stat_sum_of_other_index_sizes)
+					* UNIV_PAGE_SIZE;
+
+		mean_rec_length = (records == 0)
+				? 0
+				: (ulong) (data_file_length / records);
+	}
+
+	if (flag & HA_STATUS_CONST) {
+		index = dict_table_get_first_index_noninline(ib_table);
+
+		if (prebuilt->clust_index_was_generated) {
+			/* The generated clustered index has no
+			counterpart among the MySQL keys: step over it */
+			index = dict_table_get_next_index_noninline(index);
+		}
+
+		/* Walk the MySQL keys and the InnoDB indexes in parallel */
+
+		for (key_no = 0; key_no < table->keys; key_no++) {
+			key = table->key_info + key_no;
+
+			for (part_no = 0; part_no < key->key_parts;
+								part_no++) {
+
+				/* The statistic for key part number
+				part_no is read from element part_no + 1 */
+
+				if (index->stat_n_diff_key_vals[part_no + 1]
+								!= 0) {
+					rec_per_key = (ulong)(records /
+   				         index->stat_n_diff_key_vals[part_no + 1]);
+				} else {
+					rec_per_key = records;
+				}
+
+				/* Never report less than one record
+				per key value */
+
+				if (rec_per_key == 0) {
+					rec_per_key = 1;
+				}
+
+				key->rec_per_key[part_no] = rec_per_key;
+			}
+
+			index = dict_table_get_next_index_noninline(index);
+		}
+	}
+
+	/* The trx struct in InnoDB has a pthread mutex embedded in it: in
+	the debug version of MySQL that mutex is replaced by a 'safe mutex'
+	which is of a different size. The trx fields therefore have to be
+	read through a function; otherwise trx->error_info would be a
+	random pointer and cause a seg fault. */
+
+	if (flag & HA_STATUS_ERRKEY) {
+		errkey = (unsigned int) row_get_mysql_key_number_for_index(
+				       (dict_index_t*)
+				       trx_get_error_info(prebuilt->trx));
+	}
+
+	if (prebuilt->trx) {
+		prebuilt->trx->op_info = (char*) "";
+	}
+
+  	DBUG_VOID_RETURN;
+}
+
+/***********************************************************************
+Tries to check that an InnoDB table is not corrupted. If corruption is
+noticed, prints to stderr information about it. In case of corruption
+may also assert a failure and crash the server. */
+
+int
+ha_innobase::check(
+/*===============*/
+					/* out: HA_ADMIN_CORRUPT or
+					HA_ADMIN_OK */
+	THD* 		thd,		/* in: user thread handle */
+	HA_CHECK_OPT* 	check_opt)	/* in: check options, currently
+					ignored */
+{
+	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	ulint		ret;
+
+	if (prebuilt->mysql_template == NULL) {
+		/* No template has been built yet: build a dummy one for
+		the index scans done while checking */
+
+		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
+	}
+
+	ret = row_check_table_for_mysql(prebuilt);
+
+	/* Anything other than success is reported as corruption */
+
+	return(ret == DB_SUCCESS ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
+}
+
+/*****************************************************************
+Adds information about free space in the InnoDB tablespace to a table comment
+which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
+foreign keys.
*/
+
+char*
+ha_innobase::update_table_comment(
+/*==============================*/
+				/* out: table comment + InnoDB free space +
+				info on foreign keys; the user comment itself
+				is returned if no memory could be allocated */
+        const char*	comment)/* in:  table comment defined by user */
+{
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*)innobase_prebuilt;
+  	uint 		length 	 = strlen(comment);
+	/* Room for the user comment plus 550 bytes: "; " (2 bytes), the
+	free space text printed below (at most 36 bytes plus the '\0'
+	with a 64-bit ulong) and the 500 bytes offered for the foreign
+	key info */
+  	char*		str 	 = my_malloc(length + 550, MYF(0));
+  	char*		pos;
+
+	if (!str) {
+    		return((char*)comment);
+	}
+
+	pos = str;
+	if (length) {
+		/* Keep the user comment and separate it from our text */
+		pos=strmov(str, comment);
+		*pos++=';';
+		*pos++=' ';
+	}
+
+  	pos += sprintf(pos, "InnoDB free: %lu kB",
+			(ulong) innobase_get_free_space());
+
+	/* We pass 500 as the space available for printing the foreign
+	key info; see the allocation above */
+
+	dict_print_info_on_foreign_keys(pos, 500, prebuilt->table);
+
+  	return(str);
+}
+
+/****************************************************************************
+ Handling the shared INNOBASE_SHARE structure that is needed to provide table
+ locking.
+****************************************************************************/
+
+/* Returns the key of a share, that is, its table name and the length of
+   the name, through the return value and *length respectively */
+
+static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
+			      my_bool not_used __attribute__((unused)))
+{
+  *length=share->table_name_length;
+  return (mysql_byte*) share->table_name;
+}
+
+/* Looks up the share of a table in innobase_open_tables, creating and
+   inserting it if it does not exist yet, and adds one to its use count.
+   Returns 0 if the share could not be allocated or inserted. The whole
+   operation runs under innobase_mutex. */
+
+static INNOBASE_SHARE *get_share(const char *table_name)
+{
+  INNOBASE_SHARE *share;
+  pthread_mutex_lock(&innobase_mutex);
+  uint length=(uint) strlen(table_name);
+  if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
+					(mysql_byte*) table_name,
+					    length)))
+  {
+    /* The table name is stored in the same allocation, directly after
+       the struct; MY_ZEROFILL makes use_count start from 0 */
+    if (!(share=(INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
+					     MYF(MY_WME | MY_ZEROFILL))))
+    {
+      /* Out of memory: there is no share to hand out, and falling
+         through to the use count update below would dereference a
+         null pointer */
+      pthread_mutex_unlock(&innobase_mutex);
+      return 0;
+    }
+    share->table_name_length=length;
+    share->table_name=(char*) (share+1);
+    strmov(share->table_name,table_name);
+    if (hash_insert(&innobase_open_tables, (mysql_byte*) share))
+    {
+      pthread_mutex_unlock(&innobase_mutex);
+      my_free((gptr) share,0);
+      return 0;
+    }
+    thr_lock_init(&share->lock);
+    pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
+  }
+  share->use_count++;
+  pthread_mutex_unlock(&innobase_mutex);
+  return share;
+}
+
+/* Gives up one reference to a share; when the use count drops to zero the
+   share is removed from innobase_open_tables and freed */
+
+static void free_share(INNOBASE_SHARE *share)
+{
+  pthread_mutex_lock(&innobase_mutex);
+  if (!--share->use_count)
+  {
+    hash_delete(&innobase_open_tables, (mysql_byte*) share);
+    thr_lock_delete(&share->lock);
+    pthread_mutex_destroy(&share->mutex);
+    my_free((gptr) share, MYF(0));
+  }
+  pthread_mutex_unlock(&innobase_mutex);
+}
+
+/*********************************************************************
+Stores a MySQL lock into a 'lock' field in a handle. */
+
+THR_LOCK_DATA**
+ha_innobase::store_lock(
+/*====================*/
+						/* out: pointer to the next
+						element in the 'to' array */
+	THD*			thd,		/* in: user thread handle */
+	THR_LOCK_DATA**		to,		/* in: pointer to an array
+						of pointers to lock structs;
+						pointer to the 'lock' field
+						of current handle is stored
+						next to this array */
+	enum thr_lock_type 	lock_type)	/* in: lock type to store in
+						'lock' */
+{
+	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+
+	if (lock_type == TL_READ_WITH_SHARED_LOCKS ||
+	    lock_type == TL_READ_NO_INSERT) {
+		/* This is a SELECT ... IN SHARE MODE, or
+		we are doing a complex SQL statement like
+		INSERT INTO ... SELECT ... and the logical logging
+		requires the use of a locking read */
+
+		prebuilt->select_lock_type = LOCK_S;
+	} else {
+		/* We set possible LOCK_X value in external_lock, not yet
+		here even if this would be SELECT ... FOR UPDATE */
+
+		prebuilt->select_lock_type = LOCK_NONE;
+	}
+
+	/* The stored lock type is only changed when the request is not
+	TL_IGNORE and the 'lock' field is currently unlocked */
+
+	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {
+
+    		/* If we are not doing a LOCK TABLE, then allow multiple
+		writers */
+
+    		if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
+	 	    lock_type <= TL_WRITE) && !thd->in_lock_tables) {
+
+      			lock_type = TL_WRITE_ALLOW_WRITE;
+      		}
+
+ 		lock.type=lock_type;
+  	}
+
+  	*to++= &lock;
+
+	return(to);
+}
+
+/***********************************************************************
+Returns the next auto-increment column value for the table.
write_row +normally fetches the value from the cache in the data dictionary. This +function in used by SHOW TABLE STATUS and when the first insert to the table +is done after database startup. */ + +longlong +ha_innobase::get_auto_increment() +/*=============================*/ + /* out: the next auto-increment column value */ +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + longlong nr; + int error; + + (void) extra(HA_EXTRA_KEYREAD); + index_init(table->next_number_index); + + /* We use an exclusive lock when we read the max key value from the + auto-increment column index. This is because then build_template will + advise InnoDB to fetch all columns. In SHOW TABLE STATUS the query + id of the auto-increment column is not changed, and previously InnoDB + did not fetch it, causing SHOW TABLE STATUS to show wrong values + for the autoinc column. */ + + prebuilt->select_lock_type = LOCK_X; + prebuilt->trx->mysql_n_tables_locked += 1; + + error=index_last(table->record[1]); + + if (error) { + nr = 1; + } else { + nr = (longlong) table->next_number_field-> + val_int_offset(table->rec_buff_length) + 1; + } + + (void) extra(HA_EXTRA_NO_KEYREAD); + + index_end(); + + return(nr); +} + + +#endif /* HAVE_INNOBASE_DB */ diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h new file mode 100644 index 00000000000..fb10975f30a --- /dev/null +++ b/sql/ha_innodb.h @@ -0,0 +1,194 @@ +/* Copyright (C) 2000 MySQL AB && Innobase Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/*
+  This file is based on ha_berkeley.h of MySQL distribution
+
+  This file defines the Innodb handler: the interface between MySQL and
+  Innodb
+*/
+
+#ifdef __GNUC__
+#pragma interface			/* gcc class implementation */
+#endif
+
+typedef struct st_innobase_share {
+  THR_LOCK lock;		/* initialized with thr_lock_init when the
+				share is created */
+  pthread_mutex_t mutex;	/* initialized with pthread_mutex_init when
+				the share is created */
+  char *table_name;		/* key of the share in the open tables hash;
+				the string is stored in the same allocation,
+				directly after this struct */
+  uint table_name_length,use_count;	/* use_count: incremented by
+				get_share and decremented by free_share,
+				which frees the share when it reaches 0 */
+} INNOBASE_SHARE;
+
+
+/* The class defining a handle to an Innodb table */
+class ha_innobase: public handler
+{
+	void*	innobase_prebuilt;	/* (row_prebuilt_t*) prebuilt
+					struct in Innodb, used to save
+					CPU */
+	THD*		user_thd;	/* the thread handle of the user
+					currently using the handle; this is
+					set in external_lock function */
+	ulong		last_query_id;	/* the latest query id where the
+					handle was used */
+  	THR_LOCK_DATA 	lock;		/* the MySQL lock of this handle;
+					store_lock sets its type and hands
+					out a pointer to it */
+	INNOBASE_SHARE  *share;		/* presumably the share obtained
+					with get_share for the table of this
+					handle - TODO confirm */
+
+  	gptr 		alloc_ptr;
+  	byte*		upd_buff;	/* buffer used in updates */
+  	byte*		key_val_buff;	/* buffer used in converting
+  					search key values from MySQL format
+  					to Innodb format */
+	uint		ref_stored_len;	/* length of the key value stored to
+					'ref' buffer of the handle, if any */
+  	ulong 		int_option_flag;	/* the capability flags
+					returned by option_flag() */
+  	uint 		primary_key;
+	uint		last_dup_key;
+	ulong		start_of_scan;	/* this is set to 1 when we are
+					starting a table scan but have not
+					yet fetched any row, else 0 */
+
+	uint		last_match_mode;/* match mode of the latest search:
+					ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
+					or undefined */
+	longlong	auto_inc_counter_for_this_stat;
+	ulong max_row_length(const byte *buf);
+
+	uint store_key_val_for_row(uint keynr, char* buff, const byte* record);
+	int update_thd(THD* thd);
+	int change_active_index(uint keynr);
+	int general_fetch(byte* buf, uint direction, uint match_mode);
+
+	/* Init values for the class: */
+ public:
+  	ha_innobase(TABLE *table): handler(table),
+	  int_option_flag(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER |
+			  HA_REC_NOT_IN_SEQ |
+			  HA_KEYPOS_TO_RNDPOS | HA_LASTKEY_ORDER |
+			  HA_HAVE_KEY_READ_ONLY | HA_READ_NOT_EXACT_KEY |
+			  HA_NULL_KEY |
+			  HA_NOT_EXACT_COUNT |
+			  HA_NO_WRITE_DELAYED |
+			  HA_PRIMARY_KEY_IN_READ_INDEX |
+			  HA_DROP_BEFORE_CREATE | HA_NOT_READ_PREFIX_LAST |
+			  HA_NO_PREFIX_CHAR_KEYS),
+	  last_dup_key((uint) -1),
+	  start_of_scan(0)
+  	{
+  	}
+  	~ha_innobase() {}
+
+  	const char* table_type() const { return("InnoDB");}
+	const char *index_type(uint key_number) { return "BTREE"; }
+  	const char** bas_ext() const;
+ 	ulong option_flag() const { return int_option_flag; }
+  	uint max_record_length() const { return HA_MAX_REC_LENGTH; }
+  	uint max_keys()          const { return MAX_KEY; }
+  	uint max_key_parts()     const { return MAX_REF_PARTS; }
+				/* An InnoDB page must store >= 2 keys;
+				a secondary key record must also contain the
+				primary key value:
+				max key length is therefore set to slightly
+				less than 1 / 4 of page size which is 16 kB;
+				but currently MySQL does not work with keys
+				whose size is > MAX_KEY_LENGTH */
+  	uint max_key_length()    const { return((MAX_KEY_LENGTH <= 3500) ?
+					  MAX_KEY_LENGTH : 3500);}
+  	bool fast_key_read()	 { return 1;}
+	key_map keys_to_use_for_scanning() { return ~(key_map) 0; }
+  	bool has_transactions()  { return 1;}
+
+  	int open(const char *name, int mode, uint test_if_locked);
+  	void initialize(void);
+  	int close(void);
+  	double scan_time();
+
+  	int write_row(byte * buf);
+  	int update_row(const byte * old_data, byte * new_data);
+  	int delete_row(const byte * buf);
+
+  	int index_init(uint index);
+  	int index_end();
+  	int index_read(byte * buf, const byte * key,
+		 	uint key_len, enum ha_rkey_function find_flag);
+  	int index_read_idx(byte * buf, uint index, const byte * key,
+		     	uint key_len, enum ha_rkey_function find_flag);
+	int index_read_last(byte * buf, const byte * key, uint key_len);
+  	int index_next(byte * buf);
+  	int index_next_same(byte * buf, const byte *key, uint keylen);
+  	int index_prev(byte * buf);
+  	int index_first(byte * buf);
+  	int index_last(byte * buf);
+
+  	int rnd_init(bool scan=1);
+  	int rnd_end();
+  	int rnd_next(byte *buf);
+  	int rnd_pos(byte * buf, byte *pos);
+
+  	void position(const byte *record);
+  	void info(uint);
+  	int extra(enum ha_extra_function operation);
+  	int reset(void);
+  	int external_lock(THD *thd, int lock_type);
+  	void position(byte *record);	/* NOTE(review): position is
+					declared a second time here, taking a
+					non-const byte* - confirm that both
+					overloads are intended */
+  	ha_rows records_in_range(int inx,
+			   const byte *start_key,uint start_key_len,
+			   enum ha_rkey_function start_search_flag,
+			   const byte *end_key,uint end_key_len,
+			   enum ha_rkey_function end_search_flag);
+	ha_rows estimate_number_of_rows();
+
+  	int create(const char *name, register TABLE *form,
+					HA_CREATE_INFO *create_info);
+  	int delete_table(const char *name);
+	int rename_table(const char* from, const char* to);
+	int check(THD* thd, HA_CHECK_OPT* check_opt);
+        char* update_table_comment(const char* comment);
+
+  	THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
+			     		enum thr_lock_type lock_type);
+	longlong get_auto_increment();
+};
+
+extern bool innodb_skip;
+extern SHOW_COMP_OPTION have_innodb;
+extern uint innobase_init_flags,
innobase_lock_type; +extern ulong innobase_cache_size; +extern char *innobase_home, *innobase_tmpdir, *innobase_logdir; +extern long innobase_lock_scan_time; +extern long innobase_mirrored_log_groups, innobase_log_files_in_group; +extern long innobase_log_file_size, innobase_log_buffer_size; +extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size; +extern long innobase_file_io_threads, innobase_lock_wait_timeout; +extern long innobase_force_recovery, innobase_thread_concurrency; +extern char *innobase_data_home_dir, *innobase_data_file_path; +extern char *innobase_log_group_home_dir, *innobase_log_arch_dir; +extern char *innobase_unix_file_flush_method; +extern bool innobase_flush_log_at_trx_commit, innobase_log_archive, + innobase_use_native_aio, innobase_fast_shutdown; + +extern TYPELIB innobase_lock_typelib; + +bool innobase_init(void); +bool innobase_end(void); +bool innobase_flush_logs(void); +uint innobase_get_free_space(void); + +int innobase_commit(THD *thd, void* trx_handle); +int innobase_rollback(THD *thd, void* trx_handle); +int innobase_close_connection(THD *thd); +int innobase_drop_database(char *path); + diff --git a/sql/ha_isam.cc b/sql/ha_isam.cc index a96f42c202f..4b8c40f8fe6 100644 --- a/sql/ha_isam.cc +++ b/sql/ha_isam.cc @@ -109,6 +109,15 @@ int ha_isam::index_read_idx(byte * buf, uint index, const byte * key, return !error ? 0 : my_errno ? my_errno : -1; } +int ha_isam::index_read_last(byte * buf, const byte * key, uint key_len) +{ + statistic_increment(ha_read_key_count,&LOCK_status); + int error=nisam_rkey(file, buf, active_index, key, key_len, + HA_READ_PREFIX_LAST); + table->status=error ? STATUS_NOT_FOUND: 0; + return !error ? 0 : my_errno ? 
my_errno : -1; +} + int ha_isam::index_next(byte * buf) { statistic_increment(ha_read_next_count,&LOCK_status); diff --git a/sql/ha_isam.h b/sql/ha_isam.h index e878f0fe697..4194632ddbe 100644 --- a/sql/ha_isam.h +++ b/sql/ha_isam.h @@ -33,7 +33,7 @@ class ha_isam: public handler int_option_flag(HA_READ_NEXT | HA_READ_PREV | HA_READ_RND_SAME | HA_KEYPOS_TO_RNDPOS | HA_READ_ORDER | HA_LASTKEY_ORDER | HA_HAVE_KEY_READ_ONLY | HA_READ_NOT_EXACT_KEY | - HA_LONGLONG_KEYS | HA_KEY_READ_WRONG_STR | HA_DUPP_POS | + HA_KEY_READ_WRONG_STR | HA_DUPP_POS | HA_NOT_DELETE_WITH_CACHE) {} ~ha_isam() {} @@ -57,6 +57,7 @@ class ha_isam: public handler uint key_len, enum ha_rkey_function find_flag); int index_read_idx(byte * buf, uint idx, const byte * key, uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); int index_next(byte * buf); int index_prev(byte * buf); int index_first(byte * buf); diff --git a/sql/ha_isammrg.h b/sql/ha_isammrg.h index 888ecf0ca37..1ee0b0e2547 100644 --- a/sql/ha_isammrg.h +++ b/sql/ha_isammrg.h @@ -32,8 +32,9 @@ class ha_isammrg: public handler ~ha_isammrg() {} const char *table_type() const { return "MRG_ISAM"; } const char **bas_ext() const; - ulong option_flag() const { return HA_READ_RND_SAME | HA_KEYPOS_TO_RNDPOS - | HA_REC_NOT_IN_SEQ;} + ulong option_flag() const { return (HA_READ_RND_SAME | HA_KEYPOS_TO_RNDPOS | + HA_NOT_READ_PREFIX_LAST | + HA_REC_NOT_IN_SEQ); } uint max_record_length() const { return HA_MAX_REC_LENGTH; } uint max_keys() const { return 0; } uint max_key_parts() const { return 0; } diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index 77d541bfdfb..78ac9f3b309 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -777,6 +777,14 @@ int ha_myisam::index_read_idx(byte * buf, uint index, const byte * key, return error; } +int ha_myisam::index_read_last(byte * buf, const byte * key, uint key_len) +{ + statistic_increment(ha_read_key_count,&LOCK_status); + int 
error=mi_rkey(file,buf,active_index, key, key_len, HA_READ_PREFIX_LAST); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + int ha_myisam::index_next(byte * buf) { statistic_increment(ha_read_next_count,&LOCK_status); @@ -973,7 +981,7 @@ void ha_myisam::update_create_info(HA_CREATE_INFO *create_info) } -int ha_myisam::create(const char *name, register TABLE *form, +int ha_myisam::create(const char *name, register TABLE *table, HA_CREATE_INFO *info) { int error; @@ -985,20 +993,20 @@ int ha_myisam::create(const char *name, register TABLE *form, MI_KEYDEF *keydef; MI_COLUMNDEF *recinfo,*recinfo_pos; MI_KEYSEG *keyseg; - uint options=form->db_options_in_use; + uint options=table->db_options_in_use; DBUG_ENTER("ha_myisam::create"); type=HA_KEYTYPE_BINARY; // Keep compiler happy if (!(my_multi_malloc(MYF(MY_WME), - &recinfo,(form->fields*2+2)*sizeof(MI_COLUMNDEF), - &keydef, form->keys*sizeof(MI_KEYDEF), + &recinfo,(table->fields*2+2)*sizeof(MI_COLUMNDEF), + &keydef, table->keys*sizeof(MI_KEYDEF), &keyseg, - ((form->key_parts + form->keys) * sizeof(MI_KEYSEG)), + ((table->key_parts + table->keys) * sizeof(MI_KEYSEG)), 0))) DBUG_RETURN(1); - pos=form->key_info; - for (i=0; i < form->keys ; i++, pos++) + pos=table->key_info; + for (i=0; i < table->keys ; i++, pos++) { keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT)); keydef[i].seg=keyseg; @@ -1041,7 +1049,7 @@ int ha_myisam::create(const char *name, register TABLE *form, { keydef[i].seg[j].null_bit=field->null_bit; keydef[i].seg[j].null_pos= (uint) (field->null_ptr- - (uchar*) form->record[0]); + (uchar*) table->record[0]); } else { @@ -1059,19 +1067,19 @@ int ha_myisam::create(const char *name, register TABLE *form, keydef[i].seg[j].flag|=HA_BLOB_PART; /* save number of bytes used to pack length */ keydef[i].seg[j].bit_start= (uint) (field->pack_length() - - form->blob_ptr_size); + table->blob_ptr_size); } } keyseg+=pos->key_parts; } recpos=0; recinfo_pos=recinfo; - while (recpos < (uint) 
form->reclength) + while (recpos < (uint) table->reclength) { Field **field,*found=0; - minpos=form->reclength; length=0; + minpos=table->reclength; length=0; - for (field=form->field ; *field ; field++) + for (field=table->field ; *field ; field++) { if ((fieldpos=(*field)->offset()) >= recpos && fieldpos <= minpos) @@ -1117,7 +1125,7 @@ int ha_myisam::create(const char *name, register TABLE *form, { recinfo_pos->null_bit=found->null_bit; recinfo_pos->null_pos= (uint) (found->null_ptr- - (uchar*) form->record[0]); + (uchar*) table->record[0]); } else { @@ -1132,20 +1140,23 @@ int ha_myisam::create(const char *name, register TABLE *form, } MI_CREATE_INFO create_info; bzero((char*) &create_info,sizeof(create_info)); - create_info.max_rows=form->max_rows; - create_info.reloc_rows=form->min_rows; + create_info.max_rows=table->max_rows; + create_info.reloc_rows=table->min_rows; create_info.auto_increment=(info->auto_increment_value ? info->auto_increment_value -1 : (ulonglong) 0); - create_info.data_file_length=(ulonglong) form->max_rows*form->avg_row_length; + create_info.data_file_length= ((ulonglong) table->max_rows * + table->avg_row_length); create_info.raid_type=info->raid_type; - create_info.raid_chunks=info->raid_chunks ? info->raid_chunks : RAID_DEFAULT_CHUNKS; - create_info.raid_chunksize=info->raid_chunksize ? info->raid_chunksize : RAID_DEFAULT_CHUNKSIZE; + create_info.raid_chunks= (info->raid_chunks ? info->raid_chunks : + RAID_DEFAULT_CHUNKS); + create_info.raid_chunksize=(info->raid_chunksize ? 
info->raid_chunksize : + RAID_DEFAULT_CHUNKSIZE); create_info.data_file_name= info->data_file_name; create_info.index_file_name=info->index_file_name; error=mi_create(fn_format(buff,name,"","",2+4), - form->keys,keydef, + table->keys,keydef, (uint) (recinfo_pos-recinfo), recinfo, 0, (MI_UNIQUEDEF*) 0, &create_info, diff --git a/sql/ha_myisam.h b/sql/ha_myisam.h index e2044dfe1e2..75655a2b505 100644 --- a/sql/ha_myisam.h +++ b/sql/ha_myisam.h @@ -47,7 +47,7 @@ class ha_myisam: public handler int_option_flag(HA_READ_NEXT | HA_READ_PREV | HA_READ_RND_SAME | HA_KEYPOS_TO_RNDPOS | HA_READ_ORDER | HA_LASTKEY_ORDER | HA_HAVE_KEY_READ_ONLY | HA_READ_NOT_EXACT_KEY | - HA_LONGLONG_KEYS | HA_NULL_KEY | + HA_NULL_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | HA_DUPP_POS | HA_BLOB_KEY | HA_AUTO_PART_KEY), enable_activate_all_index(1) @@ -71,6 +71,7 @@ class ha_myisam: public handler uint key_len, enum ha_rkey_function find_flag); int index_read_idx(byte * buf, uint idx, const byte * key, uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); int index_next(byte * buf); int index_prev(byte * buf); int index_first(byte * buf); @@ -78,9 +79,15 @@ class ha_myisam: public handler int index_next_same(byte *buf, const byte *key, uint keylen); int index_end() { ft_handler=NULL; return 0; } int ft_init() - { if(!ft_handler) return 1; ft_handler->please->reinit_search(ft_handler); return 0; } - FT_INFO *ft_init_ext(uint mode, uint inx,const byte *key, uint keylen, bool presort) - { return ft_init_search(mode, file,inx,(byte*) key,keylen,presort); } + { + if (!ft_handler) + return 1; + ft_handler->please->reinit_search(ft_handler); + return 0; + } + FT_INFO *ft_init_ext(uint mode, uint inx,const byte *key, uint keylen, + bool presort) + { return ft_init_search(mode, file,inx,(byte*) key,keylen,presort); } int ft_read(byte *buf); int rnd_init(bool scan=1); int rnd_next(byte *buf); diff --git a/sql/ha_myisammrg.cc 
b/sql/ha_myisammrg.cc index 999d9fe33ef..63a23fb708f 100644 --- a/sql/ha_myisammrg.cc +++ b/sql/ha_myisammrg.cc @@ -112,6 +112,15 @@ int ha_myisammrg::index_read_idx(byte * buf, uint index, const byte * key, return error; } +int ha_myisammrg::index_read_last(byte * buf, const byte * key, uint key_len) +{ + statistic_increment(ha_read_key_count,&LOCK_status); + int error=myrg_rkey(file,buf,active_index, key, key_len, + HA_READ_PREFIX_LAST); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + int ha_myisammrg::index_next(byte * buf) { statistic_increment(ha_read_next_count,&LOCK_status); diff --git a/sql/ha_myisammrg.h b/sql/ha_myisammrg.h index e18c520c803..2ab3a807543 100644 --- a/sql/ha_myisammrg.h +++ b/sql/ha_myisammrg.h @@ -38,7 +38,7 @@ class ha_myisammrg: public handler HA_HAVE_KEY_READ_ONLY | HA_KEYPOS_TO_RNDPOS | HA_READ_ORDER | HA_LASTKEY_ORDER | HA_READ_NOT_EXACT_KEY | - HA_LONGLONG_KEYS | HA_NULL_KEY | HA_BLOB_KEY); } + HA_NULL_KEY | HA_BLOB_KEY); } uint max_record_length() const { return HA_MAX_REC_LENGTH; } uint max_keys() const { return MI_MAX_KEY; } uint max_key_parts() const { return MAX_REF_PARTS; } @@ -55,6 +55,7 @@ class ha_myisammrg: public handler uint key_len, enum ha_rkey_function find_flag); int index_read_idx(byte * buf, uint idx, const byte * key, uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); int index_next(byte * buf); int index_prev(byte * buf); int index_first(byte * buf); diff --git a/sql/handler.cc b/sql/handler.cc index f097e501a8b..52d65edf0d4 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -33,7 +33,7 @@ #include "ha_berkeley.h" #endif #ifdef HAVE_INNOBASE_DB -#include "ha_innobase.h" +#include "ha_innodb.h" #endif #include #include diff --git a/sql/handler.h b/sql/handler.h index 33cfa965363..aa809b333b4 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -55,12 +55,11 @@ #define HA_REC_NOT_IN_SEQ 64 /* ha_info don't return recnumber; It returns a 
position to ha_r_rnd */ #define HA_ONLY_WHOLE_INDEX 128 /* Can't use part key searches */ -#define HA_RSAME_NO_INDEX 256 /* RSAME can't restore index */ +#define HA_NOT_READ_PREFIX_LAST 256 /* RSAME can't restore index */ #define HA_WRONG_ASCII_ORDER 512 /* Can't use sorting through key */ #define HA_HAVE_KEY_READ_ONLY 1024 /* Can read only keys (no record) */ #define HA_READ_NOT_EXACT_KEY 2048 /* Can read record after/before key */ #define HA_NO_INDEX 4096 /* No index needed for next/prev */ -#define HA_LONGLONG_KEYS 8192 /* Can have longlong as key */ #define HA_KEY_READ_WRONG_STR 16384 /* keyread returns converted strings */ #define HA_NULL_KEY 32768 /* One can have keys with NULL */ #define HA_DUPP_POS 65536 /* ha_position() gives dupp row */ @@ -256,6 +255,10 @@ public: virtual int index_first(byte * buf)=0; virtual int index_last(byte * buf)=0; virtual int index_next_same(byte *buf, const byte *key, uint keylen); + virtual int index_read_last(byte * buf, const byte * key, uint key_len) + { + return (my_errno=HA_ERR_WRONG_COMMAND); + } virtual int ft_init() { return -1; } virtual FT_INFO *ft_init_ext(uint mode,uint inx,const byte *key, uint keylen, diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 8e9ff17387c..a82e07fec6a 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -27,7 +27,7 @@ #include "ha_berkeley.h" #endif #ifdef HAVE_INNOBASE_DB -#include "ha_innobase.h" +#include "ha_innodb.h" #endif #include "ha_myisam.h" #include diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index eed6e4e5f81..ba46251078b 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -24,7 +24,7 @@ #include "mysql_priv.h" -#include "ha_innobase.h" +#include "ha_innodb.h" #include "sql_select.h" int mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, ORDER *order, diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 46c1dc80762..144b76407ab 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -89,15 +89,18 @@ static int join_read_system(JOIN_TAB *tab); static int 
join_read_const(JOIN_TAB *tab); static int join_read_key(JOIN_TAB *tab); static int join_read_always_key(JOIN_TAB *tab); +static int join_read_last_key(JOIN_TAB *tab); static int join_no_more_records(READ_RECORD *info); static int join_read_next(READ_RECORD *info); static int join_init_quick_read_record(JOIN_TAB *tab); static int test_if_quick_select(JOIN_TAB *tab); static int join_init_read_record(JOIN_TAB *tab); -static int join_init_read_first_with_key(JOIN_TAB *tab); -static int join_init_read_next_with_key(READ_RECORD *info); -static int join_init_read_last_with_key(JOIN_TAB *tab); -static int join_init_read_prev_with_key(READ_RECORD *info); +static int join_read_first(JOIN_TAB *tab); +static int join_read_next(READ_RECORD *info); +static int join_read_next_same(READ_RECORD *info); +static int join_read_last(JOIN_TAB *tab); +static int join_read_prev_same(READ_RECORD *info); +static int join_read_prev(READ_RECORD *info); static int join_ft_read_first(JOIN_TAB *tab); static int join_ft_read_next(READ_RECORD *info); static COND *make_cond_for_table(COND *cond,table_map table, @@ -2510,7 +2513,7 @@ make_join_readinfo(JOIN *join,uint options) tab->quick=0; table->file->index_init(tab->ref.key); tab->read_first_record= join_read_always_key; - tab->read_record.read_record= join_read_next; + tab->read_record.read_record= join_read_next_same; if (table->used_keys & ((key_map) 1 << tab->ref.key) && !table->no_keyread) { @@ -2585,7 +2588,7 @@ make_join_readinfo(JOIN *join,uint options) { // Only read index tree tab->index=find_shortest_key(table, table->used_keys); tab->table->file->index_init(tab->index); - tab->read_first_record= join_init_read_first_with_key; + tab->read_first_record= join_read_first; tab->type=JT_NEXT; // Read with index_first / index_next } } @@ -3641,6 +3644,10 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List &fields, { if (field->flags & GROUP_FLAG && !using_unique_constraint) { + /* + We have to reserve one byte here for NULL bits, + as 
this is updated by 'end_update()' + */ *pos++=0; // Null is stored here recinfo->length=1; recinfo->type=FIELD_NORMAL; @@ -3730,11 +3737,11 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List &fields, { /* To be able to group on NULL, we move the null bit to be - just before the column and extend the key to cover the null bit + just before the column. + The null byte is updated by 'end_update()' */ - *group_buff= 0; // Init null byte - key_part_info->offset--; - key_part_info->length++; + key_part_info->null_bit=1; + key_part_info->null_offset= key_part_info->offset-1; group->field->move_field((char*) group_buff+1, (uchar*) group_buff, 1); } @@ -4497,6 +4504,35 @@ join_read_always_key(JOIN_TAB *tab) return 0; } +/* + This function is used when optimizing away ORDER BY in + SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC +*/ + +static int +join_read_last_key(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + + if (cp_buffer_from_ref(&tab->ref)) + return -1; + if ((error=table->file->index_read_last(table->record[0], + tab->ref.key_buff, + tab->ref.key_length))) + { + if (error != HA_ERR_KEY_NOT_FOUND) + { + sql_print_error("read_const: Got error %d when reading table %s",error, + table->path); + table->file->print_error(error,MYF(0)); + return 1; + } + return -1; /* purecov: inspected */ + } + return 0; +} + /* ARGSUSED */ static int @@ -4507,7 +4543,7 @@ join_no_more_records(READ_RECORD *info __attribute__((unused))) static int -join_read_next(READ_RECORD *info) +join_read_next_same(READ_RECORD *info) { int error; TABLE *table= info->table; @@ -4530,6 +4566,37 @@ join_read_next(READ_RECORD *info) return 0; } +static int +join_read_prev_same(READ_RECORD *info) +{ + int error; + TABLE *table= info->table; + JOIN_TAB *tab=table->reginfo.join_tab; + + if ((error=table->file->index_prev(table->record[0]))) + { + if (error != HA_ERR_END_OF_FILE) + { + sql_print_error("read_next: Got error %d when reading table %s",error, + table->path); + 
table->file->print_error(error,MYF(0)); + error= 1; + } + else + { + table->status= STATUS_GARBAGE; + error= -1; + } + } + else if (key_cmp(table, tab->ref.key_buff, tab->ref.key, + tab->ref.key_length)) + { + table->status=STATUS_NOT_FOUND; + error= 1; + } + return error; +} + static int join_init_quick_read_record(JOIN_TAB *tab) @@ -4560,7 +4627,7 @@ join_init_read_record(JOIN_TAB *tab) } static int -join_init_read_first_with_key(JOIN_TAB *tab) +join_read_first(JOIN_TAB *tab) { int error; TABLE *table=tab->table; @@ -4571,7 +4638,7 @@ join_init_read_first_with_key(JOIN_TAB *tab) table->file->extra(HA_EXTRA_KEYREAD); } tab->table->status=0; - tab->read_record.read_record=join_init_read_next_with_key; + tab->read_record.read_record=join_read_next; tab->read_record.table=table; tab->read_record.file=table->file; tab->read_record.index=tab->index; @@ -4591,8 +4658,9 @@ join_init_read_first_with_key(JOIN_TAB *tab) return 0; } + static int -join_init_read_next_with_key(READ_RECORD *info) +join_read_next(READ_RECORD *info) { int error=info->file->index_next(info->record); if (error) @@ -4609,9 +4677,8 @@ join_init_read_next_with_key(READ_RECORD *info) return 0; } - static int -join_init_read_last_with_key(JOIN_TAB *tab) +join_read_last(JOIN_TAB *tab) { TABLE *table=tab->table; int error; @@ -4621,7 +4688,7 @@ join_init_read_last_with_key(JOIN_TAB *tab) table->file->extra(HA_EXTRA_KEYREAD); } tab->table->status=0; - tab->read_record.read_record=join_init_read_prev_with_key; + tab->read_record.read_record=join_read_prev; tab->read_record.table=table; tab->read_record.file=table->file; tab->read_record.index=tab->index; @@ -4641,8 +4708,9 @@ join_init_read_last_with_key(JOIN_TAB *tab) return 0; } + static int -join_init_read_prev_with_key(READ_RECORD *info) +join_read_prev(READ_RECORD *info) { int error=info->file->index_prev(info->record); if (error) @@ -4659,6 +4727,7 @@ join_init_read_prev_with_key(READ_RECORD *info) return 0; } + static int join_ft_read_first(JOIN_TAB 
*tab) { @@ -4734,7 +4803,8 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), if (join->select_options & OPTION_FOUND_ROWS) { JOIN_TAB *jt=join->join_tab; - if ((join->tables == 1) && !join->tmp_table && !join->sort_and_group && !join->send_group_parts && !join->having && !jt->select_cond ) + if ((join->tables == 1) && !join->tmp_table && !join->sort_and_group + && !join->send_group_parts && !join->having && !jt->select_cond) { join->select_options ^= OPTION_FOUND_ROWS; join->send_records = jt->records; @@ -5315,6 +5385,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, if (ref_key >= 0) { + /* + We come here when there is a REF key. + */ int order_direction; uint used_key_parts; /* Check if we get the rows in requested sorted order by using the key */ @@ -5322,11 +5395,11 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, (order_direction = test_if_order_by_key(order,table,ref_key, &used_key_parts))) { - if (order_direction == -1) + if (order_direction == -1) // If ORDER BY ... DESC { if (select && select->quick) { - // ORDER BY ref_key DESC + // ORDER BY range_key DESC QUICK_SELECT_DESC *tmp=new QUICK_SELECT_DESC(select->quick, used_key_parts); if (!tmp || tmp->error) @@ -5341,11 +5414,15 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, { /* SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC - TODO: - Add a new traversal function to read last matching row and - traverse backwards. + + Use a traversal function that starts by reading the last row + with key part (A) and then traverse the index backwards. 
*/ - DBUG_RETURN(0); + if (table->file->option_flag() & HA_NOT_READ_PREFIX_LAST) + DBUG_RETURN(1); + tab->read_first_record= join_read_last_key; + tab->read_record.read_record= join_read_prev_same; + /* fall through */ } } DBUG_RETURN(1); /* No need to sort */ @@ -5377,8 +5454,8 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, if (!no_changes) { tab->index=nr; - tab->read_first_record= (flag > 0 ? join_init_read_first_with_key: - join_init_read_last_with_key); + tab->read_first_record= (flag > 0 ? join_read_first: + join_read_last); table->file->index_init(nr); tab->type=JT_NEXT; // Read with index_first(), index_next() if (table->used_keys & ((key_map) 1 << nr)) @@ -6369,7 +6446,8 @@ get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables) static void calc_group_buffer(JOIN *join,ORDER *group) { - uint key_length=0,parts=0; + uint key_length=0, parts=0, null_parts=0; + if (group) join->group= 1; for (; group ; group=group->next) @@ -6390,10 +6468,11 @@ calc_group_buffer(JOIN *join,ORDER *group) key_length+=(*group->item)->max_length; parts++; if ((*group->item)->maybe_null) - key_length++; + null_parts++; } - join->tmp_table_param.group_length=key_length; + join->tmp_table_param.group_length=key_length+null_parts; join->tmp_table_param.group_parts=parts; + join->tmp_table_param.group_null_parts=null_parts; } diff --git a/sql/sql_select.h b/sql/sql_select.h index dc8c97736a5..9eb287c8845 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -127,12 +127,13 @@ class TMP_TABLE_PARAM { ha_rows end_write_records; uint field_count,sum_func_count,func_count; uint hidden_field_count; - uint group_parts,group_length; + uint group_parts,group_length,group_null_parts; uint quick_group; bool using_indirect_summary_function; TMP_TABLE_PARAM() - :copy_funcs_it(copy_funcs), copy_field(0), group_parts(0), group_length(0) + :copy_funcs_it(copy_funcs), copy_field(0), group_parts(0), + group_length(0), group_null_parts(0) {} ~TMP_TABLE_PARAM() { -- cgit 
v1.2.1 From 4d10a0cb7eac04d7134537df139c87023453111d Mon Sep 17 00:00:00 2001 From: "monty@hundin.mysql.fi" <> Date: Wed, 16 Jan 2002 00:42:52 +0200 Subject: Add support for NULL=NULL in keys (Used in GROUP BY optimization) Add ISAM to Windows version Fix of test results Fixes for NULL keys in HEAP tables. --- .bzrignore | 1 + Docs/manual.texi | 4 ++- heap/hp_open.c | 3 +- heap/hp_rkey.c | 2 +- heap/hp_write.c | 2 +- include/config-win.h | 2 ++ include/my_base.h | 2 ++ libmysqld/Makefile.am | 2 +- myisam/mi_write.c | 86 +++++++++++++++++++++++--------------------- mysql-test/r/group_by.result | 81 +++++++++++++++++++++++++++++++++++++++++ mysql-test/r/heap.result | 26 ++++++++++++++ mysql-test/r/null.result | 1 - mysql-test/r/order_by.result | 6 ++-- mysql-test/t/group_by.test | 23 ++++++++++++ mysql-test/t/heap.test | 22 ++++++++++++ mysql-test/t/null.test | 1 - sql/ha_heap.cc | 20 ++++++----- sql/item_strfunc.h | 3 +- sql/item_timefunc.h | 3 +- sql/sql_parse.cc | 33 +++++++++-------- sql/sql_select.cc | 77 +++++++++++++++++++++++++-------------- 21 files changed, 297 insertions(+), 103 deletions(-) diff --git a/.bzrignore b/.bzrignore index 0b9549a818c..e2939d2ed59 100644 --- a/.bzrignore +++ b/.bzrignore @@ -451,3 +451,4 @@ vio/test-ssl vio/test-sslclient vio/test-sslserver vio/viotest-ssl +libmysqld/ha_innodb.cc diff --git a/Docs/manual.texi b/Docs/manual.texi index 0724e5180c2..c35237c54d4 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -48121,10 +48121,12 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. @itemize @bullet @item +Fixed bug in @code{GROUP BY BINARY column} +@item Added support for @code{NULL} keys in HEAP tables. @item Use index for @code{ORDER BY} in queries of type: -@code{SELECT * FROM t1 WHERE key_part1=1 ORDER BY key_part1 DESC,key_part2 DESC} +@code{SELECT * FROM t WHERE key_part1=1 ORDER BY key_part1 DESC,key_part2 DESC} @item Fixed bug in @code{FLUSH QUERY CACHE}. 
@item diff --git a/heap/hp_open.c b/heap/hp_open.c index 69e02945253..938ab8c4f78 100644 --- a/heap/hp_open.c +++ b/heap/hp_open.c @@ -46,7 +46,8 @@ HP_INFO *heap_open(const char *name, int mode, uint keys, HP_KEYDEF *keydef, for (j=length=0 ; j < keydef[i].keysegs; j++) { length+=keydef[i].seg[j].length; - if (keydef[i].seg[j].null_bit) + if (keydef[i].seg[j].null_bit && + !(keydef[i].flag & HA_NULL_ARE_EQUAL)) keydef[i].flag |= HA_NULL_PART_KEY; } keydef[i].length=length; diff --git a/heap/hp_rkey.c b/heap/hp_rkey.c index 37ebf5c784e..e7a1d81fba6 100644 --- a/heap/hp_rkey.c +++ b/heap/hp_rkey.c @@ -20,7 +20,7 @@ int heap_rkey(HP_INFO *info, byte *record, int inx, const byte *key) { byte *pos; HP_SHARE *share=info->s; - DBUG_ENTER("hp_rkey"); + DBUG_ENTER("heap_rkey"); DBUG_PRINT("enter",("base: %lx inx: %d",info,inx)); if ((uint) inx >= share->keys) diff --git a/heap/hp_write.c b/heap/hp_write.c index a219c8be23a..806f40e5be5 100644 --- a/heap/hp_write.c +++ b/heap/hp_write.c @@ -238,7 +238,7 @@ int _hp_write_key(register HP_SHARE *info, HP_KEYDEF *keyinfo, _hp_movelink(pos,gpos,empty); } - /* Check if dupplicated keys */ + /* Check if duplicated keys */ if ((keyinfo->flag & HA_NOSAME) && pos == gpos && (!(keyinfo->flag & HA_NULL_PART_KEY) || !hp_if_null_in_key(keyinfo, record))) diff --git a/include/config-win.h b/include/config-win.h index ea7b290a12a..a3770cd68ad 100644 --- a/include/config-win.h +++ b/include/config-win.h @@ -255,6 +255,8 @@ inline double ulonglong2double(ulonglong value) #define HAVE_COMPRESS #define HAVE_CREATESEMAPHORE +#define HAVE_ISAM /* We want to have support for ISAM in 4.0 */ + #ifdef NOT_USED #define HAVE_SNPRINTF /* Gave link error */ #define _snprintf snprintf diff --git a/include/my_base.h b/include/my_base.h index 6494fa93ab9..68f33147145 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -150,6 +150,7 @@ enum ha_base_keytype { #define HA_FULLTEXT 128 /* SerG: for full-text search */ #define HA_UNIQUE_CHECK 256 /* Check 
the key for uniqueness */ #define HA_SPATIAL 1024 /* Alex Barkov: for spatial search */ +#define HA_NULL_ARE_EQUAL 2048 /* NULL in key are cmp as equal */ /* Automatic bits in key-flag */ @@ -260,6 +261,7 @@ enum ha_base_keytype { #define MBR_DISJOINT 4096 #define MBR_EQUAL 8192 #define MBR_DATA 16384 +#define SEARCH_NULL_ARE_EQUAL 32768 /* NULL in keys are equal */ /* bits in opt_flag */ #define QUICK_USED 1 diff --git a/libmysqld/Makefile.am b/libmysqld/Makefile.am index 7c4197afb32..edee13c98bb 100644 --- a/libmysqld/Makefile.am +++ b/libmysqld/Makefile.am @@ -38,7 +38,7 @@ libmysqlsources = errmsg.c get_password.c password.c noinst_HEADERS = embedded_priv.h sqlsources = convert.cc derror.cc field.cc field_conv.cc filesort.cc \ - ha_innobase.cc ha_berkeley.cc ha_heap.cc ha_isam.cc ha_isammrg.cc \ + ha_innodb.cc ha_berkeley.cc ha_heap.cc ha_isam.cc ha_isammrg.cc \ ha_myisam.cc ha_myisammrg.cc handler.cc sql_handler.cc \ hostname.cc init.cc \ item.cc item_buff.cc item_cmpfunc.cc item_create.cc \ diff --git a/myisam/mi_write.c b/myisam/mi_write.c index 8f4db221a9c..1f43a5defcc 100644 --- a/myisam/mi_write.c +++ b/myisam/mi_write.c @@ -25,7 +25,8 @@ /* Functions declared in this file */ -static int w_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, +static int w_search(MI_INFO *info,MI_KEYDEF *keyinfo, + uint comp_flag, uchar *key, uint key_length, my_off_t pos, uchar *father_buff, uchar *father_keypos, my_off_t father_page, my_bool insert_last); @@ -245,10 +246,23 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key, uint key_length) { int error; + uint comp_flag; + MI_KEYDEF *keyinfo=info->s->keyinfo+keynr; DBUG_ENTER("_mi_ck_write_btree"); + if (keyinfo->flag & HA_SORT_ALLOWS_SAME) + comp_flag=SEARCH_BIGGER; /* Put after same key */ + else if (keyinfo->flag & HA_NOSAME) + { + comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No dupplicates */ + if (keyinfo->flag & HA_NULL_ARE_EQUAL) + comp_flag|= SEARCH_NULL_ARE_EQUAL; + } + else + 
comp_flag=SEARCH_SAME; /* Keys in rec-pos order */ + if (info->s->state.key_root[keynr] == HA_OFFSET_ERROR || - (error=w_search(info,info->s->keyinfo+keynr,key, key_length, + (error=w_search(info, keyinfo, comp_flag, key, key_length, info->s->state.key_root[keynr], (uchar *) 0, (uchar*) 0, (my_off_t) 0, 1)) > 0) error=_mi_enlarge_root(info,keynr,key); @@ -291,13 +305,12 @@ int _mi_enlarge_root(register MI_INFO *info, uint keynr, uchar *key) */ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, - uchar *key, uint key_length, my_off_t page, - uchar *father_buff, - uchar *father_keypos, my_off_t father_page, - my_bool insert_last) + uint comp_flag, uchar *key, uint key_length, my_off_t page, + uchar *father_buff, uchar *father_keypos, + my_off_t father_page, my_bool insert_last) { int error,flag; - uint comp_flag,nod_flag, search_key_length; + uint nod_flag, search_key_length; uchar *temp_buff,*keypos; uchar keybuff[MI_MAX_KEY_BUFF]; my_bool was_last_key; @@ -305,17 +318,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, DBUG_ENTER("w_search"); DBUG_PRINT("enter",("page: %ld",page)); - search_key_length=USE_WHOLE_KEY; - if (keyinfo->flag & HA_SORT_ALLOWS_SAME) - comp_flag=SEARCH_BIGGER; /* Put after same key */ - else if (keyinfo->flag & HA_NOSAME) - { - comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No dupplicates */ - search_key_length= key_length; - } - else - comp_flag=SEARCH_SAME; /* Keys in rec-pos order */ - + search_key_length= (comp_flag & SEARCH_FIND) ? 
key_length : USE_WHOLE_KEY; if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ MI_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); @@ -344,7 +347,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, insert_last=0; next_page=_mi_kpos(nod_flag,keypos); if (next_page == HA_OFFSET_ERROR || - (error=w_search(info,keyinfo,key,key_length,next_page, + (error=w_search(info, keyinfo, comp_flag, key, key_length, next_page, temp_buff, keypos, page, insert_last)) >0) { error=_mi_insert(info,keyinfo,key,temp_buff,keypos,keybuff,father_buff, @@ -759,41 +762,44 @@ static int keys_compare(bulk_insert_param *param, uchar *key1, uchar *key2) { uint not_used; return _mi_key_cmp(param->info->s->keyinfo[param->keynr].seg, - key1, key2, USE_WHOLE_KEY, SEARCH_SAME, &not_used); + key1, key2, USE_WHOLE_KEY, SEARCH_SAME, + &not_used); } static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param) { - /* probably I can use info->lastkey here, but I'm not sure, - and to be safe I'd better use local lastkey. - Monty, feel free to comment on this */ + /* + Probably I can use info->lastkey here, but I'm not sure, + and to be safe I'd better use local lastkey. 
+ */ uchar lastkey[MI_MAX_KEY_BUFF]; uint keylen; MI_KEYDEF *keyinfo; switch (mode) { - case free_init: - if (param->info->s->concurrent_insert) - { - rw_wrlock(&param->info->s->key_root_lock[param->keynr]); - param->info->s->keyinfo[param->keynr].version++; - } - return 0; - case free_free: - keyinfo=param->info->s->keyinfo+param->keynr; - keylen=_mi_keylength(keyinfo, key); - memcpy(lastkey, key, keylen); - return _mi_ck_write_btree(param->info,param->keynr,lastkey, - keylen - param->info->s->rec_reflength); - case free_end: - if (param->info->s->concurrent_insert) - rw_unlock(&param->info->s->key_root_lock[param->keynr]); - return 0; + case free_init: + if (param->info->s->concurrent_insert) + { + rw_wrlock(&param->info->s->key_root_lock[param->keynr]); + param->info->s->keyinfo[param->keynr].version++; + } + return 0; + case free_free: + keyinfo=param->info->s->keyinfo+param->keynr; + keylen=_mi_keylength(keyinfo, key); + memcpy(lastkey, key, keylen); + return _mi_ck_write_btree(param->info,param->keynr,lastkey, + keylen - param->info->s->rec_reflength); + case free_end: + if (param->info->s->concurrent_insert) + rw_unlock(&param->info->s->key_root_lock[param->keynr]); + return 0; } return -1; } + int _mi_init_bulk_insert(MI_INFO *info) { MYISAM_SHARE *share=info->s; diff --git a/mysql-test/r/group_by.result b/mysql-test/r/group_by.result index a0e234aa69c..a67298c73c0 100644 --- a/mysql-test/r/group_by.result +++ b/mysql-test/r/group_by.result @@ -263,3 +263,84 @@ score count(*) 2 1 1 2 drop table t1; +CREATE TABLE t1 (a char(1)); +INSERT INTO t1 VALUES ('A'),('B'),('A'),('B'),('A'),('B'),(NULL),('a'),('b'),(NULL),('A'),('B'),(NULL); +SELECT a FROM t1 GROUP BY a; +a +NULL +A +B +SELECT a,count(*) FROM t1 GROUP BY a; +a count(*) +NULL 3 +A 5 +B 5 +SELECT a FROM t1 GROUP BY binary a; +a +NULL +A +B +a +b +SELECT a,count(*) FROM t1 GROUP BY binary a; +a count(*) +NULL 3 +A 4 +B 4 +a 1 +b 1 +SELECT binary a FROM t1 GROUP BY 1; +binary a +NULL +A +B +a +b +SELECT binary a,count(*) 
FROM t1 GROUP BY 1; +binary a count(*) +NULL 3 +A 4 +B 4 +a 1 +b 1 +SET SQL_BIG_TABLES=1; +SELECT a FROM t1 GROUP BY a; +a +NULL +A +B +SELECT a,count(*) FROM t1 GROUP BY a; +a count(*) +NULL 3 +A 5 +B 5 +SELECT a FROM t1 GROUP BY binary a; +a +NULL +A +B +a +b +SELECT a,count(*) FROM t1 GROUP BY binary a; +a count(*) +NULL 3 +A 4 +B 4 +a 1 +b 1 +SELECT binary a FROM t1 GROUP BY 1; +binary a +NULL +A +B +a +b +SELECT binary a,count(*) FROM t1 GROUP BY 1; +binary a count(*) +NULL 3 +A 4 +B 4 +a 1 +b 1 +SET SQL_BIG_TABLES=0; +drop table t1; diff --git a/mysql-test/r/heap.result b/mysql-test/r/heap.result index 5495997324a..fc4a116307e 100644 --- a/mysql-test/r/heap.result +++ b/mysql-test/r/heap.result @@ -165,3 +165,29 @@ explain select * from t1 where btn="a" and new_col="a"; table type possible_keys key key_len ref rows Extra t1 ref btn btn 11 const,const 10 where used drop table t1; +CREATE TABLE t1 ( +a int default NULL, +b int default NULL, +KEY a (a), +UNIQUE b (b) +) type=heap; +INSERT INTO t1 VALUES (NULL,99),(99,NULL),(1,1),(2,2),(1,3); +SELECT * FROM t1 WHERE a=NULL; +a b +explain SELECT * FROM t1 WHERE a IS NULL; +table type possible_keys key key_len ref rows Extra +t1 ref a a 5 const 10 where used +SELECT * FROM t1 WHERE a<=>NULL; +a b +NULL 99 +SELECT * FROM t1 WHERE b=NULL; +a b +explain SELECT * FROM t1 WHERE b IS NULL; +table type possible_keys key key_len ref rows Extra +t1 ref b b 5 const 1 where used +SELECT * FROM t1 WHERE b<=>NULL; +a b +99 NULL +INSERT INTO t1 VALUES (1,3); +Duplicate entry '3' for key 1 +DROP TABLE t1; diff --git a/mysql-test/r/null.result b/mysql-test/r/null.result index 77f2c67bf05..e6e3b7155a3 100644 --- a/mysql-test/r/null.result +++ b/mysql-test/r/null.result @@ -40,7 +40,6 @@ insert into t1 values (null); select * from t1 where x != 0; x drop table t1; -DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( indexed_field int default NULL, KEY indexed_field (indexed_field) diff --git a/mysql-test/r/order_by.result 
b/mysql-test/r/order_by.result index a9d18e838cb..9bc716ee8b9 100644 --- a/mysql-test/r/order_by.result +++ b/mysql-test/r/order_by.result @@ -286,15 +286,15 @@ a b c 1 NULL NULL explain select * from t1 where a = 1 order by a desc, b desc; table type possible_keys key key_len ref rows Extra -t1 ref a a 4 const 5 where used; Using index; Using filesort +t1 ref a a 4 const 5 where used; Using index select * from t1 where a = 1 order by a desc, b desc; a b c 1 3 b -1 1 NULL 1 1 b 1 1 b -1 NULL NULL +1 1 NULL 1 NULL b +1 NULL NULL explain select * from t1 where a = 1 and b is null order by a desc, b desc; table type possible_keys key key_len ref rows Extra t1 ref a a 9 const,const 2 where used; Using index; Using filesort diff --git a/mysql-test/t/group_by.test b/mysql-test/t/group_by.test index efa1744feee..b98505e06b9 100644 --- a/mysql-test/t/group_by.test +++ b/mysql-test/t/group_by.test @@ -243,3 +243,26 @@ select sql_big_result spid,sum(userid) from t1 group by spid desc; explain select sql_big_result score,count(*) from t1 group by score desc; select sql_big_result score,count(*) from t1 group by score desc; drop table t1; + +# +# Compare with hash keys +# + +CREATE TABLE t1 (a char(1)); +INSERT INTO t1 VALUES ('A'),('B'),('A'),('B'),('A'),('B'),(NULL),('a'),('b'),(NULL),('A'),('B'),(NULL); +SELECT a FROM t1 GROUP BY a; +SELECT a,count(*) FROM t1 GROUP BY a; +SELECT a FROM t1 GROUP BY binary a; +SELECT a,count(*) FROM t1 GROUP BY binary a; +SELECT binary a FROM t1 GROUP BY 1; +SELECT binary a,count(*) FROM t1 GROUP BY 1; +# Do the same tests with MyISAM temporary tables +SET SQL_BIG_TABLES=1; +SELECT a FROM t1 GROUP BY a; +SELECT a,count(*) FROM t1 GROUP BY a; +SELECT a FROM t1 GROUP BY binary a; +SELECT a,count(*) FROM t1 GROUP BY binary a; +SELECT binary a FROM t1 GROUP BY 1; +SELECT binary a,count(*) FROM t1 GROUP BY 1; +SET SQL_BIG_TABLES=0; +drop table t1; diff --git a/mysql-test/t/heap.test b/mysql-test/t/heap.test index abb9e1fd1bc..b08e8f6ea36 100644 
--- a/mysql-test/t/heap.test +++ b/mysql-test/t/heap.test @@ -100,3 +100,25 @@ update t1 set new_col=btn; explain select * from t1 where btn="a"; explain select * from t1 where btn="a" and new_col="a"; drop table t1; + +# +# Test of NULL keys +# + +CREATE TABLE t1 ( + a int default NULL, + b int default NULL, + KEY a (a), + UNIQUE b (b) +) type=heap; +INSERT INTO t1 VALUES (NULL,99),(99,NULL),(1,1),(2,2),(1,3); +SELECT * FROM t1 WHERE a=NULL; +explain SELECT * FROM t1 WHERE a IS NULL; +SELECT * FROM t1 WHERE a<=>NULL; +SELECT * FROM t1 WHERE b=NULL; +explain SELECT * FROM t1 WHERE b IS NULL; +SELECT * FROM t1 WHERE b<=>NULL; + +--error 1062 +INSERT INTO t1 VALUES (1,3); +DROP TABLE t1; diff --git a/mysql-test/t/null.test b/mysql-test/t/null.test index a010ab38e07..8bd9e806118 100644 --- a/mysql-test/t/null.test +++ b/mysql-test/t/null.test @@ -25,7 +25,6 @@ drop table t1; # Test problem med index on NULL columns and testing with =NULL; # -DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( indexed_field int default NULL, KEY indexed_field (indexed_field) diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc index 518a9c38d82..5f482bca1e8 100644 --- a/sql/ha_heap.cc +++ b/sql/ha_heap.cc @@ -33,7 +33,7 @@ const char **ha_heap::bas_ext() const int ha_heap::open(const char *name, int mode, uint test_if_locked) { - uint key,part,parts,mem_per_row=0; + uint key,parts,mem_per_row=0; ulong max_rows; HP_KEYDEF *keydef; HP_KEYSEG *seg; @@ -48,24 +48,27 @@ int ha_heap::open(const char *name, int mode, uint test_if_locked) for (key=0 ; key < table->keys ; key++) { KEY *pos=table->key_info+key; + KEY_PART_INFO *key_part= pos->key_part; + KEY_PART_INFO *key_part_end= key_part+pos->key_parts; + mem_per_row += (pos->key_length + (sizeof(char*) * 2)); keydef[key].keysegs=(uint) pos->key_parts; - keydef[key].flag = (pos->flags & HA_NOSAME); + keydef[key].flag = (pos->flags & (HA_NOSAME | HA_NULL_ARE_EQUAL)); keydef[key].seg=seg; - for (part=0 ; part < pos->key_parts ; part++) + for (; key_part != 
key_part_end ; key_part++, seg++) { - uint flag=pos->key_part[part].key_type; - Field *field=pos->key_part[part].field; + uint flag=key_part->key_type; + Field *field=key_part->field; if (!f_is_packed(flag) && f_packtype(flag) == (int) FIELD_TYPE_DECIMAL && !(flag & FIELDFLAG_BINARY)) seg->type= (int) HA_KEYTYPE_TEXT; else seg->type= (int) HA_KEYTYPE_BINARY; - seg->start=(uint) pos->key_part[part].offset; - seg->length=(uint) pos->key_part[part].length; + seg->start=(uint) key_part->offset; + seg->length=(uint) key_part->length; if (field->null_ptr) { seg->null_bit=field->null_bit; @@ -88,7 +91,8 @@ int ha_heap::open(const char *name, int mode, uint test_if_locked) table->max_rows : max_rows), table->min_rows); my_free((gptr) keydef,MYF(0)); - info(HA_STATUS_NO_LOCK | HA_STATUS_CONST | HA_STATUS_VARIABLE); + if (file) + info(HA_STATUS_NO_LOCK | HA_STATUS_CONST | HA_STATUS_VARIABLE); ref_length=sizeof(HEAP_PTR); return (!file ? errno : 0); } diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 870edffeb7e..1279a5099d5 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -435,7 +435,8 @@ class Item_func_binary :public Item_str_func public: Item_func_binary(Item *a) :Item_str_func(a) {} const char *func_name() const { return "binary"; } - String *val_str(String *a) { return (args[0]->val_str(a)); } + String *val_str(String *a) + { a=args[0]->val_str(a); null_value=args[0]->null_value; return a; } void fix_length_and_dec() { binary=1; max_length=args[0]->max_length; } void print(String *str) { print_op(str); } }; diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h index c9daa2316e8..32b85e7f028 100644 --- a/sql/item_timefunc.h +++ b/sql/item_timefunc.h @@ -418,7 +418,8 @@ class Item_typecast :public Item_str_func { public: Item_typecast(Item *a) :Item_str_func(a) {} - String *val_str(String *a) { return (args[0]->val_str(a)); } + String *val_str(String *a) + { a=args[0]->val_str(a); null_value=args[0]->null_value; return a; } void fix_length_and_dec() { 
max_length=args[0]->max_length; } void print(String *str); }; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 2fbdf05e826..63abf757c1e 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -557,9 +557,9 @@ pthread_handler_decl(handle_one_connection,arg) pthread_detach_this_thread(); -#if !defined( __WIN__) && !defined(OS2) /* Win32 calls this in pthread_create */ - if (my_thread_init()) // needed to be called first before we call - // DBUG_ macros +#if !defined( __WIN__) && !defined(OS2) // Win32 calls this in pthread_create + // The following calls needs to be done before we call DBUG_ macros + if (my_thread_init()) { close_connection(&thd->net,ER_OUT_OF_RESOURCES); statistic_increment(aborted_connects,&LOCK_thread_count); @@ -568,13 +568,13 @@ pthread_handler_decl(handle_one_connection,arg) } #endif - // handle_one_connection() is the only way a thread would start - // and would always be on top of the stack - // therefore, the thread stack always starts at the address of the first - // local variable of handle_one_connection, which is thd - // we need to know the start of the stack so that we could check for - // stack overruns - + /* + handle_one_connection() is the only way a thread would start + and would always be on top of the stack, therefore, the thread + stack always starts at the address of the first local variable + of handle_one_connection, which is thd. We need to know the + start of the stack so that we could check for stack overruns. + */ DBUG_PRINT("info", ("handle_one_connection called by thread %d\n", thd->thread_id)); // now that we've called my_thread_init(), it is safe to call DBUG_* @@ -634,12 +634,12 @@ pthread_handler_decl(handle_one_connection,arg) if (net->error && net->vio != 0) { if (!thd->killed && opt_warnings) - sql_print_error(ER(ER_NEW_ABORTING_CONNECTION), - thd->thread_id,(thd->db ? thd->db : "unconnected"), - thd->user ? thd->user : "unauthenticated", - thd->host_or_ip, - (net->last_errno ? 
ER(net->last_errno) : - ER(ER_UNKNOWN_ERROR))); + sql_print_error(ER(ER_NEW_ABORTING_CONNECTION), + thd->thread_id,(thd->db ? thd->db : "unconnected"), + thd->user ? thd->user : "unauthenticated", + thd->host_or_ip, + (net->last_errno ? ER(net->last_errno) : + ER(ER_UNKNOWN_ERROR))); send_error(net,net->last_errno,NullS); thread_safe_increment(aborted_threads,&LOCK_thread_count); } @@ -1216,7 +1216,6 @@ mysql_execute_command(void) #endif } - thread_safe_increment(com_stat[lex->sql_command],&LOCK_thread_count); /* Skip if we are in the slave thread, some table rules have been given and the table list says the query should not be replicated diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 144b76407ab..9cda33d20d0 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -183,7 +183,7 @@ mysql_select(THD *thd,TABLE_LIST *tables,List &fields,COND *conds, ulong select_options,select_result *result) { TABLE *tmp_table; - int error,tmp; + int error, tmp_error, tmp; bool need_tmp,hidden_group_fields; bool simple_order,simple_group,no_order, skip_sort_order; Item::cond_result cond_value; @@ -678,8 +678,11 @@ mysql_select(THD *thd,TABLE_LIST *tables,List &fields,COND *conds, /* Copy data to the temporary table */ thd->proc_info="Copying to tmp table"; - if (do_select(&join,(List *) 0,tmp_table,0)) + if ((tmp_error=do_select(&join,(List *) 0,tmp_table,0))) + { + error=tmp_error; goto err; /* purecov: inspected */ + } if (join.having) join.having=having=0; // Allready done @@ -752,9 +755,11 @@ mysql_select(THD *thd,TABLE_LIST *tables,List &fields,COND *conds, group=0; } thd->proc_info="Copying to group table"; + tmp_error= -1; if (make_sum_func_list(&join,all_fields) || - do_select(&join,(List *) 0,tmp_table2,0)) + (tmp_error=do_select(&join,(List *) 0,tmp_table2,0))) { + error=tmp_error; free_tmp_table(thd,tmp_table2); goto err; /* purecov: inspected */ } @@ -3736,14 +3741,16 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List &fields, if (maybe_null) { /* - To 
be able to group on NULL, we move the null bit to be - just before the column. - The null byte is updated by 'end_update()' + To be able to group on NULL, we reserve place in group_buff + for the NULL flag just before the column. + The field data is after this flag. + The NULL flag is updated by 'end_update()' and 'end_write()' */ - key_part_info->null_bit=1; - key_part_info->null_offset= key_part_info->offset-1; - group->field->move_field((char*) group_buff+1, (uchar*) group_buff, - 1); + keyinfo->flags|= HA_NULL_ARE_EQUAL; // def. that NULL == NULL + key_part_info->null_bit=field->null_bit; + key_part_info->null_offset= (uint) (field->null_ptr - + (uchar*) table->record[0]); + group->field->move_field((char*) ++group->buff); } else group->field->move_field((char*) group_buff); @@ -3899,10 +3906,10 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, for (uint i=0; i < keyinfo->key_parts ; i++,seg++) { Field *field=keyinfo->key_part[i].field; - seg->flag=0; - seg->language=MY_CHARSET_CURRENT; - seg->length=keyinfo->key_part[i].length; - seg->start=keyinfo->key_part[i].offset; + seg->flag= 0; + seg->language= MY_CHARSET_CURRENT; + seg->length= keyinfo->key_part[i].length; + seg->start= keyinfo->key_part[i].offset; if (field->flags & BLOB_FLAG) { seg->type= @@ -3923,11 +3930,17 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, keyinfo->key_part[i].length > 4) seg->flag|=HA_SPACE_PACK; } - if (using_unique_constraint && - !(field->flags & NOT_NULL_FLAG)) + if (!(field->flags & NOT_NULL_FLAG)) { seg->null_bit= field->null_bit; seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]); + /* + We are using a GROUP BY on something that contains NULL + In this case we have to tell MyISAM that two NULL should + on INSERT be compared as equal + */ + if (!using_unique_constraint) + keydef.flag|= HA_NULL_ARE_EQUAL; } } } @@ -4065,9 +4078,12 @@ bool create_myisam_from_heap(TABLE *table, TMP_TABLE_PARAM *param, int 
error, } -/***************************************************************************** -** Make a join of all tables and write it on socket or to table -*****************************************************************************/ +/**************************************************************************** + Make a join of all tables and write it on socket or to table + Return: 0 if ok + 1 if error is sent + -1 if error should be sent +****************************************************************************/ static int do_select(JOIN *join,List *fields,TABLE *table,Procedure *procedure) @@ -4144,15 +4160,21 @@ do_select(JOIN *join,List *fields,TABLE *table,Procedure *procedure) if (error == -3) error=0; /* select_limit used */ } + + /* Return 1 if error is sent; -1 if error should be sent */ if (error < 0) - join->result->send_error(0,NullS); /* purecov: inspected */ + { + join->result->send_error(0,NullS); /* purecov: inspected */ + error=1; // Error sent + } else { - if (!table) // If sending data to client + error=0; + if (!table) // If sending data to client { join_free(join); // Unlock all cursors if (join->result->send_eof()) - error= -1; + error= 1; // Don't send error } DBUG_PRINT("info",("%ld records output",join->send_records)); } @@ -4169,10 +4191,10 @@ do_select(JOIN *join,List *fields,TABLE *table,Procedure *procedure) my_errno=tmp; error= -1; } - if (error != old_error) + if (error == -1) table->file->print_error(my_errno,MYF(0)); } - DBUG_RETURN(error < 0); + DBUG_RETURN(error); } @@ -4926,6 +4948,7 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), copy_fields(&join->tmp_table_param); copy_funcs(join->tmp_table_param.funcs); +#ifdef TO_BE_DELETED if (!table->uniques) // If not unique handling { /* Copy null values from group to row */ @@ -4936,10 +4959,11 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), if (item->maybe_null) { Field *field=item->tmp_table_field(); - field->ptr[-1]= (byte) 
(field->is_null() ? 0 : 1); + field->ptr[-1]= (byte) (field->is_null() ? 1 : 0); } } } +#endif if (!join->having || join->having->val_int()) { join->found_records++; @@ -4994,8 +5018,9 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { Item *item= *group->item; item->save_org_in_field(group->field); + /* Store in the used key if the field was 0 */ if (item->maybe_null) - group->buff[0]=item->null_value ? 0: 1; // Save reversed value + group->buff[-1]=item->null_value ? 1 : 0; } // table->file->index_init(0); if (!table->file->index_read(table->record[1], -- cgit v1.2.1