diff options
63 files changed, 1880 insertions, 554 deletions
diff --git a/.bzrignore b/.bzrignore index e4b60deec56..cb4056778cf 100644 --- a/.bzrignore +++ b/.bzrignore @@ -3001,3 +3001,4 @@ storage/maria/unittest/ma_pagecache_single_64k-t-big storage/maria/maria_control storage/maria/maria_log.* storage/maria/unittest/ma_test_loghandler_long-t-big +storage/maria/maria_read_log diff --git a/include/maria.h b/include/maria.h index 5f8e3dcdd41..edecef9eeeb 100644 --- a/include/maria.h +++ b/include/maria.h @@ -325,17 +325,18 @@ typedef struct st_maria_sort_info pthread_mutex_t mutex; pthread_cond_t cond; #endif - MARIA_HA *info; + MARIA_HA *info, *new_info; HA_CHECK *param; char *buff; SORT_KEY_BLOCKS *key_block, *key_block_end; SORT_FT_BUF *ft_buf; my_off_t filelength, dupp, buff_length; + ulonglong page; ha_rows max_records; uint current_key, total_keys; uint got_error, threads_running; myf myf_rw; - enum data_file_type new_data_file_type; + enum data_file_type new_data_file_type, org_data_file_type; } MARIA_SORT_INFO; typedef struct st_maria_sort_param diff --git a/include/my_base.h b/include/my_base.h index 38376adfe85..952c325f911 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -47,6 +47,7 @@ #define HA_OPEN_FOR_REPAIR 32 /* open even if crashed */ #define HA_OPEN_FROM_SQL_LAYER 64 #define HA_OPEN_MMAP 128 /* open memory mapped */ +#define HA_OPEN_COPY 256 /* Open copy (for repair) */ /* The following is parameter to ha_rkey() how to use key */ diff --git a/include/my_handler.h b/include/my_handler.h index 13dcd01a332..1a1235d0588 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -110,7 +110,8 @@ extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, uint *diff_pos); extern HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a); - +extern void my_handler_error_register(void); +extern void my_handler_error_unregister(void); /* Inside an in-memory data record, memory pointers to pieces of the record (like BLOBs) are stored in their native byte order and in diff --git 
a/mysql-test/include/ps_conv.inc b/mysql-test/include/ps_conv.inc index 0dd819f6e62..b0c0f9bd9e0 100644 --- a/mysql-test/include/ps_conv.inc +++ b/mysql-test/include/ps_conv.inc @@ -52,7 +52,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +eval create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/maria-connect.result b/mysql-test/r/maria-connect.result new file mode 100644 index 00000000000..e232f564d10 --- /dev/null +++ b/mysql-test/r/maria-connect.result @@ -0,0 +1,23 @@ +set global storage_engine=maria; +set session storage_engine=maria; +drop table if exists t1; +SET SQL_WARNINGS=1; +RESET MASTER; +set binlog_format=statement; +CREATE TABLE t1 (a int primary key); +insert t1 values (1),(2),(3); +insert t1 values (4),(2),(5); +ERROR 23000: Duplicate entry '2' for key 'PRIMARY' +select * from t1; +a +1 +2 +3 +4 +SHOW BINLOG EVENTS FROM 102; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 102 Query 1 200 use `test`; CREATE TABLE t1 (a int primary key) +master-bin.000001 200 Query 1 291 use `test`; insert t1 values (1),(2),(3) +master-bin.000001 291 Query 1 382 use `test`; insert t1 values (4),(2),(5) +drop table t1; +set binlog_format=default; diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result index 3ec9af0fffa..6303f498e36 100644 --- a/mysql-test/r/maria.result +++ b/mysql-test/r/maria.result @@ -2,25 +2,6 @@ set global storage_engine=maria; set session storage_engine=maria; drop table if exists t1,t2; SET SQL_WARNINGS=1; -RESET MASTER; -set binlog_format=statement; -CREATE TABLE t1 (a int primary key); -insert t1 values (1),(2),(3); -insert t1 values (4),(2),(5); -ERROR 23000: Duplicate entry '2' for key 'PRIMARY' -select * from t1; -a -1 -2 -3 -4 -SHOW BINLOG EVENTS FROM 102; -Log_name Pos Event_type Server_id 
End_log_pos Info -master-bin.000001 102 Query 1 200 use `test`; CREATE TABLE t1 (a int primary key) -master-bin.000001 200 Query 1 291 use `test`; insert t1 values (1),(2),(3) -master-bin.000001 291 Query 1 382 use `test`; insert t1 values (4),(2),(5) -drop table t1; -set binlog_format=default; CREATE TABLE t1 ( STRING_DATA char(255) default NULL, KEY string_data (STRING_DATA) @@ -618,7 +599,7 @@ t1 1 a 1 a A NULL NULL NULL YES BTREE disabled alter table t1 enable keys; show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +t1 1 a 1 a A NULL NULL NULL YES BTREE alter table t1 engine=heap; alter table t1 disable keys; Warnings: @@ -853,19 +834,19 @@ _id DELETE FROM t1 WHERE _id < 8; SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK OPTIMIZE TABLE t1; Table Op Msg_type Msg_text -test.t1 optimize status Table is already up to date +test.t1 optimize status OK CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # SELECT _id FROM t1; _id 8 @@ -912,7 +893,7 @@ _id DELETE FROM t1 WHERE _id < 8; SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum 
Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK @@ -924,7 +905,7 @@ Table Op Msg_type Msg_text test.t1 check status OK SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # SELECT _id FROM t1; _id 8 @@ -1598,7 +1579,7 @@ alter table t1 disable keys; alter table t1 enable keys; show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t1 1 a 1 a A 8 NULL NULL YES BTREE disabled +t1 1 a 1 a A 8 NULL NULL YES BTREE drop table t1; show create table t1; show create table t1; @@ -1811,3 +1792,67 @@ CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK DROP TABLE t1; +create table t1 (a int) transactional=0; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 TRANSACTIONAL=0 +drop table t1; +create table t1 (a int) row_format=dynamic transactional=0; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC TRANSACTIONAL=0 +drop table t1; +create table t1 (a int) row_format=dynamic transactional=1; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC TRANSACTIONAL=1 +alter table t1 row_format=PAGE; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE TRANSACTIONAL=1 +alter table t1 transactional=0; +show create table t1; +Table Create Table 
+t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE TRANSACTIONAL=0 +drop table t1; +create table t1 (a int) row_format=PAGE; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE +drop table t1; +create table t1 (a int) row_format=page; +insert delayed into t1 values(1); +ERROR HY000: Table storage engine for 't1' doesn't have this option +drop table t1; +create table t1 (a int) row_format=page transactional=0; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +a +1 +select count(*) from t1; +count(*) +1 +drop table t1; +create table t1 (a int) row_format=dynamic; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +a +1 +select count(*) from t1; +count(*) +1 +drop table t1; diff --git a/mysql-test/r/ps_2myisam.result b/mysql-test/r/ps_2myisam.result index 2bfd6d31ac9..9330ac2853e 100644 --- a/mysql-test/r/ps_2myisam.result +++ b/mysql-test/r/ps_2myisam.result @@ -1756,7 +1756,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_3innodb.result b/mysql-test/r/ps_3innodb.result index 607a0426bd7..4972942e6f3 100644 --- a/mysql-test/r/ps_3innodb.result +++ b/mysql-test/r/ps_3innodb.result @@ -1739,7 +1739,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_4heap.result b/mysql-test/r/ps_4heap.result index f4eec0c610c..bb17d0d161c 100644 --- 
a/mysql-test/r/ps_4heap.result +++ b/mysql-test/r/ps_4heap.result @@ -1740,7 +1740,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_5merge.result b/mysql-test/r/ps_5merge.result index 38e4626d59c..1ed2136151b 100644 --- a/mysql-test/r/ps_5merge.result +++ b/mysql-test/r/ps_5merge.result @@ -1676,7 +1676,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, @@ -4690,7 +4690,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_7ndb.result b/mysql-test/r/ps_7ndb.result index 432a07df9d0..af693de986b 100644 --- a/mysql-test/r/ps_7ndb.result +++ b/mysql-test/r/ps_7ndb.result @@ -1739,7 +1739,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result index 9268c44eecd..b1ea905c406 100644 --- a/mysql-test/r/ps_maria.result +++ b/mysql-test/r/ps_maria.result @@ -1756,7 +1756,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as 
select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/t/maria-connect.test b/mysql-test/t/maria-connect.test new file mode 100644 index 00000000000..aedfa92e278 --- /dev/null +++ b/mysql-test/t/maria-connect.test @@ -0,0 +1,39 @@ +# +# Test that can't be run with --extern +# + +-- source include/have_maria.inc + +let $default=`select @@global.storage_engine`; +set global storage_engine=maria; +set session storage_engine=maria; + +# Initialise +--disable_warnings +drop table if exists t1; +--enable_warnings +SET SQL_WARNINGS=1; + +# +# UNIQUE key test +# +# as long as maria cannot rollback, binlog should contain both inserts +# +RESET MASTER; +set binlog_format=statement; +CREATE TABLE t1 (a int primary key); +insert t1 values (1),(2),(3); +--error 1582 +insert t1 values (4),(2),(5); +select * from t1; +SHOW BINLOG EVENTS FROM 102; +drop table t1; +set binlog_format=default; + +# End of 5.2 tests + +--disable_result_log +--disable_query_log +eval set global storage_engine=$default; +--enable_result_log +--enable_query_log diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test index 763abbd9d25..f03d744f850 100644 --- a/mysql-test/t/maria.test +++ b/mysql-test/t/maria.test @@ -16,22 +16,6 @@ drop table if exists t1,t2; SET SQL_WARNINGS=1; # -# UNIQUE key test -# -# as long as maria cannot rollback, binlog should contain both inserts -# -RESET MASTER; -set binlog_format=statement; -CREATE TABLE t1 (a int primary key); -insert t1 values (1),(2),(3); ---error 1582 -insert t1 values (4),(2),(5); -select * from t1; -SHOW BINLOG EVENTS FROM 102; -drop table t1; -set binlog_format=default; - -# # Test problem with CHECK TABLE; # @@ -597,10 +581,7 @@ insert t1 select * from t2; show keys from t1; alter table t1 enable keys; show keys from t1; -#TODO after we have repair: delete the following --disable-warnings ---disable_warnings alter table t1 engine=heap; 
---enable_warnings alter table t1 disable keys; show keys from t1; drop table t1,t2; @@ -1072,10 +1053,10 @@ create table t1 (a int not null, key key_block_size=1024 (a)); --error 1064 create table t1 (a int not null, key `a` key_block_size=1024 (a)); - # # Test of changing MI_KEY_BLOCK_LENGTH # + CREATE TABLE t1 ( c1 INT, c2 VARCHAR(300), @@ -1116,6 +1097,45 @@ DELETE FROM t1 WHERE c1 >= 10; CHECK TABLE t1; DROP TABLE t1; +# +# Test that TRANSACTIONAL is preserved +# + +create table t1 (a int) transactional=0; +show create table t1; +drop table t1; +create table t1 (a int) row_format=dynamic transactional=0; +show create table t1; +drop table t1; +create table t1 (a int) row_format=dynamic transactional=1; +show create table t1; +alter table t1 row_format=PAGE; +show create table t1; +alter table t1 transactional=0; +show create table t1; +drop table t1; +create table t1 (a int) row_format=PAGE; +show create table t1; +drop table t1; + +# Verify that INSERT DELAYED is disabled only for transactional tables +create table t1 (a int) row_format=page; +--error ER_ILLEGAL_HA +insert delayed into t1 values(1); +drop table t1; +create table t1 (a int) row_format=page transactional=0; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +select count(*) from t1; +drop table t1; +create table t1 (a int) row_format=dynamic; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +select count(*) from t1; +drop table t1; + # End of 5.2 tests --disable_result_log diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index e40490776f8..8b8ba540a4e 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -1696,6 +1696,7 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock) my_bool append_cache; my_off_t pos_in_file; DBUG_ENTER("my_b_flush_io_cache"); + DBUG_PRINT("enter", ("cache: 0x%lx", (long) info)); if (!(append_cache = (info->type == SEQ_READ_APPEND))) need_append_buffer_lock=0; diff --git a/mysys/mf_keycache.c 
b/mysys/mf_keycache.c index 95a9f08a07a..065c10e3d73 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -2456,7 +2456,14 @@ restart: } else { - /* Link the block into a list of blocks 'in switch' */ + /* + Link the block into a list of blocks 'in switch'. + Note that if there could be two concurrent flush_key_blocks_int() + on this file (normally this does not happen, as MyISAM uses + intern_lock for flushing), then the first one may move the block + into its first_in_switch, and the second one would just not see + the block and wrongly consider its job done. + */ unlink_changed(block); link_changed(block, &first_in_switch); } diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c index 6c412157937..a820d09a2c6 100644 --- a/mysys/mf_tempfile.c +++ b/mysys/mf_tempfile.c @@ -107,6 +107,7 @@ File create_temp_file(char *to, const char *dir, const char *prefix, if (org_file >= 0 && file < 0) { int tmp=my_errno; + close(org_file); (void) my_delete(to, MYF(MY_WME | ME_NOINPUT)); my_errno=tmp; } diff --git a/mysys/my_error.c b/mysys/my_error.c index 48392fe84c3..00c78b64e0e 100644 --- a/mysys/my_error.c +++ b/mysys/my_error.c @@ -84,11 +84,6 @@ int my_error(int nr, myf MyFlags, ...) if (nr <= meh_p->meh_last) break; -#ifdef SHARED_LIBRARY - if ((meh_p == &my_errmsgs_globerrs) && ! globerrs[0]) - init_glob_errs(); -#endif - /* get the error message string. Default, if NULL or empty string (""). */ if (! (format= (meh_p && (nr >= meh_p->meh_first)) ? meh_p->meh_errmsgs[nr - meh_p->meh_first] : NULL) || ! 
*format) diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 757cbe490f8..bf75d992f9d 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -19,6 +19,7 @@ #include <m_ctype.h> #include <my_base.h> #include <my_handler.h> +#include <my_sys.h> int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, uchar *b, uint b_length, my_bool part_key, @@ -563,3 +564,68 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) return keyseg; } + +/* + Errors a handler can give you +*/ + +static const char *handler_error_messages[]= +{ + "Didn't find key on read or update", + "Duplicate key on write or update", + "Undefined handler error 122", + "Someone has changed the row since it was read (while the table was locked to prevent it)", + "Wrong index given to function", + "Undefined handler error 125", + "Index file is crashed", + "Record file is crashed", + "Out of memory in engine", + "Undefined handler error 129", + "Incorrect file format", + "Command not supported by database", + "Old database file", + "No record read before update", + "Record was already deleted (or record file crashed)", + "No more room in record file", + "No more room in index file", + "No more records (read after end of file)", + "Unsupported extension used for table", + "Too big row", + "Wrong create options", + "Duplicate unique key or constraint on write or update", + "Unknown character set used in table", + "Conflicting table definitions in sub-tables of MERGE table", + "Table is crashed and last repair failed", + "Table was marked as crashed and should be repaired", + "Lock timed out; Retry transaction", + "Lock table is full; Restart program with a larger locktable", + "Updates are not allowed under a read only transactions", + "Lock deadlock; Retry transaction", + "Foreign key constraint is incorrectly formed", + "Cannot add a child row", + "Cannot delete a parent row", + "Unknown handler error" +}; + + +/* + Register handler error messages for usage with my_error() + + 
NOTES + This is safe to call multiple times as my_error_register() + will ignore calls to register already registered error numbers. +*/ + + +void my_handler_error_register(void) +{ + my_error_register(handler_error_messages, HA_ERR_FIRST, + HA_ERR_FIRST+ array_elements(handler_error_messages)-1); +} + + +void my_handler_error_unregister(void) +{ + my_error_unregister(HA_ERR_FIRST, + HA_ERR_FIRST+ array_elements(handler_error_messages)-1); +} diff --git a/mysys/my_init.c b/mysys/my_init.c index e8a55fdc1e6..2023a7da223 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -77,6 +77,7 @@ my_bool my_init(void) mysys_usage_id++; my_umask= 0660; /* Default umask for new files */ my_umask_dir= 0700; /* Default umask for new directories */ + init_glob_errs(); #if defined(THREAD) && defined(SAFE_MUTEX) safe_mutex_global_init(); /* Must be called early */ #endif diff --git a/mysys/my_open.c b/mysys/my_open.c index 6fe7883b99b..b4bb7e25810 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -71,6 +71,7 @@ File my_open(const char *FileName, int Flags, myf MyFlags) #else fd = open((my_string) FileName, Flags); #endif + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_OPEN, EE_FILENOTFOUND, MyFlags)); } /* my_open */ @@ -124,61 +125,65 @@ int my_close(File fd, myf MyFlags) SYNOPSIS my_register_filename() - fd - FileName - type_file_type + fd File number opened, -1 if error on open + FileName File name + type_file_type How file was created + error_message_number Error message number if caller got error (fd == -1) + MyFlags Flags for my_close() + + RETURN + -1 error + # Filenumber + */ File my_register_filename(File fd, const char *FileName, enum file_type type_of_file, uint error_message_number, myf MyFlags) { + DBUG_ENTER("my_register_filename"); if ((int) fd >= 0) { if ((uint) fd >= my_file_limit) { #if defined(THREAD) && !defined(HAVE_PREAD) - (void) my_close(fd,MyFlags); - my_errno=EMFILE; - if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) - 
my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - return(-1); -#endif + my_errno= EMFILE; +#else thread_safe_increment(my_file_opened,&THR_LOCK_open); - return(fd); /* safeguard */ + DBUG_RETURN(fd); /* safeguard */ +#endif } - pthread_mutex_lock(&THR_LOCK_open); - if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags))) + else { - my_file_opened++; - my_file_info[fd].type = type_of_file; + pthread_mutex_lock(&THR_LOCK_open); + if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags))) + { + my_file_opened++; + my_file_info[fd].type = type_of_file; #if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); + pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); #endif + pthread_mutex_unlock(&THR_LOCK_open); + DBUG_PRINT("exit",("fd: %d",fd)); + DBUG_RETURN(fd); + } pthread_mutex_unlock(&THR_LOCK_open); - DBUG_PRINT("exit",("fd: %d",fd)); - return(fd); + my_errno= ENOMEM; } - pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); - fd= -1; - my_errno=ENOMEM; } else - my_errno=errno; - DBUG_PRINT("error",("Got error %d on open",my_errno)); - if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) { - if (my_errno == EMFILE) { - DBUG_PRINT("error",("print err: %d",EE_OUT_OF_FILERESOURCES)); - my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - } else { - DBUG_PRINT("error",("print err: %d",error_message_number)); - my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - } + my_errno= errno; + + DBUG_PRINT("error",("Got error %d on open", my_errno)); + if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) + { + if (my_errno == EMFILE) + error_message_number= EE_OUT_OF_FILERESOURCES; + DBUG_PRINT("error",("print err: %d",error_message_number)); + my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG), + FileName, my_errno); } - return(fd); + DBUG_RETURN(-1); } #ifdef __WIN__ diff --git a/mysys/my_symlink2.c 
b/mysys/my_symlink2.c index 279672be11c..932f2b6424f 100644 --- a/mysys/my_symlink2.c +++ b/mysys/my_symlink2.c @@ -33,7 +33,9 @@ File my_create_with_symlink(const char *linkname, const char *filename, int create_link; char abs_linkname[FN_REFLEN]; DBUG_ENTER("my_create_with_symlink"); - DBUG_PRINT("enter", ("linkname: %s filename: %s", linkname, filename)); + DBUG_PRINT("enter", + ("linkname: %s filename: %s", linkname ? linkname : "NULL", + filename)); if (my_disable_symlinks) { diff --git a/sql-bench/example b/sql-bench/example index df2a9b8be69..cb39fad819e 100644 --- a/sql-bench/example +++ b/sql-bench/example @@ -6,15 +6,14 @@ machine="Linux-x64" # InnoDB tests -./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_log_file_size=100M" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log - -./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_log_file_size=100M" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log +./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_additional_mem_pool_size=20M --innodb_log_file_size=1000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=2 --skip-innodb-doblewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log +./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_additional_mem_pool_size=20M --innodb_log_file_size=1000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=2 --skip-innodb-doblewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log # MyISAM tests -./run-all-tests 
--suffix=-myisam --comments="Engine=MyISAM key_buffer_size=16M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --log +./run-all-tests --suffix=-myisam --comments="Engine=MyISAM key_buffer_size=256M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --log -./run-all-tests --suffix=_fast-myisam --comments="Engine=MyISAM key_buffer_size=16M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log +./run-all-tests --suffix=_fast-myisam --comments="Engine=MyISAM key_buffer_size=256M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log compare-results --relative output/RUN-mysql-myisam-* output/RUN-mysql_fast-myisam* output/RUN-mysql* diff --git a/sql/handler.cc b/sql/handler.cc index b32098bfc78..f3c71887e9a 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -68,7 +68,7 @@ static const LEX_STRING sys_table_aliases[]= }; const char *ha_row_type[] = { - "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "?","?","?" + "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE","?","?","?" 
}; const char *tx_isolation_names[] = @@ -281,7 +281,8 @@ handler *get_ha_partition(partition_info *part_info) 0 OK != 0 Error */ -static int ha_init_errors(void) + +int ha_init_errors(void) { #define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg) const char **errmsgs; @@ -495,9 +496,6 @@ int ha_init() int error= 0; DBUG_ENTER("ha_init"); - if (ha_init_errors()) - DBUG_RETURN(1); - DBUG_ASSERT(total_ha < MAX_HA); /* Check if there is a transaction-capable storage engine besides the diff --git a/sql/handler.h b/sql/handler.h index 94d2997a545..00c8ea0c5c3 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -268,7 +268,7 @@ enum legacy_db_type enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED, ROW_TYPE_DYNAMIC, ROW_TYPE_COMPRESSED, - ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT, ROW_TYPE_PAGES }; + ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT, ROW_TYPE_PAGE }; enum enum_binlog_func { BFN_RESET_LOGS= 1, @@ -311,6 +311,7 @@ enum enum_binlog_command { #define HA_CREATE_USED_PASSWORD (1L << 17) #define HA_CREATE_USED_CONNECTION (1L << 18) #define HA_CREATE_USED_KEY_BLOCK_SIZE (1L << 19) +#define HA_CREATE_USED_TRANSACTIONAL (1L << 20) typedef ulonglong my_xid; // this line is the same as in log_event.h #define MYSQL_XID_PREFIX "MySQLXid" @@ -741,6 +742,7 @@ class partition_info; struct st_partition_iter; #define NOT_A_PARTITION_ID ((uint32)-1) +enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES }; typedef struct st_ha_create_information { @@ -763,6 +765,8 @@ typedef struct st_ha_create_information uint options; /* OR of HA_CREATE_ options */ uint merge_insert_method; uint extra_size; /* length of extra data segment */ + /* 0 not used, 1 if not transactional, 2 if transactional */ + enum ha_choice transactional; bool table_existed; /* 1 in create if table existed */ bool frm_only; /* 1 if no ha_create_table() */ bool varchar; /* 1 if table has a VARCHAR */ @@ -1661,6 +1665,7 @@ static inline bool ha_storage_engine_is_enabled(const handlerton *db_type) } 
/* basic stuff */ +int ha_init_errors(void); int ha_init(void); int ha_end(void); int ha_initialize_handlerton(st_plugin_int *plugin); diff --git a/sql/lex.h b/sql/lex.h index 45155da7692..28271bf46d9 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -379,6 +379,7 @@ static SYMBOL symbols[] = { { "OWNER", SYM(OWNER_SYM)}, { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, { "PARSER", SYM(PARSER_SYM)}, + { "PAGE", SYM(PAGE_SYM)}, { "PARTIAL", SYM(PARTIAL)}, { "PARTITION", SYM(PARTITION_SYM)}, { "PARTITIONING", SYM(PARTITIONING_SYM)}, @@ -528,6 +529,7 @@ static SYMBOL symbols[] = { { "TO", SYM(TO_SYM)}, { "TRAILING", SYM(TRAILING)}, { "TRANSACTION", SYM(TRANSACTION_SYM)}, + { "TRANSACTIONAL", SYM(TRANSACTIONAL_SYM)}, { "TRIGGER", SYM(TRIGGER_SYM)}, { "TRIGGERS", SYM(TRIGGERS_SYM)}, { "TRUE", SYM(TRUE_SYM)}, diff --git a/sql/mysqld.cc b/sql/mysqld.cc index a49e4005c67..fd77317509b 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -3350,6 +3350,10 @@ server."); using_update_log=1; } + /* Allow storage engine to give real error messages */ + if (ha_init_errors()) + DBUG_RETURN(1); + if (plugin_init(opt_bootstrap)) { sql_print_error("Failed to init plugins."); diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 250d9d917eb..9ae38d5dcec 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -1371,6 +1371,11 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet, packet->append(STRING_WITH_LEN(" ROW_FORMAT=")); packet->append(ha_row_type[(uint) share->row_type]); } + if (share->transactional != HA_CHOICE_UNDEF) + { + packet->append(STRING_WITH_LEN(" TRANSACTIONAL=")); + packet->append(share->transactional == HA_CHOICE_YES ? 
"1" : "0", 1); + } if (table->s->key_block_size) { char *end; @@ -2910,8 +2915,8 @@ static int get_schema_tables_record(THD *thd, struct st_table_list *tables, case ROW_TYPE_COMPACT: tmp_buff= "Compact"; break; - case ROW_TYPE_PAGES: - tmp_buff= "Paged"; + case ROW_TYPE_PAGE: + tmp_buff= "Page"; break; } table->field[6]->store(tmp_buff, strlen(tmp_buff), cs); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 0697fdd79b4..a037fc6f727 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -5653,6 +5653,8 @@ view_err: create_info->default_table_charset= table->s->table_charset; if (!(used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) create_info->key_block_size= table->s->key_block_size; + if (!(used_fields & HA_CREATE_USED_TRANSACTIONAL)) + create_info->transactional= table->s->transactional; if (!create_info->tablespace && create_info->storage_media != HA_SM_MEMORY) { @@ -6916,7 +6918,6 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, lex->col_list.empty(); lex->alter_info.reset(); bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type= 0; create_info.row_type=ROW_TYPE_NOT_USED; create_info.default_table_charset=default_charset_info; /* Force alter table to recreate table */ diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 9c062407921..1bf198b5b4a 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -720,6 +720,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token OUT_SYM /* SQL-2003-R */ %token OWNER_SYM %token PACK_KEYS_SYM +%token PAGE_SYM %token PARAM_MARKER %token PARSER_SYM %token PARTIAL /* SQL-2003-N */ @@ -872,6 +873,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TO_SYM /* SQL-2003-R */ %token TRAILING /* SQL-2003-R */ %token TRANSACTION_SYM +%token TRANSACTIONAL_SYM %token TRIGGERS_SYM %token TRIGGER_SYM /* SQL-2003-R */ %token TRIM /* SQL-2003-N */ @@ -4213,6 +4215,12 @@ create_table_option: Lex->create_info.used_fields|= HA_CREATE_USED_KEY_BLOCK_SIZE; 
Lex->create_info.key_block_size= $3; } + | TRANSACTIONAL_SYM opt_equal ulong_num + { + Lex->create_info.used_fields|= HA_CREATE_USED_TRANSACTIONAL; + Lex->create_info.transactional= ($3 != 0 ? HA_CHOICE_YES : + HA_CHOICE_NO); + } ; default_charset: @@ -4272,7 +4280,8 @@ row_types: | DYNAMIC_SYM { $$= ROW_TYPE_DYNAMIC; } | COMPRESSED_SYM { $$= ROW_TYPE_COMPRESSED; } | REDUNDANT_SYM { $$= ROW_TYPE_REDUNDANT; } - | COMPACT_SYM { $$= ROW_TYPE_COMPACT; }; + | COMPACT_SYM { $$= ROW_TYPE_COMPACT; } + | PAGE_SYM { $$= ROW_TYPE_PAGE; }; merge_insert_types: NO_SYM { $$= MERGE_INSERT_DISABLED; } @@ -9786,6 +9795,7 @@ keyword_sp: | ONE_SHOT_SYM {} | ONE_SYM {} | PACK_KEYS_SYM {} + | PAGE_SYM {} | PARTIAL {} | PARTITIONING_SYM {} | PARTITIONS_SYM {} @@ -9855,6 +9865,7 @@ keyword_sp: | TEXT_SYM {} | THAN_SYM {} | TRANSACTION_SYM {} + | TRANSACTIONAL_SYM {} | TRIGGERS_SYM {} | TIMESTAMP {} | TIMESTAMP_ADD {} diff --git a/sql/table.cc b/sql/table.cc index ed3cac85214..316d99a85b5 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -460,7 +460,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, if (!head[32]) // New frm file in 3.23 { share->avg_row_length= uint4korr(head+34); - share-> row_type= (row_type) head[40]; + share->transactional= (ha_choice) head[39]; + share->row_type= (row_type) head[40]; share->table_charset= get_charset((uint) head[38],MYF(0)); share->null_field_first= 1; } @@ -2111,7 +2112,9 @@ File create_frm(THD *thd, const char *name, const char *db, int2store(fileinfo+16,reclength); int4store(fileinfo+18,create_info->max_rows); int4store(fileinfo+22,create_info->min_rows); + /* fileinfo[26] is set in mysql_create_frm() */ fileinfo[27]=2; // Use long pack-fields + /* fileinfo[28 & 29] is set to key_info_length in mysql_create_frm() */ create_info->table_options|=HA_OPTION_LONG_BLOB_PTR; // Use portable blob pointers int2store(fileinfo+30,create_info->table_options); fileinfo[32]=0; // No filename anymore @@ -2119,8 +2122,9 @@ File 
create_frm(THD *thd, const char *name, const char *db, int4store(fileinfo+34,create_info->avg_row_length); fileinfo[38]= (create_info->default_table_charset ? create_info->default_table_charset->number : 0); + fileinfo[39]= (uchar) create_info->transactional; fileinfo[40]= (uchar) create_info->row_type; - /* Next few bytes were for RAID support */ + /* Next few bytes were for RAID support */ fileinfo[41]= 0; fileinfo[42]= 0; fileinfo[43]= 0; diff --git a/sql/table.h b/sql/table.h index fc2f25f3aa8..fc9f1b7caa4 100644 --- a/sql/table.h +++ b/sql/table.h @@ -175,6 +175,7 @@ typedef struct st_table_share handlerton *db_type; /* table_type for handler */ enum row_type row_type; /* How rows are stored */ enum tmp_table_type tmp_table; + enum ha_choice transactional; uint ref_count; /* How many TABLE objects uses this */ uint open_count; /* Number of tables in open list */ diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 24cc6dfb915..232dd7e695d 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -437,32 +437,38 @@ volatile int *_ma_killed_ptr(HA_CHECK *param) void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...) { + va_list args; + DBUG_ENTER("_ma_check_print_error"); param->error_printed |= 1; param->out_flag |= O_DATA_LOST; - va_list args; va_start(args, fmt); _ma_check_print_msg(param, "error", fmt, args); va_end(args); + DBUG_VOID_RETURN; } void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...) { va_list args; + DBUG_ENTER("_ma_check_print_info"); va_start(args, fmt); _ma_check_print_msg(param, "info", fmt, args); va_end(args); + DBUG_VOID_RETURN; } void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
{ + va_list args; + DBUG_ENTER("_ma_check_print_warning"); param->warning_printed= 1; param->out_flag |= O_DATA_LOST; - va_list args; va_start(args, fmt); _ma_check_print_msg(param, "warning", fmt, args); va_end(args); + DBUG_VOID_RETURN; } } @@ -473,7 +479,7 @@ handler(hton, table_arg), file(0), int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | HA_FILE_BASED | HA_CAN_GEOMETRY | MARIA_CANNOT_ROLLBACK | - HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | + HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT), can_enable_indexes(1) {} @@ -691,9 +697,19 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked) info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED)) VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0)); - save_transactional= file->s->base.transactional; if ((data_file_type= file->s->data_file_type) != STATIC_RECORD) int_table_flags |= HA_REC_NOT_IN_SEQ; + if (!file->s->base.born_transactional) + { + /* + INSERT DELAYED cannot work with transactional tables (because it cannot + stand up to "when client gets ok the data is safe on disk": the record + may not even be inserted). In the future, we could enable it back (as a + client doing INSERT DELAYED knows the specificities; but we then should + make sure to regularly commit in the delayed_insert thread). 
+ */ + int_table_flags|= HA_CAN_INSERT_DELAYED; + } if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) int_table_flags |= HA_HAS_CHECKSUM; @@ -1067,16 +1083,6 @@ int ha_maria::repair(THD *thd, HA_CHECK &param, bool do_optimize) param.out_flag= 0; strmov(fixed_name, file->s->open_file_name); -#ifndef TO_BE_FIXED - /* QQ: Until we have repair for block format, lie that it succeded */ - if (file->s->data_file_type == BLOCK_RECORD) - { - if (do_optimize) - DBUG_RETURN(analyze(thd, (HA_CHECK_OPT*) 0)); - DBUG_RETURN(HA_ADMIN_OK); - } -#endif - // Don't lock tables if we have used LOCK TABLE if (!thd->locked_tables && maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK)) @@ -1101,7 +1107,9 @@ int ha_maria::repair(THD *thd, HA_CHECK &param, bool do_optimize) local_testflag |= T_STATISTICS; param.testflag |= T_STATISTICS; // We get this for free statistics_done= 1; - if (thd->variables.maria_repair_threads > 1) + /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */ + if (thd->variables.maria_repair_threads > 1 && + file->s->data_file_type != BLOCK_RECORD) { char buf[40]; /* TODO: respect maria_repair_threads variable */ @@ -1180,8 +1188,6 @@ int ha_maria::repair(THD *thd, HA_CHECK &param, bool do_optimize) llstr(rows, llbuff), llstr(file->state->records, llbuff2)); } - if (!error) - error= _ma_repair_write_log_record(&param, file); } else { @@ -1863,30 +1869,19 @@ int ha_maria::external_lock(THD *thd, int lock_type) { TRN *trn= THD_TRN; DBUG_ENTER("ha_maria::external_lock"); - if (!save_transactional) + /* + We don't test now_transactional because it may vary between lock/unlock + and thus confuse our reference counting.
+ It is critical to skip non-transactional tables: user-visible temporary + tables get an external_lock() when read/written for the first time, but no + corresponding unlock (they just stay locked and are later dropped while + locked); if a tmp table was transactional, "SELECT FROM non_tmp, tmp" + would never commit as its "locked_tables" count would stay 1. + */ + if (!file->s->base.born_transactional) goto skip_transaction; - if (!trn && lock_type != F_UNLCK) /* no transaction yet - open it now */ - { - trn= trnman_new_trn(& thd->mysys_var->mutex, - & thd->mysys_var->suspend, - thd->thread_stack + STACK_DIRECTION * - (my_thread_stack_size - STACK_MIN_SIZE)); - if (!trn) - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - - DBUG_PRINT("info", ("THD_TRN set to 0x%lx", (ulong)trn)); - THD_TRN= trn; - if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) - trans_register_ha(thd, TRUE, maria_hton); - } if (lock_type != F_UNLCK) { - this->file->trn= trn; - if (!trnman_increment_locked_tables(trn)) - { - trans_register_ha(thd, FALSE, maria_hton); - trnman_new_statement(trn); - } if (!thd->transaction.on) { /* @@ -1898,11 +1893,32 @@ int ha_maria::external_lock(THD *thd, int lock_type) tons of archived logs to roll-forward, we could then not disable REDOs/UNDOs in this case. 
*/ - file->s->base.transactional= FALSE; + _ma_tmp_disable_logging_for_table(file->s); + } + if (!trn) /* no transaction yet - open it now */ + { + trn= trnman_new_trn(& thd->mysys_var->mutex, + & thd->mysys_var->suspend, + thd->thread_stack + STACK_DIRECTION * + (my_thread_stack_size - STACK_MIN_SIZE)); + if (unlikely(!trn)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + DBUG_PRINT("info", ("THD_TRN set to 0x%lx", (ulong)trn)); + THD_TRN= trn; + if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + trans_register_ha(thd, TRUE, maria_hton); + } + this->file->trn= trn; + if (!trnman_increment_locked_tables(trn)) + { + trans_register_ha(thd, FALSE, maria_hton); + trnman_new_statement(trn); } } else { + _ma_reenable_logging_for_table(file->s); this->file->trn= 0; /* TODO: remove it also in commit and rollback */ if (trn && trnman_has_locked_tables(trn)) { @@ -1923,7 +1939,6 @@ int ha_maria::external_lock(THD *thd, int lock_type) #endif } } - file->s->base.transactional= save_transactional; } skip_transaction: DBUG_RETURN(maria_lock_database(file, !table->s->tmp_table ? 
@@ -1934,7 +1949,7 @@ skip_transaction: int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type) { TRN *trn= THD_TRN; - if (save_transactional) + if (file->s->base.born_transactional) { DBUG_ASSERT(trn); // this may be called only after external_lock() DBUG_ASSERT(trnman_has_locked_tables(trn)); @@ -1979,7 +1994,7 @@ enum row_type ha_maria::get_row_type() const switch (file->s->data_file_type) { case STATIC_RECORD: return ROW_TYPE_FIXED; case DYNAMIC_RECORD: return ROW_TYPE_DYNAMIC; - case BLOCK_RECORD: return ROW_TYPE_PAGES; + case BLOCK_RECORD: return ROW_TYPE_PAGE; case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED; default: return ROW_TYPE_NOT_USED; } @@ -1988,6 +2003,8 @@ enum row_type ha_maria::get_row_type() const static enum data_file_type maria_row_type(HA_CREATE_INFO *info) { + if (info->transactional == HA_CHOICE_YES) + return BLOCK_RECORD; switch (info->row_type) { case ROW_TYPE_FIXED: return STATIC_RECORD; case ROW_TYPE_DYNAMIC: return DYNAMIC_RECORD; @@ -2032,7 +2049,8 @@ int ha_maria::create(const char *name, register TABLE *table_arg, share->avg_row_length); create_info.data_file_name= ha_create_info->data_file_name; create_info.index_file_name= ha_create_info->index_file_name; - create_info.transactional= row_type == BLOCK_RECORD; + create_info.transactional= (row_type == BLOCK_RECORD && + ha_create_info->transactional != HA_CHOICE_NO); if (ha_create_info->options & HA_LEX_CREATE_TMP_TABLE) create_flags|= HA_CREATE_TMP_TABLE; diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index a2f6b190657..dd0a9594ef3 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -39,11 +39,6 @@ class ha_maria :public handler char *data_file_name, *index_file_name; enum data_file_type data_file_type; bool can_enable_indexes; - /** - @brief for temporarily disabling table's transactionality - (if THD::transaction::on is false), remember the original value here - */ - bool save_transactional; int repair(THD * thd, HA_CHECK &param, bool
optimize); public: diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index e1308bce487..3376f4abf2c 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -106,6 +106,19 @@ put on disk even if they are not in the page cache). - When explicitely requested (for example on backup or after recvoery, to simplify things) + + The flow of writing a row is that: + - Lock the bitmap + - Decide which data pages we will write to + - Mark them full in the bitmap page so that other threads do not try to + use the same data pages as us + - We unlock the bitmap + - Write the data pages + - Lock the bitmap + - Correct the bitmap page with the true final occupation of the data + pages (that is, we marked pages full but when we are done we realize + we didn't fill them) + - Unlock the bitmap. */ #include "maria_def.h" @@ -283,7 +296,7 @@ void _ma_bitmap_delete_all(MARIA_SHARE *share) { bzero(bitmap->map, share->block_size); memcpy(bitmap->map + share->block_size - 2, maria_bitmap_marker, 2); - bitmap->changed= 0; + bitmap->changed= 1; bitmap->page= 0; bitmap->used_size= bitmap->total_size; } diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 17ca22390f4..3ce4c9efe42 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -581,18 +581,10 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn) DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn)); /* True if not disk error */ - DBUG_ASSERT((undo_lsn != LSN_IMPOSSIBLE) || !info->s->base.transactional); + DBUG_ASSERT((undo_lsn != LSN_IMPOSSIBLE) || !info->s->now_transactional); - if (!info->s->base.transactional) - { - /* - If this is a transactional table but with transactionality temporarily - disabled (like in ALTER TABLE) we need to give a sensible LSN to pages - and not LSN_IMPOSSIBLE. If this is not a transactional table it will - reduce to LSN_IMPOSSIBLE. 
- */ - undo_lsn= info->s->state.create_rename_lsn; - } + if (!info->s->now_transactional) + undo_lsn= LSN_IMPOSSIBLE; /* don't try to set a LSN on pages */ while (pinned_page-- != page_link) pagecache_unlock_by_link(info->s->pagecache, pinned_page->link, @@ -868,7 +860,7 @@ static void calc_record_size(MARIA_HA *info, const byte *record, compact_page() buff Page to compact block_size Size of page - recnr Put empty data after this row + rownr Put empty data after this row extend_block If 1, extend the block at 'rownr' to cover the whole block. */ @@ -980,6 +972,13 @@ static void compact_page(byte *buff, uint block_size, uint rownr, uint length= (uint) (dir - buff) - start_of_found_block; int2store(dir+2, length); } + else + { + /* + TODO: + Update (buff + EMPTY_SPACE_OFFSET) if we remove transid from rows + */ + } buff[PAGE_TYPE_OFFSET]&= ~(byte) PAGE_CAN_BE_COMPACTED; } DBUG_EXECUTE("directory", _ma_print_directory(buff, block_size);); @@ -987,6 +986,37 @@ static void compact_page(byte *buff, uint block_size, uint rownr, } +/* + Create an empty tail or head page + + SYNOPSIS + make_empty_page() + buff Page buffer + block_size Block size + page_type HEAD_PAGE or TAIL_PAGE + + NOTES + EMPTY_SPACE is not updated +*/ + +static void make_empty_page(byte *buff, uint block_size, uint page_type) +{ + + bzero(buff, PAGE_HEADER_SIZE); + /* + We zero the rest of the block to avoid getting old memory information + to disk and to allow the file to be compressed better if archived. 
+ The rest of the code does not assume the block is zeroed above + PAGE_OVERHEAD_SIZE + */ + bzero(buff+ PAGE_HEADER_SIZE, block_size - PAGE_HEADER_SIZE); + buff[PAGE_TYPE_OFFSET]= (byte) page_type; + buff[DIR_COUNT_OFFSET]= 1; + /* Store position to the first row */ + int2store(buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE, + PAGE_HEADER_SIZE); +} + /* Read or initialize new head or tail page @@ -1019,6 +1049,7 @@ struct st_row_pos_info uint empty_space; /* Space left on page */ }; + static my_bool get_head_or_tail_page(MARIA_HA *info, MARIA_BITMAP_BLOCK *block, byte *buff, uint length, uint page_type, @@ -1035,25 +1066,12 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, if (block->org_bitmap_value == 0) /* Empty block */ { /* New page */ - bzero(buff, PAGE_HEADER_SIZE); - - /* - We zero the rest of the block to avoid getting old memory information - to disk and to allow the file to be compressed better if archived. - The rest of the code does not assume the block is zeroed above - PAGE_OVERHEAD_SIZE - */ - bzero(buff+ PAGE_HEADER_SIZE, block_size - PAGE_HEADER_SIZE); - - buff[PAGE_TYPE_OFFSET]= (byte) page_type; - buff[DIR_COUNT_OFFSET]= 1; + make_empty_page(buff, block_size, page_type); res->buff= buff; res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE); res->data= (buff + PAGE_HEADER_SIZE); res->dir= res->data + res->length; res->rownr= 0; - /* Store position to the first row */ - int2store(res->dir, PAGE_HEADER_SIZE); DBUG_ASSERT(length <= res->length); } else @@ -1446,7 +1464,7 @@ static my_bool free_full_page_range(MARIA_HA *info, ulonglong page, uint count) page, count, PAGECACHE_LOCK_WRITE, 0)) res= 1; - if (info->s->base.transactional) + if (info->s->now_transactional) { LSN lsn; DBUG_ASSERT(info->trn->rec_lsn); @@ -1710,8 +1728,12 @@ static my_bool write_block_record(MARIA_HA *info, uint length= (uint) (data - row_pos->data); DBUG_PRINT("info", ("head length: %u", length)); if (length < info->s->base.min_row_length) + { + 
uint diff_length= info->s->base.min_row_length - length; + bzero(data, diff_length); + data+= diff_length; length= info->s->base.min_row_length; - + } int2store(row_pos->dir + 2, length); /* update empty space at start of block */ row_pos->empty_space-= length; @@ -1789,10 +1811,12 @@ static my_bool write_block_record(MARIA_HA *info, ulong length; ulong data_length= (tmp_data - info->rec_buff); -#ifdef SANITY_CHECK +#ifdef MONTY_WILL_KNOW +#ifdef SANITY_CHECKS if (cur_block->sub_blocks == 1) goto crashed; /* no reserved full or tails */ #endif +#endif /* Find out where to write tail for non-blob fields. @@ -1825,8 +1849,8 @@ static my_bool write_block_record(MARIA_HA *info, FULL_PAGE_SIZE(block_size))) && cur_block->page_count) { -#ifdef SANITY_CHECK - if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_BIT)) +#ifdef SANITY_CHECKS + if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_USED)) goto crashed; #endif data_length-= length; @@ -1840,7 +1864,7 @@ static my_bool write_block_record(MARIA_HA *info, /* Skip empty filler block */ cur_block++; } -#ifdef SANITY_CHECK +#ifdef SANITY_CHECKS if ((cur_block >= end_block)) goto crashed; #endif @@ -1953,7 +1977,7 @@ static my_bool write_block_record(MARIA_HA *info, head_block+1, bitmap_blocks->count - 1); } - if (share->base.transactional) + if (share->now_transactional) { uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; @@ -1998,7 +2022,7 @@ static my_bool write_block_record(MARIA_HA *info, else push_dynamic(&info->pinned_pages, (void*) &page_link); - if (share->base.transactional && (tmp_data_used || blob_full_pages_exists)) + if (share->now_transactional && (tmp_data_used || blob_full_pages_exists)) { /* Log REDO writes for all full pages (head part and all blobs) @@ -2095,7 +2119,7 @@ static my_bool write_block_record(MARIA_HA *info, } /* Write UNDO record */ - if (share->base.transactional) + if (share->now_transactional) { 
uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; @@ -2312,7 +2336,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info) } } - if (info->s->base.transactional) + if (info->s->now_transactional) { LSN lsn; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; @@ -2472,6 +2496,76 @@ err: /* + Delete a directory entry + + SYNOPSIS + delete_dir_entry() + buff Page buffer + block_size Block size + record_number Record number to delete + empty_space Empty space on page after delete + + RETURN + -1 Error on page + 0 ok + 1 Page is now empty +*/ + +static int delete_dir_entry(byte *buff, uint block_size, uint record_number, + uint *empty_space_res) +{ + uint number_of_records= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET]; + uint length, empty_space; + byte *dir; + DBUG_ENTER("delete_dir_entry"); + +#ifdef SANITY_CHECKS + if (record_number >= number_of_records || + record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 - + PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE)) + { + DBUG_PRINT("error", ("record_number: %u number_of_records: %u", + record_number, number_of_records)); + + DBUG_RETURN(-1); + } +#endif + + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + dir[0]= dir[1]= 0; /* Delete entry */ + length= uint2korr(dir + 2); + + if (record_number == number_of_records - 1) + { + /* Delete this entry and all following empty directory entries */ + byte *end= buff + block_size - PAGE_SUFFIX_SIZE; + do + { + number_of_records--; + dir+= DIR_ENTRY_SIZE; + empty_space+= DIR_ENTRY_SIZE; + } while (dir < end && dir[0] == 0 && dir[1] == 0); + buff[DIR_COUNT_OFFSET]= (byte) (uchar) number_of_records; + } + empty_space+= length; + if (number_of_records != 0) + { + /* Update directory */ + int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + buff[PAGE_TYPE_OFFSET]|= (byte) PAGE_CAN_BE_COMPACTED; + + *empty_space_res= empty_space; + DBUG_RETURN(0); + } 
+ buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE; + *empty_space_res= block_size; + DBUG_RETURN(1); +} + + +/* Delete a head a tail part SYNOPSIS @@ -2493,11 +2587,12 @@ static my_bool delete_head_or_tail(MARIA_HA *info, my_bool head) { MARIA_SHARE *share= info->s; - uint number_of_records, empty_space, length; + uint empty_space; uint block_size= share->block_size; - byte *buff, *dir; + byte *buff; LSN lsn; MARIA_PINNED_PAGE page_link; + int res; DBUG_ENTER("delete_head_or_tail"); info->keyread_buff_used= 1; @@ -2511,60 +2606,30 @@ static my_bool delete_head_or_tail(MARIA_HA *info, page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK; push_dynamic(&info->pinned_pages, (void*) &page_link); - number_of_records= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET]; -#ifdef SANITY_CHECKS - if (record_number >= number_of_records || - record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 - - PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE)) - { - DBUG_PRINT("error", ("record_number: %u number_of_records: %u", - record_number, number_of_records)); + res= delete_dir_entry(buff, block_size, record_number, &empty_space); + if (res < 0) DBUG_RETURN(1); - } -#endif - - dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); - dir[0]= dir[1]= 0; /* Delete entry */ - length= uint2korr(dir + 2); - empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); - - if (record_number == number_of_records - 1) - { - /* Delete this entry and all following empty directory entries */ - byte *end= buff + block_size - PAGE_SUFFIX_SIZE; - do - { - number_of_records--; - dir+= DIR_ENTRY_SIZE; - empty_space+= DIR_ENTRY_SIZE; - } while (dir < end && dir[0] == 0 && dir[1] == 0); - buff[DIR_COUNT_OFFSET]= (byte) (uchar) number_of_records; - } - empty_space+= length; - if (number_of_records != 0) + if (res == 0) { uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - /* Update directory */ - int2store(buff + 
EMPTY_SPACE_OFFSET, empty_space); - buff[PAGE_TYPE_OFFSET]|= (byte) PAGE_CAN_BE_COMPACTED; - DBUG_ASSERT(share->pagecache->block_size == block_size); - - /* Log REDO data */ - page_store(log_data+ FILEID_STORE_SIZE, page); - dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, + if (info->s->now_transactional) + { + /* Log REDO data */ + page_store(log_data+ FILEID_STORE_SIZE, page); + dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, record_number); - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); - if (translog_write_record(&lsn, (head ? LOGREC_REDO_PURGE_ROW_HEAD : - LOGREC_REDO_PURGE_ROW_TAIL), - info->trn, share, sizeof(log_data), - TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data)) - DBUG_RETURN(1); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (translog_write_record(&lsn, (head ? LOGREC_REDO_PURGE_ROW_HEAD : + LOGREC_REDO_PURGE_ROW_TAIL), + info->trn, share, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data)) + DBUG_RETURN(1); + } if (pagecache_write(share->pagecache, &info->dfile, page, 0, buff, share->page_type, @@ -2572,11 +2637,6 @@ static my_bool delete_head_or_tail(MARIA_HA *info, PAGECACHE_PIN_LEFT_PINNED, PAGECACHE_WRITE_DELAY, &page_link.link)) DBUG_RETURN(1); - - /* Change the lock used when we read the page */ - page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; - set_dynamic(&info->pinned_pages, (void*) &page_link, - info->pinned_pages.elements-1); } else { @@ -2584,19 +2644,36 @@ static my_bool delete_head_or_tail(MARIA_HA *info, PAGE_STORE_SIZE + PAGERANGE_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - pagerange_store(log_data + FILEID_STORE_SIZE, 1); - page_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, page); - pagerange_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + - PAGE_STORE_SIZE, 1); - 
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); - if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, - info->trn, share, sizeof(log_data), - TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data)) + if (info->s->now_transactional) + { + pagerange_store(log_data + FILEID_STORE_SIZE, 1); + page_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, page); + pagerange_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + + PAGE_STORE_SIZE, 1); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, + info->trn, share, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data)) + DBUG_RETURN(1); + } + /* Write the empty page (needed only for REPAIR to work) */ + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, share->page_type, + PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, &page_link.link)) DBUG_RETURN(1); + DBUG_ASSERT(empty_space >= info->s->bitmap.sizes[0]); } + /* Change the lock used when we read the page */ + page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; + set_dynamic(&info->pinned_pages, (void*) &page_link, + info->pinned_pages.elements-1); + DBUG_PRINT("info", ("empty_space: %u", empty_space)); DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space)); } @@ -2660,7 +2737,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const byte *record) if (info->cur_row.extents && free_full_pages(info, &info->cur_row)) goto err; - if (info->s->base.transactional) + if (info->s->now_transactional) { LSN lsn; uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + @@ -2815,7 +2892,8 @@ static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, extent->extent+= ROW_EXTENT_SIZE; extent->page= uint5korr(extent->extent); page_count= 
uint2korr(extent->extent+ROW_EXTENT_PAGE_SIZE); - DBUG_ASSERT(page_count != 0); + if (!page_count) + goto crashed; extent->tail= page_count & TAIL_BIT; extent->page_count= (page_count & ~TAIL_BIT); extent->first_extent= 0; @@ -2838,7 +2916,8 @@ static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, if (!extent->tail) { /* Full data page */ - DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == BLOB_PAGE); + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != BLOB_PAGE) + goto crashed; extent->page++; /* point to next page */ extent->page_count--; *end_of_data= buff + share->block_size; @@ -2847,7 +2926,8 @@ static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, } /* Found tail. page_count is in this case the position in the tail page */ - DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == TAIL_PAGE); + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != TAIL_PAGE) + goto crashed; *(extent->tail_positions++)= ma_recordpos(extent->page, extent->page_count); info->cur_row.tail_count++; /* For maria_chk */ @@ -2969,7 +3049,6 @@ int _ma_read_block_record2(MARIA_HA *info, byte *record, MARIA_COLUMNDEF *column, *end_column; DBUG_ENTER("_ma_read_block_record2"); - LINT_INIT(field_lengths); LINT_INIT(field_length_data); LINT_INIT(blob_buffer); @@ -3015,6 +3094,7 @@ int _ma_read_block_record2(MARIA_HA *info, byte *record, } extent.first_extent= 1; + field_lengths= 0; if (share->base.max_field_lengths) { get_key_length(field_lengths, data); @@ -3049,7 +3129,7 @@ int _ma_read_block_record2(MARIA_HA *info, byte *record, Read row extents (note that first extent was already read into info->cur_row.extents above) */ - if (row_extents) + if (row_extents > 1) { if (read_long_data(info, info->cur_row.extents + ROW_EXTENT_SIZE, (row_extents - 1) * ROW_EXTENT_SIZE, @@ -3074,7 +3154,7 @@ int _ma_read_block_record2(MARIA_HA *info, byte *record, } /* Read array of field lengths. 
This may be stored in several extents */ - if (share->base.max_field_lengths) + if (field_lengths) { field_length_data= info->cur_row.field_lengths; if (read_long_data(info, field_length_data, field_lengths, &extent, @@ -3480,6 +3560,8 @@ restart_bitmap_scan: DBUG_PRINT("error", ("Wrong page header")); DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); } + DBUG_PRINT("info", ("Page %lu has %u rows", + (ulong) page, info->scan.number_of_rows)); info->scan.dir= (info->scan.page_buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE); info->scan.dir_end= (info->scan.dir - @@ -3505,8 +3587,11 @@ restart_bitmap_scan: filepos= (my_off_t) info->scan.bitmap_page * block_size; if (unlikely(filepos >= info->state->data_file_length)) { + DBUG_PRINT("info", ("Found end of file")); DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE)); } + DBUG_PRINT("info", ("Reading bitmap at %lu", + (ulong) info->scan.bitmap_page)); if (!(pagecache_read(share->pagecache, &info->dfile, info->scan.bitmap_page, 0, info->scan.bitmap_buff, PAGECACHE_PLAIN_PAGE, @@ -4005,3 +4090,268 @@ static size_t fill_update_undo_parts(MARIA_HA *info, const byte *oldrec, row_length+= start_log_parts->length; DBUG_RETURN(row_length); } + +/*************************************************************************** + Applying of REDO log records +***************************************************************************/ + +/* + Apply LOGREC_REDO_INSERT_ROW_HEAD & LOGREC_REDO_INSERT_ROW_TAIL + + SYNOPSIS + _ma_apply_redo_insert_row_head_or_tail() + info Maria handler + lsn LSN to put on page + page_type HEAD_PAGE or TAIL_PAGE + header Header (without FILEID) + data Data to be put on page + data_length Length of data + + RETURN + 0 ok + # Error number +*/ + +uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header, + const byte *data, + size_t data_length) +{ + MARIA_SHARE *share= info->s; + ulonglong page; + uint rownr, empty_space; + uint block_size= share->block_size; + 
uint rec_offset; + byte *buff= info->keyread_buff, *dir; + DBUG_ENTER("_ma_apply_redo_insert_row_head"); + + info->keyread_buff_used= 1; + page= page_korr(header); + rownr= dirpos_korr(header+PAGE_STORE_SIZE); + + if (page * info->s->block_size > info->state->data_file_length) + { + /* New page at end of file */ + DBUG_ASSERT(rownr == 0); + if (rownr != 0) + goto err; + make_empty_page(buff, block_size, page_type); + empty_space= (block_size - PAGE_OVERHEAD_SIZE); + rec_offset= PAGE_HEADER_SIZE; + dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + + /* Update that file is extended */ + info->state->data_file_length= page * info->s->block_size; + } + else + { + uint max_entry; + if (!(buff= pagecache_read(share->pagecache, + &info->dfile, + page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + if (lsn_korr(buff) >= lsn) + { + /* Already applied */ + + /* Fix bitmap, just in case */ + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + DBUG_RETURN(0); + } + + max_entry= (uint) ((uchar*) buff)[DIR_COUNT_OFFSET]; + if (((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != page_type)) + { + /* + This is a page that has been freed before and now should be + changed to new type. 
+ */ + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != BLOB_PAGE && + (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != UNALLOCATED_PAGE) + goto err; + make_empty_page(buff, block_size, page_type); + empty_space= (block_size - PAGE_OVERHEAD_SIZE); + rec_offset= PAGE_HEADER_SIZE; + dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + } + else + { + dir= (buff + block_size - DIR_ENTRY_SIZE * (rownr + 1) - + PAGE_SUFFIX_SIZE); + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + + if (max_entry >= rownr) + { + /* Add directory entry first in directory and data last on page */ + DBUG_ASSERT(max_entry == rownr); + if (max_entry != rownr) + goto err; + rec_offset= (uint2korr(dir + DIR_ENTRY_SIZE) + + uint2korr(dir + DIR_ENTRY_SIZE +2)); + if ((uint) (dir - buff) < rec_offset + data_length) + { + /* Create place for directory & data */ + compact_page(buff, block_size, max_entry - 1, 0); + rec_offset= (uint2korr(dir + DIR_ENTRY_SIZE) + + uint2korr(dir + DIR_ENTRY_SIZE +2)); + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + DBUG_ASSERT(!((uint) (dir - buff) < rec_offset + data_length)); + if ((uint) (dir - buff) < rec_offset + data_length) + goto err; + } + buff[DIR_COUNT_OFFSET]= (byte) (uchar) max_entry+1; + int2store(dir, rec_offset); + empty_space-= DIR_ENTRY_SIZE; + } + else + { + /* reuse old empty entry */ + byte *pos, *end, *end_data; + DBUG_ASSERT(uint2korr(dir) == 0); + if (uint2korr(dir)) + goto err; /* Should have been empty */ + + /* Find start of where we can put data */ + end= (buff + block_size - DIR_ENTRY_SIZE * max_entry - + PAGE_SUFFIX_SIZE); + for (pos= dir ; pos >= end ; pos-= DIR_ENTRY_SIZE) + { + if ((rec_offset= uint2korr(pos))) + { + rec_offset+= uint2korr(pos+2); + break; + } + } + DBUG_ASSERT(pos >= end); + if (pos < end) /* Wrong directory */ + goto err; + + /* find end data */ + end_data= end; /* Start of directory */ + end= (buff + block_size - PAGE_SUFFIX_SIZE); + for (pos= dir ; pos < end ; pos+= DIR_ENTRY_SIZE) + { + uint offset; 
+ if ((offset= uint2korr(pos))) + { + end_data= buff + offset; + break; + } + } + if ((uint) (end_data - (buff + rec_offset)) < data_length) + { + uint length; + /* Not enough contiguous space, compact page to get more */ + int2store(dir, rec_offset); + compact_page(buff, block_size, rownr, 1); + rec_offset= uint2korr(dir); + length= uint2korr(dir+2); + DBUG_ASSERT(length >= data_length); + if (length < data_length) + goto err; + empty_space= length; + } + } + } + } + /* Copy data */ + int2store(dir+2, data_length); + memcpy(buff + rec_offset, data, data_length); + empty_space-= data_length; + int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + + /* Write modified page */ + lsn_store(buff, lsn); + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, 0)) + DBUG_RETURN(my_errno); + + /* Fix bitmap */ + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + + DBUG_RETURN(0); + +err: + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); +} + + +/* + Apply LOGREC_REDO_PURGE_ROW_HEAD & LOGREC_REDO_PURGE_ROW_TAIL + + SYNOPSIS + _ma_apply_redo_purge_row_head_or_tail() + info Maria handler + lsn LSN to put on page + page_type HEAD_PAGE or TAIL_PAGE + header Header (without FILEID): page number followed + by the directory position of the row to + purge + + NOTES + This function is very similar to delete_head_or_tail() + + RETURN + 0 ok + # Error number +*/ + +uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header) +{ + MARIA_SHARE *share= info->s; + ulonglong page; + uint record_number, empty_space; + uint block_size= share->block_size; + byte *buff= info->keyread_buff; + DBUG_ENTER("_ma_apply_redo_purge_row_head_or_tail"); + + info->keyread_buff_used= 1; + page= page_korr(header); + record_number= dirpos_korr(header+PAGE_STORE_SIZE); + + if (!(buff= pagecache_read(share->pagecache, + 
&info->dfile, + page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == (byte) page_type); + + if (lsn_korr(buff) >= lsn) + { + /* Already applied */ + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + DBUG_RETURN(0); + } + + if (delete_dir_entry(buff, block_size, record_number, &empty_space) < 0) + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); + lsn_store(buff, lsn); /* Stamp page so the "Already applied" test above works */ + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, 0)) + DBUG_RETURN(my_errno); + + /* This will work even if the page was marked as UNALLOCATED_PAGE */ + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + + DBUG_RETURN(0); +} diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h index 819d1c2e4d2..0ed0898859c 100644 --- a/storage/maria/ma_blockrec.h +++ b/storage/maria/ma_blockrec.h @@ -178,3 +178,11 @@ my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, ulonglong page, uint *bitmap_pattern); void _ma_bitmap_delete_all(MARIA_SHARE *share); +uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header, + const byte *data, + size_t data_length); +uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index cd10e87325c..88198892985 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -42,7 +42,6 @@ #include "ma_ftdefs.h" #include <myisamchk.h> -#include <m_ctype.h> #include <stdarg.h> #include <my_getopt.h> #ifdef HAVE_SYS_VADVISE_H @@ -87,6 +86,13 @@ static SORT_KEY_BLOCKS 
*alloc_key_blocks(HA_CHECK *param, uint blocks, static ha_checksum 
maria_byte_checksum(const byte *buf, uint length); static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share); static void restore_data_file_type(MARIA_SHARE *share); +static void change_data_file_descriptor(MARIA_HA *info, File new_file); +static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info, + MARIA_HA *info, byte *record); +static void copy_data_file_state(MARIA_STATE_INFO *to, + MARIA_STATE_INFO *from); +static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info); + void maria_chk_init(HA_CHECK *param) { @@ -838,7 +844,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, } } (*key_checksum)+= maria_byte_checksum((byte*) key, - key_length- info->s->rec_reflength); + key_length- info->s->rec_reflength); record= _ma_dpos(info,0,key+key_length); if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */ { @@ -1263,18 +1269,21 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, } else { - info->cur_row.checksum= _ma_checksum(info,record); + ha_checksum checksum= 0; + if (info->s->calc_checksum) + checksum= (*info->s->calc_checksum)(info, record); + if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE)) { if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len, - test(info->s->calc_checksum))) + test(info->s->calc_checksum), checksum)) { _ma_check_print_error(param,"Found wrong packed record at %s", llstr(start_recpos,llbuff)); got_error= 1; } } - param->glob_crc+= info->cur_row.checksum; + param->glob_crc+= checksum; } if (! 
got_error) @@ -1507,8 +1516,11 @@ static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, byte *record, } if (info->s->calc_checksum) { - info->cur_row.checksum= _ma_checksum(info, record); - param->glob_crc+= info->cur_row.checksum; + ha_checksum checksum= (*info->s->calc_checksum)(info, record); + if (info->cur_row.checksum != (checksum & 255)) + _ma_check_print_error(param, "Page %9s: Row %3d has wrong checksum", + llstr(page_pos, llbuff), row); + param->glob_crc+= checksum; } if (info->cur_row.extents_count) { @@ -1572,6 +1584,8 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, my_bool full_dir; uint offset_page, offset; + LINT_INIT(full_dir); + if (_ma_scan_init_block_record(info)) { _ma_check_print_error(param, "got error %d when initializing scan", @@ -1649,13 +1663,12 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, llstr(pos, llbuff), page_type); if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) goto err; + continue; } switch ((enum en_page_type) page_type) { case UNALLOCATED_PAGE: case MAX_PAGE_TYPE: - DBUG_PRINT("warning", - ("Found page with wrong page type: %d", page_type)); - DBUG_ASSERT(0); + DBUG_ASSERT(0); /* Impossible */ break; case HEAD_PAGE: row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET]; @@ -1908,13 +1921,32 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) } /* maria_chk_data_link */ - /* Recover old table by reading each record and writing all keys */ - /* Save new datafile-name in temp_filename */ +/* + Recover old table by reading each record and writing all keys + + NOTES + Save new datafile-name in temp_filename. + We overwrite the index file as we go (writekeys() for example), so if we + crash during this the table is unusable and user (or Recovery in the + future) must repeat the REPAIR/OPTIMIZE operation. We could use a + temporary index file in the future (drawback: more disk space). 
+ + IMPLEMENTATION (for hard repair with block format) + - Create new, unrelated MARIA_HA of the table + - Create new datafile and associate it with new handler + - Reset all statistic information in new handler + - Copy all data to new handler with normal write operations + - Move state of new handler to old handler + - Close new handler + - Close data file in old handler + - Rename old data file to new data file. + - Reopen data file in old handler +*/ int maria_repair(HA_CHECK *param, register MARIA_HA *info, my_string name, int rep_quick) { - int error,got_error; + int error, got_error= 1; uint i; ha_rows start_records,new_header_length; my_off_t del; @@ -1923,6 +1955,10 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, char llbuff[22],llbuff2[22]; MARIA_SORT_INFO sort_info; MARIA_SORT_PARAM sort_param; + my_bool block_record, scan_inited= 0; + enum data_file_type org_data_file_type= info->s->data_file_type; + myf sync_dir= ((share->now_transactional && !share->temporary) ? + MY_SYNC_DIR : 0); DBUG_ENTER("maria_repair"); bzero((char *)&sort_info, sizeof(sort_info)); @@ -1930,9 +1966,11 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, start_records=info->state->records; new_header_length=(param->testflag & T_UNPACK) ? 
0L : share->pack.header_length; - got_error=1; new_file= -1; sort_param.sort_info=&sort_info; + block_record= org_data_file_type == BLOCK_RECORD; + sort_info.info= sort_info.new_info= info; + bzero(&info->rec_cache,sizeof(info->rec_cache)); if (!(param->testflag & T_SILENT)) { @@ -1944,28 +1982,6 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; - if (init_io_cache(¶m->read_cache, info->dfile.file, - (uint) param->read_buffer_length, - READ_CACHE,share->pack.header_length,1,MYF(MY_WME))) - { - bzero(&info->rec_cache,sizeof(info->rec_cache)); - goto err; - } - if (!rep_quick) - if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, - WRITE_CACHE, new_header_length, 1, - MYF(MY_WME | MY_WAIT_IF_FULL))) - goto err; - info->opt_flag|=WRITE_CACHE_USED; - if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, - MYF(0))) || - _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, - info->s->base.default_rec_buff_size)) - { - _ma_check_print_error(param, "Not enough memory for extra record"); - goto err; - } - if (!rep_quick) { /* Get real path for data file */ @@ -1984,11 +2000,79 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, new_header_length, "datafile-header")) goto err; info->s->state.dellink= HA_OFFSET_ERROR; - info->rec_cache.file=new_file; - if (param->testflag & T_UNPACK) - restore_data_file_type(share); + info->rec_cache.file= new_file; + if (share->data_file_type == BLOCK_RECORD || + ((param->testflag & T_UNPACK) && + share->state.header.org_data_file_type == BLOCK_RECORD)) + { + MARIA_HA *new_info; + /** + @todo RECOVERY it's a bit worrying to have two MARIA_SHARE on the + same index file: + - Checkpoint will see them as two tables + - are we sure that new_info never flushes an in-progress state + to the index file? And how to prevent Checkpoint from doing that? 
+ - in the close future maria_close() will write the state... + */ + if (!(sort_info.new_info= maria_open(info->s->open_file_name, O_RDWR, + HA_OPEN_COPY | HA_OPEN_FOR_REPAIR))) + goto err; + new_info= sort_info.new_info; + change_data_file_descriptor(new_info, new_file); + maria_lock_database(new_info, F_EXTRA_LCK); + if ((param->testflag & T_UNPACK) && + share->data_file_type == COMPRESSED_RECORD) + { + (*new_info->s->once_end)(new_info->s); + (*new_info->s->end)(new_info); + restore_data_file_type(new_info->s); + _ma_setup_functions(new_info->s); + if ((*new_info->s->once_init)(new_info->s, new_file) || + (*new_info->s->init)(new_info)) + goto err; + } + _ma_reset_status(sort_info.new_info); + if (_ma_initialize_data_file(sort_info.new_info->s, new_file)) + goto err; + block_record= 1; + } + } + + if (org_data_file_type != BLOCK_RECORD) + { + /* We need a read buffer to read rows in big blocks */ + if (init_io_cache(¶m->read_cache, info->dfile.file, + (uint) param->read_buffer_length, + READ_CACHE, share->pack.header_length, 1, MYF(MY_WME))) + goto err; } - sort_info.info=info; + if (sort_info.new_info->s->data_file_type != BLOCK_RECORD) + { + /* When writing to not block records, we need a write buffer */ + if (!rep_quick) + if (init_io_cache(&info->rec_cache, new_file, + (uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL))) + goto err; + info->opt_flag|=WRITE_CACHE_USED; + } + else + { + scan_inited= 1; + if (maria_scan_init(sort_info.info)) + goto err; + } + + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0))) || + _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, + info->s->base.default_rec_buff_size)) + { + _ma_check_print_error(param, "Not enough memory for extra record"); + goto err; + } + sort_info.param = param; sort_param.read_cache=param->read_cache; sort_param.pos=sort_param.max_pos=share->pack.header_length; @@ -2031,9 +2115,14 @@ int 
maria_repair(HA_CHECK *param, register MARIA_HA *info, maria_lock_memory(param); /* Everything is alloced */ + sort_info.org_data_file_type= info->s->data_file_type; + /* Re-create all keys, which are set in key_map. */ while (!(error=sort_get_next_record(&sort_param))) { + if (block_record && _ma_sort_write_record(&sort_param)) + goto err; + if (writekeys(&sort_param)) { if (my_errno != HA_ERR_FOUND_DUPP_KEY) @@ -2059,7 +2148,8 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, } continue; } - if (_ma_sort_write_record(&sort_param)) + + if (!block_record && _ma_sort_write_record(&sort_param)) goto err; } if (error > 0 || maria_write_data_suffix(&sort_info, (my_bool)!rep_quick) || @@ -2082,35 +2172,59 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, { _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); _ma_check_print_error(param,"Run recovery again without -q"); - got_error=1; param->retry_repair=1; param->testflag|=T_RETRY_WITHOUT_QUICK; goto err; } + if (param->testflag & T_SAFE_REPAIR) { /* Don't repair if we loosed more than one row */ - if (info->state->records+1 < start_records) + if (sort_info.new_info->state->records+1 < start_records) { info->state->records=start_records; - got_error=1; goto err; } } if (!rep_quick) { - my_close(info->dfile.file, MYF(0)); - info->dfile.file= new_file; - info->state->data_file_length=sort_param.filepos; + if (sort_info.new_info != sort_info.info) + { + MARIA_STATE_INFO save_state= sort_info.new_info->s->state; + if (maria_close(sort_info.new_info)) + { + _ma_check_print_error(param, "Got error %d on close", my_errno); + goto err; + } + copy_data_file_state(&info->s->state, &save_state); + new_file= -1; + } + else + info->state->data_file_length= sort_param.filepos; share->state.version=(ulong) time((time_t*) 0); /* Force reopen */ + + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + my_close(new_file, MYF(MY_WME)); 
+ my_close(info->dfile.file, MYF(MY_WME)); + info->dfile.file= new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, + (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) | + sync_dir) || + _ma_open_datafile(info, share, -1)) + { + goto err; + } } else { - info->state->data_file_length=sort_param.max_pos; + info->state->data_file_length= sort_param.max_pos; } if (param->testflag & T_CALC_CHECKSUM) - info->state->checksum=param->glob_crc; + info->state->checksum= param->glob_crc; if (!(param->testflag & T_SILENT)) { @@ -2122,35 +2236,25 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, llstr(sort_info.dupp,llbuff)); } - got_error=0; + got_error= sync_dir ? write_log_record_for_repair(param, info) : 0; /* If invoked by external program that uses thr_lock */ if (&share->state.state != info->state) memcpy( &share->state.state, info->state, sizeof(*info->state)); err: - if (!got_error) - { - /* Replace the actual file with the temporary file */ - if (new_file >= 0) - { - myf sync_dir= (share->base.transactional && !share->temporary) ? - MY_SYNC_DIR : 0; - my_close(new_file,MYF(0)); - info->dfile.file= new_file= -1; - if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, - DATA_TMP_EXT, - MYF((param->testflag & T_BACKUP_DATA ? - MY_REDEL_MAKE_BACKUP : 0) | - sync_dir)) || - _ma_open_datafile(info,share,-1)) - got_error=1; - } - } + if (scan_inited) + maria_scan_end(sort_info.info); + if (got_error) { if (! 
param->error_printed) _ma_check_print_error(param,"%d for record at pos %s",my_errno, llstr(sort_param.start_recpos,llbuff)); + if (sort_info.new_info && sort_info.new_info != sort_info.info) + { + sort_info.new_info->dfile.file= -1; + maria_close(sort_info.new_info); + } if (new_file >= 0) { VOID(my_close(new_file,MYF(0))); @@ -2333,7 +2437,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) int old_lock; MARIA_SHARE *share=info->s; MARIA_STATE_INFO old_state; - myf sync_dir= (share->base.transactional && !share->temporary) ? + myf sync_dir= (share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0; DBUG_ENTER("maria_sort_index"); @@ -2602,7 +2706,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, uint i; ulong length; ha_rows start_records; - my_off_t new_header_length,del; + my_off_t new_header_length, org_header_length, del; File new_file; MARIA_SORT_PARAM sort_param; MARIA_SHARE *share=info->s; @@ -2611,15 +2715,19 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, char llbuff[22]; MARIA_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; - myf sync_dir= (share->base.transactional && !share->temporary) ? - MY_SYNC_DIR : 0; + myf sync_dir= ((share->now_transactional && !share->temporary) ? + MY_SYNC_DIR : 0); DBUG_ENTER("maria_repair_by_sort"); + bzero((char*)&sort_info,sizeof(sort_info)); + bzero((char *)&sort_param, sizeof(sort_param)); + start_records=info->state->records; got_error=1; new_file= -1; - new_header_length=(param->testflag & T_UNPACK) ? 0 : - share->pack.header_length; + org_header_length= share->pack.header_length; + new_header_length= (param->testflag & T_UNPACK) ? 
0 : org_header_length; + if (!(param->testflag & T_SILENT)) { printf("- recovering (with sort) MARIA-table '%s'\n",name); @@ -2630,15 +2738,13 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; - bzero((char*)&sort_info,sizeof(sort_info)); - bzero((char *)&sort_param, sizeof(sort_param)); if (!(sort_info.key_block= alloc_key_blocks(param, (uint) param->sort_key_blocks, - share->base.max_key_block_length)) - || init_io_cache(¶m->read_cache, info->dfile.file, - (uint) param->read_buffer_length, - READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || + share->base.max_key_block_length)) || + init_io_cache(¶m->read_cache, info->dfile.file, + (uint) param->read_buffer_length, + READ_CACHE, org_header_length, 1, MYF(MY_WME)) || (! rep_quick && init_io_cache(&info->rec_cache, info->dfile.file, (uint) param->write_buffer_length, @@ -2648,6 +2754,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; info->opt_flag|=WRITE_CACHE_USED; info->rec_cache.file= info->dfile.file; /* for sort_delete_record */ + sort_info.org_data_file_type= info->s->data_file_type; if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, MYF(0))) || @@ -2703,8 +2810,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, key_map= ~key_map; /* Create the missing keys */ } - sort_info.info=info; - sort_info.param = param; + sort_info.info= sort_info.new_info= info; + sort_info.param= param; set_data_file_type(&sort_info, share); sort_param.filepos=new_header_length; @@ -2716,9 +2823,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, sort_param.wordlist=NULL; init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); - if (share->data_file_type == DYNAMIC_RECORD) + if (sort_info.org_data_file_type == DYNAMIC_RECORD) 
length=max(share->base.min_pack_length+1,share->base.min_block_length); - else if (share->data_file_type == COMPRESSED_RECORD) + else if (sort_info.org_data_file_type == COMPRESSED_RECORD) length=share->base.min_block_length; else length=share->base.pack_reclength; @@ -2756,7 +2863,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if ((!(param->testflag & T_SILENT))) printf ("- Fixing index %d\n",sort_param.key+1); - sort_param.max_pos=sort_param.pos=share->pack.header_length; + sort_param.max_pos= sort_param.pos= org_header_length; keyseg=sort_param.seg; bzero((char*) sort_param.unique,sizeof(sort_param.unique)); sort_param.key_length=share->rec_reflength; @@ -2854,8 +2961,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, share->state.version=(ulong) time((time_t*) 0); my_close(info->dfile.file, MYF(0)); info->dfile.file= new_file; - share->data_file_type=sort_info.new_data_file_type; - share->pack.header_length=(ulong) new_header_length; + share->data_file_type= sort_info.new_data_file_type; + org_header_length= (ulong) new_header_length; + sort_info.org_data_file_type= info->s->data_file_type; sort_param.fix_datafile=0; } else @@ -2883,11 +2991,11 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (rep_quick & T_FORCE_UNIQUENESS) { - my_off_t skr=info->state->data_file_length+ - (share->options & HA_OPTION_COMPRESS_RECORD ? - MEMMAP_EXTRA_MARGIN : 0); + my_off_t skr= (info->state->data_file_length + + ((sort_info.org_data_file_type == COMPRESSED_RECORD) ? 
+ MEMMAP_EXTRA_MARGIN : 0)); /* ?: parenthesized: '+' binds tighter than ?: */ #ifdef USE_RELOC - if (share->data_file_type == STATIC_RECORD && + if (sort_info.org_data_file_type == STATIC_RECORD && skr < share->base.reloc*share->base.min_pack_length) skr=share->base.reloc*share->base.min_pack_length; #endif @@ -3032,7 +3140,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, MARIA_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; pthread_attr_t thr_attr; - myf sync_dir= (share->base.transactional && !share->temporary) ? + myf sync_dir= (share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0; DBUG_ENTER("maria_repair_parallel"); @@ -3085,6 +3193,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); pthread_cond_init(&sort_info.cond, 0); + sort_info.org_data_file_type= info->s->data_file_type; + if (!(sort_info.key_block= alloc_key_blocks(param, (uint) param->sort_key_blocks, share->base.max_key_block_length)) || @@ -3152,8 +3262,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, key_map= ~key_map; /* Create the missing keys */ } - sort_info.info=info; - sort_info.param = param; + sort_info.info= sort_info.new_info= info; + sort_info.param= param; set_data_file_type(&sort_info, share); sort_info.dupp=0; @@ -3161,9 +3271,9 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, param->read_cache.end_of_file=sort_info.filelength= my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0)); - if (share->data_file_type == DYNAMIC_RECORD) + if (sort_info.org_data_file_type == DYNAMIC_RECORD) rec_length=max(share->base.min_pack_length+1,share->base.min_block_length); - else if (share->data_file_type == COMPRESSED_RECORD) + else if (sort_info.org_data_file_type == COMPRESSED_RECORD) rec_length=share->base.min_block_length; else rec_length=share->base.pack_reclength; @@ -3379,8 +3489,6 @@ int maria_repair_parallel(HA_CHECK 
*param, register MARIA_HA *info, */ 
my_close(info->dfile.file, MYF(0)); info->dfile.file= new_file; - - share->data_file_type=sort_info.new_data_file_type; share->pack.header_length=(ulong) new_header_length; } else @@ -3397,11 +3505,11 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, if (rep_quick & T_FORCE_UNIQUENESS) { - my_off_t skr=info->state->data_file_length+ - (share->options & HA_OPTION_COMPRESS_RECORD ? - MEMMAP_EXTRA_MARGIN : 0); + my_off_t skr= (info->state->data_file_length + + ((sort_info.org_data_file_type == COMPRESSED_RECORD) ? + MEMMAP_EXTRA_MARGIN : 0)); /* ?: parenthesized: '+' binds tighter than ?: */ #ifdef USE_RELOC - if (share->data_file_type == STATIC_RECORD && + if (sort_info.org_data_file_type == STATIC_RECORD && skr < share->base.reloc*share->base.min_pack_length) skr=share->base.reloc*share->base.min_pack_length; #endif @@ -3587,27 +3695,28 @@ static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key) sort_get_next_record() sort_param Information about and for the sort process - NOTE - + NOTES Dynamic Records With Non-Quick Parallel Repair - For non-quick parallel repair we use a synchronized read/write - cache. This means that one thread is the master who fixes the data - file by reading each record from the old data file and writing it - to the new data file. By doing this the records in the new data - file are written contiguously. Whenever the write buffer is full, - it is copied to the read buffer. The slaves read from the read - buffer, which is not associated with a file. Thus read_cache.file - is -1. When using _mi_read_cache(), the slaves must always set - flag to READING_NEXT so that the function never tries to read from - file. This is safe because the records are contiguous. There is no - need to read outside the cache. This condition is evaluated in the - variable 'parallel_flag' for quick reference. read_cache.file must - be >= 0 in every other case. + For non-quick parallel repair we use a synchronized read/write + cache. 
This means that one thread is the master who fixes the data + file by reading each record from the old data file and writing it + to the new data file. By doing this the records in the new data + file are written contiguously. Whenever the write buffer is full, + it is copied to the read buffer. The slaves read from the read + buffer, which is not associated with a file. Thus read_cache.file + is -1. When using _mi_read_cache(), the slaves must always set + flag to READING_NEXT so that the function never tries to read from + file. This is safe because the records are contiguous. There is no + need to read outside the cache. This condition is evaluated in the + variable 'parallel_flag' for quick reference. read_cache.file must + be >= 0 in every other case. RETURN -1 end of file 0 ok + sort_param->filepos points to record position. + sort_param->record contains record > 0 error */ @@ -3628,10 +3737,61 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (*_ma_killed_ptr(param)) DBUG_RETURN(1); - switch (share->data_file_type) { + switch (sort_info->org_data_file_type) { case BLOCK_RECORD: - DBUG_ASSERT(0); + { + for (;;) + { + int flag; + + if (info != sort_info->new_info) + { + /* Safe scanning */ + flag= _ma_safe_scan_block_record(sort_info, info, + sort_param->record); + } + else + { + /* Scan on clean table */ + flag= _ma_scan_block_record(info, sort_param->record, + info->cur_row.nextpos, 1); + } + if (!flag) + { + if (sort_param->calc_checksum) + { + ha_checksum checksum; + checksum= (*info->s->calc_check_checksum)(info, sort_param->record); + if (info->s->calc_checksum && + info->cur_row.checksum != (checksum & 255)) + { + if (param->testflag & T_VERBOSE) + { + char llbuff[22]; + record_pos_to_txt(info, sort_param->filepos, llbuff); + _ma_check_print_info(param, + "Found record with wrong checksum at %s", + llbuff); + } + continue; + } + info->cur_row.checksum= checksum; + param->glob_crc+= checksum; + } + sort_param->filepos= 
info->cur_row.lastpos; + DBUG_RETURN(0); + } + if (flag == HA_ERR_END_OF_FILE) + { + sort_param->max_pos= sort_info->filelength; + DBUG_RETURN(-1); + } + /* Retry only if wrong record, not if disk error */ + if (flag != HA_ERR_WRONG_IN_RECORD) + DBUG_RETURN(flag); + } break; + } case STATIC_RECORD: for (;;) { @@ -3669,6 +3829,8 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) { byte *to; LINT_INIT(to); + ha_checksum checksum= 0; + pos=sort_param->pos; searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0; @@ -3938,14 +4100,14 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (sort_param->read_cache.error < 0) DBUG_RETURN(1); if (sort_param->calc_checksum) - info->cur_row.checksum= _ma_checksum(info, sort_param->record); + checksum= (info->s->calc_check_checksum)(info, sort_param->record); if ((param->testflag & (T_EXTEND | T_REP)) || searching) { if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff, sort_param->find_length, (param->testflag & T_QUICK) && sort_param->calc_checksum && - test(info->s->calc_checksum))) + test(info->s->calc_checksum), checksum)) { _ma_check_print_info(param,"Found wrong packed record at %s", llstr(sort_param->start_recpos,llbuff)); @@ -3953,7 +4115,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } } if (sort_param->calc_checksum) - param->glob_crc+= info->cur_row.checksum; + param->glob_crc+= checksum; DBUG_RETURN(0); } if (!searching) @@ -4027,8 +4189,9 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (sort_param->calc_checksum) { - info->cur_row.checksum= (*info->s->calc_checksum)(info, - sort_param->record); + info->cur_row.checksum= (*info->s->calc_check_checksum)(info, + sort_param-> + record); param->glob_crc+= info->cur_row.checksum; } DBUG_RETURN(0); @@ -4061,8 +4224,8 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) byte *from; byte block_buff[8]; 
MARIA_SORT_INFO *sort_info=sort_param->sort_info; - HA_CHECK *param=sort_info->param; - MARIA_HA *info=sort_info->info; + HA_CHECK *param= sort_info->param; + MARIA_HA *info= sort_info->new_info; MARIA_SHARE *share=info->s; DBUG_ENTER("_ma_sort_write_record"); @@ -4070,7 +4233,11 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) { switch (sort_info->new_data_file_type) { case BLOCK_RECORD: - DBUG_ASSERT(0); + if ((sort_param->filepos= (*share->write_record_init)(info, + sort_param-> + record)) == + HA_OFFSET_ERROR) + DBUG_RETURN(1); break; case STATIC_RECORD: if (my_b_write(&info->rec_cache,sort_param->record, @@ -4103,7 +4270,9 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER); } /* We can use info->checksum here as only one thread calls this */ - info->cur_row.checksum= _ma_checksum(info,sort_param->record); + info->cur_row.checksum= (*info->s->calc_check_checksum)(info, + sort_param-> + record); reclength= _ma_rec_pack(info,from,sort_param->record); flag=0; @@ -4160,7 +4329,7 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) } /* _ma_sort_write_record */ - /* Compare two keys from _ma_create_index_by_sort */ +/* Compare two keys from _ma_create_index_by_sort */ static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, const void *b) @@ -4518,7 +4687,8 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) } } if (sort_param->calc_checksum) - param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record); + param->glob_crc-=(*info->s->calc_check_checksum)(info, + sort_param->record); } error= (flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info, sort_param->record)); @@ -4527,7 +4697,8 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) DBUG_RETURN(error); } /* sort_delete_record */ - /* Fix all pending blocks and flush everything to disk */ + +/* Fix all pending blocks and flush everything to disk */ int 
_ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) { @@ -4799,9 +4970,9 @@ end: int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile) { - MARIA_HA *info=sort_info->info; + MARIA_HA *info=sort_info->new_info; - if (info->s->options & HA_OPTION_COMPRESS_RECORD && fix_datafile) + if (info->s->data_file_type == COMPRESSED_RECORD && fix_datafile) { char buff[MEMMAP_EXTRA_MARGIN]; bzero(buff,sizeof(buff)); @@ -5114,6 +5285,9 @@ my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, */ if (! maria_is_any_key_active(key_map)) return FALSE; /* Can't use sort */ + /* QQ: Remove this when maria_repair_by_sort() works with block format */ + if (info->s->data_file_type == BLOCK_RECORD) + return FALSE; for (i=0 ; i < share->base.keys ; i++,key++) { if (!force && maria_too_big_key_for_sort(key,rows)) @@ -5132,7 +5306,8 @@ set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share) MARIA_SHARE tmp; sort_info->new_data_file_type= share->state.header.org_data_file_type; /* Set delete_function for sort_delete_record() */ - memcpy((char*) &tmp, share, sizeof(*share)); + tmp= *share; + tmp.state.header.data_file_type= tmp.state.header.org_data_file_type; tmp.options= ~HA_OPTION_COMPRESS_RECORD; _ma_setup_functions(&tmp); share->delete_record=tmp.delete_record; @@ -5145,11 +5320,166 @@ static void restore_data_file_type(MARIA_SHARE *share) mi_int2store(share->state.header.options,share->options); share->state.header.data_file_type= share->state.header.org_data_file_type; - share->data_file_type= share->state.header.data_file_type= + share->data_file_type= share->state.header.data_file_type; share->pack.header_length= 0; } +static void change_data_file_descriptor(MARIA_HA *info, File new_file) +{ + my_close(info->dfile.file, MYF(0)); + info->dfile.file= info->s->bitmap.file.file= new_file; +} + + +/* + Copy all states that has to do with the data file + + NOTES + This is done to copy the state from the data file generated from + repair to the 
original handler +*/ + +static void copy_data_file_state(MARIA_STATE_INFO *to, + MARIA_STATE_INFO *from) +{ + to->state.records= from->state.records; + to->state.del= from->state.del; + to->state.empty= from->state.empty; + to->state.data_file_length= from->state.data_file_length; + to->split= from->split; + to->dellink= from->dellink; + to->first_bitmap_with_space= from->first_bitmap_with_space; +} + + +/* + Read 'safely' next record while scanning table. + + SYNOPSIS + _ma_safe_scan_block_record() + info Maria handler + record Store found here + + NOTES + - One must have called mi_scan() before this + + Differences compared to _ma_scan_block_records() are: + - We read all blocks, not only blocks marked by the bitmap to be safe + - In case of errors, next read will read next record. + - More sanity checks + + RETURN + 0 ok + HA_ERR_END_OF_FILE End of file + # error number +*/ + + +static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info, + MARIA_HA *info, byte *record) +{ + uint record_pos= info->cur_row.nextpos; + ulonglong page= sort_info->page; + DBUG_ENTER("_ma_safe_scan_block_record"); + + for (;;) + { + /* Find next row in current page */ + if (likely(record_pos < info->scan.number_of_rows)) + { + uint length, offset; + byte *data, *end_of_data; + char llbuff[22]; + + while (!(offset= uint2korr(info->scan.dir))) + { + info->scan.dir-= DIR_ENTRY_SIZE; + record_pos++; + if (info->scan.dir < info->scan.dir_end) + { + _ma_check_print_info(sort_info->param, + "Wrong directory on page: %s", + llstr(page, llbuff)); + goto read_next_page; + } + } + /* found row */ + info->cur_row.lastpos= info->scan.row_base_page + record_pos; + info->cur_row.nextpos= record_pos + 1; + data= info->scan.page_buff + offset; + length= uint2korr(info->scan.dir + 2); + end_of_data= data + length; + info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */ + + if (end_of_data > info->scan.dir_end || + offset < PAGE_HEADER_SIZE || length < info->s->base.min_block_length) + { + 
_ma_check_print_info(sort_info->param, + "Wrong directory entry %3u at page %s", + record_pos, llstr(page, llbuff)); + record_pos++; + continue; + } + else + { + DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data)); + } + } + +read_next_page: + /* Read until we find next head page */ + for (;;) + { + uint page_type; + char llbuff[22]; + + sort_info->page++; /* In case of errors */ + page++; + if (!(page % info->s->bitmap.pages_covered)) + page++; /* Skip bitmap */ + if ((page + 1) * info->s->block_size > sort_info->filelength) + DBUG_RETURN(HA_ERR_END_OF_FILE); + if (!(pagecache_read(info->s->pagecache, + &info->dfile, + page, 0, info->scan.page_buff, + PAGECACHE_READ_UNKNOWN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + + page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] & + PAGE_TYPE_MASK); + if (page_type == HEAD_PAGE) + { + if ((info->scan.number_of_rows= + (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0) + break; + _ma_check_print_info(sort_info->param, + "Wrong head page at %s", + llstr(page * info->s->block_size, llbuff)); + } + else if (page_type >= MAX_PAGE_TYPE) + { + _ma_check_print_info(sort_info->param, + "Found wrong page type: %d at %s", + page_type, llstr(page * info->s->block_size, + llbuff)); + } + } + + /* New head page */ + info->scan.dir= (info->scan.page_buff + info->s->block_size - + PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE); + info->scan.dir_end= (info->scan.dir - + (info->scan.number_of_rows - 1) * + DIR_ENTRY_SIZE); + info->scan.row_base_page= ma_recordpos(page, 0); + record_pos= 0; + } +} + + /** @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn @@ -5170,11 +5500,10 @@ static void restore_data_file_type(MARIA_SHARE *share) @retval 1 error (disk problem) */ -int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info) +static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA 
*info) { MARIA_SHARE *share= info->s; - /* Only called from ha_maria.cc, not maria_check, so translog is inited */ - if (share->base.transactional && !share->temporary) + if (translog_inited) /* test it in case this is maria_chk */ { /* For now this record is only informative. It could serve when applying @@ -5218,8 +5547,8 @@ int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info) made durable earlier (MY_SYNC_DIR passed to maria_change_to_newfile()). */ DBUG_ASSERT(info->dfile.file >= 0); - return _ma_update_create_rename_lsn_on_disk(share, FALSE) || - _ma_sync_table_files(info); + return (_ma_update_create_rename_lsn_on_disk(share, FALSE) || + _ma_sync_table_files(info)); } return 0; } diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index fdee50f6fde..4fec7359d66 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -108,7 +108,8 @@ int maria_close(register MARIA_HA *info) } } #endif - my_free((gptr) info->s,MYF(0)); + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + my_free((gptr) share, MYF(0)); } pthread_mutex_unlock(&THR_LOCK_maria); if (info->ftparser_param) @@ -122,8 +123,6 @@ int maria_close(register MARIA_HA *info) my_free((gptr) info,MYF(0)); if (error) - { - DBUG_RETURN(my_errno=error); - } + DBUG_RETURN(my_errno= error); DBUG_RETURN(0); } /* maria_close */ diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index fa4ec442e41..d6c121b21be 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -18,7 +18,7 @@ First version written by Guilhem Bichot on 2006-04-27. 
*/ -#define CONTROL_FILE_BASE_NAME "maria_control" +#define CONTROL_FILE_BASE_NAME "maria_log_control" /* Here is the interface of this module */ diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 8ad8f0564d7..2098d7119eb 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -259,7 +259,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, } share.base.null_bytes= ci->null_bytes; share.base.original_null_bytes= ci->null_bytes; - share.base.transactional= ci->transactional; + share.base.born_transactional= ci->transactional; share.base.max_field_lengths= max_field_lengths; share.base.field_offsets= 0; /* for future */ @@ -738,7 +738,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, (MY_UNPACK_FILENAME | (flags & HA_DONT_TOUCH_DATA) ? MY_RETURN_REAL_PATH : 0) | MY_APPEND_EXT); - linkname_ptr= NULL; + linkname_ptr= NullS; /* Replace the current file. Don't sync dir now if the data file has the same path. 
@@ -1007,7 +1007,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, { fn_format(filename,name,"", MARIA_NAME_DEXT, MY_UNPACK_FILENAME | MY_APPEND_EXT); - linkname_ptr= NULL; + linkname_ptr= NullS; create_flag=MY_DELETE_OLD; } if ((dfile= @@ -1016,7 +1016,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, goto err; errpos=3; - if (_ma_initialize_data_file(dfile, &share)) + if (_ma_initialize_data_file(&share, dfile)) goto err; } @@ -1155,7 +1155,7 @@ static int compare_columns(MARIA_COLUMNDEF **a_ptr, MARIA_COLUMNDEF **b_ptr) /* Initialize data file */ -int _ma_initialize_data_file(File dfile, MARIA_SHARE *share) +int _ma_initialize_data_file(MARIA_SHARE *share, File dfile) { if (share->data_file_type == BLOCK_RECORD) { diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index 3e531b518f8..42e7fb3c2f9 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -31,9 +31,7 @@ int maria_delete_all_rows(MARIA_HA *info) { - uint i; MARIA_SHARE *share=info->s; - MARIA_STATE_INFO *state=&share->state; my_bool log_record; DBUG_ENTER("maria_delete_all_rows"); @@ -48,7 +46,7 @@ int maria_delete_all_rows(MARIA_HA *info) */ if (_ma_readinfo(info,F_WRLCK,1)) DBUG_RETURN(my_errno); - log_record= share->base.transactional && !share->temporary; + log_record= share->now_transactional && !share->temporary; if (_ma_mark_file_changed(info)) goto err; @@ -71,18 +69,7 @@ int maria_delete_all_rows(MARIA_HA *info) goto err; } - info->state->records=info->state->del=state->split=0; - state->changed= 0; /* File is optimized */ - state->dellink = HA_OFFSET_ERROR; - state->sortkey= (ushort) ~0; - info->state->key_file_length=share->base.keystart; - info->state->data_file_length=0; - info->state->empty=info->state->key_empty=0; - info->state->checksum=0; - - state->key_del= HA_OFFSET_ERROR; - for (i=0 ; i < share->base.keys ; i++) - state->key_root[i]= HA_OFFSET_ERROR; + _ma_reset_status(info); /* If 
we are using delayed keys or if the user has done changes to the tables @@ -94,7 +81,7 @@ int maria_delete_all_rows(MARIA_HA *info) my_chsize(share->kfile.file, share->base.keystart, 0, MYF(MY_WME)) ) goto err; - if (_ma_initialize_data_file(info->dfile.file, share)) + if (_ma_initialize_data_file(share, info->dfile.file)) goto err; /* @@ -126,4 +113,39 @@ err: allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(my_errno=save_errno); } -} /* maria_delete */ +} /* maria_delete_all_rows */ + + +/* + Reset status information + + SYNOPSIS + _ma_reset_status() + maria Maria handler + + DESCRIPTION + Resets data and index file information as if the file would be empty + Files are not touched. +*/ + +void _ma_reset_status(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + MARIA_STATE_INFO *state= &share->state; + uint i; + + info->state->records= info->state->del= state->split= 0; + state->changed= 0; /* File is optimized */ + state->dellink= HA_OFFSET_ERROR; + state->sortkey= (ushort) ~0; + info->state->key_file_length= share->base.keystart; + info->state->data_file_length= 0; + info->state->empty= info->state->key_empty= 0; + info->state->checksum= 0; + + /* Drop the delete key chain. */ + state->key_del= HA_OFFSET_ERROR; + /* Clear all keys */ + for (i=0 ; i < share->base.keys ; i++) + state->key_root[i]= HA_OFFSET_ERROR; +} diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c index 39a286ad1f7..6d6b9d032fd 100644 --- a/storage/maria/ma_delete_table.c +++ b/storage/maria/ma_delete_table.c @@ -64,7 +64,7 @@ int maria_delete_table(const char *name) raid_type= info->s->base.raid_type; raid_chunks= info->s->base.raid_chunks; #endif - sync_dir= (info->s->base.transactional && !info->s->temporary) ? + sync_dir= (info->s->now_transactional && !info->s->temporary) ? 
MY_SYNC_DIR : 0; maria_close(info); } diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index ebf84032106..9281378fd33 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -1018,7 +1018,8 @@ uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from) */ my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, - ulong packed_length, my_bool with_checksum) + ulong packed_length, my_bool with_checksum, + ha_checksum checksum) { uint length,new_length,flag,bit,i; char *pos,*end,*packpos,*to; @@ -1124,7 +1125,7 @@ my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, if (packed_length != (uint) (to - rec_buff) + test(info->s->calc_checksum) || (bit != 1 && (flag & ~(bit - 1)))) goto err; - if (with_checksum && ((uchar) info->cur_row.checksum != (uchar) *to)) + if (with_checksum && ((uchar) checksum != (uchar) *to)) { DBUG_PRINT("error",("wrong checksum for row")); goto err; diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c index a04fba4e0d8..cfb4580a72f 100644 --- a/storage/maria/ma_info.c +++ b/storage/maria/ma_info.c @@ -135,6 +135,7 @@ void _ma_report_error(int errcode, const char *file_name) file_name+= length - 64; } } + my_error(errcode, MYF(ME_NOREFRESH), file_name); DBUG_VOID_RETURN; } diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c index 8042c6d9873..1cd82720260 100644 --- a/storage/maria/ma_init.c +++ b/storage/maria/ma_init.c @@ -44,6 +44,7 @@ int maria_init(void) maria_inited= TRUE; pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW); _ma_init_block_record_data(); + my_handler_error_register(); } return 0; } diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index 4f92054dcb4..f709d7e5759 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -135,6 +135,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type) } info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); info->lock_type= F_UNLCK; + 
DBUG_ASSERT(share->now_transactional == share->base.born_transactional); break; case F_RDLCK: if (info->lock_type == F_WRLCK) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 3a8e01da09a..cb5e02a1cc0 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -4263,7 +4263,7 @@ my_bool translog_write_record(LSN *lsn, if (share) { - if (!share->base.transactional) + if (!share->now_transactional) { DBUG_PRINT("info", ("It is not transactional table")); DBUG_RETURN(0); @@ -4331,12 +4331,12 @@ my_bool translog_write_record(LSN *lsn, { uint i; uint len= 0; -#ifdef HAVE_PURIFY +#ifdef HAVE_purify ha_checksum checksum= 0; #endif for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++) { -#ifdef HAVE_PURIFY +#ifdef HAVE_purify /* Find unitialized bytes early */ checksum+= my_checksum(checksum, parts_data[i].str, parts_data[i].length); @@ -5615,6 +5615,16 @@ static my_bool write_hook_for_redo(enum translog_record_type type __attribute__ ((unused))) { /* + Users of dummy_transaction_object must keep this TRN clean as it + is used by many threads (like those manipulating non-transactional + tables). It might be dangerous if one user sets rec_lsn or some other + member and it is picked up by another user (like putting this rec_lsn into + a page of a non-transactional table); it's safer if all members stay 0. So + non-transactional log records (REPAIR, CREATE, RENAME, DROP) should not + call this hook; we trust them but verify ;) + */ + DBUG_ASSERT(!(maria_multi_threaded && (trn->trid == 0))); + /* If the hook stays so simple, it would be faster to pass !trn->rec_lsn ? 
trn->rec_lsn : some_dummy_lsn to translog_write_record(), like Monty did in his original code, and not @@ -5640,6 +5650,7 @@ static my_bool write_hook_for_undo(enum translog_record_type type struct st_translog_parts *parts __attribute__ ((unused))) { + DBUG_ASSERT(!(maria_multi_threaded && (trn->trid == 0))); trn->undo_lsn= *lsn; if (unlikely(LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn) == 0)) trn->first_undo_lsn= diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index f2bfd2c9d7e..230f999c19a 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -62,7 +62,7 @@ struct st_maria_share; #define pagerange_store(T,A) int2store(T,A) #define fileid_korr(P) uint2korr(P) #define page_korr(P) uint5korr(P) -#define dirpos_korr(P) (P[0]) +#define dirpos_korr(P) ((P)[0]) #define pagerange_korr(P) uint2korr(P) /* diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h index 34cb7616b74..e034834aa20 100644 --- a/storage/maria/ma_loghandler_lsn.h +++ b/storage/maria/ma_loghandler_lsn.h @@ -45,7 +45,7 @@ typedef TRANSLOG_ADDRESS LSN; #define LSN_OFFSET(L) ((L) & 0xFFFFFFFFL) /* Makes lsn/log address from file number and record offset */ -#define MAKE_LSN(F,S) ((((uint64)(F)) << 32) | (S)) +#define MAKE_LSN(F,S) ((LSN) ((((uint64)(F)) << 32) | (S))) /* checks LSN */ #define LSN_VALID(L) \ diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 5cd2bfbb838..eb0bba7503f 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -260,7 +260,9 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT, MY_UNPACK_FILENAME),MYF(0)); pthread_mutex_lock(&THR_LOCK_maria); - if (!(old_info=_ma_test_if_reopen(name_buff))) + old_info= 0; + if ((open_flags & HA_OPEN_COPY) || + !(old_info=_ma_test_if_reopen(name_buff))) { share= &share_buff; bzero((gptr) &share_buff,sizeof(share_buff)); @@ -586,8 +588,18 @@ MARIA_HA 
*maria_open(const char *name, int mode, uint open_flags) share->base.null_bytes + share->base.pack_bytes + test(share->options & HA_OPTION_CHECKSUM)); - if (share->base.transactional) + if (open_flags & HA_OPEN_COPY) { + /* + this instance will be a temporary one used just to create a data + file for REPAIR. Don't do logging. This base information will not go + to disk. + */ + share->base.born_transactional= FALSE; + } + if (share->base.born_transactional) + { + share->page_type= PAGECACHE_LSN_PAGE; share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE; if (unlikely((share->state.create_rename_lsn == (LSN)ULONGLONG_MAX) && (open_flags & HA_OPEN_FROM_SQL_LAYER))) @@ -600,11 +612,12 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) _ma_update_create_rename_lsn_on_disk(share, TRUE); } } + else + share->page_type= PAGECACHE_PLAIN_PAGE; + share->now_transactional= share->base.born_transactional; + share->base.default_rec_buff_size= max(share->base.pack_reclength, share->base.max_key_length); - share->page_type= (share->base.transactional ? 
PAGECACHE_LSN_PAGE : - PAGECACHE_PLAIN_PAGE); - if (share->data_file_type == DYNAMIC_RECORD) { share->base.extra_rec_buff_size= @@ -870,6 +883,8 @@ void _ma_setup_functions(register MARIA_SHARE *share) } share->file_read= _ma_nommap_pread; share->file_write= _ma_nommap_pwrite; + share->calc_check_checksum= share->calc_checksum; + if (!(share->options & HA_OPTION_CHECKSUM) && share->data_file_type != COMPRESSED_RECORD) share->calc_checksum= share->calc_write_checksum= 0; @@ -1118,7 +1133,7 @@ uint _ma_base_info_write(File file, MARIA_BASE_INFO *base) *ptr++= base->key_reflength; *ptr++= base->keys; *ptr++= base->auto_key; - *ptr++= base->transactional; + *ptr++= base->born_transactional; *ptr++= 0; /* Reserved */ mi_int2store(ptr,base->pack_bytes); ptr+= 2; mi_int2store(ptr,base->blobs); ptr+= 2; @@ -1161,7 +1176,7 @@ static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base) base->key_reflength= *ptr++; base->keys= *ptr++; base->auto_key= *ptr++; - base->transactional= *ptr++; + base->born_transactional= *ptr++; ptr++; base->pack_bytes= mi_uint2korr(ptr); ptr+= 2; base->blobs= mi_uint2korr(ptr); ptr+= 2; diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index b1ebfbbe7c6..50dde101c0d 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -177,7 +177,8 @@ static const char *page_cache_page_type_str[]= /* used only for control page type changing during debugging */ "EMPTY", "PLAIN", - "LSN" + "LSN", + "READ_UNKNOWN" }; static const char *page_cache_page_write_mode_str[]= @@ -320,7 +321,8 @@ struct st_pagecache_block_link #ifndef DBUG_OFF /* debug checks */ static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_pin mode) + enum pagecache_page_pin mode + __attribute__((unused))) { struct st_my_thread_var *thread= my_thread_var; PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); @@ -378,6 +380,7 @@ static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, 1 - Error */ +#ifdef 
NOT_USED static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, enum pagecache_page_lock lock, enum pagecache_page_pin pin) @@ -445,7 +448,8 @@ error: page_cache_page_pin_str[pin])); DBUG_RETURN(1); } -#endif +#endif /* NOT_USED */ +#endif /* !DBUG_OFF */ #define FLUSH_CACHE 2000 /* sort this many blocks at once */ @@ -581,6 +585,7 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, myf flags) { DBUG_ENTER("pagecache_fwrite"); + DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE); if (type == PAGECACHE_LSN_PAGE) { LSN lsn; @@ -2435,16 +2440,16 @@ static void read_block(PAGECACHE *pagecache, } -/* - Set LSN on the page to the given one if the given LSN is bigger +/** + @brief Set LSN on the page to the given one if the given LSN is bigger - SYNOPSIS - check_and_set_lsn() - lsn LSN to set - block block to check and set + @param pagecache pointer to a page cache data structure + @param lsn LSN to set + @param block block to check and set */ -static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) +static void check_and_set_lsn(PAGECACHE *pagecache, + LSN lsn, PAGECACHE_BLOCK_LINK *block) { LSN old; DBUG_ENTER("check_and_set_lsn"); @@ -2454,7 +2459,14 @@ static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) (ulong)LSN_FILE_NO(old), (ulong)LSN_OFFSET(old), (ulong)LSN_FILE_NO(lsn), (ulong)LSN_OFFSET(lsn))); if (cmp_translog_addr(lsn, old) > 0) + { + + DBUG_ASSERT(block->type != PAGECACHE_READ_UNKNOWN_PAGE); lsn_store(block->buffer + PAGE_LSN_OFFSET, lsn); + /* we stored LSN in page so we dirtied it */ + if (!(block->status & PCBLOCK_CHANGED)) + link_to_changed_list(pagecache, block); + } DBUG_VOID_RETURN; } @@ -2527,10 +2539,8 @@ void pagecache_unlock(PAGECACHE *pagecache, if (block->rec_lsn == 0) block->rec_lsn= first_REDO_LSN_for_page; } - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); if (make_lock_and_pin(pagecache, block, lock, pin)) { @@ -2590,10 +2600,8 
@@ void pagecache_unpin(PAGECACHE *pagecache, DBUG_ASSERT(block != 0); DBUG_ASSERT(page_st == PAGE_READ); - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); /* we can just unpin only with keeping read lock because: @@ -2690,7 +2698,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, block->rec_lsn= first_REDO_LSN_for_page; } if (lsn != LSN_IMPOSSIBLE) - check_and_set_lsn(lsn, block); + check_and_set_lsn(pagecache, lsn, block); if (make_lock_and_pin(pagecache, block, lock, pin)) DBUG_ASSERT(0); /* should not happend */ @@ -2744,10 +2752,8 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache, inc_counter_for_resize_op(pagecache); - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); /* We can just unpin only with keeping read lock because: @@ -2858,8 +2864,10 @@ restart: (pin == PAGECACHE_PIN)), &page_st); DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || - block->type == type); - block->type= type; + block->type == type || type == PAGECACHE_READ_UNKNOWN_PAGE); + if (type != PAGECACHE_READ_UNKNOWN_PAGE || + block->type == PAGECACHE_EMPTY_PAGE) + block->type= type; if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ)) { DBUG_PRINT("info", ("read block 0x%lx", (ulong)block)); @@ -3174,6 +3182,7 @@ my_bool pagecache_write_part(PAGECACHE *pagecache, page_cache_page_pin_str[pin], page_cache_page_write_mode_str[write_mode], offset, size)); + DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE); DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED); DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK); DBUG_ASSERT(offset + size <= pagecache->block_size); @@ -3223,6 +3232,7 @@ restart: } DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == PAGECACHE_READ_UNKNOWN_PAGE || block->type == type); block->type= type; @@ -3643,6 +3653,14 @@ restart: ("changed_blocks") though it's still dirty (the flush by 
another thread has not yet happened). Checkpoint will miss the page and so must be blocked until that flush has happened. + Note that if there are two concurrent + flush_pagecache_blocks_int() on this file, then the first one may + move the block into its first_in_switch, and the second one would + just not see the block and wrongly consider its job done. + @todo RECOVERY Maria does protect such flushes with intern_lock, + but Checkpoint does not (Checkpoint makes sure that + changed_blocks_is_incomplete is 0 when it starts, but as + flush_cached_blocks() releases mutex, this may change... */ /** @todo RECOVERY: check all places where we remove a page from the @@ -3898,7 +3916,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, ptr+= 4; lsn_store(ptr, block->rec_lsn); ptr+= LSN_STORE_SIZE; - if (block->rec_lsn != 0) + if (block->rec_lsn != LSN_IMPOSSIBLE) { if (cmp_translog_addr(block->rec_lsn, minimum_rec_lsn) < 0) minimum_rec_lsn= block->rec_lsn; diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h index 478f71161eb..04355514e06 100644 --- a/storage/maria/ma_pagecache.h +++ b/storage/maria/ma_pagecache.h @@ -34,7 +34,9 @@ enum pagecache_page_type /* the page does not contain LSN */ PAGECACHE_PLAIN_PAGE, /* the page contain LSN (maria tablespace page) */ - PAGECACHE_LSN_PAGE + PAGECACHE_LSN_PAGE, + /* Page type used when scanning file and we don't care about the type */ + PAGECACHE_READ_UNKNOWN_PAGE }; /* diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c index 8f42a5b931a..9dd75705229 100644 --- a/storage/maria/ma_rename.c +++ b/storage/maria/ma_rename.c @@ -56,7 +56,13 @@ int maria_rename(const char *old_name, const char *new_name) raid_chunks = share->base.raid_chunks; #endif - sync_dir= (share->base.transactional && !share->temporary) ? 
+ /* + the renaming of an internal table to the final table (like in ALTER TABLE) + is the moment when this table receives its correct create_rename_lsn and + this is important; make sure transactionality has been re-enabled. + */ + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + sync_dir= (share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0; if (sync_dir) { diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh index 76b6c32913f..a6786315afe 100755 --- a/storage/maria/ma_test_all.sh +++ b/storage/maria/ma_test_all.sh @@ -5,15 +5,23 @@ # If you want to run this in Valgrind, you should use --trace-children=yes, # so that it detects problems in ma_test* and not in the shell script + +# Remove # from following line if you need some more information +#set -x -v -e + +set -e # abort at first failure + valgrind="valgrind --alignment=8 --leak-check=yes" silent="-s" suffix="" -#set -x -v -e if [ -z "$maria_path" ] then maria_path="." fi +# Delete temporary files +rm -f *.TMD + run_tests() { row_type=$1 @@ -126,6 +134,11 @@ run_repair_tests() $maria_path/maria_chk$suffix -se test1 $maria_path/maria_chk$suffix -rqos --correct-checksum test1 $maria_path/maria_chk$suffix -se test1 + $maria_path/ma_test2$suffix $silent -c -d1 $row_type + $maria_path/maria_chk$suffix -s --parallel-recover test2 + $maria_path/maria_chk$suffix -se test2 + $maria_path/maria_chk$suffix -s --parallel-recover --quick test2 + $maria_path/maria_chk$suffix -se test2 } run_pack_tests() @@ -153,6 +166,15 @@ run_pack_tests() $maria_path/maria_chk$suffix -es test1 $maria_path/maria_chk$suffix -rus test1 $maria_path/maria_chk$suffix -es test1 + + $maria_path/ma_test2$suffix $silent -c -d1 $row_type + $maria_path/maria_chk$suffix -s --parallel-recover test2 + $maria_path/maria_chk$suffix -se test2 + $maria_path/maria_chk$suffix -s --parallel-recover --unpack test2 + $maria_path/maria_chk$suffix -se test2 + $maria_path/maria_pack$suffix --force -s test1 
+ $maria_path/maria_chk$suffix -s --parallel-recover --unpack test2 + $maria_path/maria_chk$suffix -se test2 } echo "Running tests with dynamic row format" @@ -167,20 +189,28 @@ run_pack_tests -S echo "Running tests with block row format" run_tests -M +run_repair_tests -M +run_pack_tests -M echo "Running tests with block row format and transactions" run_tests "-M -T" +run_repair_tests "-M -T" +run_pack_tests "-M -T" # -# Tests that gives warnings +# Tests that gives warnings or errors # $maria_path/ma_test2$suffix $silent -L -K -W -P -S -R1 -m500 $maria_path/maria_chk$suffix -sm test2 echo "ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135" -$maria_path/ma_test2$suffix $silent -L -K -R1 -m2000 +$maria_path/ma_test2$suffix $silent -L -K -R1 -m2000 >ma_test2_message.txt 2>&1 && false # success is failure +cat ma_test2_message.txt +grep "Error: 135" ma_test2_message.txt > /dev/null echo "$maria_path/maria_chk$suffix -sm test2 will warn that 'Datafile is almost full'" -$maria_path/maria_chk$suffix -sm test2 +$maria_path/maria_chk$suffix -sm test2 >ma_test2_message.txt 2>&1 +cat ma_test2_message.txt +grep "warning: Datafile is almost full" ma_test2_message.txt >/dev/null $maria_path/maria_chk$suffix -ssm test2 # diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index 737c7c909b4..913959717fc 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -147,6 +147,7 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) if (share->calc_checksum) { info->cur_row.checksum= (*share->calc_checksum)(info,newrec); + info->state->checksum+= (info->cur_row.checksum - old_checksum); /* Store new checksum in index file header */ key_changed|= HA_STATE_CHANGED; } @@ -173,8 +174,6 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) if (auto_key_changed) set_if_bigger(info->s->state.auto_increment, ma_retrieve_auto_increment(info, newrec)); - if (share->calc_checksum) - 
info->state->checksum+= (info->cur_row.checksum - old_checksum); /* We can't yet have HA_STATE_AKTIV here, as block_record dosn't support diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index 9019cc33295..37f6f1fb49b 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -676,14 +676,7 @@ get_one_option(int optid, check_param.testflag|= T_UPDATE_STATE; break; case '#': - if (argument == disabled_my_option) - { - DBUG_POP(); - } - else - { - DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_chk.trace"); - } + DBUG_SET_INITIAL(argument ? argument : "d:t:o,/tmp/maria_chk.trace"); break; case 'V': print_version(); @@ -862,16 +855,25 @@ static int maria_chk(HA_CHECK *param, my_string filename) share->r_locks=0; maria_block_size= share->base.block_size; - if (share->data_file_type == BLOCK_RECORD && - (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_FAST | T_STATISTICS | - T_CHECK | T_CHECK_ONLY_CHANGED))) + if (share->data_file_type == BLOCK_RECORD || + ((param->testflag & T_UNPACK) && + share->state.header.org_data_file_type == BLOCK_RECORD)) { - _ma_check_print_error(param, - "Record format used by '%s' is is not yet supported with repair/check", - filename); - param->error_printed= 0; - error= 1; - goto end2; + if (param->testflag & T_SORT_RECORDS) + { + _ma_check_print_error(param, + "Record format used by '%s' is is not yet supported with repair/check", + filename); + param->error_printed= 0; + error= 1; + goto end2; + } + /* We can't do parallell repair with BLOCK_RECORD yet */ + if (param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) + { + param->testflag&= ~(T_REP_BY_SORT | T_REP_PARALLEL); + param->testflag|= T_REP; + } } /* @@ -1031,7 +1033,7 @@ static int maria_chk(HA_CHECK *param, my_string filename) know what the log's end LSN is now, so we just let the server know that it will have to find and store it. 
*/ - if (share->base.transactional) + if (share->base.born_transactional) share->state.create_rename_lsn= (LSN)ULONGLONG_MAX; if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) && (maria_is_any_key_active(share->state.key_map) || @@ -1764,11 +1766,14 @@ void _ma_check_print_info(HA_CHECK *param __attribute__((unused)), const char *fmt,...) { va_list args; + DBUG_ENTER("_ma_check_print_info"); + DBUG_PRINT("enter", ("format: %s", fmt)); va_start(args,fmt); VOID(vfprintf(stdout, fmt, args)); VOID(fputc('\n',stdout)); va_end(args); + DBUG_VOID_RETURN; } /* VARARGS */ @@ -1777,6 +1782,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("_ma_check_print_warning"); + DBUG_PRINT("enter", ("format: %s", fmt)); fflush(stdout); if (!param->warning_printed && !param->error_printed) @@ -1802,7 +1808,7 @@ void _ma_check_print_error(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("_ma_check_print_error"); - DBUG_PRINT("enter",("format: %s",fmt)); + DBUG_PRINT("enter", ("format: %s", fmt)); fflush(stdout); if (!param->warning_printed && !param->error_printed) diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 39b8ba2292c..e46b120bf3f 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -30,6 +30,7 @@ #define MAX_NONMAPPED_INSERTS 1000 #define MARIA_MAX_TREE_LEVELS 32 +#define SANITY_CHECKS struct st_transaction; @@ -170,8 +171,11 @@ typedef struct st_ma_base_info /* The following are from the header */ uint key_parts, all_key_parts; - /* If false, we disable logging, versioning, transaction etc */ - my_bool transactional; + /** + @brief If false, we disable logging, versioning, transaction etc. Observe + difference with MARIA_SHARE::now_transactional + */ + my_bool born_transactional; } MARIA_BASE_INFO; @@ -264,7 +268,9 @@ typedef struct st_maria_share Calculate checksum for a row during write. 
May be 0 if we calculate the checksum in write_record_init() */ - ha_checksum(*calc_write_checksum) (struct st_maria_info *, const byte *); + ha_checksum(*calc_write_checksum)(struct st_maria_info *, const byte *); + /* calculate checksum for a row during check table */ + ha_checksum(*calc_check_checksum)(struct st_maria_info *, const byte *); /* Compare a row in memory with a row on disk */ my_bool (*compare_unique)(struct st_maria_info *, MARIA_UNIQUEDEF *, const byte *record, MARIA_RECORD_POS pos); @@ -303,6 +309,13 @@ typedef struct st_maria_share not_flushed, concurrent_insert; my_bool delay_key_write; my_bool have_rtree; + /** + @brief if the table is transactional right now. It may have been created + transactional (base.born_transactional==TRUE) but with transactionality + (logging) temporarily disabled (now_transactional==FALSE). The opposite + (FALSE, TRUE) is impossible. + */ + my_bool now_transactional; #ifdef THREAD THR_LOCK lock; pthread_mutex_t intern_lock; /* Locking for use with _locking */ @@ -749,7 +762,7 @@ extern ulong _ma_rec_unpack(MARIA_HA *info, byte *to, byte *from, ulong reclength); extern my_bool _ma_rec_check(MARIA_HA *info, const char *record, byte *packpos, ulong packed_length, - my_bool with_checkum); + my_bool with_checkum, ha_checksum checksum); extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos, ulong length, my_off_t next_filepos, byte ** record, ulong *reclength, @@ -874,6 +887,7 @@ void _ma_update_status(void *param); void _ma_restore_status(void *param); void _ma_copy_status(void *to, void *from); my_bool _ma_check_status(void *param); +void _ma_reset_status(MARIA_HA *maria); extern MARIA_HA *_ma_test_if_reopen(char *filename); my_bool _ma_check_table_is_closed(const char *name, const char *where); @@ -887,7 +901,6 @@ MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const byte *record); my_bool _ma_write_abort_default(MARIA_HA *info); C_MODE_START -int _ma_repair_write_log_record(const HA_CHECK *param, 
MARIA_HA *info); /* Functions needed by _ma_check (are overrided in MySQL) */ volatile int *_ma_killed_ptr(HA_CHECK *param); void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); @@ -908,10 +921,14 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param); int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, ulong); int _ma_sync_table_files(const MARIA_HA *info); -int _ma_initialize_data_file(File dfile, MARIA_SHARE *share); +int _ma_initialize_data_file(MARIA_SHARE *share, File dfile); int _ma_update_create_rename_lsn_on_disk(MARIA_SHARE *share, my_bool do_sync); void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn); +#define _ma_tmp_disable_logging_for_table(S) \ + { (S)->now_transactional= FALSE; (S)->page_type= PAGECACHE_PLAIN_PAGE; } +#define _ma_reenable_logging_for_table(S) \ + { if (((S)->now_transactional= (S)->base.born_transactional)) \ + (S)->page_type= PAGECACHE_LSN_PAGE; } extern PAGECACHE *maria_log_pagecache; - diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index 568814f6f8a..2d664e08662 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -14,20 +14,22 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "maria_def.h" +#include <ma_blockrec.h> #include <my_getopt.h> #define PCACHE_SIZE (1024*1024*10) #define LOG_FLAGS 0 #define LOG_FILE_SIZE (1024L*1024L) - -static PAGECACHE pagecache; - static const char *load_default_groups[]= { "maria_read_log",0 }; static void get_options(int *argc,char * * *argv); #ifndef DBUG_OFF -static const char *default_dbug_option; +#if defined(__WIN__) +const char *default_dbug_option= "d:t:i:O,\\maria_read_log.trace"; +#else +const char *default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace"; #endif +#endif /* DBUG_OFF */ static my_bool opt_only_display, opt_display_and_apply; struct TRN_FOR_RECOVERY @@ -55,7 +57,25 @@ prototype_exec_hook(CHECKPOINT); 
prototype_exec_hook(REDO_CREATE_TABLE); prototype_exec_hook(FILE_ID); prototype_exec_hook(REDO_INSERT_ROW_HEAD); +prototype_exec_hook(REDO_INSERT_ROW_TAIL); +prototype_exec_hook(REDO_PURGE_ROW_HEAD); +prototype_exec_hook(REDO_PURGE_ROW_TAIL); +prototype_exec_hook(UNDO_ROW_INSERT); +prototype_exec_hook(UNDO_ROW_DELETE); prototype_exec_hook(COMMIT); + + +/* + TODO: Avoid mallocs in exec. + + Proposed fix: + Add either a context/buffer argument to all exec_hook functions + or add 'record_buffer' and 'record_buffer_length' to + TRANSLOG_HEADER_BUFFER. + With this we could use my_realloc() instead of my_malloc() to + allocate data and save some mallocs. +*/ + /* To implement REDO_DROP_TABLE and REDO_RENAME_TABLE, we would need to go through the all_tables[] array, find all open instances of the @@ -78,19 +98,6 @@ int main(int argc, char **argv) maria_data_root= "."; -#ifndef DBUG_OFF -#if defined(__WIN__) - default_dbug_option= "d:t:i:O,\\maria_read_log.trace"; -#else - default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace"; -#endif - if (argc > 1) - { - DBUG_SET(default_dbug_option); - DBUG_SET_INITIAL(default_dbug_option); - } -#endif - if (maria_init()) { fprintf(stderr, "Can't init Maria engine (%d)\n", errno); @@ -107,7 +114,7 @@ int main(int argc, char **argv) fprintf(stderr, "Can't find any log\n"); goto err; } - if (init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + if (init_pagecache(maria_pagecache, PCACHE_SIZE, 0, 0, TRANSLOG_PAGE_SIZE) == 0) { fprintf(stderr, "Got error in init_pagecache() (errno: %d)\n", errno); @@ -119,7 +126,7 @@ int main(int argc, char **argv) But if it finds a log and this log was crashed, it will create a new log, which is useless. TODO: start log handler in read-only mode. 
*/ - if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, maria_pagecache, TRANSLOG_DEFAULT_FLAGS)) { fprintf(stderr, "Can't init loghandler (%d)\n", errno); @@ -137,6 +144,11 @@ int main(int argc, char **argv) install_exec_hook(REDO_CREATE_TABLE); install_exec_hook(FILE_ID); install_exec_hook(REDO_INSERT_ROW_HEAD); + install_exec_hook(REDO_INSERT_ROW_TAIL); + install_exec_hook(REDO_PURGE_ROW_HEAD); + install_exec_hook(REDO_PURGE_ROW_TAIL); + install_exec_hook(UNDO_ROW_INSERT); + install_exec_hook(UNDO_ROW_DELETE); install_exec_hook(COMMIT); if (opt_only_display) @@ -261,7 +273,7 @@ err: /* don't touch anything more, in case we hit a bug */ exit(1); end: - maria_end(); + maria_panic(HA_PANIC_CLOSE); free_defaults(default_argv); my_end(0); exit(0); @@ -318,7 +330,13 @@ get_one_option(int optid __attribute__((unused)), const struct my_option *opt __attribute__((unused)), char *argument __attribute__((unused))) { - /* for now there is nothing special with our options */ + switch (optid) { +#ifndef DBUG_OFF + case '#': + DBUG_SET_INITIAL(argument ? 
argument : default_dbug_option); + break; + } +#endif return 0; } @@ -442,8 +460,11 @@ prototype_exec_hook(REDO_CREATE_TABLE) info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR); if (info) { - DBUG_ASSERT(info->s->reopen == 1); /* check that we're not using it */ - if (!info->s->base.transactional) + MARIA_SHARE *share= info->s; + /* check that we're not already using it */ + DBUG_ASSERT(share->reopen == 1); + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + if (!share->base.born_transactional) { /* could be that transactional table was later dropped, and a non-trans @@ -454,7 +475,7 @@ prototype_exec_hook(REDO_CREATE_TABLE) DBUG_ASSERT(0); /* I want to know this */ goto end; } - if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0) { printf(", has create_rename_lsn (%lu,0x%lx) is more recent than record", (ulong) LSN_FILE_NO(rec->lsn), @@ -521,7 +542,7 @@ prototype_exec_hook(REDO_CREATE_TABLE) data file does not preclude this). 
*/ if (((info= maria_open(name, O_RDONLY, 0)) == NULL) || - _ma_initialize_data_file(dfile, info->s)) + _ma_initialize_data_file(info->s, dfile)) { fprintf(stderr, "Failed to open new table or write to data file\n"); goto err; @@ -551,6 +572,7 @@ prototype_exec_hook(FILE_ID) int error; char *name, *buff; MARIA_HA *info= NULL; + MARIA_SHARE *share; if (((buff= my_malloc(rec->record_length, MYF(MY_WME))) == NULL) || (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) != rec->record_length)) @@ -566,7 +588,7 @@ prototype_exec_hook(FILE_ID) { printf(", closing table '%s'", info->s->open_file_name); all_tables[sid]= NULL; - info->s->base.transactional= TRUE; /* put back the truth */ + _ma_reenable_logging_for_table(info->s); /* put back the truth */ if (maria_close(info)) { fprintf(stderr, "Failed to close table\n"); @@ -586,19 +608,19 @@ prototype_exec_hook(FILE_ID) fprintf(stderr, "Table is crashed, can't apply log records to it\n"); goto err; } - DBUG_ASSERT(info->s->reopen == 1); /* should always be only one instance */ - if (!info->s->base.transactional) + share= info->s; + /* check that we're not already using it */ + DBUG_ASSERT(share->reopen == 1); + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + if (!share->base.born_transactional) { printf(", is not transactional\n"); DBUG_ASSERT(0); /* I want to know this */ goto end; } all_tables[sid]= info; - /* - don't log any records for this work. TODO make sure this variable does not - go to disk before we restore it to its true value. 
- */ - info->s->base.transactional= FALSE; + /* don't log any records for this work */ + _ma_tmp_disable_logging_for_table(share); printf(", opened\n"); error= 0; goto end; @@ -619,6 +641,140 @@ prototype_exec_hook(REDO_INSERT_ROW_HEAD) ulonglong page; MARIA_HA *info; char llbuf[22]; + byte *buff= 0; + + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page. Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. 
+ */ + + if ((!(buff= (byte*) my_malloc(rec->record_length, MYF(MY_WME)))) || + (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) != + rec->record_length)) + { + fprintf(stderr, "Failed to read record\n"); + goto end; + } + if (_ma_apply_redo_insert_row_head_or_tail(info, rec->lsn, HEAD_PAGE, + rec->header + FILEID_STORE_SIZE, + buff + (rec->record_length - + rec->non_header_data_len), + rec->non_header_data_len)) + goto end; + my_free(buff, MYF(0)); + return 0; + +end: + /* as we don't have apply working: */ + my_free(buff, MYF(MY_ALLOW_ZERO_PTR)); + return 1; +} + + +prototype_exec_hook(REDO_INSERT_ROW_TAIL) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + char llbuf[22]; + byte *buff= 0; + + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page. Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). 
But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. + */ + + if ((!(buff= (byte*) my_malloc(rec->record_length, MYF(MY_WME)))) || + (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) != + rec->record_length)) + { + fprintf(stderr, "Failed to read record\n"); + goto end; + } + if (_ma_apply_redo_insert_row_head_or_tail(info, rec->lsn, TAIL_PAGE, + rec->header + FILEID_STORE_SIZE, + buff + (rec->record_length - + rec->non_header_data_len), + rec->non_header_data_len)) + goto end; + + my_free(buff, MYF(0)); + return 0; + +end: + /* as we don't have apply working: */ + my_free(buff, MYF(MY_ALLOW_ZERO_PTR)); + return 1; +} + + +prototype_exec_hook(REDO_PURGE_ROW_HEAD) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + char llbuf[22]; + sid= fileid_korr(rec->header); page= page_korr(rec->header + FILEID_STORE_SIZE); llstr(page, llbuf); @@ -653,13 +809,89 @@ prototype_exec_hook(REDO_INSERT_ROW_HEAD) runtime, putting the same LSN as runtime had done will decrease differences. So we use the UNDO's LSN which is current_group_end_lsn. 
*/ - DBUG_ASSERT("Monty" == "this is the place"); + + if (_ma_apply_redo_purge_row_head_or_tail(info, rec->lsn, HEAD_PAGE, + rec->header + FILEID_STORE_SIZE)) + goto end; + + return 0; + end: /* as we don't have apply working: */ return 1; } +prototype_exec_hook(REDO_PURGE_ROW_TAIL) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + char llbuf[22]; + + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page. Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. 
+ */ + + if (_ma_apply_redo_purge_row_head_or_tail(info, rec->lsn, TAIL_PAGE, + rec->header + FILEID_STORE_SIZE)) + goto end; + + return 0; + +end: + /* as we don't have apply working: */ + return 1; +} + + +static int exec_LOGREC_UNDO_ROW_INSERT(const TRANSLOG_HEADER_BUFFER *rec + __attribute__((unused))) +{ + /* Ignore this during the redo phase */ + return 0; +} + +static int exec_LOGREC_UNDO_ROW_DELETE(const TRANSLOG_HEADER_BUFFER *rec + __attribute__((unused))) +{ + /* Ignore this during the redo phase */ + return 0; +} + + + prototype_exec_hook(COMMIT) { uint16 sid= rec->short_trid; @@ -742,7 +974,10 @@ static void end_of_redo_phase() { MARIA_HA *info= all_tables[sid]; if (info != NULL) + { + _ma_reenable_logging_for_table(info->s); /* put back the truth */ maria_close(info); + } } } } diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am index 28264d5d903..b63cb60c059 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -84,6 +84,6 @@ ma_pagecache_consist_64kWR_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPA # the generic lock manager may not be used in the end and lockman1-t crashes, # so we don't build lockman-t and lockman1-t -CLEANFILES = maria_control page_cache_test_file_1 \ +CLEANFILES = maria_log_control page_cache_test_file_1 \ maria_log.???????? diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c index 1b6cff5e903..b95e0f4d857 100644 --- a/storage/myisam/ft_stopwords.c +++ b/storage/myisam/ft_stopwords.c @@ -51,10 +51,11 @@ static int ft_add_stopword(const char *w) int ft_init_stopwords() { + DBUG_ENTER("ft_init_stopwords"); if (!stopwords3) { if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) - return -1; + DBUG_RETURN(-1); init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp, 0, (ft_stopword_file ? 
(tree_element_free)&FT_STOPWORD_free : 0), @@ -70,10 +71,10 @@ int ft_init_stopwords() int error=-1; if (!*ft_stopword_file) - return 0; + DBUG_RETURN(0); if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1) - return -1; + DBUG_RETURN(-1); len=(uint)my_seek(fd, 0L, MY_SEEK_END, MYF(0)); my_seek(fd, 0L, MY_SEEK_SET, MYF(0)); if (!(start=buffer=my_malloc(len+1, MYF(MY_WME)))) @@ -90,7 +91,7 @@ err1: my_free(buffer, MYF(0)); err0: my_close(fd, MYF(MY_WME)); - return error; + DBUG_RETURN(error); } else { @@ -100,13 +101,14 @@ err0: for (;*sws;sws++) { if (ft_add_stopword(*sws)) - return -1; + DBUG_RETURN(-1); } ft_stopword_file="(built-in)"; /* for SHOW VARIABLES */ } - return 0; + DBUG_RETURN(0); } + int is_stopword(char *word, uint len) { FT_STOPWORD sw; @@ -118,6 +120,8 @@ int is_stopword(char *word, uint len) void ft_free_stopwords() { + DBUG_ENTER("ft_free_stopwords"); + if (stopwords3) { delete_tree(stopwords3); /* purecov: inspected */ @@ -125,4 +129,5 @@ void ft_free_stopwords() stopwords3=0; } ft_stopword_file= 0; + DBUG_VOID_RETURN; } diff --git a/support-files/compiler_warnings.supp b/support-files/compiler_warnings.supp index babc482976d..1d73e7a55cc 100644 --- a/support-files/compiler_warnings.supp +++ b/support-files/compiler_warnings.supp @@ -55,6 +55,11 @@ db_vrfy.c : .*comparison is always false due to limited range of data type.* .*/ndb/.* : .*defined but not used.* # +# Maria warning that is ok in debug builds +# +storage/maria/ma_pagecache.c: .*'info_check_pin' defined but not used + +# # Unexplanable (?) stuff # listener.cc : .*conversion from 'SOCKET' to 'int'.* |