diff options
author | unknown <jani@a88-113-38-195.elisa-laajakaista.fi> | 2007-07-04 12:20:53 +0300 |
---|---|---|
committer | unknown <jani@a88-113-38-195.elisa-laajakaista.fi> | 2007-07-04 12:20:53 +0300 |
commit | 3aa3cc1cfad1acc28b47006b16d2907ddfcb09d5 (patch) | |
tree | ae7500f44289b2ba848d51b85fa7df3577248b18 | |
parent | 3f4faedb3fcd648bd824de40bec23d981c38d2cd (diff) | |
parent | 55bb37319624ab72cb20caf1328dba216074b417 (diff) | |
download | mariadb-git-3aa3cc1cfad1acc28b47006b16d2907ddfcb09d5.tar.gz |
Merge jamppa@bk-internal.mysql.com:/home/bk/mysql-maria
into a88-113-38-195.elisa-laajakaista.fi:/home/my/bk/mysql-maria.prod
BitKeeper/etc/ignore:
auto-union
include/maria.h:
Auto merged
include/my_base.h:
Auto merged
mysql-test/include/ps_conv.inc:
Auto merged
mysql-test/r/ps_2myisam.result:
Auto merged
mysql-test/r/ps_3innodb.result:
Auto merged
mysql-test/r/ps_4heap.result:
Auto merged
mysql-test/r/ps_5merge.result:
Auto merged
mysql-test/r/ps_7ndb.result:
Auto merged
mysys/Makefile.am:
Auto merged
mysys/mf_iocache.c:
Auto merged
mysys/mf_tempfile.c:
Auto merged
mysys/my_error.c:
Auto merged
mysys/my_init.c:
Auto merged
mysys/my_open.c:
Auto merged
sql/handler.cc:
Auto merged
sql/handler.h:
Auto merged
sql/lex.h:
Auto merged
sql/sql_show.cc:
Auto merged
sql/sql_yacc.yy:
Auto merged
sql/table.cc:
Auto merged
sql/table.h:
Auto merged
storage/maria/ha_maria.cc:
Auto merged
storage/maria/ha_maria.h:
Auto merged
storage/maria/ma_bitmap.c:
Auto merged
storage/maria/ma_blockrec.h:
Auto merged
storage/maria/ma_create.c:
Auto merged
storage/maria/ma_dynrec.c:
Auto merged
storage/maria/ma_loghandler.h:
Auto merged
storage/maria/ma_loghandler_lsn.h:
Auto merged
storage/maria/ma_open.c:
Auto merged
storage/maria/ma_pagecache.c:
Auto merged
storage/maria/ma_pagecache.h:
Auto merged
storage/maria/ma_test1.c:
Auto merged
storage/maria/ma_test2.c:
Auto merged
storage/maria/ma_update.c:
Auto merged
storage/maria/maria_chk.c:
Auto merged
storage/maria/unittest/ma_test_loghandler-t.c:
Auto merged
storage/maria/unittest/ma_test_loghandler_multigroup-t.c:
Auto merged
storage/maria/unittest/ma_test_loghandler_multithread-t.c:
Auto merged
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
Auto merged
storage/myisam/ft_stopwords.c:
Auto merged
storage/myisam/mi_close.c:
Auto merged
support-files/compiler_warnings.supp:
Auto merged
mysys/mf_keycache.c:
Pulled from mysql-maria tree, manual merge.
mysys/my_handler.c:
Pulled from mysql-maria tree, manual merge.
mysys/my_symlink2.c:
Pulled from mysql-maria tree, manual merge.
sql/mysqld.cc:
Pulled from mysql-maria tree, manual merge.
sql/sql_table.cc:
Pulled from mysql-maria tree, manual merge.
storage/maria/ma_blockrec.c:
Pulled from mysql-maria tree, manual merge.
storage/maria/ma_check.c:
Pulled from mysql-maria tree, manual merge.
storage/maria/ma_close.c:
Pulled from mysql-maria tree, manual merge.
storage/maria/ma_loghandler.c:
Pulled from mysql-maria tree, manual merge.
storage/maria/maria_def.h:
Pulled from mysql-maria tree, manual merge.
74 files changed, 3232 insertions, 880 deletions
diff --git a/.bzrignore b/.bzrignore index d58cca23226..d284c0f740a 100644 --- a/.bzrignore +++ b/.bzrignore @@ -2431,6 +2431,7 @@ storage/maria/maria_ftdump storage/maria/maria_log storage/maria/maria_log.* storage/maria/maria_pack +storage/maria/maria_read_log storage/maria/unittest/ma_pagecache_consist_1k-t-big storage/maria/unittest/ma_pagecache_consist_1kHC-t-big storage/maria/unittest/ma_pagecache_consist_1kRD-t-big diff --git a/include/maria.h b/include/maria.h index 9dd6e077666..abe6463edd0 100644 --- a/include/maria.h +++ b/include/maria.h @@ -325,17 +325,18 @@ typedef struct st_maria_sort_info pthread_mutex_t mutex; pthread_cond_t cond; #endif - MARIA_HA *info; + MARIA_HA *info, *new_info; HA_CHECK *param; char *buff; SORT_KEY_BLOCKS *key_block, *key_block_end; SORT_FT_BUF *ft_buf; my_off_t filelength, dupp, buff_length; + ulonglong page; ha_rows max_records; uint current_key, total_keys; uint got_error, threads_running; myf myf_rw; - enum data_file_type new_data_file_type; + enum data_file_type new_data_file_type, org_data_file_type; } MARIA_SORT_INFO; typedef struct st_maria_sort_param diff --git a/include/my_base.h b/include/my_base.h index 9f07bc70f5b..fad3ce225ef 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -47,6 +47,7 @@ #define HA_OPEN_FOR_REPAIR 32 /* open even if crashed */ #define HA_OPEN_FROM_SQL_LAYER 64 #define HA_OPEN_MMAP 128 /* open memory mapped */ +#define HA_OPEN_COPY 256 /* Open copy (for repair) */ /* The following is parameter to ha_rkey() how to use key */ diff --git a/include/my_handler.h b/include/my_handler.h index 13dcd01a332..1a1235d0588 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -110,7 +110,8 @@ extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, uint *diff_pos); extern HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a); - +extern void my_handler_error_register(void); +extern void my_handler_error_unregister(void); /* Inside an in-memory data record, memory pointers to 
pieces of the record (like BLOBs) are stored in their native byte order and in diff --git a/mysql-test/include/ps_conv.inc b/mysql-test/include/ps_conv.inc index 195d1061664..8cbe9450063 100644 --- a/mysql-test/include/ps_conv.inc +++ b/mysql-test/include/ps_conv.inc @@ -52,7 +52,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +eval create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/maria-connect.result b/mysql-test/r/maria-connect.result new file mode 100644 index 00000000000..e232f564d10 --- /dev/null +++ b/mysql-test/r/maria-connect.result @@ -0,0 +1,23 @@ +set global storage_engine=maria; +set session storage_engine=maria; +drop table if exists t1; +SET SQL_WARNINGS=1; +RESET MASTER; +set binlog_format=statement; +CREATE TABLE t1 (a int primary key); +insert t1 values (1),(2),(3); +insert t1 values (4),(2),(5); +ERROR 23000: Duplicate entry '2' for key 'PRIMARY' +select * from t1; +a +1 +2 +3 +4 +SHOW BINLOG EVENTS FROM 102; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 102 Query 1 200 use `test`; CREATE TABLE t1 (a int primary key) +master-bin.000001 200 Query 1 291 use `test`; insert t1 values (1),(2),(3) +master-bin.000001 291 Query 1 382 use `test`; insert t1 values (4),(2),(5) +drop table t1; +set binlog_format=default; diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result index 3ec9af0fffa..6303f498e36 100644 --- a/mysql-test/r/maria.result +++ b/mysql-test/r/maria.result @@ -2,25 +2,6 @@ set global storage_engine=maria; set session storage_engine=maria; drop table if exists t1,t2; SET SQL_WARNINGS=1; -RESET MASTER; -set binlog_format=statement; -CREATE TABLE t1 (a int primary key); -insert t1 values (1),(2),(3); -insert t1 values (4),(2),(5); -ERROR 23000: Duplicate entry '2' for key 'PRIMARY' -select * from 
t1; -a -1 -2 -3 -4 -SHOW BINLOG EVENTS FROM 102; -Log_name Pos Event_type Server_id End_log_pos Info -master-bin.000001 102 Query 1 200 use `test`; CREATE TABLE t1 (a int primary key) -master-bin.000001 200 Query 1 291 use `test`; insert t1 values (1),(2),(3) -master-bin.000001 291 Query 1 382 use `test`; insert t1 values (4),(2),(5) -drop table t1; -set binlog_format=default; CREATE TABLE t1 ( STRING_DATA char(255) default NULL, KEY string_data (STRING_DATA) @@ -618,7 +599,7 @@ t1 1 a 1 a A NULL NULL NULL YES BTREE disabled alter table t1 enable keys; show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +t1 1 a 1 a A NULL NULL NULL YES BTREE alter table t1 engine=heap; alter table t1 disable keys; Warnings: @@ -853,19 +834,19 @@ _id DELETE FROM t1 WHERE _id < 8; SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK OPTIMIZE TABLE t1; Table Op Msg_type Msg_text -test.t1 optimize status Table is already up to date +test.t1 optimize status OK CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # SELECT _id FROM t1; _id 8 @@ -912,7 +893,7 @@ _id DELETE FROM t1 WHERE _id < 8; SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length 
Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK @@ -924,7 +905,7 @@ Table Op Msg_type Msg_text test.t1 check status OK SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # SELECT _id FROM t1; _id 8 @@ -1598,7 +1579,7 @@ alter table t1 disable keys; alter table t1 enable keys; show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t1 1 a 1 a A 8 NULL NULL YES BTREE disabled +t1 1 a 1 a A 8 NULL NULL YES BTREE drop table t1; show create table t1; show create table t1; @@ -1811,3 +1792,67 @@ CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK DROP TABLE t1; +create table t1 (a int) transactional=0; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 TRANSACTIONAL=0 +drop table t1; +create table t1 (a int) row_format=dynamic transactional=0; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC TRANSACTIONAL=0 +drop table t1; +create table t1 (a int) row_format=dynamic transactional=1; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC TRANSACTIONAL=1 +alter table t1 row_format=PAGE; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE 
TRANSACTIONAL=1 +alter table t1 transactional=0; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE TRANSACTIONAL=0 +drop table t1; +create table t1 (a int) row_format=PAGE; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE +drop table t1; +create table t1 (a int) row_format=page; +insert delayed into t1 values(1); +ERROR HY000: Table storage engine for 't1' doesn't have this option +drop table t1; +create table t1 (a int) row_format=page transactional=0; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +a +1 +select count(*) from t1; +count(*) +1 +drop table t1; +create table t1 (a int) row_format=dynamic; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +a +1 +select count(*) from t1; +count(*) +1 +drop table t1; diff --git a/mysql-test/r/ps_2myisam.result b/mysql-test/r/ps_2myisam.result index feb40de9a6d..39a76378ad5 100644 --- a/mysql-test/r/ps_2myisam.result +++ b/mysql-test/r/ps_2myisam.result @@ -1756,7 +1756,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_3innodb.result b/mysql-test/r/ps_3innodb.result index 9398426ec0b..c3aac0b2395 100644 --- a/mysql-test/r/ps_3innodb.result +++ b/mysql-test/r/ps_3innodb.result @@ -1739,7 +1739,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_4heap.result 
b/mysql-test/r/ps_4heap.result index c2de727f572..fda3c6adf0b 100644 --- a/mysql-test/r/ps_4heap.result +++ b/mysql-test/r/ps_4heap.result @@ -1740,7 +1740,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_5merge.result b/mysql-test/r/ps_5merge.result index c1f36cc84de..dfb876ea827 100644 --- a/mysql-test/r/ps_5merge.result +++ b/mysql-test/r/ps_5merge.result @@ -1676,7 +1676,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, @@ -4697,7 +4697,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_7ndb.result b/mysql-test/r/ps_7ndb.result index 26475f4c147..413f1e25ab5 100644 --- a/mysql-test/r/ps_7ndb.result +++ b/mysql-test/r/ps_7ndb.result @@ -1739,7 +1739,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result index 9268c44eecd..b1ea905c406 100644 --- a/mysql-test/r/ps_maria.result +++ b/mysql-test/r/ps_maria.result @@ -1756,7 +1756,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= 
NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/t/maria-connect.test b/mysql-test/t/maria-connect.test new file mode 100644 index 00000000000..aedfa92e278 --- /dev/null +++ b/mysql-test/t/maria-connect.test @@ -0,0 +1,39 @@ +# +# Test that can't be run with --extern +# + +-- source include/have_maria.inc + +let $default=`select @@global.storage_engine`; +set global storage_engine=maria; +set session storage_engine=maria; + +# Initialise +--disable_warnings +drop table if exists t1; +--enable_warnings +SET SQL_WARNINGS=1; + +# +# UNIQUE key test +# +# as long as maria cannot rollback, binlog should contain both inserts +# +RESET MASTER; +set binlog_format=statement; +CREATE TABLE t1 (a int primary key); +insert t1 values (1),(2),(3); +--error 1582 +insert t1 values (4),(2),(5); +select * from t1; +SHOW BINLOG EVENTS FROM 102; +drop table t1; +set binlog_format=default; + +# End of 5.2 tests + +--disable_result_log +--disable_query_log +eval set global storage_engine=$default; +--enable_result_log +--enable_query_log diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test index 763abbd9d25..f03d744f850 100644 --- a/mysql-test/t/maria.test +++ b/mysql-test/t/maria.test @@ -16,22 +16,6 @@ drop table if exists t1,t2; SET SQL_WARNINGS=1; # -# UNIQUE key test -# -# as long as maria cannot rollback, binlog should contain both inserts -# -RESET MASTER; -set binlog_format=statement; -CREATE TABLE t1 (a int primary key); -insert t1 values (1),(2),(3); ---error 1582 -insert t1 values (4),(2),(5); -select * from t1; -SHOW BINLOG EVENTS FROM 102; -drop table t1; -set binlog_format=default; - -# # Test problem with CHECK TABLE; # @@ -597,10 +581,7 @@ insert t1 select * from t2; show keys from t1; alter table t1 enable keys; show keys from t1; -#TODO after we have repair: delete the following 
--disable-warnings ---disable_warnings alter table t1 engine=heap; ---enable_warnings alter table t1 disable keys; show keys from t1; drop table t1,t2; @@ -1072,10 +1053,10 @@ create table t1 (a int not null, key key_block_size=1024 (a)); --error 1064 create table t1 (a int not null, key `a` key_block_size=1024 (a)); - # # Test of changing MI_KEY_BLOCK_LENGTH # + CREATE TABLE t1 ( c1 INT, c2 VARCHAR(300), @@ -1116,6 +1097,45 @@ DELETE FROM t1 WHERE c1 >= 10; CHECK TABLE t1; DROP TABLE t1; +# +# Test that TRANSACTIONAL is preserved +# + +create table t1 (a int) transactional=0; +show create table t1; +drop table t1; +create table t1 (a int) row_format=dynamic transactional=0; +show create table t1; +drop table t1; +create table t1 (a int) row_format=dynamic transactional=1; +show create table t1; +alter table t1 row_format=PAGE; +show create table t1; +alter table t1 transactional=0; +show create table t1; +drop table t1; +create table t1 (a int) row_format=PAGE; +show create table t1; +drop table t1; + +# Verify that INSERT DELAYED is disabled only for transactional tables +create table t1 (a int) row_format=page; +--error ER_ILLEGAL_HA +insert delayed into t1 values(1); +drop table t1; +create table t1 (a int) row_format=page transactional=0; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +select count(*) from t1; +drop table t1; +create table t1 (a int) row_format=dynamic; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +select count(*) from t1; +drop table t1; + # End of 5.2 tests --disable_result_log diff --git a/mysys/Makefile.am b/mysys/Makefile.am index bdc9f176d8e..3d3047b3a9c 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -20,7 +20,7 @@ INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ -I$(top_srcdir)/include -I$(srcdir) pkglib_LIBRARIES = libmysys.a LDADD = libmysys.a $(top_builddir)/strings/libmystrings.a $(top_builddir)/dbug/libdbug.a -noinst_HEADERS = mysys_priv.h my_static.h 
+noinst_HEADERS = mysys_priv.h my_static.h my_safehash.h libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_path.c mf_loadpath.c my_file.c \ my_open.c my_create.c my_dup.c my_seek.c my_read.c \ diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index 58650733490..0f49dd22bb9 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -1701,6 +1701,7 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock) my_bool append_cache; my_off_t pos_in_file; DBUG_ENTER("my_b_flush_io_cache"); + DBUG_PRINT("enter", ("cache: 0x%lx", (long) info)); if (!(append_cache = (info->type == SEQ_READ_APPEND))) need_append_buffer_lock=0; diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c index c1108f85054..98da06e9dfa 100644 --- a/mysys/mf_tempfile.c +++ b/mysys/mf_tempfile.c @@ -122,6 +122,7 @@ File create_temp_file(char *to, const char *dir, const char *prefix, if (org_file >= 0 && file < 0) { int tmp=my_errno; + close(org_file); (void) my_delete(to, MYF(MY_WME | ME_NOINPUT)); my_errno=tmp; } diff --git a/mysys/my_error.c b/mysys/my_error.c index e8fd8b938ee..75701536dd3 100644 --- a/mysys/my_error.c +++ b/mysys/my_error.c @@ -84,11 +84,6 @@ int my_error(int nr, myf MyFlags, ...) if (nr <= meh_p->meh_last) break; -#ifdef SHARED_LIBRARY - if ((meh_p == &my_errmsgs_globerrs) && ! globerrs[0]) - init_glob_errs(); -#endif - /* get the error message string. Default, if NULL or empty string (""). */ if (! (format= (meh_p && (nr >= meh_p->meh_first)) ? meh_p->meh_errmsgs[nr - meh_p->meh_first] : NULL) || ! 
*format) diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 78cc10ac840..bf75d992f9d 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -19,6 +19,7 @@ #include <m_ctype.h> #include <my_base.h> #include <my_handler.h> +#include <my_sys.h> int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, uchar *b, uint b_length, my_bool part_key, @@ -562,3 +563,69 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) } return keyseg; } + + +/* + Errors a handler can give you +*/ + +static const char *handler_error_messages[]= +{ + "Didn't find key on read or update", + "Duplicate key on write or update", + "Undefined handler error 122", + "Someone has changed the row since it was read (while the table was locked to prevent it)", + "Wrong index given to function", + "Undefined handler error 125", + "Index file is crashed", + "Record file is crashed", + "Out of memory in engine", + "Undefined handler error 129", + "Incorrect file format", + "Command not supported by database", + "Old database file", + "No record read before update", + "Record was already deleted (or record file crashed)", + "No more room in record file", + "No more room in index file", + "No more records (read after end of file)", + "Unsupported extension used for table", + "Too big row", + "Wrong create options", + "Duplicate unique key or constraint on write or update", + "Unknown character set used in table", + "Conflicting table definitions in sub-tables of MERGE table", + "Table is crashed and last repair failed", + "Table was marked as crashed and should be repaired", + "Lock timed out; Retry transaction", + "Lock table is full; Restart program with a larger locktable", + "Updates are not allowed under a read only transactions", + "Lock deadlock; Retry transaction", + "Foreign key constraint is incorrectly formed", + "Cannot add a child row", + "Cannot delete a parent row", + "Unknown handler error" +}; + + +/* + Register handler error messages for usage with my_error() 
+ + NOTES + This is safe to call multiple times as my_error_register() + will ignore calls to register already registered error numbers. +*/ + + +void my_handler_error_register(void) +{ + my_error_register(handler_error_messages, HA_ERR_FIRST, + HA_ERR_FIRST+ array_elements(handler_error_messages)-1); +} + + +void my_handler_error_unregister(void) +{ + my_error_unregister(HA_ERR_FIRST, + HA_ERR_FIRST+ array_elements(handler_error_messages)-1); +} diff --git a/mysys/my_init.c b/mysys/my_init.c index 45601f54cfa..5ec22256e15 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -77,6 +77,7 @@ my_bool my_init(void) mysys_usage_id++; my_umask= 0660; /* Default umask for new files */ my_umask_dir= 0700; /* Default umask for new directories */ + init_glob_errs(); #if defined(THREAD) && defined(SAFE_MUTEX) safe_mutex_global_init(); /* Must be called early */ #endif diff --git a/mysys/my_open.c b/mysys/my_open.c index 7efaed90e2d..750979c1146 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -71,6 +71,7 @@ File my_open(const char *FileName, int Flags, myf MyFlags) #else fd = open((char *) FileName, Flags); #endif + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_OPEN, EE_FILENOTFOUND, MyFlags)); } /* my_open */ @@ -124,61 +125,65 @@ int my_close(File fd, myf MyFlags) SYNOPSIS my_register_filename() - fd - FileName - type_file_type + fd File number opened, -1 if error on open + FileName File name + type_file_type How file was created + error_message_number Error message number if caller got error (fd == -1) + MyFlags Flags for my_close() + + RETURN + -1 error + # Filenumber + */ File my_register_filename(File fd, const char *FileName, enum file_type type_of_file, uint error_message_number, myf MyFlags) { + DBUG_ENTER("my_register_filename"); if ((int) fd >= 0) { if ((uint) fd >= my_file_limit) { #if defined(THREAD) && !defined(HAVE_PREAD) - (void) my_close(fd,MyFlags); - my_errno=EMFILE; - if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) - 
my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - return(-1); -#endif + my_errno= EMFILE; +#else thread_safe_increment(my_file_opened,&THR_LOCK_open); - return(fd); /* safeguard */ + DBUG_RETURN(fd); /* safeguard */ +#endif } - pthread_mutex_lock(&THR_LOCK_open); - if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags))) + else { - my_file_opened++; - my_file_info[fd].type = type_of_file; + pthread_mutex_lock(&THR_LOCK_open); + if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags))) + { + my_file_opened++; + my_file_info[fd].type = type_of_file; #if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); + pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); #endif + pthread_mutex_unlock(&THR_LOCK_open); + DBUG_PRINT("exit",("fd: %d",fd)); + DBUG_RETURN(fd); + } pthread_mutex_unlock(&THR_LOCK_open); - DBUG_PRINT("exit",("fd: %d",fd)); - return(fd); + my_errno= ENOMEM; } - pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); - fd= -1; - my_errno=ENOMEM; } else - my_errno=errno; - DBUG_PRINT("error",("Got error %d on open",my_errno)); - if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) { - if (my_errno == EMFILE) { - DBUG_PRINT("error",("print err: %d",EE_OUT_OF_FILERESOURCES)); - my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - } else { - DBUG_PRINT("error",("print err: %d",error_message_number)); - my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - } + my_errno= errno; + + DBUG_PRINT("error",("Got error %d on open", my_errno)); + if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) + { + if (my_errno == EMFILE) + error_message_number= EE_OUT_OF_FILERESOURCES; + DBUG_PRINT("error",("print err: %d",error_message_number)); + my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG), + FileName, my_errno); } - return(fd); + DBUG_RETURN(-1); } #ifdef __WIN__ diff --git a/sql-bench/example 
b/sql-bench/example index df2a9b8be69..cb39fad819e 100644 --- a/sql-bench/example +++ b/sql-bench/example @@ -6,15 +6,14 @@ machine="Linux-x64" # InnoDB tests -./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_log_file_size=100M" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log - -./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_log_file_size=100M" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log +./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_additional_mem_pool_size=20M --innodb_log_file_size=1000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=2 --skip-innodb-doblewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log +./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_additional_mem_pool_size=20M --innodb_log_file_size=1000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=2 --skip-innodb-doblewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log # MyISAM tests -./run-all-tests --suffix=-myisam --comments="Engine=MyISAM key_buffer_size=16M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --log +./run-all-tests --suffix=-myisam --comments="Engine=MyISAM key_buffer_size=256M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --log -./run-all-tests --suffix=_fast-myisam --comments="Engine=MyISAM key_buffer_size=16M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" 
--machine="$machine" --fast --log +./run-all-tests --suffix=_fast-myisam --comments="Engine=MyISAM key_buffer_size=256M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log compare-results --relative output/RUN-mysql-myisam-* output/RUN-mysql_fast-myisam* output/RUN-mysql* diff --git a/sql/handler.cc b/sql/handler.cc index d768ad4dc5d..19af3397c13 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -68,7 +68,7 @@ static const LEX_STRING sys_table_aliases[]= }; const char *ha_row_type[] = { - "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "?","?","?" + "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE","?","?","?" }; const char *tx_isolation_names[] = @@ -296,7 +296,8 @@ handler *get_ha_partition(partition_info *part_info) 0 OK != 0 Error */ -static int ha_init_errors(void) + +int ha_init_errors(void) { #define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg) const char **errmsgs; @@ -511,9 +512,6 @@ int ha_init() int error= 0; DBUG_ENTER("ha_init"); - if (ha_init_errors()) - DBUG_RETURN(1); - DBUG_ASSERT(total_ha < MAX_HA); /* Check if there is a transaction-capable storage engine besides the diff --git a/sql/handler.h b/sql/handler.h index 9eed5ce7f2e..3ea1bfb55d4 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -281,7 +281,7 @@ enum legacy_db_type enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED, ROW_TYPE_DYNAMIC, ROW_TYPE_COMPRESSED, - ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT, ROW_TYPE_PAGES }; + ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT, ROW_TYPE_PAGE }; enum enum_binlog_func { BFN_RESET_LOGS= 1, @@ -324,6 +324,7 @@ enum enum_binlog_command { #define HA_CREATE_USED_PASSWORD (1L << 17) #define HA_CREATE_USED_CONNECTION (1L << 18) #define HA_CREATE_USED_KEY_BLOCK_SIZE (1L << 19) +#define HA_CREATE_USED_TRANSACTIONAL (1L << 20) typedef ulonglong my_xid; // this line is the same as in log_event.h #define MYSQL_XID_PREFIX "MySQLXid" @@ -754,6 +755,7 @@ class 
partition_info; struct st_partition_iter; #define NOT_A_PARTITION_ID ((uint32)-1) +enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES }; typedef struct st_ha_create_information { @@ -776,6 +778,8 @@ typedef struct st_ha_create_information uint options; /* OR of HA_CREATE_ options */ uint merge_insert_method; uint extra_size; /* length of extra data segment */ + /* 0 not used, 1 if not transactional, 2 if transactional */ + enum ha_choice transactional; bool table_existed; /* 1 in create if table existed */ bool frm_only; /* 1 if no ha_create_table() */ bool varchar; /* 1 if table has a VARCHAR */ @@ -1776,6 +1780,7 @@ static inline bool ha_storage_engine_is_enabled(const handlerton *db_type) } /* basic stuff */ +int ha_init_errors(void); int ha_init(void); int ha_end(void); int ha_initialize_handlerton(st_plugin_int *plugin); diff --git a/sql/lex.h b/sql/lex.h index e311379120d..5f24b06cf05 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -380,6 +380,7 @@ static SYMBOL symbols[] = { { "OWNER", SYM(OWNER_SYM)}, { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, { "PARSER", SYM(PARSER_SYM)}, + { "PAGE", SYM(PAGE_SYM)}, { "PARTIAL", SYM(PARTIAL)}, { "PARTITION", SYM(PARTITION_SYM)}, { "PARTITIONING", SYM(PARTITIONING_SYM)}, @@ -529,6 +530,7 @@ static SYMBOL symbols[] = { { "TO", SYM(TO_SYM)}, { "TRAILING", SYM(TRAILING)}, { "TRANSACTION", SYM(TRANSACTION_SYM)}, + { "TRANSACTIONAL", SYM(TRANSACTIONAL_SYM)}, { "TRIGGER", SYM(TRIGGER_SYM)}, { "TRIGGERS", SYM(TRIGGERS_SYM)}, { "TRUE", SYM(TRUE_SYM)}, diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 5c915f5474b..57d8a89a3c1 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -3417,6 +3417,10 @@ server."); using_update_log=1; } + /* Allow storage engine to give real error messages */ + if (ha_init_errors()) + DBUG_RETURN(1); + if (plugin_init(&defaults_argc, defaults_argv, (opt_noacl ? PLUGIN_INIT_SKIP_PLUGIN_TABLE : 0) | (opt_help ? 
PLUGIN_INIT_SKIP_INITIALIZATION : 0))) diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 903c8ab74f1..dcaa466011e 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -1323,6 +1323,11 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet, packet->append(STRING_WITH_LEN(" ROW_FORMAT=")); packet->append(ha_row_type[(uint) share->row_type]); } + if (share->transactional != HA_CHOICE_UNDEF) + { + packet->append(STRING_WITH_LEN(" TRANSACTIONAL=")); + packet->append(share->transactional == HA_CHOICE_YES ? "1" : "0", 1); + } if (table->s->key_block_size) { char *end; @@ -2896,8 +2901,8 @@ static int get_schema_tables_record(THD *thd, struct st_table_list *tables, case ROW_TYPE_COMPACT: tmp_buff= "Compact"; break; - case ROW_TYPE_PAGES: - tmp_buff= "Paged"; + case ROW_TYPE_PAGE: + tmp_buff= "Page"; break; } table->field[6]->store(tmp_buff, strlen(tmp_buff), cs); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 047c210d6a5..6fc1c235ebf 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -5334,6 +5334,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, } if (!(used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) create_info->key_block_size= table->s->key_block_size; + if (!(used_fields & HA_CREATE_USED_TRANSACTIONAL)) + create_info->transactional= table->s->transactional; if (!create_info->tablespace && create_info->storage_media != HA_SM_MEMORY) { @@ -6989,7 +6991,6 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list) DBUG_ENTER("mysql_recreate_table"); bzero((char*) &create_info, sizeof(create_info)); - create_info.db_type= 0; create_info.row_type=ROW_TYPE_NOT_USED; create_info.default_table_charset=default_charset_info; /* Force alter table to recreate table */ diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 3babaa2aa7a..d521b8e8a12 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -857,6 +857,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token OUT_SYM /* SQL-2003-R */ %token OWNER_SYM %token 
PACK_KEYS_SYM +%token PAGE_SYM %token PARAM_MARKER %token PARSER_SYM %token PARTIAL /* SQL-2003-N */ @@ -1009,6 +1010,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TO_SYM /* SQL-2003-R */ %token TRAILING /* SQL-2003-R */ %token TRANSACTION_SYM +%token TRANSACTIONAL_SYM %token TRIGGERS_SYM %token TRIGGER_SYM /* SQL-2003-R */ %token TRIM /* SQL-2003-N */ @@ -4354,6 +4356,12 @@ create_table_option: Lex->create_info.used_fields|= HA_CREATE_USED_KEY_BLOCK_SIZE; Lex->create_info.key_block_size= $3; } + | TRANSACTIONAL_SYM opt_equal ulong_num + { + Lex->create_info.used_fields|= HA_CREATE_USED_TRANSACTIONAL; + Lex->create_info.transactional= ($3 != 0 ? HA_CHOICE_YES : + HA_CHOICE_NO); + } ; default_charset: @@ -4432,7 +4440,8 @@ row_types: | DYNAMIC_SYM { $$= ROW_TYPE_DYNAMIC; } | COMPRESSED_SYM { $$= ROW_TYPE_COMPRESSED; } | REDUNDANT_SYM { $$= ROW_TYPE_REDUNDANT; } - | COMPACT_SYM { $$= ROW_TYPE_COMPACT; }; + | COMPACT_SYM { $$= ROW_TYPE_COMPACT; } + | PAGE_SYM { $$= ROW_TYPE_PAGE; }; merge_insert_types: NO_SYM { $$= MERGE_INSERT_DISABLED; } @@ -9998,6 +10007,7 @@ keyword_sp: | ONE_SHOT_SYM {} | ONE_SYM {} | PACK_KEYS_SYM {} + | PAGE_SYM {} | PARTIAL {} | PARTITIONING_SYM {} | PARTITIONS_SYM {} @@ -10067,6 +10077,7 @@ keyword_sp: | TEXT_SYM {} | THAN_SYM {} | TRANSACTION_SYM {} + | TRANSACTIONAL_SYM {} | TRIGGERS_SYM {} | TIMESTAMP {} | TIMESTAMP_ADD {} diff --git a/sql/table.cc b/sql/table.cc index 45ca17afce4..72a4d4974eb 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -535,7 +535,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, if (!head[32]) // New frm file in 3.23 { share->avg_row_length= uint4korr(head+34); - share-> row_type= (row_type) head[40]; + share->transactional= (ha_choice) head[39]; + share->row_type= (row_type) head[40]; share->table_charset= get_charset((uint) head[38],MYF(0)); share->null_field_first= 1; } @@ -2230,7 +2231,9 @@ File create_frm(THD *thd, const char *name, const char *db, 
int2store(fileinfo+16,reclength); int4store(fileinfo+18,create_info->max_rows); int4store(fileinfo+22,create_info->min_rows); + /* fileinfo[26] is set in mysql_create_frm() */ fileinfo[27]=2; // Use long pack-fields + /* fileinfo[28 & 29] is set to key_info_length in mysql_create_frm() */ create_info->table_options|=HA_OPTION_LONG_BLOB_PTR; // Use portable blob pointers int2store(fileinfo+30,create_info->table_options); fileinfo[32]=0; // No filename anymore @@ -2238,8 +2241,9 @@ File create_frm(THD *thd, const char *name, const char *db, int4store(fileinfo+34,create_info->avg_row_length); fileinfo[38]= (create_info->default_table_charset ? create_info->default_table_charset->number : 0); + fileinfo[39]= (uchar) create_info->transactional; fileinfo[40]= (uchar) create_info->row_type; - /* Next few bytes were for RAID support */ + /* Next few bytes where for RAID support */ fileinfo[41]= 0; fileinfo[42]= 0; fileinfo[43]= 0; diff --git a/sql/table.h b/sql/table.h index 90c07979512..1843c0038c7 100644 --- a/sql/table.h +++ b/sql/table.h @@ -180,6 +180,7 @@ typedef struct st_table_share } enum row_type row_type; /* How rows are stored */ enum tmp_table_type tmp_table; + enum ha_choice transactional; uint ref_count; /* How many TABLE objects uses this */ uint open_count; /* Number of tables in open list */ diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index fbb25584910..2d11d2f470b 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -33,7 +33,7 @@ SUBDIRS = . unittest EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt plug.in pkgdata_DATA = ma_test_all ma_test_all.res pkglib_LIBRARIES = libmaria.a -bin_PROGRAMS = maria_chk maria_pack maria_ftdump +bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_read_log maria_chk_DEPENDENCIES= $(LIBRARIES) # Only reason to link with libmyisam.a here is that it's where some fulltext # pieces are (but soon we'll remove fulltext dependencies from Maria). 
@@ -49,6 +49,12 @@ maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +maria_read_log_DEPENDENCIES=$(LIBRARIES) +maria_read_log_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \ diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 0e629d6638c..da701feda10 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -437,32 +437,38 @@ volatile int *_ma_killed_ptr(HA_CHECK *param) void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...) { + va_list args; + DBUG_ENTER("_ma_check_print_error"); param->error_printed |= 1; param->out_flag |= O_DATA_LOST; - va_list args; va_start(args, fmt); _ma_check_print_msg(param, "error", fmt, args); va_end(args); + DBUG_VOID_RETURN; } void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...) { va_list args; + DBUG_ENTER("_ma_check_print_info"); va_start(args, fmt); _ma_check_print_msg(param, "info", fmt, args); va_end(args); + DBUG_VOID_RETURN; } void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) 
{ + va_list args; + DBUG_ENTER("_ma_check_print_warning"); param->warning_printed= 1; param->out_flag |= O_DATA_LOST; - va_list args; va_start(args, fmt); _ma_check_print_msg(param, "warning", fmt, args); va_end(args); + DBUG_VOID_RETURN; } } @@ -473,7 +479,7 @@ handler(hton, table_arg), file(0), int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | HA_FILE_BASED | HA_CAN_GEOMETRY | MARIA_CANNOT_ROLLBACK | - HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | + HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT), can_enable_indexes(1) {} @@ -691,9 +697,19 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked) info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED)) VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0)); - save_transactional= file->s->base.transactional; if ((data_file_type= file->s->data_file_type) != STATIC_RECORD) int_table_flags |= HA_REC_NOT_IN_SEQ; + if (!file->s->base.born_transactional) + { + /* + INSERT DELAYED cannot work with transactional tables (because it cannot + stand up to "when client gets ok the data is safe on disk": the record + may not even be inserted). In the future, we could enable it back (as a + client doing INSERT DELAYED knows the specificities; but we then should + make sure to regularly commit in the delayed_insert thread). 
+ */ + int_table_flags|= HA_CAN_INSERT_DELAYED; + } if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) int_table_flags |= HA_HAS_CHECKSUM; @@ -1067,16 +1083,6 @@ int ha_maria::repair(THD *thd, HA_CHECK &param, bool do_optimize) param.out_flag= 0; strmov(fixed_name, file->s->open_file_name); -#ifndef TO_BE_FIXED - /* QQ: Until we have repair for block format, lie that it succeded */ - if (file->s->data_file_type == BLOCK_RECORD) - { - if (do_optimize) - DBUG_RETURN(analyze(thd, (HA_CHECK_OPT*) 0)); - DBUG_RETURN(HA_ADMIN_OK); - } -#endif - // Don't lock tables if we have used LOCK TABLE if (!thd->locked_tables && maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK)) @@ -1101,7 +1107,9 @@ int ha_maria::repair(THD *thd, HA_CHECK &param, bool do_optimize) local_testflag |= T_STATISTICS; param.testflag |= T_STATISTICS; // We get this for free statistics_done= 1; - if (thd->variables.maria_repair_threads > 1) + /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */ + if (thd->variables.maria_repair_threads > 1 && + file->s->data_file_type != BLOCK_RECORD) { char buf[40]; /* TODO: respect maria_repair_threads variable */ @@ -1180,8 +1188,6 @@ int ha_maria::repair(THD *thd, HA_CHECK &param, bool do_optimize) llstr(rows, llbuff), llstr(file->state->records, llbuff2)); } - if (!error) - error= _ma_repair_write_log_record(&param, file); } else { @@ -1853,30 +1859,19 @@ int ha_maria::external_lock(THD *thd, int lock_type) { TRN *trn= THD_TRN; DBUG_ENTER("ha_maria::external_lock"); - if (!save_transactional) + /* + We don't test now_transactional because it may vary between lock/unlock + and thus confuse our reference counting. 
+ It is critical to skip non-transactional tables: user-visible temporary + tables get an external_lock() when read/written for the first time, but no + corresponding unlock (they just stay locked and are later dropped while + locked); if a tmp table was transactional, "SELECT FROM non_tmp, tmp" + would never commit as its "locked_tables" count would stay 1. + */ + if (!file->s->base.born_transactional) goto skip_transaction; - if (!trn && lock_type != F_UNLCK) /* no transaction yet - open it now */ - { - trn= trnman_new_trn(& thd->mysys_var->mutex, - & thd->mysys_var->suspend, - thd->thread_stack + STACK_DIRECTION * - (my_thread_stack_size - STACK_MIN_SIZE)); - if (!trn) - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - - DBUG_PRINT("info", ("THD_TRN set to 0x%lx", (ulong)trn)); - THD_TRN= trn; - if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) - trans_register_ha(thd, TRUE, maria_hton); - } if (lock_type != F_UNLCK) { - this->file->trn= trn; - if (!trnman_increment_locked_tables(trn)) - { - trans_register_ha(thd, FALSE, maria_hton); - trnman_new_statement(trn); - } if (!thd->transaction.on) { /* @@ -1888,11 +1883,32 @@ int ha_maria::external_lock(THD *thd, int lock_type) tons of archived logs to roll-forward, we could then not disable REDOs/UNDOs in this case. 
*/ - file->s->base.transactional= FALSE; + _ma_tmp_disable_logging_for_table(file->s); + } + if (!trn) /* no transaction yet - open it now */ + { + trn= trnman_new_trn(& thd->mysys_var->mutex, + & thd->mysys_var->suspend, + thd->thread_stack + STACK_DIRECTION * + (my_thread_stack_size - STACK_MIN_SIZE)); + if (unlikely(!trn)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + DBUG_PRINT("info", ("THD_TRN set to 0x%lx", (ulong)trn)); + THD_TRN= trn; + if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + trans_register_ha(thd, TRUE, maria_hton); + } + this->file->trn= trn; + if (!trnman_increment_locked_tables(trn)) + { + trans_register_ha(thd, FALSE, maria_hton); + trnman_new_statement(trn); } } else { + _ma_reenable_logging_for_table(file->s); this->file->trn= 0; /* TODO: remove it also in commit and rollback */ if (trn && trnman_has_locked_tables(trn)) { @@ -1913,7 +1929,6 @@ int ha_maria::external_lock(THD *thd, int lock_type) #endif } } - file->s->base.transactional= save_transactional; } skip_transaction: DBUG_RETURN(maria_lock_database(file, !table->s->tmp_table ? 
@@ -1924,7 +1939,7 @@ skip_transaction: int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type) { TRN *trn= THD_TRN; - if (save_transactional) + if (file->s->base.born_transactional) { DBUG_ASSERT(trn); // this may be called only after external_lock() DBUG_ASSERT(trnman_has_locked_tables(trn)); @@ -1969,7 +1984,7 @@ enum row_type ha_maria::get_row_type() const switch (file->s->data_file_type) { case STATIC_RECORD: return ROW_TYPE_FIXED; case DYNAMIC_RECORD: return ROW_TYPE_DYNAMIC; - case BLOCK_RECORD: return ROW_TYPE_PAGES; + case BLOCK_RECORD: return ROW_TYPE_PAGE; case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED; default: return ROW_TYPE_NOT_USED; } @@ -1978,6 +1993,8 @@ enum row_type ha_maria::get_row_type() const static enum data_file_type maria_row_type(HA_CREATE_INFO *info) { + if (info->transactional == HA_CHOICE_YES) + return BLOCK_RECORD; switch (info->row_type) { case ROW_TYPE_FIXED: return STATIC_RECORD; case ROW_TYPE_DYNAMIC: return DYNAMIC_RECORD; @@ -2022,7 +2039,8 @@ int ha_maria::create(const char *name, register TABLE *table_arg, share->avg_row_length); create_info.data_file_name= ha_create_info->data_file_name; create_info.index_file_name= ha_create_info->index_file_name; - create_info.transactional= row_type == BLOCK_RECORD; + create_info.transactional= (row_type == BLOCK_RECORD && + ha_create_info->transactional != HA_CHOICE_NO); if (ha_create_info->options & HA_LEX_CREATE_TMP_TABLE) create_flags|= HA_CREATE_TMP_TABLE; @@ -2231,7 +2249,7 @@ static int ha_maria_init(void *p) maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES; bzero(maria_log_pagecache, sizeof(*maria_log_pagecache)); maria_data_root= mysql_real_data_home; - res= maria_init() || ma_control_file_create_or_open() || + res= maria_init() || ma_control_file_create_or_open(TRUE) || (init_pagecache(maria_log_pagecache, TRANSLOG_PAGECACHE_SIZE, 0, 0, TRANSLOG_PAGE_SIZE) == 0) || diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index 
22741ddb24d..c97919ab86a 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -39,11 +39,6 @@ class ha_maria :public handler char *data_file_name, *index_file_name; enum data_file_type data_file_type; bool can_enable_indexes; - /** - @brief for temporarily disabling table's transactionality - (if THD::transaction::on is false), remember the original value here - */ - bool save_transactional; int repair(THD * thd, HA_CHECK &param, bool optimize); public: diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index e781c47c241..9b2741037c5 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -106,6 +106,19 @@ put on disk even if they are not in the page cache). - When explicitely requested (for example on backup or after recvoery, to simplify things) + + The flow of writing a row is that: + - Lock the bitmap + - Decide which data pages we will write to + - Mark them full in the bitmap page so that other threads do not try to + use the same data pages as us + - We unlock the bitmap + - Write the data pages + - Lock the bitmap + - Correct the bitmap page with the true final occupation of the data + pages (that is, we marked pages full but when we are done we realize + we didn't fill them) + - Unlock the bitmap. */ #include "maria_def.h" @@ -283,7 +296,7 @@ void _ma_bitmap_delete_all(MARIA_SHARE *share) { bzero(bitmap->map, share->block_size); memcpy(bitmap->map + share->block_size - 2, maria_bitmap_marker, 2); - bitmap->changed= 0; + bitmap->changed= 1; bitmap->page= 0; bitmap->used_size= bitmap->total_size; } diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 0c502641d9b..6e2dfc4fe15 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -557,7 +557,8 @@ static my_bool check_if_zero(uchar *pos, uint length) SYNOPSIS _ma_unpin_all_pages() info Maria handler - undo_lsn LSN for undo pages. 
LSN_IMPOSSIBLE if we shouldn't write undo + (error) NOTE We unpin pages in the reverse order as they where pinned; This may not @@ -580,17 +581,10 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn) DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn)); /* True if not disk error */ - DBUG_ASSERT(undo_lsn != 0 || !info->s->base.transactional); + DBUG_ASSERT((undo_lsn != LSN_IMPOSSIBLE) || !info->s->now_transactional); - if (!info->s->base.transactional) - { - /* - If this is a transactional table but with transactionality temporarily - disabled (like in ALTER TABLE) we need to give a sensible LSN to pages - and not 0. If this is not a transactional table it will reduce to 0. - */ - undo_lsn= info->s->state.create_rename_lsn; - } + if (!info->s->now_transactional) + undo_lsn= LSN_IMPOSSIBLE; /* don't try to set a LSN on pages */ while (pinned_page-- != page_link) pagecache_unlock_by_link(info->s->pagecache, pinned_page->link, @@ -866,7 +860,7 @@ static void calc_record_size(MARIA_HA *info, const uchar *record, compact_page() buff Page to compact block_size Size of page - recnr Put empty data after this row + rownr Put empty data after this row extend_block If 1, extend the block at 'rownr' to cover the whole block. 
*/ @@ -978,6 +972,13 @@ static void compact_page(uchar *buff, uint block_size, uint rownr, uint length= (uint) (dir - buff) - start_of_found_block; int2store(dir+2, length); } + else + { + /* + TODO: + Update (buff + EMPTY_SPACE_OFFSET) if we remove transid from rows + */ + } buff[PAGE_TYPE_OFFSET]&= ~(uchar) PAGE_CAN_BE_COMPACTED; } DBUG_EXECUTE("directory", _ma_print_directory(buff, block_size);); @@ -985,6 +986,37 @@ static void compact_page(uchar *buff, uint block_size, uint rownr, } +/* + Create an empty tail or head page + + SYNOPSIS + make_empty_page() + buff Page buffer + block_size Block size + page_type HEAD_PAGE or TAIL_PAGE + + NOTES + EMPTY_SPACE is not updated +*/ + +static void make_empty_page(byte *buff, uint block_size, uint page_type) +{ + + bzero(buff, PAGE_HEADER_SIZE); + /* + We zero the rest of the block to avoid getting old memory information + to disk and to allow the file to be compressed better if archived. + The rest of the code does not assume the block is zeroed above + PAGE_OVERHEAD_SIZE + */ + bzero(buff+ PAGE_HEADER_SIZE, block_size - PAGE_HEADER_SIZE); + buff[PAGE_TYPE_OFFSET]= (byte) page_type; + buff[DIR_COUNT_OFFSET]= 1; + /* Store position to the first row */ + int2store(buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE, + PAGE_HEADER_SIZE); +} + /* Read or initialize new head or tail page @@ -1017,6 +1049,7 @@ struct st_row_pos_info uint empty_space; /* Space left on page */ }; + static my_bool get_head_or_tail_page(MARIA_HA *info, MARIA_BITMAP_BLOCK *block, uchar *buff, uint length, uint page_type, @@ -1033,25 +1066,12 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, if (block->org_bitmap_value == 0) /* Empty block */ { /* New page */ - bzero(buff, PAGE_HEADER_SIZE); - - /* - We zero the rest of the block to avoid getting old memory information - to disk and to allow the file to be compressed better if archived. 
- The rest of the code does not assume the block is zeroed above - PAGE_OVERHEAD_SIZE - */ - bzero(buff+ PAGE_HEADER_SIZE, block_size - PAGE_HEADER_SIZE); - - buff[PAGE_TYPE_OFFSET]= (uchar) page_type; - buff[DIR_COUNT_OFFSET]= 1; + make_empty_page(buff, block_size, page_type); res->buff= buff; res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE); res->data= (buff + PAGE_HEADER_SIZE); res->dir= res->data + res->length; res->rownr= 0; - /* Store position to the first row */ - int2store(res->dir, PAGE_HEADER_SIZE); DBUG_ASSERT(length <= res->length); } else @@ -1444,7 +1464,7 @@ static my_bool free_full_page_range(MARIA_HA *info, ulonglong page, uint count) page, count, PAGECACHE_LOCK_WRITE, 0)) res= 1; - if (info->s->base.transactional) + if (info->s->now_transactional) { LSN lsn; DBUG_ASSERT(info->trn->rec_lsn); @@ -1708,8 +1728,12 @@ static my_bool write_block_record(MARIA_HA *info, uint length= (uint) (data - row_pos->data); DBUG_PRINT("info", ("head length: %u", length)); if (length < info->s->base.min_row_length) + { + uint diff_length= info->s->base.min_row_length - length; + bzero(data, diff_length); + data+= diff_length; length= info->s->base.min_row_length; - + } int2store(row_pos->dir + 2, length); /* update empty space at start of block */ row_pos->empty_space-= length; @@ -1787,7 +1811,7 @@ static my_bool write_block_record(MARIA_HA *info, ulong length; ulong data_length= (tmp_data - info->rec_buff); -#ifdef SANITY_CHECK +#ifdef SANITY_CHECKS if (cur_block->sub_blocks == 1) goto crashed; /* no reserved full or tails */ #endif @@ -1823,8 +1847,8 @@ static my_bool write_block_record(MARIA_HA *info, FULL_PAGE_SIZE(block_size))) && cur_block->page_count) { -#ifdef SANITY_CHECK - if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_BIT)) +#ifdef SANITY_CHECKS + if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_USED)) goto crashed; #endif data_length-= length; @@ -1838,7 +1862,7 @@ static my_bool write_block_record(MARIA_HA 
*info, /* Skip empty filler block */ cur_block++; } -#ifdef SANITY_CHECK +#ifdef SANITY_CHECKS if ((cur_block >= end_block)) goto crashed; #endif @@ -1951,15 +1975,15 @@ static my_bool write_block_record(MARIA_HA *info, head_block+1, bitmap_blocks->count - 1); } - if (share->base.transactional) + if (share->now_transactional) { uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; size_t data_length= (size_t) (data - row_pos->data); /* Log REDO changes of head page */ - page_store(log_data+ FILEID_STORE_SIZE, head_block->page); - dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, + page_store(log_data + FILEID_STORE_SIZE, head_block->page); + dirpos_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, row_pos->rownr); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); @@ -1996,7 +2020,7 @@ static my_bool write_block_record(MARIA_HA *info, else push_dynamic(&info->pinned_pages, (void*) &page_link); - if (share->base.transactional && (tmp_data_used || blob_full_pages_exists)) + if (share->now_transactional && (tmp_data_used || blob_full_pages_exists)) { /* Log REDO writes for all full pages (head part and all blobs) @@ -2093,7 +2117,7 @@ static my_bool write_block_record(MARIA_HA *info, } /* Write UNDO record */ - if (share->base.transactional) + if (share->now_transactional) { uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; @@ -2183,12 +2207,22 @@ crashed: disk_err: /** @todo RECOVERY we are going to let dirty pages go to disk while we have - logged UNDO, this violates WAL. If we have not written any full pages, - all dirty pages are pinned so we could just delete them from the - pagecache. Moreover, we have written some REDOs without a closing UNDO, + logged UNDO, this violates WAL. We must mark the table corrupted! 
+ + @todo RECOVERY we have written some REDOs without a closing UNDO, it's possible that a next operation by this transaction succeeds and then Recovery would glue the "orphan REDOs" to the succeeded operation and - execute the failed REDOs. + execute the failed REDOs. We need some mark "abort this group" in the + log, or mark the table corrupted (then user will repair it and thus REDOs + will be skipped). + + @todo RECOVERY to not let write errors go unnoticed, pagecache_write() + should take a MARIA_HA* in argument, and it it + fails when flushing a page to disk it should call + (*the_maria_ha->write_error_func)(the_maria_ha) + and this hook will mark the table corrupted. + Maybe hook should be stored in the pagecache's block structure, or in a + hash "file->maria_ha*". */ /* Unpin all pinned pages to not cause problems for disk cache */ _ma_unpin_all_pages(info, 0); @@ -2300,7 +2334,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info) } } - if (info->s->base.transactional) + if (info->s->now_transactional) { LSN lsn; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; @@ -2460,6 +2494,76 @@ err: /* + Delete a directory entry + + SYNOPSIS + delete_dir_entry() + buff Page buffer + block_size Block size + record_number Record number to delete + empty_space Empty space on page after delete + + RETURN + -1 Error on page + 0 ok + 1 Page is now empty +*/ + +static int delete_dir_entry(byte *buff, uint block_size, uint record_number, + uint *empty_space_res) +{ + uint number_of_records= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET]; + uint length, empty_space; + byte *dir; + DBUG_ENTER("delete_dir_entry"); + +#ifdef SANITY_CHECKS + if (record_number >= number_of_records || + record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 - + PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE)) + { + DBUG_PRINT("error", ("record_number: %u number_of_records: %u", + record_number, number_of_records)); + + DBUG_RETURN(-1); + } +#endif + + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + 
dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + dir[0]= dir[1]= 0; /* Delete entry */ + length= uint2korr(dir + 2); + + if (record_number == number_of_records - 1) + { + /* Delete this entry and all following empty directory entries */ + byte *end= buff + block_size - PAGE_SUFFIX_SIZE; + do + { + number_of_records--; + dir+= DIR_ENTRY_SIZE; + empty_space+= DIR_ENTRY_SIZE; + } while (dir < end && dir[0] == 0 && dir[1] == 0); + buff[DIR_COUNT_OFFSET]= (byte) (uchar) number_of_records; + } + empty_space+= length; + if (number_of_records != 0) + { + /* Update directory */ + int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + buff[PAGE_TYPE_OFFSET]|= (byte) PAGE_CAN_BE_COMPACTED; + + *empty_space_res= empty_space; + DBUG_RETURN(0); + } + buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE; + *empty_space_res= block_size; + DBUG_RETURN(1); +} + + +/* Delete a head a tail part SYNOPSIS @@ -2481,11 +2585,12 @@ static my_bool delete_head_or_tail(MARIA_HA *info, my_bool head) { MARIA_SHARE *share= info->s; - uint number_of_records, empty_space, length; + uint empty_space; uint block_size= share->block_size; - uchar *buff, *dir; + uchar *buff; LSN lsn; MARIA_PINNED_PAGE page_link; + int res; DBUG_ENTER("delete_head_or_tail"); info->keyread_buff_used= 1; @@ -2499,60 +2604,29 @@ static my_bool delete_head_or_tail(MARIA_HA *info, page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK; push_dynamic(&info->pinned_pages, (void*) &page_link); - number_of_records= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET]; -#ifdef SANITY_CHECKS - if (record_number >= number_of_records || - record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 - - PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE)) - { - DBUG_PRINT("error", ("record_number: %u number_of_records: %u", - record_number, number_of_records)); + res= delete_dir_entry(buff, block_size, record_number, &empty_space); + if (res < 0) DBUG_RETURN(1); - } -#endif - - dir= (buff + block_size - DIR_ENTRY_SIZE * record_number 
- - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); - dir[0]= dir[1]= 0; /* Delete entry */ - length= uint2korr(dir + 2); - empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); - - if (record_number == number_of_records - 1) - { - /* Delete this entry and all following empty directory entries */ - uchar *end= buff + block_size - PAGE_SUFFIX_SIZE; - do - { - number_of_records--; - dir+= DIR_ENTRY_SIZE; - empty_space+= DIR_ENTRY_SIZE; - } while (dir < end && dir[0] == 0 && dir[1] == 0); - buff[DIR_COUNT_OFFSET]= (uchar) (uchar) number_of_records; - } - empty_space+= length; - if (number_of_records != 0) + if (res == 0) { uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - - /* Update directory */ - int2store(buff + EMPTY_SPACE_OFFSET, empty_space); - buff[PAGE_TYPE_OFFSET]|= (uchar) PAGE_CAN_BE_COMPACTED; - DBUG_ASSERT(share->pagecache->block_size == block_size); - - /* Log REDO data */ - page_store(log_data+ FILEID_STORE_SIZE, page); - dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, + if (info->s->now_transactional) + { + /* Log REDO data */ + page_store(log_data+ FILEID_STORE_SIZE, page); + dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, record_number); - - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); - if (translog_write_record(&lsn, (head ? LOGREC_REDO_PURGE_ROW_HEAD : - LOGREC_REDO_PURGE_ROW_TAIL), - info->trn, share, sizeof(log_data), - TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data)) - DBUG_RETURN(1); + + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (translog_write_record(&lsn, (head ? 
LOGREC_REDO_PURGE_ROW_HEAD : + LOGREC_REDO_PURGE_ROW_TAIL), + info->trn, share, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data)) + DBUG_RETURN(1); + } if (pagecache_write(share->pagecache, &info->dfile, page, 0, buff, share->page_type, @@ -2560,11 +2634,6 @@ static my_bool delete_head_or_tail(MARIA_HA *info, PAGECACHE_PIN_LEFT_PINNED, PAGECACHE_WRITE_DELAY, &page_link.link)) DBUG_RETURN(1); - - /* Change the lock used when we read the page */ - page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; - set_dynamic(&info->pinned_pages, (void*) &page_link, - info->pinned_pages.elements-1); } else { @@ -2572,19 +2641,36 @@ static my_bool delete_head_or_tail(MARIA_HA *info, PAGE_STORE_SIZE + PAGERANGE_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - pagerange_store(log_data + FILEID_STORE_SIZE, 1); - page_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, page); - pagerange_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + - PAGE_STORE_SIZE, 1); - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); - if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, - info->trn, share, sizeof(log_data), - TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data)) + if (info->s->now_transactional) + { + pagerange_store(log_data + FILEID_STORE_SIZE, 1); + page_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, page); + pagerange_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + + PAGE_STORE_SIZE, 1); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, + info->trn, share, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data)) + DBUG_RETURN(1); + } + /* Write the empty page (needed only for REPAIR to work) */ + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, 
share->page_type, + PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, &page_link.link)) DBUG_RETURN(1); + DBUG_ASSERT(empty_space >= info->s->bitmap.sizes[0]); } + /* Change the lock used when we read the page */ + page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; + set_dynamic(&info->pinned_pages, (void*) &page_link, + info->pinned_pages.elements-1); + DBUG_PRINT("info", ("empty_space: %u", empty_space)); DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space)); } @@ -2648,7 +2734,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record) if (info->cur_row.extents && free_full_pages(info, &info->cur_row)) goto err; - if (info->s->base.transactional) + if (info->s->now_transactional) { LSN lsn; uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + @@ -2803,7 +2889,8 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, extent->extent+= ROW_EXTENT_SIZE; extent->page= uint5korr(extent->extent); page_count= uint2korr(extent->extent+ROW_EXTENT_PAGE_SIZE); - DBUG_ASSERT(page_count != 0); + if (!page_count) + goto crashed; extent->tail= page_count & TAIL_BIT; extent->page_count= (page_count & ~TAIL_BIT); extent->first_extent= 0; @@ -2826,7 +2913,8 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, if (!extent->tail) { /* Full data page */ - DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == BLOB_PAGE); + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != BLOB_PAGE) + goto crashed; extent->page++; /* point to next page */ extent->page_count--; *end_of_data= buff + share->block_size; @@ -2835,7 +2923,8 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, } /* Found tail. 
page_count is in this case the position in the tail page */ - DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == TAIL_PAGE); + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != TAIL_PAGE) + goto crashed; *(extent->tail_positions++)= ma_recordpos(extent->page, extent->page_count); info->cur_row.tail_count++; /* For maria_chk */ @@ -2957,7 +3046,6 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, MARIA_COLUMNDEF *column, *end_column; DBUG_ENTER("_ma_read_block_record2"); - LINT_INIT(field_lengths); LINT_INIT(field_length_data); LINT_INIT(blob_buffer); @@ -3003,6 +3091,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, } extent.first_extent= 1; + field_lengths= 0; if (share->base.max_field_lengths) { get_key_length(field_lengths, data); @@ -3037,7 +3126,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, Read row extents (note that first extent was already read into info->cur_row.extents above) */ - if (row_extents) + if (row_extents > 1) { if (read_long_data(info, info->cur_row.extents + ROW_EXTENT_SIZE, (row_extents - 1) * ROW_EXTENT_SIZE, @@ -3062,7 +3151,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, } /* Read array of field lengths. 
This may be stored in several extents */ - if (share->base.max_field_lengths) + if (field_lengths) { field_length_data= info->cur_row.field_lengths; if (read_long_data(info, field_length_data, field_lengths, &extent, @@ -3468,6 +3557,8 @@ restart_bitmap_scan: DBUG_PRINT("error", ("Wrong page header")); DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); } + DBUG_PRINT("info", ("Page %lu has %u rows", + (ulong) page, info->scan.number_of_rows)); info->scan.dir= (info->scan.page_buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE); info->scan.dir_end= (info->scan.dir - @@ -3493,8 +3584,11 @@ restart_bitmap_scan: filepos= (my_off_t) info->scan.bitmap_page * block_size; if (unlikely(filepos >= info->state->data_file_length)) { + DBUG_PRINT("info", ("Found end of file")); DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE)); } + DBUG_PRINT("info", ("Reading bitmap at %lu", + (ulong) info->scan.bitmap_page)); if (!(pagecache_read(share->pagecache, &info->dfile, info->scan.bitmap_page, 0, info->scan.bitmap_buff, PAGECACHE_PLAIN_PAGE, @@ -3993,3 +4087,268 @@ static size_t fill_update_undo_parts(MARIA_HA *info, const uchar *oldrec, row_length+= start_log_parts->length; DBUG_RETURN(row_length); } + +/*************************************************************************** + Applying of REDO log records +***************************************************************************/ + +/* + Apply LOGREC_REDO_INSERT_ROW_HEAD & LOGREC_REDO_INSERT_ROW_TAIL + + SYNOPSIS + _ma_apply_redo_insert_row_head_or_tail() + info Maria handler + lsn LSN to put on page + page_type HEAD_PAGE or TAIL_PAGE + header Header (without FILEID) + data Data to be put on page + data_length Length of data + + RETURN + 0 ok + # Error number +*/ + +uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header, + const byte *data, + size_t data_length) +{ + MARIA_SHARE *share= info->s; + ulonglong page; + uint rownr, empty_space; + uint block_size= share->block_size; + 
uint rec_offset; + byte *buff= info->keyread_buff, *dir; + DBUG_ENTER("_ma_apply_redo_insert_row_head"); + + info->keyread_buff_used= 1; + page= page_korr(header); + rownr= dirpos_korr(header+PAGE_STORE_SIZE); + + if (page * info->s->block_size > info->state->data_file_length) + { + /* New page at end of file */ + DBUG_ASSERT(rownr == 0); + if (rownr != 0) + goto err; + make_empty_page(buff, block_size, page_type); + empty_space= (block_size - PAGE_OVERHEAD_SIZE); + rec_offset= PAGE_HEADER_SIZE; + dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + + /* Update that file is extended */ + info->state->data_file_length= page * info->s->block_size; + } + else + { + uint max_entry; + if (!(buff= pagecache_read(share->pagecache, + &info->dfile, + page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + if (lsn_korr(buff) >= lsn) + { + /* Already applied */ + + /* Fix bitmap, just in case */ + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + DBUG_RETURN(0); + } + + max_entry= (uint) ((uchar*) buff)[DIR_COUNT_OFFSET]; + if (((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != page_type)) + { + /* + This is a page that has been freed before and now should be + changed to new type. 
+ */ + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != BLOB_PAGE && + (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != UNALLOCATED_PAGE) + goto err; + make_empty_page(buff, block_size, page_type); + empty_space= (block_size - PAGE_OVERHEAD_SIZE); + rec_offset= PAGE_HEADER_SIZE; + dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + } + else + { + dir= (buff + block_size - DIR_ENTRY_SIZE * (rownr + 1) - + PAGE_SUFFIX_SIZE); + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + + if (max_entry >= rownr) + { + /* Add directory entry first in directory and data last on page */ + DBUG_ASSERT(max_entry == rownr); + if (max_entry != rownr) + goto err; + rec_offset= (uint2korr(dir + DIR_ENTRY_SIZE) + + uint2korr(dir + DIR_ENTRY_SIZE +2)); + if ((uint) (dir - buff) < rec_offset + data_length) + { + /* Create place for directory & data */ + compact_page(buff, block_size, max_entry - 1, 0); + rec_offset= (uint2korr(dir + DIR_ENTRY_SIZE) + + uint2korr(dir + DIR_ENTRY_SIZE +2)); + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + DBUG_ASSERT(!((uint) (dir - buff) < rec_offset + data_length)); + if ((uint) (dir - buff) < rec_offset + data_length) + goto err; + } + buff[DIR_COUNT_OFFSET]= (byte) (uchar) max_entry+1; + int2store(dir, rec_offset); + empty_space-= DIR_ENTRY_SIZE; + } + else + { + /* reuse old empty entry */ + byte *pos, *end, *end_data; + DBUG_ASSERT(uint2korr(dir) == 0); + if (uint2korr(dir)) + goto err; /* Should have been empty */ + + /* Find start of where we can put data */ + end= (buff + block_size - DIR_ENTRY_SIZE * max_entry - + PAGE_SUFFIX_SIZE); + for (pos= dir ; pos >= end ; pos-= DIR_ENTRY_SIZE) + { + if ((rec_offset= uint2korr(pos))) + { + rec_offset+= uint2korr(pos+2); + break; + } + } + DBUG_ASSERT(pos >= end); + if (pos < end) /* Wrong directory */ + goto err; + + /* find end data */ + end_data= end; /* Start of directory */ + end= (buff + block_size - PAGE_SUFFIX_SIZE); + for (pos= dir ; pos < end ; pos+= DIR_ENTRY_SIZE) + { + uint offset; 
+ if ((offset= uint2korr(pos))) + { + end_data= buff + offset; + break; + } + } + if ((uint) (end_data - (buff + rec_offset)) < data_length) + { + uint length; + /* Not enough continues space, compact page to get more */ + int2store(dir, rec_offset); + compact_page(buff, block_size, rownr, 1); + rec_offset= uint2korr(dir); + length= uint2korr(dir+2); + DBUG_ASSERT(length >= data_length); + if (length < data_length) + goto err; + empty_space= length; + } + } + } + } + /* Copy data */ + int2store(dir+2, data_length); + memcpy(buff + rec_offset, data, data_length); + empty_space-= data_length; + int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + + /* Write modified page */ + lsn_store(buff, lsn); + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, 0)) + DBUG_RETURN(my_errno); + + /* Fix bitmap */ + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + + DBUG_RETURN(0); + +err: + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); +} + + +/* + Apply LOGREC_REDO_PURGE_ROW_HEAD & LOGREC_REDO_PURGE_ROW_TAIL + + SYNOPSIS + _ma_apply_redo_purge_row_head_or_tail() + info Maria handler + lsn LSN to put on page + page_type HEAD_PAGE or TAIL_PAGE + header Header (without FILEID) + data Data to be put on page + data_length Length of data + + NOTES + This function is very similar to delete_head_or_tail() + + RETURN + 0 ok + # Error number +*/ + +uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header) +{ + MARIA_SHARE *share= info->s; + ulonglong page; + uint record_number, empty_space; + uint block_size= share->block_size; + byte *buff= info->keyread_buff; + DBUG_ENTER("_ma_apply_redo_purge_row_head_or_tail"); + + info->keyread_buff_used= 1; + page= page_korr(header); + record_number= dirpos_korr(header+PAGE_STORE_SIZE); + + if (!(buff= pagecache_read(share->pagecache, + 
&info->dfile, + page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == (byte) page_type); + + if (lsn_korr(buff) >= lsn) + { + /* Already applied */ + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + DBUG_RETURN(0); + } + + if (delete_dir_entry(buff, block_size, record_number, &empty_space) < 0) + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); + + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, 0)) + DBUG_RETURN(my_errno); + + /* This will work even if the page was marked as UNALLOCATED_PAGE */ + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + + DBUG_RETURN(0); +} diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h index e9364f71069..1eeb3368972 100644 --- a/storage/maria/ma_blockrec.h +++ b/storage/maria/ma_blockrec.h @@ -178,3 +178,11 @@ my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, ulonglong page, uint *bitmap_pattern); void _ma_bitmap_delete_all(MARIA_SHARE *share); +uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header, + const byte *data, + size_t data_length); +uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 98c686d724b..e1fd9c0170a 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -42,7 +42,6 @@ #include "ma_ftdefs.h" #include <myisamchk.h> -#include <m_ctype.h> #include <stdarg.h> #include <my_getopt.h> #ifdef HAVE_SYS_VADVISE_H @@ -87,6 +86,13 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, static ha_checksum 
maria_byte_checksum(const uchar *buf, uint length); static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share); static void restore_data_file_type(MARIA_SHARE *share); +static void change_data_file_descriptor(MARIA_HA *info, File new_file); +static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info, + MARIA_HA *info, uchar *record); +static void copy_data_file_state(MARIA_STATE_INFO *to, + MARIA_STATE_INFO *from); +static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info); + void maria_chk_init(HA_CHECK *param) { @@ -838,7 +844,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, } } (*key_checksum)+= maria_byte_checksum((uchar*) key, - key_length- info->s->rec_reflength); + key_length- info->s->rec_reflength); record= _ma_dpos(info,0,key+key_length); if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */ { @@ -1263,18 +1269,21 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, } else { - info->cur_row.checksum= _ma_checksum(info,record); + ha_checksum checksum= 0; + if (info->s->calc_checksum) + checksum= (*info->s->calc_checksum)(info, record); + if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE)) { if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len, - test(info->s->calc_checksum))) + test(info->s->calc_checksum), checksum)) { _ma_check_print_error(param,"Found wrong packed record at %s", llstr(start_recpos,llbuff)); got_error= 1; } } - param->glob_crc+= info->cur_row.checksum; + param->glob_crc+= checksum; } if (! 
got_error) @@ -1507,8 +1516,11 @@ static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record, } if (info->s->calc_checksum) { - info->cur_row.checksum= _ma_checksum(info, record); - param->glob_crc+= info->cur_row.checksum; + ha_checksum checksum= (*info->s->calc_checksum)(info, record); + if (info->cur_row.checksum != (checksum & 255)) + _ma_check_print_error(param, "Page %9s: Row %3d has wrong checksum", + llstr(page_pos, llbuff), row); + param->glob_crc+= checksum; } if (info->cur_row.extents_count) { @@ -1572,6 +1584,8 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, my_bool full_dir; uint offset_page, offset; + LINT_INIT(full_dir); + if (_ma_scan_init_block_record(info)) { _ma_check_print_error(param, "got error %d when initializing scan", @@ -1649,13 +1663,12 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, llstr(pos, llbuff), page_type); if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) goto err; + continue; } switch ((enum en_page_type) page_type) { case UNALLOCATED_PAGE: case MAX_PAGE_TYPE: - DBUG_PRINT("warning", - ("Found page with wrong page type: %d", page_type)); - DBUG_ASSERT(0); + DBUG_ASSERT(0); /* Impossible */ break; case HEAD_PAGE: row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET]; @@ -1908,13 +1921,28 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) } /* maria_chk_data_link */ - /* Recover old table by reading each record and writing all keys */ - /* Save new datafile-name in temp_filename */ +/* + Recover old table by reading each record and writing all keys + + NOTES + Save new datafile-name in temp_filename + + IMPLEMENTATION (for hard repair with block format) + - Create new, unrelated MARIA_HA of the table + - Create new datafile and associate it with new handler + - Reset all statistic information in new handler + - Copy all data to new handler with normal write operations + - Move state of new handler to old handler + - 
Close new handler + - Close data file in old handler + - Rename old data file to new data file. + - Reopen data file in old handler +*/ int maria_repair(HA_CHECK *param, register MARIA_HA *info, char *name, int rep_quick) { - int error,got_error; + int error, got_error= 1; uint i; ha_rows start_records,new_header_length; my_off_t del; @@ -1923,6 +1951,10 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, char llbuff[22],llbuff2[22]; MARIA_SORT_INFO sort_info; MARIA_SORT_PARAM sort_param; + my_bool block_record, scan_inited= 0; + enum data_file_type org_data_file_type= info->s->data_file_type; + myf sync_dir= ((share->now_transactional && !share->temporary) ? + MY_SYNC_DIR : 0); DBUG_ENTER("maria_repair"); bzero((char *)&sort_info, sizeof(sort_info)); @@ -1930,9 +1962,11 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, start_records=info->state->records; new_header_length=(param->testflag & T_UNPACK) ? 0L : share->pack.header_length; - got_error=1; new_file= -1; sort_param.sort_info=&sort_info; + block_record= org_data_file_type == BLOCK_RECORD; + sort_info.info= sort_info.new_info= info; + bzero(&info->rec_cache,sizeof(info->rec_cache)); if (!(param->testflag & T_SILENT)) { @@ -1944,28 +1978,6 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; - if (init_io_cache(&param->read_cache, info->dfile.file, - (uint) param->read_buffer_length, - READ_CACHE,share->pack.header_length,1,MYF(MY_WME))) - { - bzero(&info->rec_cache,sizeof(info->rec_cache)); - goto err; - } - if (!rep_quick) - if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, - WRITE_CACHE, new_header_length, 1, - MYF(MY_WME | MY_WAIT_IF_FULL))) - goto err; - info->opt_flag|=WRITE_CACHE_USED; - if (!(sort_param.record=(uchar*) my_malloc((uint) share->base.pack_reclength, - MYF(0))) || - _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, - 
info->s->base.default_rec_buff_size)) - { - _ma_check_print_error(param, "Not enough memory for extra record"); - goto err; - } - if (!rep_quick) { /* Get real path for data file */ @@ -1984,11 +1996,79 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, new_header_length, "datafile-header")) goto err; info->s->state.dellink= HA_OFFSET_ERROR; - info->rec_cache.file=new_file; - if (param->testflag & T_UNPACK) - restore_data_file_type(share); + info->rec_cache.file= new_file; + if (share->data_file_type == BLOCK_RECORD || + ((param->testflag & T_UNPACK) && + share->state.header.org_data_file_type == BLOCK_RECORD)) + { + MARIA_HA *new_info; + /** + @todo RECOVERY it's a bit worrying to have two MARIA_SHARE on the + same index file: + - Checkpoint will see them as two tables + - are we sure that new_info never flushes an in-progress state + to the index file? And how to prevent Checkpoint from doing that? + - in the close future maria_close() will write the state... + */ + if (!(sort_info.new_info= maria_open(info->s->open_file_name, O_RDWR, + HA_OPEN_COPY | HA_OPEN_FOR_REPAIR))) + goto err; + new_info= sort_info.new_info; + change_data_file_descriptor(new_info, new_file); + maria_lock_database(new_info, F_EXTRA_LCK); + if ((param->testflag & T_UNPACK) && + share->data_file_type == COMPRESSED_RECORD) + { + (*new_info->s->once_end)(new_info->s); + (*new_info->s->end)(new_info); + restore_data_file_type(new_info->s); + _ma_setup_functions(new_info->s); + if ((*new_info->s->once_init)(new_info->s, new_file) || + (*new_info->s->init)(new_info)) + goto err; + } + _ma_reset_status(sort_info.new_info); + if (_ma_initialize_data_file(sort_info.new_info->s, new_file)) + goto err; + block_record= 1; + } + } + + if (org_data_file_type != BLOCK_RECORD) + { + /* We need a read buffer to read rows in big blocks */ + if (init_io_cache(&param->read_cache, info->dfile.file, + (uint) param->read_buffer_length, + READ_CACHE, share->pack.header_length, 1, MYF(MY_WME))) + goto err; } 
- sort_info.info=info; + if (sort_info.new_info->s->data_file_type != BLOCK_RECORD) + { + /* When writing to not block records, we need a write buffer */ + if (!rep_quick) + if (init_io_cache(&info->rec_cache, new_file, + (uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL))) + goto err; + info->opt_flag|=WRITE_CACHE_USED; + } + else + { + scan_inited= 1; + if (maria_scan_init(sort_info.info)) + goto err; + } + + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0))) || + _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, + info->s->base.default_rec_buff_size)) + { + _ma_check_print_error(param, "Not enough memory for extra record"); + goto err; + } + sort_info.param = param; sort_param.read_cache=param->read_cache; sort_param.pos=sort_param.max_pos=share->pack.header_length; @@ -2031,9 +2111,14 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, maria_lock_memory(param); /* Everything is alloced */ + sort_info.org_data_file_type= info->s->data_file_type; + /* Re-create all keys, which are set in key_map. 
*/ while (!(error=sort_get_next_record(&sort_param))) { + if (block_record && _ma_sort_write_record(&sort_param)) + goto err; + if (writekeys(&sort_param)) { if (my_errno != HA_ERR_FOUND_DUPP_KEY) @@ -2059,7 +2144,8 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, } continue; } - if (_ma_sort_write_record(&sort_param)) + + if (!block_record && _ma_sort_write_record(&sort_param)) goto err; } if (error > 0 || maria_write_data_suffix(&sort_info, (my_bool)!rep_quick) || @@ -2082,35 +2168,59 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, { _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); _ma_check_print_error(param,"Run recovery again without -q"); - got_error=1; param->retry_repair=1; param->testflag|=T_RETRY_WITHOUT_QUICK; goto err; } + if (param->testflag & T_SAFE_REPAIR) { /* Don't repair if we loosed more than one row */ - if (info->state->records+1 < start_records) + if (sort_info.new_info->state->records+1 < start_records) { info->state->records=start_records; - got_error=1; goto err; } } if (!rep_quick) { - my_close(info->dfile.file, MYF(0)); - info->dfile.file= new_file; - info->state->data_file_length=sort_param.filepos; + if (sort_info.new_info != sort_info.info) + { + MARIA_STATE_INFO save_state= sort_info.new_info->s->state; + if (maria_close(sort_info.new_info)) + { + _ma_check_print_error(param, "Got error %d on close", my_errno); + goto err; + } + copy_data_file_state(&info->s->state, &save_state); + new_file= -1; + } + else + info->state->data_file_length= sort_param.filepos; share->state.version=(ulong) time((time_t*) 0); /* Force reopen */ + + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + my_close(new_file, MYF(MY_WME)); + my_close(info->dfile.file, MYF(MY_WME)); + info->dfile.file= new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, + (param->testflag & T_BACKUP_DATA ? 
+ MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) | + sync_dir) || + _ma_open_datafile(info, share, -1)) + { + goto err; + } } else { - info->state->data_file_length=sort_param.max_pos; + info->state->data_file_length= sort_param.max_pos; } if (param->testflag & T_CALC_CHECKSUM) - info->state->checksum=param->glob_crc; + info->state->checksum= param->glob_crc; if (!(param->testflag & T_SILENT)) { @@ -2122,35 +2232,25 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, llstr(sort_info.dupp,llbuff)); } - got_error=0; + got_error= sync_dir ? write_log_record_for_repair(param, info) : 0; /* If invoked by external program that uses thr_lock */ if (&share->state.state != info->state) memcpy( &share->state.state, info->state, sizeof(*info->state)); err: - if (!got_error) - { - /* Replace the actual file with the temporary file */ - if (new_file >= 0) - { - myf sync_dir= (share->base.transactional && !share->temporary) ? - MY_SYNC_DIR : 0; - my_close(new_file,MYF(0)); - info->dfile.file= new_file= -1; - if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, - DATA_TMP_EXT, - MYF((param->testflag & T_BACKUP_DATA ? - MY_REDEL_MAKE_BACKUP : 0) | - sync_dir)) || - _ma_open_datafile(info,share,-1)) - got_error=1; - } - } + if (scan_inited) + maria_scan_end(sort_info.info); + if (got_error) { if (! param->error_printed) _ma_check_print_error(param,"%d for record at pos %s",my_errno, llstr(sort_param.start_recpos,llbuff)); + if (sort_info.new_info && sort_info.new_info != sort_info.info) + { + sort_info.new_info->dfile.file= -1; + maria_close(sort_info.new_info); + } if (new_file >= 0) { VOID(my_close(new_file,MYF(0))); @@ -2333,7 +2433,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name) int old_lock; MARIA_SHARE *share=info->s; MARIA_STATE_INFO old_state; - myf sync_dir= (share->base.transactional && !share->temporary) ? + myf sync_dir= (share->now_transactional && !share->temporary) ? 
MY_SYNC_DIR : 0; DBUG_ENTER("maria_sort_index"); @@ -2602,7 +2702,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, uint i; ulong length; ha_rows start_records; - my_off_t new_header_length,del; + my_off_t new_header_length, org_header_length, del; File new_file; MARIA_SORT_PARAM sort_param; MARIA_SHARE *share=info->s; @@ -2611,15 +2711,19 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, char llbuff[22]; MARIA_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; - myf sync_dir= (share->base.transactional && !share->temporary) ? - MY_SYNC_DIR : 0; + myf sync_dir= ((share->now_transactional && !share->temporary) ? + MY_SYNC_DIR : 0); DBUG_ENTER("maria_repair_by_sort"); + bzero((char*)&sort_info,sizeof(sort_info)); + bzero((char *)&sort_param, sizeof(sort_param)); + start_records=info->state->records; got_error=1; new_file= -1; - new_header_length=(param->testflag & T_UNPACK) ? 0 : - share->pack.header_length; + org_header_length= share->pack.header_length; + new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length; + if (!(param->testflag & T_SILENT)) { printf("- recovering (with sort) MARIA-table '%s'\n",name); @@ -2630,15 +2734,13 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; - bzero((char*)&sort_info,sizeof(sort_info)); - bzero((char *)&sort_param, sizeof(sort_param)); if (!(sort_info.key_block= alloc_key_blocks(param, (uint) param->sort_key_blocks, - share->base.max_key_block_length)) - || init_io_cache(&param->read_cache, info->dfile.file, - (uint) param->read_buffer_length, - READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || + share->base.max_key_block_length)) || + init_io_cache(&param->read_cache, info->dfile.file, + (uint) param->read_buffer_length, + READ_CACHE, org_header_length, 1, MYF(MY_WME)) || (! 
rep_quick && init_io_cache(&info->rec_cache, info->dfile.file, (uint) param->write_buffer_length, @@ -2648,6 +2750,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; info->opt_flag|=WRITE_CACHE_USED; info->rec_cache.file= info->dfile.file; /* for sort_delete_record */ + sort_info.org_data_file_type= info->s->data_file_type; if (!(sort_param.record=(uchar*) my_malloc((uint) share->base.pack_reclength, MYF(0))) || @@ -2703,8 +2806,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, key_map= ~key_map; /* Create the missing keys */ } - sort_info.info=info; - sort_info.param = param; + sort_info.info= sort_info.new_info= info; + sort_info.param= param; set_data_file_type(&sort_info, share); sort_param.filepos=new_header_length; @@ -2716,9 +2819,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, sort_param.wordlist=NULL; init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); - if (share->data_file_type == DYNAMIC_RECORD) + if (sort_info.org_data_file_type == DYNAMIC_RECORD) length=max(share->base.min_pack_length+1,share->base.min_block_length); - else if (share->data_file_type == COMPRESSED_RECORD) + else if (sort_info.org_data_file_type == COMPRESSED_RECORD) length=share->base.min_block_length; else length=share->base.pack_reclength; @@ -2756,7 +2859,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if ((!(param->testflag & T_SILENT))) printf ("- Fixing index %d\n",sort_param.key+1); - sort_param.max_pos=sort_param.pos=share->pack.header_length; + sort_param.max_pos= sort_param.pos= org_header_length; keyseg=sort_param.seg; bzero((char*) sort_param.unique,sizeof(sort_param.unique)); sort_param.key_length=share->rec_reflength; @@ -2854,8 +2957,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, share->state.version=(ulong) time((time_t*) 0); my_close(info->dfile.file, MYF(0)); 
info->dfile.file= new_file; - share->data_file_type=sort_info.new_data_file_type; - share->pack.header_length=(ulong) new_header_length; + share->data_file_type= sort_info.new_data_file_type; + org_header_length= (ulong) new_header_length; + sort_info.org_data_file_type= info->s->data_file_type; sort_param.fix_datafile=0; } else @@ -2883,11 +2987,11 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (rep_quick & T_FORCE_UNIQUENESS) { - my_off_t skr=info->state->data_file_length+ - (share->options & HA_OPTION_COMPRESS_RECORD ? - MEMMAP_EXTRA_MARGIN : 0); + my_off_t skr= (info->state->data_file_length + + (sort_info.org_data_file_type == COMPRESSED_RECORD) ? + MEMMAP_EXTRA_MARGIN : 0); #ifdef USE_RELOC - if (share->data_file_type == STATIC_RECORD && + if (sort_info.org_data_file_type == STATIC_RECORD && skr < share->base.reloc*share->base.min_pack_length) skr=share->base.reloc*share->base.min_pack_length; #endif @@ -3032,7 +3136,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, MARIA_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; pthread_attr_t thr_attr; - myf sync_dir= (share->base.transactional && !share->temporary) ? + myf sync_dir= (share->now_transactional && !share->temporary) ? 
MY_SYNC_DIR : 0; DBUG_ENTER("maria_repair_parallel"); @@ -3085,6 +3189,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); pthread_cond_init(&sort_info.cond, 0); + sort_info.org_data_file_type= info->s->data_file_type; + if (!(sort_info.key_block= alloc_key_blocks(param, (uint) param->sort_key_blocks, share->base.max_key_block_length)) || @@ -3152,8 +3258,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, key_map= ~key_map; /* Create the missing keys */ } - sort_info.info=info; - sort_info.param = param; + sort_info.info= sort_info.new_info= info; + sort_info.param= param; set_data_file_type(&sort_info, share); sort_info.dupp=0; @@ -3161,9 +3267,9 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, param->read_cache.end_of_file=sort_info.filelength= my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0)); - if (share->data_file_type == DYNAMIC_RECORD) + if (sort_info.org_data_file_type == DYNAMIC_RECORD) rec_length=max(share->base.min_pack_length+1,share->base.min_block_length); - else if (share->data_file_type == COMPRESSED_RECORD) + else if (sort_info.org_data_file_type == COMPRESSED_RECORD) rec_length=share->base.min_block_length; else rec_length=share->base.pack_reclength; @@ -3379,8 +3485,6 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, */ my_close(info->dfile.file, MYF(0)); info->dfile.file= new_file; - - share->data_file_type=sort_info.new_data_file_type; share->pack.header_length=(ulong) new_header_length; } else @@ -3397,11 +3501,11 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, if (rep_quick & T_FORCE_UNIQUENESS) { - my_off_t skr=info->state->data_file_length+ - (share->options & HA_OPTION_COMPRESS_RECORD ? - MEMMAP_EXTRA_MARGIN : 0); + my_off_t skr= (info->state->data_file_length + + (sort_info.org_data_file_type == COMPRESSED_RECORD) ? 
+ MEMMAP_EXTRA_MARGIN : 0); #ifdef USE_RELOC - if (share->data_file_type == STATIC_RECORD && + if (sort_info.org_data_file_type == STATIC_RECORD && skr < share->base.reloc*share->base.min_pack_length) skr=share->base.reloc*share->base.min_pack_length; #endif @@ -3587,27 +3691,28 @@ static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key) sort_get_next_record() sort_param Information about and for the sort process - NOTE - + NOTES Dynamic Records With Non-Quick Parallel Repair - For non-quick parallel repair we use a synchronized read/write - cache. This means that one thread is the master who fixes the data - file by reading each record from the old data file and writing it - to the new data file. By doing this the records in the new data - file are written contiguously. Whenever the write buffer is full, - it is copied to the read buffer. The slaves read from the read - buffer, which is not associated with a file. Thus read_cache.file - is -1. When using _mi_read_cache(), the slaves must always set - flag to READING_NEXT so that the function never tries to read from - file. This is safe because the records are contiguous. There is no - need to read outside the cache. This condition is evaluated in the - variable 'parallel_flag' for quick reference. read_cache.file must - be >= 0 in every other case. + For non-quick parallel repair we use a synchronized read/write + cache. This means that one thread is the master who fixes the data + file by reading each record from the old data file and writing it + to the new data file. By doing this the records in the new data + file are written contiguously. Whenever the write buffer is full, + it is copied to the read buffer. The slaves read from the read + buffer, which is not associated with a file. Thus read_cache.file + is -1. When using _mi_read_cache(), the slaves must always set + flag to READING_NEXT so that the function never tries to read from + file. 
This is safe because the records are contiguous. There is no + need to read outside the cache. This condition is evaluated in the + variable 'parallel_flag' for quick reference. read_cache.file must + be >= 0 in every other case. RETURN -1 end of file 0 ok + sort_param->filepos points to record position. + sort_param->record contains record > 0 error */ @@ -3628,10 +3733,61 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (*_ma_killed_ptr(param)) DBUG_RETURN(1); - switch (share->data_file_type) { + switch (sort_info->org_data_file_type) { case BLOCK_RECORD: - DBUG_ASSERT(0); + { + for (;;) + { + int flag; + + if (info != sort_info->new_info) + { + /* Safe scanning */ + flag= _ma_safe_scan_block_record(sort_info, info, + sort_param->record); + } + else + { + /* Scan on clean table */ + flag= _ma_scan_block_record(info, sort_param->record, + info->cur_row.nextpos, 1); + } + if (!flag) + { + if (sort_param->calc_checksum) + { + ha_checksum checksum; + checksum= (*info->s->calc_check_checksum)(info, sort_param->record); + if (info->s->calc_checksum && + info->cur_row.checksum != (checksum & 255)) + { + if (param->testflag & T_VERBOSE) + { + char llbuff[22]; + record_pos_to_txt(info, sort_param->filepos, llbuff); + _ma_check_print_info(param, + "Found record with wrong checksum at %s", + llbuff); + } + continue; + } + info->cur_row.checksum= checksum; + param->glob_crc+= checksum; + } + sort_param->filepos= info->cur_row.lastpos; + DBUG_RETURN(0); + } + if (flag == HA_ERR_END_OF_FILE) + { + sort_param->max_pos= sort_info->filelength; + DBUG_RETURN(-1); + } + /* Retry only if wrong record, not if disk error */ + if (flag != HA_ERR_WRONG_IN_RECORD) + DBUG_RETURN(flag); + } break; + } case STATIC_RECORD: for (;;) { @@ -3669,6 +3825,8 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) { uchar *to; LINT_INIT(to); + ha_checksum checksum= 0; + pos=sort_param->pos; searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); 
parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0; @@ -3938,14 +4096,14 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (sort_param->read_cache.error < 0) DBUG_RETURN(1); if (sort_param->calc_checksum) - info->cur_row.checksum= _ma_checksum(info, sort_param->record); + checksum= (info->s->calc_check_checksum)(info, sort_param->record); if ((param->testflag & (T_EXTEND | T_REP)) || searching) { if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff, sort_param->find_length, (param->testflag & T_QUICK) && sort_param->calc_checksum && - test(info->s->calc_checksum))) + test(info->s->calc_checksum), checksum)) { _ma_check_print_info(param,"Found wrong packed record at %s", llstr(sort_param->start_recpos,llbuff)); @@ -3953,7 +4111,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } } if (sort_param->calc_checksum) - param->glob_crc+= info->cur_row.checksum; + param->glob_crc+= checksum; DBUG_RETURN(0); } if (!searching) @@ -4027,8 +4185,9 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (sort_param->calc_checksum) { - info->cur_row.checksum= (*info->s->calc_checksum)(info, - sort_param->record); + info->cur_row.checksum= (*info->s->calc_check_checksum)(info, + sort_param-> + record); param->glob_crc+= info->cur_row.checksum; } DBUG_RETURN(0); @@ -4061,8 +4220,8 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) uchar *from; uchar block_buff[8]; MARIA_SORT_INFO *sort_info=sort_param->sort_info; - HA_CHECK *param=sort_info->param; - MARIA_HA *info=sort_info->info; + HA_CHECK *param= sort_info->param; + MARIA_HA *info= sort_info->new_info; MARIA_SHARE *share=info->s; DBUG_ENTER("_ma_sort_write_record"); @@ -4070,7 +4229,11 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) { switch (sort_info->new_data_file_type) { case BLOCK_RECORD: - DBUG_ASSERT(0); + if ((sort_param->filepos= (*share->write_record_init)(info, + sort_param-> + record)) == + HA_OFFSET_ERROR) + 
DBUG_RETURN(1); break; case STATIC_RECORD: if (my_b_write(&info->rec_cache,sort_param->record, @@ -4103,7 +4266,9 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER); } /* We can use info->checksum here as only one thread calls this */ - info->cur_row.checksum= _ma_checksum(info,sort_param->record); + info->cur_row.checksum= (*info->s->calc_check_checksum)(info, + sort_param-> + record); reclength= _ma_rec_pack(info,from,sort_param->record); flag=0; @@ -4160,7 +4325,7 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) } /* _ma_sort_write_record */ - /* Compare two keys from _ma_create_index_by_sort */ +/* Compare two keys from _ma_create_index_by_sort */ static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, const void *b) @@ -4518,7 +4683,8 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) } } if (sort_param->calc_checksum) - param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record); + param->glob_crc-=(*info->s->calc_check_checksum)(info, + sort_param->record); } error= (flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info, sort_param->record)); @@ -4527,7 +4693,8 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) DBUG_RETURN(error); } /* sort_delete_record */ - /* Fix all pending blocks and flush everything to disk */ + +/* Fix all pending blocks and flush everything to disk */ int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) { @@ -4799,9 +4966,9 @@ end: int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile) { - MARIA_HA *info=sort_info->info; + MARIA_HA *info=sort_info->new_info; - if (info->s->options & HA_OPTION_COMPRESS_RECORD && fix_datafile) + if (info->s->data_file_type == COMPRESSED_RECORD && fix_datafile) { char buff[MEMMAP_EXTRA_MARGIN]; bzero(buff,sizeof(buff)); @@ -5114,6 +5281,9 @@ my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, */ if (! 
maria_is_any_key_active(key_map)) return FALSE; /* Can't use sort */ + /* QQ: Remove this when maria_repair_by_sort() works with block format */ + if (info->s->data_file_type == BLOCK_RECORD) + return FALSE; for (i=0 ; i < share->base.keys ; i++,key++) { if (!force && maria_too_big_key_for_sort(key,rows)) @@ -5132,7 +5302,8 @@ set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share) MARIA_SHARE tmp; sort_info->new_data_file_type= share->state.header.org_data_file_type; /* Set delete_function for sort_delete_record() */ - memcpy((char*) &tmp, share, sizeof(*share)); + tmp= *share; + tmp.state.header.data_file_type= tmp.state.header.org_data_file_type; tmp.options= ~HA_OPTION_COMPRESS_RECORD; _ma_setup_functions(&tmp); share->delete_record=tmp.delete_record; @@ -5145,11 +5316,166 @@ static void restore_data_file_type(MARIA_SHARE *share) mi_int2store(share->state.header.options,share->options); share->state.header.data_file_type= share->state.header.org_data_file_type; - share->data_file_type= share->state.header.data_file_type= + share->data_file_type= share->state.header.data_file_type; share->pack.header_length= 0; } +static void change_data_file_descriptor(MARIA_HA *info, File new_file) +{ + my_close(info->dfile.file, MYF(0)); + info->dfile.file= info->s->bitmap.file.file= new_file; +} + + +/* + Copy all states that has to do with the data file + + NOTES + This is done to copy the state from the data file generated from + repair to the original handler +*/ + +static void copy_data_file_state(MARIA_STATE_INFO *to, + MARIA_STATE_INFO *from) +{ + to->state.records= from->state.records; + to->state.del= from->state.del; + to->state.empty= from->state.empty; + to->state.data_file_length= from->state.data_file_length; + to->split= from->split; + to->dellink= from->dellink; + to->first_bitmap_with_space= from->first_bitmap_with_space; +} + + +/* + Read 'safely' next record while scanning table. 
+ + SYNOPSIS + _ma_safe_scan_block_record() + info Maria handler + record Store found here + + NOTES + - One must have called mi_scan() before this + + Differences compared to _ma_scan_block_records() are: + - We read all blocks, not only blocks marked by the bitmap to be safe + - In case of errors, next read will read next record. + - More sanity checks + + RETURN + 0 ok + HA_ERR_END_OF_FILE End of file + # error number +*/ + + +static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info, + MARIA_HA *info, byte *record) +{ + uint record_pos= info->cur_row.nextpos; + ulonglong page= sort_info->page; + DBUG_ENTER("_ma_safe_scan_block_record"); + + for (;;) + { + /* Find next row in current page */ + if (likely(record_pos < info->scan.number_of_rows)) + { + uint length, offset; + byte *data, *end_of_data; + char llbuff[22]; + + while (!(offset= uint2korr(info->scan.dir))) + { + info->scan.dir-= DIR_ENTRY_SIZE; + record_pos++; + if (info->scan.dir < info->scan.dir_end) + { + _ma_check_print_info(sort_info->param, + "Wrong directory on page: %s", + llstr(page, llbuff)); + goto read_next_page; + } + } + /* found row */ + info->cur_row.lastpos= info->scan.row_base_page + record_pos; + info->cur_row.nextpos= record_pos + 1; + data= info->scan.page_buff + offset; + length= uint2korr(info->scan.dir + 2); + end_of_data= data + length; + info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */ + + if (end_of_data > info->scan.dir_end || + offset < PAGE_HEADER_SIZE || length < info->s->base.min_block_length) + { + _ma_check_print_info(sort_info->param, + "Wrong directory entry %3u at page %s", + record_pos, llstr(page, llbuff)); + record_pos++; + continue; + } + else + { + DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data)); + } + } + +read_next_page: + /* Read until we find next head page */ + for (;;) + { + uint page_type; + char llbuff[22]; + + sort_info->page++; /* In case of 
errors */ + page++; + if (!(page % info->s->bitmap.pages_covered)) + page++; /* Skip bitmap */ + if ((page + 1) * info->s->block_size > sort_info->filelength) + DBUG_RETURN(HA_ERR_END_OF_FILE); + if (!(pagecache_read(info->s->pagecache, + &info->dfile, + page, 0, info->scan.page_buff, + PAGECACHE_READ_UNKNOWN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + + page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] & + PAGE_TYPE_MASK); + if (page_type == HEAD_PAGE) + { + if ((info->scan.number_of_rows= + (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0) + break; + _ma_check_print_info(sort_info->param, + "Wrong head page at %s", + llstr(page * info->s->block_size, llbuff)); + } + else if (page_type >= MAX_PAGE_TYPE) + { + _ma_check_print_info(sort_info->param, + "Found wrong page type: %d at %s", + page_type, llstr(page * info->s->block_size, + llbuff)); + } + } + + /* New head page */ + info->scan.dir= (info->scan.page_buff + info->s->block_size - + PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE); + info->scan.dir_end= (info->scan.dir - + (info->scan.number_of_rows - 1) * + DIR_ENTRY_SIZE); + info->scan.row_base_page= ma_recordpos(page, 0); + record_pos= 0; + } +} + + /** @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn @@ -5170,13 +5496,28 @@ static void restore_data_file_type(MARIA_SHARE *share) @retval 1 error (disk problem) */ -int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info) +static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info) { MARIA_SHARE *share= info->s; - /* Only called from ha_maria.cc, not maria_check, so translog is inited */ - if (share->base.transactional && !share->temporary) + if (translog_inited) /* test it in case this is maria_chk */ { - /* For now this record is only informative */ + /* + For now this record is only informative. It could serve when applying + logs to a backup, but that needs more thought. Assume table became + corrupted. 
It is repaired, then some writes happen to it. + Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE + record. For it to give the same result as originally, the table should + be corrupted the same way, so applying previous REDOs should produce the + same corruption; that's really not guaranteed (different execution paths + in execution of REDOs vs runtime code so not same bugs hit, temporary + hardware issues not repeatable etc). Corruption may not be repeatable. + A reasonable solution is to execute the REDO_REPAIR_TABLE record and + check if the checksum of the resulting table matches what it was at the + end of the original repair (should be stored in log record); or execute + the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches + what it was at the start of the original repair (should be stored in log + record). + */ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; uchar log_data[LSN_STORE_SIZE]; compile_time_assert(LSN_STORE_SIZE >= (FILEID_STORE_SIZE + 4)); @@ -5193,18 +5534,16 @@ int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info) log_array[TRANSLOG_INTERNAL_PARTS + 0].length, sizeof(log_array)/sizeof(log_array[0]), - log_array, log_data))) + log_array, log_data) || + translog_flush(share->state.create_rename_lsn))) return 1; /* But this piece is really needed, to have the new table's content durable and to not apply old REDOs to the new table. The table's existence was made durable earlier (MY_SYNC_DIR passed to maria_change_to_newfile()). 
*/ - lsn_store(log_data, share->state.create_rename_lsn); DBUG_ASSERT(info->dfile.file >= 0); - DBUG_ASSERT(share->kfile.file >= 0); - return (my_pwrite(share->kfile.file, log_data, sizeof(log_data), - sizeof(share->state.header) + 2, MYF(MY_NABP)) || + return (_ma_update_create_rename_lsn_on_disk(share, FALSE) || _ma_sync_table_files(info)); } return 0; diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h index 1ce2ccb7012..c011c8234b7 100644 --- a/storage/maria/ma_checkpoint.h +++ b/storage/maria/ma_checkpoint.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +/* Copyright (C) 2006,2007 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,14 +21,61 @@ /* This is the interface of this module. */ -typedef enum enum_checkpoint_level { - NONE=-1, - INDIRECT, /* just write dirty_pages, transactions table and sync files */ - MEDIUM, /* also flush all dirty pages which were already dirty at prev checkpoint*/ - FULL /* also flush all dirty pages */ +typedef enum enum_ma_checkpoint_level { + CHECKPOINT_NONE= 0, + /* just write dirty_pages, transactions table and sync files */ + CHECKPOINT_INDIRECT, + /* also flush all dirty pages which were already dirty at prev checkpoint */ + CHECKPOINT_MEDIUM, + /* also flush all dirty pages */ + CHECKPOINT_FULL } CHECKPOINT_LEVEL; -void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); -my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level); -my_bool execute_asynchronous_checkpoint_if_any(); -/* that's all that's needed in the interface */ +C_MODE_START +int ma_checkpoint_init(); +void ma_checkpoint_end(); +int ma_checkpoint_execute(CHECKPOINT_LEVEL level, my_bool no_wait); +C_MODE_END + +/** + @brief reads some LSNs with special trickery + + If a 64-bit variable transitions between both halves being zero to both + halves being non-zero, and back, this 
function can be used to do a read of + it (without mutex, without atomic load) which always produces a correct + (though maybe slightly old) value (even on 32-bit CPUs). The value is at + least as new as the latest mutex unlock done by the calling thread. + The assumption is that the system sets both 4-byte halves either at the + same time, or one after the other (in any order), but NOT some bytes of the + first half then some bytes of the second half then the rest of bytes of the + first half. With this assumption, the function can detect when it is + seeing an inconsistent value. + + @param LSN pointer to the LSN variable to read + + @return LSN part (most significant byte always 0) +*/ +#if ( SIZEOF_CHARP >= 8 ) +/* 64-bit CPU, 64-bit reads are atomic */ +#define lsn_read_non_atomic LSN_WITH_FLAGS_TO_LSN +#else +static inline LSN lsn_read_non_atomic_32(const volatile LSN *x) +{ + /* + 32-bit CPU, 64-bit reads may give a mix of old half and new half (old + low bits and new high bits, or the contrary). + */ + for (;;) /* loop until no atomicity problems */ + { + /* + Remove most significant byte in case this is a LSN_WITH_FLAGS object. + Those flags in TRN::first_undo_lsn break the condition on transitions so + they must be removed below. + */ + LSN y= LSN_WITH_FLAGS_TO_LSN(*x); + if (likely((y == LSN_IMPOSSIBLE) || LSN_VALID(y))) + return y; + } +} +#define lsn_read_non_atomic(x) lsn_read_non_atomic_32(&x) +#endif diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index b760d537670..b52ce113540 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -85,6 +85,7 @@ int maria_close(register MARIA_HA *info) not change the crashed state. We can NOT write the state in other cases as other threads may be using the file at this point + IF using --external-locking, which does not apply to Maria. 
*/ if (share->mode != O_RDONLY && maria_is_crashed(info)) _ma_state_info_write(share->kfile.file, &share->state, 1); @@ -107,7 +108,8 @@ int maria_close(register MARIA_HA *info) } } #endif - my_free((uchar*) info->s,MYF(0)); + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + my_free((uchar*) share, MYF(0)); } pthread_mutex_unlock(&THR_LOCK_maria); if (info->ftparser_param) @@ -121,8 +123,6 @@ int maria_close(register MARIA_HA *info) my_free((uchar*) info,MYF(0)); if (error) - { - DBUG_RETURN(my_errno=error); - } + DBUG_RETURN(my_errno= error); DBUG_RETURN(0); } /* maria_close */ diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index db5440dc873..66f0c37f4a3 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -40,15 +40,9 @@ #define CONTROL_FILE_FILENO_SIZE 4 #define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) -/* - This module owns these two vars. - uint32 is always atomically updated, but LSN is 8 bytes, we will need - provisions to ensure that it's updated atomically in - ma_control_file_write_and_force(). Probably the log mutex could be - used. TODO. -*/ -LSN last_checkpoint_lsn; -uint32 last_logno; +/* This module owns these two vars. */ +LSN last_checkpoint_lsn= LSN_IMPOSSIBLE; +uint32 last_logno= FILENO_IMPOSSIBLE; /** @brief If log's lock should be asserted when writing to control file. @@ -65,16 +59,16 @@ my_bool maria_multi_threaded= FALSE; static int control_file_fd= -1; /* - Initialize control file subsystem - - SYNOPSIS - ma_control_file_create_or_open() + @brief Initialize control file subsystem - Looks for the control file. If absent, it's a fresh start, creates file. + Looks for the control file. If none and creation is requested, creates file. If present, reads it to find out last checkpoint's LSN and last log, updates the last_checkpoint_lsn and last_logno global variables. Called at engine's start. 
+ @param create_if_missing + + @note The format of the control file is: 4 bytes: magic string 4 bytes: checksum of the following bytes @@ -82,11 +76,11 @@ static int control_file_fd= -1; 4 bytes: offset in log where last checkpoint is 4 bytes: number of last log - RETURN - 0 - OK - 1 - Error (in which case the file is left closed) + @return Operation status + @retval 0 OK + @retval 1 Error (in which case the file is left closed) */ -CONTROL_FILE_ERROR ma_control_file_create_or_open() +CONTROL_FILE_ERROR ma_control_file_create_or_open(my_bool create_if_missing) { char buffer[CONTROL_FILE_SIZE]; char name[FN_REFLEN]; @@ -115,6 +109,8 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() if (create_file) { + if (!create_if_missing) + DBUG_RETURN(CONTROL_FILE_MISSING); if ((control_file_fd= my_create(name, 0, open_flags, MYF(MY_SYNC_DIR))) < 0) DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); @@ -136,8 +132,8 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() */ /* init the file with these "undefined" values */ - DBUG_RETURN(ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, - CONTROL_FILE_IMPOSSIBLE_FILENO, + DBUG_RETURN(ma_control_file_write_and_force(LSN_IMPOSSIBLE, + FILENO_IMPOSSIBLE, CONTROL_FILE_UPDATE_ALL)); } @@ -315,8 +311,8 @@ int ma_control_file_end() As this module owns these variables, closing the module forbids access to them (just a safety): */ - last_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; - last_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + last_checkpoint_lsn= LSN_IMPOSSIBLE; + last_logno= FILENO_IMPOSSIBLE; DBUG_RETURN(close_error); } diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index c974838684b..fa4ec442e41 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -19,27 +19,17 @@ */ #define CONTROL_FILE_BASE_NAME "maria_control" -/* - indicate absence of the log file number; first log is always number 1, 0 is - impossible. 
-*/ -#define CONTROL_FILE_IMPOSSIBLE_FILENO 0 -/* logs always have a header */ -#define CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET 0 -/* indicate absence of LSN. */ -#define CONTROL_FILE_IMPOSSIBLE_LSN ((LSN)0) /* Here is the interface of this module */ /* LSN of the last checkpoint - (if last_checkpoint_lsn == CONTROL_FILE_IMPOSSIBLE_LSN - then there was never a checkpoint) + (if last_checkpoint_lsn == LSN_IMPOSSIBLE then there was never a checkpoint) */ extern LSN last_checkpoint_lsn; /* - Last log number (if last_logno == - CONTROL_FILE_IMPOSSIBLE_FILENO then there is no log file yet) + Last log number (if last_logno == FILENO_IMPOSSIBLE then there is no log + file yet) */ extern uint32 last_logno; @@ -51,6 +41,7 @@ typedef enum enum_control_file_error { CONTROL_FILE_TOO_BIG, CONTROL_FILE_BAD_MAGIC_STRING, CONTROL_FILE_BAD_CHECKSUM, + CONTROL_FILE_MISSING, CONTROL_FILE_UNKNOWN_ERROR /* any other error */ } CONTROL_FILE_ERROR; @@ -63,11 +54,11 @@ extern "C" { #endif /* - Looks for the control file. If absent, it's a fresh start, create file. - If present, read it to find out last checkpoint's LSN and last log. + Looks for the control file. If none and creation was requested, creates file. + If present, reads it to find out last checkpoint's LSN and last log. Called at engine's start. */ -CONTROL_FILE_ERROR ma_control_file_create_or_open(); +CONTROL_FILE_ERROR ma_control_file_create_or_open(my_bool); /* Write information durably to the control file. 
Called when we have created a new log (after syncing this log's creation) diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index e26b49a1d37..88374872ce2 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -52,8 +52,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, unique_key_parts,fulltext_keys,offset, not_block_record_extra_length; uint max_field_lengths, extra_header_size; ulong reclength, real_reclength,min_pack_length; - char filename[FN_REFLEN], dlinkname[FN_REFLEN], *dlinkname_ptr= NULL, - klinkname[FN_REFLEN], *klinkname_ptr= NULL; + char filename[FN_REFLEN], linkname[FN_REFLEN], *linkname_ptr; ulong pack_reclength; ulonglong tot_length,max_rows, tmp; enum en_fieldtype type; @@ -260,7 +259,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, } share.base.null_bytes= ci->null_bytes; share.base.original_null_bytes= ci->null_bytes; - share.base.transactional= ci->transactional; + share.base.born_transactional= ci->transactional; share.base.max_field_lengths= max_field_lengths; share.base.field_offsets= 0; /* for future */ @@ -621,14 +620,14 @@ int maria_create(const char *name, enum data_file_type datafile_type, mi_int2store(share.state.header.state_info_length,MARIA_STATE_INFO_SIZE); mi_int2store(share.state.header.base_info_length,MARIA_BASE_INFO_SIZE); mi_int2store(share.state.header.base_pos,base_pos); - share.state.header.data_file_type= datafile_type; + share.state.header.data_file_type= share.data_file_type= datafile_type; share.state.header.org_data_file_type= org_datafile_type; share.state.header.language= (ci->language ? 
ci->language : default_charset_info->number); share.state.dellink = HA_OFFSET_ERROR; share.state.first_bitmap_with_space= 0; - share.state.create_rename_lsn= 0; + share.state.create_rename_lsn= LSN_IMPOSSIBLE; share.state.process= (ulong) getpid(); share.state.unique= (ulong) 0; share.state.update_count=(ulong) 0; @@ -721,9 +720,9 @@ int maria_create(const char *name, enum data_file_type datafile_type, MY_UNPACK_FILENAME | (have_iext ? MY_REPLACE_EXT : MY_APPEND_EXT)); } - fn_format(klinkname, name, "", MARIA_NAME_IEXT, + fn_format(linkname, name, "", MARIA_NAME_IEXT, MY_UNPACK_FILENAME|MY_APPEND_EXT); - klinkname_ptr= klinkname; + linkname_ptr= linkname; /* Don't create the table if the link or file exists to ensure that one doesn't accidently destroy another table. @@ -739,6 +738,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, (MY_UNPACK_FILENAME | (flags & HA_DONT_TOUCH_DATA) ? MY_RETURN_REAL_PATH : 0) | MY_APPEND_EXT); + linkname_ptr= NullS; /* Replace the current file. Don't sync dir now if the data file has the same path. @@ -761,54 +761,11 @@ int maria_create(const char *name, enum data_file_type datafile_type, goto err; } - if ((file= my_create_with_symlink(klinkname_ptr, filename, 0, create_mode, + if ((file= my_create_with_symlink(linkname_ptr, filename, 0, create_mode, MYF(MY_WME|create_flag))) < 0) goto err; errpos=1; - if (!(flags & HA_DONT_TOUCH_DATA)) - { - if (ci->data_file_name) - { - char *dext= strrchr(ci->data_file_name, '.'); - int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); - - if (tmp_table) - { - char *path; - /* chop off the table name, tempory tables use generated name */ - if ((path= strrchr(ci->data_file_name, FN_LIBCHAR))) - *path= '\0'; - fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT, - MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); - } - else - { - fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | - (have_dext ? 
MY_REPLACE_EXT : MY_APPEND_EXT)); - } - fn_format(dlinkname, name, "",MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | MY_APPEND_EXT); - dlinkname_ptr= dlinkname; - create_flag=0; - } - else - { - fn_format(filename,name,"", MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | MY_APPEND_EXT); - create_flag=MY_DELETE_OLD; - } - if ((dfile= - my_create_with_symlink(dlinkname_ptr, filename, 0, create_mode, - MYF(MY_WME | create_flag | sync_dir))) < 0) - goto err; - errpos=3; - - share.data_file_type= datafile_type; - if (_ma_initialize_data_file(dfile, &share)) - goto err; - } DBUG_PRINT("info", ("write state info and base info")); if (_ma_state_info_write(file, &share.state, 2) || _ma_base_info_write(file, &share.base)) @@ -948,33 +905,38 @@ int maria_create(const char *name, enum data_file_type datafile_type, not log 1 KB of mostly zeroes if this is a small table. */ char empty_string[]= ""; - LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; uint total_rec_length= 0; uint i; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 1 + 2 + + log_array[TRANSLOG_INTERNAL_PARTS + 1].length= 1 + 2 + 2 + kfile_size_before_extension; /* we are needing maybe 64 kB, so don't use the stack */ - log_data= my_malloc(log_array[TRANSLOG_INTERNAL_PARTS + 0].length, MYF(0)); + log_data= my_malloc(log_array[TRANSLOG_INTERNAL_PARTS + 1].length, MYF(0)); if ((log_data == NULL) || - my_pread(file, 1 + 2 + log_data, kfile_size_before_extension, + my_pread(file, 1 + 2 + 2 + log_data, kfile_size_before_extension, 0, MYF(MY_NABP))) - goto err_no_lock; + goto err; /* remember if the data file was created or not, to know if Recovery can do it or not, in the future */ log_data[0]= test(flags & HA_DONT_TOUCH_DATA); int2store(log_data + 1, kfile_size_before_extension); - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; + int2store(log_data + 1 + 2, share.base.keystart); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char *)name; + /* we store the end-zero, for 
Recovery to just pass it to my_create() */ + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= + strlen(log_array[TRANSLOG_INTERNAL_PARTS + 0].str) + 1; + log_array[TRANSLOG_INTERNAL_PARTS + 1].str= log_data; /* symlink description is also needed for re-creation by Recovery: */ - log_array[TRANSLOG_INTERNAL_PARTS + 1].str= - dlinkname_ptr ? dlinkname : empty_string; - log_array[TRANSLOG_INTERNAL_PARTS + 1].length= - strlen(log_array[TRANSLOG_INTERNAL_PARTS + 1].str); - log_array[TRANSLOG_INTERNAL_PARTS + 2].str= - klinkname_ptr ? klinkname : empty_string; + log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (char *) + (ci->data_file_name ? ci->data_file_name : empty_string); log_array[TRANSLOG_INTERNAL_PARTS + 2].length= - strlen(log_array[TRANSLOG_INTERNAL_PARTS + 2].str); + strlen(log_array[TRANSLOG_INTERNAL_PARTS + 2].str) + 1; + log_array[TRANSLOG_INTERNAL_PARTS + 3].str= (char *) + (ci->index_file_name ? ci->index_file_name : empty_string); + log_array[TRANSLOG_INTERNAL_PARTS + 3].length= + strlen(log_array[TRANSLOG_INTERNAL_PARTS + 3].str) + 1; for (i= TRANSLOG_INTERNAL_PARTS; i < (sizeof(log_array)/sizeof(log_array[0])); i++) total_rec_length+= log_array[i].length; @@ -983,8 +945,14 @@ int maria_create(const char *name, enum data_file_type datafile_type, MySQL layer to be crash-safe, which it is not now (that would require work using the ddl_log of sql/sql_table.cc); when it is, we should reconsider the moment of writing this log record (before or after op, - under THR_LOCK_maria or not...), how to use it in Recovery, and force - the log. For now this record is just informative. + under THR_LOCK_maria or not...), how to use it in Recovery. + For now this record can serve when we apply logs to a backup, + so we sync it. This happens before the data file is created. 
If the data + file was created before, and we crashed before writing the log record, + at restart the table may be used, so we would not have a trustable + history in the log (impossible to apply this log to a backup). The way + we do it, if we crash before writing the log record then there is no + data file and the table cannot be used. Note that in case of TRUNCATE TABLE we also come here. When in CREATE/TRUNCATE (or DROP or RENAME or REPAIR) we have not called external_lock(), so have no TRN. It does not matter, as all these @@ -995,21 +963,63 @@ int maria_create(const char *name, enum data_file_type datafile_type, &dummy_transaction_object, NULL, total_rec_length, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL))) - goto err_no_lock; + log_array, NULL) || + translog_flush(share.state.create_rename_lsn))) + goto err; /* store LSN into file, needed for Recovery to not be confused if a DROP+CREATE happened (applying REDOs to the wrong table). - If such direct my_pwrite() to a fixed offset is too "hackish", I can - call ma_state_info_write() again but it will be less efficient. */ - lsn_store(log_data, share.state.create_rename_lsn); - if (my_pwrite(file, log_data, LSN_STORE_SIZE, - sizeof(share.state.header) + 2, MYF(MY_NABP))) - goto err_no_lock; + share.kfile.file= file; + if (_ma_update_create_rename_lsn_on_disk(&share, FALSE)) + goto err; my_free(log_data, MYF(0)); } + if (!(flags & HA_DONT_TOUCH_DATA)) + { + if (ci->data_file_name) + { + char *dext= strrchr(ci->data_file_name, '.'); + int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); + + if (tmp_table) + { + char *path; + /* chop off the table name, tempory tables use generated name */ + if ((path= strrchr(ci->data_file_name, FN_LIBCHAR))) + *path= '\0'; + fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT, + MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); + } + else + { + fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | + (have_dext ? 
MY_REPLACE_EXT : MY_APPEND_EXT)); + } + fn_format(linkname, name, "",MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr= linkname; + create_flag=0; + } + else + { + fn_format(filename,name,"", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr= NullS; + create_flag=MY_DELETE_OLD; + } + if ((dfile= + my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME | create_flag | sync_dir))) < 0) + goto err; + errpos=3; + + if (_ma_initialize_data_file(&share, dfile)) + goto err; + } + /* Enlarge files */ DBUG_PRINT("info", ("enlarge to keystart: %lu", (ulong) share.base.keystart)); @@ -1025,7 +1035,6 @@ int maria_create(const char *name, enum data_file_type datafile_type, if (my_chsize(dfile,share.base.min_pack_length*ci->reloc_rows,0,MYF(0))) goto err; #endif - errpos=2; if ((sync_dir && my_sync(dfile, MYF(0))) || my_close(dfile,MYF(0))) goto err; } @@ -1146,7 +1155,7 @@ static int compare_columns(MARIA_COLUMNDEF **a_ptr, MARIA_COLUMNDEF **b_ptr) /* Initialize data file */ -int _ma_initialize_data_file(File dfile, MARIA_SHARE *share) +int _ma_initialize_data_file(MARIA_SHARE *share, File dfile) { if (share->data_file_type == BLOCK_RECORD) { @@ -1157,3 +1166,32 @@ int _ma_initialize_data_file(File dfile, MARIA_SHARE *share) } return 0; } + + +/** + @brief Writes create_rename_lsn to disk, optionally forces + + This is for special cases where: + - we don't want to write the full state to disk (so, not call + _ma_state_info_write()) because some parts of the state may be + currently inconsistent, or because it would be overkill + - we must sync this LSN immediately for correctness. 
+ + @param share table's share + @param do_sync if the write should be forced to disk + + @return Operation status + @retval 0 ok + @retval 1 error (disk problem) +*/ + +int _ma_update_create_rename_lsn_on_disk(MARIA_SHARE *share, my_bool do_sync) +{ + char buf[LSN_STORE_SIZE]; + File file= share->kfile.file; + DBUG_ASSERT(file >= 0); + lsn_store(buf, share->state.create_rename_lsn); + return (my_pwrite(file, buf, sizeof(buf), + sizeof(share->state.header) + 2, MYF(MY_NABP)) || + (do_sync && my_sync(file, MYF(0)))); +} diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index 7286f540aa1..42e7fb3c2f9 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -17,7 +17,7 @@ /* This clears the status information and truncates files */ #include "maria_def.h" -#include "trnman_public.h" +#include "trnman.h" /** @brief deletes all rows from a table @@ -31,9 +31,7 @@ int maria_delete_all_rows(MARIA_HA *info) { - uint i; MARIA_SHARE *share=info->s; - MARIA_STATE_INFO *state=&share->state; my_bool log_record; DBUG_ENTER("maria_delete_all_rows"); @@ -48,22 +46,30 @@ int maria_delete_all_rows(MARIA_HA *info) */ if (_ma_readinfo(info,F_WRLCK,1)) DBUG_RETURN(my_errno); - log_record= share->base.transactional && !share->temporary; + log_record= share->now_transactional && !share->temporary; if (_ma_mark_file_changed(info)) goto err; - info->state->records=info->state->del=state->split=0; - state->changed= 0; /* File is optimized */ - state->dellink = HA_OFFSET_ERROR; - state->sortkey= (ushort) ~0; - info->state->key_file_length=share->base.keystart; - info->state->data_file_length=0; - info->state->empty=info->state->key_empty=0; - info->state->checksum=0; + if (log_record) + { + /* + This record will be used by Recovery to finish the deletion if it + crashed. We force it because it's a non-undoable operation. 
+ */ + LSN lsn; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + uchar log_data[FILEID_STORE_SIZE]; + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (unlikely(translog_write_record(&lsn, LOGREC_REDO_DELETE_ALL, + info->trn, share, 0, + sizeof(log_array)/sizeof(log_array[0]), + log_array, log_data) || + translog_flush(lsn))) + goto err; + } - state->key_del= HA_OFFSET_ERROR; - for (i=0 ; i < share->base.keys ; i++) - state->key_root[i]= HA_OFFSET_ERROR; + _ma_reset_status(info); /* If we are using delayed keys or if the user has done changes to the tables @@ -75,9 +81,15 @@ int maria_delete_all_rows(MARIA_HA *info) my_chsize(share->kfile.file, share->base.keystart, 0, MYF(MY_WME)) ) goto err; - if (_ma_initialize_data_file(info->dfile.file, share)) + if (_ma_initialize_data_file(share, info->dfile.file)) goto err; + /* + The operations above on the index/data file will be forced to disk at + Checkpoint or maria_close() time. So we can reset: + */ + info->trn->rec_lsn= LSN_IMPOSSIBLE; + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); #ifdef HAVE_MMAP /* Resize mmaped area */ @@ -85,38 +97,6 @@ int maria_delete_all_rows(MARIA_HA *info) _ma_remap_file(info, (my_off_t)0); rw_unlock(&info->s->mmap_lock); #endif - if (log_record) - { - /* For now this record is only informative */ - LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - uchar log_data[LSN_STORE_SIZE]; - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= FILEID_STORE_SIZE; - if (unlikely(translog_write_record(&share->state.create_rename_lsn, - LOGREC_REDO_DELETE_ALL, - info->trn, share, 0, - sizeof(log_array)/sizeof(log_array[0]), - log_array, log_data))) - goto err; - /* - store LSN into file. 
It is an optimization so that all old REDOs for - this table are ignored (scenario: checkpoint, INSERT1s, DELETE ALL; - INSERT2s, crash: then Recovery can skip INSERT1s). It also allows us to - ignore the present record at Recovery. - Note that storing the LSN could not be done by _ma_writeinfo() above as - the table is locked at this moment. So we need to do it by ourselves. - */ - lsn_store(log_data, share->state.create_rename_lsn); - if (my_pwrite(share->kfile.file, log_data, sizeof(log_data), - sizeof(share->state.header) + 2, MYF(MY_NABP)) || - _ma_sync_table_files(info)) - goto err; - /** - @todo RECOVERY Until we take into account the log record above - for log-low-water-mark calculation and use it in Recovery, we need - to sync above. - */ - } allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(0); @@ -125,10 +105,47 @@ err: int save_errno=my_errno; VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); info->update|=HA_STATE_WRITTEN; /* Buffer changed */ - /** @todo RECOVERY until we use the log record above we have to sync */ - if (log_record &&_ma_sync_table_files(info) && !save_errno) - save_errno= my_errno; + /** + @todo RECOVERY if we come here, Recovery may later apply the REDO above, + which may be wrong. Not fixing it now, as anyway this way of deleting + rows will have to be re-examined when we have versioning. + */ allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(my_errno=save_errno); } -} /* maria_delete */ +} /* maria_delete_all_rows */ + + +/* + Reset status information + + SYNOPSIS + _ma_reset_status() + maria Maria handler + + DESCRIPTION + Resets data and index file information as if the file would be empty + Files are not touched. 
+*/ + +void _ma_reset_status(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + MARIA_STATE_INFO *state= &share->state; + uint i; + + info->state->records= info->state->del= state->split= 0; + state->changed= 0; /* File is optimized */ + state->dellink= HA_OFFSET_ERROR; + state->sortkey= (ushort) ~0; + info->state->key_file_length= share->base.keystart; + info->state->data_file_length= 0; + info->state->empty= info->state->key_empty= 0; + info->state->checksum= 0; + + /* Drop the delete key chain. */ + state->key_del= HA_OFFSET_ERROR; + /* Clear all keys */ + for (i=0 ; i < share->base.keys ; i++) + state->key_root[i]= HA_OFFSET_ERROR; +} diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c index 990714043bf..6d6b9d032fd 100644 --- a/storage/maria/ma_delete_table.c +++ b/storage/maria/ma_delete_table.c @@ -64,7 +64,7 @@ int maria_delete_table(const char *name) raid_type= info->s->base.raid_type; raid_chunks= info->s->base.raid_chunks; #endif - sync_dir= (info->s->base.transactional && !info->s->temporary) ? + sync_dir= (info->s->now_transactional && !info->s->temporary) ? MY_SYNC_DIR : 0; maria_close(info); } @@ -78,9 +78,9 @@ int maria_delete_table(const char *name) { /* For this log record to be of any use for Recovery, we need the upper - MySQL layer to be crash-safe in DDLs; when it is we should reconsider - the moment of writing this log record, how to use it in Recovery, and - force the log. For now this record is only informative. + MySQL layer to be crash-safe in DDLs. + For now this record can serve when we apply logs to a backup, so we sync + it. 
*/ LSN lsn; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; @@ -91,7 +91,8 @@ int maria_delete_table(const char *name) log_array[TRANSLOG_INTERNAL_PARTS + 0].length, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL))) + log_array, NULL) || + translog_flush(lsn))) DBUG_RETURN(1); } diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index e1968811ba2..28b970ef589 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -1019,7 +1019,8 @@ uint _ma_rec_pack(MARIA_HA *info, register uchar *to, */ my_bool _ma_rec_check(MARIA_HA *info,const char *record, uchar *rec_buff, - ulong packed_length, my_bool with_checksum) + ulong packed_length, my_bool with_checksum, + ha_checksum checksum) { uint length,new_length,flag,bit,i; char *pos,*end,*packpos,*to; @@ -1125,7 +1126,7 @@ my_bool _ma_rec_check(MARIA_HA *info,const char *record, uchar *rec_buff, if (packed_length != (uint) (to - (char*) rec_buff) + test(info->s->calc_checksum) || (bit != 1 && (flag & ~(bit - 1)))) goto err; - if (with_checksum && ((uchar) info->cur_row.checksum != (uchar) *to)) + if (with_checksum && ((uchar) checksum != (uchar) *to)) { DBUG_PRINT("error",("wrong checksum for row")); goto err; diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c index a04fba4e0d8..cfb4580a72f 100644 --- a/storage/maria/ma_info.c +++ b/storage/maria/ma_info.c @@ -135,6 +135,7 @@ void _ma_report_error(int errcode, const char *file_name) file_name+= length - 64; } } + my_error(errcode, MYF(ME_NOREFRESH), file_name); DBUG_VOID_RETURN; } diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c index 8042c6d9873..1cd82720260 100644 --- a/storage/maria/ma_init.c +++ b/storage/maria/ma_init.c @@ -44,6 +44,7 @@ int maria_init(void) maria_inited= TRUE; pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW); _ma_init_block_record_data(); + my_handler_error_register(); } return 0; } diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index 
abb095d47c2..1825367c44c 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -129,6 +129,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type) } info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); info->lock_type= F_UNLCK; + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); break; case F_RDLCK: if (info->lock_type == F_WRLCK) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 9364fe6a5c8..95c8aacaf09 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -61,21 +61,6 @@ #define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE) #define MAX_NUMBER_OF_LSNS_PER_RECORD 2 -/* record parts descriptor */ -struct st_translog_parts -{ - /* full record length */ - translog_size_t record_length; - /* full record length with chunk headers */ - translog_size_t total_record_length; - /* current part index */ - uint current; - /* total number of elements in parts */ - uint elements; - /* array of parts (LEX_STRING) */ - LEX_STRING *parts; -}; - /* log write buffer descriptor */ struct st_translog_buffer { @@ -176,15 +161,6 @@ static uchar end_of_log= 0; my_bool translog_inited= 0; -/* record classes */ -enum record_class -{ - LOGRECTYPE_NOT_ALLOWED, - LOGRECTYPE_VARIABLE_LENGTH, - LOGRECTYPE_PSEUDOFIXEDLENGTH, - LOGRECTYPE_FIXEDLENGTH -}; - /* chunk types */ #define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head or tail */ #define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */ @@ -196,52 +172,11 @@ enum record_class /* compressed (relative) LSN constants */ #define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */ -typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, - TRN *trn, struct st_maria_share *share, - struct st_translog_parts *parts); - -typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, - TRN *trn, - LSN *lsn, - struct st_translog_parts *parts); - -typedef uint16(*read_rec_hook) 
(enum translog_record_type type, - uint16 read_length, uchar *read_buff, - uchar *decoded_buff); - -/* - Descriptor of log record type - Note: Don't reorder because of constructs later... -*/ -struct st_log_record_type_descriptor -{ - /* internal class of the record */ - enum record_class class; - /* - length for fixed-size record, pseudo-fixed record - length with uncompressed LSNs - */ - uint16 fixed_length; - /* how much record body (belonged to headers too) read with headers */ - uint16 read_header_len; - /* HOOK for writing the record called before lock */ - prewrite_rec_hook prewrite_hook; - /* HOOK for writing the record called when LSN is known, inside lock */ - inwrite_rec_hook inwrite_hook; - /* HOOK for reading headers */ - read_rec_hook read_hook; - /* - For pseudo fixed records number of compressed LSNs followed by - system header - */ - int16 compressed_LSN; -}; #include <my_atomic.h> /* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */ static MARIA_SHARE **id_to_share= NULL; -#define SHARE_ID_MAX 65535 /* array's size */ /* lock for id_to_share */ static my_atomic_rwlock_t LOCK_id_to_share; @@ -257,27 +192,32 @@ static my_bool write_hook_for_undo(enum translog_record_type type, NOTE that after first public Maria release, these can NOT be changed */ -typedef struct st_log_record_type_descriptor LOG_DESC; -static LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]; +LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]; static LOG_DESC INIT_LOGREC_FIXED_RECORD_0LSN_EXAMPLE= -{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0, + "fixed0example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0, +"variable0example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_FIXED_RECORD_1LSN_EXAMPLE= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 7, 7, NULL, 
NULL, NULL, 1}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 7, 7, NULL, NULL, NULL, 1, +"fixed1example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 12, NULL, NULL, NULL, 1}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 12, NULL, NULL, NULL, 1, +"variable1example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_FIXED_RECORD_2LSN_EXAMPLE= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 23, 23, NULL, NULL, NULL, 2}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 23, 23, NULL, NULL, NULL, 2, +"fixed2example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 19, NULL, NULL, NULL, 2}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 19, NULL, NULL, NULL, 2, +"variable2example", FALSE, NULL, NULL}; void example_loghandler_init() @@ -298,126 +238,158 @@ void example_loghandler_init() static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23= -{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0 }; +{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0, + "reserved", FALSE, NULL, NULL }; static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD= {LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL, - write_hook_for_redo, NULL, 0}; + write_hook_for_redo, NULL, 0, + "redo_insert_row_head", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL= {LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL, - write_hook_for_redo, NULL, 0}; + write_hook_for_redo, NULL, 0, + "redo_insert_row_tail", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOB= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0, + "redo_insert_row_blob", FALSE, NULL, NULL}; /*QQQ:TODO:header???*/ static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL, 
+ write_hook_for_redo, NULL, 0, + "redo_insert_row_blobs", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD= {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_redo, NULL, 0}; + NULL, write_hook_for_redo, NULL, 0, + "redo_purge_row_head", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL= {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_redo, NULL, 0}; + NULL, write_hook_for_redo, NULL, 0, + "redo_purge_row_tail", FALSE, NULL, NULL}; /* QQQ: TODO: variable and fixed size??? */ static LOG_DESC INIT_LOGREC_REDO_PURGE_BLOCKS= {LOGRECTYPE_VARIABLE_LENGTH, 0, - FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + PAGE_STORE_SIZE + - PAGERANGE_STORE_SIZE, - NULL, write_hook_for_redo, NULL, 0}; + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, + NULL, write_hook_for_redo, NULL, 0, + "redo_purge_blocks", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW= -{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0, + "redo_delete_row", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0, + "redo_update_row_head", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_INDEX= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0, + "redo_index", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW= -{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0, + "redo_undelete_row", FALSE, NULL, NULL}; static 
LOG_DESC INIT_LOGREC_CLR_END= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_redo, NULL, 1}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_redo, NULL, 1, + "clr_end", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_PURGE_END= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1, + "purge_end", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT= {LOGRECTYPE_FIXEDLENGTH, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_undo, NULL, 0}; + NULL, write_hook_for_undo, NULL, 0, + "undo_row_insert", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE= {LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_undo, NULL, 0}; + NULL, write_hook_for_undo, NULL, 0, + "undo_row_delete", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE= {LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_undo, NULL, 1}; + NULL, write_hook_for_undo, NULL, 1, + "undo_row_update", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_ROW_PURGE= {LOGRECTYPE_PSEUDOFIXEDLENGTH, LSN_STORE_SIZE, LSN_STORE_SIZE, - NULL, NULL, NULL, 1}; + NULL, NULL, NULL, 1, + "undo_row_purge", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, write_hook_for_undo, NULL, 1}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, write_hook_for_undo, NULL, 1, + "undo_key_insert", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, write_hook_for_undo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, write_hook_for_undo, NULL, 0, + "undo_key_delete", TRUE, NULL, NULL}; // QQ: why not compressed? 
static LOG_DESC INIT_LOGREC_PREPARE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0, + "prepare", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 5, NULL, NULL, NULL, 1}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 5, NULL, NULL, NULL, 1, + "prepare_with_undo_purge", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_COMMIT= -{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, NULL, NULL, 0, + "commit", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1, + "commit_with_undo_purge", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_CHECKPOINT= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0, + "checkpoint", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0, +"redo_create_table", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0, + "redo_rename_table", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0, + "redo_drop_table", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL= {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE, - NULL, NULL, NULL, 0}; + NULL, write_hook_for_redo, NULL, 0, + "redo_delete_all", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE= {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 4, FILEID_STORE_SIZE + 4, - NULL, NULL, NULL, 0}; + NULL, NULL, NULL, 0, + "redo_repair_table", TRUE, 
NULL, NULL}; static LOG_DESC INIT_LOGREC_FILE_ID= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 4, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, NULL, NULL, 0, + "file_id", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID= -{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0, + "long_transaction_id", TRUE, NULL, NULL}; const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL; @@ -636,6 +608,56 @@ static my_bool translog_write_file_header() /* + Information from transaction log file header +*/ + +typedef struct st_loghandler_file_info +{ + ulonglong timestamp; /* Time stamp */ + ulong maria_version; /* Version of maria loghandler */ + ulong mysql_versiob; /* Version of mysql server */ + ulong server_id; /* Server ID */ + uint page_size; /* Loghandler page size */ + uint file_number; /* Number of the file (from the file header) */ +} LOGHANDLER_FILE_INFO; + +/* + @brief Read hander file information from last opened loghandler file + + @param desc header information descriptor to be filled with information + + @retval 0 OK + @retval 1 Error +*/ + +my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc) +{ + byte page_buff[TRANSLOG_PAGE_SIZE], *ptr; + DBUG_ENTER("translog_read_file_header"); + + if (my_pread(log_descriptor.log_file_num[0], page_buff, + sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME))) + { + DBUG_PRINT("info", ("log read fail error: %d", my_errno)); + DBUG_RETURN(1); + } + ptr= page_buff + sizeof(maria_trans_file_magic); + desc->timestamp= uint8korr(ptr); + ptr+= 8; + desc->maria_version= uint4korr(ptr); + ptr+= 4; + desc->mysql_versiob= uint4korr(ptr); + ptr+= 4; + desc->server_id= uint4korr(ptr); + ptr+= 2; + desc->page_size= uint2korr(ptr); + ptr+= 2; + desc->file_number= uint3korr(ptr); + DBUG_RETURN(0); +} + + +/* Initialize transaction log file buffer SYNOPSIS @@ -651,7 +673,7 @@ static my_bool translog_buffer_init(struct st_translog_buffer *buffer) { 
DBUG_ENTER("translog_buffer_init"); /* This buffer offset */ - buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + buffer->last_lsn= LSN_IMPOSSIBLE; /* This Buffer File */ buffer->file= -1; buffer->overlay= 0; @@ -729,7 +751,7 @@ static my_bool translog_create_new_file() translog_write_file_header()) DBUG_RETURN(1); - if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, file_no, + if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, file_no, CONTROL_FILE_UPDATE_ONLY_LOGNO)) DBUG_RETURN(1); @@ -1156,7 +1178,7 @@ static void translog_start_buffer(struct st_translog_buffer *buffer, (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon))); DBUG_ASSERT(buffer_no == buffer->buffer_no); - buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + buffer->last_lsn= LSN_IMPOSSIBLE; buffer->offset= log_descriptor.horizon; buffer->file= log_descriptor.log_file_num[0]; buffer->overlay= 0; @@ -1958,6 +1980,7 @@ my_bool translog_init(const char *directory, int old_log_was_recovered= 0, logs_found= 0; uint old_flags= flags; TRANSLOG_ADDRESS sure_page, last_page, last_valid_page; + my_bool version_changed= 0; DBUG_ENTER("translog_init"); loghandler_init(); /* Safe to do many times */ @@ -2037,7 +2060,7 @@ my_bool translog_init(const char *directory, i, (ulong) log_descriptor.buffers + i)); } - logs_found= (last_logno != CONTROL_FILE_IMPOSSIBLE_FILENO); + logs_found= (last_logno != FILENO_IMPOSSIBLE); if (logs_found) { @@ -2049,7 +2072,7 @@ my_bool translog_init(const char *directory, find the log end */ - if (LSN_FILE_NO(last_checkpoint_lsn) == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE) { DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0); /* there was no checkpoints we will read from the beginning */ @@ -2087,7 +2110,7 @@ my_bool translog_init(const char *directory, /* TODO: check page size */ - last_valid_page= CONTROL_FILE_IMPOSSIBLE_LSN; + last_valid_page= LSN_IMPOSSIBLE; /* scan and validate pages 
*/ do { @@ -2135,7 +2158,7 @@ my_bool translog_init(const char *directory, current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE); } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) && !old_log_was_recovered); - if (last_valid_page == CONTROL_FILE_IMPOSSIBLE_LSN) + if (last_valid_page == LSN_IMPOSSIBLE) { /* Panic!!! Even page which should be valid is invalid */ /* TODO: issue error */ @@ -2201,6 +2224,13 @@ my_bool translog_init(const char *directory, buffer->buffer))); DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); } + if (!old_log_was_recovered && old_flags == flags) + { + LOGHANDLER_FILE_INFO info; + if (translog_read_file_header(&info)) + DBUG_RETURN(1); + version_changed= (info.maria_version != TRANSLOG_VERSION_ID); + } } DBUG_PRINT("info", ("Logs found: %d was recovered: %d", logs_found, old_log_was_recovered)); @@ -2214,14 +2244,14 @@ my_bool translog_init(const char *directory, open_logfile_by_number_no_cache(1)) == -1 || translog_write_file_header()) DBUG_RETURN(1); - if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, 1, + if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, 1, CONTROL_FILE_UPDATE_ONLY_LOGNO)) DBUG_RETURN(1); /* assign buffer 0 */ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); } - else if (old_log_was_recovered || old_flags != flags) + else if (old_log_was_recovered || old_flags != flags || version_changed) { /* leave the damaged file untouched */ log_descriptor.horizon+= LSN_ONE_FILE; @@ -2251,8 +2281,8 @@ my_bool translog_init(const char *directory, structures for generating 2-byte ids: */ my_atomic_rwlock_init(&LOCK_id_to_share); - id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX*sizeof(MARIA_SHARE*), - MYF(MY_WME|MY_ZEROFILL)); + id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX * sizeof(MARIA_SHARE*), + MYF(MY_WME | MY_ZEROFILL)); if (unlikely(!id_to_share)) DBUG_RETURN(1); 
id_to_share--; /* min id is 1 */ @@ -2347,7 +2377,7 @@ void translog_destroy() 1 Error */ -static my_bool translog_lock() +my_bool translog_lock() { struct st_translog_buffer *current_buffer; DBUG_ENTER("translog_lock"); @@ -2380,7 +2410,7 @@ static my_bool translog_lock() 1 Error */ -static inline my_bool translog_unlock() +my_bool translog_unlock() { DBUG_ENTER("translog_unlock"); translog_buffer_unlock(log_descriptor.bc.buffer); @@ -4233,7 +4263,7 @@ my_bool translog_write_record(LSN *lsn, if (share) { - if (!share->base.transactional) + if (!share->now_transactional) { DBUG_PRINT("info", ("It is not transactional table")); DBUG_RETURN(0); @@ -4254,14 +4284,14 @@ my_bool translog_write_record(LSN *lsn, } if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID))) { - LSN lsn; + LSN dummy_lsn; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; uchar log_data[6]; int6store(log_data, trn->trid); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */ - if (unlikely(translog_write_record(&lsn, LOGREC_LONG_TRANSACTION_ID, + if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID, trn, NULL, sizeof(log_data), sizeof(log_array)/sizeof(log_array[0]), log_array, NULL))) @@ -4301,12 +4331,12 @@ my_bool translog_write_record(LSN *lsn, { uint i; uint len= 0; -#ifdef HAVE_PURIFY +#ifdef HAVE_purify ha_checksum checksum= 0; #endif for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++) { -#ifdef HAVE_PURIFY +#ifdef HAVE_purify /* Find unitialized bytes early */ checksum+= my_checksum(checksum, parts_data[i].str, parts_data[i].length); @@ -4346,6 +4376,8 @@ my_bool translog_write_record(LSN *lsn, } } + DBUG_PRINT("info", ("LSN: (%lu,0x%lx)", (ulong) LSN_FILE_NO(*lsn), + (ulong) LSN_OFFSET(*lsn))); DBUG_RETURN(rc); } @@ -5035,7 +5067,7 @@ translog_read_record_header_scan(TRANSLOG_SCANNER_DATA - it is like 
translog_read_record_header, but read next record, so see its NOTES. - in case of end of the log buff->lsn will be set to - (CONTROL_FILE_IMPOSSIBLE_LSN) + (LSN_IMPOSSIBLE) RETURN 0 error @@ -5080,7 +5112,7 @@ translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA if (scanner->page[scanner->page_offset] == 0) { /* Last record was read */ - buff->lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + buff->lsn= LSN_IMPOSSIBLE; /* Return 'end of log' marker */ DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); } @@ -5242,7 +5274,7 @@ translog_size_t translog_read_record(LSN lsn, if (data == NULL) { - DBUG_ASSERT(lsn != CONTROL_FILE_IMPOSSIBLE_LSN); + DBUG_ASSERT(lsn != LSN_IMPOSSIBLE); data= &internal_data; } if (lsn || @@ -5583,6 +5615,16 @@ static my_bool write_hook_for_redo(enum translog_record_type type __attribute__ ((unused))) { /* + Users of dummy_transaction_object must keep this TRN clean as it + is used by many threads (like those manipulating non-transactional + tables). It might be dangerous if one user sets rec_lsn or some other + member and it is picked up by another user (like putting this rec_lsn into + a page of a non-transactional table); it's safer if all members stay 0. So + non-transactional log records (REPAIR, CREATE, RENAME, DROP) should not + call this hook; we trust them but verify ;) + */ + DBUG_ASSERT(trn->trid != 0); + /* If the hook stays so simple, it would be faster to pass !trn->rec_lsn ? 
trn->rec_lsn : some_dummy_lsn to translog_write_record(), like Monty did in his original code, and not @@ -5608,6 +5650,7 @@ static my_bool write_hook_for_undo(enum translog_record_type type struct st_translog_parts *parts __attribute__ ((unused))) { + DBUG_ASSERT(trn->trid != 0); /* see write_hook_for_redo() */ trn->undo_lsn= *lsn; if (unlikely(LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn) == 0)) trn->first_undo_lsn= @@ -5649,21 +5692,23 @@ int translog_assign_id_to_share(MARIA_SHARE *share, TRN *trn) if (likely(share->id == 0)) { /* Inspired by set_short_trid() of trnman.c */ - int i= share->kfile.file % SHARE_ID_MAX + 1; - my_atomic_rwlock_wrlock(&LOCK_id_to_share); - /** - @todo RECOVERY BUG: if all slots are used, and we're using rwlocks - above, we will never exit the loop. To be discussed with Serg. - */ - for ( ; ; i= i % SHARE_ID_MAX + 1) /* the range is [1..SHARE_ID_MAX] */ + uint i= share->kfile.file % SHARE_ID_MAX + 1; + do { - void *tmp= NULL; - if (id_to_share[i] == NULL && - my_atomic_casptr((void **)&id_to_share[i], &tmp, share)) - break; - } - my_atomic_rwlock_wrunlock(&LOCK_id_to_share); - share->id= (uint16)i; + my_atomic_rwlock_wrlock(&LOCK_id_to_share); + for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */ + { + void *tmp= NULL; + if (id_to_share[i] == NULL && + my_atomic_casptr((void **)&id_to_share[i], &tmp, share)) + { + share->id= (uint16)i; + break; + } + } + my_atomic_rwlock_wrunlock(&LOCK_id_to_share); + i= 1; /* scan the whole array */ + } while (share->id == 0); DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, i)); LSN lsn; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; @@ -5681,7 +5726,7 @@ int translog_assign_id_to_share(MARIA_SHARE *share, TRN *trn) strlen() */ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= - strlen(share->open_file_name); + strlen(share->open_file_name) + 1; if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, share, sizeof(log_data) + log_array[TRANSLOG_INTERNAL_PARTS + @@ 
-5715,3 +5760,15 @@ void translog_deassign_id_from_share(MARIA_SHARE *share) my_atomic_storeptr((void **)&id_to_share[share->id], 0); my_atomic_rwlock_rdunlock(&LOCK_id_to_share); } + + +/** + @brief returns the LSN of the first record starting in this log + + @note so far works only for the very first log created on this system +*/ + +LSN first_lsn_in_log() +{ + return MAKE_LSN(1, TRANSLOG_PAGE_SIZE + log_descriptor.page_overhead); +} diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index a831f088e9b..088e0d8ab8b 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -1,3 +1,21 @@ +/* Copyright (C) 2007 MySQL AB & Sanja Belkin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _ma_loghandler_h +#define _ma_loghandler_h + /* transaction log default cache size (TODO: make it global variable) */ #define TRANSLOG_PAGECACHE_SIZE 1024*1024*2 /* transaction log default file size (TODO: make it global variable) */ @@ -20,6 +38,7 @@ #define TRANSLOG_PAGE_SIZE (8*1024) #include "ma_loghandler_lsn.h" +#include "trnman_public.h" /* short transaction ID type */ typedef uint16 SHORT_TRANSACTION_ID; @@ -41,6 +60,10 @@ struct st_maria_share; #define page_store(T,A) int5store(T,A) #define dirpos_store(T,A) ((*(uchar*) (T)) = A) #define pagerange_store(T,A) int2store(T,A) +#define fileid_korr(P) uint2korr(P) +#define page_korr(P) uint5korr(P) +#define dirpos_korr(P) ((P)[0]) +#define pagerange_korr(P) uint2korr(P) /* Length of disk drive sector size (we assume that writing it @@ -228,10 +251,102 @@ extern translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner, TRANSLOG_HEADER_BUFFER *buff); +extern my_bool translog_lock(); +extern my_bool translog_unlock(); extern void translog_lock_assert_owner(); extern TRANSLOG_ADDRESS translog_get_horizon(); extern int translog_assign_id_to_share(struct st_maria_share *share, struct st_transaction *trn); extern void translog_deassign_id_from_share(struct st_maria_share *share); extern my_bool translog_inited; + +/* + all the rest added because of recovery; should we make + ma_loghandler_for_recovery.h ? 
+*/ + +#define SHARE_ID_MAX 65535 /* array's size */ + +extern LSN first_lsn_in_log(); + +/* record parts descriptor */ +struct st_translog_parts +{ + /* full record length */ + translog_size_t record_length; + /* full record length with chunk headers */ + translog_size_t total_record_length; + /* current part index */ + uint current; + /* total number of elements in parts */ + uint elements; + /* array of parts (LEX_STRING) */ + LEX_STRING *parts; +}; + +typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, + TRN *trn, struct st_maria_share *share, + struct st_translog_parts *parts); + +typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, + TRN *trn, + LSN *lsn, + struct st_translog_parts *parts); + +typedef uint16(*read_rec_hook) (enum translog_record_type type, + uint16 read_length, uchar *read_buff, + byte *decoded_buff); + + +/* record classes */ +enum record_class +{ + LOGRECTYPE_NOT_ALLOWED, + LOGRECTYPE_VARIABLE_LENGTH, + LOGRECTYPE_PSEUDOFIXEDLENGTH, + LOGRECTYPE_FIXEDLENGTH +}; + +/* C++ can't bear that a variable's name is "class" */ +#ifndef __cplusplus +/* + Descriptor of log record type + Note: Don't reorder because of constructs later... 
+*/ +typedef struct st_log_record_type_descriptor +{ + /* internal class of the record */ + enum record_class class; + /* + length for fixed-size record, pseudo-fixed record + length with uncompressed LSNs + */ + uint16 fixed_length; + /* how much record body (belonged to headers too) read with headers */ + uint16 read_header_len; + /* HOOK for writing the record called before lock */ + prewrite_rec_hook prewrite_hook; + /* HOOK for writing the record called when LSN is known, inside lock */ + inwrite_rec_hook inwrite_hook; + /* HOOK for reading headers */ + read_rec_hook read_hook; + /* + For pseudo fixed records number of compressed LSNs followed by + system header + */ + int16 compressed_LSN; + /* the rest is for maria_read_log & Recovery */ + /** @brief for debug error messages or "maria_read_log" command-line tool */ + const char *name; + my_bool record_ends_group; + /* a function to execute when we see the record during the REDO phase */ + int (*record_execute_in_redo_phase)(const TRANSLOG_HEADER_BUFFER *); + /* a function to execute when we see the record during the UNDO phase */ + int (*record_execute_in_undo_phase)(const TRANSLOG_HEADER_BUFFER *); +} LOG_DESC; + +extern LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]; +#endif + C_MODE_END +#endif diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h index 0c02fe2c489..387fe2763d5 100644 --- a/storage/maria/ma_loghandler_lsn.h +++ b/storage/maria/ma_loghandler_lsn.h @@ -1,3 +1,18 @@ +/* Copyright (C) 2007 MySQL AB & Sanja Belkin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + #ifndef _ma_loghandler_lsn_h #define _ma_loghandler_lsn_h @@ -24,16 +39,18 @@ typedef TRANSLOG_ADDRESS LSN; #define LSN_FILE_NO(L) ((L) >> 32) /* Gets raw file number part of a LSN/log address */ -#define LSN_FINE_NO_PART(L) ((L) & ((int64)0xFFFFFF00000000LL)) +#define LSN_FILE_NO_PART(L) ((L) & ((int64)0xFFFFFF00000000LL)) /* Gets record offset of a LSN/log address */ #define LSN_OFFSET(L) ((L) & 0xFFFFFFFFL) /* Makes lsn/log address from file number and record offset */ -#define MAKE_LSN(F,S) ((((uint64)(F)) << 32) | (S)) +#define MAKE_LSN(F,S) ((LSN) ((((uint64)(F)) << 32) | (S))) /* checks LSN */ -#define LSN_VALID(L) DBUG_ASSERT((L) >= 0 && (L) < (uint64)0xFFFFFFFFFFFFFFLL) +#define LSN_VALID(L) \ + ((LSN_FILE_NO_PART(L) != FILENO_IMPOSSIBLE) && \ + (LSN_OFFSET(L) != LOG_OFFSET_IMPOSSIBLE)) /* size of stored LSN on a disk, don't change it! 
*/ #define LSN_STORE_SIZE 7 @@ -51,7 +68,7 @@ typedef TRANSLOG_ADDRESS LSN; /* what we need to add to LSN to increase it on one file */ #define LSN_ONE_FILE ((int64)0x100000000LL) -#define LSN_REPLACE_OFFSET(L, S) (LSN_FINE_NO_PART(L) | (S)) +#define LSN_REPLACE_OFFSET(L, S) (LSN_FILE_NO_PART(L) | (S)) /* an 8-byte type whose most significant uchar is used for "flags"; 7 @@ -61,4 +78,7 @@ typedef LSN LSN_WITH_FLAGS; #define LSN_WITH_FLAGS_TO_LSN(x) (x & ULL(0x00FFFFFFFFFFFFFF)) #define LSN_WITH_FLAGS_TO_FLAGS(x) (x & ULL(0xFF00000000000000)) +#define FILENO_IMPOSSIBLE 0 /**< log file's numbering starts at 1 */ +#define LOG_OFFSET_IMPOSSIBLE 0 /**< log always has a header */ +#define LSN_IMPOSSIBLE 0 #endif diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 15f8dcf4e51..e6df213609b 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -260,7 +260,9 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT, MY_UNPACK_FILENAME),MYF(0)); pthread_mutex_lock(&THR_LOCK_maria); - if (!(old_info=_ma_test_if_reopen(name_buff))) + old_info= 0; + if ((open_flags & HA_OPEN_COPY) || + !(old_info=_ma_test_if_reopen(name_buff))) { share= &share_buff; bzero((uchar*) &share_buff,sizeof(share_buff)); @@ -586,13 +588,36 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->base.null_bytes + share->base.pack_bytes + test(share->options & HA_OPTION_CHECKSUM)); - if (share->base.transactional) + if (open_flags & HA_OPEN_COPY) + { + /* + this instance will be a temporary one used just to create a data + file for REPAIR. Don't do logging. This base information will not go + to disk. 
+ */ + share->base.born_transactional= FALSE; + } + if (share->base.born_transactional) + { + share->page_type= PAGECACHE_LSN_PAGE; share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE; + if (unlikely((share->state.create_rename_lsn == (LSN)ULONGLONG_MAX) && + (open_flags & HA_OPEN_FROM_SQL_LAYER))) + { + /* + This table was repaired with maria_chk. Past log records should be + ignored, future log records should not: we define the present. + */ + share->state.create_rename_lsn= translog_get_horizon(); + _ma_update_create_rename_lsn_on_disk(share, TRUE); + } + } + else + share->page_type= PAGECACHE_PLAIN_PAGE; + share->now_transactional= share->base.born_transactional; + share->base.default_rec_buff_size= max(share->base.pack_reclength, share->base.max_key_length); - share->page_type= (share->base.transactional ? PAGECACHE_LSN_PAGE : - PAGECACHE_PLAIN_PAGE); - if (share->data_file_type == DYNAMIC_RECORD) { share->base.extra_rec_buff_size= @@ -858,6 +883,8 @@ void _ma_setup_functions(register MARIA_SHARE *share) } share->file_read= _ma_nommap_pread; share->file_write= _ma_nommap_pwrite; + share->calc_check_checksum= share->calc_checksum; + if (!(share->options & HA_OPTION_CHECKSUM) && share->data_file_type != COMPRESSED_RECORD) share->calc_checksum= share->calc_write_checksum= 0; @@ -1106,7 +1133,7 @@ uint _ma_base_info_write(File file, MARIA_BASE_INFO *base) *ptr++= base->key_reflength; *ptr++= base->keys; *ptr++= base->auto_key; - *ptr++= base->transactional; + *ptr++= base->born_transactional; *ptr++= 0; /* Reserved */ mi_int2store(ptr,base->pack_bytes); ptr+= 2; mi_int2store(ptr,base->blobs); ptr+= 2; @@ -1149,7 +1176,7 @@ static uchar *_ma_base_info_read(uchar *ptr, MARIA_BASE_INFO *base) base->key_reflength= *ptr++; base->keys= *ptr++; base->auto_key= *ptr++; - base->transactional= *ptr++; + base->born_transactional= *ptr++; ptr++; base->pack_bytes= mi_uint2korr(ptr); ptr+= 2; base->blobs= mi_uint2korr(ptr); ptr+= 2; diff --git a/storage/maria/ma_pagecache.c 
b/storage/maria/ma_pagecache.c index 8af4532ff97..eb939ba9eb0 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -177,7 +177,8 @@ static const char *page_cache_page_type_str[]= /* used only for control page type changing during debugging */ "EMPTY", "PLAIN", - "LSN" + "LSN", + "READ_UNKNOWN" }; static const char *page_cache_page_write_mode_str[]= @@ -320,7 +321,8 @@ struct st_pagecache_block_link #ifndef DBUG_OFF /* debug checks */ static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_pin mode) + enum pagecache_page_pin mode + __attribute__((unused))) { struct st_my_thread_var *thread= my_thread_var; PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); @@ -378,6 +380,7 @@ static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, 1 - Error */ +#ifdef NOT_USED static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, enum pagecache_page_lock lock, enum pagecache_page_pin pin) @@ -445,7 +448,8 @@ error: page_cache_page_pin_str[pin])); DBUG_RETURN(1); } -#endif +#endif /* NOT_USED */ +#endif /* !DBUG_OFF */ #define FLUSH_CACHE 2000 /* sort this many blocks at once */ @@ -581,17 +585,14 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, myf flags) { DBUG_ENTER("pagecache_fwrite"); + DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE); if (type == PAGECACHE_LSN_PAGE) { LSN lsn; DBUG_PRINT("info", ("Log handler call")); /* TODO: integrate with page format */ lsn= lsn_korr(buffer + PAGE_LSN_OFFSET); - /* - check CONTROL_FILE_IMPOSSIBLE_FILENO & - CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET - */ - DBUG_ASSERT(lsn != 0); + DBUG_ASSERT(LSN_VALID(lsn)); translog_flush(lsn); } DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, @@ -2439,16 +2440,16 @@ static void read_block(PAGECACHE *pagecache, } -/* - Set LSN on the page to the given one if the given LSN is bigger +/** + @brief Set LSN on the page to the given one if the given LSN is bigger - SYNOPSIS - check_and_set_lsn() - lsn LSN to set - block 
block to check and set + @param pagecache pointer to a page cache data structure + @param lsn LSN to set + @param block block to check and set */ -static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) +static void check_and_set_lsn(PAGECACHE *pagecache, + LSN lsn, PAGECACHE_BLOCK_LINK *block) { LSN old; DBUG_ENTER("check_and_set_lsn"); @@ -2458,7 +2459,14 @@ static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) (ulong)LSN_FILE_NO(old), (ulong)LSN_OFFSET(old), (ulong)LSN_FILE_NO(lsn), (ulong)LSN_OFFSET(lsn))); if (cmp_translog_addr(lsn, old) > 0) + { + + DBUG_ASSERT(block->type != PAGECACHE_READ_UNKNOWN_PAGE); lsn_store(block->buffer + PAGE_LSN_OFFSET, lsn); + /* we stored LSN in page so we dirtied it */ + if (!(block->status & PCBLOCK_CHANGED)) + link_to_changed_list(pagecache, block); + } DBUG_VOID_RETURN; } @@ -2474,7 +2482,7 @@ static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) lock lock change pin pin page first_REDO_LSN_for_page do not set it if it is zero - lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + lsn if it is not LSN_IMPOSSIBLE (0) and it is bigger then LSN on the page it will be written on the page @@ -2531,10 +2539,8 @@ void pagecache_unlock(PAGECACHE *pagecache, if (block->rec_lsn == 0) block->rec_lsn= first_REDO_LSN_for_page; } - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); if (make_lock_and_pin(pagecache, block, lock, pin)) { @@ -2566,7 +2572,7 @@ void pagecache_unlock(PAGECACHE *pagecache, pagecache pointer to a page cache data structure file handler for the file for the block of data to be read pageno number of the block of data in the file - lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + lsn if it is not LSN_IMPOSSIBLE (0) and it is bigger then LSN on the page it will be written on the page */ @@ -2594,10 +2600,8 @@ void pagecache_unpin(PAGECACHE *pagecache, DBUG_ASSERT(block != 0); 
DBUG_ASSERT(page_st == PAGE_READ); - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); /* we can just unpin only with keeping read lock because: @@ -2635,10 +2639,9 @@ void pagecache_unpin(PAGECACHE *pagecache, link direct link to page (returned by read or write) lock lock change pin pin page - first_REDO_LSN_for_page do not set it if it is zero - lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it - is bigger then LSN on the page it will be written on - the page + first_REDO_LSN_for_page do not set it if it is LSN_IMPOSSIBLE (0) + lsn if it is not LSN_IMPOSSIBLE and it is bigger then + LSN on the page it will be written on the page */ void pagecache_unlock_by_link(PAGECACHE *pagecache, @@ -2681,7 +2684,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); - if (first_REDO_LSN_for_page) + if (first_REDO_LSN_for_page != LSN_IMPOSSIBLE) { /* LOCK_READ_UNLOCK is ok here as the page may have first locked @@ -2694,10 +2697,8 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, if (block->rec_lsn == 0) block->rec_lsn= first_REDO_LSN_for_page; } - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); if (make_lock_and_pin(pagecache, block, lock, pin)) DBUG_ASSERT(0); /* should not happend */ @@ -2726,7 +2727,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, pagecache_unpin_by_link() pagecache pointer to a page cache data structure link direct link to page (returned by read or write) - lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + lsn if it is not LSN_IMPOSSIBLE (0) and it is bigger then LSN on the page it will be written on the page */ @@ -2751,10 +2752,8 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache, inc_counter_for_resize_op(pagecache); - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != 
LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); /* We can just unpin only with keeping read lock because: @@ -2865,8 +2864,10 @@ restart: (pin == PAGECACHE_PIN)), &page_st); DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || - block->type == type); - block->type= type; + block->type == type || type == PAGECACHE_READ_UNKNOWN_PAGE); + if (type != PAGECACHE_READ_UNKNOWN_PAGE || + block->type == PAGECACHE_EMPTY_PAGE) + block->type= type; if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ)) { DBUG_PRINT("info", ("read block 0x%lx", (ulong)block)); @@ -3181,6 +3182,7 @@ my_bool pagecache_write_part(PAGECACHE *pagecache, page_cache_page_pin_str[pin], page_cache_page_write_mode_str[write_mode], offset, size)); + DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE); DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED); DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK); DBUG_ASSERT(offset + size <= pagecache->block_size); @@ -3230,6 +3232,7 @@ restart: } DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == PAGECACHE_READ_UNKNOWN_PAGE || block->type == type); block->type= type; @@ -3650,6 +3653,14 @@ restart: ("changed_blocks") though it's still dirty (the flush by another thread has not yet happened). Checkpoint will miss the page and so must be blocked until that flush has happened. + Note that if there are two concurrent + flush_pagecache_blocks_int() on this file, then the first one may + move the block into its first_in_switch, and the second one would + just not see the block and wrongly consider its job done. + @todo RECOVERY Maria does protect such flushes with intern_lock, + but Checkpoint does not (Checkpoint makes sure that + changed_blocks_is_incomplete is 0 when it starts, but as + flush_cached_blocks() releases mutex, this may change... 
*/ /** @todo RECOVERY: check all places where we remove a page from the @@ -3905,7 +3916,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, ptr+= 4; lsn_store(ptr, block->rec_lsn); ptr+= LSN_STORE_SIZE; - if (block->rec_lsn != 0) + if (block->rec_lsn != LSN_IMPOSSIBLE) { if (cmp_translog_addr(block->rec_lsn, minimum_rec_lsn) < 0) minimum_rec_lsn= block->rec_lsn; diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h index 5030c2a2d7b..86426c5b4bc 100644 --- a/storage/maria/ma_pagecache.h +++ b/storage/maria/ma_pagecache.h @@ -34,7 +34,9 @@ enum pagecache_page_type /* the page does not contain LSN */ PAGECACHE_PLAIN_PAGE, /* the page contain LSN (maria tablespace page) */ - PAGECACHE_LSN_PAGE + PAGECACHE_LSN_PAGE, + /* Page type used when scanning file and we don't care about the type */ + PAGECACHE_READ_UNKNOWN_PAGE }; /* diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h index d2901f5724c..42c5071babd 100644 --- a/storage/maria/ma_recovery.h +++ b/storage/maria/ma_recovery.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +/* Copyright (C) 2006,2007 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c index 5224698c614..9dd75705229 100644 --- a/storage/maria/ma_rename.c +++ b/storage/maria/ma_rename.c @@ -56,17 +56,23 @@ int maria_rename(const char *old_name, const char *new_name) raid_chunks = share->base.raid_chunks; #endif - sync_dir= (share->base.transactional && !share->temporary) ? + /* + the renaming of an internal table to the final table (like in ALTER TABLE) + is the moment when this table receives its correct create_rename_lsn and + this is important; make sure transactionality has been re-enabled. 
+ */ + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + sync_dir= (share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0; if (sync_dir) { - uchar log_data[LSN_STORE_SIZE]; + uchar log_data[2 + 2]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; uint old_name_len= strlen(old_name), new_name_len= strlen(new_name); int2store(log_data, old_name_len); int2store(log_data + 2, new_name_len); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 2 + 2; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char *)old_name; log_array[TRANSLOG_INTERNAL_PARTS + 1].length= old_name_len; log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (char *)new_name; @@ -76,15 +82,16 @@ int maria_rename(const char *old_name, const char *new_name) MySQL layer to be crash-safe, which it is not now (that would require work using the ddl_log of sql/sql_table.cc); when it is, we should reconsider the moment of writing this log record (before or after op, - under THR_LOCK_maria or not...), how to use it in Recovery, and force - the log. For now this record is just informative. + under THR_LOCK_maria or not...), how to use it in Recovery. + For now it can serve to apply logs to a backup so we sync it. */ if (unlikely(translog_write_record(&share->state.create_rename_lsn, LOGREC_REDO_RENAME_TABLE, &dummy_transaction_object, NULL, 2 + 2 + old_name_len + new_name_len, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL))) + log_array, NULL) || + translog_flush(share->state.create_rename_lsn))) { maria_close(info); DBUG_RETURN(1); @@ -93,10 +100,7 @@ int maria_rename(const char *old_name, const char *new_name) store LSN into file, needed for Recovery to not be confused if a RENAME happened (applying REDOs to the wrong table). 
*/ - lsn_store(log_data, share->state.create_rename_lsn); - if (my_pwrite(share->kfile.file, log_data, sizeof(log_data), - sizeof(share->state.header) + 2, MYF(MY_NABP)) || - my_sync(share->kfile.file, MYF(MY_WME))) + if (_ma_update_create_rename_lsn_on_disk(share, TRUE)) { maria_close(info); DBUG_RETURN(1); diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c index 35d654bbb45..7d7a975a641 100644 --- a/storage/maria/ma_test1.c +++ b/storage/maria/ma_test1.c @@ -60,7 +60,7 @@ int main(int argc,char *argv[]) if (maria_init() || (init_pagecache(maria_pagecache, IO_SIZE*16, 0, 0, maria_block_size) == 0) || - ma_control_file_create_or_open() || + ma_control_file_create_or_open(TRUE) || (init_pagecache(maria_log_pagecache, TRANSLOG_PAGECACHE_SIZE, 0, 0, TRANSLOG_PAGE_SIZE) == 0) || diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c index dd5596c4d5c..585a78b753b 100644 --- a/storage/maria/ma_test2.c +++ b/storage/maria/ma_test2.c @@ -224,7 +224,7 @@ int main(int argc, char *argv[]) /* Maria requires that we always have a page cache */ if ((init_pagecache(maria_pagecache, pagecache_size, 0, 0, maria_block_size) == 0) || - ma_control_file_create_or_open() || + ma_control_file_create_or_open(TRUE) || (init_pagecache(maria_log_pagecache, TRANSLOG_PAGECACHE_SIZE, 0, 0, TRANSLOG_PAGE_SIZE) == 0) || diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh index 76b6c32913f..5ea76a7037d 100755 --- a/storage/maria/ma_test_all.sh +++ b/storage/maria/ma_test_all.sh @@ -5,15 +5,21 @@ # If you want to run this in Valgrind, you should use --trace-children=yes, # so that it detects problems in ma_test* and not in the shell script + +# Remove # from following line if you need some more information +#set -x -v -e + valgrind="valgrind --alignment=8 --leak-check=yes" silent="-s" suffix="" -#set -x -v -e if [ -z "$maria_path" ] then maria_path="." 
fi +# Delete temporary files +rm -f *.TMD + run_tests() { row_type=$1 @@ -126,6 +132,11 @@ run_repair_tests() $maria_path/maria_chk$suffix -se test1 $maria_path/maria_chk$suffix -rqos --correct-checksum test1 $maria_path/maria_chk$suffix -se test1 + $maria_path/ma_test2$suffix $silent -c -d1 $row_type + $maria_path/maria_chk$suffix -s --parallel-recover test2 + $maria_path/maria_chk$suffix -se test2 + $maria_path/maria_chk$suffix -s --parallel-recover --quick test2 + $maria_path/maria_chk$suffix -se test2 } run_pack_tests() @@ -153,6 +164,15 @@ run_pack_tests() $maria_path/maria_chk$suffix -es test1 $maria_path/maria_chk$suffix -rus test1 $maria_path/maria_chk$suffix -es test1 + + $maria_path/ma_test2$suffix $silent -c -d1 $row_type + $maria_path/maria_chk$suffix -s --parallel-recover test2 + $maria_path/maria_chk$suffix -se test2 + $maria_path/maria_chk$suffix -s --parallel-recover --unpack test2 + $maria_path/maria_chk$suffix -se test2 + $maria_path/maria_pack$suffix --force -s test1 + $maria_path/maria_chk$suffix -s --parallel-recover --unpack test2 + $maria_path/maria_chk$suffix -se test2 } echo "Running tests with dynamic row format" @@ -167,9 +187,13 @@ run_pack_tests -S echo "Running tests with block row format" run_tests -M +run_repair_tests -M +run_pack_tests -M echo "Running tests with block row format and transactions" run_tests "-M -T" +run_repair_tests "-M -T" +run_pack_tests "-M -T" # # Tests that gives warnings diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index 4d21167535d..0cb2e2b648b 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -147,6 +147,7 @@ int maria_update(register MARIA_HA *info, const uchar *oldrec, uchar *newrec) if (share->calc_checksum) { info->cur_row.checksum= (*share->calc_checksum)(info,newrec); + info->state->checksum+= (info->cur_row.checksum - old_checksum); /* Store new checksum in index file header */ key_changed|= HA_STATE_CHANGED; } @@ -173,8 +174,6 @@ int maria_update(register 
MARIA_HA *info, const uchar *oldrec, uchar *newrec) if (auto_key_changed) set_if_bigger(info->s->state.auto_increment, ma_retrieve_auto_increment(info, newrec)); - if (share->calc_checksum) - info->state->checksum+= (info->cur_row.checksum - old_checksum); /* We can't yet have HA_STATE_AKTIV here, as block_record dosn't support diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index f8a51507624..58953caa57c 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -676,14 +676,7 @@ get_one_option(int optid, check_param.testflag|= T_UPDATE_STATE; break; case '#': - if (argument == disabled_my_option) - { - DBUG_POP(); - } - else - { - DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_chk.trace"); - } + DBUG_SET_INITIAL(argument ? argument : "d:t:o,/tmp/maria_chk.trace"); break; case 'V': print_version(); @@ -862,16 +855,25 @@ static int maria_chk(HA_CHECK *param, char *filename) share->r_locks=0; maria_block_size= share->base.block_size; - if (share->data_file_type == BLOCK_RECORD && - (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_FAST | T_STATISTICS | - T_CHECK | T_CHECK_ONLY_CHANGED))) + if (share->data_file_type == BLOCK_RECORD || + ((param->testflag & T_UNPACK) && + share->state.header.org_data_file_type == BLOCK_RECORD)) { - _ma_check_print_error(param, - "Record format used by '%s' is is not yet supported with repair/check", - filename); - param->error_printed= 0; - error= 1; - goto end2; + if (param->testflag & T_SORT_RECORDS) + { + _ma_check_print_error(param, + "Record format used by '%s' is is not yet supported with repair/check", + filename); + param->error_printed= 0; + error= 1; + goto end2; + } + /* We can't do parallell repair with BLOCK_RECORD yet */ + if (param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) + { + param->testflag&= ~(T_REP_BY_SORT | T_REP_PARALLEL); + param->testflag|= T_REP; + } } /* @@ -1026,6 +1028,13 @@ static int maria_chk(HA_CHECK *param, char *filename) } if (!error) { + /* + Tell the 
server's Recovery to ignore old REDOs on this table; we don't + know what the log's end LSN is now, so we just let the server know + that it will have to find and store it. + */ + if (share->base.born_transactional) + share->state.create_rename_lsn= (LSN)ULONGLONG_MAX; if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) && (maria_is_any_key_active(share->state.key_map) || (rep_quick && !param->keys_in_use && !recreate)) && @@ -1757,11 +1766,14 @@ void _ma_check_print_info(HA_CHECK *param __attribute__((unused)), const char *fmt,...) { va_list args; + DBUG_ENTER("_ma_check_print_info"); + DBUG_PRINT("enter", ("format: %s", fmt)); va_start(args,fmt); VOID(vfprintf(stdout, fmt, args)); VOID(fputc('\n',stdout)); va_end(args); + DBUG_VOID_RETURN; } /* VARARGS */ @@ -1770,6 +1782,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("_ma_check_print_warning"); + DBUG_PRINT("enter", ("format: %s", fmt)); fflush(stdout); if (!param->warning_printed && !param->error_printed) @@ -1795,7 +1808,7 @@ void _ma_check_print_error(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("_ma_check_print_error"); - DBUG_PRINT("enter",("format: %s",fmt)); + DBUG_PRINT("enter", ("format: %s", fmt)); fflush(stdout); if (!param->warning_printed && !param->error_printed) diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 6c735b745ea..62f37077ceb 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -30,6 +30,7 @@ #define MAX_NONMAPPED_INSERTS 1000 #define MARIA_MAX_TREE_LEVELS 32 +#define SANITY_CHECKS struct st_transaction; @@ -170,8 +171,11 @@ typedef struct st_ma_base_info /* The following are from the header */ uint key_parts, all_key_parts; - /* If false, we disable logging, versioning, transaction etc */ - my_bool transactional; + /** + @brief If false, we disable logging, versioning, transaction etc. 
Observe + difference with MARIA_SHARE::now_transactional + */ + my_bool born_transactional; } MARIA_BASE_INFO; @@ -264,7 +268,9 @@ typedef struct st_maria_share Calculate checksum for a row during write. May be 0 if we calculate the checksum in write_record_init() */ - ha_checksum(*calc_write_checksum) (struct st_maria_info *, const uchar *); + ha_checksum(*calc_write_checksum)(struct st_maria_info *, const uchar *); + /* calculate checksum for a row during check table */ + ha_checksum(*calc_check_checksum)(struct st_maria_info *, const uchar *); /* Compare a row in memory with a row on disk */ my_bool (*compare_unique)(struct st_maria_info *, MARIA_UNIQUEDEF *, const uchar *record, MARIA_RECORD_POS pos); @@ -303,6 +309,13 @@ typedef struct st_maria_share not_flushed, concurrent_insert; my_bool delay_key_write; my_bool have_rtree; + /** + @brief if the table is transactional right now. It may have been created + transactional (base.born_transactional==TRUE) but with transactionality + (logging) temporarily disabled (now_transactional==FALSE). The opposite + (FALSE, TRUE) is impossible. 
+ */ + my_bool now_transactional; #ifdef THREAD THR_LOCK lock; pthread_mutex_t intern_lock; /* Locking for use with _locking */ @@ -749,7 +762,7 @@ extern ulong _ma_rec_unpack(MARIA_HA *info, uchar *to, uchar *from, ulong reclength); extern my_bool _ma_rec_check(MARIA_HA *info, const char *record, uchar *packpos, ulong packed_length, - my_bool with_checkum); + my_bool with_checkum, ha_checksum checksum); extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos, ulong length, my_off_t next_filepos, uchar ** record, ulong *reclength, @@ -874,6 +887,7 @@ void _ma_update_status(void *param); void _ma_restore_status(void *param); void _ma_copy_status(void *to, void *from); my_bool _ma_check_status(void *param); +void _ma_reset_status(MARIA_HA *maria); extern MARIA_HA *_ma_test_if_reopen(char *filename); my_bool _ma_check_table_is_closed(const char *name, const char *where); @@ -886,13 +900,12 @@ void _ma_remap_file(MARIA_HA *info, my_off_t size); MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const uchar *record); my_bool _ma_write_abort_default(MARIA_HA *info); -/* Functions needed by _ma_check (are overrided in MySQL) */ C_MODE_START +/* Functions needed by _ma_check (are overrided in MySQL) */ volatile int *_ma_killed_ptr(HA_CHECK *param); void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); void _ma_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...)); void _ma_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...)); -int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info); C_MODE_END int _ma_flush_pending_blocks(MARIA_SORT_PARAM *param); @@ -908,9 +921,14 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param); int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, ulong); int _ma_sync_table_files(const MARIA_HA *info); -int _ma_initialize_data_file(File dfile, MARIA_SHARE *share); +int _ma_initialize_data_file(MARIA_SHARE *share, File dfile); +int 
_ma_update_create_rename_lsn_on_disk(MARIA_SHARE *share, my_bool do_sync); void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn); +#define _ma_tmp_disable_logging_for_table(S) \ + { (S)->now_transactional= FALSE; (S)->page_type= PAGECACHE_PLAIN_PAGE; } +#define _ma_reenable_logging_for_table(S) \ + { if (((S)->now_transactional= (S)->base.born_transactional)) \ + (S)->page_type= PAGECACHE_LSN_PAGE; } extern PAGECACHE *maria_log_pagecache; - diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c new file mode 100644 index 00000000000..2d664e08662 --- /dev/null +++ b/storage/maria/maria_read_log.c @@ -0,0 +1,983 @@ +/* Copyright (C) 2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#include <ma_blockrec.h> +#include <my_getopt.h> + +#define PCACHE_SIZE (1024*1024*10) +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE (1024L*1024L) + +static const char *load_default_groups[]= { "maria_read_log",0 }; +static void get_options(int *argc,char * * *argv); +#ifndef DBUG_OFF +#if defined(__WIN__) +const char *default_dbug_option= "d:t:i:O,\\maria_read_log.trace"; +#else +const char *default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace"; +#endif +#endif /* DBUG_OFF */ +static my_bool opt_only_display, opt_display_and_apply; + +struct TRN_FOR_RECOVERY +{ + LSN group_start_lsn, undo_lsn; + TrID long_trid; +}; + +struct TRN_FOR_RECOVERY all_active_trans[SHORT_TRID_MAX + 1]; +MARIA_HA *all_tables[SHORT_TRID_MAX + 1]; +LSN current_group_end_lsn= LSN_IMPOSSIBLE; + +static void end_of_redo_phase(); +static void display_record_position(const LOG_DESC *log_desc, + const TRANSLOG_HEADER_BUFFER *rec, + uint number); +static int display_and_apply_record(const LOG_DESC *log_desc, + const TRANSLOG_HEADER_BUFFER *rec); +#define prototype_exec_hook(R) \ +static int exec_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec) +prototype_exec_hook(LONG_TRANSACTION_ID); +#ifdef MARIA_CHECKPOINT +prototype_exec_hook(CHECKPOINT); +#endif +prototype_exec_hook(REDO_CREATE_TABLE); +prototype_exec_hook(FILE_ID); +prototype_exec_hook(REDO_INSERT_ROW_HEAD); +prototype_exec_hook(REDO_INSERT_ROW_TAIL); +prototype_exec_hook(REDO_PURGE_ROW_HEAD); +prototype_exec_hook(REDO_PURGE_ROW_TAIL); +prototype_exec_hook(UNDO_ROW_INSERT); +prototype_exec_hook(UNDO_ROW_DELETE); +prototype_exec_hook(COMMIT); + + +/* + TODO: Avoid mallocs in exec. 
+ + Proposed fix: + Add either a context/buffer argument to all exec_hook functions + or add 'record_buffer' and 'record_buffer_length' to + TRANSLOG_HEADER_BUFFER. + With this we could use my_realloc() instead of my_malloc() to + allocate data and save some mallocs. +*/ + +/* + To implement REDO_DROP_TABLE and REDO_RENAME_TABLE, we would need to go + through the all_tables[] array, find all open instances of the + table-to-drop-or-rename, and remove them from the array. + We however know that in real Recovery, we don't have to handle those log + records at all, same for REDO_CREATE_TABLE. + So for now, we can use this program to replay/debug a sequence of CREATE + + DMLs, but not DROP/RENAME; it is probably enough for a start. +*/ + +int main(int argc, char **argv) +{ + LSN lsn; + char **default_argv; + MY_INIT(argv[0]); + + load_defaults("my", load_default_groups, &argc, &argv); + default_argv= argv; + get_options(&argc, &argv); + + maria_data_root= "."; + + if (maria_init()) + { + fprintf(stderr, "Can't init Maria engine (%d)\n", errno); + goto err; + } + /* we don't want to create a control file, it MUST exist */ + if (ma_control_file_create_or_open(FALSE)) + { + fprintf(stderr, "Can't open control file (%d)\n", errno); + goto err; + } + if (last_logno == FILENO_IMPOSSIBLE) + { + fprintf(stderr, "Can't find any log\n"); + goto err; + } + if (init_pagecache(maria_pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE) == 0) + { + fprintf(stderr, "Got error in init_pagecache() (errno: %d)\n", errno); + goto err; + } + /* + If log handler does not find the "last_logno" log it will return error, + which is good. + But if it finds a log and this log was crashed, it will create a new log, + which is useless. TODO: start log handler in read-only mode. 
+ */ + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, maria_pagecache, + TRANSLOG_DEFAULT_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + goto err; + } + + /* install hooks for execution */ +#define install_exec_hook(R) \ + log_record_type_descriptor[LOGREC_ ## R].record_execute_in_redo_phase= \ + exec_LOGREC_ ## R; + install_exec_hook(LONG_TRANSACTION_ID); +#ifdef MARIA_CHECKPOINT + install_exec_hook(CHECKPOINT); +#endif + install_exec_hook(REDO_CREATE_TABLE); + install_exec_hook(FILE_ID); + install_exec_hook(REDO_INSERT_ROW_HEAD); + install_exec_hook(REDO_INSERT_ROW_TAIL); + install_exec_hook(REDO_PURGE_ROW_HEAD); + install_exec_hook(REDO_PURGE_ROW_TAIL); + install_exec_hook(UNDO_ROW_INSERT); + install_exec_hook(UNDO_ROW_DELETE); + install_exec_hook(COMMIT); + + if (opt_only_display) + printf("You are using --only-display, NOTHING will be written to disk\n"); + + lsn= first_lsn_in_log(); /*could also be last_checkpoint_lsn */ + + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + uint i= 1; + + translog_size_t len= translog_read_record_header(lsn, &rec); + + if (len == (TRANSLOG_RECORD_HEADER_MAX_SIZE + 1)) + { + printf("EOF on the log\n"); + goto end; + } + + if (translog_init_scanner(lsn, 1, &scanner)) + { + fprintf(stderr, "Scanner init failed\n"); + goto err; + } + for (;;i++) + { + uint16 sid= rec.short_trid; + const LOG_DESC *log_desc= &log_record_type_descriptor[rec.type]; + display_record_position(log_desc, &rec, i); + + /* + A complete group is a set of log records with an "end mark" record + (e.g. a set of REDOs for an operation, terminated by an UNDO for this + operation); if there is no "end mark" record the group is incomplete + and won't be executed. + There are pitfalls: if a table write failed, the transaction may have + put an incomplete group in the log and then a COMMIT record, that will + make a complete group which is wrong. 
We say that we should mark the
+      table corrupted if such error happens (what if it cannot be marked?).
+    */
+    if (log_desc->record_ends_group)
+    {
+      if (all_active_trans[sid].group_start_lsn != LSN_IMPOSSIBLE)
+      {
+        /*
+          There is a complete group for this transaction, containing more than
+          this event.
+        */
+        printf(" ends a group:\n");
+        struct st_translog_scanner_data scanner2;
+        TRANSLOG_HEADER_BUFFER rec2;
+        len=
+          translog_read_record_header(all_active_trans[sid].group_start_lsn, &rec2);
+        if (len == (TRANSLOG_RECORD_HEADER_MAX_SIZE + 1))
+        {
+          fprintf(stderr, "Cannot find record where it should be\n");
+          goto err;
+        }
+        if (translog_init_scanner(rec2.lsn, 1, &scanner2))
+        {
+          fprintf(stderr, "Scanner2 init failed\n");
+          goto err;
+        }
+        current_group_end_lsn= rec.lsn;
+        /* replay, in log order, every record of this short_trid in the group */
+        do
+        {
+          if (rec2.short_trid == sid) /* it's in our group */
+          {
+            const LOG_DESC *log_desc2= &log_record_type_descriptor[rec2.type];
+            display_record_position(log_desc2, &rec2, 0);
+            if (display_and_apply_record(log_desc2, &rec2))
+              goto err;
+          }
+          len= translog_read_next_record_header(&scanner2, &rec2);
+          if (len == (TRANSLOG_RECORD_HEADER_MAX_SIZE + 1))
+          {
+            fprintf(stderr, "Cannot find record where it should be\n");
+            goto err;
+          }
+        }
+        while (rec2.lsn < rec.lsn);
+        translog_free_record_header(&rec2);
+        /* group finished */
+        all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE;
+        current_group_end_lsn= LSN_IMPOSSIBLE; /* for debugging */
+      }
+      if (display_and_apply_record(log_desc, &rec))
+        goto err;
+    }
+    else /* record does not end group */
+    {
+      /* just record the fact, can't know if can execute yet */
+      if (all_active_trans[sid].group_start_lsn == LSN_IMPOSSIBLE)
+      {
+        /* group not yet started */
+        all_active_trans[sid].group_start_lsn= rec.lsn;
+      }
+    }
+    len= translog_read_next_record_header(&scanner, &rec);
+    if (len == (TRANSLOG_RECORD_HEADER_MAX_SIZE + 1))
+    {
+      printf("EOF on the log\n");
+      /*
+        Leave the scan loop instead of jumping straight to "end": this is the
+        only normal exit of the loop, and the statements which follow it
+        (freeing the record header, end_of_redo_phase()) must be reached.
+      */
+      break;
+    }
+  }
+  translog_free_record_header(&rec);
+
+  /*
+    So we have applied all
REDOs. + We may now have unfinished transactions. + I don't think it's this program's job to roll them back: + to roll back and at the same time stay idempotent, it needs to write log + records (without CLRs, 2nd rollback would hit the effects of first + rollback and fail). But this standalone tool is not allowed to write to + the server's transaction log. So we do not roll back anything. + In the real Recovery code, or the code to do "recover after online + backup", yes we will roll back. + */ + end_of_redo_phase(); + goto end; +err: + /* don't touch anything more, in case we hit a bug */ + exit(1); +end: + maria_panic(HA_PANIC_CLOSE); + free_defaults(default_argv); + my_end(0); + exit(0); + return 0; /* No compiler warning */ +} + + +static struct my_option my_long_options[] = +{ + {"only-display", 'o', "display brief info about records's header", + (gptr*) &opt_only_display, (gptr*) &opt_only_display, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, + {"display-and-apply", 'a', + "like --only-display but displays more info and modifies tables", + (gptr*) &opt_display_and_apply, (gptr*) &opt_display_and_apply, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifndef DBUG_OFF + {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +#include <help_start.h> + +static void print_version(void) +{ + VOID(printf("%s Ver 1.0 for %s on %s\n", + my_progname, SYSTEM_TYPE, MACHINE_TYPE)); + NETWARE_SET_SCREEN_MODE(1); +} + + +static void usage(void) +{ + print_version(); + puts("Copyright (C) 2007 MySQL AB"); + puts("This software comes with ABSOLUTELY NO WARRANTY. 
This is free software,");
+  puts("and you are welcome to modify and redistribute it under the GPL license\n");
+
+  puts("Display and apply log records from a MARIA transaction log");
+  puts("found in the current directory (for now)");
+  VOID(printf("\nUsage: %s OPTIONS\n", my_progname));
+  puts("You need to use one of -o or -a");
+  my_print_help(my_long_options);
+  print_defaults("my", load_default_groups);
+  my_print_variables(my_long_options);
+}
+
+#include <help_end.h>
+
+/*
+  Per-option callback passed to handle_options().
+
+  Only '#' is acted upon (it installs the debug trace settings, falling back
+  to default_dbug_option when no argument is given), and only when DBUG_OFF
+  is not defined; every other option id is ignored here.
+
+  The switch is closed outside of the #ifndef/#endif pair so that the braces
+  stay balanced whether or not DBUG_OFF is defined.
+
+  Always returns 0.
+*/
+static my_bool
+get_one_option(int optid __attribute__((unused)),
+               const struct my_option *opt __attribute__((unused)),
+               char *argument __attribute__((unused)))
+{
+  switch (optid) {
+#ifndef DBUG_OFF
+  case '#':
+    DBUG_SET_INITIAL(argument ? argument : default_dbug_option);
+    break;
+#endif
+  default:
+    break;
+  }
+  return 0;
+}
+
+/*
+  Parse the command line. Exits with the handle_options() error code if
+  parsing fails, and prints usage and exits with 1 unless exactly one of
+  opt_only_display / opt_display_and_apply is set.
+*/
+static void get_options(int *argc,char ***argv)
+{
+  int ho_error;
+
+  my_progname= argv[0][0];
+
+  if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
+    exit(ho_error);
+
+  if ((opt_only_display + opt_display_and_apply) != 1)
+  {
+    usage();
+    exit(1);
+  }
+}
+
+
+/* very basic info about the record's header */
+static void display_record_position(const LOG_DESC *log_desc,
+                                    const TRANSLOG_HEADER_BUFFER *rec,
+                                    uint number)
+{
+  /*
+    if number==0, we're going over records which we had already seen and which
+    form a group, so we indent below the group's end record
+  */
+  printf("%sRec#%u LSN (%lu,0x%lx) short_trid %u %s(num_type:%u) len %lu\n",
+         number ? "" : " ", number,
+         (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn),
+         rec->short_trid, log_desc->name, rec->type,
+         (ulong)rec->record_length);
+}
+
+
+/*
+  Execute the redo-phase hook registered for a record type.
+
+  Does nothing and returns 0 when opt_only_display is set. Otherwise calls
+  log_desc->record_execute_in_redo_phase(rec) and returns its result, after
+  printing a message to stderr if that result is non-zero.
+
+  Returns 1 if no hook is registered for the record type.
+*/
+static int display_and_apply_record(const LOG_DESC *log_desc,
+                                    const TRANSLOG_HEADER_BUFFER *rec)
+{
+  int error;
+  if (opt_only_display)
+    return 0;
+  if (log_desc->record_execute_in_redo_phase == NULL)
+  {
+    /* die on all not-yet-handled records :) */
+    DBUG_ASSERT("one more hook" == "to write");
+    /*
+      In builds where DBUG_ASSERT() expands to nothing we would otherwise
+      call through a NULL function pointer below; report and fail instead.
+    */
+    fprintf(stderr, "No execution hook for record type %s\n", log_desc->name);
+    return 1;
+  }
+  if ((error= (*log_desc->record_execute_in_redo_phase)(rec)))
+    fprintf(stderr, "Got error when executing record\n");
+  return error;
+}
+
+
+/*
+  Hook for LONG_TRANSACTION_ID: remembers the long transaction id carried in
+  the record header for the record's short_trid. A group still open under
+  that short id is dropped. Returns 1 if the short id still belongs to an
+  older transaction whose undo_lsn is set, 0 otherwise.
+*/
+prototype_exec_hook(LONG_TRANSACTION_ID)
+{
+  uint16 sid= rec->short_trid;
+  TrID long_trid= all_active_trans[sid].long_trid;
+  /* abort group of this trn (must be of before a crash) */
+  LSN gslsn= all_active_trans[sid].group_start_lsn;
+  char llbuf[22];
+  if (gslsn != LSN_IMPOSSIBLE)
+  {
+    printf("Group at LSN (%lu,0x%lx) short_trid %u aborted\n",
+           (ulong) LSN_FILE_NO(gslsn), (ulong) LSN_OFFSET(gslsn), sid);
+    all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE;
+  }
+  if (long_trid != 0)
+  {
+    LSN ulsn= all_active_trans[sid].undo_lsn;
+    if (ulsn != LSN_IMPOSSIBLE)
+    {
+      llstr(long_trid, llbuf);
+      fprintf(stderr, "Found an old transaction long_trid %s short_trid %u"
+              " with same short id as this new transaction, and has neither"
+              " committed nor rollback (undo_lsn: (%lu,0x%lx))\n", llbuf,
+              sid, (ulong) LSN_FILE_NO(ulsn), (ulong) LSN_OFFSET(ulsn));
+      goto err;
+    }
+  }
+  long_trid= uint6korr(rec->header);
+  all_active_trans[sid].long_trid= long_trid;
+  llstr(long_trid, llbuf);
+  printf("Transaction long_trid %s short_trid %u starts\n", llbuf, sid);
+  goto end;
+err:
+  DBUG_ASSERT(0);
+  return 1;
+end:
+  return 0;
+}
+
+
+#ifdef MARIA_CHECKPOINT
+prototype_exec_hook(CHECKPOINT)
+{
+  /* the only checkpoint we care about was found via control file, ignore */
+  return 0;
+}
+#endif
+
+
+prototype_exec_hook(REDO_CREATE_TABLE)
+{
+  File dfile= -1, kfile= -1;
+  char
*linkname_ptr, filename[FN_REFLEN]; + char *name, *ptr; + myf create_flag; + uint flags; + int error, create_mode= O_RDWR | O_TRUNC; + MARIA_HA *info= NULL; + if (((name= my_malloc(rec->record_length, MYF(MY_WME))) == NULL) || + (translog_read_record(rec->lsn, 0, rec->record_length, name, NULL) != + rec->record_length)) + { + fprintf(stderr, "Failed to read record\n"); + goto err; + } + printf("Table '%s'", name); + /* we try hard to get create_rename_lsn, to avoid mistakes if possible */ + info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR); + if (info) + { + MARIA_SHARE *share= info->s; + /* check that we're not already using it */ + DBUG_ASSERT(share->reopen == 1); + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + if (!share->base.born_transactional) + { + /* + could be that transactional table was later dropped, and a non-trans + one was renamed to its name, thus create_rename_lsn is 0 and should + not be trusted. + */ + printf(", is not transactional\n"); + DBUG_ASSERT(0); /* I want to know this */ + goto end; + } + if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than record", + (ulong) LSN_FILE_NO(rec->lsn), + (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + if (maria_is_crashed(info)) + { + printf(", is crashed, overwriting it"); + DBUG_ASSERT(0); /* I want to know this */ + } + maria_close(info); + info= NULL; + } + /* if does not exist, is older, or its header is corrupted, overwrite it */ + // TODO symlinks + ptr= name + strlen(name) + 1; + if ((flags= ptr[0] ? HA_DONT_TOUCH_DATA : 0)) + printf(", we will only touch index file"); + fn_format(filename, name, "", MARIA_NAME_IEXT, + (MY_UNPACK_FILENAME | + (flags & HA_DONT_TOUCH_DATA) ? 
MY_RETURN_REAL_PATH : 0) | + MY_APPEND_EXT); + linkname_ptr= NULL; + create_flag= MY_DELETE_OLD; + printf(", creating as '%s'", filename); + if ((kfile= my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME|create_flag))) < 0) + { + fprintf(stderr, "Failed to create index file\n"); + goto err; + } + ptr++; + uint kfile_size_before_extension= uint2korr(ptr); + ptr+= 2; + uint keystart= uint2korr(ptr); + ptr+= 2; + /* set create_rename_lsn (for maria_read_log to be idempotent) */ + lsn_store(ptr + sizeof(info->s->state.header) + 2, rec->lsn); + if (my_pwrite(kfile, ptr, + kfile_size_before_extension, 0, MYF(MY_NABP|MY_WME)) || + my_chsize(kfile, keystart, 0, MYF(MY_WME))) + { + fprintf(stderr, "Failed to write to index file\n"); + goto err; + } + if (!(flags & HA_DONT_TOUCH_DATA)) + { + fn_format(filename,name,"", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr= NULL; + create_flag=MY_DELETE_OLD; + if ((dfile= + my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME | create_flag))) < 0) + { + fprintf(stderr, "Failed to create data file\n"); + goto err; + } + /* + we now have an empty data file. To be able to + _ma_initialize_data_file() we need some pieces of the share to be + correctly filled. So we just open the table (fortunately, an empty + data file does not preclude this). 
+    */
+    if (((info= maria_open(name, O_RDONLY, 0)) == NULL) ||
+        _ma_initialize_data_file(info->s, dfile))
+    {
+      fprintf(stderr, "Failed to open new table or write to data file\n");
+      goto err;
+    }
+  }
+  error= 0;
+  goto end;
+err:
+  DBUG_ASSERT(0);
+  error= 1;
+end:
+  printf("\n");
+  if (kfile >= 0)
+    error|= my_close(kfile, MYF(MY_WME));
+  if (dfile >= 0)
+    error|= my_close(dfile, MYF(MY_WME));
+  if (info != NULL)
+    error|= maria_close(info);
+  my_free(name, MYF(MY_ALLOW_ZERO_PTR));
+  /*
+    NOTE(review): 'error' is accumulated above but 0 is returned. Returning
+    'error' would first require it to be set on every path that jumps to
+    "end".
+  */
+  return 0;
+}
+
+
+/*
+  Hook for FILE_ID: reads the record, which holds a short table id followed
+  by a table name, closes the table currently registered under that id in
+  all_tables[] (if any), then opens the named table and registers it there
+  with logging temporarily disabled.
+
+  Returns 0 when the table was opened, or was skipped because it cannot be
+  opened or is not transactional; non-zero when reading the record or
+  closing the previous table failed, or when the table is marked crashed.
+*/
+prototype_exec_hook(FILE_ID)
+{
+  uint16 sid;
+  int error= 0;                 /* the "skip" paths jump to end without setting it */
+  char *name, *buff;
+  MARIA_HA *info= NULL;
+  MARIA_SHARE *share;
+  if (((buff= my_malloc(rec->record_length, MYF(MY_WME))) == NULL) ||
+      (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) !=
+       rec->record_length))
+  {
+    fprintf(stderr, "Failed to read record\n");
+    goto err;
+  }
+  sid= fileid_korr(buff);
+  name= buff + FILEID_STORE_SIZE;
+  printf("Table '%s', id %u", name, sid);
+  info= all_tables[sid];
+  if (info != NULL)
+  {
+    printf(", closing table '%s'", info->s->open_file_name);
+    all_tables[sid]= NULL;
+    _ma_reenable_logging_for_table(info->s); /* put back the truth */
+    if (maria_close(info))
+    {
+      fprintf(stderr, "Failed to close table\n");
+      goto err;
+    }
+  }
+  info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR);
+  if (info == NULL)
+  {
+    printf(", is absent (must have been dropped later?)"
+           " or its header is so corrupted that we cannot open it;"
+           " we skip it\n");
+    goto end;
+  }
+  if (maria_is_crashed(info))
+  {
+    fprintf(stderr, "Table is crashed, can't apply log records to it\n");
+    goto err;
+  }
+  share= info->s;
+  /* check that we're not already using it */
+  DBUG_ASSERT(share->reopen == 1);
+  DBUG_ASSERT(share->now_transactional == share->base.born_transactional);
+  if (!share->base.born_transactional)
+  {
+    printf(", is not transactional\n");
+    DBUG_ASSERT(0); /* I want to know this */
+    goto end;
+  }
+  all_tables[sid]= info;
+  /* don't log any records for this work */
+  _ma_tmp_disable_logging_for_table(share);
+  printf(", opened\n");
+  error= 0;
+  goto end;
+err:
+  DBUG_ASSERT(0);
+  error= 1;
+  if (info != NULL)
+    error|= maria_close(info);
+end:
+  my_free(buff, MYF(MY_ALLOW_ZERO_PTR));
+  /* report the outcome to the caller instead of always claiming success */
+  return error;
+}
+
+
+prototype_exec_hook(REDO_INSERT_ROW_HEAD)
+{
+  uint16 sid;
+  ulonglong page;
+  MARIA_HA *info;
+  char llbuf[22];
+  byte *buff= 0;
+
+  sid= fileid_korr(rec->header);
+  page= page_korr(rec->header + FILEID_STORE_SIZE);
+  llstr(page, llbuf);
+  printf("For page %s of table of short id %u", llbuf, sid);
+  info= all_tables[sid];
+  if (info == NULL)
+  {
+    printf(", table skipped, so skipping record\n");
+    goto end;
+  }
+  printf(", '%s'", info->s->open_file_name);
+  if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0)
+  {
+    printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log"
+           " record\n",
+           (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn));
+    goto end;
+  }
+  /*
+    Soon we will also skip the page depending on the rec_lsn for this page in
+    the checkpoint record, but this is not absolutely needed for now (just
+    assume we have made no checkpoint).
+  */
+  printf(", applying record\n");
+  /*
+    If REDO's LSN is > page's LSN (read from disk), we are going to modify the
+    page and change its LSN. The normal runtime code stores the UNDO's LSN
+    into the page. Here storing the REDO's LSN (rec->lsn) would work
+    (we are not writing to the log here, so don't have to "flush up to UNDO's
+    LSN"). But in a test scenario where we do updates at runtime, then remove
+    tables, apply the log and check that this results in the same table as at
+    runtime, putting the same LSN as runtime had done will decrease
+    differences. So we use the UNDO's LSN which is current_group_end_lsn.
+ */ + + if ((!(buff= (byte*) my_malloc(rec->record_length, MYF(MY_WME)))) || + (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) != + rec->record_length)) + { + fprintf(stderr, "Failed to read record\n"); + goto end; + } + if (_ma_apply_redo_insert_row_head_or_tail(info, rec->lsn, HEAD_PAGE, + rec->header + FILEID_STORE_SIZE, + buff + (rec->record_length - + rec->non_header_data_len), + rec->non_header_data_len)) + goto end; + my_free(buff, MYF(0)); + return 0; + +end: + /* as we don't have apply working: */ + my_free(buff, MYF(MY_ALLOW_ZERO_PTR)); + return 1; +} + + +prototype_exec_hook(REDO_INSERT_ROW_TAIL) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + char llbuf[22]; + byte *buff= 0; + + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page. Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). 
But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. + */ + + if ((!(buff= (byte*) my_malloc(rec->record_length, MYF(MY_WME)))) || + (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) != + rec->record_length)) + { + fprintf(stderr, "Failed to read record\n"); + goto end; + } + if (_ma_apply_redo_insert_row_head_or_tail(info, rec->lsn, TAIL_PAGE, + rec->header + FILEID_STORE_SIZE, + buff + (rec->record_length - + rec->non_header_data_len), + rec->non_header_data_len)) + goto end; + + my_free(buff, MYF(0)); + return 0; + +end: + /* as we don't have apply working: */ + my_free(buff, MYF(MY_ALLOW_ZERO_PTR)); + return 1; +} + + +prototype_exec_hook(REDO_PURGE_ROW_HEAD) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + char llbuf[22]; + + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page. 
Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. + */ + + if (_ma_apply_redo_purge_row_head_or_tail(info, rec->lsn, HEAD_PAGE, + rec->header + FILEID_STORE_SIZE)) + goto end; + + return 0; + +end: + /* as we don't have apply working: */ + return 1; +} + + +prototype_exec_hook(REDO_PURGE_ROW_TAIL) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + char llbuf[22]; + + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page. Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). 
But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. + */ + + if (_ma_apply_redo_purge_row_head_or_tail(info, rec->lsn, TAIL_PAGE, + rec->header + FILEID_STORE_SIZE)) + goto end; + + return 0; + +end: + /* as we don't have apply working: */ + return 1; +} + + +static int exec_LOGREC_UNDO_ROW_INSERT(const TRANSLOG_HEADER_BUFFER *rec + __attribute__((unused))) +{ + /* Ignore this during the redo phase */ + return 0; +} + +static int exec_LOGREC_UNDO_ROW_DELETE(const TRANSLOG_HEADER_BUFFER *rec + __attribute__((unused))) +{ + /* Ignore this during the redo phase */ + return 0; +} + + + +prototype_exec_hook(COMMIT) +{ + uint16 sid= rec->short_trid; + TrID long_trid= all_active_trans[sid].long_trid; + LSN gslsn= all_active_trans[sid].group_start_lsn; + char llbuf[22]; + if (long_trid == 0) + { + printf("We don't know about transaction short_trid %u;" + "it probably committed long ago, forget it\n", sid); + return 0; + } + llstr(long_trid, llbuf); + printf("Transaction long_trid %s short_trid %u committed", llbuf, sid); + if (gslsn != LSN_IMPOSSIBLE) + { + /* + It's not an error, it may be that trn got a disk error when writing to a + table, so an unfinished group staid in the log. + */ + printf(", with group at LSN (%lu,0x%lx) short_trid %u aborted\n", + (ulong) LSN_FILE_NO(gslsn), (ulong) LSN_OFFSET(gslsn), sid); + all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE; + } + else + printf("\n"); + all_active_trans[sid].long_trid= 0; +#ifdef MARIA_VERSIONING + /* + if real recovery: + transaction was committed, move it to some separate list for later + purging (but don't purge now! purging may have been started before, we + may find REDO_PURGE records soon). 
+ */ +#endif + return 0; +} + + +/* Just to inform about any aborted groups or unfinished transactions */ +static void end_of_redo_phase() +{ + uint sid, unfinished= 0; + for (sid= 0; sid <= SHORT_TRID_MAX; sid++) + { + TrID long_trid= all_active_trans[sid].long_trid; + LSN gslsn= all_active_trans[sid].group_start_lsn; + if (long_trid == 0) + continue; + if (all_active_trans[sid].undo_lsn != LSN_IMPOSSIBLE) + { + char llbuf[22]; + llstr(long_trid, llbuf); + printf("Transaction long_trid %s short_trid %u unfinished\n", + llbuf, sid); + } + if (gslsn != LSN_IMPOSSIBLE) + { + printf("Group at LSN (%lu,0x%lx) short_trid %u aborted\n", + (ulong) LSN_FILE_NO(gslsn), (ulong) LSN_OFFSET(gslsn), sid); + } + /* If real recovery: roll back unfinished transaction */ +#ifdef MARIA_VERSIONING + /* + If real recovery: transaction was committed, move it to some separate + list for soon purging. + */ +#endif + } + /* + We don't close tables if there are some unfinished transactions, because + closing tables normally requires that all unfinished transactions on them + be rolled back. + For example, closing will soon write the state to disk and when doing that + it will think this is a committed state, but it may not be. + */ + if (unfinished == 0) + { + for (sid= 0; sid <= SHORT_TRID_MAX; sid++) + { + MARIA_HA *info= all_tables[sid]; + if (info != NULL) + { + _ma_reenable_logging_for_table(info->s); /* put back the truth */ + maria_close(info); + } + } + } +} diff --git a/storage/maria/trnman_public.h b/storage/maria/trnman_public.h index 3e0a21c26a6..e1891466c4d 100644 --- a/storage/maria/trnman_public.h +++ b/storage/maria/trnman_public.h @@ -20,6 +20,9 @@ to include my_atomic.h in C++ code. 
*/ +#ifndef _trnman_public_h +#define _trnman_public_h + #include "ma_loghandler_lsn.h" C_MODE_START @@ -52,3 +55,4 @@ my_bool trnman_has_locked_tables(TRN *trn); void trnman_reset_locked_tables(TRN *trn); C_MODE_END +#endif diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index 71a1157f1ba..a7472361dad 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -121,8 +121,8 @@ static int delete_file(myf my_flags) The error will however be printed on stderr. */ my_delete(file_name, my_flags); - expect_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; - expect_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + expect_checkpoint_lsn= LSN_IMPOSSIBLE; + expect_logno= FILENO_IMPOSSIBLE; return 0; } @@ -146,9 +146,9 @@ static int verify_module_values_match_expected() */ static int verify_module_values_are_impossible() { - RET_ERR_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + RET_ERR_UNLESS(last_logno == FILENO_IMPOSSIBLE); RET_ERR_UNLESS(last_checkpoint_lsn == - CONTROL_FILE_IMPOSSIBLE_LSN); + LSN_IMPOSSIBLE); return 0; } @@ -164,7 +164,7 @@ static int close_file() static int create_or_open_file() { - RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_OK); + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == CONTROL_FILE_OK); /* Check that the module reports expected information */ RET_ERR_UNLESS(verify_module_values_match_expected() == 0); return 0; @@ -188,7 +188,7 @@ static int test_one_log() RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; expect_logno= 123; - RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, + RET_ERR_UNLESS(write_file(LSN_IMPOSSIBLE, expect_logno, objs_to_write) == 0); RET_ERR_UNLESS(close_file() == 0); @@ -206,7 +206,7 @@ static int test_five_logs() for (i= 0; i<5; i++) { expect_logno*= 3; - RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, expect_logno, + 
RET_ERR_UNLESS(write_file(LSN_IMPOSSIBLE, expect_logno, objs_to_write) == 0); } RET_ERR_UNLESS(close_file() == 0); @@ -320,7 +320,7 @@ static int test_bad_magic_string() RET_ERR_UNLESS(my_pwrite(fd, "papa", 4, 0, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ - RET_ERR_UNLESS(ma_control_file_create_or_open() == + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == CONTROL_FILE_BAD_MAGIC_STRING); /* Restore magic string */ RET_ERR_UNLESS(my_pwrite(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0); @@ -346,7 +346,7 @@ static int test_bad_checksum() buffer[0]+= 3; /* mangle checksum */ RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ - RET_ERR_UNLESS(ma_control_file_create_or_open() == + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == CONTROL_FILE_BAD_CHECKSUM); /* Restore checksum */ buffer[0]-= 3; @@ -369,10 +369,11 @@ static int test_bad_size() MYF(MY_WME))) >= 0); RET_ERR_UNLESS(my_write(fd, buffer, 10, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ - RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_SMALL); + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == + CONTROL_FILE_TOO_SMALL); RET_ERR_UNLESS(my_write(fd, buffer, 30, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ - RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_BIG); + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == CONTROL_FILE_TOO_BIG); RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); /* Leave a correct control file */ diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c index 40f9e72c3b2..a6bd53e949d 100644 --- a/storage/maria/unittest/ma_test_loghandler-t.c +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -164,7 +164,7 @@ int main(int argc __attribute__((unused)), char *argv[]) } #endif - if 
(ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "Can't init control file (%d)\n", errno); exit(1); @@ -336,7 +336,7 @@ int main(int argc __attribute__((unused)), char *argv[]) ma_control_file_end(); - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "pass2: Can't init control file (%d)\n", errno); exit(1); @@ -398,7 +398,7 @@ int main(int argc __attribute__((unused)), char *argv[]) i, errno); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { if (i != ITERATIONS) { @@ -477,7 +477,7 @@ int main(int argc __attribute__((unused)), char *argv[]) "failed (%d)\n", i, errno); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { fprintf(stderr, "EOL met at the middle of iteration (first var) %u " "instead of beginning of %u\n", i, ITERATIONS); @@ -572,7 +572,7 @@ int main(int argc __attribute__((unused)), char *argv[]) i, errno); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { fprintf(stderr, "EOL met at the middle of iteration %u " "instead of beginning of %u\n", i, ITERATIONS); diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c index 6a27321ec98..4c534ad4e05 100644 --- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -161,7 +161,7 @@ int main(int argc __attribute__((unused)), char *argv[]) } #endif - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "Can't init control file (%d)\n", errno); exit(1); @@ -325,7 +325,7 @@ int main(int argc __attribute__((unused)), char *argv[]) end_pagecache(&pagecache, 1); ma_control_file_end(); - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "pass2: Can't init 
control file (%d)\n", errno); exit(1); @@ -390,7 +390,7 @@ int main(int argc __attribute__((unused)), char *argv[]) translog_free_record_header(&rec); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { if (i != ITERATIONS) { @@ -470,7 +470,7 @@ int main(int argc __attribute__((unused)), char *argv[]) "failed (%d)\n", i, errno); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { fprintf(stderr, "EOL met at the middle of iteration (first var) %u " "instead of beginning of %u\n", i, ITERATIONS); @@ -568,7 +568,7 @@ int main(int argc __attribute__((unused)), char *argv[]) translog_free_record_header(&rec); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { fprintf(stderr, "EOL met at the middle of iteration %u " "instead of beginning of %u\n", i, ITERATIONS); diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c index bf3ede113c0..7bb4a5aba77 100644 --- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -270,7 +270,7 @@ int main(int argc __attribute__((unused)), my_thread_global_init(); - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "Can't init control file (%d)\n", errno); exit(1); @@ -384,7 +384,7 @@ int main(int argc __attribute__((unused)), translog_free_record_header(&rec); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { if (i != WRITERS * ITERATIONS * 2) { diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index 327b8300fbb..804dd961fbc 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -56,7 +56,7 @@ int main(int argc 
__attribute__((unused)), char *argv[]) } #endif - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "Can't init control file (%d)\n", errno); exit(1); diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c index b0f1ef8ff8b..b5cec80ef99 100644 --- a/storage/myisam/ft_stopwords.c +++ b/storage/myisam/ft_stopwords.c @@ -51,10 +51,11 @@ static int ft_add_stopword(const char *w) int ft_init_stopwords() { + DBUG_ENTER("ft_init_stopwords"); if (!stopwords3) { if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) - return -1; + DBUG_RETURN(-1); init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp, 0, (ft_stopword_file ? (tree_element_free)&FT_STOPWORD_free : 0), @@ -70,10 +71,10 @@ int ft_init_stopwords() int error=-1; if (!*ft_stopword_file) - return 0; + DBUG_RETURN(0); if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1) - return -1; + DBUG_RETURN(-1); len=(uint)my_seek(fd, 0L, MY_SEEK_END, MYF(0)); my_seek(fd, 0L, MY_SEEK_SET, MYF(0)); if (!(start=buffer=my_malloc(len+1, MYF(MY_WME)))) @@ -90,7 +91,7 @@ err1: my_free(buffer, MYF(0)); err0: my_close(fd, MYF(MY_WME)); - return error; + DBUG_RETURN(error); } else { @@ -100,13 +101,14 @@ err0: for (;*sws;sws++) { if (ft_add_stopword(*sws)) - return -1; + DBUG_RETURN(-1); } ft_stopword_file="(built-in)"; /* for SHOW VARIABLES */ } - return 0; + DBUG_RETURN(0); } + int is_stopword(char *word, uint len) { FT_STOPWORD sw; @@ -118,6 +120,8 @@ int is_stopword(char *word, uint len) void ft_free_stopwords() { + DBUG_ENTER("ft_free_stopwords"); + if (stopwords3) { delete_tree(stopwords3); /* purecov: inspected */ @@ -125,4 +129,5 @@ void ft_free_stopwords() stopwords3=0; } ft_stopword_file= 0; + DBUG_VOID_RETURN; } diff --git a/storage/myisam/mi_close.c b/storage/myisam/mi_close.c index 07105aea88d..747555dbdfb 100644 --- a/storage/myisam/mi_close.c +++ b/storage/myisam/mi_close.c @@ -75,6 +75,7 @@ int mi_close(register 
MI_INFO *info) not change the crashed state. We can NOT write the state in other cases as other threads may be using the file at this point + IF using --external-locking. */ if (share->mode != O_RDONLY && mi_is_crashed(info)) mi_state_info_write(share->kfile, &share->state, 1); diff --git a/support-files/compiler_warnings.supp b/support-files/compiler_warnings.supp index 0a2c720b81b..a4048c2fbc0 100644 --- a/support-files/compiler_warnings.supp +++ b/support-files/compiler_warnings.supp @@ -65,6 +65,11 @@ db_vrfy.c : .*comparison is always false due to limited range of data type.* .*/ndb/.* : .*defined but not used.* # +# Maria warning that is ok in debug builds +# +storage/maria/ma_pagecache.c: .*'info_check_pin' defined but not used + +# # Unexplanable (?) stuff # listener.cc : .*conversion from 'SOCKET' to 'int'.* |