diff options
57 files changed, 4633 insertions, 476 deletions
diff --git a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result index 537b30520bd..608ada570fb 100644 --- a/mysql-test/r/information_schema.result +++ b/mysql-test/r/information_schema.result @@ -54,9 +54,13 @@ EVENTS FILES GLOBAL_STATUS GLOBAL_VARIABLES +INNODB_BUFFER_PAGE +INNODB_BUFFER_PAGE_LRU INNODB_BUFFER_POOL_PAGES INNODB_BUFFER_POOL_PAGES_BLOB INNODB_BUFFER_POOL_PAGES_INDEX +INNODB_BUFFER_POOL_STATS +INNODB_CHANGED_PAGES INNODB_CMP INNODB_CMPMEM INNODB_CMPMEM_RESET @@ -89,7 +93,6 @@ TRIGGERS USER_PRIVILEGES VIEWS XTRADB_ADMIN_COMMAND -XTRADB_ENHANCEMENTS columns_priv db event @@ -860,6 +863,8 @@ TABLE_NAME COLUMN_NAME PRIVILEGES COLUMNS TABLE_NAME select COLUMN_PRIVILEGES TABLE_NAME select FILES TABLE_NAME select +INNODB_BUFFER_PAGE TABLE_NAME select +INNODB_BUFFER_PAGE_LRU TABLE_NAME select INNODB_INDEX_STATS table_name select INNODB_TABLE_STATS table_name select KEY_COLUMN_USAGE TABLE_NAME select @@ -1245,12 +1250,12 @@ DROP PROCEDURE p1; DROP USER mysql_bug20230@localhost; SELECT MAX(table_name) FROM information_schema.tables WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test'); MAX(table_name) -XTRADB_ENHANCEMENTS +XTRADB_ADMIN_COMMAND SELECT table_name from information_schema.tables WHERE table_name=(SELECT MAX(table_name) FROM information_schema.tables WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test')); table_name -XTRADB_ENHANCEMENTS +XTRADB_ADMIN_COMMAND DROP TABLE IF EXISTS bug23037; DROP FUNCTION IF EXISTS get_value; SELECT COLUMN_NAME, MD5(COLUMN_DEFAULT), LENGTH(COLUMN_DEFAULT) FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME='bug23037'; diff --git a/mysql-test/r/information_schema_all_engines.result b/mysql-test/r/information_schema_all_engines.result index a2209f84a45..941aa7e1380 100644 --- a/mysql-test/r/information_schema_all_engines.result +++ b/mysql-test/r/information_schema_all_engines.result @@ -33,13 +33,13 @@ INNODB_BUFFER_POOL_PAGES PBXT_STATISTICS INNODB_CMP INNODB_RSEG 
-XTRADB_ENHANCEMENTS +INNODB_INDEX_STATS INNODB_BUFFER_POOL_PAGES_INDEX XTRADB_ADMIN_COMMAND INNODB_TRX INNODB_SYS_TABLES INNODB_LOCK_WAITS -INNODB_SYS_STATS +INNODB_BUFFER_POOL_STATS INNODB_LOCKS INNODB_CMPMEM INNODB_TABLE_STATS @@ -47,7 +47,10 @@ INNODB_SYS_INDEXES INNODB_CMP_RESET INNODB_BUFFER_POOL_PAGES_BLOB INNODB_CMPMEM_RESET -INNODB_INDEX_STATS +INNODB_BUFFER_PAGE +INNODB_CHANGED_PAGES +INNODB_SYS_STATS +INNODB_BUFFER_PAGE_LRU SELECT t.table_name, c1.column_name FROM information_schema.tables t INNER JOIN @@ -95,13 +98,13 @@ INNODB_BUFFER_POOL_PAGES page_type PBXT_STATISTICS ID INNODB_CMP page_size INNODB_RSEG rseg_id -XTRADB_ENHANCEMENTS name +INNODB_INDEX_STATS table_schema INNODB_BUFFER_POOL_PAGES_INDEX index_id XTRADB_ADMIN_COMMAND result_message INNODB_TRX trx_id INNODB_SYS_TABLES SCHEMA INNODB_LOCK_WAITS requesting_trx_id -INNODB_SYS_STATS INDEX_ID +INNODB_BUFFER_POOL_STATS POOL_SIZE INNODB_LOCKS lock_id INNODB_CMPMEM page_size INNODB_TABLE_STATS table_schema @@ -109,7 +112,10 @@ INNODB_SYS_INDEXES TABLE_ID INNODB_CMP_RESET page_size INNODB_BUFFER_POOL_PAGES_BLOB space_id INNODB_CMPMEM_RESET page_size -INNODB_INDEX_STATS table_schema +INNODB_BUFFER_PAGE BLOCK_ID +INNODB_CHANGED_PAGES space_id +INNODB_SYS_STATS INDEX_ID +INNODB_BUFFER_PAGE_LRU LRU_POSITION SELECT t.table_name, c1.column_name FROM information_schema.tables t INNER JOIN @@ -157,13 +163,13 @@ INNODB_BUFFER_POOL_PAGES page_type PBXT_STATISTICS ID INNODB_CMP page_size INNODB_RSEG rseg_id -XTRADB_ENHANCEMENTS name +INNODB_INDEX_STATS table_schema INNODB_BUFFER_POOL_PAGES_INDEX index_id XTRADB_ADMIN_COMMAND result_message INNODB_TRX trx_id INNODB_SYS_TABLES SCHEMA INNODB_LOCK_WAITS requesting_trx_id -INNODB_SYS_STATS INDEX_ID +INNODB_BUFFER_POOL_STATS POOL_SIZE INNODB_LOCKS lock_id INNODB_CMPMEM page_size INNODB_TABLE_STATS table_schema @@ -171,7 +177,10 @@ INNODB_SYS_INDEXES TABLE_ID INNODB_CMP_RESET page_size INNODB_BUFFER_POOL_PAGES_BLOB space_id INNODB_CMPMEM_RESET page_size 
-INNODB_INDEX_STATS table_schema +INNODB_BUFFER_PAGE BLOCK_ID +INNODB_CHANGED_PAGES space_id +INNODB_SYS_STATS INDEX_ID +INNODB_BUFFER_PAGE_LRU LRU_POSITION select 1 as f1 from information_schema.tables where "CHARACTER_SETS"= (select cast(table_name as char) from information_schema.tables order by table_name limit 1) limit 1; @@ -203,9 +212,13 @@ EVENTS information_schema.EVENTS 1 FILES information_schema.FILES 1 GLOBAL_STATUS information_schema.GLOBAL_STATUS 1 GLOBAL_VARIABLES information_schema.GLOBAL_VARIABLES 1 +INNODB_BUFFER_PAGE information_schema.INNODB_BUFFER_PAGE 1 +INNODB_BUFFER_PAGE_LRU information_schema.INNODB_BUFFER_PAGE_LRU 1 INNODB_BUFFER_POOL_PAGES information_schema.INNODB_BUFFER_POOL_PAGES 1 INNODB_BUFFER_POOL_PAGES_BLOB information_schema.INNODB_BUFFER_POOL_PAGES_BLOB 1 INNODB_BUFFER_POOL_PAGES_INDEX information_schema.INNODB_BUFFER_POOL_PAGES_INDEX 1 +INNODB_BUFFER_POOL_STATS information_schema.INNODB_BUFFER_POOL_STATS 1 +INNODB_CHANGED_PAGES information_schema.INNODB_CHANGED_PAGES 1 INNODB_CMP information_schema.INNODB_CMP 1 INNODB_CMPMEM information_schema.INNODB_CMPMEM 1 INNODB_CMPMEM_RESET information_schema.INNODB_CMPMEM_RESET 1 @@ -238,7 +251,6 @@ TABLE_PRIVILEGES information_schema.TABLE_PRIVILEGES 1 TRIGGERS information_schema.TRIGGERS 1 USER_PRIVILEGES information_schema.USER_PRIVILEGES 1 VIEWS information_schema.VIEWS 1 -XTRADB_ENHANCEMENTS information_schema.XTRADB_ENHANCEMENTS 1 Database: information_schema +---------------------------------------+ | Tables | @@ -275,13 +287,13 @@ Database: information_schema | PBXT_STATISTICS | | INNODB_CMP | | INNODB_RSEG | -| XTRADB_ENHANCEMENTS | +| INNODB_INDEX_STATS | | INNODB_BUFFER_POOL_PAGES_INDEX | | XTRADB_ADMIN_COMMAND | | INNODB_TRX | | INNODB_SYS_TABLES | | INNODB_LOCK_WAITS | -| INNODB_SYS_STATS | +| INNODB_BUFFER_POOL_STATS | | INNODB_LOCKS | | INNODB_CMPMEM | | INNODB_TABLE_STATS | @@ -289,7 +301,10 @@ Database: information_schema | INNODB_CMP_RESET | | 
INNODB_BUFFER_POOL_PAGES_BLOB | | INNODB_CMPMEM_RESET | -| INNODB_INDEX_STATS | +| INNODB_BUFFER_PAGE | +| INNODB_CHANGED_PAGES | +| INNODB_SYS_STATS | +| INNODB_BUFFER_PAGE_LRU | +---------------------------------------+ Database: INFORMATION_SCHEMA +---------------------------------------+ @@ -327,13 +342,13 @@ Database: INFORMATION_SCHEMA | PBXT_STATISTICS | | INNODB_CMP | | INNODB_RSEG | -| XTRADB_ENHANCEMENTS | +| INNODB_INDEX_STATS | | INNODB_BUFFER_POOL_PAGES_INDEX | | XTRADB_ADMIN_COMMAND | | INNODB_TRX | | INNODB_SYS_TABLES | | INNODB_LOCK_WAITS | -| INNODB_SYS_STATS | +| INNODB_BUFFER_POOL_STATS | | INNODB_LOCKS | | INNODB_CMPMEM | | INNODB_TABLE_STATS | @@ -341,7 +356,10 @@ Database: INFORMATION_SCHEMA | INNODB_CMP_RESET | | INNODB_BUFFER_POOL_PAGES_BLOB | | INNODB_CMPMEM_RESET | -| INNODB_INDEX_STATS | +| INNODB_BUFFER_PAGE | +| INNODB_CHANGED_PAGES | +| INNODB_SYS_STATS | +| INNODB_BUFFER_PAGE_LRU | +---------------------------------------+ Wildcard: inf_rmation_schema +--------------------+ @@ -351,5 +369,5 @@ Wildcard: inf_rmation_schema +--------------------+ SELECT table_schema, count(*) FROM information_schema.TABLES WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test', 'mysqltest') AND table_name<>'ndb_binlog_index' AND table_name<>'ndb_apply_status' GROUP BY TABLE_SCHEMA; table_schema count(*) -information_schema 47 +information_schema 50 mysql 22 diff --git a/mysql-test/r/innodb.result b/mysql-test/r/innodb.result index 718e363adb7..a34fc94fa99 100644 --- a/mysql-test/r/innodb.result +++ b/mysql-test/r/innodb.result @@ -3198,7 +3198,7 @@ c21 CHAR(255), c22 CHAR(255), c23 CHAR(255), c24 CHAR(255), c25 CHAR(255), c26 CHAR(255), c27 CHAR(255), c28 CHAR(255), c29 CHAR(255), c30 CHAR(255), c31 CHAR(255), c32 CHAR(255) ) ENGINE = InnoDB; -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs +ERROR 42000: Row size too large (> 8126). 
Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. DROP TABLE IF EXISTS t1; Warnings: Note 1051 Unknown table 't1' diff --git a/mysql-test/suite/innodb_plugin/r/innodb-index.result b/mysql-test/suite/innodb_plugin/r/innodb-index.result index 37bd81e5ec6..bf7c382327b 100644 --- a/mysql-test/suite/innodb_plugin/r/innodb-index.result +++ b/mysql-test/suite/innodb_plugin/r/innodb-index.result @@ -1096,7 +1096,7 @@ PRIMARY KEY (b(10), a), INDEX (c(10)) INSERT INTO bug12547647 VALUES (5,repeat('khdfo5AlOq',1900),repeat('g',7731)); COMMIT; UPDATE bug12547647 SET c = REPEAT('b',16928); -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs +ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. DROP TABLE bug12547647; SET @r=REPEAT('a',500); CREATE TABLE t1(a INT, diff --git a/mysql-test/suite/innodb_plugin/r/innodb-zip.result b/mysql-test/suite/innodb_plugin/r/innodb-zip.result index 16947bf16dc..5ee0854367a 100644 --- a/mysql-test/suite/innodb_plugin/r/innodb-zip.result +++ b/mysql-test/suite/innodb_plugin/r/innodb-zip.result @@ -125,12 +125,12 @@ CREATE TABLE t1( c TEXT NOT NULL, d TEXT NOT NULL, PRIMARY KEY (c(767),d(767))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs +ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
CREATE TABLE t1( c TEXT NOT NULL, d TEXT NOT NULL, PRIMARY KEY (c(767),d(767))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs +ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. CREATE TABLE t1( c TEXT NOT NULL, d TEXT NOT NULL, PRIMARY KEY (c(767),d(767))) @@ -138,7 +138,7 @@ ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; drop table t1; CREATE TABLE t1(c TEXT, PRIMARY KEY (c(440))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs +ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. CREATE TABLE t1(c TEXT, PRIMARY KEY (c(438))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); diff --git a/mysql-test/suite/innodb_plugin/r/innodb.result b/mysql-test/suite/innodb_plugin/r/innodb.result index f346e2d5917..95db9b5e983 100644 --- a/mysql-test/suite/innodb_plugin/r/innodb.result +++ b/mysql-test/suite/innodb_plugin/r/innodb.result @@ -3151,7 +3151,7 @@ c21 CHAR(255), c22 CHAR(255), c23 CHAR(255), c24 CHAR(255), c25 CHAR(255), c26 CHAR(255), c27 CHAR(255), c28 CHAR(255), c29 CHAR(255), c30 CHAR(255), c31 CHAR(255), c32 CHAR(255) ) ENGINE = InnoDB; -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs +ERROR 42000: Row size too large (> 8126). 
Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. DROP TABLE IF EXISTS t1; Warnings: Note 1051 Unknown table 't1' diff --git a/mysql-test/suite/innodb_plugin/r/innodb_bug53591.result b/mysql-test/suite/innodb_plugin/r/innodb_bug53591.result index cf226464a4b..b0196318801 100644 --- a/mysql-test/suite/innodb_plugin/r/innodb_bug53591.result +++ b/mysql-test/suite/innodb_plugin/r/innodb_bug53591.result @@ -8,7 +8,7 @@ ERROR HY000: Too big row SHOW WARNINGS; Level Code Message Error 139 Too big row -Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs +Error 1118 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. DROP TABLE bug53591; SET GLOBAL innodb_file_format=Antelope; SET GLOBAL innodb_file_per_table=0; diff --git a/mysql-test/suite/innodb_plugin/r/innodb_misc1.result b/mysql-test/suite/innodb_plugin/r/innodb_misc1.result index 5b1774c6e99..81c65c34554 100644 --- a/mysql-test/suite/innodb_plugin/r/innodb_misc1.result +++ b/mysql-test/suite/innodb_plugin/r/innodb_misc1.result @@ -774,7 +774,7 @@ c21 CHAR(255), c22 CHAR(255), c23 CHAR(255), c24 CHAR(255), c25 CHAR(255), c26 CHAR(255), c27 CHAR(255), c28 CHAR(255), c29 CHAR(255), c30 CHAR(255), c31 CHAR(255), c32 CHAR(255) ) ENGINE = InnoDB; -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs +ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. 
SET innodb_strict_mode=OFF; DROP TABLE IF EXISTS t1; Warnings: diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 608d6865bf4..720ee6a6f75 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -62,7 +62,7 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c ibuf/ibuf0ibuf.c pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c lock/lock0lock.c lock/lock0iter.c - log/log0log.c log/log0recv.c + log/log0log.c log/log0recv.c log/log0online.c mach/mach0data.c mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c mtr/mtr0mtr.c diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog index 1588132fc8b..4ef88e3bca1 100644 --- a/storage/xtradb/ChangeLog +++ b/storage/xtradb/ChangeLog @@ -1,3 +1,32 @@ +2012-08-29 The InnoDB Team + + * btr/btr0btr.c, page/page0cur.c, page/page0page.c: + Fix Bug#14554000 CRASH IN PAGE_REC_GET_NTH_CONST(NTH=0) + DURING COMPRESSED PAGE SPLIT + +2012-08-16 The InnoDB Team + + * btr/btr0cur.c: + Fix Bug#12595091 POSSIBLY INVALID ASSERTION IN + BTR_CUR_PESSIMISTIC_UPDATE() + +2012-08-16 The InnoDB Team + + * btr/btr0btr.c, btr/btr0cur.c: + Fix Bug#12845774 OPTIMISTIC INSERT/UPDATE USES WRONG HEURISTICS FOR + COMPRESSED PAGE SIZE + +2012-08-16 The InnoDB Team + + * btr/btr0cur.c, page/page0page.c: + Fix Bug#13523839 ASSERTION FAILURES ON COMPRESSED INNODB TABLES + +2012-08-07 The InnoDB Team + + * btr/btr0pcur.c, row/row0merge.c: + Fix Bug#14399148 INNODB TABLES UNDER LOAD PRODUCE DUPLICATE COPIES + OF ROWS IN QUERIES + 2012-03-15 The InnoDB Team * fil/fil0fil.c, ibuf/ibuf0ibuf.c, include/fil0fil.h, diff --git a/storage/xtradb/Makefile.am b/storage/xtradb/Makefile.am index 4adcc5c45ff..b3450fe5031 100644 --- a/storage/xtradb/Makefile.am +++ b/storage/xtradb/Makefile.am @@ -101,6 +101,7 @@ noinst_HEADERS= \ include/lock0types.h \ include/log0log.h \ include/log0log.ic \ + include/log0online.h \ include/log0recv.h \ include/log0recv.ic \ 
include/mach0data.h \ @@ -226,7 +227,6 @@ noinst_HEADERS= \ include/ut0vec.h \ include/ut0vec.ic \ include/ut0wqueue.h \ - handler/innodb_patch_info.h \ mem/mem0dbg.c noinst_LTLIBRARIES= @plugin_xtradb_static_target@ @@ -265,6 +265,7 @@ libxtradb_la_SOURCES= \ lock/lock0iter.c \ lock/lock0lock.c \ log/log0log.c \ + log/log0online.c \ log/log0recv.c \ mach/mach0data.c \ mem/mem0mem.c \ diff --git a/storage/xtradb/btr/btr0btr.c b/storage/xtradb/btr/btr0btr.c index 75cc80d0967..75be76b9dd1 100644 --- a/storage/xtradb/btr/btr0btr.c +++ b/storage/xtradb/btr/btr0btr.c @@ -664,6 +664,12 @@ btr_root_fseg_validate( { ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET); + if (UNIV_UNLIKELY(srv_pass_corrupt_table)) { + return (mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space) + && (offset >= FIL_PAGE_DATA) + && (offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END); + } + ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space); ut_a(offset >= FIL_PAGE_DATA); ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END); @@ -704,6 +710,17 @@ btr_root_block_get( if (!dict_index_is_ibuf(index)) { const page_t* root = buf_block_get_frame(block); + if (UNIV_UNLIKELY(srv_pass_corrupt_table)) { + if (!btr_root_fseg_validate(FIL_PAGE_DATA + + PAGE_BTR_SEG_LEAF + + root, space)) + return(NULL); + if (!btr_root_fseg_validate(FIL_PAGE_DATA + + PAGE_BTR_SEG_TOP + + root, space)) + return(NULL); + return(block); + } ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF + root, space)); ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP @@ -1852,6 +1869,7 @@ btr_root_raise_and_insert( root = btr_cur_get_page(cursor); root_block = btr_cur_get_block(cursor); root_page_zip = buf_block_get_page_zip(root_block); + ut_ad(page_get_n_recs(root) > 0); #ifdef UNIV_ZIP_DEBUG ut_a(!root_page_zip || page_zip_validate(root_page_zip, root)); #endif /* UNIV_ZIP_DEBUG */ @@ -2332,12 +2350,20 @@ btr_insert_on_non_leaf_level_func( BTR_CONT_MODIFY_TREE, &cursor, 0, file, line, mtr); - err 
= btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG - | BTR_KEEP_SYS_FLAG - | BTR_NO_UNDO_LOG_FLAG, - &cursor, tuple, &rec, - &dummy_big_rec, 0, NULL, mtr); - ut_a(err == DB_SUCCESS); + ut_ad(cursor.flag == BTR_CUR_BINARY); + + err = btr_cur_optimistic_insert( + BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG + | BTR_NO_UNDO_LOG_FLAG, &cursor, tuple, &rec, + &dummy_big_rec, 0, NULL, mtr); + + if (err == DB_FAIL) { + err = btr_cur_pessimistic_insert( + BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG + | BTR_NO_UNDO_LOG_FLAG, + &cursor, tuple, &rec, &dummy_big_rec, 0, NULL, mtr); + ut_a(err == DB_SUCCESS); + } } /**************************************************************//** @@ -3262,6 +3288,7 @@ btr_compress( if (adjust) { nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); + ut_ad(nth_rec > 0); } /* Decide the page to which we try to merge and which will inherit @@ -3497,6 +3524,7 @@ func_exit: mem_heap_free(heap); if (adjust) { + ut_ad(nth_rec > 0); btr_cur_position( index, page_rec_get_nth(merge_block->frame, nth_rec), @@ -4009,8 +4037,22 @@ btr_index_page_validate( { page_cur_t cur; ibool ret = TRUE; +#ifndef DBUG_OFF + ulint nth = 1; +#endif /* !DBUG_OFF */ page_cur_set_before_first(block, &cur); + + /* Directory slot 0 should only contain the infimum record. */ + DBUG_EXECUTE_IF("check_table_rec_next", + ut_a(page_rec_get_nth_const( + page_cur_get_page(&cur), 0) + == cur.rec); + ut_a(page_dir_slot_get_n_owned( + page_dir_get_nth_slot( + page_cur_get_page(&cur), 0)) + == 1);); + page_cur_move_to_next(&cur); for (;;) { @@ -4024,6 +4066,16 @@ btr_index_page_validate( return(FALSE); } + /* Verify that page_rec_get_nth_const() is correctly + retrieving each record. 
*/ + DBUG_EXECUTE_IF("check_table_rec_next", + ut_a(cur.rec == page_rec_get_nth_const( + page_cur_get_page(&cur), + page_rec_get_n_recs_before( + cur.rec))); + ut_a(nth++ == page_rec_get_n_recs_before( + cur.rec));); + page_cur_move_to_next(&cur); } @@ -4435,6 +4487,12 @@ btr_validate_index( mtr_x_lock(dict_index_get_lock(index), &mtr); root = btr_root_get(index, &mtr); + + if (UNIV_UNLIKELY(srv_pass_corrupt_table && !root)) { + mtr_commit(&mtr); + return(FALSE); + } + n = btr_page_get_level(root, &mtr); for (i = 0; i <= n && !trx_is_interrupted(trx); i++) { diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index 4161be93a90..91f14beab96 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -1307,7 +1307,12 @@ fail_err: if (UNIV_UNLIKELY(reorg)) { ut_a(zip_size); - ut_a(*rec); + /* It's possible for rec to be NULL if the + page is compressed. This is because a + reorganized page may become incompressible. */ + if (!*rec) { + goto fail; + } } } @@ -1443,20 +1448,9 @@ btr_cur_pessimistic_insert( ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr, btr_cur_get_block(cursor), MTR_MEMO_PAGE_X_FIX)); - /* Try first an optimistic insert; reset the cursor flag: we do not - assume anything of how it was positioned */ - cursor->flag = BTR_CUR_BINARY; - err = btr_cur_optimistic_insert(flags, cursor, entry, rec, - big_rec, n_ext, thr, mtr); - if (err != DB_FAIL) { - - return(err); - } - - /* Retry with a pessimistic insert. Check locks and write to undo log, - if specified */ + /* Check locks and write to undo log, if specified */ err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, mtr, &dummy_inh); @@ -2083,8 +2077,12 @@ any_extern: goto err_exit; } - max_size = old_rec_size - + page_get_max_insert_size_after_reorganize(page, 1); + /* We do not attempt to reorganize if the page is compressed. + This is because the page may fail to compress after reorganization. */ + max_size = page_zip + ? 
page_get_max_insert_size(page, 1) + : (old_rec_size + + page_get_max_insert_size_after_reorganize(page, 1)); if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT) && (max_size >= new_rec_size)) @@ -2452,7 +2450,12 @@ make_external: err = DB_SUCCESS; goto return_after_reservations; } else { - ut_a(optim_err != DB_UNDERFLOW); + /* If the page is compressed and it initially + compresses very well, and there is a subsequent insert + of a badly-compressing record, it is possible for + btr_cur_optimistic_update() to return DB_UNDERFLOW and + btr_cur_insert_if_possible() to return FALSE. */ + ut_a(page_zip || optim_err != DB_UNDERFLOW); /* Out of space: reset the free bits. */ if (!dict_index_is_clust(index) @@ -2480,8 +2483,10 @@ make_external: record on its page? */ was_first = page_cur_is_before_first(page_cursor); - /* The first parameter means that no lock checking and undo logging - is made in the insert */ + /* Lock checks and undo logging were already performed by + btr_cur_upd_lock_and_undo(). We do not try + btr_cur_optimistic_insert() because + btr_cur_insert_if_possible() already failed above. 
*/ err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG @@ -3483,6 +3488,8 @@ btr_estimate_n_rows_in_range( n_rows = n_rows * 2; } + DBUG_EXECUTE_IF("bug14007649", return(n_rows);); + /* Do not estimate the number of rows in the range to over 1 / 2 of the estimated rows in the whole table */ @@ -3582,9 +3589,9 @@ btr_record_not_null_field_in_rec( for (i = 0; i < n_unique; i++) { if (rec_offs_nth_sql_null(offsets, i)) { - /* Break if we hit the first NULL value */ break; } + n_not_null[i]++; } } diff --git a/storage/xtradb/btr/btr0pcur.c b/storage/xtradb/btr/btr0pcur.c index be1a6cbd8ee..f5323adec91 100644 --- a/storage/xtradb/btr/btr0pcur.c +++ b/storage/xtradb/btr/btr0pcur.c @@ -342,44 +342,39 @@ btr_pcur_restore_position_func( /* Restore the old search mode */ cursor->search_mode = old_mode; - if (btr_pcur_is_on_user_rec(cursor)) { - switch (cursor->rel_pos) { - case BTR_PCUR_ON: - if (!cmp_dtuple_rec( - tuple, btr_pcur_get_rec(cursor), - rec_get_offsets(btr_pcur_get_rec(cursor), - index, NULL, - ULINT_UNDEFINED, &heap))) { - - /* We have to store the NEW value for - the modify clock, since the cursor can - now be on a different page! But we can - retain the value of old_rec */ - - cursor->block_when_stored = - btr_pcur_get_block(cursor); - cursor->modify_clock = - buf_block_get_modify_clock( - cursor->block_when_stored); - cursor->old_stored = BTR_PCUR_OLD_STORED; - - mem_heap_free(heap); - - return(TRUE); - } - - break; - case BTR_PCUR_BEFORE: - page_cur_move_to_next(btr_pcur_get_page_cur(cursor)); - break; - case BTR_PCUR_AFTER: - page_cur_move_to_prev(btr_pcur_get_page_cur(cursor)); - break; + switch (cursor->rel_pos) { + case BTR_PCUR_ON: + if (btr_pcur_is_on_user_rec(cursor) + && !cmp_dtuple_rec( + tuple, btr_pcur_get_rec(cursor), + rec_get_offsets(btr_pcur_get_rec(cursor), + index, NULL, + ULINT_UNDEFINED, &heap))) { + + /* We have to store the NEW value for + the modify clock, since the cursor can + now be on a different page! 
But we can + retain the value of old_rec */ + + cursor->block_when_stored = + btr_pcur_get_block(cursor); + cursor->modify_clock = + buf_block_get_modify_clock( + cursor->block_when_stored); + cursor->old_stored = BTR_PCUR_OLD_STORED; + + mem_heap_free(heap); + + return(TRUE); + } #ifdef UNIV_DEBUG - default: - ut_error; + /* fall through */ + case BTR_PCUR_BEFORE: + case BTR_PCUR_AFTER: + break; + default: + ut_error; #endif /* UNIV_DEBUG */ - } } mem_heap_free(heap); diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index 6998f1b53ac..22c1effc0d3 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -2123,6 +2123,7 @@ wait_until_unfixed: if (mode == BUF_GET_IF_IN_POOL && ibuf_debug) { /* Try to evict the block from the buffer pool, to use the insert buffer as much as possible. */ + ulint page_no = buf_block_get_page_no(block); if (buf_LRU_free_block(&block->page, TRUE, FALSE)) { //buf_pool_mutex_exit(); @@ -2131,6 +2132,18 @@ wait_until_unfixed: "innodb_change_buffering_debug evict %u %u\n", (unsigned) space, (unsigned) offset); return(NULL); + } else if (UNIV_UNLIKELY(buf_block_get_state(block) + != BUF_BLOCK_FILE_PAGE + || (buf_block_get_page_no(block) != page_no) + || (buf_block_get_space(block) != space))) { + + /* buf_LRU_free_block temporarily releases the + block mutex, and now block points to something + else. */ + mutex_exit(block_mutex); + block = NULL; + goto loop2; + } else if (buf_flush_page_try(block)) { fprintf(stderr, "innodb_change_buffering_debug flush %u %u\n", @@ -4078,6 +4091,133 @@ buf_get_free_list_len(void) return(len); } + +/*******************************************************************//** +Collect buffer pool stats information for a buffer pool. 
Also +record aggregated stats if there are more than one buffer pool +in the server */ +UNIV_INTERN +void +buf_stats_get_pool_info( +/*====================*/ + buf_pool_info_t* pool_info) /*!< in/out: buffer pool info + to fill */ +{ + time_t current_time; + double time_elapsed; + + buf_pool_mutex_enter(); + + pool_info->pool_size = buf_pool->curr_size; + + pool_info->lru_len = UT_LIST_GET_LEN(buf_pool->LRU); + + pool_info->old_lru_len = buf_pool->LRU_old_len; + + pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool->free); + + pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool->flush_list); + + pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool->unzip_LRU); + + pool_info->n_pend_reads = buf_pool->n_pend_reads; + + pool_info->n_pending_flush_lru = + (buf_pool->n_flush[BUF_FLUSH_LRU] + + buf_pool->init_flush[BUF_FLUSH_LRU]); + + pool_info->n_pending_flush_list = + (buf_pool->n_flush[BUF_FLUSH_LIST] + + buf_pool->init_flush[BUF_FLUSH_LIST]); + + pool_info->n_pending_flush_single_page = + (buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] + + buf_pool->init_flush[BUF_FLUSH_SINGLE_PAGE]); + + current_time = time(NULL); + time_elapsed = 0.001 + difftime(current_time, + buf_pool->last_printout_time); + + pool_info->n_pages_made_young = buf_pool->stat.n_pages_made_young; + + pool_info->n_pages_not_made_young = + buf_pool->stat.n_pages_not_made_young; + + pool_info->n_pages_read = buf_pool->stat.n_pages_read; + + pool_info->n_pages_created = buf_pool->stat.n_pages_created; + + pool_info->n_pages_written = buf_pool->stat.n_pages_written; + + pool_info->n_page_gets = buf_pool->stat.n_page_gets; + + pool_info->n_ra_pages_read_rnd = buf_pool->stat.n_ra_pages_read_rnd; + pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read; + + pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted; + + pool_info->page_made_young_rate = + (buf_pool->stat.n_pages_made_young + - buf_pool->old_stat.n_pages_made_young) / time_elapsed; + + pool_info->page_not_made_young_rate = + 
(buf_pool->stat.n_pages_not_made_young + - buf_pool->old_stat.n_pages_not_made_young) / time_elapsed; + + pool_info->pages_read_rate = + (buf_pool->stat.n_pages_read + - buf_pool->old_stat.n_pages_read) / time_elapsed; + + pool_info->pages_created_rate = + (buf_pool->stat.n_pages_created + - buf_pool->old_stat.n_pages_created) / time_elapsed; + + pool_info->pages_written_rate = + (buf_pool->stat.n_pages_written + - buf_pool->old_stat.n_pages_written) / time_elapsed; + + pool_info->n_page_get_delta = buf_pool->stat.n_page_gets + - buf_pool->old_stat.n_page_gets; + + if (pool_info->n_page_get_delta) { + pool_info->page_read_delta = buf_pool->stat.n_pages_read + - buf_pool->old_stat.n_pages_read; + + pool_info->young_making_delta = + buf_pool->stat.n_pages_made_young + - buf_pool->old_stat.n_pages_made_young; + + pool_info->not_young_making_delta = + buf_pool->stat.n_pages_not_made_young + - buf_pool->old_stat.n_pages_not_made_young; + } + pool_info->pages_readahead_rnd_rate = + (buf_pool->stat.n_ra_pages_read_rnd + - buf_pool->old_stat.n_ra_pages_read_rnd) / time_elapsed; + + + pool_info->pages_readahead_rate = + (buf_pool->stat.n_ra_pages_read + - buf_pool->old_stat.n_ra_pages_read) / time_elapsed; + + pool_info->pages_evicted_rate = + (buf_pool->stat.n_ra_pages_evicted + - buf_pool->old_stat.n_ra_pages_evicted) / time_elapsed; + + pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU); + + pool_info->io_sum = buf_LRU_stat_sum.io; + + pool_info->io_cur = buf_LRU_stat_cur.io; + + pool_info->unzip_sum = buf_LRU_stat_sum.unzip; + + pool_info->unzip_cur = buf_LRU_stat_cur.unzip; + + buf_refresh_io_stats(); + buf_pool_mutex_exit(); +} + #else /* !UNIV_HOTBACKUP */ /********************************************************************//** Inits a page to the buffer buf_pool, for use in ibbackup --restore. 
*/ @@ -4108,3 +4248,5 @@ buf_page_init_for_backup_restore( } } #endif /* !UNIV_HOTBACKUP */ + + diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index c53b71632c2..4e3b677d8a9 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -48,6 +48,7 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "log0recv.h" #include "srv0srv.h" +#include "srv0start.h" /** The number of blocks from the LRU_old pointer onward, including the block pointed to, must be buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV @@ -1428,13 +1429,12 @@ buf_LRU_make_block_old( Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns TRUE, it will temporarily -release buf_pool_mutex. Furthermore, the page frame will no longer be -accessible via bpage. +NOTE: This will temporarily release buf_pool_mutex. Furthermore, the +page frame will no longer be accessible via bpage. -The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and -release these two mutexes after the call. No other -buf_page_get_mutex() may be held when calling this function. +The caller must hold buf_page_get_mutex(bpage) and release this mutex +after the call. No other buf_page_get_mutex() may be held when +calling this function. @return TRUE if freed, FALSE otherwise. */ UNIV_INTERN ibool @@ -2098,6 +2098,12 @@ func_exit: /********************************************************************//** Dump the LRU page list to the specific file. 
*/ #define LRU_DUMP_FILE "ib_lru_dump" +#define LRU_DUMP_TEMP_FILE "ib_lru_dump.tmp" +#define LRU_OS_FILE_WRITE() \ + os_file_write(LRU_DUMP_FILE, dump_file, buffer, \ + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL, \ + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)), \ + UNIV_PAGE_SIZE) UNIV_INTERN ibool @@ -2109,17 +2115,19 @@ buf_LRU_file_dump(void) byte* buffer_base = NULL; byte* buffer = NULL; buf_page_t* bpage; + buf_page_t* first_bpage; ulint buffers; ulint offset; - ibool ret = FALSE; + ulint pages_written; ulint i; + ulint total_pages; for (i = 0; i < srv_n_data_files; i++) { if (strstr(srv_data_file_names[i], LRU_DUMP_FILE) != NULL) { fprintf(stderr, " InnoDB: The name '%s' seems to be used for" - " innodb_data_file_path. Dumping LRU list is not" - " done for safeness.\n", LRU_DUMP_FILE); + " innodb_data_file_path. Dumping LRU list is" + " not done for safeness.\n", LRU_DUMP_FILE); goto end; } } @@ -2132,7 +2140,7 @@ buf_LRU_file_dump(void) goto end; } - dump_file = os_file_create(LRU_DUMP_FILE, OS_FILE_OVERWRITE, + dump_file = os_file_create(LRU_DUMP_TEMP_FILE, OS_FILE_OVERWRITE, OS_FILE_NORMAL, OS_DATA_FILE, &success); if (!success) { os_file_get_last_error(TRUE); @@ -2142,12 +2150,21 @@ buf_LRU_file_dump(void) } mutex_enter(&LRU_list_mutex); - bpage = UT_LIST_GET_LAST(buf_pool->LRU); + bpage = first_bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + total_pages = UT_LIST_GET_LEN(buf_pool->LRU); - buffers = offset = 0; - while (bpage != NULL) { - if (offset == 0) { - memset(buffer, 0, UNIV_PAGE_SIZE); + buffers = offset = pages_written = 0; + while (bpage != NULL && (pages_written++ < total_pages)) { + + buf_page_t* next_bpage = UT_LIST_GET_NEXT(LRU, bpage); + + if (next_bpage == first_bpage) { + mutex_exit(&LRU_list_mutex); + success = FALSE; + fprintf(stderr, + "InnoDB: detected cycle in LRU, skipping " + "dump\n"); + goto end; } mach_write_to_4(buffer + offset * 4, bpage->space); @@ -2156,50 +2173,79 @@ buf_LRU_file_dump(void) offset++; if (offset == 
UNIV_PAGE_SIZE/4) { - success = os_file_write(LRU_DUMP_FILE, dump_file, buffer, - (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL, - (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)), - UNIV_PAGE_SIZE); + mutex_t *next_block_mutex = NULL; + + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { + mutex_exit(&LRU_list_mutex); + success = FALSE; + fprintf(stderr, + " InnoDB: stopped dumping lru pages" + " because of server shutdown.\n"); + goto end; + } + + /* while writing file, release buffer pool mutex but + keep the next page fixed so we don't worry about + our list iterator becoming invalid */ + if (next_bpage) { + next_block_mutex = buf_page_get_mutex( + next_bpage); + + mutex_enter(next_block_mutex); + next_bpage->buf_fix_count++; + mutex_exit(next_block_mutex); + } + mutex_exit(&LRU_list_mutex); + + success = LRU_OS_FILE_WRITE(); + + /* grab this again here so that next_bpage + can't be purged when we drop the fix_count */ + mutex_enter(&LRU_list_mutex); + + if (next_bpage) { + mutex_enter(next_block_mutex); + next_bpage->buf_fix_count--; + mutex_exit(next_block_mutex); + } if (!success) { mutex_exit(&LRU_list_mutex); fprintf(stderr, - " InnoDB: cannot write page %lu of %s\n", + " InnoDB: cannot write page" + " %lu of %s\n", buffers, LRU_DUMP_FILE); goto end; } buffers++; offset = 0; + bpage = next_bpage; + } else { + bpage = UT_LIST_GET_NEXT(LRU, bpage); } - - bpage = UT_LIST_GET_PREV(LRU, bpage); - } + } /* while(bpage ...) 
*/ mutex_exit(&LRU_list_mutex); - if (offset == 0) { - memset(buffer, 0, UNIV_PAGE_SIZE); - } - mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL); offset++; mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL); offset++; - success = os_file_write(LRU_DUMP_FILE, dump_file, buffer, - (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL, - (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)), - UNIV_PAGE_SIZE); - if (!success) { - goto end; - } - - ret = TRUE; + success = LRU_OS_FILE_WRITE(); end: - if (dump_file != (os_file_t) -1) + if (dump_file != (os_file_t) -1) { + if (success) { + success = os_file_flush(dump_file, TRUE); + } os_file_close(dump_file); + } + if (success) { + success = os_file_rename(LRU_DUMP_TEMP_FILE, + LRU_DUMP_FILE); + } if (buffer_base) ut_free(buffer_base); - return(ret); + return(success); } typedef struct { @@ -2241,6 +2287,7 @@ buf_LRU_file_restore(void) dump_record_t* records = NULL; ulint size; ulint size_high; + ulint recsize = sizeof(dump_record_t); ulint length; dump_file = os_file_create_simple_no_error_handling( @@ -2248,7 +2295,15 @@ buf_LRU_file_restore(void) if (!success || !os_file_get_size(dump_file, &size, &size_high)) { os_file_get_last_error(TRUE); fprintf(stderr, - " InnoDB: cannot open %s\n", LRU_DUMP_FILE); + " InnoDB: cannot open %s," + " buffer pool preload not done\n", + LRU_DUMP_FILE); + goto end; + } + + if (size == 0 || size_high > 0 || size % recsize) { + fprintf(stderr, " InnoDB: broken LRU dump file," + " buffer pool preload not done\n"); goto end; } @@ -2332,6 +2387,14 @@ buf_LRU_file_restore(void) if (offset % 16 == 15) { os_aio_simulated_wake_handler_threads(); buf_flush_free_margin(FALSE); + /* skip loading of the rest of the file if we are + terminating anyway*/ + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { + fprintf(stderr, + " InnoDB: stopped loading LRU pages" + " because of server shutdown.\n"); + break; + } } zip_size = fil_space_get_zip_size(space_id); diff --git a/storage/xtradb/dict/dict0boot.c 
b/storage/xtradb/dict/dict0boot.c index 2b6a208321d..538b5f861f5 100644 --- a/storage/xtradb/dict/dict0boot.c +++ b/storage/xtradb/dict/dict0boot.c @@ -237,6 +237,166 @@ dict_hdr_create( } /*****************************************************************//** +Verifies the SYS_STATS table by scanning its clustered index. This +function may only be called at InnoDB startup time. + +@return TRUE if SYS_STATS was verified successfully */ +UNIV_INTERN +ibool +dict_verify_xtradb_sys_stats(void) +/*==============================*/ +{ + dict_index_t* sys_stats_index; + ulint saved_srv_pass_corrupt_table = srv_pass_corrupt_table; + ibool result; + + sys_stats_index = dict_table_get_first_index(dict_sys->sys_stats); + + /* Since this may be called only during server startup, avoid hitting + various asserts by using XtraDB pass_corrupt_table option. */ + srv_pass_corrupt_table = 1; + result = btr_validate_index(sys_stats_index, NULL); + srv_pass_corrupt_table = saved_srv_pass_corrupt_table; + + return result; +} + +/*****************************************************************//** +Creates the B-tree for the SYS_STATS clustered index, adds the XtraDB +mark and the id of the index to the dictionary header page. Rewrites +both passed args. 
*/ +static +void +dict_create_xtradb_sys_stats( +/*=========================*/ + dict_hdr_t** dict_hdr, /*!< in/out: dictionary header */ + mtr_t* mtr) /*!< in/out: mtr */ +{ + ulint root_page_no; + + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_STATS_ID, + dict_ind_redundant, mtr); + if (root_page_no == FIL_NULL) { + fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n"); + srv_use_sys_stats_table = FALSE; + } else { + mlog_write_ulint(*dict_hdr + DICT_HDR_STATS, root_page_no, + MLOG_4BYTES, mtr); + mlog_write_dulint(*dict_hdr + DICT_HDR_XTRADB_MARK, + DICT_HDR_XTRADB_FLAG, mtr); + } + mtr_commit(mtr); + /* restart mtr */ + mtr_start(mtr); + *dict_hdr = dict_hdr_get(mtr); +} + +/*****************************************************************//** +Create the table and index structure of SYS_STATS for the dictionary +cache and add it there. If called for the first time, also support +wrong root page id injection for testing purposes. */ +static +void +dict_add_to_cache_xtradb_sys_stats( +/*===============================*/ + ibool first_time __attribute__((unused)), + /*!< in: first invocation flag. 
If + TRUE, optionally inject wrong root page + id */ + mem_heap_t* heap, /*!< in: memory heap for table/index + allocation */ + dict_hdr_t* dict_hdr, /*!< in: dictionary header */ + mtr_t* mtr) /*!< in: mtr */ +{ + dict_table_t* table; + dict_index_t* index; + ulint root_page_id; + ulint error; + + table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 4, 0); + table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "NON_NULL_VALS", DATA_BINARY, 0, 0); + + /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ +#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 +#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" +#endif +#if DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2 +#error "DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2" +#endif + + table->id = DICT_STATS_ID; + dict_table_add_to_cache(table, heap); + dict_sys->sys_stats = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_STATS", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + dict_mem_index_add_field(index, "INDEX_ID", 0); + dict_mem_index_add_field(index, "KEY_COLS", 0); + + index->id = DICT_STATS_ID; + + root_page_id = mtr_read_ulint(dict_hdr + DICT_HDR_STATS, MLOG_4BYTES, + mtr); +#ifdef UNIV_DEBUG + if ((srv_sys_stats_root_page != 0) && first_time) + root_page_id = srv_sys_stats_root_page; +#endif + error = dict_index_add_to_cache(table, index, root_page_id, FALSE); + ut_a(error == DB_SUCCESS); + + mem_heap_empty(heap); +} + +/*****************************************************************//** +Discard the existing dictionary cache SYS_STATS information, create and +add it there anew. Does not touch the old SYS_STATS tablespace page +under the assumption that they are corrupted or overwritten for other +purposes. 
*/ +UNIV_INTERN +void +dict_recreate_xtradb_sys_stats(void) +/*================================*/ +{ + mtr_t mtr; + dict_hdr_t* dict_hdr; + dict_index_t* sys_stats_clust_idx; + mem_heap_t* heap; + + heap = mem_heap_create(450); + + mutex_enter(&(dict_sys->mutex)); + + sys_stats_clust_idx = dict_table_get_first_index(dict_sys->sys_stats); + dict_index_remove_from_cache(dict_sys->sys_stats, sys_stats_clust_idx); + + dict_table_remove_from_cache(dict_sys->sys_stats); + + dict_sys->sys_stats = NULL; + + mtr_start(&mtr); + + dict_hdr = dict_hdr_get(&mtr); + + dict_create_xtradb_sys_stats(&dict_hdr, &mtr); + dict_add_to_cache_xtradb_sys_stats(FALSE, heap, dict_hdr, &mtr); + + mem_heap_free(heap); + + mtr_commit(&mtr); + + mutex_exit(&(dict_sys->mutex)); +} + +/*****************************************************************//** Initializes the data dictionary memory structures when the database is started. This function is also called when the data dictionary is created. */ UNIV_INTERN @@ -251,39 +411,23 @@ dict_boot(void) mtr_t mtr; ulint error; + heap = mem_heap_create(450); + mtr_start(&mtr); /* Create the hash tables etc. 
*/ dict_init(); - heap = mem_heap_create(450); - mutex_enter(&(dict_sys->mutex)); /* Get the dictionary header */ dict_hdr = dict_hdr_get(&mtr); - if (ut_dulint_cmp(mtr_read_dulint(dict_hdr + DICT_HDR_XTRADB_MARK, &mtr), - DICT_HDR_XTRADB_FLAG) != 0) { + if (ut_dulint_cmp(mtr_read_dulint(dict_hdr + DICT_HDR_XTRADB_MARK, + &mtr), DICT_HDR_XTRADB_FLAG) != 0) { + /* not extended yet by XtraDB, need to be extended */ - ulint root_page_no; - - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_STATS_ID, - dict_ind_redundant, &mtr); - if (root_page_no == FIL_NULL) { - fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n"); - srv_use_sys_stats_table = FALSE; - } else { - mlog_write_ulint(dict_hdr + DICT_HDR_STATS, root_page_no, - MLOG_4BYTES, &mtr); - mlog_write_dulint(dict_hdr + DICT_HDR_XTRADB_MARK, - DICT_HDR_XTRADB_FLAG, &mtr); - } - mtr_commit(&mtr); - /* restart mtr */ - mtr_start(&mtr); - dict_hdr = dict_hdr_get(&mtr); + dict_create_xtradb_sys_stats(&dict_hdr, &mtr); } /* Because we only write new row ids to disk-based data structure @@ -464,42 +608,7 @@ dict_boot(void) FALSE); ut_a(error == DB_SUCCESS); - /*-------------------------*/ - table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 4, 0); - table->n_mysql_handles_opened = 1; /* for pin */ - - dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "NON_NULL_VALS", DATA_BINARY, 0, 0); - - /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ -#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 -#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" -#endif -#if DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2 -#error "DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2" -#endif - - table->id = DICT_STATS_ID; - dict_table_add_to_cache(table, heap); - dict_sys->sys_stats = table; - 
mem_heap_empty(heap); - - index = dict_mem_index_create("SYS_STATS", "CLUST_IND", - DICT_HDR_SPACE, - DICT_UNIQUE | DICT_CLUSTERED, 2); - - dict_mem_index_add_field(index, "INDEX_ID", 0); - dict_mem_index_add_field(index, "KEY_COLS", 0); - - index->id = DICT_STATS_ID; - error = dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr - + DICT_HDR_STATS, - MLOG_4BYTES, &mtr), - FALSE); - ut_a(error == DB_SUCCESS); + dict_add_to_cache_xtradb_sys_stats(TRUE, heap, dict_hdr, &mtr); mem_heap_free(heap); diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 574b0e5d7da..65189ba2961 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -4622,12 +4622,6 @@ next_rec: } btr_pcur_close(&pcur); mtr_commit(&mtr); - - if (rests) { - fprintf(stderr, "InnoDB: Warning: failed to store %lu stats entries" - " of %s/%s to SYS_STATS system table.\n", - rests, index->table_name, index->name); - } } /*===========================================*/ @@ -5394,6 +5388,28 @@ dict_table_replace_index_in_foreign_list( foreign->foreign_index = new_index; } } + + + for (foreign = UT_LIST_GET_FIRST(table->referenced_list); + foreign; + foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) { + + dict_index_t* new_index; + + if (foreign->referenced_index == index) { + ut_ad(foreign->referenced_table == index->table); + + new_index = dict_foreign_find_index( + foreign->referenced_table, + foreign->referenced_col_names, + foreign->n_fields, index, + /*check_charsets=*/TRUE, /*check_null=*/FALSE); + ut_ad(new_index || !trx->check_foreigns); + ut_ad(!new_index || new_index->table == index->table); + + foreign->referenced_index = new_index; + } + } } /**********************************************************************//** diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c index d026306a646..015d88852e9 100644 --- a/storage/xtradb/dict/dict0load.c +++ b/storage/xtradb/dict/dict0load.c @@ -165,7 +165,7 @@ 
dict_print(void) monitor printout */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold += SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); mutex_enter(&(dict_sys->mutex)); @@ -193,7 +193,7 @@ loop: /* Restore the fatal semaphore wait timeout */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold -= SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); return; diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index 1cc6654a88e..c035b628eb1 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -1876,7 +1876,7 @@ fil_inc_pending_ops( if (space == NULL) { fprintf(stderr, - "InnoDB: Error: trying to do ibuf merge to a" + "InnoDB: Error: trying to do an operation on a" " dropped tablespace %lu\n", (ulong) id); } @@ -3375,6 +3375,7 @@ skip_info: for (offset = 0; offset < free_limit_bytes; offset += zip_size ? zip_size : UNIV_PAGE_SIZE) { ibool page_is_corrupt; + ibool is_descr_page = FALSE; success = os_file_read(file, page, (ulint)(offset & 0xFFFFFFFFUL), @@ -3413,6 +3414,7 @@ skip_info: /* store as descr page */ memcpy(descr_page, page, (zip_size ? 
zip_size : UNIV_PAGE_SIZE)); + is_descr_page = TRUE; } else if (descr_is_corrupt) { /* unknown state of the page */ @@ -3489,7 +3491,8 @@ skip_info: } } - if (fil_page_get_type(page) == FIL_PAGE_INDEX) { + if (fil_page_get_type(page) == + FIL_PAGE_INDEX && !is_descr_page) { dulint tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID)); for (i = 0; i < n_index; i++) { diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index e3a55a55e93..1464a4fae90 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -195,6 +195,9 @@ static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; static my_bool innobase_use_sys_stats_table = FALSE; +#ifdef UNIV_DEBUG +static ulong innobase_sys_stats_root_page = 0; +#endif static my_bool innobase_buffer_pool_shm_checksum = TRUE; static uint innobase_buffer_pool_shm_key = 0; @@ -939,11 +942,23 @@ convert_error_code_to_mysql( case DB_TABLE_NOT_FOUND: return(HA_ERR_NO_SUCH_TABLE); - case DB_TOO_BIG_RECORD: - my_error(ER_TOO_BIG_ROWSIZE, MYF(0), - page_get_free_space_of_empty(flags - & DICT_TF_COMPACT) / 2); + case DB_TOO_BIG_RECORD: { + /* If prefix is true then a 768-byte prefix is stored + locally for BLOB fields. Refer to dict_table_get_format() */ + bool prefix = ((flags & DICT_TF_FORMAT_MASK) + >> DICT_TF_FORMAT_SHIFT) < UNIV_FORMAT_B; + my_printf_error(ER_TOO_BIG_ROWSIZE, + "Row size too large (> %lu). Changing some columns " + "to TEXT or BLOB %smay help. In current row " + "format, BLOB prefix of %d bytes is stored inline.", + MYF(0), + page_get_free_space_of_empty(flags & + DICT_TF_COMPACT) / 2, + prefix ? "or using ROW_FORMAT=DYNAMIC " + "or ROW_FORMAT=COMPRESSED ": "", + prefix ? 
DICT_MAX_INDEX_COL_LEN : 0); return(HA_ERR_TO_BIG_ROW); + } case DB_NO_SAVEPOINT: return(HA_ERR_NO_SAVEPOINT); @@ -2369,6 +2384,10 @@ mem_free_and_error: srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table; +#ifdef UNIV_DEBUG + srv_sys_stats_root_page = innobase_sys_stats_root_page; +#endif + /* -------------- Log files ---------------------------*/ /* The default dir for log files is the datadir of MySQL */ @@ -4268,6 +4287,27 @@ table_opened: } UNIV_INTERN +handler* +ha_innobase::clone( +/*===============*/ + const char* name, /*!< in: table name */ + MEM_ROOT* mem_root) /*!< in: memory context */ +{ + ha_innobase* new_handler; + + DBUG_ENTER("ha_innobase::clone"); + + new_handler = static_cast<ha_innobase*>(handler::clone(name, + mem_root)); + if (new_handler) { + new_handler->prebuilt->select_lock_type + = prebuilt->select_lock_type; + } + + DBUG_RETURN(new_handler); +} + +UNIV_INTERN uint ha_innobase::max_supported_key_part_length() const { @@ -8684,7 +8724,7 @@ ha_innobase::check( /* Enlarge the fatal lock wait timeout during CHECK TABLE. */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold += SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); for (index = dict_table_get_first_index(prebuilt->table); @@ -8780,7 +8820,7 @@ ha_innobase::check( /* Restore the fatal lock wait timeout after CHECK TABLE. 
*/ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold -= SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); prebuilt->trx->op_info = ""; @@ -11751,6 +11791,13 @@ static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table, "So you should use ANALYZE TABLE command intentionally.", NULL, NULL, FALSE); +#ifdef UNIV_DEBUG +static MYSQL_SYSVAR_ULONG(persistent_stats_root_page, + innobase_sys_stats_root_page, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Override the SYS_STATS root page id, 0 = no override (for testing only)", + NULL, NULL, 0, 0, ULONG_MAX, 0); +#endif + static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled, PLUGIN_VAR_OPCMDARG, "Enable InnoDB adaptive hash index (enabled by default). " @@ -11934,6 +11981,18 @@ static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, "NULLS_UNEQUAL and NULLS_IGNORED", NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); +static MYSQL_SYSVAR_BOOL(track_changed_pages, srv_track_changed_pages, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Track the redo log for changed pages and output a changed page bitmap", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_ULONGLONG(changed_pages_limit, srv_changed_pages_limit, + PLUGIN_VAR_RQCMDARG, + "The maximum number of rows for " + "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES table, " + "0 - unlimited", + NULL, NULL, 1000000, 0, ~0ULL, 0); + #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, PLUGIN_VAR_RQCMDARG, @@ -12070,7 +12129,7 @@ static MYSQL_SYSVAR_UINT(auto_lru_dump, srv_auto_lru_dump, NULL, NULL, 0, 0, UINT_MAX32, 0); static MYSQL_SYSVAR_BOOL(blocking_lru_restore, innobase_blocking_lru_restore, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, "Block XtraDB startup process until buffer pool is full restored from a " "dump file (if present). 
Disabled by default.", NULL, NULL, FALSE); @@ -12149,6 +12208,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_auto_update), MYSQL_SYSVAR(stats_update_need_lock), MYSQL_SYSVAR(use_sys_stats_table), +#ifdef UNIV_DEBUG + MYSQL_SYSVAR(persistent_stats_root_page), +#endif MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), MYSQL_SYSVAR(stats_method), @@ -12180,6 +12242,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(dict_size_limit), MYSQL_SYSVAR(use_sys_malloc), MYSQL_SYSVAR(change_buffering), + MYSQL_SYSVAR(track_changed_pages), + MYSQL_SYSVAR(changed_pages_limit), #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG MYSQL_SYSVAR(change_buffering_debug), #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ @@ -12230,7 +12294,10 @@ i_s_innodb_admin_command, i_s_innodb_sys_tables, i_s_innodb_sys_indexes, i_s_innodb_sys_stats, -i_s_innodb_patches +i_s_innodb_changed_pages, +i_s_innodb_buffer_page, +i_s_innodb_buffer_page_lru, +i_s_innodb_buffer_stats mysql_declare_plugin_end; /** @brief Initialize the default value of innodb_commit_concurrency. 
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 1e18b3a22b9..d04fe24cf79 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -133,6 +133,7 @@ class ha_innobase: public handler const key_map* keys_to_use_for_scanning(); int open(const char *name, int mode, uint test_if_locked); + handler* clone(const char *name, MEM_ROOT *mem_root); int close(void); double scan_time(); double read_time(uint index, uint ranges, ha_rows rows); diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 5efc3f96fa3..c746f65bf14 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -668,6 +668,10 @@ ha_innobase::add_index( DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); } + if (innodb_table->tablespace_discarded) { + DBUG_RETURN(-1); + } + /* Check that index keys are sensible */ error = innobase_check_index_keys(key_info, num_of_keys, innodb_table); @@ -823,6 +827,8 @@ ha_innobase::add_index( innodb_table, indexed_table, index, num_of_idx, table); + DBUG_EXECUTE_IF("crash_innodb_add_index_after", DBUG_SUICIDE();); + error_handling: /* After an error, remove all those index definitions from the dictionary which were defined. */ diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index b6e7da50bcf..aa493225db2 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -22,8 +22,15 @@ InnoDB INFORMATION SCHEMA tables interface to MySQL. 
Created July 18, 2007 Vasil Dimov *******************************************************/ - +#ifndef MYSQL_SERVER +#define MYSQL_SERVER /* For Item_* classes */ +#include <mysql_priv.h> +/* Prevent influence of this definition to other headers */ +#undef MYSQL_SERVER +#else #include <mysql_priv.h> +#endif //MYSQL_SERVER + #include <mysqld_error.h> #include <m_ctype.h> @@ -32,7 +39,6 @@ Created July 18, 2007 Vasil Dimov #include <mysys_err.h> #include <my_sys.h> #include "i_s.h" -#include "innodb_patch_info.h" #include <mysql/plugin.h> extern "C" { @@ -41,6 +47,7 @@ extern "C" { #include "buf0buddy.h" /* for i_s_cmpmem */ #include "buf0buf.h" /* for buf_pool and PAGE_ZIP_MIN_SIZE */ #include "ha_prototypes.h" /* for innobase_convert_name() */ +#include "srv0srv.h" /* for srv_track_changed_pages */ #include "srv0start.h" /* for srv_was_started */ #include "btr0btr.h" /* for btr_page_get_index_id */ #include "trx0rseg.h" /* for trx_rseg_struct */ @@ -48,10 +55,91 @@ extern "C" { #include "dict0dict.h" /* for dict_sys */ #include "btr0pcur.h" #include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */ +#include "log0online.h" +#include "btr0btr.h" +#include "log0log.h" } static const char plugin_author[] = "Innobase Oy"; +/** structure associates a name string with a file page type and/or buffer +page state. */ +struct buffer_page_desc_str_struct{ + const char* type_str; /*!< String explain the page + type/state */ + ulint type_value; /*!< Page type or page state */ +}; + +typedef struct buffer_page_desc_str_struct buf_page_desc_str_t; + +/** Any states greater than FIL_PAGE_TYPE_LAST would be treated as unknown. 
*/ +#define I_S_PAGE_TYPE_UNKNOWN (FIL_PAGE_TYPE_LAST + 1) + +/** We also define I_S_PAGE_TYPE_INDEX as the Index Page's position +in i_s_page_type[] array */ +#define I_S_PAGE_TYPE_INDEX 1 + +/** Name string for File Page Types */ +static buf_page_desc_str_t i_s_page_type[] = { + {"ALLOCATED", FIL_PAGE_TYPE_ALLOCATED}, + {"INDEX", FIL_PAGE_INDEX}, + {"UNDO_LOG", FIL_PAGE_UNDO_LOG}, + {"INODE", FIL_PAGE_INODE}, + {"IBUF_FREE_LIST", FIL_PAGE_IBUF_FREE_LIST}, + {"IBUF_BITMAP", FIL_PAGE_IBUF_BITMAP}, + {"SYSTEM", FIL_PAGE_TYPE_SYS}, + {"TRX_SYSTEM", FIL_PAGE_TYPE_TRX_SYS}, + {"FILE_SPACE_HEADER", FIL_PAGE_TYPE_FSP_HDR}, + {"EXTENT_DESCRIPTOR", FIL_PAGE_TYPE_XDES}, + {"BLOB", FIL_PAGE_TYPE_BLOB}, + {"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB}, + {"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2}, + {"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN} +}; + +/* Check if we can hold all page type in a 4 bit value */ +#if I_S_PAGE_TYPE_UNKNOWN > 1<<4 +# error "i_s_page_type[] is too large" +#endif + +/** This structure defines information we will fetch from pages +currently cached in the buffer pool. 
It will be used to populate +table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE */ +struct buffer_page_info_struct{ + ulint block_id; /*!< Buffer Pool block ID */ + unsigned space_id:32; /*!< Tablespace ID */ + unsigned page_num:32; /*!< Page number/offset */ + unsigned access_time:32; /*!< Time of first access */ + unsigned flush_type:2; /*!< Flush type */ + unsigned io_fix:2; /*!< type of pending I/O operation */ + unsigned fix_count:19; /*!< Count of how manyfold this block + is bufferfixed */ + unsigned hashed:1; /*!< Whether hash index has been + built on this page */ + unsigned is_old:1; /*!< TRUE if the block is in the old + blocks in buf_pool->LRU_old */ + unsigned freed_page_clock:31; /*!< the value of + buf_pool->freed_page_clock */ + unsigned zip_ssize:PAGE_ZIP_SSIZE_BITS; + /*!< Compressed page size */ + unsigned page_state:BUF_PAGE_STATE_BITS; /*!< Page state */ + unsigned page_type:4; /*!< Page type */ + unsigned num_recs; + /*!< Number of records on Page */ + unsigned data_size; + /*!< Sum of the sizes of the records */ + lsn_t newest_mod; /*!< Log sequence number of + the youngest modification */ + lsn_t oldest_mod; /*!< Log sequence number of + the oldest modification */ + dulint index_id; /*!< Index ID if a index page */ +}; + +typedef struct buffer_page_info_struct buf_page_info_t; + +/** maximum number of buffer page info we would cache. 
*/ +#define MAX_BUF_INFO_CACHED 10000 + #define OK(expr) \ if ((expr) != 0) { \ DBUG_RETURN(1); \ @@ -224,168 +312,11 @@ field_store_ulint( return(ret); } -/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_patches */ -static ST_FIELD_INFO innodb_patches_fields_info[] = -{ -#define IDX_PATCH_NAME 0 - {STRUCT_FLD(field_name, "name"), - STRUCT_FLD(field_length, 255), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_PATCH_DESCR 1 - {STRUCT_FLD(field_name, "description"), - STRUCT_FLD(field_length, 255), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_PATCH_COMMENT 2 - {STRUCT_FLD(field_name, "comment"), - STRUCT_FLD(field_length, 100), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_PATCH_LINK 3 - {STRUCT_FLD(field_name, "link"), - STRUCT_FLD(field_length, 255), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - static struct st_mysql_information_schema i_s_info = { MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION }; -/*********************************************************************** -Fill the dynamic table information_schema.innodb_patches */ -static -int -innodb_patches_fill( -/*=============*/ - /* out: 0 on success, 1 on failure */ - THD* thd, /* in: thread */ - TABLE_LIST* tables, /* in/out: tables to fill */ - COND* cond) /* in: condition (ignored) */ -{ - TABLE* table = (TABLE *) tables->table; - int status = 0; - int i; - Field** fields; - - - DBUG_ENTER("innodb_patches_fill"); - fields = 
table->field; - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL)) { - - DBUG_RETURN(0); - } - - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - for (i = 0; innodb_enhancements[i].file; i++) { - - field_store_string(fields[0],innodb_enhancements[i].file); - field_store_string(fields[1],innodb_enhancements[i].name); - field_store_string(fields[2],innodb_enhancements[i].comment); - field_store_string(fields[3],innodb_enhancements[i].link); - - if (schema_table_store_record(thd, table)) { - status = 1; - break; - } - - } - - - DBUG_RETURN(status); -} - -/*********************************************************************** -Bind the dynamic table information_schema.innodb_patches. */ -static -int -innodb_patches_init( -/*=========*/ - /* out: 0 on success */ - void* p) /* in/out: table schema object */ -{ - DBUG_ENTER("innodb_patches_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_patches_fields_info; - schema->fill_table = innodb_patches_fill; - - DBUG_RETURN(0); -} - - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_patches = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "XTRADB_ENHANCEMENTS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, "Percona"), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "Enhancements applied to InnoDB plugin"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_patches_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, 
i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - /* reserved for dependency checking */ - /* void* */ - STRUCT_FLD(__reserved1, NULL) -}; - - static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_fields_info[] = { {STRUCT_FLD(field_name, "page_type"), @@ -1956,6 +1887,8 @@ i_s_cmp_fill_low( DBUG_ENTER("i_s_cmp_fill_low"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + /* deny access to non-superusers */ if (check_global_access(thd, PROCESS_ACL)) { @@ -2224,6 +2157,8 @@ i_s_cmpmem_fill_low( DBUG_ENTER("i_s_cmpmem_fill_low"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + /* deny access to non-superusers */ if (check_global_access(thd, PROCESS_ACL)) { @@ -2512,6 +2447,8 @@ i_s_innodb_rseg_fill( DBUG_ENTER("i_s_innodb_rseg_fill"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + /* deny access to non-superusers */ if (check_global_access(thd, PROCESS_ACL)) { @@ -2645,6 +2582,8 @@ i_s_innodb_admin_command_fill( DBUG_ENTER("i_s_innodb_admin_command_fill"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + /* deny access to non-superusers */ if (check_global_access(thd, PROCESS_ACL)) { DBUG_RETURN(0); @@ -2902,6 +2841,8 @@ i_s_innodb_table_stats_fill( DBUG_ENTER("i_s_innodb_table_stats_fill"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + /* deny access to non-superusers */ if (check_global_access(thd, PROCESS_ACL)) { DBUG_RETURN(0); @@ -2965,6 +2906,8 @@ i_s_innodb_index_stats_fill( DBUG_ENTER("i_s_innodb_index_stats_fill"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + /* deny access to non-superusers */ if (check_global_access(thd, PROCESS_ACL)) { DBUG_RETURN(0); @@ -3612,6 +3555,8 @@ i_s_innodb_schema_table_fill( DBUG_ENTER("i_s_innodb_schema_table_fill"); + 
RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + /* deny access to non-superusers */ if (check_global_access(thd, PROCESS_ACL)) { DBUG_RETURN(0); @@ -3783,3 +3728,1965 @@ UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_stats = STRUCT_FLD(system_vars, NULL), STRUCT_FLD(__reserved1, NULL) }; + +static ST_FIELD_INFO i_s_innodb_changed_pages_info[] = +{ + {STRUCT_FLD(field_name, "space_id"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "page_id"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "start_lsn"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "end_lsn"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +/*********************************************************************** + This function parses condition and gets upper bounds for start and end LSN's + if condition corresponds to certain pattern. + + We can't know right position to avoid scanning bitmap files from the beginning + to the lower bound. But we can stop scanning bitmap files if we reach upper bound. 
+ + It's expected the most used queries will be like the following: + + SELECT * FROM INNODB_CHANGED_PAGES WHERE START_LSN > num1 AND start_lsn < num2; + + That's why the pattern is: + + pattern: comp | and_comp; + comp: lsn < int_num | lsn <= int_num | int_num > lsn | int_num >= lsn; + lsn: start_lsn | end_lsn; + and_comp: some_expression AND some_expression | some_expression AND and_comp; + some_expression: comp | any_other_expression; + + Suppose the condition is start_lsn < 100, this means we have to read all + blocks with start_lsn < 100. Which is equivalent to reading all the blocks + with end_lsn <= 99, or just end_lsn < 100. That's why it's enough to find + maximum lsn value, doesn't matter if this is start or end lsn and compare + it with "start_lsn" field. + + Example: + + SELECT * FROM INNODB_CHANGED_PAGES + WHERE + start_lsn > 10 AND + end_lsn <= 1111 AND + 555 > end_lsn AND + page_id = 100; + + max_lsn will be set to 555. +*/ +static +void +limit_lsn_range_from_condition( +/*===========================*/ + TABLE* table, /*!<in: table */ + COND* cond, /*!<in: condition */ + ib_uint64_t* max_lsn) /*!<in/out: maximum LSN + (must be initialized with maximum + available value) */ +{ + if (cond->type() != Item::COND_ITEM && + cond->type() != Item::FUNC_ITEM) + return; + + switch (((Item_func*) cond)->functype()) + { + case Item_func::COND_AND_FUNC: + { + List_iterator<Item> li(*((Item_cond*) cond)-> + argument_list()); + Item *item; + while ((item= li++)) + limit_lsn_range_from_condition(table, + item, + max_lsn); + break; + } + case Item_func::LT_FUNC: + case Item_func::LE_FUNC: + case Item_func::GT_FUNC: + case Item_func::GE_FUNC: + { + Item *left; + Item *right; + Item_field *item_field; + ib_uint64_t tmp_result; + + /* + a <= b equals to b >= a that's why we just exchange + "left" and "right" in the case of ">" or ">=" + function + */ + if (((Item_func*) cond)->functype() == + Item_func::LT_FUNC || + ((Item_func*) cond)->functype() == + 
Item_func::LE_FUNC) + { + left = ((Item_func*) cond)->arguments()[0]; + right = ((Item_func*) cond)->arguments()[1]; + } else { + left = ((Item_func*) cond)->arguments()[1]; + right = ((Item_func*) cond)->arguments()[0]; + } + + if (!left || !right) + return; + if (left->type() != Item::FIELD_ITEM) + return; + if (right->type() != Item::INT_ITEM) + return; + + item_field = (Item_field*)left; + + if (/* START_LSN */ + table->field[2] != item_field->field && + /* END_LSN */ + table->field[3] != item_field->field) + { + return; + } + + /* Check if the current field belongs to our table */ + if (table != item_field->field->table) + return; + + tmp_result = right->val_int(); + if (tmp_result < *max_lsn) + *max_lsn = tmp_result; + + break; + } + default:; + } + +} + +/*********************************************************************** +Fill the dynamic table information_schema.innodb_changed_pages. +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_changed_pages_fill( +/*==========================*/ + THD* thd, /*!<in: thread */ + TABLE_LIST* tables, /*!<in/out: tables to fill */ + COND* cond) /*!<in: condition */ +{ + TABLE* table = (TABLE *) tables->table; + log_bitmap_iterator_t i; + ib_uint64_t output_rows_num = 0UL; + ib_uint64_t max_lsn = ~0ULL; + + if (!srv_track_changed_pages) + return 0; + + if (!log_online_bitmap_iterator_init(&i)) + return 1; + + if (cond) + limit_lsn_range_from_condition(table, cond, &max_lsn); + + while(log_online_bitmap_iterator_next(&i) && + (!srv_changed_pages_limit || + output_rows_num < srv_changed_pages_limit) && + /* + There is no need to compare both start LSN and end LSN fields + with maximum value. It's enough to compare only start LSN. 
+ Example: + + max_lsn = 100 + \\\\\\\\\\\\\\\\\\\\\\\\\|\\\\\\\\ - Query 1 + I------I I-------I I-------------I I----I + ////////////////// | - Query 2 + 1 2 3 4 + + Query 1: + SELECT * FROM INNODB_CHANGED_PAGES WHERE start_lsn < 100 + will select 1,2,3 bitmaps + Query 2: + SELECT * FROM INNODB_CHANGED_PAGES WHERE end_lsn < 100 + will select 1,2 bitmaps + + The condition start_lsn <= 100 will be false after reading + 1,2,3 bitmaps which suits for both cases. + */ + LOG_BITMAP_ITERATOR_START_LSN(i) <= max_lsn) + { + if (!LOG_BITMAP_ITERATOR_PAGE_CHANGED(i)) + continue; + + /* SPACE_ID */ + table->field[0]->store( + LOG_BITMAP_ITERATOR_SPACE_ID(i)); + /* PAGE_ID */ + table->field[1]->store( + LOG_BITMAP_ITERATOR_PAGE_NUM(i)); + /* START_LSN */ + table->field[2]->store( + LOG_BITMAP_ITERATOR_START_LSN(i)); + /* END_LSN */ + table->field[3]->store( + LOG_BITMAP_ITERATOR_END_LSN(i)); + + /* + I_S tables are in-memory tables. If bitmap file is big enough + a lot of memory can be used to store the table. But the size + of used memory can be diminished if we store only data which + corresponds to some conditions (in WHERE sql clause). Here + conditions are checked for the field values stored above. + + Conditions are checked twice. The first is here (during table + generation) and the second during query execution. Maybe it + makes sense to use some flag in THD object to avoid double + checking. 
+ */ + if (cond && !cond->val_int()) + continue; + + if (schema_table_store_record(thd, table)) + { + log_online_bitmap_iterator_release(&i); + return 1; + } + + ++output_rows_num; + } + + log_online_bitmap_iterator_release(&i); + return 0; +} + +static +int +i_s_innodb_changed_pages_init( +/*==========================*/ + void* p) +{ + DBUG_ENTER("i_s_innodb_changed_pages_init"); + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = i_s_innodb_changed_pages_info; + schema->fill_table = i_s_innodb_changed_pages_fill; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_changed_pages = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_CHANGED_PAGES"), + STRUCT_FLD(author, "Percona"), + STRUCT_FLD(descr, "InnoDB CHANGED_PAGES table"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_changed_pages_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL) +}; + +/* Fields of the dynamic table INNODB_BUFFER_POOL_STATS. 
*/ +static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] = +{ +#define IDX_BUF_STATS_POOL_SIZE 0 + {STRUCT_FLD(field_name, "POOL_SIZE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_FREE_BUFFERS 1 + {STRUCT_FLD(field_name, "FREE_BUFFERS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_LRU_LEN 2 + {STRUCT_FLD(field_name, "DATABASE_PAGES"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_OLD_LRU_LEN 3 + {STRUCT_FLD(field_name, "OLD_DATABASE_PAGES"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_FLUSH_LIST_LEN 4 + {STRUCT_FLD(field_name, "MODIFIED_DATABASE_PAGES"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PENDING_ZIP 5 + {STRUCT_FLD(field_name, "PENDING_DECOMPRESS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, 
""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PENDING_READ 6 + {STRUCT_FLD(field_name, "PENDING_READS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_FLUSH_LRU 7 + {STRUCT_FLD(field_name, "PENDING_FLUSH_LRU"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_FLUSH_LIST 8 + {STRUCT_FLD(field_name, "PENDING_FLUSH_LIST"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PAGE_YOUNG 9 + {STRUCT_FLD(field_name, "PAGES_MADE_YOUNG"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PAGE_NOT_YOUNG 10 + {STRUCT_FLD(field_name, "PAGES_NOT_MADE_YOUNG"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PAGE_YOUNG_RATE 11 + {STRUCT_FLD(field_name, "PAGES_MADE_YOUNG_RATE"), + STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + 
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE 12 + {STRUCT_FLD(field_name, "PAGES_MADE_NOT_YOUNG_RATE"), + STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PAGE_READ 13 + {STRUCT_FLD(field_name, "NUMBER_PAGES_READ"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PAGE_CREATED 14 + {STRUCT_FLD(field_name, "NUMBER_PAGES_CREATED"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PAGE_WRITTEN 15 + {STRUCT_FLD(field_name, "NUMBER_PAGES_WRITTEN"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PAGE_READ_RATE 16 + {STRUCT_FLD(field_name, "PAGES_READ_RATE"), + STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_PAGE_CREATE_RATE 17 + {STRUCT_FLD(field_name, "PAGES_CREATE_RATE"), + STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + 
+#define IDX_BUF_STATS_PAGE_WRITTEN_RATE 18 + {STRUCT_FLD(field_name, "PAGES_WRITTEN_RATE"), + STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_GET 19 + {STRUCT_FLD(field_name, "NUMBER_PAGES_GET"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_HIT_RATE 20 + {STRUCT_FLD(field_name, "HIT_RATE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_MADE_YOUNG_PCT 21 + {STRUCT_FLD(field_name, "YOUNG_MAKE_PER_THOUSAND_GETS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_NOT_MADE_YOUNG_PCT 22 + {STRUCT_FLD(field_name, "NOT_YOUNG_MAKE_PER_THOUSAND_GETS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_READ_AHREAD 23 + {STRUCT_FLD(field_name, "NUMBER_PAGES_READ_AHEAD"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, 
+ +#define IDX_BUF_STATS_READ_AHEAD_EVICTED 24 + {STRUCT_FLD(field_name, "NUMBER_READ_AHEAD_EVICTED"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_READ_AHEAD_RATE 25 + {STRUCT_FLD(field_name, "READ_AHEAD_RATE"), + STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_READ_AHEAD_EVICT_RATE 26 + {STRUCT_FLD(field_name, "READ_AHEAD_EVICTED_RATE"), + STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_LRU_IO_SUM 27 + {STRUCT_FLD(field_name, "LRU_IO_TOTAL"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_LRU_IO_CUR 28 + {STRUCT_FLD(field_name, "LRU_IO_CURRENT"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_STATS_UNZIP_SUM 29 + {STRUCT_FLD(field_name, "UNCOMPRESS_TOTAL"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define 
IDX_BUF_STATS_UNZIP_CUR 30 + {STRUCT_FLD(field_name, "UNCOMPRESS_CURRENT"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +/*******************************************************************//** +Fill Information Schema table INNODB_BUFFER_POOL_STATS for a particular +buffer pool +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_stats_fill( +/*==================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + const buf_pool_info_t* info) /*!< in: buffer pool + information */ +{ + TABLE* table; + Field** fields; + + DBUG_ENTER("i_s_innodb_stats_fill"); + + table = tables->table; + + fields = table->field; + + OK(fields[IDX_BUF_STATS_POOL_SIZE]->store(info->pool_size)); + + OK(fields[IDX_BUF_STATS_LRU_LEN]->store(info->lru_len)); + + OK(fields[IDX_BUF_STATS_OLD_LRU_LEN]->store(info->old_lru_len)); + + OK(fields[IDX_BUF_STATS_FREE_BUFFERS]->store(info->free_list_len)); + + OK(fields[IDX_BUF_STATS_FLUSH_LIST_LEN]->store( + info->flush_list_len)); + + OK(fields[IDX_BUF_STATS_PENDING_ZIP]->store(info->n_pend_unzip)); + + OK(fields[IDX_BUF_STATS_PENDING_READ]->store(info->n_pend_reads)); + + OK(fields[IDX_BUF_STATS_FLUSH_LRU]->store(info->n_pending_flush_lru)); + + OK(fields[IDX_BUF_STATS_FLUSH_LIST]->store(info->n_pending_flush_list)); + + OK(fields[IDX_BUF_STATS_PAGE_YOUNG]->store(info->n_pages_made_young)); + + OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG]->store( + info->n_pages_not_made_young)); + + OK(fields[IDX_BUF_STATS_PAGE_YOUNG_RATE]->store( + info->page_made_young_rate)); + + OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE]->store( + info->page_not_made_young_rate)); + + OK(fields[IDX_BUF_STATS_PAGE_READ]->store(info->n_pages_read)); + + 
OK(fields[IDX_BUF_STATS_PAGE_CREATED]->store(info->n_pages_created)); + + OK(fields[IDX_BUF_STATS_PAGE_WRITTEN]->store(info->n_pages_written)); + + OK(fields[IDX_BUF_STATS_GET]->store(info->n_page_gets)); + + OK(fields[IDX_BUF_STATS_PAGE_READ_RATE]->store(info->pages_read_rate)); + + OK(fields[IDX_BUF_STATS_PAGE_CREATE_RATE]->store(info->pages_created_rate)); + + OK(fields[IDX_BUF_STATS_PAGE_WRITTEN_RATE]->store(info->pages_written_rate)); + + if (info->n_page_get_delta) { + OK(fields[IDX_BUF_STATS_HIT_RATE]->store( + 1000 - (1000 * info->page_read_delta + / info->n_page_get_delta))); + + OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store( + 1000 * info->young_making_delta + / info->n_page_get_delta)); + + OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store( + 1000 * info->not_young_making_delta + / info->n_page_get_delta)); + } else { + OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0)); + OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(0)); + OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store(0)); + } + + OK(fields[IDX_BUF_STATS_READ_AHREAD]->store(info->n_ra_pages_read)); + + OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICTED]->store( + info->n_ra_pages_evicted)); + + OK(fields[IDX_BUF_STATS_READ_AHEAD_RATE]->store( + info->pages_readahead_rate)); + + OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICT_RATE]->store( + info->pages_evicted_rate)); + + OK(fields[IDX_BUF_STATS_LRU_IO_SUM]->store(info->io_sum)); + + OK(fields[IDX_BUF_STATS_LRU_IO_CUR]->store(info->io_cur)); + + OK(fields[IDX_BUF_STATS_UNZIP_SUM]->store(info->unzip_sum)); + + OK(fields[IDX_BUF_STATS_UNZIP_CUR]->store( info->unzip_cur)); + + DBUG_RETURN(schema_table_store_record(thd, table)); +} + +/*******************************************************************//** +This is the function that loops through each buffer pool and fetch buffer +pool stats to information schema table: I_S_INNODB_BUFFER_POOL_STATS +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_buffer_stats_fill_table( 
+/*===============================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (ignored) */ +{ + int status = 0; + buf_pool_info_t* pool_info; + + DBUG_ENTER("i_s_innodb_buffer_fill_general"); + + /* Only allow the PROCESS privilege holder to access the stats */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + pool_info = (buf_pool_info_t*) mem_zalloc(sizeof *pool_info); + + /* Fetch individual buffer pool info */ + buf_stats_get_pool_info(pool_info); + status = i_s_innodb_stats_fill(thd, tables, pool_info); + + mem_free(pool_info); + + DBUG_RETURN(status); +} + +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_POOL_STATS. +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_buffer_pool_stats_init( +/*==============================*/ + void* p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("i_s_innodb_buffer_pool_stats_init"); + + schema = reinterpret_cast<ST_SCHEMA_TABLE*>(p); + + schema->fields_info = i_s_innodb_buffer_stats_fields_info; + schema->fill_table = i_s_innodb_buffer_stats_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_stats = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_BUFFER_POOL_STATS"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, plugin_author), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "InnoDB Buffer Pool Statistics Information "), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + + /* the function to 
invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, i_s_innodb_buffer_pool_stats_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + /* reserved for dependency checking */ + /* void* */ + STRUCT_FLD(__reserved1, NULL), +}; + +/* Fields of the dynamic table INNODB_BUFFER_POOL_PAGE. */ +static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] = +{ +#define IDX_BUFFER_BLOCK_ID 0 + {STRUCT_FLD(field_name, "BLOCK_ID"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_SPACE 1 + {STRUCT_FLD(field_name, "SPACE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_NUM 2 + {STRUCT_FLD(field_name, "PAGE_NUMBER"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_TYPE 3 + {STRUCT_FLD(field_name, "PAGE_TYPE"), + STRUCT_FLD(field_length, 64), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_FLUSH_TYPE 4 + 
{STRUCT_FLD(field_name, "FLUSH_TYPE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_FIX_COUNT 5 + {STRUCT_FLD(field_name, "FIX_COUNT"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_HASHED 6 + {STRUCT_FLD(field_name, "IS_HASHED"), + STRUCT_FLD(field_length, 3), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_NEWEST_MOD 7 + {STRUCT_FLD(field_name, "NEWEST_MODIFICATION"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_OLDEST_MOD 8 + {STRUCT_FLD(field_name, "OLDEST_MODIFICATION"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_ACCESS_TIME 9 + {STRUCT_FLD(field_name, "ACCESS_TIME"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_TABLE_NAME 10 + {STRUCT_FLD(field_name, "TABLE_NAME"), + 
STRUCT_FLD(field_length, 1024), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_INDEX_NAME 11 + {STRUCT_FLD(field_name, "INDEX_NAME"), + STRUCT_FLD(field_length, 1024), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_NUM_RECS 12 + {STRUCT_FLD(field_name, "NUMBER_RECORDS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_DATA_SIZE 13 + {STRUCT_FLD(field_name, "DATA_SIZE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_ZIP_SIZE 14 + {STRUCT_FLD(field_name, "COMPRESSED_SIZE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_STATE 15 + {STRUCT_FLD(field_name, "PAGE_STATE"), + STRUCT_FLD(field_length, 64), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_IO_FIX 16 + {STRUCT_FLD(field_name, "IO_FIX"), + STRUCT_FLD(field_length, 64), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + 
STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_IS_OLD 17 + {STRUCT_FLD(field_name, "IS_OLD"), + STRUCT_FLD(field_length, 3), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUFFER_PAGE_FREE_CLOCK 18 + {STRUCT_FLD(field_name, "FREE_PAGE_CLOCK"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +/*******************************************************************//** +Fill Information Schema table INNODB_BUFFER_PAGE with information +cached in the buf_page_info_t array +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_buffer_page_fill( +/*========================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + const buf_page_info_t* info_array, /*!< in: array cached page + info */ + ulint num_page, /*!< in: number of page info + cached */ + mem_heap_t* heap) /*!< in: temp heap memory */ +{ + TABLE* table; + Field** fields; + + DBUG_ENTER("i_s_innodb_buffer_page_fill"); + + table = tables->table; + + fields = table->field; + + /* Iterate through the cached array and fill the I_S table rows */ + for (ulint i = 0; i < num_page; i++) { + const buf_page_info_t* page_info; + const char* table_name; + const char* index_name; + const char* state_str; + enum buf_page_state state; + + page_info = info_array + i; + + table_name = NULL; + index_name = NULL; + state_str = NULL; + + OK(fields[IDX_BUFFER_BLOCK_ID]->store(page_info->block_id)); + + OK(fields[IDX_BUFFER_PAGE_SPACE]->store(page_info->space_id)); + + 
OK(fields[IDX_BUFFER_PAGE_NUM]->store(page_info->page_num)); + + OK(field_store_string( + fields[IDX_BUFFER_PAGE_TYPE], + i_s_page_type[page_info->page_type].type_str)); + + OK(fields[IDX_BUFFER_PAGE_FLUSH_TYPE]->store( + page_info->flush_type)); + + OK(fields[IDX_BUFFER_PAGE_FIX_COUNT]->store( + page_info->fix_count)); + + if (page_info->hashed) { + OK(field_store_string( + fields[IDX_BUFFER_PAGE_HASHED], "YES")); + } else { + OK(field_store_string( + fields[IDX_BUFFER_PAGE_HASHED], "NO")); + } + + OK(fields[IDX_BUFFER_PAGE_NEWEST_MOD]->store( + (longlong) page_info->newest_mod, true)); + + OK(fields[IDX_BUFFER_PAGE_OLDEST_MOD]->store( + (longlong) page_info->oldest_mod, true)); + + OK(fields[IDX_BUFFER_PAGE_ACCESS_TIME]->store( + page_info->access_time)); + + /* If this is an index page, fetch the index name + and table name */ + if (page_info->page_type == I_S_PAGE_TYPE_INDEX) { + const dict_index_t* index; + + mutex_enter(&dict_sys->mutex); + index = dict_index_get_if_in_cache_low( + page_info->index_id); + + /* Copy the index/table name under mutex. We + do not want to hold the InnoDB mutex while + filling the IS table */ + if (index) { + const char* name_ptr = index->name; + + if (name_ptr[0] == TEMP_INDEX_PREFIX) { + name_ptr++; + } + + index_name = mem_heap_strdup(heap, name_ptr); + + table_name = mem_heap_strdup(heap, + index->table_name); + + } + + mutex_exit(&dict_sys->mutex); + } + + OK(field_store_string( + fields[IDX_BUFFER_PAGE_TABLE_NAME], table_name)); + + OK(field_store_string( + fields[IDX_BUFFER_PAGE_INDEX_NAME], index_name)); + + OK(fields[IDX_BUFFER_PAGE_NUM_RECS]->store( + page_info->num_recs)); + + OK(fields[IDX_BUFFER_PAGE_DATA_SIZE]->store( + page_info->data_size)); + + OK(fields[IDX_BUFFER_PAGE_ZIP_SIZE]->store( + page_info->zip_ssize + ? 
(PAGE_ZIP_MIN_SIZE >> 1) << page_info->zip_ssize + : 0)); + +#if BUF_PAGE_STATE_BITS > 3 +# error "BUF_PAGE_STATE_BITS > 3, please ensure that all 1<<BUF_PAGE_STATE_BITS values are checked for" +#endif + state = static_cast<enum buf_page_state>(page_info->page_state); + + switch (state) { + /* First three states are for compression pages and + are not states we would get as we scan pages through + buffer blocks */ + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + state_str = NULL; + break; + case BUF_BLOCK_NOT_USED: + state_str = "NOT_USED"; + break; + case BUF_BLOCK_READY_FOR_USE: + state_str = "READY_FOR_USE"; + break; + case BUF_BLOCK_FILE_PAGE: + state_str = "FILE_PAGE"; + break; + case BUF_BLOCK_MEMORY: + state_str = "MEMORY"; + break; + case BUF_BLOCK_REMOVE_HASH: + state_str = "REMOVE_HASH"; + break; + }; + + OK(field_store_string(fields[IDX_BUFFER_PAGE_STATE], + state_str)); + + switch (page_info->io_fix) { + case BUF_IO_NONE: + OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX], + "IO_NONE")); + break; + case BUF_IO_READ: + OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX], + "IO_READ")); + break; + case BUF_IO_WRITE: + OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX], + "IO_WRITE")); + break; + } + + OK(field_store_string(fields[IDX_BUFFER_PAGE_IS_OLD], + (page_info->is_old) ? 
"YES" : "NO")); + + OK(fields[IDX_BUFFER_PAGE_FREE_CLOCK]->store( + page_info->freed_page_clock)); + + if (schema_table_store_record(thd, table)) { + DBUG_RETURN(1); + } + } + + DBUG_RETURN(0); +} + +/*******************************************************************//** +Set appropriate page type to a buf_page_info_t structure */ +static +void +i_s_innodb_set_page_type( +/*=====================*/ + buf_page_info_t*page_info, /*!< in/out: structure to fill with + scanned info */ + ulint page_type, /*!< in: page type */ + const byte* frame) /*!< in: buffer frame */ +{ + if (page_type == FIL_PAGE_INDEX) { + const page_t* page = (const page_t*) frame; + + /* FIL_PAGE_INDEX is a bit special, its value + is defined as 17855, so we cannot use FIL_PAGE_INDEX + to index into i_s_page_type[] array, its array index + in the i_s_page_type[] array is I_S_PAGE_TYPE_INDEX + (1) */ + page_info->page_type = I_S_PAGE_TYPE_INDEX; + + page_info->index_id = btr_page_get_index_id(page); + + page_info->data_size = (ulint)(page_header_get_field( + page, PAGE_HEAP_TOP) - (page_is_comp(page) + ? 
PAGE_NEW_SUPREMUM_END + : PAGE_OLD_SUPREMUM_END) + - page_header_get_field(page, PAGE_GARBAGE)); + + page_info->num_recs = page_get_n_recs(page); + } else if (page_type >= I_S_PAGE_TYPE_UNKNOWN) { + /* Encountered an unknown page type */ + page_info->page_type = I_S_PAGE_TYPE_UNKNOWN; + } else { + /* Make sure we get the right index into the + i_s_page_type[] array */ + ut_a(page_type == i_s_page_type[page_type].type_value); + + page_info->page_type = page_type; + } + + if (page_info->page_type == FIL_PAGE_TYPE_ZBLOB + || page_info->page_type == FIL_PAGE_TYPE_ZBLOB2) { + page_info->page_num = mach_read_from_4( + frame + FIL_PAGE_OFFSET); + page_info->space_id = mach_read_from_4( + frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + } +} + +/*******************************************************************//** +Scans pages in the buffer cache, and collect their general information +into the buf_page_info_t array which is zero-filled. So any fields +that are not initialized in the function will default to 0 */ +static +void +i_s_innodb_buffer_page_get_info( +/*============================*/ + const buf_page_t*bpage, /*!< in: buffer pool page to scan */ + ulint pos, /*!< in: buffer block position in + buffer pool or in the LRU list */ + buf_page_info_t*page_info) /*!< in: zero filled info structure; + out: structure filled with scanned + info */ +{ + page_info->block_id = pos; + + page_info->page_state = buf_page_get_state(bpage); + + /* Only fetch information for buffers that map to a tablespace, + that is, buffer page with state BUF_BLOCK_ZIP_PAGE, + BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_FILE_PAGE */ + if (buf_page_in_file(bpage)) { + const byte* frame; + ulint page_type; + + page_info->space_id = buf_page_get_space(bpage); + + page_info->page_num = buf_page_get_page_no(bpage); + + page_info->flush_type = bpage->flush_type; + + page_info->fix_count = bpage->buf_fix_count; + + page_info->newest_mod = bpage->newest_modification; + + page_info->oldest_mod = 
bpage->oldest_modification; + + page_info->access_time = bpage->access_time; + + page_info->zip_ssize = bpage->zip.ssize; + + page_info->io_fix = bpage->io_fix; + + page_info->is_old = bpage->old; + + page_info->freed_page_clock = bpage->freed_page_clock; + + if (page_info->page_state == BUF_BLOCK_FILE_PAGE) { + const buf_block_t*block; + + block = reinterpret_cast<const buf_block_t*>(bpage); + frame = block->frame; + page_info->hashed = (block->index != NULL); + } else { + ut_ad(page_info->zip_ssize); + frame = bpage->zip.data; + } + + page_type = fil_page_get_type(frame); + + i_s_innodb_set_page_type(page_info, page_type, frame); + } else { + page_info->page_type = I_S_PAGE_TYPE_UNKNOWN; + } +} + +/*******************************************************************//** +This is the function that goes through each block of the buffer pool +and fetch information to information schema tables: INNODB_BUFFER_PAGE. +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_fill_buffer_pool( +/*========================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables) /*!< in/out: tables to fill */ +{ + int status = 0; + mem_heap_t* heap; + + DBUG_ENTER("i_s_innodb_fill_buffer_pool"); + + heap = mem_heap_create(10000); + + /* Go through each chunk of buffer pool. 
Currently, we only + have one single chunk for each buffer pool */ + for (ulint n = 0; n < buf_pool->n_chunks; n++) { + const buf_block_t* block; + ulint n_blocks; + buf_page_info_t* info_buffer; + ulint num_page; + ulint mem_size; + ulint chunk_size; + ulint num_to_process = 0; + ulint block_id = 0; + mutex_t* block_mutex; + + /* Get buffer block of the nth chunk */ + block = buf_get_nth_chunk_block(buf_pool, n, &chunk_size); + num_page = 0; + + while (chunk_size > 0) { + /* we cache maximum MAX_BUF_INFO_CACHED number of + buffer page info */ + num_to_process = ut_min(chunk_size, + MAX_BUF_INFO_CACHED); + + mem_size = num_to_process * sizeof(buf_page_info_t); + + /* For each chunk, we'll pre-allocate information + structures to cache the page information read from + the buffer pool. Doing so before obtain any mutex */ + info_buffer = (buf_page_info_t*) mem_heap_zalloc( + heap, mem_size); + + /* Obtain appropriate mutexes. Since this is diagnostic + buffer pool info printout, we are not required to + preserve the overall consistency, so we can + release mutex periodically */ + buf_pool_mutex_enter(); + + /* GO through each block in the chunk */ + for (n_blocks = num_to_process; n_blocks--; block++) { + block_mutex = buf_page_get_mutex_enter(&block->page); + i_s_innodb_buffer_page_get_info( + &block->page, block_id, + info_buffer + num_page); + mutex_exit(block_mutex); + block_id++; + num_page++; + } + + buf_pool_mutex_exit(); + + /* Fill in information schema table with information + just collected from the buffer chunk scan */ + status = i_s_innodb_buffer_page_fill( + thd, tables, info_buffer, + num_page, heap); + + /* If something goes wrong, break and return */ + if (status) { + break; + } + + mem_heap_empty(heap); + chunk_size -= num_to_process; + num_page = 0; + } + } + + mem_heap_free(heap); + + DBUG_RETURN(status); +} + +/*******************************************************************//** +Fill page information for pages in InnoDB buffer pool to the 
+dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_buffer_page_fill_table( +/*==============================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (ignored) */ +{ + int status = 0; + + DBUG_ENTER("i_s_innodb_buffer_page_fill_table"); + + /* deny access to user without PROCESS privilege */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + /* Fetch information from pages in this buffer pool, + and fill the corresponding I_S table */ + status = i_s_innodb_fill_buffer_pool(thd, tables); + + DBUG_RETURN(status); +} + +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE. +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_buffer_page_init( +/*========================*/ + void* p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("i_s_innodb_buffer_page_init"); + + schema = reinterpret_cast<ST_SCHEMA_TABLE*>(p); + + schema->fields_info = i_s_innodb_buffer_page_fields_info; + schema->fill_table = i_s_innodb_buffer_page_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_page = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_BUFFER_PAGE"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, plugin_author), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "InnoDB Buffer Page Information"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + + /* the function to invoke when plugin is 
loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, i_s_innodb_buffer_page_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + /* reserved for dependency checking */ + /* void* */ + STRUCT_FLD(__reserved1, NULL), +}; + +static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] = +{ +#define IDX_BUF_LRU_POS 0 + {STRUCT_FLD(field_name, "LRU_POSITION"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_SPACE 1 + {STRUCT_FLD(field_name, "SPACE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_NUM 2 + {STRUCT_FLD(field_name, "PAGE_NUMBER"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_TYPE 3 + {STRUCT_FLD(field_name, "PAGE_TYPE"), + STRUCT_FLD(field_length, 64), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_FLUSH_TYPE 4 + {STRUCT_FLD(field_name, "FLUSH_TYPE"), + STRUCT_FLD(field_length, 
MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_FIX_COUNT 5 + {STRUCT_FLD(field_name, "FIX_COUNT"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_HASHED 6 + {STRUCT_FLD(field_name, "IS_HASHED"), + STRUCT_FLD(field_length, 3), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_NEWEST_MOD 7 + {STRUCT_FLD(field_name, "NEWEST_MODIFICATION"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_OLDEST_MOD 8 + {STRUCT_FLD(field_name, "OLDEST_MODIFICATION"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_ACCESS_TIME 9 + {STRUCT_FLD(field_name, "ACCESS_TIME"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_TABLE_NAME 10 + {STRUCT_FLD(field_name, "TABLE_NAME"), + STRUCT_FLD(field_length, 1024), + STRUCT_FLD(field_type, 
MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_INDEX_NAME 11 + {STRUCT_FLD(field_name, "INDEX_NAME"), + STRUCT_FLD(field_length, 1024), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_NUM_RECS 12 + {STRUCT_FLD(field_name, "NUMBER_RECORDS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_DATA_SIZE 13 + {STRUCT_FLD(field_name, "DATA_SIZE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_ZIP_SIZE 14 + {STRUCT_FLD(field_name, "COMPRESSED_SIZE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_STATE 15 + {STRUCT_FLD(field_name, "COMPRESSED"), + STRUCT_FLD(field_length, 3), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_IO_FIX 16 + {STRUCT_FLD(field_name, "IO_FIX"), + STRUCT_FLD(field_length, 64), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, 
""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_IS_OLD 17 + {STRUCT_FLD(field_name, "IS_OLD"), + STRUCT_FLD(field_length, 3), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_BUF_LRU_PAGE_FREE_CLOCK 18 + {STRUCT_FLD(field_name, "FREE_PAGE_CLOCK"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +/*******************************************************************//** +Fill Information Schema table INNODB_BUFFER_PAGE_LRU with information +cached in the buf_page_info_t array +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_buf_page_lru_fill( +/*=========================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + const buf_page_info_t* info_array, /*!< in: array cached page + info */ + ulint num_page) /*!< in: number of page info + cached */ +{ + TABLE* table; + Field** fields; + mem_heap_t* heap; + + DBUG_ENTER("i_s_innodb_buf_page_lru_fill"); + + table = tables->table; + + fields = table->field; + + heap = mem_heap_create(1000); + + /* Iterate through the cached array and fill the I_S table rows */ + for (ulint i = 0; i < num_page; i++) { + const buf_page_info_t* page_info; + const char* table_name; + const char* index_name; + const char* state_str; + enum buf_page_state state; + + table_name = NULL; + index_name = NULL; + state_str = NULL; + + page_info = info_array + i; + + OK(fields[IDX_BUF_LRU_POS]->store(page_info->block_id)); + + OK(fields[IDX_BUF_LRU_PAGE_SPACE]->store(page_info->space_id)); + + OK(fields[IDX_BUF_LRU_PAGE_NUM]->store(page_info->page_num)); + + OK(field_store_string( + 
fields[IDX_BUF_LRU_PAGE_TYPE], + i_s_page_type[page_info->page_type].type_str)); + + OK(fields[IDX_BUF_LRU_PAGE_FLUSH_TYPE]->store( + page_info->flush_type)); + + OK(fields[IDX_BUF_LRU_PAGE_FIX_COUNT]->store( + page_info->fix_count)); + + if (page_info->hashed) { + OK(field_store_string( + fields[IDX_BUF_LRU_PAGE_HASHED], "YES")); + } else { + OK(field_store_string( + fields[IDX_BUF_LRU_PAGE_HASHED], "NO")); + } + + OK(fields[IDX_BUF_LRU_PAGE_NEWEST_MOD]->store( + page_info->newest_mod, true)); + + OK(fields[IDX_BUF_LRU_PAGE_OLDEST_MOD]->store( + page_info->oldest_mod, true)); + + OK(fields[IDX_BUF_LRU_PAGE_ACCESS_TIME]->store( + page_info->access_time)); + + /* If this is an index page, fetch the index name + and table name */ + if (page_info->page_type == I_S_PAGE_TYPE_INDEX) { + const dict_index_t* index; + + mutex_enter(&dict_sys->mutex); + index = dict_index_get_if_in_cache_low( + page_info->index_id); + + /* Copy the index/table name under mutex. We + do not want to hold the InnoDB mutex while + filling the IS table */ + if (index) { + const char* name_ptr = index->name; + + if (name_ptr[0] == TEMP_INDEX_PREFIX) { + name_ptr++; + } + + index_name = mem_heap_strdup(heap, name_ptr); + + table_name = mem_heap_strdup(heap, + index->table_name); + } + + mutex_exit(&dict_sys->mutex); + } + + OK(field_store_string( + fields[IDX_BUF_LRU_PAGE_TABLE_NAME], table_name)); + + OK(field_store_string( + fields[IDX_BUF_LRU_PAGE_INDEX_NAME], index_name)); + OK(fields[IDX_BUF_LRU_PAGE_NUM_RECS]->store( + page_info->num_recs)); + + OK(fields[IDX_BUF_LRU_PAGE_DATA_SIZE]->store( + page_info->data_size)); + + OK(fields[IDX_BUF_LRU_PAGE_ZIP_SIZE]->store( + page_info->zip_ssize ? 
+ 512 << page_info->zip_ssize : 0)); + + state = static_cast<enum buf_page_state>(page_info->page_state); + + switch (state) { + /* Compressed page */ + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + state_str = "YES"; + break; + /* Uncompressed page */ + case BUF_BLOCK_FILE_PAGE: + state_str = "NO"; + break; + /* We should not see following states */ + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + state_str = NULL; + break; + }; + + OK(field_store_string(fields[IDX_BUF_LRU_PAGE_STATE], + state_str)); + + switch (page_info->io_fix) { + case BUF_IO_NONE: + OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IO_FIX], + "IO_NONE")); + break; + case BUF_IO_READ: + OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IO_FIX], + "IO_READ")); + break; + case BUF_IO_WRITE: + OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IO_FIX], + "IO_WRITE")); + break; + } + + OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IS_OLD], + (page_info->is_old) ? "YES" : "NO")); + + OK(fields[IDX_BUF_LRU_PAGE_FREE_CLOCK]->store( + page_info->freed_page_clock)); + + if (schema_table_store_record(thd, table)) { + mem_heap_free(heap); + DBUG_RETURN(1); + } + + mem_heap_empty(heap); + } + + mem_heap_free(heap); + + DBUG_RETURN(0); +} + +/*******************************************************************//** +This is the function that goes through buffer pool's LRU list +and fetch information to INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU. 
+@return 0 on success, 1 on failure */ +static +int +i_s_innodb_fill_buffer_lru( +/*=======================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables) /*!< in/out: tables to fill */ +{ + int status = 0; + buf_page_info_t* info_buffer; + ulint lru_pos = 0; + const buf_page_t* bpage; + ulint lru_len; + mutex_t* block_mutex; + + DBUG_ENTER("i_s_innodb_fill_buffer_lru"); + + /* Obtain buf_pool mutex before allocate info_buffer, since + UT_LIST_GET_LEN(buf_pool->LRU) could change */ + mutex_enter(&LRU_list_mutex); + + lru_len = UT_LIST_GET_LEN(buf_pool->LRU); + + /* Print error message if malloc fail */ + info_buffer = (buf_page_info_t*) my_malloc( + lru_len * sizeof *info_buffer, MYF(MY_WME)); + + if (!info_buffer) { + status = 1; + goto exit; + } + + memset(info_buffer, 0, lru_len * sizeof *info_buffer); + + /* Walk through Pool's LRU list and print the buffer page + information */ + bpage = UT_LIST_GET_LAST(buf_pool->LRU); + + while (bpage != NULL) { + block_mutex = buf_page_get_mutex_enter(bpage); + /* Use the same function that collect buffer info for + INNODB_BUFFER_PAGE to get buffer page info */ + i_s_innodb_buffer_page_get_info(bpage, lru_pos, + (info_buffer + lru_pos)); + + bpage = UT_LIST_GET_PREV(LRU, bpage); + mutex_exit(block_mutex); + + lru_pos++; + } + + ut_ad(lru_pos == lru_len); + ut_ad(lru_pos == UT_LIST_GET_LEN(buf_pool->LRU)); + +exit: + mutex_exit(&LRU_list_mutex); + + if (info_buffer) { + status = i_s_innodb_buf_page_lru_fill( + thd, tables, info_buffer, lru_len); + + my_free(info_buffer, MYF(MY_ALLOW_ZERO_PTR)); + } + + DBUG_RETURN(status); +} + +/*******************************************************************//** +Fill page information for pages in InnoDB buffer pool to the +dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_buf_page_lru_fill_table( +/*===============================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to 
fill */ + Item* ) /*!< in: condition (ignored) */ +{ + int status = 0; + + DBUG_ENTER("i_s_innodb_buf_page_lru_fill_table"); + + /* deny access to any users that do not hold PROCESS_ACL */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + /* Fetch information from pages in this buffer pool's LRU list, + and fill the corresponding I_S table */ + status = i_s_innodb_fill_buffer_lru(thd, tables); + + DBUG_RETURN(status); +} + +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU. +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_buffer_page_lru_init( +/*============================*/ + void* p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("i_s_innodb_buffer_page_lru_init"); + + schema = reinterpret_cast<ST_SCHEMA_TABLE*>(p); + + schema->fields_info = i_s_innodb_buf_page_lru_fields_info; + schema->fill_table = i_s_innodb_buf_page_lru_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_page_lru = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_BUFFER_PAGE_LRU"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, plugin_author), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "InnoDB Buffer Page in LRU"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + + /* the function to invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, i_s_innodb_buffer_page_lru_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* 
plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + /* reserved for dependency checking */ + /* void* */ + STRUCT_FLD(__reserved1, NULL), +}; diff --git a/storage/xtradb/handler/i_s.h b/storage/xtradb/handler/i_s.h index 3905fdc7b06..7585994543f 100644 --- a/storage/xtradb/handler/i_s.h +++ b/storage/xtradb/handler/i_s.h @@ -36,7 +36,6 @@ extern struct st_mysql_plugin i_s_innodb_cmp; extern struct st_mysql_plugin i_s_innodb_cmp_reset; extern struct st_mysql_plugin i_s_innodb_cmpmem; extern struct st_mysql_plugin i_s_innodb_cmpmem_reset; -extern struct st_mysql_plugin i_s_innodb_patches; extern struct st_mysql_plugin i_s_innodb_rseg; extern struct st_mysql_plugin i_s_innodb_table_stats; extern struct st_mysql_plugin i_s_innodb_index_stats; @@ -44,5 +43,9 @@ extern struct st_mysql_plugin i_s_innodb_admin_command; extern struct st_mysql_plugin i_s_innodb_sys_tables; extern struct st_mysql_plugin i_s_innodb_sys_indexes; extern struct st_mysql_plugin i_s_innodb_sys_stats; +extern struct st_mysql_plugin i_s_innodb_changed_pages; +extern struct st_mysql_plugin i_s_innodb_buffer_page; +extern struct st_mysql_plugin i_s_innodb_buffer_page_lru; +extern struct st_mysql_plugin i_s_innodb_buffer_stats; #endif /* i_s_h */ diff --git a/storage/xtradb/handler/innodb_patch_info.h b/storage/xtradb/handler/innodb_patch_info.h deleted file mode 100644 index 38b97411340..00000000000 --- a/storage/xtradb/handler/innodb_patch_info.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (C) 2002-2006 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#ifdef USE_PRAGMA_INTERFACE -#pragma interface /* gcc class implementation */ -#endif - -struct innodb_enhancement { - const char *file; - const char *name; - const char *comment; - const char *link; -}innodb_enhancements[] = { -{"xtradb_show_enhancements","I_S.XTRADB_ENHANCEMENTS","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_show_status","Improvements to SHOW INNODB STATUS","Memory information and lock info fixes","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_io","Improvements to InnoDB IO","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_opt_lru_count","Fix of buffer_pool mutex","Decreases contention on buffer_pool mutex on LRU operations","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_buffer_pool_pages","Information of buffer pool content","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_expand_undo_slots","expandable maximum number of undo slots","from 1024 (default) to about 4000","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_extra_rseg","allow to create extra rollback segments","When create new db, the new parameter allows to create more rollback segments","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_overwrite_relay_log_info","overwrite relay-log.info when slave recovery","Building as plugin, it is not used.","http://www.percona.com/docs/wiki/percona-xtradb:innodb_overwrite_relay_log_info"}, -{"innodb_thread_concurrency_timer_based","use InnoDB timer based concurrency throttling (backport from MySQL 
5.4.0)","",""}, -{"innodb_expand_import","convert .ibd file automatically when import tablespace","the files are generated by xtrabackup export mode.","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_dict_size_limit","Limit dictionary cache size","Variable innodb_dict_size_limit in bytes","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_stats","Additional features about InnoDB statistics/optimizer","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_recovery_patches","Bugfixes and adjustments about recovery process","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_purge_thread","Enable to use purge devoted thread","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_admin_command_base","XtraDB specific command interface through i_s","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_show_lock_name","Show mutex/lock name instead of crated file/line","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_extend_slow","Extended statistics in slow.log","It is InnoDB-part only. 
It needs to patch also to mysqld.","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_lru_dump_restore","Dump and restore command for content of buffer pool","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_separate_doublewrite","Add option 'innodb_doublewrite_file' to separate doublewrite dedicated tablespace","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_pass_corrupt_table","Treat tables as corrupt instead of crash, when meet corrupt blocks","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_fast_checksum","Using the checksum on 32bit-unit calculation","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_files_extend","allow >4GB transaction log files, and can vary universal page size of datafiles","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_sys_tables_sys_indexes","Expose InnoDB SYS_TABLES and SYS_INDEXES schema tables","","http://www.percona.com/docs/wiki/percona-xtradb"}, -{NULL, NULL, NULL, NULL} -}; diff --git a/storage/xtradb/ibuf/ibuf0ibuf.c b/storage/xtradb/ibuf/ibuf0ibuf.c index ee9b85fa38c..e47794d2db1 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.c +++ b/storage/xtradb/ibuf/ibuf0ibuf.c @@ -2759,11 +2759,19 @@ ibuf_insert_low( root = ibuf_tree_root_get(&mtr); - err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG, - cursor, - ibuf_entry, &ins_rec, - &dummy_big_rec, 0, thr, &mtr); + err = btr_cur_optimistic_insert( + BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG, + cursor, ibuf_entry, &ins_rec, + &dummy_big_rec, 0, thr, &mtr); + + if (err == DB_FAIL) { + err = btr_cur_pessimistic_insert( + BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG, + cursor, ibuf_entry, &ins_rec, + &dummy_big_rec, 0, thr, &mtr); + } + if (err == DB_SUCCESS) { /* Update the page max trx id field */ page_update_max_trx_id(btr_cur_get_block(cursor), NULL, diff --git a/storage/xtradb/include/buf0buf.h 
b/storage/xtradb/include/buf0buf.h index e3039fe520c..6566f8fa9e4 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -103,6 +103,81 @@ enum buf_page_state { before putting to the free list */ }; +/** This structure defines information we will fetch from each buffer pool. It +will be used to print table IO stats */ +struct buf_pool_info_struct{ + /* General buffer pool info */ + ulint pool_size; /*!< Buffer Pool size in pages */ + ulint lru_len; /*!< Length of buf_pool->LRU */ + ulint old_lru_len; /*!< buf_pool->LRU_old_len */ + ulint free_list_len; /*!< Length of buf_pool->free list */ + ulint flush_list_len; /*!< Length of buf_pool->flush_list */ + ulint n_pend_unzip; /*!< buf_pool->n_pend_unzip, pages + pending decompress */ + ulint n_pend_reads; /*!< buf_pool->n_pend_reads, pages + pending read */ + ulint n_pending_flush_lru; /*!< Pages pending flush in LRU */ + ulint n_pending_flush_single_page;/*!< Pages pending to be + flushed as part of single page + flushes issued by various user + threads */ + ulint n_pending_flush_list; /*!< Pages pending flush in FLUSH + LIST */ + ulint n_pages_made_young; /*!< number of pages made young */ + ulint n_pages_not_made_young; /*!< number of pages not made young */ + ulint n_pages_read; /*!< buf_pool->n_pages_read */ + ulint n_pages_created; /*!< buf_pool->n_pages_created */ + ulint n_pages_written; /*!< buf_pool->n_pages_written */ + ulint n_page_gets; /*!< buf_pool->n_page_gets */ + ulint n_ra_pages_read_rnd; /*!< buf_pool->n_ra_pages_read_rnd, + number of pages readahead */ + ulint n_ra_pages_read; /*!< buf_pool->n_ra_pages_read, number + of pages readahead */ + ulint n_ra_pages_evicted; /*!< buf_pool->n_ra_pages_evicted, + number of readahead pages evicted + without access */ + ulint n_page_get_delta; /*!< num of buffer pool page gets since + last printout */ + + /* Buffer pool access stats */ + double page_made_young_rate; /*!< page made young rate in pages + per second */ + double 
page_not_made_young_rate;/*!< page not made young rate + in pages per second */ + double pages_read_rate; /*!< num of pages read per second */ + double pages_created_rate; /*!< num of pages create per second */ + double pages_written_rate; /*!< num of pages written per second */ + ulint page_read_delta; /*!< num of pages read since last + printout */ + ulint young_making_delta; /*!< num of pages made young since + last printout */ + ulint not_young_making_delta; /*!< num of pages not make young since + last printout */ + + /* Statistics about read ahead algorithm. */ + double pages_readahead_rnd_rate;/*!< random readahead rate in pages per + second */ + double pages_readahead_rate; /*!< readahead rate in pages per + second */ + double pages_evicted_rate; /*!< rate of readahead page evicted + without access, in pages per second */ + + /* Stats about LRU eviction */ + ulint unzip_lru_len; /*!< length of buf_pool->unzip_LRU + list */ + /* Counters for LRU policy */ + ulint io_sum; /*!< buf_LRU_stat_sum.io */ + ulint io_cur; /*!< buf_LRU_stat_cur.io, num of IO + for current interval */ + ulint unzip_sum; /*!< buf_LRU_stat_sum.unzip */ + ulint unzip_cur; /*!< buf_LRU_stat_cur.unzip, num + pages decompressed in current + interval */ +}; + +typedef struct buf_pool_info_struct buf_pool_info_t; + + #ifndef UNIV_HOTBACKUP /********************************************************************//** Creates the buffer pool. @@ -623,6 +698,16 @@ void buf_print_io( /*=========*/ FILE* file); /*!< in: file where to print */ +/*******************************************************************//** +Collect buffer pool stats information for a buffer pool. 
Also +record aggregated stats if there is more than one buffer pool +in the server */ +UNIV_INTERN +void +buf_stats_get_pool_info( +/*====================*/ + buf_pool_info_t* pool_info); /*!< in/out: buffer pool info + to fill */ /*********************************************************************//** Returns the ratio in percents of modified pages in the buffer pool / database pages in the buffer pool. @@ -1051,12 +1136,27 @@ UNIV_INTERN ulint buf_get_free_list_len(void); /*=======================*/ + +/*********************************************************************//** +Get the nth chunk's buffer block in the specified buffer pool. +@return the nth chunk's buffer block. */ +UNIV_INLINE +buf_block_t* +buf_get_nth_chunk_block( +/*====================*/ + const buf_pool_t* buf_pool, /*!< in: buffer pool instance */ + ulint n, /*!< in: nth chunk in the buffer pool */ + ulint* chunk_size); /*!< out: chunk size */ + #endif /* !UNIV_HOTBACKUP */ /** The common buffer control block structure for compressed and uncompressed frames */ +/** Number of bits used for buffer page states. */ +#define BUF_PAGE_STATE_BITS 3 + struct buf_page_struct{ /** @name General fields None of these bit-fields must be modified without holding @@ -1071,7 +1171,8 @@ struct buf_page_struct{ unsigned offset:32; /*!< page number; also protected by buf_pool_mutex. */ - unsigned state:3; /*!< state of the control block; also + unsigned state:BUF_PAGE_STATE_BITS; + /*!< state of the control block; also protected by buf_pool_mutex. 
State transitions from BUF_BLOCK_READY_FOR_USE to diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index 92e0edf0444..8dae4b6f4c6 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -36,6 +36,7 @@ Created 11/5/1995 Heikki Tuuri #include "buf0lru.h" #include "buf0rea.h" #include "srv0srv.h" + /********************************************************************//** Reads the freed_page_clock of a buffer block. @return freed_page_clock */ @@ -1154,4 +1155,23 @@ buf_block_dbg_add_level( sync_thread_add_level(&block->lock, level, FALSE); } #endif /* UNIV_SYNC_DEBUG */ + +/*********************************************************************//** +Get the nth chunk's buffer block in the specified buffer pool. +@return the nth chunk's buffer block. */ +UNIV_INLINE +buf_block_t* +buf_get_nth_chunk_block( +/*====================*/ + const buf_pool_t* buf_pool, /*!< in: buffer pool instance */ + ulint n, /*!< in: nth chunk in the buffer pool */ + ulint* chunk_size) /*!< out: chunk size */ +{ + const buf_chunk_t* chunk; + + chunk = buf_pool->chunks + n; + *chunk_size = chunk->size; + return(chunk->blocks); +} #endif /* !UNIV_HOTBACKUP */ + diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h index 8abebfb675c..dfce4f6a117 100644 --- a/storage/xtradb/include/buf0lru.h +++ b/storage/xtradb/include/buf0lru.h @@ -93,13 +93,12 @@ buf_LRU_insert_zip_clean( Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns TRUE, it will temporarily -release buf_pool_mutex. Furthermore, the page frame will no longer be -accessible via bpage. +NOTE: This will temporarily release buf_pool_mutex. Furthermore, the +page frame will no longer be accessible via bpage. -The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and -release these two mutexes after the call. 
No other -buf_page_get_mutex() may be held when calling this function. +The caller must hold buf_page_get_mutex(bpage) and release this mutex +after the call. No other buf_page_get_mutex() may be held when +calling this function. @return TRUE if freed, FALSE otherwise. */ UNIV_INTERN ibool diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h index a57c5127323..c1ed6ba4f2a 100644 --- a/storage/xtradb/include/dict0boot.h +++ b/storage/xtradb/include/dict0boot.h @@ -91,6 +91,26 @@ void dict_create(void); /*=============*/ +/*****************************************************************//** +Verifies the SYS_STATS table by scanning its clustered index. This +function may only be called at InnoDB startup time. + +@return TRUE if SYS_STATS was verified successfully */ +UNIV_INTERN +ibool +dict_verify_xtradb_sys_stats(void); +/*==============================*/ + +/*****************************************************************//** +Discard the existing dictionary cache SYS_STATS information, create and +add it there anew. Does not touch the old SYS_STATS tablespace page +under the assumption that they are corrupted or overwritten for other +purposes. 
*/ +UNIV_INTERN +void +dict_recreate_xtradb_sys_stats(void); +/*================================*/ + /* Space id and page no where the dictionary header resides */ #define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */ diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index d3300b6b8aa..656a534a0c1 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -142,6 +142,8 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */ +#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_ZBLOB2 + /*!< Last page type */ /* @} */ /** Space types @{ */ diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 152d83a191f..4ead88458a4 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -41,6 +41,9 @@ Created 12/9/1995 Heikki Tuuri #include "sync0rw.h" #endif /* !UNIV_HOTBACKUP */ +/* Type used for all log sequence number storage and arithmetics */ +typedef ib_uint64_t lsn_t; + /** Redo log buffer */ typedef struct log_struct log_t; /** Redo log group */ @@ -953,6 +956,11 @@ struct log_struct{ become signaled */ /* @} */ #endif /* UNIV_LOG_ARCHIVE */ + ib_uint64_t tracked_lsn; /*!< log tracking has advanced to this + lsn. Field accessed atomically where + 64-bit atomic ops are supported, + protected by the log sys mutex + otherwise. */ }; #ifdef UNIV_LOG_ARCHIVE diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h new file mode 100644 index 00000000000..0e0ca169f6f --- /dev/null +++ b/storage/xtradb/include/log0online.h @@ -0,0 +1,111 @@ +/***************************************************************************** + +Copyright (c) 2011-2012, Percona Inc. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/log0online.h +Online database log parsing for changed page tracking +*******************************************************/ + +#ifndef log0online_h +#define log0online_h + +#include "univ.i" +#include "os0file.h" + +/*********************************************************************//** +Initializes the online log following subsystem. */ +UNIV_INTERN +void +log_online_read_init(); +/*===================*/ + +/*********************************************************************//** +Shuts down the online log following subsystem. */ +UNIV_INTERN +void +log_online_read_shutdown(); +/*=======================*/ + +/*********************************************************************//** +Reads and parses the redo log up to last checkpoint LSN to build the changed +page bitmap which is then written to disk. 
*/ +UNIV_INTERN +void +log_online_follow_redo_log(); +/*=========================*/ + +/** The iterator through all bits of changed pages bitmap blocks */ +struct log_bitmap_iterator_struct +{ + char in_name[FN_REFLEN]; /*!< the file name for bitmap + input */ + os_file_t in; /*!< the bitmap input file */ + ib_uint64_t in_offset; /*!< the next write position in the + bitmap output file */ + ib_uint32_t bit_offset; /*!< bit offset inside of bitmap + block*/ + ib_uint64_t start_lsn; /*!< Start lsn of the block */ + ib_uint64_t end_lsn; /*!< End lsn of the block */ + ib_uint32_t space_id; /*!< Block space id */ + ib_uint32_t first_page_id; /*!< First block page id */ + ibool changed; /*!< true if current page was changed */ + byte* page; /*!< Bitmap block */ +}; + +typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t; + +#define LOG_BITMAP_ITERATOR_START_LSN(i) \ + ((i).start_lsn) +#define LOG_BITMAP_ITERATOR_END_LSN(i) \ + ((i).end_lsn) +#define LOG_BITMAP_ITERATOR_SPACE_ID(i) \ + ((i).space_id) +#define LOG_BITMAP_ITERATOR_PAGE_NUM(i) \ + ((i).first_page_id + (i).bit_offset) +#define LOG_BITMAP_ITERATOR_PAGE_CHANGED(i) \ + ((i).changed) + +/*********************************************************************//** +Initializes log bitmap iterator. +@return TRUE if the iterator is initialized OK, FALSE otherwise. */ +UNIV_INTERN +ibool +log_online_bitmap_iterator_init( +/*============================*/ + log_bitmap_iterator_t *i); /*!<in/out: iterator */ + +/*********************************************************************//** +Releases log bitmap iterator. */ +UNIV_INTERN +void +log_online_bitmap_iterator_release( +/*===============================*/ + log_bitmap_iterator_t *i); /*!<in/out: iterator */ + +/*********************************************************************//** +Iterates through bits of saved bitmap blocks. +Sequentially reads blocks from bitmap file(s) and iterates through +their bits. Ignores blocks with wrong checksum. 
+@return TRUE if iteration is successful, FALSE if all bits are iterated. */ +UNIV_INTERN +ibool +log_online_bitmap_iterator_next( +/*============================*/ + log_bitmap_iterator_t *i); /*!<in/out: iterator */ + +#endif diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h index 15065267250..fdffd86e4c4 100644 --- a/storage/xtradb/include/log0recv.h +++ b/storage/xtradb/include/log0recv.h @@ -32,6 +32,28 @@ Created 9/20/1997 Heikki Tuuri #include "hash0hash.h" #include "log0log.h" +/******************************************************//** +Checks the 4-byte checksum to the trailer checksum field of a log +block. We also accept a log block in the old format before +InnoDB-3.23.52 where the checksum field contains the log block number. +@return TRUE if ok, or if the log block may be in the format of InnoDB +version predating 3.23.52 */ +UNIV_INTERN +ibool +log_block_checksum_is_ok_or_old_format( +/*===================================*/ + const byte* block); /*!< in: pointer to a log block */ + +/*******************************************************//** +Calculates the new value for lsn when more data is added to the log. */ +UNIV_INTERN +ib_uint64_t +recv_calc_lsn_on_data_add( +/*======================*/ + ib_uint64_t lsn, /*!< in: old lsn */ + ib_uint64_t len); /*!< in: this many bytes of data is + added, log block headers not included */ + #ifdef UNIV_HOTBACKUP extern ibool recv_replay_file_ops; @@ -182,6 +204,21 @@ UNIV_INTERN void recv_recovery_rollback_active(void); /*===============================*/ + +/*******************************************************************//** +Tries to parse a single log record and returns its length. 
+@return length of the record, or 0 if the record was not complete */ +UNIV_INTERN +ulint +recv_parse_log_rec( +/*===============*/ + byte* ptr, /*!< in: pointer to a buffer */ + byte* end_ptr,/*!< in: pointer to the buffer end */ + byte* type, /*!< out: type */ + ulint* space, /*!< out: space id */ + ulint* page_no,/*!< out: page number */ + byte** body); /*!< out: log record body start */ + /*******************************************************//** Scans log from a buffer and stores new log data to the parsing buffer. Parses and hashes the log records if new data found. Unless diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 98cab5ef874..bc3a54192d5 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -463,6 +463,14 @@ os_file_set_eof( /*============*/ FILE* file); /*!< in: file to be truncated */ /***********************************************************************//** +Truncates a file at the specified position. +@return TRUE if success */ +UNIV_INTERN +ibool +os_file_set_eof_at( + os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len);/*!< in: new file length */ +/***********************************************************************//** Flushes the write buffers of a given file to the disk. @return TRUE if success */ UNIV_INTERN diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 7366e2c3402..6732fa52b29 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -287,7 +287,11 @@ Atomic compare-and-swap and increment for InnoDB. */ #if defined(HAVE_IB_GCC_ATOMIC_BUILTINS) -#define HAVE_ATOMIC_BUILTINS +# define HAVE_ATOMIC_BUILTINS + +# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64 +# define HAVE_ATOMIC_BUILTINS_64 +# endif /**********************************************************//** Returns true if swapped, ptr is pointer to target, old_val is value to @@ -326,6 +330,9 @@ amount of increment. 
*/ # define os_atomic_increment_ulint(ptr, amount) \ os_atomic_increment(ptr, amount) +# define os_atomic_increment_uint64(ptr, amount) \ + os_atomic_increment(ptr, amount) + /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val */ @@ -334,12 +341,13 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ #elif defined(HAVE_IB_SOLARIS_ATOMICS) -#define HAVE_ATOMIC_BUILTINS +# define HAVE_ATOMIC_BUILTINS +# define HAVE_ATOMIC_BUILTINS_64 /* If not compiling with GCC or GCC doesn't support the atomic intrinsics and running on Solaris >= 10 use Solaris atomics */ -#include <atomic.h> +# include <atomic.h> /**********************************************************//** Returns true if swapped, ptr is pointer to target, old_val is value to @@ -379,6 +387,9 @@ amount of increment. */ # define os_atomic_increment_ulint(ptr, amount) \ atomic_add_long_nv(ptr, amount) +# define os_atomic_increment_uint64(ptr, amount) \ + atomic_add_64_nv(ptr, amount) + /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val */ @@ -387,7 +398,11 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ #elif defined(HAVE_WINDOWS_ATOMICS) -#define HAVE_ATOMIC_BUILTINS +# define HAVE_ATOMIC_BUILTINS + +# ifndef _WIN32 +# define HAVE_ATOMIC_BUILTINS_64 +# endif /* On Windows, use Windows atomics / interlocked */ # ifdef _WIN64 @@ -425,6 +440,11 @@ amount of increment. */ # define os_atomic_increment_ulint(ptr, amount) \ ((ulint) (win_xchg_and_add(ptr, amount) + amount)) +# define os_atomic_increment_uint64(ptr, amount) \ + ((ib_uint64_t) (InterlockedExchangeAdd64( \ + (ib_int64_t*) ptr, \ + (ib_int64_t) amount) + amount)) + /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val. 
InterlockedExchange() operates on LONG, and the LONG will be diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 8038178c2f3..a144b1fdb67 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -60,6 +60,14 @@ extern os_event_t srv_lock_timeout_thread_event; /* This event is set at shutdown to wakeup threads from sleep */ extern os_event_t srv_shutdown_event; +/* This event is set on checkpoint completion to wake the redo log parser +thread */ +extern os_event_t srv_checkpoint_completed_event; + +/* This event is set on the online redo log following thread exit to signal +that the (slow) shutdown may proceed */ +extern os_event_t srv_redo_log_thread_finished_event; + /* If the last data file is auto-extended, we add this many pages to it at a time */ #define SRV_AUTO_EXTEND_INCREMENT \ @@ -126,6 +134,11 @@ extern ibool srv_recovery_stats; extern ulint srv_use_purge_thread; +extern my_bool srv_track_changed_pages; + +extern +ulonglong srv_changed_pages_limit; + extern ibool srv_auto_extend_last_data_file; extern ulint srv_last_file_size_max; extern char** srv_log_group_home_dirs; @@ -213,6 +226,9 @@ extern unsigned long long srv_stats_sample_pages; extern ulong srv_stats_auto_update; extern ulint srv_stats_update_need_lock; extern ibool srv_use_sys_stats_table; +#ifdef UNIV_DEBUG +extern ulong srv_sys_stats_root_page; +#endif extern ibool srv_use_doublewrite_buf; extern ibool srv_use_checksums; @@ -284,6 +300,7 @@ extern ibool srv_print_latch_waits; extern ulint srv_activity_count; extern ulint srv_fatal_semaphore_wait_threshold; +#define SRV_SEMAPHORE_WAIT_EXTENSION 7200 extern ulint srv_dml_needed_delay; extern long long srv_kill_idle_transaction; @@ -644,6 +661,15 @@ srv_LRU_dump_restore_thread( void* arg); /*!< in: a dummy parameter required by os_thread_create */ /******************************************************************//** +A thread which follows the redo log and outputs the changed 
page bitmap. +@return a dummy value */ +UNIV_INTERN +os_thread_ret_t +srv_redo_log_follow_thread( +/*=======================*/ + void* arg); /*!< in: a dummy parameter required by + os_thread_create */ +/******************************************************************//** Outputs to a file the output of the InnoDB Monitor. @return FALSE if not all information printed due to failure to obtain necessary mutex */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index ee515b7ca17..0305aec6a77 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 #define INNODB_VERSION_BUGFIX 17 -#define PERCONA_INNODB_VERSION 13.0 +#define PERCONA_INNODB_VERSION 14.1 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -270,6 +270,24 @@ management to ensure correct alignment for doubles etc. */ ======================== */ +/** There are currently two InnoDB file formats which are used to group +features with similar restrictions and dependencies. Using an enum allows +switch statements to give a compiler warning when a new one is introduced. */ +enum innodb_file_formats_enum { + /** Antelope File Format: InnoDB/MySQL up to 5.1. + This format includes REDUNDANT and COMPACT row formats */ + UNIV_FORMAT_A = 0, + + /** Barracuda File Format: Introduced in InnoDB plugin for 5.1: + This format includes COMPRESSED and DYNAMIC row formats. It + includes the ability to create secondary indexes from data that + is not on the clustered index page and the ability to store more + data off the clustered index page. 
*/ + UNIV_FORMAT_B = 1 +}; + +typedef enum innodb_file_formats_enum innodb_file_formats_t; + /* The 2-logarithm of UNIV_PAGE_SIZE: */ /* #define UNIV_PAGE_SIZE_SHIFT 14 */ #define UNIV_PAGE_SIZE_SHIFT_MAX 14 diff --git a/storage/xtradb/include/ut0rbt.h b/storage/xtradb/include/ut0rbt.h index 6fd050acfe7..100cf5f648b 100644 --- a/storage/xtradb/include/ut0rbt.h +++ b/storage/xtradb/include/ut0rbt.h @@ -110,6 +110,10 @@ struct ib_rbt_bound_struct { /* Compare a key with the node value (t is tree, k is key, n is node)*/ #define rbt_compare(t, k, n) (t->compare(k, n->value)) +/* Node size. FIXME: name might clash, but currently it does not, so for easier +maintenance do not rename it for now. */ +#define SIZEOF_NODE(t) ((sizeof(ib_rbt_node_t) + t->sizeof_value) - 1) + /****************************************************************//** Free an instance of a red black tree */ UNIV_INTERN @@ -181,6 +185,18 @@ rbt_add_node( const void* value); /*!< in: this value is copied to the node */ /****************************************************************//** +Add a new caller-provided node to tree at the specified position. +The node must have its key fields initialized correctly. +@return added node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_add_preallocated_node( +/*======================*/ + ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: parent */ + ib_rbt_node_t* node); /*!< in: node */ + +/****************************************************************//** Return the left most data node in the tree @return left most node */ UNIV_INTERN @@ -267,6 +283,13 @@ rbt_clear( /*======*/ ib_rbt_t* tree); /*!< in: rb tree */ /****************************************************************//** +Clear the tree without deleting and freeing its nodes. */ +UNIV_INTERN +void +rbt_reset( +/*======*/ + ib_rbt_t* tree); /*!< in: rb tree */ +/****************************************************************//** Merge the node from dst into src. 
Return the number of nodes merged. @return no. of recs merged */ UNIV_INTERN diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index a1da38c4579..9c02113dc20 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -201,6 +201,54 @@ log_buf_pool_get_oldest_modification(void) return(lsn); } +/****************************************************************//** +Safely reads the log_sys->tracked_lsn value. Uses atomic operations +if available, otherwise this field is protected with the log system +mutex. The writer counterpart function is log_set_tracked_lsn() in +log0online.c. + +@return log_sys->tracked_lsn value. */ +UNIV_INLINE +ib_uint64_t +log_get_tracked_lsn() +{ +#ifdef HAVE_ATOMIC_BUILTINS_64 + return os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); +#else + ut_ad(mutex_own(&(log_sys->mutex))); + return log_sys->tracked_lsn; +#endif +} + +/****************************************************************//** +Checks if the log groups have a big enough margin of free space in +so that a new log entry can be written without overwriting log data +that is not read by the changed page bitmap thread. +@return TRUE if there is not enough free space. */ +static +ibool +log_check_tracking_margin( + ulint lsn_advance) /*!< in: an upper limit on how much log data we + plan to write. If zero, the margin will be + checked for the already-written log. */ +{ + ib_uint64_t tracked_lsn; + ulint tracked_lsn_age; + + if (!srv_track_changed_pages) { + return FALSE; + } + + ut_ad(mutex_own(&(log_sys->mutex))); + + tracked_lsn = log_get_tracked_lsn(); + tracked_lsn_age = log_sys->lsn - tracked_lsn; + + /* The overwrite would happen when log_sys->log_group_capacity is + exceeded, but we use max_checkpoint_age for an extra safety margin. */ + return tracked_lsn_age + lsn_advance > log_sys->max_checkpoint_age; +} + /************************************************************//** Opens the log for log_write_low. 
The log must be closed with log_close and released with log_release. @@ -217,9 +265,7 @@ log_reserve_and_open( ulint archived_lsn_age; ulint dummy; #endif /* UNIV_LOG_ARCHIVE */ -#ifdef UNIV_DEBUG ulint count = 0; -#endif /* UNIV_DEBUG */ ut_a(len < log->buf_size / 2); loop: @@ -247,6 +293,19 @@ loop: goto loop; } + if (log_check_tracking_margin(len_upper_limit) && (++count < 50)) { + + /* This log write would violate the untracked LSN free space + margin. Limit this to 50 retries as there might be situations + where we have no choice but to proceed anyway, i.e. if the log + is about to be overflown, log tracking or not. */ + mutex_exit(&(log->mutex)); + + os_thread_sleep(10000); + + goto loop; + } + #ifdef UNIV_LOG_ARCHIVE if (log->archiving_state != LOG_ARCH_OFF) { @@ -385,6 +444,8 @@ log_close(void) ulint first_rec_group; ib_uint64_t oldest_lsn; ib_uint64_t lsn; + ib_uint64_t tracked_lsn; + ulint tracked_lsn_age; log_t* log = log_sys; ib_uint64_t checkpoint_age; @@ -411,6 +472,19 @@ log_close(void) log->check_flush_or_checkpoint = TRUE; } + if (srv_track_changed_pages) { + + tracked_lsn = log_get_tracked_lsn(); + tracked_lsn_age = lsn - tracked_lsn; + + if (tracked_lsn_age >= log->log_group_capacity) { + + fprintf(stderr, " InnoDB: Error: the age of the " + "oldest untracked record exceeds the log " + "group capacity!\n"); + } + } + checkpoint_age = lsn - log->last_checkpoint_lsn; if (checkpoint_age >= log->log_group_capacity) { @@ -872,6 +946,8 @@ log_init(void) log_sys->archiving_on = os_event_create(NULL); #endif /* UNIV_LOG_ARCHIVE */ + log_sys->tracked_lsn = 0; + /*----------------------------*/ log_block_init(log_sys->buf, log_sys->lsn); @@ -1721,6 +1797,12 @@ log_io_complete_checkpoint(void) } mutex_exit(&(log_sys->mutex)); + + /* Wake the redo log watching thread to parse the log up to this + checkpoint. 
*/ + if (srv_track_changed_pages) { + os_event_set(srv_checkpoint_completed_event); + } } /*******************************************************************//** @@ -3065,6 +3147,15 @@ loop: log_checkpoint_margin(); + mutex_enter(&(log_sys->mutex)); + if (log_check_tracking_margin(0)) { + + mutex_exit(&(log_sys->mutex)); + os_thread_sleep(10000); + goto loop; + } + mutex_exit(&(log_sys->mutex)); + #ifdef UNIV_LOG_ARCHIVE log_archive_margin(); #endif /* UNIV_LOG_ARCHIVE */ @@ -3093,6 +3184,7 @@ logs_empty_and_mark_files_at_shutdown(void) /*=======================================*/ { ib_uint64_t lsn; + ib_uint64_t tracked_lsn; ulint arch_log_no; if (srv_print_verbose_log) { @@ -3198,9 +3290,12 @@ loop: mutex_enter(&(log_sys->mutex)); + tracked_lsn = log_get_tracked_lsn(); + lsn = log_sys->lsn; if (lsn != log_sys->last_checkpoint_lsn + || (srv_track_changed_pages && (tracked_lsn != log_sys->last_checkpoint_lsn)) #ifdef UNIV_LOG_ARCHIVE || (srv_log_archive_on && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE) @@ -3255,6 +3350,11 @@ loop: srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; + /* Signal the log following thread to quit */ + if (srv_track_changed_pages) { + os_event_set(srv_checkpoint_completed_event); + } + /* Make some checks that the server really is quiet */ ut_a(srv_n_threads_active[SRV_MASTER] == 0); ut_a(buf_all_freed()); @@ -3274,6 +3374,10 @@ loop: fil_flush_file_spaces(FIL_TABLESPACE); + if (srv_track_changed_pages) { + os_event_wait(srv_redo_log_thread_finished_event); + } + fil_close_all_files(); /* Make some checks that the server really is quiet */ @@ -3399,6 +3503,18 @@ log_print( ((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed)); + if (srv_track_changed_pages) { + + /* The maximum tracked LSN age is equal to the maximum + checkpoint age */ + fprintf(file, + "Log tracking enabled\n" + "Log tracked up to %llu\n" + "Max tracked LSN age %lu\n", + log_get_tracked_lsn(), + log_sys->max_checkpoint_age); + } + log_sys->n_log_ios_old = 
log_sys->n_log_ios; log_sys->last_printout_time = current_time; diff --git a/storage/xtradb/log/log0online.c b/storage/xtradb/log/log0online.c new file mode 100644 index 00000000000..512b13fb311 --- /dev/null +++ b/storage/xtradb/log/log0online.c @@ -0,0 +1,1085 @@ +/***************************************************************************** + +Copyright (c) 2011-2012 Percona Inc. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/**************************************************//** +@file log/log0online.c +Online database log parsing for changed page tracking + +*******************************************************/ + +#include "log0online.h" + +#include "my_dbug.h" + +#include "log0recv.h" +#include "mach0data.h" +#include "mtr0log.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "trx0sys.h" +#include "ut0rbt.h" + +enum { FOLLOW_SCAN_SIZE = 4 * (UNIV_PAGE_SIZE_MAX) }; + +/** Log parsing and bitmap output data structure */ +struct log_bitmap_struct { + byte read_buf[FOLLOW_SCAN_SIZE]; + /*!< log read buffer */ + byte parse_buf[RECV_PARSING_BUF_SIZE]; + /*!< log parse buffer */ + byte* parse_buf_end; /*!< parse buffer position where the + next read log data should be copied to. 
+ If the previous log records were fully + parsed, it points to the start, + otherwise points immediatelly past the + end of the incomplete log record. */ + char* out_name; /*!< the file name for bitmap output */ + os_file_t out; /*!< the bitmap output file */ + ib_uint64_t out_offset; /*!< the next write position in the + bitmap output file */ + ib_uint64_t start_lsn; /*!< the LSN of the next unparsed + record and the start of the next LSN + interval to be parsed. */ + ib_uint64_t end_lsn; /*!< the end of the LSN interval to be + parsed, equal to the next checkpoint + LSN at the time of parse */ + ib_uint64_t next_parse_lsn; /*!< the LSN of the next unparsed + record in the current parse */ + ib_rbt_t* modified_pages; /*!< the current modified page set, + organized as the RB-tree with the keys + of (space, 4KB-block-start-page-id) + pairs */ + ib_rbt_node_t* page_free_list; /*!< Singly-linked list of freed nodes + of modified_pages tree for later + reuse. Nodes are linked through + ib_rbt_node_t.left as this field has + both the correct type and the tree does + not mind its overwrite during + rbt_next() tree traversal. */ +}; + +/* The log parsing and bitmap output struct instance */ +static struct log_bitmap_struct* log_bmp_sys; + +/* File name stem for modified page bitmaps */ +static const char* modified_page_stem = "ib_modified_log."; + +/* On server startup with empty database srv_start_lsn == 0, in +which case the first LSN of actual log records will be this. */ +#define MIN_TRACKED_LSN ((LOG_START_LSN) + (LOG_BLOCK_HDR_SIZE)) + +/* Tests if num bit of bitmap is set */ +#define IS_BIT_SET(bitmap, num) \ + (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL))) + +/** The bitmap file block size in bytes. All writes will be multiples of this. + */ +enum { + MODIFIED_PAGE_BLOCK_SIZE = 4096 +}; + + +/** Offsets in a file bitmap block */ +enum { + MODIFIED_PAGE_IS_LAST_BLOCK = 0,/* 1 if last block in the current + write, 0 otherwise. 
*/ + MODIFIED_PAGE_START_LSN = 4, /* The starting tracked LSN of this and + other blocks in the same write */ + MODIFIED_PAGE_END_LSN = 12, /* The ending tracked LSN of this and + other blocks in the same write */ + MODIFIED_PAGE_SPACE_ID = 20, /* The space ID of tracked pages in + this block */ + MODIFIED_PAGE_1ST_PAGE_ID = 24, /* The page ID of the first tracked + page in this block */ + MODIFIED_PAGE_BLOCK_UNUSED_1 = 28,/* Unused in order to align the start + of bitmap at 8 byte boundary */ + MODIFIED_PAGE_BLOCK_BITMAP = 32,/* Start of the bitmap itself */ + MODIFIED_PAGE_BLOCK_UNUSED_2 = MODIFIED_PAGE_BLOCK_SIZE - 8, + /* Unused in order to align the end of + bitmap at 8 byte boundary */ + MODIFIED_PAGE_BLOCK_CHECKSUM = MODIFIED_PAGE_BLOCK_SIZE - 4 + /* The checksum of the current block */ +}; + +/** Length of the bitmap data in a block in bytes */ +enum { MODIFIED_PAGE_BLOCK_BITMAP_LEN + = MODIFIED_PAGE_BLOCK_UNUSED_2 - MODIFIED_PAGE_BLOCK_BITMAP }; + +/** Length of the bitmap data in a block in page ids */ +enum { MODIFIED_PAGE_BLOCK_ID_COUNT = MODIFIED_PAGE_BLOCK_BITMAP_LEN * 8 }; + +/****************************************************************//** +Provide a comparisson function for the RB-tree tree (space, +block_start_page) pairs. Actual implementation does not matter as +long as the ordering is full. +@return -1 if p1 < p2, 0 if p1 == p2, 1 if p1 > p2 +*/ +static +int +log_online_compare_bmp_keys( +/*========================*/ + const void* p1, /*!<in: 1st key to compare */ + const void* p2) /*!<in: 2nd key to compare */ +{ + const byte *k1 = (const byte *)p1; + const byte *k2 = (const byte *)p2; + + ulint k1_space = mach_read_from_4(k1 + MODIFIED_PAGE_SPACE_ID); + ulint k2_space = mach_read_from_4(k2 + MODIFIED_PAGE_SPACE_ID); + if (k1_space == k2_space) { + ulint k1_start_page + = mach_read_from_4(k1 + MODIFIED_PAGE_1ST_PAGE_ID); + ulint k2_start_page + = mach_read_from_4(k2 + MODIFIED_PAGE_1ST_PAGE_ID); + return k1_start_page < k2_start_page + ? 
-1 : k1_start_page > k2_start_page ? 1 : 0; + } + return k1_space < k2_space ? -1 : 1; +} + +/****************************************************************//** +Set a bit for tracked page in the bitmap. Expand the bitmap tree as +necessary. */ +static +void +log_online_set_page_bit( +/*====================*/ + ulint space, /*!<in: log record space id */ + ulint page_no)/*!<in: log record page id */ +{ + ulint block_start_page; + ulint block_pos; + uint bit_pos; + ib_rbt_bound_t tree_search_pos; + byte search_page[MODIFIED_PAGE_BLOCK_SIZE]; + byte *page_ptr; + + ut_a(space != ULINT_UNDEFINED); + ut_a(page_no != ULINT_UNDEFINED); + + block_start_page = page_no / MODIFIED_PAGE_BLOCK_ID_COUNT + * MODIFIED_PAGE_BLOCK_ID_COUNT; + block_pos = block_start_page ? (page_no % block_start_page / 8) + : (page_no / 8); + bit_pos = page_no % 8; + + mach_write_to_4(search_page + MODIFIED_PAGE_SPACE_ID, space); + mach_write_to_4(search_page + MODIFIED_PAGE_1ST_PAGE_ID, + block_start_page); + + if (!rbt_search(log_bmp_sys->modified_pages, &tree_search_pos, + search_page)) { + page_ptr = rbt_value(byte, tree_search_pos.last); + } + else { + ib_rbt_node_t *new_node; + + if (log_bmp_sys->page_free_list) { + new_node = log_bmp_sys->page_free_list; + log_bmp_sys->page_free_list = new_node->left; + } + else { + new_node = ut_malloc(SIZEOF_NODE( + log_bmp_sys->modified_pages)); + } + memset(new_node, 0, SIZEOF_NODE(log_bmp_sys->modified_pages)); + + page_ptr = rbt_value(byte, new_node); + mach_write_to_4(page_ptr + MODIFIED_PAGE_SPACE_ID, space); + mach_write_to_4(page_ptr + MODIFIED_PAGE_1ST_PAGE_ID, + block_start_page); + + rbt_add_preallocated_node(log_bmp_sys->modified_pages, + &tree_search_pos, new_node); + } + page_ptr[MODIFIED_PAGE_BLOCK_BITMAP + block_pos] |= (1U << bit_pos); +} + +/****************************************************************//** +Calculate a bitmap block checksum. Algorithm borrowed from +log_block_calc_checksum. 
+@return checksum */
+UNIV_INLINE
+ulint
+log_online_calc_checksum(
+/*=====================*/
+	const byte*	block)	/*!<in: bitmap block */
+{
+	ulint	sum;
+	ulint	sh;
+	ulint	i;
+
+	sum = 1;
+	sh = 0;
+
+	/* The checksum covers every byte that precedes the checksum field
+	itself. */
+	for (i = 0; i < MODIFIED_PAGE_BLOCK_CHECKSUM; i++) {
+
+		ulint	b = block[i];
+		sum &= 0x7FFFFFFFUL;
+		sum += b;
+		sum += b << sh;
+		sh++;
+		if (sh > 24) {
+			sh = 0;
+		}
+	}
+
+	return sum;
+}
+
+/****************************************************************//**
+Get the last fully tracked LSN from the bitmap file by reading
+backwards until a correct end page is found. Detects incomplete
+writes and corrupted data. Sets the start output position for the
+written bitmap data.
+@return the last fully tracked LSN */
+static
+ib_uint64_t
+log_online_read_last_tracked_lsn()
+/*==============================*/
+{
+	byte		page[MODIFIED_PAGE_BLOCK_SIZE];
+	ib_uint64_t	read_offset	= log_bmp_sys->out_offset;
+	/* Initialize these to nonequal values so that file size == 0 case with
+	zero loop repetitions is handled correctly */
+	ulint		checksum	= 0;
+	ulint		actual_checksum	= !checksum;
+	ibool		is_last_page	= FALSE;
+	ib_uint64_t	result;
+
+	ut_ad(log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE == 0);
+
+	/* Keep stepping backwards while the block just read is corrupted OR
+	is not the final block of a write. Stopping on a block that merely has
+	a good checksum would treat an incomplete multi-block write as the end
+	of valid data and leave its leading blocks in the file. */
+	while ((checksum != actual_checksum || !is_last_page)
+	       && read_offset > 0)
+	{
+
+		ulint	offset_low, offset_high;
+		ibool	success;
+
+		read_offset -= MODIFIED_PAGE_BLOCK_SIZE;
+		offset_high = (ulint)(read_offset >> 32);
+		offset_low = (ulint)(read_offset & 0xFFFFFFFF);
+
+		success = os_file_read(log_bmp_sys->out, page, offset_low,
+				       offset_high, MODIFIED_PAGE_BLOCK_SIZE);
+		if (!success) {
+
+			/* The following call prints an error message */
+			os_file_get_last_error(TRUE);
+			/* Here and below assume that bitmap file names do not
+			contain apostrophes, thus no need for
+			ut_print_filename(). */
+			fprintf(stderr, "InnoDB: Warning: failed reading "
+				"changed page bitmap file \'%s\'\n",
+				log_bmp_sys->out_name);
+			return MIN_TRACKED_LSN;
+		}
+
+		is_last_page
+			= mach_read_from_4(page + MODIFIED_PAGE_IS_LAST_BLOCK);
+		checksum = mach_read_from_4(page
+					    + MODIFIED_PAGE_BLOCK_CHECKSUM);
+		actual_checksum = log_online_calc_checksum(page);
+		if (checksum != actual_checksum) {
+
+			fprintf(stderr, "InnoDB: Warning: corruption "
+				"detected in \'%s\' at offset %llu\n",
+				log_bmp_sys->out_name, read_offset);
+		}
+
+	}
+
+	if (UNIV_LIKELY(checksum == actual_checksum && is_last_page)) {
+
+		/* A valid end-of-write block was found: new data is appended
+		right after it. */
+		log_bmp_sys->out_offset = read_offset
+			+ MODIFIED_PAGE_BLOCK_SIZE;
+		result = mach_read_ull(page + MODIFIED_PAGE_END_LSN);
+	}
+	else {
+		/* No valid end-of-write block at all (empty file, or the scan
+		reached offset 0): start writing from the scan position. */
+		log_bmp_sys->out_offset = read_offset;
+		result = 0;
+	}
+
+	/* Truncate the output file to discard the corrupted bitmap data, if
+	any */
+	if (!os_file_set_eof_at(log_bmp_sys->out,
+				log_bmp_sys->out_offset)) {
+		fprintf(stderr, "InnoDB: Warning: failed truncating "
+			"changed page bitmap file \'%s\' to %llu bytes\n",
+			log_bmp_sys->out_name, log_bmp_sys->out_offset);
+		result = 0;
+	}
+	return result;
+}
+
+/****************************************************************//**
+Safely write the log_sys->tracked_lsn value. Uses atomic operations
+if available, otherwise this field is protected with the log system
+mutex. The reader counterpart function is log_get_tracked_lsn() in
+log0log.c. 
*/ +UNIV_INLINE +void +log_set_tracked_lsn( +/*================*/ + ib_uint64_t tracked_lsn) /*!<in: new value */ +{ +#ifdef HAVE_ATOMIC_BUILTINS_64 + /* Single writer, no data race here */ + ib_uint64_t old_value + = os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); + (void) os_atomic_increment_uint64(&log_sys->tracked_lsn, + tracked_lsn - old_value); +#else + mutex_enter(&log_sys->mutex); + log_sys->tracked_lsn = tracked_lsn; + mutex_exit(&log_sys->mutex); +#endif +} + +/****************************************************************//** +Diagnose a gap in tracked LSN range on server startup due to crash or +very fast shutdown and try to close it by tracking the data +immediatelly, if possible. */ +static +void +log_online_track_missing_on_startup( +/*================================*/ + ib_uint64_t last_tracked_lsn, /*!<in: last tracked LSN read + from the bitmap file */ + ib_uint64_t tracking_start_lsn) /*!<in: last checkpoint LSN of + the current server startup */ +{ + ut_ad(last_tracked_lsn != tracking_start_lsn); + + fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' is %llu, but " + "last checkpoint LSN is %llu. This might be due to a server " + "crash or a very fast shutdown. ", log_bmp_sys->out_name, + last_tracked_lsn, tracking_start_lsn); + + /* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty + bitmap file, handle this too. 
*/ + last_tracked_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN); + + /* See if we can fully recover the missing interval */ + if (log_sys->lsn - last_tracked_lsn < log_sys->log_group_capacity) { + + fprintf(stderr, + "Reading the log to advance the last tracked LSN.\n"); + + log_bmp_sys->start_lsn = last_tracked_lsn; + log_set_tracked_lsn(log_bmp_sys->start_lsn); + log_online_follow_redo_log(); + ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn); + + fprintf(stderr, + "InnoDB: continuing tracking changed pages from LSN " + "%llu\n", log_bmp_sys->end_lsn); + } + else { + fprintf(stderr, + "The age of last tracked LSN exceeds log capacity, " + "tracking-based incremental backups will work only " + "from the higher LSN!\n"); + + log_bmp_sys->end_lsn = log_bmp_sys->start_lsn + = tracking_start_lsn; + log_set_tracked_lsn(log_bmp_sys->start_lsn); + + fprintf(stderr, + "InnoDB: starting tracking changed pages from LSN " + "%llu\n", log_bmp_sys->end_lsn); + } +} + +/*********************************************************************//** +Initialize the online log following subsytem. 
*/ +UNIV_INTERN +void +log_online_read_init() +/*==================*/ +{ + char buf[FN_REFLEN]; + ibool success; + ib_uint64_t tracking_start_lsn + = ut_max(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN); + + /* Assert (could be compile-time assert) that bitmap data start and end + in a bitmap block is 8-byte aligned */ + ut_a(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0); + ut_a(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0); + + log_bmp_sys = ut_malloc(sizeof(*log_bmp_sys)); + + ut_snprintf(buf, FN_REFLEN, "%s%s%d", srv_data_home, + modified_page_stem, 1); + log_bmp_sys->out_name = ut_malloc(strlen(buf) + 1); + ut_strcpy(log_bmp_sys->out_name, buf); + + log_bmp_sys->modified_pages = rbt_create(MODIFIED_PAGE_BLOCK_SIZE, + log_online_compare_bmp_keys); + log_bmp_sys->page_free_list = NULL; + + log_bmp_sys->out + = os_file_create_simple_no_error_handling + (log_bmp_sys->out_name, OS_FILE_OPEN, OS_FILE_READ_WRITE, + &success); + + if (!success) { + + /* New file, tracking from scratch */ + log_bmp_sys->out + = os_file_create_simple_no_error_handling + (log_bmp_sys->out_name, OS_FILE_CREATE, + OS_FILE_READ_WRITE, &success); + if (!success) { + + /* The following call prints an error message */ + os_file_get_last_error(TRUE); + fprintf(stderr, + "InnoDB: Error: Cannot create \'%s\'\n", + log_bmp_sys->out_name); + exit(1); + } + + log_bmp_sys->out_offset = 0; + } + else { + + /* Old file, read last tracked LSN and continue from there */ + ulint size_low; + ulint size_high; + ib_uint64_t last_tracked_lsn; + + success = os_file_get_size(log_bmp_sys->out, &size_low, + &size_high); + ut_a(success); + + log_bmp_sys->out_offset + = ((ib_uint64_t)size_high << 32) | size_low; + + if (log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE != 0) { + + fprintf(stderr, + "InnoDB: Warning: truncated block detected " + "in \'%s\' at offset %llu\n", + log_bmp_sys->out_name, + log_bmp_sys->out_offset); + log_bmp_sys->out_offset -= + log_bmp_sys->out_offset + % MODIFIED_PAGE_BLOCK_SIZE; + } + + 
last_tracked_lsn = log_online_read_last_tracked_lsn(); + + if (last_tracked_lsn < tracking_start_lsn) { + + log_online_track_missing_on_startup(last_tracked_lsn, + tracking_start_lsn); + return; + } + + if (last_tracked_lsn > tracking_start_lsn) { + + fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' " + "is %llu, but last checkpoint LSN is %llu. " + "The tracking-based incremental backups will " + "work only from the latter LSN!\n", + log_bmp_sys->out_name, last_tracked_lsn, + tracking_start_lsn); + } + + } + + fprintf(stderr, "InnoDB: starting tracking changed pages from " + "LSN %llu\n", tracking_start_lsn); + log_bmp_sys->start_lsn = tracking_start_lsn; + log_set_tracked_lsn(tracking_start_lsn); +} + +/*********************************************************************//** +Shut down the online log following subsystem. */ +UNIV_INTERN +void +log_online_read_shutdown() +/*======================*/ +{ + ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list; + + os_file_close(log_bmp_sys->out); + + rbt_free(log_bmp_sys->modified_pages); + + while (free_list_node) { + ib_rbt_node_t *next = free_list_node->left; + ut_free(free_list_node); + free_list_node = next; + } + + ut_free(log_bmp_sys->out_name); + ut_free(log_bmp_sys); +} + +/*********************************************************************//** +For the given minilog record type determine if the record has (space; page) +associated with it. +@return TRUE if the record has (space; page) in it */ +static +ibool +log_online_rec_has_page( +/*====================*/ + byte type) /*!<in: the minilog record type */ +{ + return type != MLOG_MULTI_REC_END && type != MLOG_DUMMY_RECORD; +} + +/*********************************************************************//** +Check if a page field for a given log record type actually contains a page +id. It does not for file operations and MLOG_LSN. 
+@return TRUE if page field contains actual page id, FALSE otherwise */ +static +ibool +log_online_rec_page_means_page( +/*===========================*/ + byte type) /*!<in: log record type */ +{ + return log_online_rec_has_page(type) +#ifdef UNIV_LOG_LSN_DEBUG + && type != MLOG_LSN +#endif + && type != MLOG_FILE_CREATE + && type != MLOG_FILE_RENAME + && type != MLOG_FILE_DELETE + && type != MLOG_FILE_CREATE2; +} + +/*********************************************************************//** +Parse the log data in the parse buffer for the (space, page) pairs and add +them to the modified page set as necessary. Removes the fully-parsed records +from the buffer. If an incomplete record is found, moves it to the end of the +buffer. */ +static +void +log_online_parse_redo_log() +/*=======================*/ +{ + byte *ptr = log_bmp_sys->parse_buf; + byte *end = log_bmp_sys->parse_buf_end; + + ulint len = 0; + + while (ptr != end + && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) { + + byte type; + ulint space; + ulint page_no; + byte* body; + + /* recv_sys is not initialized, so on corrupt log we will + SIGSEGV. But the log of a live database should not be + corrupt. */ + len = recv_parse_log_rec(ptr, end, &type, &space, &page_no, + &body); + if (len > 0) { + + if (log_online_rec_page_means_page(type) + && (space != TRX_DOUBLEWRITE_SPACE)) { + + ut_a(len >= 3); + log_online_set_page_bit(space, page_no); + } + + ptr += len; + ut_ad(ptr <= end); + log_bmp_sys->next_parse_lsn + = recv_calc_lsn_on_data_add + (log_bmp_sys->next_parse_lsn, len); + } + else { + + /* Incomplete log record. Shift it to the + beginning of the parse buffer and leave it to be + completed on the next read. 
*/ + ut_memmove(log_bmp_sys->parse_buf, ptr, end - ptr); + log_bmp_sys->parse_buf_end + = log_bmp_sys->parse_buf + (end - ptr); + ptr = end; + } + } + + if (len > 0) { + + log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf; + } +} + +/*********************************************************************//** +Check the log block checksum. +@return TRUE if the log block checksum is OK, FALSE otherwise. */ +static +ibool +log_online_is_valid_log_seg( +/*========================*/ + const byte* log_block) /*!< in: read log data */ +{ + ibool checksum_is_ok + = log_block_checksum_is_ok_or_old_format(log_block); + + if (!checksum_is_ok) { + + fprintf(stderr, + "InnoDB Error: log block checksum mismatch" + "expected %lu, calculated checksum %lu\n", + (ulong) log_block_get_checksum(log_block), + (ulong) log_block_calc_checksum(log_block)); + } + + return checksum_is_ok; +} + +/*********************************************************************//** +Copy new log data to the parse buffer while skipping log block header, +trailer and already parsed data. */ +static +void +log_online_add_to_parse_buf( +/*========================*/ + const byte* log_block, /*!< in: read log data */ + ulint data_len, /*!< in: length of read log data */ + ulint skip_len) /*!< in: how much of log data to + skip */ +{ + ulint start_offset = skip_len ? skip_len : LOG_BLOCK_HDR_SIZE; + ulint end_offset + = (data_len == OS_FILE_LOG_BLOCK_SIZE) + ? data_len - LOG_BLOCK_TRL_SIZE + : data_len; + ulint actual_data_len = (end_offset >= start_offset) + ? 
end_offset - start_offset : 0; + + ut_memcpy(log_bmp_sys->parse_buf_end, log_block + start_offset, + actual_data_len); + + log_bmp_sys->parse_buf_end += actual_data_len; + + ut_a(log_bmp_sys->parse_buf_end - log_bmp_sys->parse_buf + <= RECV_PARSING_BUF_SIZE); +} + +/*********************************************************************//** +Parse the log block: first copies the read log data to the parse buffer while +skipping log block header, trailer and already parsed data. Then it actually +parses the log to add to the modified page bitmap. */ +static +void +log_online_parse_redo_log_block( +/*============================*/ + const byte* log_block, /*!< in: read log data */ + ulint skip_already_parsed_len) /*!< in: how many bytes of + log data should be skipped as + they were parsed before */ +{ + ulint block_data_len; + + block_data_len = log_block_get_data_len(log_block); + + ut_ad(block_data_len % OS_FILE_LOG_BLOCK_SIZE == 0 + || block_data_len < OS_FILE_LOG_BLOCK_SIZE); + + log_online_add_to_parse_buf(log_block, block_data_len, + skip_already_parsed_len); + log_online_parse_redo_log(); +} + +/*********************************************************************//** +Read and parse one redo log chunk and updates the modified page bitmap. 
*/ +static +void +log_online_follow_log_seg( +/*======================*/ + log_group_t* group, /*!< in: the log group to use */ + ib_uint64_t block_start_lsn, /*!< in: the LSN to read from */ + ib_uint64_t block_end_lsn) /*!< in: the LSN to read to */ +{ + /* Pointer to the current OS_FILE_LOG_BLOCK-sized chunk of the read log + data to parse */ + byte* log_block = log_bmp_sys->read_buf; + byte* log_block_end = log_bmp_sys->read_buf + + (block_end_lsn - block_start_lsn); + + mutex_enter(&log_sys->mutex); + log_group_read_log_seg(LOG_RECOVER, log_bmp_sys->read_buf, + group, block_start_lsn, block_end_lsn); + mutex_exit(&log_sys->mutex); + + while (log_block < log_block_end + && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) { + + /* How many bytes of log data should we skip in the current log + block. Skipping is necessary because we round down the next + parse LSN thus it is possible to read the already-processed log + data many times */ + ulint skip_already_parsed_len = 0; + + if (!log_online_is_valid_log_seg(log_block)) { + break; + } + + if ((block_start_lsn <= log_bmp_sys->next_parse_lsn) + && (block_start_lsn + OS_FILE_LOG_BLOCK_SIZE + > log_bmp_sys->next_parse_lsn)) { + + /* The next parse LSN is inside the current block, skip + data preceding it. */ + skip_already_parsed_len + = log_bmp_sys->next_parse_lsn + - block_start_lsn; + } + else { + + /* If the next parse LSN is not inside the current + block, then the only option is that we have processed + ahead already. 
*/ + ut_a(block_start_lsn > log_bmp_sys->next_parse_lsn); + } + + /* TODO: merge the copying to the parse buf code with + skip_already_len calculations */ + log_online_parse_redo_log_block(log_block, + skip_already_parsed_len); + + log_block += OS_FILE_LOG_BLOCK_SIZE; + block_start_lsn += OS_FILE_LOG_BLOCK_SIZE; + } + + return; +} + +/*********************************************************************//** +Read and parse the redo log in a given group in FOLLOW_SCAN_SIZE-sized +chunks and updates the modified page bitmap. */ +static +void +log_online_follow_log_group( +/*========================*/ + log_group_t* group, /*!< in: the log group to use */ + ib_uint64_t contiguous_lsn) /*!< in: the LSN of log block start + containing the log_parse_start_lsn */ +{ + ib_uint64_t block_start_lsn = contiguous_lsn; + ib_uint64_t block_end_lsn; + + log_bmp_sys->next_parse_lsn = log_bmp_sys->start_lsn; + log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf; + + do { + block_end_lsn = block_start_lsn + FOLLOW_SCAN_SIZE; + + log_online_follow_log_seg(group, block_start_lsn, + block_end_lsn); + + /* Next parse LSN can become higher than the last read LSN + only in the case when the read LSN falls right on the block + boundary, in which case next parse lsn is bumped to the actual + data LSN on the next (not yet read) block. This assert is + slightly conservative. */ + ut_a(log_bmp_sys->next_parse_lsn + <= block_end_lsn + LOG_BLOCK_HDR_SIZE + + LOG_BLOCK_TRL_SIZE); + + block_start_lsn = block_end_lsn; + } while (block_end_lsn < log_bmp_sys->end_lsn); + + /* Assert that the last read log record is a full one */ + ut_a(log_bmp_sys->parse_buf_end == log_bmp_sys->parse_buf); +} + +/*********************************************************************//** +Write, flush one bitmap block to disk and advance the output position if +successful. 
*/
+static
+void
+log_online_write_bitmap_page(
+/*=========================*/
+	const byte *block)	/*!< in: block to write */
+{
+	ibool	success;
+
+	/* The 64-bit output offset is passed as two 32-bit halves, low word
+	first and high word second. The high word must be taken with a right
+	shift: a left shift discards the upper 32 bits of the offset. */
+	success = os_file_write(log_bmp_sys->out_name,log_bmp_sys->out,
+				block,
+				(ulint)(log_bmp_sys->out_offset & 0xFFFFFFFF),
+				(ulint)(log_bmp_sys->out_offset >> 32),
+				MODIFIED_PAGE_BLOCK_SIZE);
+	if (UNIV_UNLIKELY(!success)) {
+
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+		fprintf(stderr, "InnoDB: Error: failed writing changed page "
+			"bitmap file \'%s\'\n", log_bmp_sys->out_name);
+		return;
+	}
+
+	success = os_file_flush(log_bmp_sys->out, FALSE);
+	if (UNIV_UNLIKELY(!success)) {
+
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+		fprintf(stderr, "InnoDB: Error: failed flushing "
+			"changed page bitmap file \'%s\'\n",
+			log_bmp_sys->out_name);
+		return;
+	}
+
+	/* Only a block that was both written and flushed advances the output
+	position; on either failure above the offset is left unchanged. */
+	log_bmp_sys->out_offset += MODIFIED_PAGE_BLOCK_SIZE;
+}
+
+/*********************************************************************//**
+Append the current changed page bitmap to the bitmap file. Clears the
+bitmap tree and recycles its nodes to the free list. 
*/ +static +void +log_online_write_bitmap() +/*=====================*/ +{ + ib_rbt_node_t *bmp_tree_node; + const ib_rbt_node_t *last_bmp_tree_node; + + bmp_tree_node = (ib_rbt_node_t *) + rbt_first(log_bmp_sys->modified_pages); + last_bmp_tree_node = rbt_last(log_bmp_sys->modified_pages); + + while (bmp_tree_node) { + + byte *page = rbt_value(byte, bmp_tree_node); + + if (bmp_tree_node == last_bmp_tree_node) { + mach_write_to_4(page + MODIFIED_PAGE_IS_LAST_BLOCK, 1); + } + + mach_write_ull(page + MODIFIED_PAGE_START_LSN, + log_bmp_sys->start_lsn); + mach_write_ull(page + MODIFIED_PAGE_END_LSN, + log_bmp_sys->end_lsn); + mach_write_to_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM, + log_online_calc_checksum(page)); + + log_online_write_bitmap_page(page); + + bmp_tree_node->left = log_bmp_sys->page_free_list; + log_bmp_sys->page_free_list = bmp_tree_node; + + bmp_tree_node = (ib_rbt_node_t*) + rbt_next(log_bmp_sys->modified_pages, bmp_tree_node); + } + + rbt_reset(log_bmp_sys->modified_pages); +} + +/*********************************************************************//** +Read and parse the redo log up to last checkpoint LSN to build the changed +page bitmap which is then written to disk. 
*/ +UNIV_INTERN +void +log_online_follow_redo_log() +/*========================*/ +{ + ib_uint64_t contiguous_start_lsn; + log_group_t* group; + + /* Grab the LSN of the last checkpoint, we will parse up to it */ + mutex_enter(&(log_sys->mutex)); + log_bmp_sys->end_lsn = log_sys->last_checkpoint_lsn; + mutex_exit(&(log_sys->mutex)); + + if (log_bmp_sys->end_lsn == log_bmp_sys->start_lsn) { + return; + } + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + ut_a(group); + + contiguous_start_lsn = ut_uint64_align_down(log_bmp_sys->start_lsn, + OS_FILE_LOG_BLOCK_SIZE); + + while (group) { + log_online_follow_log_group(group, contiguous_start_lsn); + group = UT_LIST_GET_NEXT(log_groups, group); + } + + /* A crash injection site that ensures last checkpoint LSN > last + tracked LSN, so that LSN tracking for this interval is tested. */ + DBUG_EXECUTE_IF("crash_before_bitmap_write", DBUG_SUICIDE();); + + log_online_write_bitmap(); + log_bmp_sys->start_lsn = log_bmp_sys->end_lsn; + log_set_tracked_lsn(log_bmp_sys->start_lsn); +} + +/*********************************************************************//** +Initializes log bitmap iterator. +@return TRUE if the iterator is initialized OK, FALSE otherwise. 
*/
+UNIV_INTERN
+ibool
+log_online_bitmap_iterator_init(
+/*============================*/
+	log_bitmap_iterator_t	*i)	/*!<in/out: iterator */
+{
+	ibool	success;
+
+	ut_a(i);
+	ut_snprintf(i->in_name, FN_REFLEN, "%s%s%d", srv_data_home,
+		    modified_page_stem, 1);
+	i->in_offset = 0;
+	/*
+	Set up bit offset out of the reasonable limit
+	to initiate reading block from file in
+	log_online_bitmap_iterator_next(). The limit is expressed in
+	bits (page ids per block), not in bitmap bytes.
+	*/
+	i->bit_offset = MODIFIED_PAGE_BLOCK_ID_COUNT;
+	i->in =
+		os_file_create_simple_no_error_handling(
+			i->in_name,
+			OS_FILE_OPEN,
+			OS_FILE_READ_ONLY,
+			&success);
+
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+		fprintf(stderr,
+			"InnoDB: Error: Cannot open \'%s\'\n",
+			i->in_name);
+		return FALSE;
+	}
+
+	i->page = ut_malloc(MODIFIED_PAGE_BLOCK_SIZE);
+
+	i->start_lsn = i->end_lsn = 0;
+	i->space_id = 0;
+	i->first_page_id = 0;
+	i->changed = FALSE;
+
+	return TRUE;
+}
+
+/*********************************************************************//**
+Releases log bitmap iterator. */
+UNIV_INTERN
+void
+log_online_bitmap_iterator_release(
+/*===============================*/
+	log_bitmap_iterator_t *i) /*!<in/out: iterator */
+{
+	ut_a(i);
+	os_file_close(i->in);
+	ut_free(i->page);
+}
+
+/*********************************************************************//**
+Iterates through bits of saved bitmap blocks.
+Sequentially reads blocks from bitmap file(s) and iterates through
+their bits. Ignores blocks with wrong checksum.
+@return TRUE if iteration is successful, FALSE if all bits are iterated. */
+UNIV_INTERN
+ibool
+log_online_bitmap_iterator_next(
+/*============================*/
+	log_bitmap_iterator_t *i) /*!<in/out: iterator */
+{
+	ulint	offset_low;
+	ulint	offset_high;
+	ulint	size_low;
+	ulint	size_high;
+	ulint	checksum = 0;
+	ulint	actual_checksum = !checksum;
+
+	ibool	success;
+
+	ut_a(i);
+
+	/* Advance within the current block while another bit remains. A block
+	holds MODIFIED_PAGE_BLOCK_ID_COUNT bits (valid indexes 0 .. COUNT - 1);
+	comparing against the bitmap length in bytes would skip most of them,
+	and testing before the increment would step one past the limit. */
+	if (i->bit_offset + 1 < MODIFIED_PAGE_BLOCK_ID_COUNT)
+	{
+		++i->bit_offset;
+		i->changed =
+			IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
+				   i->bit_offset);
+		return TRUE;
+	}
+
+	/* Current block exhausted (or none read yet): read forward until a
+	block with a matching checksum is found or the file ends. */
+	while (checksum != actual_checksum)
+	{
+		success = os_file_get_size(i->in,
+					   &size_low,
+					   &size_high);
+		if (!success) {
+			os_file_get_last_error(TRUE);
+			fprintf(stderr,
+				"InnoDB: Warning: can't get size of "
+				"page bitmap file \'%s\'\n",
+				i->in_name);
+			return FALSE;
+		}
+
+		if (i->in_offset >=
+		    (ib_uint64_t)(size_low) +
+		    ((ib_uint64_t)(size_high) << 32))
+			return FALSE;
+
+		offset_high = (ulint)(i->in_offset >> 32);
+		offset_low = (ulint)(i->in_offset & 0xFFFFFFFF);
+
+		success = os_file_read(
+				i->in,
+				i->page,
+				offset_low,
+				offset_high,
+				MODIFIED_PAGE_BLOCK_SIZE);
+
+		if (!success) {
+			os_file_get_last_error(TRUE);
+			fprintf(stderr,
+				"InnoDB: Warning: failed reading "
+				"changed page bitmap file \'%s\'\n",
+				i->in_name);
+			return FALSE;
+		}
+
+		checksum = mach_read_from_4(
+				i->page + MODIFIED_PAGE_BLOCK_CHECKSUM);
+
+		actual_checksum = log_online_calc_checksum(i->page);
+
+		i->in_offset += MODIFIED_PAGE_BLOCK_SIZE;
+	}
+
+	i->start_lsn =
+		mach_read_ull(i->page + MODIFIED_PAGE_START_LSN);
+	i->end_lsn =
+		mach_read_ull(i->page + MODIFIED_PAGE_END_LSN);
+	i->space_id =
+		mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID);
+	i->first_page_id =
+		mach_read_from_4(i->page + MODIFIED_PAGE_1ST_PAGE_ID);
+	i->bit_offset =
+		0;
+	i->changed =
+		IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
+			   i->bit_offset);
+
+	return TRUE;
+}
+
diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c
index fae7fbd0da0..33ce5443c49 100644
--- 
a/storage/xtradb/log/log0recv.c +++ b/storage/xtradb/log/log0recv.c @@ -840,7 +840,7 @@ block. We also accept a log block in the old format before InnoDB-3.23.52 where the checksum field contains the log block number. @return TRUE if ok, or if the log block may be in the format of InnoDB version predating 3.23.52 */ -static +UNIV_INTERN ibool log_block_checksum_is_ok_or_old_format( /*===================================*/ @@ -2084,7 +2084,7 @@ skip_this_recv_addr: /*******************************************************************//** Tries to parse a single log record and returns its length. @return length of the record, or 0 if the record was not complete */ -static +UNIV_INTERN ulint recv_parse_log_rec( /*===============*/ @@ -2155,7 +2155,7 @@ recv_parse_log_rec( /*******************************************************//** Calculates the new value for lsn when more data is added to the log. */ -static +UNIV_INTERN ib_uint64_t recv_calc_lsn_on_data_add( /*======================*/ @@ -3542,6 +3542,8 @@ recv_reset_logs( log_sys->archived_lsn = log_sys->lsn; #endif /* UNIV_LOG_ARCHIVE */ + log_sys->tracked_lsn = log_sys->lsn; + log_block_init(log_sys->buf, log_sys->lsn); log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE); diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index ef048188353..2fde176a93c 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -1939,6 +1939,25 @@ os_file_set_eof( #endif /* __WIN__ */ } +/***********************************************************************//** +Truncates a file at the specified position. +@return TRUE if success */ +UNIV_INTERN +ibool +os_file_set_eof_at( + os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len)/*!< in: new file length */ +{ +#ifdef __WIN__ + /* TODO: untested! */ + return(!_chsize_s(file, new_len)); +#else + /* TODO: works only with -D_FILE_OFFSET_BITS=64 ? 
*/ + return(!ftruncate(file, new_len)); +#endif +} + + #ifndef __WIN__ /***********************************************************************//** Wrapper to fsync(2) that retries the call on some errors. diff --git a/storage/xtradb/page/page0cur.c b/storage/xtradb/page/page0cur.c index 88ee6bc09a9..00fb55d169c 100644 --- a/storage/xtradb/page/page0cur.c +++ b/storage/xtradb/page/page0cur.c @@ -1902,6 +1902,7 @@ page_cur_delete_rec( /* Save to local variables some data associated with current_rec */ cur_slot_no = page_dir_find_owner_slot(current_rec); + ut_ad(cur_slot_no > 0); cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no); cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot); diff --git a/storage/xtradb/page/page0page.c b/storage/xtradb/page/page0page.c index 7b72a22fd1c..ba43dae3858 100644 --- a/storage/xtradb/page/page0page.c +++ b/storage/xtradb/page/page0page.c @@ -780,17 +780,23 @@ page_copy_rec_list_start( if (UNIV_LIKELY_NULL(new_page_zip)) { mtr_set_log_mode(mtr, log_mode); + DBUG_EXECUTE_IF("page_copy_rec_list_start_compress_fail", + goto zip_reorganize;); + if (UNIV_UNLIKELY (!page_zip_compress(new_page_zip, new_page, index, mtr))) { + ulint ret_pos; +#ifndef DBUG_OFF +zip_reorganize: +#endif /* DBUG_OFF */ /* Before trying to reorganize the page, store the number of preceding records on the page. */ - ulint ret_pos - = page_rec_get_n_recs_before(ret); + ret_pos = page_rec_get_n_recs_before(ret); /* Before copying, "ret" was the predecessor of the predefined supremum record. If it was the predefined infimum record, then it would - still be the infimum. Thus, the assertion - ut_a(ret_pos > 0) would fail here. */ + still be the infimum, and we would have + ret_pos == 0. */ if (UNIV_UNLIKELY (!page_zip_reorganize(new_block, index, mtr))) { @@ -806,15 +812,10 @@ page_copy_rec_list_start( btr_blob_dbg_add(new_page, index, "copy_start_reorg_fail"); return(NULL); - } else { - /* The page was reorganized: - Seek to ret_pos. 
*/ - ret = new_page + PAGE_NEW_INFIMUM; - - do { - ret = rec_get_next_ptr(ret, TRUE); - } while (--ret_pos); } + + /* The page was reorganized: Seek to ret_pos. */ + ret = page_rec_get_nth(new_page, ret_pos); } } @@ -1050,6 +1051,7 @@ page_delete_rec_list_end( n_owned = rec_get_n_owned_new(rec2) - count; slot_index = page_dir_find_owner_slot(rec2); + ut_ad(slot_index > 0); slot = page_dir_get_nth_slot(page, slot_index); } else { rec_t* rec2 = rec; @@ -1065,6 +1067,7 @@ page_delete_rec_list_end( n_owned = rec_get_n_owned_old(rec2) - count; slot_index = page_dir_find_owner_slot(rec2); + ut_ad(slot_index > 0); slot = page_dir_get_nth_slot(page, slot_index); } @@ -1491,6 +1494,10 @@ page_rec_get_nth_const( ulint n_owned; const rec_t* rec; + if (nth == 0) { + return(page_get_infimum_rec(page)); + } + ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); for (i = 0;; i++) { @@ -1584,7 +1591,7 @@ page_rec_get_n_recs_before( n--; ut_ad(n >= 0); - ut_ad(n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); + ut_ad((ulint) n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); return((ulint) n); } diff --git a/storage/xtradb/plug.in b/storage/xtradb/plug.in index 1846543a81c..3fadacce576 100644 --- a/storage/xtradb/plug.in +++ b/storage/xtradb/plug.in @@ -93,7 +93,6 @@ MYSQL_PLUGIN_ACTIONS(xtradb, [ if (res != 10 || c != 123) { return(1); } - return(0); } ], @@ -107,6 +106,42 @@ MYSQL_PLUGIN_ACTIONS(xtradb, [ ] ) + AC_MSG_CHECKING(whether GCC 64-bit atomic builtins are available) + # either define HAVE_IB_GCC_ATOMIC_BUILTINS_64 or not + AC_TRY_RUN( + [ + #include <stdint.h> + int main() + { + int64_t x, y, res; + + x = 10; + y = 123; + res = __sync_bool_compare_and_swap(&x, x, y); + if (!res || x != y) { + return(1); + } + + x = 10; + y = 123; + res = __sync_add_and_fetch(&x, y); + if (res != 123 + 10 || x != 123 + 10) { + return(1); + } + + return(0); + } + ], + [ + AC_DEFINE([HAVE_IB_GCC_ATOMIC_BUILTINS_64], [1], + [GCC 64-bit atomic builtins are available]) + 
AC_MSG_RESULT(yes) + ], + [ + AC_MSG_RESULT(no) + ] + ) + AC_MSG_CHECKING(whether pthread_t can be used by GCC atomic builtins) # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not AC_TRY_RUN( diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index d6f486e5c1b..4d005b6c7b5 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -1296,7 +1296,8 @@ run_again: check_index = foreign->foreign_index; } - if (check_table == NULL || check_table->ibd_file_missing) { + if (check_table == NULL || check_table->ibd_file_missing + || check_index == NULL) { if (check_ref) { FILE* ef = dict_foreign_err_file; @@ -1331,9 +1332,6 @@ run_again: goto exit_func; } - ut_a(check_table); - ut_a(check_index); - if (check_table != table) { /* We already have a LOCK_IX on table, but not necessarily on check_table */ @@ -2194,9 +2192,16 @@ row_ins_index_entry_low( goto function_exit; } - err = btr_cur_pessimistic_insert( + + err = btr_cur_optimistic_insert( 0, &cursor, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); + + if (err == DB_FAIL) { + err = btr_cur_pessimistic_insert( + 0, &cursor, entry, &insert_rec, + &big_rec, n_ext, thr, &mtr); + } } } diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c index efff92aa361..d09cd267fa8 100644 --- a/storage/xtradb/row/row0merge.c +++ b/storage/xtradb/row/row0merge.c @@ -1214,11 +1214,25 @@ row_merge_read_clustered_index( goto err_exit; } + /* Store the cursor position on the last user + record on the page. */ + btr_pcur_move_to_prev_on_page(&pcur); + /* Leaf pages must never be empty, unless + this is the only page in the index tree. */ + ut_ad(btr_pcur_is_on_user_rec(&pcur) + || buf_block_get_page_no( + btr_pcur_get_block(&pcur)) + == clust_index->page); + btr_pcur_store_position(&pcur, &mtr); mtr_commit(&mtr); mtr_start(&mtr); + /* Restore position on the record, or its + predecessor if the record was purged + meanwhile. 
*/ btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); + /* Move to the successor of the original record. */ has_next = btr_pcur_move_to_next_user_rec(&pcur, &mtr); } diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c index 07cb1578024..c56711cadc4 100644 --- a/storage/xtradb/row/row0mysql.c +++ b/storage/xtradb/row/row0mysql.c @@ -3636,7 +3636,7 @@ row_mysql_drop_temp_tables(void) btr_pcur_store_position(&pcur, &mtr); btr_pcur_commit_specify_mtr(&pcur, &mtr); - table = dict_load_table(table_name); + table = dict_table_get_low(table_name); if (table) { row_drop_table_for_mysql(table_name, trx, FALSE); diff --git a/storage/xtradb/row/row0vers.c b/storage/xtradb/row/row0vers.c index 8a7bb842293..9aeaa2db6c0 100644 --- a/storage/xtradb/row/row0vers.c +++ b/storage/xtradb/row/row0vers.c @@ -208,18 +208,6 @@ row_vers_impl_x_locked_off_kernel( vers_del = rec_get_deleted_flag(prev_version, comp); prev_trx_id = row_get_rec_trx_id(prev_version, clust_index, clust_offsets); - - /* If the trx_id and prev_trx_id are different and if - the prev_version is marked deleted then the - prev_trx_id must have already committed for the trx_id - to be able to modify the row. Therefore, prev_trx_id - cannot hold any implicit lock. */ - if (vers_del && 0 != ut_dulint_cmp(trx_id, prev_trx_id)) { - - mutex_enter(&kernel_mutex); - break; - } - /* The stack of versions is locked by mtr. Thus, it is safe to fetch the prefixes for externally stored columns. 
*/ diff --git a/storage/xtradb/scripts/install_innodb_plugins.sql b/storage/xtradb/scripts/install_innodb_plugins.sql index 3fdb8f11e22..8833d9c023c 100644 --- a/storage/xtradb/scripts/install_innodb_plugins.sql +++ b/storage/xtradb/scripts/install_innodb_plugins.sql @@ -7,3 +7,6 @@ INSTALL PLUGIN innodb_cmp SONAME 'ha_innodb.so'; INSTALL PLUGIN innodb_cmp_reset SONAME 'ha_innodb.so'; INSTALL PLUGIN innodb_cmpmem SONAME 'ha_innodb.so'; INSTALL PLUGIN innodb_cmpmem_reset SONAME 'ha_innodb.so'; +INSTALL PLUGIN innodb_buffer_pool_stats SONAME 'ha_innodb.so'; +INSTALL PLUGIN innodb_buffer_page SONAME 'ha_innodb.so'; +INSTALL PLUGIN innodb_buffer_page_lru SONAME 'ha_innodb.so'; diff --git a/storage/xtradb/scripts/install_innodb_plugins_win.sql b/storage/xtradb/scripts/install_innodb_plugins_win.sql index 8c94b4e240d..023b13132c3 100644 --- a/storage/xtradb/scripts/install_innodb_plugins_win.sql +++ b/storage/xtradb/scripts/install_innodb_plugins_win.sql @@ -7,3 +7,6 @@ INSTALL PLUGIN innodb_cmp SONAME 'ha_innodb.dll'; INSTALL PLUGIN innodb_cmp_reset SONAME 'ha_innodb.dll';
INSTALL PLUGIN innodb_cmpmem SONAME 'ha_innodb.dll';
INSTALL PLUGIN innodb_cmpmem_reset SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_buffer_pool_stats SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_buffer_page SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_buffer_page_lru SONAME 'ha_innodb.dll';
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index 176b063d147..f01ad40de41 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -69,6 +69,7 @@ Created 10/8/1995 Heikki Tuuri #include "thr0loc.h" #include "que0que.h" #include "srv0que.h" +#include "log0online.h" #include "log0recv.h" #include "pars0pars.h" #include "usr0sess.h" @@ -161,6 +162,10 @@ UNIV_INTERN ibool srv_recovery_stats = FALSE; UNIV_INTERN ulint srv_use_purge_thread = 0; +UNIV_INTERN my_bool srv_track_changed_pages = TRUE; + +UNIV_INTERN ulonglong srv_changed_pages_limit = 0; + /* if TRUE, then we auto-extend the last data file */ UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE; /* if != 0, this tells the max size auto-extending may increase the @@ -405,6 +410,9 @@ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; UNIV_INTERN ulong srv_stats_auto_update = 1; UNIV_INTERN ulint srv_stats_update_need_lock = 1; UNIV_INTERN ibool srv_use_sys_stats_table = FALSE; +#ifdef UNIV_DEBUG +UNIV_INTERN ulong srv_sys_stats_root_page = 0; +#endif UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; UNIV_INTERN ibool srv_use_checksums = TRUE; @@ -724,6 +732,10 @@ UNIV_INTERN os_event_t srv_lock_timeout_thread_event; UNIV_INTERN os_event_t srv_shutdown_event; +UNIV_INTERN os_event_t srv_checkpoint_completed_event; + +UNIV_INTERN os_event_t srv_redo_log_thread_finished_event; + UNIV_INTERN srv_sys_t* srv_sys = NULL; /* padding to prevent other memory update hotspots from residing on @@ -1031,6 +1043,9 @@ srv_init(void) srv_lock_timeout_thread_event = os_event_create(NULL); srv_shutdown_event = os_event_create(NULL); + srv_checkpoint_completed_event = os_event_create(NULL); + srv_redo_log_thread_finished_event = os_event_create(NULL); + for (i = 0; i < SRV_MASTER + 1; i++) { srv_n_threads_active[i] = 0; srv_n_threads[i] = 0; @@ -1178,7 +1193,7 @@ retry: static void srv_conc_exit_innodb_timer_based(trx_t* trx) { - (void) 
os_atomic_increment_lint(&srv_conc_n_threads, -1); + (void) os_atomic_increment_lint(&srv_conc_n_threads, -1); trx->declared_to_be_inside_innodb = FALSE; trx->n_tickets_to_enter_innodb = 0; return; @@ -1385,7 +1400,7 @@ srv_conc_force_enter_innodb( ut_ad(srv_conc_n_threads >= 0); #ifdef HAVE_ATOMIC_BUILTINS if (srv_thread_concurrency_timer_based) { - (void) os_atomic_increment_lint(&srv_conc_n_threads, 1); + (void) os_atomic_increment_lint(&srv_conc_n_threads, 1); trx->declared_to_be_inside_innodb = TRUE; trx->n_tickets_to_enter_innodb = 1; return; @@ -2674,6 +2689,41 @@ exit_func: OS_THREAD_DUMMY_RETURN; } +/******************************************************************//** +A thread which follows the redo log and outputs the changed page bitmap. +@return a dummy value */ +UNIV_INTERN +os_thread_ret_t +srv_redo_log_follow_thread( +/*=======================*/ + void* arg __attribute__((unused))) /*!< in: a dummy parameter + required by + os_thread_create */ +{ +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Redo log follower thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif + my_thread_init(); + + do { + os_event_wait(srv_checkpoint_completed_event); + os_event_reset(srv_checkpoint_completed_event); + + log_online_follow_redo_log(); + + } while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE); + + log_online_read_shutdown(); + os_event_set(srv_redo_log_thread_finished_event); + + my_thread_end(); + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + + /*******************************************************************//** Tells the InnoDB server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). 
Used diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index ba4328c80e1..0c33b2208f2 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -51,6 +51,7 @@ Created 2/16/1996 Heikki Tuuri #include "rem0rec.h" #include "mtr0mtr.h" #include "log0log.h" +#include "log0online.h" #include "log0recv.h" #include "page0page.h" #include "page0cur.h" @@ -127,9 +128,9 @@ static mutex_t ios_mutex; static ulint ios; /** io_handler_thread parameters for thread identification */ -static ulint n[SRV_MAX_N_IO_THREADS + 7 + UNIV_MAX_PARALLELISM]; +static ulint n[SRV_MAX_N_IO_THREADS + 8 + UNIV_MAX_PARALLELISM]; /** io_handler_thread identifiers */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7 + UNIV_MAX_PARALLELISM]; +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 8 + UNIV_MAX_PARALLELISM]; /** We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. */ @@ -1823,6 +1824,12 @@ innobase_start_or_create_for_mysql(void) trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); } + + if (UNIV_UNLIKELY(!dict_verify_xtradb_sys_stats())) { + fprintf(stderr, "InnoDB: Warning: " + "SYS_STATS table corrupted, recreating\n"); + dict_recreate_xtradb_sys_stats(); + } } if (!create_new_db && sum_of_new_sizes > 0) { @@ -1885,6 +1892,19 @@ innobase_start_or_create_for_mysql(void) if (srv_auto_lru_dump && srv_blocking_lru_restore) buf_LRU_file_restore(); + if (srv_track_changed_pages) { + + /* Initialize the log tracking subsystem here to block + server startup until it's completed due to the potential + need to re-read previous server run's log. 
*/ + log_online_read_init(); + + /* Create the thread that follows the redo log to output the + changed page bitmap */ + os_thread_create(&srv_redo_log_follow_thread, NULL, + thread_ids + 5 + SRV_MAX_N_IO_THREADS); + } + srv_is_being_started = FALSE; if (trx_doublewrite == NULL) { diff --git a/storage/xtradb/sync/sync0arr.c b/storage/xtradb/sync/sync0arr.c index 4e788b4a968..35385507e40 100644 --- a/storage/xtradb/sync/sync0arr.c +++ b/storage/xtradb/sync/sync0arr.c @@ -926,6 +926,11 @@ sync_array_print_long_waits( ibool fatal = FALSE; double longest_diff = 0; + /* For huge tables, skip the check during CHECK TABLE etc... */ + if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) { + return(FALSE); + } + for (i = 0; i < sync_primary_wait_array->n_cells; i++) { double diff; diff --git a/storage/xtradb/ut/ut0rbt.c b/storage/xtradb/ut/ut0rbt.c index 3d7bc91e714..643312ab79d 100644 --- a/storage/xtradb/ut/ut0rbt.c +++ b/storage/xtradb/ut/ut0rbt.c @@ -48,7 +48,6 @@ red-black properties: #endif #define ROOT(t) (t->root->left) -#define SIZEOF_NODE(t) ((sizeof(ib_rbt_node_t) + t->sizeof_value) - 1) /****************************************************************//** Print out the sub-tree recursively. */ @@ -829,6 +828,21 @@ rbt_add_node( node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree)); memcpy(node->value, value, tree->sizeof_value); + return(rbt_add_preallocated_node(tree, parent, node)); +} + +/****************************************************************//** +Add a new caller-provided node to tree at the specified position. +The node must have its key fields initialized correctly. 
+@return added node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_add_preallocated_node( +/*======================*/ + ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: parent */ + ib_rbt_node_t* node) /*!< in: node */ +{ node->parent = node->left = node->right = tree->nil; /* If tree is empty */ @@ -1137,7 +1151,17 @@ rbt_clear( ib_rbt_t* tree) /*!< in: rb tree */ { rbt_free_node(ROOT(tree), tree->nil); + rbt_reset(tree); +} +/****************************************************************//** +Clear the tree without deleting and freeing its nodes. */ +UNIV_INTERN +void +rbt_reset( +/*======*/ + ib_rbt_t* tree) /*!< in: rb tree */ +{ tree->n_nodes = 0; tree->root->left = tree->root->right = tree->nil; } |