116 files changed, 4904 insertions, 1145 deletions
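A large part of this changeset is test coverage for Percona's query response time distribution. The new tests bucket query execution times into regions whose boundaries are successive powers of query_response_time_range_base (values outside 2-1000 are truncated, as the warnings for 1 and 1001 in the new .result files show), with a trailing TOO LONG QUERY overflow region. The sketch below is only a model of the bucket layout visible in the expected output; it assumes a boundary base**k is kept exactly when it fits the fixed-width TIME column (at least 0.000001 and below 10,000,000), which is an inference from the .result files rather than from the server source.

    # Illustrative sketch (assumptions noted above), not server code:
    # reproduce the region boundaries and region_count values shown in
    # percona_query_response_time.result for a given range base.
    from fractions import Fraction

    def response_time_regions(base):
        lo, hi = Fraction(1, 1_000_000), Fraction(10_000_000)
        ks = [k for k in range(-30, 31) if lo <= Fraction(base) ** k < hi]
        boundaries = [float(Fraction(base) ** k) for k in ks]
        return boundaries, len(boundaries) + 1   # +1 for "TOO LONG QUERY"

    for base in (2, 10, 7, 156, 1000):
        times, region_count = response_time_regions(base)
        print(base, region_count)   # expected: 44, 14, 17, 7, 6

Under that assumption the computed counts match the region_count values the tests expect after each FLUSH QUERY_RESPONSE_TIME / SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE cycle.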
diff --git a/mysql-test/include/percona_query_response_time_flush.inc b/mysql-test/include/percona_query_response_time_flush.inc new file mode 100644 index 00000000000..44bb320fe13 --- /dev/null +++ b/mysql-test/include/percona_query_response_time_flush.inc @@ -0,0 +1 @@ +FLUSH QUERY_RESPONSE_TIME; diff --git a/mysql-test/include/percona_query_response_time_show.inc b/mysql-test/include/percona_query_response_time_show.inc new file mode 100644 index 00000000000..761b2c6f0df --- /dev/null +++ b/mysql-test/include/percona_query_response_time_show.inc @@ -0,0 +1,7 @@ +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; diff --git a/mysql-test/include/percona_query_response_time_sleep.inc b/mysql-test/include/percona_query_response_time_sleep.inc new file mode 100644 index 00000000000..40688b173b0 --- /dev/null +++ b/mysql-test/include/percona_query_response_time_sleep.inc @@ -0,0 +1,19 @@ +SELECT SLEEP(0.31); +SELECT SLEEP(0.32); +SELECT SLEEP(0.33); +SELECT SLEEP(0.34); +SELECT SLEEP(0.35); +SELECT SLEEP(0.36); +SELECT SLEEP(0.37); +SELECT SLEEP(0.38); +SELECT SLEEP(0.39); +SELECT SLEEP(0.40); +SELECT SLEEP(1.1); +SELECT SLEEP(1.2); +SELECT SLEEP(1.3); +SELECT SLEEP(1.5); +SELECT SLEEP(1.4); +SELECT SLEEP(0.5); +SELECT SLEEP(2.1); +SELECT SLEEP(2.3); +SELECT SLEEP(2.5); diff --git a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result index bc556bac234..537b30520bd 100644 --- a/mysql-test/r/information_schema.result +++ b/mysql-test/r/information_schema.result @@ -66,6 +66,7 @@ INNODB_LOCKS INNODB_LOCK_WAITS INNODB_RSEG INNODB_SYS_INDEXES +INNODB_SYS_STATS INNODB_SYS_TABLES INNODB_TABLE_STATS INNODB_TRX @@ -859,7 +860,6 @@ TABLE_NAME COLUMN_NAME PRIVILEGES COLUMNS TABLE_NAME select COLUMN_PRIVILEGES TABLE_NAME select FILES TABLE_NAME select -INNODB_BUFFER_POOL_PAGES_INDEX table_name select INNODB_INDEX_STATS table_name select INNODB_TABLE_STATS table_name select KEY_COLUMN_USAGE TABLE_NAME select diff --git a/mysql-test/r/information_schema_all_engines.result b/mysql-test/r/information_schema_all_engines.result index a2f34fad221..a2209f84a45 100644 --- a/mysql-test/r/information_schema_all_engines.result +++ b/mysql-test/r/information_schema_all_engines.result @@ -39,13 +39,14 @@ XTRADB_ADMIN_COMMAND INNODB_TRX INNODB_SYS_TABLES INNODB_LOCK_WAITS -INNODB_CMPMEM_RESET +INNODB_SYS_STATS INNODB_LOCKS INNODB_CMPMEM INNODB_TABLE_STATS INNODB_SYS_INDEXES INNODB_CMP_RESET INNODB_BUFFER_POOL_PAGES_BLOB +INNODB_CMPMEM_RESET INNODB_INDEX_STATS SELECT t.table_name, c1.column_name FROM information_schema.tables t @@ -95,18 +96,19 @@ PBXT_STATISTICS ID INNODB_CMP page_size INNODB_RSEG rseg_id XTRADB_ENHANCEMENTS name -INNODB_BUFFER_POOL_PAGES_INDEX schema_name +INNODB_BUFFER_POOL_PAGES_INDEX index_id XTRADB_ADMIN_COMMAND result_message INNODB_TRX trx_id INNODB_SYS_TABLES SCHEMA INNODB_LOCK_WAITS requesting_trx_id -INNODB_CMPMEM_RESET page_size +INNODB_SYS_STATS INDEX_ID INNODB_LOCKS lock_id INNODB_CMPMEM page_size INNODB_TABLE_STATS table_schema INNODB_SYS_INDEXES TABLE_ID INNODB_CMP_RESET page_size 
INNODB_BUFFER_POOL_PAGES_BLOB space_id +INNODB_CMPMEM_RESET page_size INNODB_INDEX_STATS table_schema SELECT t.table_name, c1.column_name FROM information_schema.tables t @@ -156,18 +158,19 @@ PBXT_STATISTICS ID INNODB_CMP page_size INNODB_RSEG rseg_id XTRADB_ENHANCEMENTS name -INNODB_BUFFER_POOL_PAGES_INDEX schema_name +INNODB_BUFFER_POOL_PAGES_INDEX index_id XTRADB_ADMIN_COMMAND result_message INNODB_TRX trx_id INNODB_SYS_TABLES SCHEMA INNODB_LOCK_WAITS requesting_trx_id -INNODB_CMPMEM_RESET page_size +INNODB_SYS_STATS INDEX_ID INNODB_LOCKS lock_id INNODB_CMPMEM page_size INNODB_TABLE_STATS table_schema INNODB_SYS_INDEXES TABLE_ID INNODB_CMP_RESET page_size INNODB_BUFFER_POOL_PAGES_BLOB space_id +INNODB_CMPMEM_RESET page_size INNODB_INDEX_STATS table_schema select 1 as f1 from information_schema.tables where "CHARACTER_SETS"= (select cast(table_name as char) from information_schema.tables @@ -212,6 +215,7 @@ INNODB_LOCKS information_schema.INNODB_LOCKS 1 INNODB_LOCK_WAITS information_schema.INNODB_LOCK_WAITS 1 INNODB_RSEG information_schema.INNODB_RSEG 1 INNODB_SYS_INDEXES information_schema.INNODB_SYS_INDEXES 1 +INNODB_SYS_STATS information_schema.INNODB_SYS_STATS 1 INNODB_SYS_TABLES information_schema.INNODB_SYS_TABLES 1 INNODB_TABLE_STATS information_schema.INNODB_TABLE_STATS 1 INNODB_TRX information_schema.INNODB_TRX 1 @@ -277,13 +281,14 @@ Database: information_schema | INNODB_TRX | | INNODB_SYS_TABLES | | INNODB_LOCK_WAITS | -| INNODB_CMPMEM_RESET | +| INNODB_SYS_STATS | | INNODB_LOCKS | | INNODB_CMPMEM | | INNODB_TABLE_STATS | | INNODB_SYS_INDEXES | | INNODB_CMP_RESET | | INNODB_BUFFER_POOL_PAGES_BLOB | +| INNODB_CMPMEM_RESET | | INNODB_INDEX_STATS | +---------------------------------------+ Database: INFORMATION_SCHEMA @@ -328,13 +333,14 @@ Database: INFORMATION_SCHEMA | INNODB_TRX | | INNODB_SYS_TABLES | | INNODB_LOCK_WAITS | -| INNODB_CMPMEM_RESET | +| INNODB_SYS_STATS | | INNODB_LOCKS | | INNODB_CMPMEM | | INNODB_TABLE_STATS | | INNODB_SYS_INDEXES | | INNODB_CMP_RESET | | INNODB_BUFFER_POOL_PAGES_BLOB | +| INNODB_CMPMEM_RESET | | INNODB_INDEX_STATS | +---------------------------------------+ Wildcard: inf_rmation_schema @@ -345,5 +351,5 @@ Wildcard: inf_rmation_schema +--------------------+ SELECT table_schema, count(*) FROM information_schema.TABLES WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test', 'mysqltest') AND table_name<>'ndb_binlog_index' AND table_name<>'ndb_apply_status' GROUP BY TABLE_SCHEMA; table_schema count(*) -information_schema 46 +information_schema 47 mysql 22 diff --git a/mysql-test/suite/percona/disabled.def b/mysql-test/suite/percona/disabled.def index 4f35807b7a1..a99b952a5eb 100644 --- a/mysql-test/suite/percona/disabled.def +++ b/mysql-test/suite/percona/disabled.def @@ -8,6 +8,15 @@ percona_query_cache_with_comments_prepared_statements: Feature not merged int percona_show_temp_tables: Feature not merged into MariaDB percona_slow_query_log-use_global_long_query_time: Feature not merged into MariaDB percona_query_cache_with_comments_disable: Feature not merged into MariaDB +percona_log_connection_error: Feature not merged into MariaDB +percona_query_response_time: Feature not merged into MariaDB +percona_query_response_time-stored: Feature not merged into MariaDB +percona_sql_no_fcache: Feature not merged into MariaDB +percona_status_wait_query_cache_mutex: Feature not merged into MariaDB +percona_slave_innodb_stats: Feature not merged into MariaDB +percona_query_response_time-replication: Feature not merged into MariaDB 
+percona_server_variables: Feature not merged into MariaDB percona_slow_query_log-log_slow_verbosity: InnoDB filtering information not fully merged into MariaDB +percona_innodb_buffer_pool_shm: Requires big shmmax not default on many systems diff --git a/mysql-test/suite/percona/percona_innodb_buffer_pool_shm-master.opt b/mysql-test/suite/percona/percona_innodb_buffer_pool_shm-master.opt new file mode 100644 index 00000000000..5974ef6e2be --- /dev/null +++ b/mysql-test/suite/percona/percona_innodb_buffer_pool_shm-master.opt @@ -0,0 +1 @@ +--innodb_buffer_pool_shm_key=123456 diff --git a/mysql-test/suite/percona/percona_innodb_buffer_pool_shm.result b/mysql-test/suite/percona/percona_innodb_buffer_pool_shm.result new file mode 100644 index 00000000000..08ece8fb9a9 --- /dev/null +++ b/mysql-test/suite/percona/percona_innodb_buffer_pool_shm.result @@ -0,0 +1,6 @@ +show variables like 'innodb_buffer_pool_shm%'; +Variable_name Value +innodb_buffer_pool_shm_key 123456 +show variables like 'innodb_buffer_pool_shm%'; +Variable_name Value +innodb_buffer_pool_shm_key 123456 diff --git a/mysql-test/suite/percona/percona_innodb_buffer_pool_shm.test b/mysql-test/suite/percona/percona_innodb_buffer_pool_shm.test new file mode 100644 index 00000000000..7b81bb8d54b --- /dev/null +++ b/mysql-test/suite/percona/percona_innodb_buffer_pool_shm.test @@ -0,0 +1,18 @@ +--source include/have_innodb.inc +show variables like 'innodb_buffer_pool_shm%'; + +#clean shutdown (restart_mysqld.inc is not clean if over 10 sec...) +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF +shutdown_server 120; +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart +EOF +--enable_reconnect +--source include/wait_until_connected_again.inc +--disable_reconnect + +show variables like 'innodb_buffer_pool_shm%'; +--sleep 1 +--system ipcrm -M 123456 diff --git a/mysql-test/suite/percona/percona_innodb_use_sys_stats_table-master.opt b/mysql-test/suite/percona/percona_innodb_use_sys_stats_table-master.opt new file mode 100644 index 00000000000..c6865f5704c --- /dev/null +++ b/mysql-test/suite/percona/percona_innodb_use_sys_stats_table-master.opt @@ -0,0 +1 @@ +--innodb_use_sys_stats_table diff --git a/mysql-test/suite/percona/percona_innodb_use_sys_stats_table.result b/mysql-test/suite/percona/percona_innodb_use_sys_stats_table.result new file mode 100644 index 00000000000..cb64de41901 --- /dev/null +++ b/mysql-test/suite/percona/percona_innodb_use_sys_stats_table.result @@ -0,0 +1,3 @@ +show variables like 'innodb_use_sys_stats%'; +Variable_name Value +innodb_use_sys_stats_table ON diff --git a/mysql-test/suite/percona/percona_innodb_use_sys_stats_table.test b/mysql-test/suite/percona/percona_innodb_use_sys_stats_table.test new file mode 100644 index 00000000000..02791137f08 --- /dev/null +++ b/mysql-test/suite/percona/percona_innodb_use_sys_stats_table.test @@ -0,0 +1,2 @@ +--source include/have_innodb.inc +show variables like 'innodb_use_sys_stats%'; diff --git a/mysql-test/suite/percona/percona_log_connection_error-master.opt b/mysql-test/suite/percona/percona_log_connection_error-master.opt new file mode 100644 index 00000000000..32a891789f3 --- /dev/null +++ b/mysql-test/suite/percona/percona_log_connection_error-master.opt @@ -0,0 +1 @@ +--log-error
\ No newline at end of file diff --git a/mysql-test/suite/percona/percona_log_connection_error.result b/mysql-test/suite/percona/percona_log_connection_error.result new file mode 100644 index 00000000000..3c6c67f770c --- /dev/null +++ b/mysql-test/suite/percona/percona_log_connection_error.result @@ -0,0 +1,15 @@ +SET @old_max_connections = @@max_connections; +SET @old_log_warnings = @@log_warnings; +SET GLOBAL max_connections=2; +SET GLOBAL LOG_WARNINGS = 0; +connect(localhost,root,,test,port,socket); +ERROR HY000: Too many connections +SET GLOBAL LOG_WARNINGS = 1; +connect(localhost,root,,test,port,socket); +ERROR HY000: Too many connections +SET GLOBAL LOG_WARNINGS = 0; +connect(localhost,root,,test,port,socket); +ERROR HY000: Too many connections +SET GLOBAL max_connections = @old_max_connections; +SET GLOBAL log_warnings = @old_log_warnings; +1 diff --git a/mysql-test/suite/percona/percona_log_connection_error.test b/mysql-test/suite/percona/percona_log_connection_error.test new file mode 100644 index 00000000000..57cd652bd24 --- /dev/null +++ b/mysql-test/suite/percona/percona_log_connection_error.test @@ -0,0 +1,52 @@ +--source include/not_embedded.inc + +connect (main,localhost,root,,); +connection main; +SET @old_max_connections = @@max_connections; +SET @old_log_warnings = @@log_warnings; +SET GLOBAL max_connections=2; +let $port=`SELECT Variable_value FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE Variable_name LIKE 'port'`; +let $socket=`SELECT Variable_value FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE Variable_name LIKE 'socket'`; + +SET GLOBAL LOG_WARNINGS = 0; +--connect (conn0,localhost,root,,) +connection conn0; +replace_result $port port $socket socket; +--error 1040 +--connect(conn1,localhost,root,,) +disconnect conn0; +SLEEP 0.1; # tsarev: hack, but i don't know (and didn't find) how right + +connection main; +SET GLOBAL LOG_WARNINGS = 1; +--connect (conn1,localhost,root,,) +replace_result $port port $socket socket; +--error 1040 +--connect (conn0,localhost,root,,) +disconnect conn1; +SLEEP 0.1; # tsarev: hack, but i don't know (and didn't find) how right + +connection main; +SET GLOBAL LOG_WARNINGS = 0; +--connect (conn0,localhost,root,,) +replace_result $port port $socket socket; +--error 1040 +--connect(conn1,localhost,root,,) +disconnect conn0; +SLEEP 0.1; # tsarev: hack, but i don't know (and didn't find) how right + +connection main; +SET GLOBAL max_connections = @old_max_connections; +SET GLOBAL log_warnings = @old_log_warnings; +let $log_error_= `SELECT @@GLOBAL.log_error`; +if(!`select LENGTH('$log_error_')`) +{ + # MySQL Server on windows is started with --console and thus + # does not know the location of its .err log, use default location + let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; +} +# Assign env variable LOG_ERROR +let LOG_ERROR=$log_error_; + +let cmd=cat $log_error | grep "Too many connections" | wc -l; +exec $cmd; diff --git a/mysql-test/suite/percona/percona_query_response_time-replication.result b/mysql-test/suite/percona/percona_query_response_time-replication.result new file mode 100644 index 00000000000..df5c73812df --- /dev/null +++ b/mysql-test/suite/percona/percona_query_response_time-replication.result @@ -0,0 +1,60 @@ +stop slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +reset master; +reset slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +start slave; +DROP TABLE IF EXISTS t; +CREATE TABLE t(id INT); +SELECT * from t; +id +SELECT * from t; +id +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1; +Warnings: 
+Warning 1292 Truncated incorrect query_response_time_range_base value: '1' +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 2 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 10 +FLUSH QUERY_RESPONSE_TIME; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=ON; +INSERT INTO t VALUES(0); +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) +0 +INSERT INTO t VALUES(1); +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) +0 +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) +2 +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) +3 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 2 +FLUSH QUERY_RESPONSE_TIME; +INSERT INTO t VALUES(0); +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) +0 +INSERT INTO t VALUES(1); +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) +0 +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) +2 +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) +3 +DROP TABLE IF EXISTS t; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=OFF; diff --git a/mysql-test/suite/percona/percona_query_response_time-replication.test b/mysql-test/suite/percona/percona_query_response_time-replication.test new file mode 100644 index 00000000000..4f674c2fd19 --- /dev/null +++ b/mysql-test/suite/percona/percona_query_response_time-replication.test @@ -0,0 +1,52 @@ +--source include/master-slave.inc + +connection master; +-- disable_warnings +DROP TABLE IF EXISTS t; +-- enable_warnings +CREATE TABLE t(id INT); +SELECT * from t; + +sync_slave_with_master; + +connection slave; +SELECT * from t; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +source include/percona_query_response_time_flush.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=ON; + +connection master; +INSERT INTO t VALUES(0); +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +INSERT INTO t VALUES(1); +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +sync_slave_with_master; + +connection slave; +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SELECT 
SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; + +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +source include/percona_query_response_time_flush.inc; + +connection master; +INSERT INTO t VALUES(0); +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +INSERT INTO t VALUES(1); +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +sync_slave_with_master; + +connection slave; +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; + +connection master; +DROP TABLE IF EXISTS t; +sync_slave_with_master; +connection slave; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=OFF; diff --git a/mysql-test/suite/percona/percona_query_response_time-stored.result b/mysql-test/suite/percona/percona_query_response_time-stored.result new file mode 100644 index 00000000000..386180c791a --- /dev/null +++ b/mysql-test/suite/percona/percona_query_response_time-stored.result @@ -0,0 +1,310 @@ +CREATE FUNCTION test_f() +RETURNS CHAR(30) DETERMINISTIC +BEGIN +DECLARE first VARCHAR(5); +DECLARE second VARCHAR(5); +DECLARE result VARCHAR(20); +SELECT SLEEP(1.11) INTO first; +SET first= 'Hello'; +SET second=', '; +SET result= CONCAT(first,second); +SET result= CONCAT(result,'world!'); +RETURN result; +END/ +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1; +Warnings: +Warning 1292 Truncated incorrect query_response_time_range_base value: '1' +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 2 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 2 +FLUSH QUERY_RESPONSE_TIME; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +44 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.000003 + 0.000007 + 0.000015 + 0.000030 + 0.000061 + 0.000122 + 0.000244 + 0.000488 + 0.000976 + 0.001953 + 0.003906 + 0.007812 + 0.015625 + 0.031250 + 0.062500 + 0.125000 + 0.250000 + 0.500000 + 1.000000 + 2.000000 + 4.000000 + 8.000000 + 16.000000 + 32.000000 + 64.000000 + 128.000000 + 256.000000 + 512.000000 + 1024.000000 + 2048.000000 + 4096.000000 + 8192.000000 + 16384.000000 + 32768.000000 + 65536.000000 + 131072.000000 + 262144.000000 + 524288.000000 + 1048576.00000 + 2097152.00000 + 4194304.00000 + 8388608.00000 +TOO LONG QUERY +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +test_f() +Hello, world! +SELECT test_f(); +test_f() +Hello, world! +SELECT test_f(); +test_f() +Hello, world! +SELECT test_f(); +test_f() +Hello, world! 
+SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 5 2 44 +4 5 2 44 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +44 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.000003 + 0.000007 + 0.000015 + 0.000030 + 0.000061 + 0.000122 + 0.000244 + 0.000488 + 0.000976 + 0.001953 + 0.003906 + 0.007812 + 0.015625 + 0.031250 + 0.062500 + 0.125000 + 0.250000 + 0.500000 + 1.000000 + 2.000000 + 4.000000 + 8.000000 + 16.000000 + 32.000000 + 64.000000 + 128.000000 + 256.000000 + 512.000000 + 1024.000000 + 2048.000000 + 4096.000000 + 8192.000000 + 16384.000000 + 32768.000000 + 65536.000000 + 131072.000000 + 262144.000000 + 524288.000000 + 1048576.00000 + 2097152.00000 + 4194304.00000 + 8388608.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 2 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 10 +FLUSH QUERY_RESPONSE_TIME; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +test_f() +Hello, world! +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 2 2 14 +1 2 2 14 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +14 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.000010 + 0.000100 + 0.001000 + 0.010000 + 0.100000 + 1.000000 + 10.000000 + 100.000000 + 1000.000000 + 10000.000000 + 100000.000000 + 1000000.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 10 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 7 +FLUSH QUERY_RESPONSE_TIME; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +test_f() +Hello, world! 
+SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 2 2 17 +1 2 2 17 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +17 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.000008 + 0.000059 + 0.000416 + 0.002915 + 0.020408 + 0.142857 + 1.000000 + 7.000000 + 49.000000 + 343.000000 + 2401.000000 + 16807.000000 + 117649.000000 + 823543.000000 + 5764801.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 7 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 156 +FLUSH QUERY_RESPONSE_TIME; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +test_f() +Hello, world! +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 2 2 7 +1 2 2 7 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +7 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000041 + 0.006410 + 1.000000 + 156.000000 + 24336.000000 + 3796416.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 156 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 1000 +FLUSH QUERY_RESPONSE_TIME; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +test_f() +Hello, world! 
+SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 2 2 6 +1 2 2 6 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +6 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.001000 + 1.000000 + 1000.000000 + 1000000.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 1000 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001; +Warnings: +Warning 1292 Truncated incorrect query_response_time_range_base value: '1001' +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 1000 +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10; +DROP FUNCTION test_f; diff --git a/mysql-test/suite/percona/percona_query_response_time-stored.test b/mysql-test/suite/percona/percona_query_response_time-stored.test new file mode 100644 index 00000000000..f761dd7d01c --- /dev/null +++ b/mysql-test/suite/percona/percona_query_response_time-stored.test @@ -0,0 +1,87 @@ +source include/have_innodb.inc; + +delimiter /; +CREATE FUNCTION test_f() +RETURNS CHAR(30) DETERMINISTIC +BEGIN + DECLARE first VARCHAR(5); + DECLARE second VARCHAR(5); + DECLARE result VARCHAR(20); + SELECT SLEEP(1.11) INTO first; + SET first= 'Hello'; + SET second=', '; + SET result= CONCAT(first,second); + SET result= CONCAT(result,'world!'); + RETURN result; +END/ +delimiter ;/ + +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source include/percona_query_response_time_flush.inc; +source include/percona_query_response_time_show.inc; + +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +SELECT test_f(); +SELECT test_f(); +SELECT test_f(); +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source include/percona_query_response_time_flush.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source include/percona_query_response_time_flush.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL 
QUERY_RESPONSE_TIME_RANGE_BASE = 156; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source include/percona_query_response_time_flush.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source include/percona_query_response_time_flush.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT test_f(); +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10; + +DROP FUNCTION test_f;
\ No newline at end of file diff --git a/mysql-test/suite/percona/percona_query_response_time.result b/mysql-test/suite/percona/percona_query_response_time.result new file mode 100644 index 00000000000..3c12284a525 --- /dev/null +++ b/mysql-test/suite/percona/percona_query_response_time.result @@ -0,0 +1,564 @@ +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1; +Warnings: +Warning 1292 Truncated incorrect query_response_time_range_base value: '1' +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 2 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 2 +FLUSH QUERY_RESPONSE_TIME; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +44 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.000003 + 0.000007 + 0.000015 + 0.000030 + 0.000061 + 0.000122 + 0.000244 + 0.000488 + 0.000976 + 0.001953 + 0.003906 + 0.007812 + 0.015625 + 0.031250 + 0.062500 + 0.125000 + 0.250000 + 0.500000 + 1.000000 + 2.000000 + 4.000000 + 8.000000 + 16.000000 + 32.000000 + 64.000000 + 128.000000 + 256.000000 + 512.000000 + 1024.000000 + 2048.000000 + 4096.000000 + 8192.000000 + 16384.000000 + 32768.000000 + 65536.000000 + 131072.000000 + 262144.000000 + 524288.000000 + 1048576.00000 + 2097152.00000 + 4194304.00000 + 8388608.00000 +TOO LONG QUERY +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT SLEEP(0.31); +SLEEP(0.31) +0 +SELECT SLEEP(0.32); +SLEEP(0.32) +0 +SELECT SLEEP(0.33); +SLEEP(0.33) +0 +SELECT SLEEP(0.34); +SLEEP(0.34) +0 +SELECT SLEEP(0.35); +SLEEP(0.35) +0 +SELECT SLEEP(0.36); +SLEEP(0.36) +0 +SELECT SLEEP(0.37); +SLEEP(0.37) +0 +SELECT SLEEP(0.38); +SLEEP(0.38) +0 +SELECT SLEEP(0.39); +SLEEP(0.39) +0 +SELECT SLEEP(0.40); +SLEEP(0.40) +0 +SELECT SLEEP(1.1); +SLEEP(1.1) +0 +SELECT SLEEP(1.2); +SLEEP(1.2) +0 +SELECT SLEEP(1.3); +SLEEP(1.3) +0 +SELECT SLEEP(1.5); +SLEEP(1.5) +0 +SELECT SLEEP(1.4); +SLEEP(1.4) +0 +SELECT SLEEP(0.5); +SLEEP(0.5) +0 +SELECT SLEEP(2.1); +SLEEP(2.1) +0 +SELECT SLEEP(2.3); +SLEEP(2.3) +0 +SELECT SLEEP(2.5); +SLEEP(2.5) +0 +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 20 5 44 +10 20 5 44 +1 20 5 44 +5 20 5 44 +3 20 5 44 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +44 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.000003 + 0.000007 + 0.000015 + 0.000030 + 0.000061 + 0.000122 + 0.000244 + 0.000488 + 0.000976 + 0.001953 + 0.003906 + 0.007812 + 0.015625 + 0.031250 
+ 0.062500 + 0.125000 + 0.250000 + 0.500000 + 1.000000 + 2.000000 + 4.000000 + 8.000000 + 16.000000 + 32.000000 + 64.000000 + 128.000000 + 256.000000 + 512.000000 + 1024.000000 + 2048.000000 + 4096.000000 + 8192.000000 + 16384.000000 + 32768.000000 + 65536.000000 + 131072.000000 + 262144.000000 + 524288.000000 + 1048576.00000 + 2097152.00000 + 4194304.00000 + 8388608.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 2 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 10 +FLUSH QUERY_RESPONSE_TIME; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT SLEEP(0.31); +SLEEP(0.31) +0 +SELECT SLEEP(0.32); +SLEEP(0.32) +0 +SELECT SLEEP(0.33); +SLEEP(0.33) +0 +SELECT SLEEP(0.34); +SLEEP(0.34) +0 +SELECT SLEEP(0.35); +SLEEP(0.35) +0 +SELECT SLEEP(0.36); +SLEEP(0.36) +0 +SELECT SLEEP(0.37); +SLEEP(0.37) +0 +SELECT SLEEP(0.38); +SLEEP(0.38) +0 +SELECT SLEEP(0.39); +SLEEP(0.39) +0 +SELECT SLEEP(0.40); +SLEEP(0.40) +0 +SELECT SLEEP(1.1); +SLEEP(1.1) +0 +SELECT SLEEP(1.2); +SLEEP(1.2) +0 +SELECT SLEEP(1.3); +SLEEP(1.3) +0 +SELECT SLEEP(1.5); +SLEEP(1.5) +0 +SELECT SLEEP(1.4); +SLEEP(1.4) +0 +SELECT SLEEP(0.5); +SLEEP(0.5) +0 +SELECT SLEEP(2.1); +SLEEP(2.1) +0 +SELECT SLEEP(2.3); +SLEEP(2.3) +0 +SELECT SLEEP(2.5); +SLEEP(2.5) +0 +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 20 3 14 +11 20 3 14 +8 20 3 14 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +14 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.000010 + 0.000100 + 0.001000 + 0.010000 + 0.100000 + 1.000000 + 10.000000 + 100.000000 + 1000.000000 + 10000.000000 + 100000.000000 + 1000000.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 10 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 7 +FLUSH QUERY_RESPONSE_TIME; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT SLEEP(0.31); +SLEEP(0.31) +0 +SELECT SLEEP(0.32); +SLEEP(0.32) +0 +SELECT SLEEP(0.33); +SLEEP(0.33) +0 +SELECT SLEEP(0.34); +SLEEP(0.34) +0 +SELECT SLEEP(0.35); +SLEEP(0.35) +0 +SELECT SLEEP(0.36); +SLEEP(0.36) +0 +SELECT SLEEP(0.37); +SLEEP(0.37) +0 +SELECT SLEEP(0.38); +SLEEP(0.38) +0 +SELECT SLEEP(0.39); +SLEEP(0.39) +0 +SELECT SLEEP(0.40); +SLEEP(0.40) +0 +SELECT SLEEP(1.1); +SLEEP(1.1) +0 +SELECT SLEEP(1.2); +SLEEP(1.2) +0 +SELECT SLEEP(1.3); +SLEEP(1.3) +0 +SELECT SLEEP(1.5); +SLEEP(1.5) +0 +SELECT SLEEP(1.4); +SLEEP(1.4) +0 +SELECT SLEEP(0.5); +SLEEP(0.5) +0 +SELECT SLEEP(2.1); +SLEEP(2.1) +0 +SELECT SLEEP(2.3); +SLEEP(2.3) +0 +SELECT SLEEP(2.5); +SLEEP(2.5) +0 +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as 
a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 20 3 17 +11 20 3 17 +8 20 3 17 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +17 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.000008 + 0.000059 + 0.000416 + 0.002915 + 0.020408 + 0.142857 + 1.000000 + 7.000000 + 49.000000 + 343.000000 + 2401.000000 + 16807.000000 + 117649.000000 + 823543.000000 + 5764801.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 7 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 156 +FLUSH QUERY_RESPONSE_TIME; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT SLEEP(0.31); +SLEEP(0.31) +0 +SELECT SLEEP(0.32); +SLEEP(0.32) +0 +SELECT SLEEP(0.33); +SLEEP(0.33) +0 +SELECT SLEEP(0.34); +SLEEP(0.34) +0 +SELECT SLEEP(0.35); +SLEEP(0.35) +0 +SELECT SLEEP(0.36); +SLEEP(0.36) +0 +SELECT SLEEP(0.37); +SLEEP(0.37) +0 +SELECT SLEEP(0.38); +SLEEP(0.38) +0 +SELECT SLEEP(0.39); +SLEEP(0.39) +0 +SELECT SLEEP(0.40); +SLEEP(0.40) +0 +SELECT SLEEP(1.1); +SLEEP(1.1) +0 +SELECT SLEEP(1.2); +SLEEP(1.2) +0 +SELECT SLEEP(1.3); +SLEEP(1.3) +0 +SELECT SLEEP(1.5); +SLEEP(1.5) +0 +SELECT SLEEP(1.4); +SLEEP(1.4) +0 +SELECT SLEEP(0.5); +SLEEP(0.5) +0 +SELECT SLEEP(2.1); +SLEEP(2.1) +0 +SELECT SLEEP(2.3); +SLEEP(2.3) +0 +SELECT SLEEP(2.5); +SLEEP(2.5) +0 +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 20 3 7 +11 20 3 7 +8 20 3 7 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +7 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000041 + 0.006410 + 1.000000 + 156.000000 + 24336.000000 + 3796416.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 156 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 1000 +FLUSH QUERY_RESPONSE_TIME; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +SELECT SLEEP(0.31); +SLEEP(0.31) +0 +SELECT SLEEP(0.32); +SLEEP(0.32) +0 +SELECT SLEEP(0.33); +SLEEP(0.33) +0 +SELECT SLEEP(0.34); +SLEEP(0.34) +0 +SELECT SLEEP(0.35); +SLEEP(0.35) +0 +SELECT SLEEP(0.36); +SLEEP(0.36) +0 +SELECT SLEEP(0.37); +SLEEP(0.37) +0 +SELECT SLEEP(0.38); +SLEEP(0.38) +0 +SELECT SLEEP(0.39); +SLEEP(0.39) +0 +SELECT SLEEP(0.40); +SLEEP(0.40) +0 +SELECT SLEEP(1.1); +SLEEP(1.1) +0 +SELECT SLEEP(1.2); +SLEEP(1.2) +0 +SELECT SLEEP(1.3); +SLEEP(1.3) +0 +SELECT SLEEP(1.5); +SLEEP(1.5) +0 +SELECT 
SLEEP(1.4); +SLEEP(1.4) +0 +SELECT SLEEP(0.5); +SLEEP(0.5) +0 +SELECT SLEEP(2.1); +SLEEP(2.1) +0 +SELECT SLEEP(2.3); +SLEEP(2.3) +0 +SELECT SLEEP(2.5); +SLEEP(2.5) +0 +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SELECT c.count, +(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count, +(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count +FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0; +count query_count not_zero_region_count region_count +1 20 3 6 +11 20 3 6 +8 20 3 6 +SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +region_count +6 +SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME; +time + 0.000001 + 0.001000 + 1.000000 + 1000.000000 + 1000000.00000 +TOO LONG QUERY +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 1000 +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001; +Warnings: +Warning 1292 Truncated incorrect query_response_time_range_base value: '1001' +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +Variable_name Value +query_response_time_range_base 1000 +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10; diff --git a/mysql-test/suite/percona/percona_query_response_time.test b/mysql-test/suite/percona/percona_query_response_time.test new file mode 100644 index 00000000000..a58cafc8d01 --- /dev/null +++ b/mysql-test/suite/percona/percona_query_response_time.test @@ -0,0 +1,65 @@ +source include/have_innodb.inc; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source include/percona_query_response_time_flush.inc; +source include/percona_query_response_time_show.inc; + +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +source include/percona_query_response_time_sleep.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source include/percona_query_response_time_flush.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +source include/percona_query_response_time_sleep.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source include/percona_query_response_time_flush.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +source include/percona_query_response_time_sleep.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source 
include/percona_query_response_time_flush.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +source include/percona_query_response_time_sleep.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +source include/percona_query_response_time_flush.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1; +source include/percona_query_response_time_sleep.inc; +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; + +source include/percona_query_response_time_show.inc; + +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001; +SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE'; + +SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0; +SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10; diff --git a/mysql-test/suite/percona/percona_server_variables.result b/mysql-test/suite/percona/percona_server_variables.result new file mode 100644 index 00000000000..25f961b201f --- /dev/null +++ b/mysql-test/suite/percona/percona_server_variables.result @@ -0,0 +1,340 @@ +show variables; +Variable_name Value +auto_increment_increment Value +auto_increment_offset Value +autocommit Value +automatic_sp_privileges Value +back_log Value +basedir Value +big_tables Value +binlog_cache_size Value +binlog_direct_non_transactional_updates Value +binlog_format Value +bulk_insert_buffer_size Value +character_set_client Value +character_set_connection Value +character_set_database Value +character_set_filesystem Value +character_set_results Value +character_set_server Value +character_set_system Value +character_sets_dir Value +collation_connection Value +collation_database Value +collation_server Value +completion_type Value +concurrent_insert Value +connect_timeout Value +datadir Value +date_format Value +datetime_format Value +debug Value +debug_sync Value +default_week_format Value +delay_key_write Value +delayed_insert_limit Value +delayed_insert_timeout Value +delayed_queue_size Value +div_precision_increment Value +enable_query_response_time_stats Value +engine_condition_pushdown Value +error_count Value +event_scheduler Value +expire_logs_days Value +fast_index_creation Value +flush Value +flush_time Value +foreign_key_checks Value +ft_boolean_syntax Value +ft_max_word_len Value +ft_min_word_len Value +ft_query_expansion_limit Value +ft_stopword_file Value +general_log Value +general_log_file Value +group_concat_max_len Value +have_community_features Value +have_compress Value +have_crypt Value +have_csv Value +have_dynamic_loading Value +have_geometry Value +have_innodb Value +have_ndbcluster Value +have_openssl Value +have_partitioning Value +have_query_cache Value +have_rtree_keys Value +have_ssl Value +have_symlink Value +hostname Value +identity Value +ignore_builtin_innodb Value +init_connect Value +init_file Value +init_slave Value +innodb_adaptive_checkpoint Value +innodb_adaptive_flushing Value +innodb_adaptive_hash_index Value +innodb_additional_mem_pool_size Value +innodb_autoextend_increment Value +innodb_autoinc_lock_mode Value +innodb_buffer_pool_shm_key Value +innodb_buffer_pool_size Value +innodb_change_buffering Value +innodb_checkpoint_age_target Value +innodb_checksums Value +innodb_commit_concurrency Value +innodb_concurrency_tickets Value 
+innodb_data_file_path Value +innodb_data_home_dir Value +innodb_dict_size_limit Value +innodb_doublewrite Value +innodb_doublewrite_file Value +innodb_enable_unsafe_group_commit Value +innodb_expand_import Value +innodb_extra_rsegments Value +innodb_extra_undoslots Value +innodb_fast_checksum Value +innodb_fast_recovery Value +innodb_fast_shutdown Value +innodb_file_format Value +innodb_file_format_check Value +innodb_file_per_table Value +innodb_flush_log_at_trx_commit Value +innodb_flush_log_at_trx_commit_session Value +innodb_flush_method Value +innodb_flush_neighbor_pages Value +innodb_force_recovery Value +innodb_ibuf_accel_rate Value +innodb_ibuf_active_contract Value +innodb_ibuf_max_size Value +innodb_io_capacity Value +innodb_lock_wait_timeout Value +innodb_locks_unsafe_for_binlog Value +innodb_log_buffer_size Value +innodb_log_file_size Value +innodb_log_files_in_group Value +innodb_log_group_home_dir Value +innodb_max_dirty_pages_pct Value +innodb_max_purge_lag Value +innodb_mirrored_log_groups Value +innodb_old_blocks_pct Value +innodb_old_blocks_time Value +innodb_open_files Value +innodb_overwrite_relay_log_info Value +innodb_page_size Value +innodb_pass_corrupt_table Value +innodb_read_ahead Value +innodb_read_ahead_threshold Value +innodb_read_io_threads Value +innodb_recovery_stats Value +innodb_replication_delay Value +innodb_rollback_on_timeout Value +innodb_show_locks_held Value +innodb_show_verbose_locks Value +innodb_spin_wait_delay Value +innodb_stats_auto_update Value +innodb_stats_method Value +innodb_stats_on_metadata Value +innodb_stats_sample_pages Value +innodb_stats_update_need_lock Value +innodb_strict_mode Value +innodb_support_xa Value +innodb_sync_spin_loops Value +innodb_table_locks Value +innodb_thread_concurrency Value +innodb_thread_concurrency_timer_based Value +innodb_thread_sleep_delay Value +innodb_use_purge_thread Value +innodb_use_sys_malloc Value +innodb_use_sys_stats_table Value +innodb_version Value +innodb_write_io_threads Value +insert_id Value +interactive_timeout Value +join_buffer_size Value +keep_files_on_create Value +key_buffer_size Value +key_cache_age_threshold Value +key_cache_block_size Value +key_cache_division_limit Value +language Value +large_files_support Value +large_page_size Value +large_pages Value +last_insert_id Value +lc_time_names Value +license Value +local_infile Value +locked_in_memory Value +log Value +log_bin Value +log_bin_trust_function_creators Value +log_bin_trust_routine_creators Value +log_error Value +log_output Value +log_queries_not_using_indexes Value +log_slave_updates Value +log_slow_filter Value +log_slow_queries Value +log_slow_rate_limit Value +log_slow_slave_statements Value +log_slow_sp_statements Value +log_slow_timestamp_every Value +log_slow_verbosity Value +log_warnings Value +long_query_time Value +low_priority_updates Value +lower_case_file_system Value +lower_case_table_names Value +max_allowed_packet Value +max_binlog_cache_size Value +max_binlog_size Value +max_connect_errors Value +max_connections Value +max_delayed_threads Value +max_error_count Value +max_heap_table_size Value +max_insert_delayed_threads Value +max_join_size Value +max_length_for_sort_data Value +max_prepared_stmt_count Value +max_relay_log_size Value +max_seeks_for_key Value +max_sort_length Value +max_sp_recursion_depth Value +max_tmp_tables Value +max_user_connections Value +max_write_lock_count Value +min_examined_row_limit Value +multi_range_count Value +myisam_data_pointer_size Value +myisam_max_sort_file_size 
Value +myisam_mmap_size Value +myisam_recover_options Value +myisam_repair_threads Value +myisam_sort_buffer_size Value +myisam_stats_method Value +myisam_use_mmap Value +net_buffer_length Value +net_read_timeout Value +net_retry_count Value +net_write_timeout Value +new Value +old Value +old_alter_table Value +old_passwords Value +open_files_limit Value +optimizer_fix Value +optimizer_prune_level Value +optimizer_search_depth Value +optimizer_switch Value +pid_file Value +plugin_dir Value +port Value +preload_buffer_size Value +profiling Value +profiling_history_size Value +profiling_server Value +profiling_use_getrusage Value +protocol_version Value +pseudo_thread_id Value +query_alloc_block_size Value +query_cache_limit Value +query_cache_min_res_unit Value +query_cache_size Value +query_cache_strip_comments Value +query_cache_type Value +query_cache_wlock_invalidate Value +query_prealloc_size Value +query_response_time_range_base Value +rand_seed1 Value +rand_seed2 Value +range_alloc_block_size Value +read_buffer_size Value +read_only Value +read_rnd_buffer_size Value +relay_log Value +relay_log_index Value +relay_log_info_file Value +relay_log_purge Value +relay_log_space_limit Value +report_host Value +report_password Value +report_port Value +report_user Value +rpl_recovery_rank Value +secure_auth Value +secure_file_priv Value +server_id Value +skip_external_locking Value +skip_name_resolve Value +skip_networking Value +skip_show_database Value +slave_compressed_protocol Value +slave_exec_mode Value +slave_load_tmpdir Value +slave_net_timeout Value +slave_skip_errors Value +slave_transaction_retries Value +slow_launch_time Value +slow_query_log Value +slow_query_log_file Value +slow_query_log_microseconds_timestamp Value +socket Value +sort_buffer_size Value +sql_auto_is_null Value +sql_big_selects Value +sql_big_tables Value +sql_buffer_result Value +sql_log_bin Value +sql_log_off Value +sql_log_update Value +sql_low_priority_updates Value +sql_max_join_size Value +sql_mode Value +sql_notes Value +sql_quote_show_create Value +sql_safe_updates Value +sql_select_limit Value +sql_slave_skip_counter Value +sql_warnings Value +ssl_ca Value +ssl_capath Value +ssl_cert Value +ssl_cipher Value +ssl_key Value +storage_engine Value +suppress_log_warning_1592 Value +sync_binlog Value +sync_frm Value +system_time_zone Value +table_definition_cache Value +table_lock_wait_timeout Value +table_open_cache Value +table_type Value +thread_cache_size Value +thread_handling Value +thread_stack Value +thread_statistics Value +time_format Value +time_zone Value +timed_mutexes Value +timestamp Value +tmp_table_size Value +tmpdir Value +transaction_alloc_block_size Value +transaction_prealloc_size Value +tx_isolation Value +unique_checks Value +updatable_views_with_limit Value +use_global_log_slow_control Value +use_global_long_query_time Value +userstat_running Value +version Value +version_comment Value +version_compile_machine Value +version_compile_os Value +wait_timeout Value +warning_count Value diff --git a/mysql-test/suite/percona/percona_server_variables.test b/mysql-test/suite/percona/percona_server_variables.test new file mode 100644 index 00000000000..232cbb15e25 --- /dev/null +++ b/mysql-test/suite/percona/percona_server_variables.test @@ -0,0 +1,7 @@ +--source include/have_innodb.inc +--source include/have_debug.inc + +#check the list of variable names +--replace_column 2 Value +show variables; + diff --git a/mysql-test/suite/percona/percona_slave_innodb_stats-master.opt 
b/mysql-test/suite/percona/percona_slave_innodb_stats-master.opt new file mode 100644 index 00000000000..286a9c4484d --- /dev/null +++ b/mysql-test/suite/percona/percona_slave_innodb_stats-master.opt @@ -0,0 +1 @@ +--long_query_time=0 --log_slow_verbosity=innodb --log_slow_slave_statements diff --git a/mysql-test/suite/percona/percona_slave_innodb_stats-slave.opt b/mysql-test/suite/percona/percona_slave_innodb_stats-slave.opt new file mode 100644 index 00000000000..286a9c4484d --- /dev/null +++ b/mysql-test/suite/percona/percona_slave_innodb_stats-slave.opt @@ -0,0 +1 @@ +--long_query_time=0 --log_slow_verbosity=innodb --log_slow_slave_statements diff --git a/mysql-test/suite/percona/percona_slave_innodb_stats.result b/mysql-test/suite/percona/percona_slave_innodb_stats.result new file mode 100644 index 00000000000..c2406bdfd85 --- /dev/null +++ b/mysql-test/suite/percona/percona_slave_innodb_stats.result @@ -0,0 +1,21 @@ +stop slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +reset master; +reset slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +start slave; +DROP TABLE IF EXISTS t; +CREATE TABLE t(id INT,data CHAR(30)) ENGINE=InnoDB; +INSERT INTO t VALUES +(1,"aaaaabbbbbcccccdddddeeeeefffff"), +(2,"aaaaabbbbbcccccdddddeeeeefffff"), +(3,"aaaaabbbbbcccccdddddeeeeefffff"), +(4,"aaaaabbbbbcccccdddddeeeeefffff"), +(5,"aaaaabbbbbcccccdddddeeeeefffff"); +INSERT INTO t SELECT t2.id,t2.data from t as t1, t as t2; +INSERT INTO t SELECT t2.id,t2.data from t as t1, t as t2; +STOP SLAVE; +START SLAVE; +INSERT INTO t SELECT t.id,t.data from t; +DROP TABLE IF EXISTS t; +4 diff --git a/mysql-test/suite/percona/percona_slave_innodb_stats.test b/mysql-test/suite/percona/percona_slave_innodb_stats.test new file mode 100644 index 00000000000..defcd1fc406 --- /dev/null +++ b/mysql-test/suite/percona/percona_slave_innodb_stats.test @@ -0,0 +1,43 @@ +source include/have_innodb.inc; +source include/master-slave.inc; + +connection master; +-- disable_warnings +DROP TABLE IF EXISTS t; +-- enable_warnings +CREATE TABLE t(id INT,data CHAR(30)) ENGINE=InnoDB; +INSERT INTO t VALUES +(1,"aaaaabbbbbcccccdddddeeeeefffff"), +(2,"aaaaabbbbbcccccdddddeeeeefffff"), +(3,"aaaaabbbbbcccccdddddeeeeefffff"), +(4,"aaaaabbbbbcccccdddddeeeeefffff"), +(5,"aaaaabbbbbcccccdddddeeeeefffff"); +INSERT INTO t SELECT t2.id,t2.data from t as t1, t as t2; +INSERT INTO t SELECT t2.id,t2.data from t as t1, t as t2; +sync_slave_with_master; + +connection slave; +STOP SLAVE; +-- source include/wait_for_slave_to_stop.inc +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +wait +EOF +--shutdown_server 10 +--source include/wait_until_disconnected.inc +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +restart +EOF +--enable_reconnect +--source include/wait_until_connected_again.inc +START SLAVE; +-- source include/wait_for_slave_to_start.inc + +connection master; +INSERT INTO t SELECT t.id,t.data from t; +sync_slave_with_master; + +connection master; +DROP TABLE IF EXISTS t; +sync_slave_with_master; + +exec cat var/mysqld.2/mysqld-slow.log | grep InnoDB_IO_r_ops | wc -l; diff --git a/mysql-test/suite/percona/percona_slow_query_log-use_global_long_query_time.result b/mysql-test/suite/percona/percona_slow_query_log-use_global_long_query_time.result index 759f0636b5c..e294525fdd2 100644 --- a/mysql-test/suite/percona/percona_slow_query_log-use_global_long_query_time.result +++ b/mysql-test/suite/percona/percona_slow_query_log-use_global_long_query_time.result @@ -22,3 +22,38 @@ set global long_query_time=2; set global 
use_global_long_query_time=0; cat MYSQLD_DATADIR/percona_slow_query_log-use_global_long_query_time.log | grep -c Query_time 3 +show global variables like 'use_global_log_slow_control'; +Variable_name Value +use_global_log_slow_control none +show global variables like 'use_global_long_query_time'; +Variable_name Value +use_global_long_query_time OFF +set global use_global_log_slow_control = long_query_time; +show global variables like 'use_global_log_slow_control'; +Variable_name Value +use_global_log_slow_control long_query_time +show global variables like 'use_global_long_query_time'; +Variable_name Value +use_global_long_query_time ON +set global use_global_log_slow_control = log_slow_filter; +show global variables like 'use_global_log_slow_control'; +Variable_name Value +use_global_log_slow_control log_slow_filter +show global variables like 'use_global_long_query_time'; +Variable_name Value +use_global_long_query_time OFF +set global use_global_long_query_time = ON; +show global variables like 'use_global_log_slow_control'; +Variable_name Value +use_global_log_slow_control log_slow_filter,long_query_time +show global variables like 'use_global_long_query_time'; +Variable_name Value +use_global_long_query_time ON +set global use_global_long_query_time = OFF; +show global variables like 'use_global_log_slow_control'; +Variable_name Value +use_global_log_slow_control log_slow_filter +show global variables like 'use_global_long_query_time'; +Variable_name Value +use_global_long_query_time OFF +set global use_global_log_slow_control = none; diff --git a/mysql-test/suite/percona/percona_slow_query_log-use_global_long_query_time.test b/mysql-test/suite/percona/percona_slow_query_log-use_global_long_query_time.test index 70021da1e4c..da627ae72b0 100644 --- a/mysql-test/suite/percona/percona_slow_query_log-use_global_long_query_time.test +++ b/mysql-test/suite/percona/percona_slow_query_log-use_global_long_query_time.test @@ -20,3 +20,24 @@ let $cmd = cat $MYSQLD_DATADIR/percona_slow_query_log-use_global_long_query_time --replace_result $MYSQLD_DATADIR MYSQLD_DATADIR exec echo '$cmd'; exec $cmd; + +show global variables like 'use_global_log_slow_control'; +show global variables like 'use_global_long_query_time'; + +set global use_global_log_slow_control = long_query_time; +show global variables like 'use_global_log_slow_control'; +show global variables like 'use_global_long_query_time'; + +set global use_global_log_slow_control = log_slow_filter; +show global variables like 'use_global_log_slow_control'; +show global variables like 'use_global_long_query_time'; + +set global use_global_long_query_time = ON; +show global variables like 'use_global_log_slow_control'; +show global variables like 'use_global_long_query_time'; + +set global use_global_long_query_time = OFF; +show global variables like 'use_global_log_slow_control'; +show global variables like 'use_global_long_query_time'; + +set global use_global_log_slow_control = none; diff --git a/mysql-test/suite/percona/percona_sql_no_fcache.result b/mysql-test/suite/percona/percona_sql_no_fcache.result new file mode 100644 index 00000000000..bc1413fb96d --- /dev/null +++ b/mysql-test/suite/percona/percona_sql_no_fcache.result @@ -0,0 +1,12 @@ +drop table if exists t1; +create table t (a int not null); +insert into t values (1),(2),(3); +SELECT SQL_NO_FCACHE SLEEP(0); +SLEEP(0) +0 +SELECT /*!40001 SQL_NO_CACHE */ /*!50084 SQL_NO_FCACHE */ * FROM t; +a +1 +2 +3 +DROP TABLE t; diff --git a/mysql-test/suite/percona/percona_sql_no_fcache.test 
b/mysql-test/suite/percona/percona_sql_no_fcache.test new file mode 100644 index 00000000000..da0c2ecef7d --- /dev/null +++ b/mysql-test/suite/percona/percona_sql_no_fcache.test @@ -0,0 +1,11 @@ +--disable_warnings +drop table if exists t1; +--enable_warnings + +create table t (a int not null); +insert into t values (1),(2),(3); + +SELECT SQL_NO_FCACHE SLEEP(0); +SELECT /*!40001 SQL_NO_CACHE */ /*!50084 SQL_NO_FCACHE */ * FROM t; + +DROP TABLE t;
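(Illustration, not part of the committed test files: SQL_NO_FCACHE is the Percona Server hint exercised by the test above; wrapping it in the /*!50084 ... */ version comment, as the test does, lets builds that do not know the hint ignore it, so the same statement stays portable. A minimal usage sketch against a hypothetical large table named logs:)
SELECT /*!50084 SQL_NO_FCACHE */ COUNT(*) FROM logs;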
\ No newline at end of file diff --git a/mysql-test/suite/percona/percona_status_wait_query_cache_mutex.result b/mysql-test/suite/percona/percona_status_wait_query_cache_mutex.result new file mode 100644 index 00000000000..348dcef4d30 --- /dev/null +++ b/mysql-test/suite/percona/percona_status_wait_query_cache_mutex.result @@ -0,0 +1,27 @@ +set GLOBAL query_cache_size=1355776; +flush query cache; +flush query cache; +reset query cache; +flush status; +DROP TABLE IF EXISTS t; +CREATE TABLE t(id INT, number INT); +INSERT INTO t VALUES (0,1); +INSERT INTO t VALUES (1,2); +INSERT INTO t VALUES (2,3); +SELECT number from t where id > 0; +number +2 +3 +SET SESSION debug="+d,status_wait_query_cache_mutex_sleep"; +SELECT number from t where id > 0; +SET SESSION debug="+d,status_wait_query_cache_mutex_sleep"; +SELECT number from t where id > 0; +SET SESSION debug="+d,status_wait_query_cache_mutex_sleep"; +SHOW PROCESSLIST; +Id User Host db Command Time State Info +Id root localhost test Sleep Time NULL +Id root localhost test Query Time Waiting on query cache mutex SELECT number from t where id > 0 +Id root localhost test Query Time Waiting on query cache mutex SELECT number from t where id > 0 +Id root localhost test Query Time NULL SHOW PROCESSLIST +DROP TABLE t; +set GLOBAL query_cache_size=0; diff --git a/mysql-test/suite/percona/percona_status_wait_query_cache_mutex.test b/mysql-test/suite/percona/percona_status_wait_query_cache_mutex.test new file mode 100644 index 00000000000..64a9fe7db71 --- /dev/null +++ b/mysql-test/suite/percona/percona_status_wait_query_cache_mutex.test @@ -0,0 +1,37 @@ +--source include/have_query_cache.inc +--source include/have_debug.inc +set GLOBAL query_cache_size=1355776; +--source include/percona_query_cache_with_comments_clear.inc + +-- disable_warnings +DROP TABLE IF EXISTS t; +-- enable_warnings +CREATE TABLE t(id INT, number INT); +INSERT INTO t VALUES (0,1); +INSERT INTO t VALUES (1,2); +INSERT INTO t VALUES (2,3); +SELECT number from t where id > 0; +--connect (conn0,localhost,root,,) +--connect (conn1,localhost,root,,) +--connect (conn2,localhost,root,,) + +--connection conn0 +--error 0, ER_UNKNOWN_SYSTEM_VARIABLE +SET SESSION debug="+d,status_wait_query_cache_mutex_sleep"; +SEND SELECT number from t where id > 0; +SLEEP 1.0; + +--connection conn1 +--error 0, ER_UNKNOWN_SYSTEM_VARIABLE +SET SESSION debug="+d,status_wait_query_cache_mutex_sleep"; +SEND SELECT number from t where id > 0; +SLEEP 1.0; + +--connection conn2 +--error 0, ER_UNKNOWN_SYSTEM_VARIABLE +SET SESSION debug="+d,status_wait_query_cache_mutex_sleep"; +--replace_column 1 Id 6 Time +SHOW PROCESSLIST; + +DROP TABLE t; +set GLOBAL query_cache_size=0;
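(Illustration, not part of the committed test files: the test above relies on the debug-build sync point status_wait_query_cache_mutex_sleep to make the new "Waiting on query cache mutex" thread state observable in SHOW PROCESSLIST. Outside the test harness the same state could be looked for with a sketch like the following, assuming only the standard INFORMATION_SCHEMA.PROCESSLIST view:)
SELECT ID, TIME, INFO FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE = 'Waiting on query cache mutex';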
\ No newline at end of file diff --git a/mysql-test/suite/percona/percona_xtradb_bug317074.test b/mysql-test/suite/percona/percona_xtradb_bug317074.test index 91c59cefb73..abf5d8ee1f6 100644 --- a/mysql-test/suite/percona/percona_xtradb_bug317074.test +++ b/mysql-test/suite/percona/percona_xtradb_bug317074.test @@ -22,15 +22,18 @@ CREATE PROCEDURE insert_many(p1 int) BEGIN SET @x = 0; SET @y = 0; +start transaction; REPEAT insert into test1 set b=1; SET @x = @x + 1; SET @y = @y + 1; IF @y >= 1000 THEN commit; + start transaction; SET @y = 0; END IF; UNTIL @x >= p1 END REPEAT; +commit; END| delimiter ;| call insert_many(100000); diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog index bc69aaca96a..5ebcf1e87a2 100644 --- a/storage/xtradb/ChangeLog +++ b/storage/xtradb/ChangeLog @@ -1,3 +1,128 @@ +2010-06-24 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54679 alter table causes compressed row_format to revert + to compact + +2010-06-22 The InnoDB Team + + * dict/dict0dict.c, dict/dict0mem.c, include/dict0mem.h, + include/univ.i, page/page0zip.c, row/row0merge.c: + Fix Bug#47991 InnoDB Dictionary Cache memory usage increases + indefinitely when renaming tables + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54686: "field->col->mtype == type" assertion error at + row/row0sel.c + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc, innodb_bug54044.result, innodb_bug54044.test: + Fix Bug#54044 Create temporary tables and using innodb crashes. + +2010-06-21 The InnoDB Team + + * dict/dict0load.c, fil/fil0fil.c: + Fix Bug#54658: InnoDB: Warning: allocated tablespace %lu, + old maximum was 0 (introduced in Bug #53578 fix) + +2010-06-16 The InnoDB Team + + * row/row0merge.c: + Fix Bug#54330 Broken fast index creation + +2010-06-10 The InnoDB Team + + * include/log0log.ic, row/row0ins.c, row/row0purge.c, + row/row0uins.c, row/row0umod.c, row/row0upd.c: + Fix Bug#39168 ERROR: the age of the last checkpoint ... 
exceeds + the log group capacity + +2010-06-08 The InnoDB Team + + * dict/dict0load.c: + Fix Bug#54009 Server crashes when data is selected from non backed + up table for InnoDB plugin + +2010-06-02 The InnoDB Team + + * include/db0err.h, include/lock0lock.h, include/row0mysql.h, + lock/lock0lock.c, row/row0ins.c, row/row0mysql.c, row/row0sel.c: + Fix Bug#53674 InnoDB: Error: unlock row could not find a + 4 mode lock on the record + +2010-06-01 The InnoDB Team + + * include/sync0rw.h, sync/sync0rw.c: + Fix Bug#48197 Concurrent rw_lock_free may cause assertion failure + +2010-06-01 The InnoDB Team + + * row/row0umod.c: + Fix Bug#53812 assert row/row0umod.c line 660 in txn rollback + after crash recovery + +2010-05-25 The InnoDB Team + + * handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c: + Fix Bug#53592: crash replacing duplicates into table after fast + alter table added unique key + +2010-05-24 The InnoDB Team + + * dict/dict0boot.c, dict/dict0crea.c, fil/fil0fil.c, + include/dict0boot.h, include/fil0fil.h, row/row0mysql.c: + Fix Bug#53578: assert on invalid page access, in fil_io() + +2010-05-14 The InnoDB Team + * mysql-test/innodb_bug48024.test, mysql-test/innodb_bug48024.result, + dict/dict0dict.c, handler/ha_innodb.cc, handler/ha_innodb.h, + include/dict0dict.h, include/ha_prototypes.h, include/row0mysql.h, + include/trx0trx.h, row/row0mysql.c, trx/trx0i_s.c, trx/trx0trx.c: + Fix Bug#48024 Innodb doesn't work with multi-statements + Fix Bug#53644 InnoDB thinks that /*/ starts and ends a comment + +2010-05-12 The InnoDB Team + + * handler/handler0alter.cc: + Fix Bug#53591 crash with fast alter table and text/blob prefix + primary key + +2010-05-12 The InnoDB Team + + * row/row0merge.c: + Fix Bug#53471 row_merge_drop_temp_indexes() refers freed memory, SEGVs + +2010-05-11 The InnoDB Team + + * mysql-test/innodb_bug53290.test, mysql-test/innodb_bug53290.result, + include/rem0cmp.h, rem/rem0cmp.c, row/row0merge.c: + Fix Bug#53290 wrong duplicate key error when adding a unique index + via fast alter table + +2010-05-11 The InnoDB Team + * buf/buf0lru.c, include/buf0buf.ic: + Fix Bug#53307 valgrind: warnings in main.partition_innodb_plugin + +2010-05-05 The InnoDB Team + + * row/row0merge.c: + Fix Bug#53256 in a stress test, assert dict/dict0dict.c:815 + table2 == NULL + +2010-05-05 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#53165 Setting innodb_change_buffering=DEFAULT produces + incorrect result + +2010-05-04 The InnoDB Team + + * fsp/fsp0fsp.c: + Fix Bug#53306 valgrind: warnings in innodb.innodb + 2010-05-03 The InnoDB Team * buf0buf.c: @@ -48,12 +173,6 @@ Only check the record size at index creation time when innodb_strict_mode is set or when ROW_FORMAT is DYNAMIC or COMPRESSED. -2010-04-20 The InnoDB Team - - * btr/btr0btr.c, include/univ.i: - Implement UNIV_BTR_AVOID_COPY, for avoiding writes when a B-tree - node is split at the first or last record. - 2010-04-15 The InnoDB Team * trx/trx0rec.c: @@ -72,6 +191,10 @@ * mysql-test/innodb_bug38231.test: Remove non-determinism in the test case. 
+2010-03-29 The InnoDB Team + + InnoDB Plugin 1.0.7 released + 2010-03-18 The InnoDB Team * CMakeLists.txt: @@ -194,6 +317,14 @@ Fix Bug#49497 Error 1467 (ER_AUTOINC_READ_FAILED) on inserting a negative value +2010-01-28 The InnoDB Team + * handler/ha_innodb.h, handler/ha_innodb.cc, + handler/handler0alter.cc, + mysql-test/innodb_bug47622.test, + mysql-test/innodb_bug47622.result: + Fix Bug#47622 the new index is added before the existing ones + in MySQL, but after one in SE + 2010-01-27 The InnoDB Team * include/row0mysql.h, log/log0recv.c, row/row0mysql.c: diff --git a/storage/xtradb/btr/btr0btr.c b/storage/xtradb/btr/btr0btr.c index 12e57dcc490..ff047095aa4 100644 --- a/storage/xtradb/btr/btr0btr.c +++ b/storage/xtradb/btr/btr0btr.c @@ -2030,6 +2030,7 @@ func_start: goto insert_empty; } } else if (UNIV_UNLIKELY(insert_left)) { + ut_a(n_iterations > 0); first_rec = page_rec_get_next(page_get_infimum_rec(page)); move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); } else { @@ -2076,17 +2077,7 @@ insert_empty: } /* 5. Move then the records to the new page */ - if (direction == FSP_DOWN -#ifdef UNIV_BTR_AVOID_COPY - && page_rec_is_supremum(move_limit)) { - /* Instead of moving all records, make the new page - the empty page. */ - - left_block = block; - right_block = new_block; - } else if (direction == FSP_DOWN -#endif /* UNIV_BTR_AVOID_COPY */ - ) { + if (direction == FSP_DOWN) { /* fputs("Split left\n", stderr); */ if (0 @@ -2129,14 +2120,6 @@ insert_empty: right_block = block; lock_update_split_left(right_block, left_block); -#ifdef UNIV_BTR_AVOID_COPY - } else if (!split_rec) { - /* Instead of moving all records, make the new page - the empty page. */ - - left_block = new_block; - right_block = block; -#endif /* UNIV_BTR_AVOID_COPY */ } else { /* fputs("Split right\n", stderr); */ diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index 0e75b4e8442..9b87d969a64 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -2136,9 +2136,8 @@ any_extern: err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr, mtr, &roll_ptr); if (err != DB_SUCCESS) { -err_exit: - mem_heap_free(heap); - return(err); + + goto err_exit; } /* Ok, we may do the replacement. 
Store on the page infimum the @@ -2184,9 +2183,10 @@ err_exit: page_cur_move_to_next(page_cursor); + err = DB_SUCCESS; +err_exit: mem_heap_free(heap); - - return(DB_SUCCESS); + return(err); } /*************************************************************//** @@ -4259,6 +4259,8 @@ btr_store_big_rec_extern_fields( field_ref += local_len; } extern_len = big_rec_vec->fields[i].len; + UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data, + extern_len); ut_a(extern_len > 0); @@ -4895,6 +4897,7 @@ btr_copy_blob_prefix( mtr_commit(&mtr); if (page_no == FIL_NULL || copy_len != part_len) { + UNIV_MEM_ASSERT_RW(buf, copied_len); return(copied_len); } @@ -5078,6 +5081,7 @@ btr_copy_externally_stored_field_prefix_low( space_id, page_no, offset); inflateEnd(&d_stream); mem_heap_free(heap); + UNIV_MEM_ASSERT_RW(buf, d_stream.total_out); return(d_stream.total_out); } else { return(btr_copy_blob_prefix(buf, len, space_id, diff --git a/storage/xtradb/btr/btr0sea.c b/storage/xtradb/btr/btr0sea.c index 61909903a67..36dadd47e69 100644 --- a/storage/xtradb/btr/btr0sea.c +++ b/storage/xtradb/btr/btr0sea.c @@ -182,6 +182,7 @@ void btr_search_sys_free(void) /*=====================*/ { + rw_lock_free(&btr_search_latch); mem_free(btr_search_latch_temp); btr_search_latch_temp = NULL; mem_heap_free(btr_search_sys->hash_index->heap); diff --git a/storage/xtradb/buf/buf0buddy.c b/storage/xtradb/buf/buf0buddy.c index e4a79026d3a..8ce2d1888ef 100644 --- a/storage/xtradb/buf/buf0buddy.c +++ b/storage/xtradb/buf/buf0buddy.c @@ -490,11 +490,15 @@ buf_buddy_relocate( pool), so there is nothing wrong about this. The mach_read_from_4() calls here will only trigger bogus Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */ - bpage = buf_page_hash_get( - mach_read_from_4((const byte*) src - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID), - mach_read_from_4((const byte*) src - + FIL_PAGE_OFFSET)); + ulint space = mach_read_from_4( + (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + ulint page_no = mach_read_from_4( + (const byte*) src + FIL_PAGE_OFFSET); + /* Suppress Valgrind warnings about conditional jump + on uninitialized value. */ + UNIV_MEM_VALID(&space, sizeof space); + UNIV_MEM_VALID(&page_no, sizeof page_no); + bpage = buf_page_hash_get(space, page_no); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index a5a9e1d9004..dd98da19a21 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "trx0trx.h" #include "srv0start.h" +#include "que0que.h" +#include "read0read.h" +#include "row0row.h" +#include "ha_prototypes.h" /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); @@ -78,9 +82,9 @@ inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) block_hash_byte = block_hash >> 3; block_hash_offset = (byte) block_hash & 0x07; if (block_hash_byte >= DPAH_SIZE) - fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", (unsigned long) block_hash_byte, (unsigned long) block_hash_offset); + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); if (block_hash_offset > 7) - fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", (unsigned long) block_hash_byte, (unsigned long) block_hash_offset); + fprintf(stderr, "!!! 
block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) trx->distinct_page_access++; trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; @@ -310,14 +314,30 @@ read-ahead or flush occurs */ UNIV_INTERN ibool buf_debug_prints = FALSE; #endif /* UNIV_DEBUG */ -/** A chunk of buffers. The buffer pool is allocated in chunks. */ -struct buf_chunk_struct{ - ulint mem_size; /*!< allocated size of the chunk */ - ulint size; /*!< size of frames[] and blocks[] */ - void* mem; /*!< pointer to the memory area which - was allocated for the frames */ - buf_block_t* blocks; /*!< array of buffer control blocks */ +/* Buffer pool shared memory segment information */ +typedef struct buf_shm_info_struct buf_shm_info_t; + +struct buf_shm_info_struct { + char head_str[8]; + ulint binary_id; + ibool is_new; /* during initializing */ + ibool clean; /* clean shutdowned and free */ + ibool reusable; /* reusable */ + ulint buf_pool_size; /* backup value */ + ulint page_size; /* backup value */ + ulint frame_offset; /* offset of the first frame based on chunk->mem */ + ulint zip_hash_offset; + ulint zip_hash_n; + + ulint checksum; + + buf_pool_t buf_pool_backup; + buf_chunk_t chunk_backup; + + ib_uint64_t dummy; }; + +#define BUF_SHM_INFO_HEAD "XTRA_SHM" #endif /* !UNIV_HOTBACKUP */ /********************************************************************//** @@ -764,6 +784,45 @@ buf_block_init( #endif /* UNIV_SYNC_DEBUG */ } +static +void +buf_block_reuse( +/*============*/ + buf_block_t* block, + ptrdiff_t frame_offset) +{ + /* block_init */ + block->frame = ((void*)(block->frame) + frame_offset); + + UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block); + + block->index = NULL; + +#ifdef UNIV_DEBUG + /* recreate later */ + block->page.in_page_hash = FALSE; + block->page.in_zip_hash = FALSE; +#endif /* UNIV_DEBUG */ + +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + block->n_pointers = 0; +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + + if (block->page.zip.data) + block->page.zip.data = ((void*)(block->page.zip.data) + frame_offset); + + block->is_hashed = FALSE; + + mutex_create(&block->mutex, SYNC_BUF_BLOCK); + + rw_lock_create(&block->lock, SYNC_LEVEL_VARYING); + ut_ad(rw_lock_validate(&(block->lock))); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK); +#endif /* UNIV_SYNC_DEBUG */ +} + /********************************************************************//** Allocates a chunk of buffer frames. @return chunk, or NULL on failure */ @@ -776,26 +835,167 @@ buf_chunk_init( { buf_block_t* block; byte* frame; + ulint zip_hash_n = 0; + ulint zip_hash_mem_size = 0; + hash_table_t* zip_hash_tmp = NULL; ulint i; + buf_shm_info_t* shm_info = NULL; /* Round down to a multiple of page size, although it already should be. */ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); + + if (srv_buffer_pool_shm_key) { + /* zip_hash size */ + zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2; + zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + } + /* Reserve space for the block descriptors. 
*/ mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block) + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + mem_size += ut_2pow_round(sizeof(buf_shm_info_t) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + mem_size += zip_hash_mem_size; + } chunk->mem_size = mem_size; + + if (srv_buffer_pool_shm_key) { + ulint binary_id; + ibool is_new; + + ut_a(buf_pool->n_chunks == 1); + + fprintf(stderr, + "InnoDB: Notice: innodb_buffer_pool_shm_key option is specified.\n" + "InnoDB: This option may not be safe to keep consistency of datafiles.\n" + "InnoDB: Because InnoDB cannot lock datafiles when shutdown until reusing shared memory segment.\n" + "InnoDB: You should ensure no change of InnoDB files while using innodb_buffer_pool_shm_key.\n"); + + /* FIXME: This is vague id still */ + binary_id = (ulint) ((void*)mtr_commit - (void*)btr_root_get) + + (ulint) ((void*)os_get_os_version - (void*)buf_calc_page_new_checksum) + + (ulint) ((void*)page_dir_find_owner_slot - (void*)dfield_data_is_binary_equal) + + (ulint) ((void*)que_graph_publish - (void*)dict_casedn_str) + + (ulint) ((void*)read_view_oldest_copy_or_open_new - (void*)fil_space_get_version) + + (ulint) ((void*)rec_get_n_extern_new - (void*)fsp_get_size_low) + + (ulint) ((void*)row_get_trx_id_offset - (void*)ha_create_func) + + (ulint) ((void*)srv_set_io_thread_op_info - (void*)thd_is_replication_slave_thread) + + (ulint) ((void*)mutex_create_func - (void*)ibuf_inside) + + (ulint) ((void*)trx_set_detailed_error - (void*)lock_check_trx_id_sanity) + + (ulint) ((void*)ut_time - (void*)mem_heap_strdup); + + chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new); + + if (UNIV_UNLIKELY(chunk->mem == NULL)) { + return(NULL); + } + +#ifdef UNIV_SET_MEM_TO_ZERO + if (is_new) { + memset(chunk->mem, '\0', chunk->mem_size); + } +#endif + + shm_info = chunk->mem; + + zip_hash_tmp = (hash_table_t*)((void*)chunk->mem + chunk->mem_size - zip_hash_mem_size); + + if (is_new) { + strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8); + shm_info->binary_id = binary_id; + shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ + shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. 
*/ + shm_info->buf_pool_size = srv_buf_pool_size; + shm_info->page_size = srv_page_size; + shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size; + shm_info->zip_hash_n = zip_hash_n; + } else { + ulint checksum; + + if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n"); + return(NULL); + } + if (shm_info->binary_id != binary_id) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for this binary.\n"); + return(NULL); + } + if (shm_info->is_new) { + fprintf(stderr, + "InnoDB: Error: The shared memory was not initialized yet.\n"); + return(NULL); + } + if (!shm_info->clean) { + fprintf(stderr, + "InnoDB: Error: The shared memory was not shut down cleanly.\n"); + return(NULL); + } + if (!shm_info->reusable) { + fprintf(stderr, + "InnoDB: Error: The shared memory has unrecoverable contents.\n"); + return(NULL); + } + if (shm_info->buf_pool_size != srv_buf_pool_size) { + fprintf(stderr, + "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n", + shm_info->buf_pool_size, srv_buf_pool_size); + return(NULL); + } + if (shm_info->page_size != srv_page_size) { + fprintf(stderr, + "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n", + shm_info->page_size, srv_page_size); + return(NULL); + } + + ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size); + ut_a(shm_info->zip_hash_n == zip_hash_n); + + /* check checksum */ + checksum = ut_fold_binary(chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + if (shm_info->checksum != checksum) { + fprintf(stderr, + "InnoDB: Error: checksum of the shared memory is not match. " + "(stored=%lu calculated=%lu)\n", + shm_info->checksum, checksum); + return(NULL); + } + + /* flag to use the segment. */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ + } + + /* init zip_hash contents */ + if (is_new) { + hash_create_init(zip_hash_tmp, zip_hash_n); + } else { + /* adjust offset is done later */ + hash_create_reuse(zip_hash_tmp); + } + } else { chunk->mem = os_mem_alloc_large(&chunk->mem_size); if (UNIV_UNLIKELY(chunk->mem == NULL)) { return(NULL); } + } /* Allocate the block descriptors from the start of the memory block. */ + if (srv_buffer_pool_shm_key) { + chunk->blocks = chunk->mem + sizeof(buf_shm_info_t); + } else { chunk->blocks = chunk->mem; + } /* Align a pointer to the first frame. Note that when os_large_page_size is smaller than UNIV_PAGE_SIZE, @@ -803,8 +1003,13 @@ buf_chunk_init( it is bigger, we may allocate more blocks than requested. */ frame = ut_align(chunk->mem, UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + /* reserve zip_hash space and always -1 for reproductibity */ + chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1; + } else { chunk->size = chunk->mem_size / UNIV_PAGE_SIZE - (frame != chunk->mem); + } /* Subtract the space needed for block descriptors. 
*/ { @@ -818,6 +1023,98 @@ buf_chunk_init( chunk->size = size; } + if (shm_info && !(shm_info->is_new)) { + /* convert the shared memory segment for reuse */ + ptrdiff_t phys_offset; + ptrdiff_t logi_offset; + ptrdiff_t blocks_offset; + void* previous_frame_address; + + if (chunk->size < shm_info->chunk_backup.size) { + fprintf(stderr, + "InnoDB: Error: The buffer pool became smaller because of allocated address.\n" + "InnoDB: Retrying may avoid this situation.\n"); + shm_info->clean = TRUE; /* release the flag for retrying */ + return(NULL); + } + + chunk->size = shm_info->chunk_backup.size; + phys_offset = (void*)frame - (void*)((void*)chunk->mem + shm_info->frame_offset); + logi_offset = (void*)frame - (void*)chunk->blocks[0].frame; + previous_frame_address = chunk->blocks[0].frame; + blocks_offset = (void*)chunk->blocks - (void*)shm_info->chunk_backup.blocks; + + if (phys_offset || logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment should be converted.\n" + "InnoDB: Previous frames in address : %p\n" + "InnoDB: Previous frames were located : %p\n" + "InnoDB: Current frames should be located: %p\n" + "InnoDB: Pysical offset : %ld (%#lx)\n" + "InnoDB: Logical offset (frames) : %ld (%#lx)\n" + "InnoDB: Logical offset (blocks) : %ld (%#lx)\n", + (void*)((void*)chunk->mem + shm_info->frame_offset), + (void*)chunk->blocks[0].frame, (void*)frame, + phys_offset, phys_offset, logi_offset, logi_offset, + blocks_offset, blocks_offset); + } else { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n"); + } + + if (phys_offset) { + fprintf(stderr, + "InnoDB: Aligning physical offset..."); + + memmove((void*)frame, (void*)((void*)chunk->mem + shm_info->frame_offset), + chunk->size * UNIV_PAGE_SIZE); + + fprintf(stderr, + " Done.\n"); + } + + if (logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Aligning logical offset..."); + + /* buf_block_t */ + block = chunk->blocks; + + for (i = chunk->size; i--; ) { + buf_block_reuse(block, logi_offset); + block++; + } + + /* buf_pool_t buf_pool_backup */ + UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU, + previous_frame_address, logi_offset, blocks_offset); + if (shm_info->buf_pool_backup.LRU_old) + shm_info->buf_pool_backup.LRU_old = + ((void*)(shm_info->buf_pool_backup.LRU_old) + + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address) + ? logi_offset : blocks_offset)); + + UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU, + previous_frame_address, logi_offset, blocks_offset); + + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean, + previous_frame_address, logi_offset, blocks_offset); + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i], + previous_frame_address, logi_offset, blocks_offset); + } + + HASH_OFFSET(zip_hash_tmp, buf_page_t, hash, + previous_frame_address, logi_offset, blocks_offset); + + fprintf(stderr, + " Done.\n"); + } + } else { /* Init block structs and assign frames for them. Then we assign the frames to the first blocks (we already mapped the memory above). 
*/ @@ -841,6 +1138,11 @@ buf_chunk_init( block++; frame += UNIV_PAGE_SIZE; } + } + + if (shm_info) { + shm_info->frame_offset = (void*)chunk->blocks[0].frame - (void*)chunk->mem; + } return(chunk); } @@ -940,6 +1242,11 @@ buf_chunk_not_freed( ready = buf_flush_ready_for_replace(&block->page); mutex_exit(&block->mutex); + if (block->page.is_corrupt) { + /* corrupt page may remain, it can be skipped */ + break; + } + if (!ready) { return(block); @@ -1017,6 +1324,8 @@ buf_chunk_free( UNIV_MEM_UNDESC(block); } + ut_a(!srv_buffer_pool_shm_key); + os_mem_free_large(chunk->mem, chunk->mem_size); } @@ -1066,7 +1375,10 @@ buf_pool_init(void) srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE; buf_pool->page_hash = hash_create(2 * buf_pool->curr_size); + /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */ + if (!srv_buffer_pool_shm_key) { buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); + } buf_pool->last_printout_time = time(NULL); @@ -1081,6 +1393,86 @@ buf_pool_init(void) --------------------------- */ /* All fields are initialized by mem_zalloc(). */ + if (srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a(chunk->blocks == chunk->mem + sizeof(buf_shm_info_t)); + shm_info = chunk->mem; + + buf_pool->zip_hash = (hash_table_t*)((void*)chunk->mem + shm_info->zip_hash_offset); + + if(shm_info->is_new) { + shm_info->is_new = FALSE; /* initialization was finished */ + } else { + buf_block_t* block = chunk->blocks; + buf_page_t* b; + + /* shm_info->buf_pool_backup should be converted */ + /* at buf_chunk_init(). So copy simply. */ + buf_pool->flush_list = shm_info->buf_pool_backup.flush_list; + buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock; + buf_pool->free = shm_info->buf_pool_backup.free; + buf_pool->LRU = shm_info->buf_pool_backup.LRU; + buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old; + buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len; + buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU; + buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean; + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i]; + } + + for (i = 0; i < chunk->size; i++, block++) { + if (buf_block_get_state(block) + == BUF_BLOCK_FILE_PAGE) { + ut_d(block->page.in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold( + block->page.space, + block->page.offset), + &block->page); + } + } + + for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; + b = UT_LIST_GET_NEXT(zip_list, b)) { + ut_ad(!b->in_flush_list); + ut_ad(b->in_LRU_list); + + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, b->offset), b); + } + + for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; + b = UT_LIST_GET_NEXT(flush_list, b)) { + ut_ad(b->in_flush_list); + ut_ad(b->in_LRU_list); + + switch (buf_page_get_state(b)) { + case BUF_BLOCK_ZIP_DIRTY: + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, + b->offset), b); + break; + case BUF_BLOCK_FILE_PAGE: + /* uncompressed page */ + break; + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + ut_error; + break; + } + } + + + } + } + mutex_exit(&LRU_list_mutex); rw_lock_x_unlock(&page_hash_latch); buf_pool_mutex_exit(); @@ -1105,6 +1497,30 @@ buf_pool_free(void) 
buf_chunk_t* chunk; buf_chunk_t* chunks; + if (srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a(buf_pool->n_chunks == 1); + + chunk = buf_pool->chunks; + shm_info = chunk->mem; + ut_a(chunk->blocks == chunk->mem + sizeof(buf_shm_info_t)); + + /* validation the shared memory segment doesn't have unrecoverable contents. */ + /* Currently, validation became not needed */ + shm_info->reusable = TRUE; + + memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t)); + memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t)); + + if (srv_fast_shutdown < 2) { + shm_info->checksum = ut_fold_binary(chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + shm_info->clean = TRUE; + } + + os_shm_free(chunk->mem, chunk->mem_size); + } else { chunks = buf_pool->chunks; chunk = chunks + buf_pool->n_chunks; @@ -1113,10 +1529,13 @@ buf_pool_free(void) would fail at shutdown. */ os_mem_free_large(chunk->mem, chunk->mem_size); } + } mem_free(buf_pool->chunks); hash_table_free(buf_pool->page_hash); + if (!srv_buffer_pool_shm_key) { hash_table_free(buf_pool->zip_hash); + } mem_free(buf_pool); buf_pool = NULL; } @@ -1311,6 +1730,11 @@ try_again: //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); + if (srv_buffer_pool_shm_key) { + /* Cannot support shrink */ + goto func_done; + } + shrink_again: if (buf_pool->n_chunks <= 1) { @@ -1554,6 +1978,11 @@ void buf_pool_resize(void) /*=================*/ { + if (srv_buffer_pool_shm_key) { + /* Cannot support resize */ + return; + } + //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); @@ -2458,7 +2887,7 @@ wait_until_unfixed: block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); - rw_lock_x_lock(&block->lock); + rw_lock_x_lock_func(&block->lock, 0, file, line); UNIV_MEM_INVALID(bpage, sizeof *bpage); diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c index 17588475bbf..0a03d583549 100644 --- a/storage/xtradb/buf/buf0flu.c +++ b/storage/xtradb/buf/buf0flu.c @@ -257,6 +257,17 @@ buf_flush_insert_into_flush_list( ut_d(block->page.in_flush_list = TRUE); UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page); +#ifdef UNIV_DEBUG_VALGRIND + { + ulint zip_size = buf_block_get_zip_size(block); + + if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); + } else { + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); + } + } +#endif /* UNIV_DEBUG_VALGRIND */ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(buf_flush_validate_low()); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ @@ -286,6 +297,18 @@ buf_flush_insert_sorted_into_flush_list( ut_ad(!block->page.in_flush_list); ut_d(block->page.in_flush_list = TRUE); +#ifdef UNIV_DEBUG_VALGRIND + { + ulint zip_size = buf_block_get_zip_size(block); + + if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); + } else { + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); + } + } +#endif /* UNIV_DEBUG_VALGRIND */ + prev_b = NULL; /* For the most part when this function is called the flush_rbt @@ -830,6 +853,7 @@ try_again: zip_size = buf_page_get_zip_size(bpage); if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size); /* Copy the compressed page and clear the rest. 
*/ memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * trx_doublewrite->first_free, @@ -839,6 +863,8 @@ try_again: + zip_size, 0, UNIV_PAGE_SIZE - zip_size); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame, + UNIV_PAGE_SIZE); memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * trx_doublewrite->first_free, @@ -1533,6 +1559,7 @@ retry: } else if (!have_LRU_mutex) { /* confirm it again with LRU_mutex for exactness */ have_LRU_mutex = TRUE; + distance = 0; goto retry; } diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index 7b01c4aec50..14ec1720873 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -1455,7 +1455,7 @@ buf_LRU_make_block_old( Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns BUF_LRU_FREED, it will not temporarily +NOTE: If this function returns BUF_LRU_FREED, it will temporarily release buf_pool_mutex. Furthermore, the page frame will no longer be accessible via bpage. diff --git a/storage/xtradb/dict/dict0boot.c b/storage/xtradb/dict/dict0boot.c index 0a713f0deaa..43cfced65a0 100644 --- a/storage/xtradb/dict/dict0boot.c +++ b/storage/xtradb/dict/dict0boot.c @@ -62,32 +62,47 @@ dict_hdr_get( } /**********************************************************************//** -Returns a new table, index, or tree id. -@return the new id */ +Returns a new table, index, or space id. */ UNIV_INTERN -dulint +void dict_hdr_get_new_id( /*================*/ - ulint type) /*!< in: DICT_HDR_ROW_ID, ... */ + dulint* table_id, /*!< out: table id (not assigned if NULL) */ + dulint* index_id, /*!< out: index id (not assigned if NULL) */ + ulint* space_id) /*!< out: space id (not assigned if NULL) */ { dict_hdr_t* dict_hdr; dulint id; mtr_t mtr; - ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID)); - mtr_start(&mtr); dict_hdr = dict_hdr_get(&mtr); - id = mtr_read_dulint(dict_hdr + type, &mtr); - id = ut_dulint_add(id, 1); + if (table_id) { + id = mtr_read_dulint(dict_hdr + DICT_HDR_TABLE_ID, &mtr); + id = ut_dulint_add(id, 1); + mlog_write_dulint(dict_hdr + DICT_HDR_TABLE_ID, id, &mtr); + *table_id = id; + } - mlog_write_dulint(dict_hdr + type, id, &mtr); + if (index_id) { + id = mtr_read_dulint(dict_hdr + DICT_HDR_INDEX_ID, &mtr); + id = ut_dulint_add(id, 1); + mlog_write_dulint(dict_hdr + DICT_HDR_INDEX_ID, id, &mtr); + *index_id = id; + } - mtr_commit(&mtr); + if (space_id) { + *space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + if (fil_assign_new_space_id(space_id)) { + mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + *space_id, MLOG_4BYTES, &mtr); + } + } - return(id); + mtr_commit(&mtr); } /**********************************************************************//** @@ -151,9 +166,12 @@ dict_hdr_create( mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID, ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); - /* Obsolete, but we must initialize it to 0 anyway. */ - mlog_write_dulint(dict_header + DICT_HDR_MIX_ID, - ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); + mlog_write_ulint(dict_header + DICT_HDR_MAX_SPACE_ID, + 0, MLOG_4BYTES, mtr); + + /* Obsolete, but we must initialize it anyway. 
*/ + mlog_write_ulint(dict_header + DICT_HDR_MIX_ID_LOW, + DICT_HDR_FIRST_ID, MLOG_4BYTES, mtr); /* Create the B-tree roots for the clustered indexes of the basic system tables */ @@ -245,6 +263,29 @@ dict_boot(void) /* Get the dictionary header */ dict_hdr = dict_hdr_get(&mtr); + if (ut_dulint_cmp(mtr_read_dulint(dict_hdr + DICT_HDR_XTRADB_MARK, &mtr), + DICT_HDR_XTRADB_FLAG) != 0) { + /* not extended yet by XtraDB, need to be extended */ + ulint root_page_no; + + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_STATS_ID, + dict_ind_redundant, &mtr); + if (root_page_no == FIL_NULL) { + fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n"); + srv_use_sys_stats_table = FALSE; + } else { + mlog_write_ulint(dict_hdr + DICT_HDR_STATS, root_page_no, + MLOG_4BYTES, &mtr); + mlog_write_dulint(dict_hdr + DICT_HDR_XTRADB_MARK, + DICT_HDR_XTRADB_FLAG, &mtr); + } + mtr_commit(&mtr); + /* restart mtr */ + mtr_start(&mtr); + dict_hdr = dict_hdr_get(&mtr); + } + /* Because we only write new row ids to disk-based data structure (dictionary header) when it is divisible by DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover @@ -406,7 +447,7 @@ dict_boot(void) table->id = DICT_FIELDS_ID; dict_table_add_to_cache(table, heap); dict_sys->sys_fields = table; - mem_heap_free(heap); + mem_heap_empty(heap); index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND", DICT_HDR_SPACE, @@ -423,6 +464,41 @@ dict_boot(void) FALSE); ut_a(error == DB_SUCCESS); + /*-------------------------*/ + table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 3, 0); + table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); + + /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ +#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 +#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" +#endif + + table->id = DICT_STATS_ID; + dict_table_add_to_cache(table, heap); + dict_sys->sys_stats = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_STATS", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + dict_mem_index_add_field(index, "INDEX_ID", 0); + dict_mem_index_add_field(index, "KEY_COLS", 0); + + index->id = DICT_STATS_ID; + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_STATS, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + mem_heap_free(heap); + mtr_commit(&mtr); /*-------------------------*/ @@ -436,6 +512,7 @@ dict_boot(void) dict_load_sys_table(dict_sys->sys_columns); dict_load_sys_table(dict_sys->sys_indexes); dict_load_sys_table(dict_sys->sys_fields); + dict_load_sys_table(dict_sys->sys_stats); mutex_exit(&(dict_sys->mutex)); } diff --git a/storage/xtradb/dict/dict0crea.c b/storage/xtradb/dict/dict0crea.c index d4e735c73ca..258bf77d1fc 100644 --- a/storage/xtradb/dict/dict0crea.c +++ b/storage/xtradb/dict/dict0crea.c @@ -239,16 +239,34 @@ dict_build_table_def_step( const char* path_or_name; ibool is_path; mtr_t mtr; + ulint space = 0; + ibool file_per_table; ut_ad(mutex_own(&(dict_sys->mutex))); table = node->table; - table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + /* Cache the global variable "srv_file_per_table" to + a local variable before using it. 
Please note + "srv_file_per_table" is not under dict_sys mutex + protection, and could be changed while executing + this function. So better to cache the current value + to a local variable, and all future reference to + "srv_file_per_table" should use this local variable. */ + file_per_table = srv_file_per_table; + + dict_hdr_get_new_id(&table->id, NULL, NULL); thr_get_trx(thr)->table_id = table->id; - if (srv_file_per_table) { + if (file_per_table) { + /* Get a new space id if srv_file_per_table is set */ + dict_hdr_get_new_id(NULL, NULL, &space); + + if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) { + return(DB_ERROR); + } + /* We create a new single-table tablespace for the table. We initially let it be 4 pages: - page 0 is the fsp header and an extent descriptor page, @@ -257,8 +275,6 @@ dict_build_table_def_step( - page 3 will contain the root of the clustered index of the table we create here. */ - ulint space = 0; /* reset to zero for the call below */ - if (table->dir_path_of_temp_table) { /* We place tables created with CREATE TEMPORARY TABLE in the tmp dir of mysqld server */ @@ -276,7 +292,7 @@ dict_build_table_def_step( flags = table->flags & ~(~0 << DICT_TF_BITS); error = fil_create_new_single_table_tablespace( - &space, path_or_name, is_path, + space, path_or_name, is_path, flags == DICT_TF_COMPACT ? 0 : flags, FIL_IBD_FILE_INITIAL_SIZE); table->space = (unsigned int) space; @@ -492,6 +508,51 @@ dict_create_sys_fields_tuple( } /*****************************************************************//** +Based on an index object, this function builds the entry to be inserted +in the SYS_STATS system table. +@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_stats_tuple( +/*========================*/ + const dict_index_t* index, + ulint i, + mem_heap_t* heap) +{ + dict_table_t* sys_stats; + dtuple_t* entry; + dfield_t* dfield; + byte* ptr; + + ut_ad(index); + ut_ad(heap); + + sys_stats = dict_sys->sys_stats; + + entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_stats); + + /* 0: INDEX_ID -----------------------*/ + dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, index->id); + dfield_set_data(dfield, ptr, 8); + /* 1: KEY_COLS -----------------------*/ + dfield = dtuple_get_nth_field(entry, 1/*KEY_COLS*/); + ptr = mem_heap_alloc(heap, 4); + mach_write_to_4(ptr, i); + dfield_set_data(dfield, ptr, 4); + /* 4: DIFF_VALS ----------------------*/ + dfield = dtuple_get_nth_field(entry, 2/*DIFF_VALS*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, ut_dulint_zero); /* initial value is 0 */ + dfield_set_data(dfield, ptr, 8); + + return(entry); +} + +/*****************************************************************//** Creates the tuple with which the index entry is searched for writing the index tree root page number, if such a tree is created. @return the tuple for search */ @@ -561,7 +622,7 @@ dict_build_index_def_step( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID); + dict_hdr_get_new_id(NULL, &index->id, NULL); /* Inherit the space id from the table; we store all indexes of a table in the same tablespace */ @@ -601,6 +662,27 @@ dict_build_field_def_step( } /***************************************************************//** +Builds a row for storing stats to insert. 
+@return DB_SUCCESS */ +static +ulint +dict_build_stats_def_step( +/*======================*/ + ind_node_t* node) +{ + dict_index_t* index; + dtuple_t* row; + + index = node->index; + + row = dict_create_sys_stats_tuple(index, node->stats_no, node->heap); + + ins_node_set_new_row(node->stats_def, row); + + return(DB_SUCCESS); +} + +/***************************************************************//** Creates an index tree for the index if it is not a member of a cluster. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static @@ -924,6 +1006,49 @@ ind_create_graph_create( dict_sys->sys_fields, heap); node->field_def->common.parent = node; + if (srv_use_sys_stats_table) { + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + } else { + node->stats_def = NULL; + } + + node->commit_node = commit_node_create(heap); + node->commit_node->common.parent = node; + + return(node); +} + +/*********************************************************************//** +*/ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, + mem_heap_t* heap) +{ + ind_node_t* node; + + node = mem_heap_alloc(heap, sizeof(ind_node_t)); + + node->common.type = QUE_NODE_INSERT_STATS; + + node->index = index; + + node->state = INDEX_BUILD_STATS_COLS; + node->page_no = FIL_NULL; + node->heap = mem_heap_create(256); + + node->ind_def = NULL; + node->field_def = NULL; + + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + node->stats_no = 0; + node->commit_node = commit_node_create(heap); node->commit_node->common.parent = node; @@ -1074,6 +1199,7 @@ dict_create_index_step( node->state = INDEX_BUILD_FIELD_DEF; node->field_no = 0; + node->stats_no = 0; thr->run_node = node->ind_def; @@ -1119,7 +1245,31 @@ dict_create_index_step( goto function_exit; } - node->state = INDEX_CREATE_INDEX_TREE; + if (srv_use_sys_stats_table) { + node->state = INDEX_BUILD_STATS_COLS; + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } + } + + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; + + return(thr); + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } } if (node->state == INDEX_CREATE_INDEX_TREE) { @@ -1171,6 +1321,66 @@ function_exit: } /****************************************************************//** +*/ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr) /*!< in: query thread */ +{ + ind_node_t* node; + ulint err = DB_ERROR; + trx_t* trx; + + ut_ad(thr); + + trx = thr_get_trx(thr); + + node = thr->run_node; + + if (thr->prev_node == que_node_get_parent(node)) { + node->state = INDEX_BUILD_STATS_COLS; + } + + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; + + return(thr); + } else { + node->state = INDEX_COMMIT_WORK; + } + } + + if (node->state == INDEX_COMMIT_WORK) { + + /* do not commit transaction here for now */ + } + +function_exit: + trx->error_state = err; + + if (err == DB_SUCCESS) { + } else { + return(NULL); + } + + thr->run_node = que_node_get_parent(node); + + 
return(thr); +} + +/****************************************************************//** Creates the foreign key constraints system tables inside InnoDB at database creation or database start if they are not found or are not of the right form. diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 1d088b2e02b..51ee7f9246f 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -83,7 +83,7 @@ static char dict_ibfk[] = "_ibfk_"; /** array of mutexes protecting dict_index_t::stat_n_diff_key_vals[] */ #define DICT_INDEX_STAT_MUTEX_SIZE 32 -mutex_t dict_index_stat_mutex[DICT_INDEX_STAT_MUTEX_SIZE]; +static mutex_t dict_index_stat_mutex[DICT_INDEX_STAT_MUTEX_SIZE]; /*******************************************************************//** Tries to find column names for the index and sets the col field of the @@ -571,13 +571,11 @@ dict_table_get_on_id( if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0 || trx->dict_operation_lock_mode == RW_X_LATCH) { - /* It is a system table which will always exist in the table - cache: we avoid acquiring the dictionary mutex, because - if we are doing a rollback to handle an error in TABLE - CREATE, for example, we already have the mutex! */ - ut_ad(mutex_own(&(dict_sys->mutex)) - || trx->dict_operation_lock_mode == RW_X_LATCH); + /* Note: An X latch implies that the transaction + already owns the dictionary mutex. */ + + ut_ad(mutex_own(&dict_sys->mutex)); return(dict_table_get_on_id_low(table_id)); } @@ -712,7 +710,7 @@ dict_table_get( /* If table->ibd_file_missing == TRUE, this will print an error message and return without doing anything. */ - dict_update_statistics(table); + dict_update_statistics(table, FALSE); } } @@ -855,7 +853,8 @@ dict_table_add_to_cache( /* Add table to LRU list of tables */ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); - dict_sys->size += mem_heap_get_size(table->heap); + dict_sys->size += mem_heap_get_size(table->heap) + + strlen(table->name) + 1; } /**********************************************************************//** @@ -909,14 +908,21 @@ dict_table_rename_in_cache( dict_foreign_t* foreign; dict_index_t* index; ulint fold; - ulint old_size; - const char* old_name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(table); ut_ad(mutex_own(&(dict_sys->mutex))); - old_size = mem_heap_get_size(table->heap); - old_name = table->name; + /* store the old/current name to an automatic variable */ + if (strlen(table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, table->name, strlen(table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } fold = ut_fold_string(new_name); @@ -962,12 +968,22 @@ dict_table_rename_in_cache( /* Remove table from the hash tables of tables */ HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, ut_fold_string(old_name), table); - table->name = mem_heap_strdup(table->heap, new_name); + + if (strlen(new_name) > strlen(table->name)) { + /* We allocate MAX_TABLE_NAME_LEN+1 bytes here to avoid + memory fragmentation, we assume a repeated calls of + ut_realloc() with the same size do not cause fragmentation */ + ut_a(strlen(new_name) <= MAX_TABLE_NAME_LEN); + table->name = ut_realloc(table->name, MAX_TABLE_NAME_LEN + 1); + } + memcpy(table->name, new_name, strlen(new_name) + 1); /* Add table to hash table of tables */ HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); - dict_sys->size 
+= (mem_heap_get_size(table->heap) - old_size); + + dict_sys->size += strlen(new_name) - strlen(old_name); + ut_a(dict_sys->size > 0); /* Update the table_name field in indexes */ index = dict_table_get_first_index(table); @@ -1192,7 +1208,7 @@ dict_table_remove_from_cache( /* Remove table from LRU list of tables */ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); - size = mem_heap_get_size(table->heap); + size = mem_heap_get_size(table->heap) + strlen(table->name) + 1; ut_ad(dict_sys->size >= size); @@ -3076,25 +3092,28 @@ static char* dict_strip_comments( /*================*/ - const char* sql_string) /*!< in: SQL string */ + const char* sql_string, /*!< in: SQL string */ + size_t sql_length) /*!< in: length of sql_string */ { char* str; const char* sptr; + const char* eptr = sql_string + sql_length; char* ptr; /* unclosed quote character (0 if none) */ char quote = 0; - str = mem_alloc(strlen(sql_string) + 1); + str = mem_alloc(sql_length + 1); sptr = sql_string; ptr = str; for (;;) { scan_more: - if (*sptr == '\0') { + if (sptr >= eptr || *sptr == '\0') { +end_of_string: *ptr = '\0'; - ut_a(ptr <= str + strlen(sql_string)); + ut_a(ptr <= str + sql_length); return(str); } @@ -3113,30 +3132,35 @@ scan_more: || (sptr[0] == '-' && sptr[1] == '-' && sptr[2] == ' ')) { for (;;) { + if (++sptr >= eptr) { + goto end_of_string; + } + /* In Unix a newline is 0x0A while in Windows it is 0x0D followed by 0x0A */ - if (*sptr == (char)0x0A - || *sptr == (char)0x0D - || *sptr == '\0') { - + switch (*sptr) { + case (char) 0X0A: + case (char) 0x0D: + case '\0': goto scan_more; } - - sptr++; } } else if (!quote && *sptr == '/' && *(sptr + 1) == '*') { + sptr += 2; for (;;) { - if (*sptr == '*' && *(sptr + 1) == '/') { - - sptr += 2; - - goto scan_more; + if (sptr >= eptr) { + goto end_of_string; } - if (*sptr == '\0') { - + switch (*sptr) { + case '\0': goto scan_more; + case '*': + if (sptr[1] == '/') { + sptr += 2; + goto scan_more; + } } sptr++; @@ -3817,6 +3841,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -3831,7 +3856,7 @@ dict_create_foreign_constraints( ut_a(trx); ut_a(trx->mysql_thd); - str = dict_strip_comments(sql_string); + str = dict_strip_comments(sql_string, sql_length); heap = mem_heap_create(10000); err = dict_create_foreign_constraints_low( @@ -3864,6 +3889,7 @@ dict_foreign_parse_drop_constraints( dict_foreign_t* foreign; ibool success; char* str; + size_t len; const char* ptr; const char* id; FILE* ef = dict_foreign_err_file; @@ -3878,7 +3904,10 @@ dict_foreign_parse_drop_constraints( *constraints_to_drop = mem_heap_alloc(heap, 1000 * sizeof(char*)); - str = dict_strip_comments(*(trx->mysql_query_str)); + ptr = innobase_get_stmt(trx->mysql_thd, &len); + + str = dict_strip_comments(ptr, len); + ptr = str; ut_ad(mutex_own(&(dict_sys->mutex))); @@ -4219,6 +4248,259 @@ dict_index_calc_min_rec_len( } /*********************************************************************//** +functions to use SYS_STATS system table. 
*/ +static +ibool +dict_reload_statistics( +/*===================*/ + dict_table_t* table, + ulint* sum_of_index_sizes) +{ + dict_index_t* index; + ulint size; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + if (index == NULL) { + /* Table definition is corrupt */ + + return(FALSE); + } + + heap = mem_heap_create(1000); + + while (index) { + if (table->is_corrupt) { + ut_a(srv_pass_corrupt_table); + mem_heap_free(heap); + return(FALSE); + } + + size = btr_get_size(index, BTR_TOTAL_SIZE); + + index->stat_index_size = size; + + *sum_of_index_sizes += size; + + size = btr_get_size(index, BTR_N_LEAF_PAGES); + + if (size == 0) { + /* The root node of the tree is a leaf */ + size = 1; + } + + index->stat_n_leaf_pages = size; + +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + const rec_t* rec; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), + index->id)) { + /* not found: even 1 if not found should not be alowed */ + fprintf(stderr, "InnoDB: Warning: stats for %s/%s (%lu/%lu)" + " not fonund in SYS_STATS\n", + index->table_name, index->name, i, n_cols); + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + return(FALSE); + } + + if (rec_get_deleted_flag(rec, 0)) { + goto next_rec; + } + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); + + ut_a(i == key_cols); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + stat_n_diff_key_vals_tmp[i] = ut_conv_dulint_to_longlong(mach_read_from_8(field)); +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + dict_index_stat_mutex_enter(index); + for (i = 0; i <= n_cols; i++) { + index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i]; + } + dict_index_stat_mutex_exit(index); +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); + return(TRUE); +} + +static +void +dict_store_statistics( +/*==================*/ + dict_table_t* table) +{ + dict_index_t* index; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + ut_a(index); + + heap = mem_heap_create(1000); + + while (index) { + if (table->is_corrupt) { + ut_a(srv_pass_corrupt_table); + mem_heap_free(heap); + return; + } + +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + ulint rests; + 
const rec_t* rec; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + dict_index_stat_mutex_enter(index); + for (i = 0; i <= n_cols; i++) { + stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i]; + } + dict_index_stat_mutex_exit(index); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr); + rests = n_cols + 1; + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), + index->id)) { + /* not found */ + btr_pcur_close(&pcur); + mtr_commit(&mtr); + break; + } + + if (rec_get_deleted_flag(rec, 0)) { + goto next_rec; + } + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + mlog_write_dulint((byte*)field, + ut_dulint_create((ulint) (stat_n_diff_key_vals_tmp[key_cols] >> 32), + (ulint) stat_n_diff_key_vals_tmp[key_cols] & 0xFFFFFFFF), + &mtr); + + rests--; + +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + if (rests) { + fprintf(stderr, "InnoDB: Warning: failed to store %lu stats entries" + " of %s/%s to SYS_STATS system table.\n", + rests, index->table_name, index->name); + } +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); +} + +/*********************************************************************//** Calculates new estimates for table and index statistics. The statistics are used in query optimization. 
*/ UNIV_INTERN @@ -4226,9 +4508,10 @@ void dict_update_statistics_low( /*=======================*/ dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex __attribute__((unused))) + ibool has_dict_mutex __attribute__((unused)), /*!< in: TRUE if the caller has the dictionary mutex */ + ibool sync) /*!< in: TRUE if must update SYS_STATS */ { dict_index_t* index; ulint size; @@ -4254,6 +4537,23 @@ dict_update_statistics_low( return; } + if (srv_use_sys_stats_table && !sync) { + /* reload statistics from SYS_STATS table */ + if (dict_reload_statistics(table, &sum_of_index_sizes)) { + /* success */ +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: reload_statistics succeeded for %s.\n", + table->name); +#endif + goto end; + } + } +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: update_statistics for %s.\n", + table->name); +#endif + sum_of_index_sizes = 0; + /* Find out the sizes of the indexes and how many different values for the key they approximately have */ @@ -4291,6 +4591,11 @@ dict_update_statistics_low( index = dict_table_get_next_index(index); } + if (srv_use_sys_stats_table) { + /* store statistics to SYS_STATS table */ + dict_store_statistics(table); + } +end: index = dict_table_get_first_index(table); dict_index_stat_mutex_enter(index); @@ -4317,9 +4622,10 @@ UNIV_INTERN void dict_update_statistics( /*===================*/ - dict_table_t* table) /*!< in/out: table */ + dict_table_t* table, /*!< in/out: table */ + ibool sync) { - dict_update_statistics_low(table, FALSE); + dict_update_statistics_low(table, FALSE, sync); } /**********************************************************************//** @@ -4400,7 +4706,7 @@ dict_table_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); if (srv_stats_auto_update) - dict_update_statistics_low(table, TRUE); + dict_update_statistics_low(table, TRUE, FALSE); fprintf(stderr, "--------------------------------------\n" diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c index 528c3786254..0d8292cc2bf 100644 --- a/storage/xtradb/dict/dict0load.c +++ b/storage/xtradb/dict/dict0load.c @@ -223,7 +223,7 @@ loop: is no index */ if (srv_stats_auto_update && dict_table_get_first_index(table)) { - dict_update_statistics_low(table, TRUE); + dict_update_statistics_low(table, TRUE, FALSE); } dict_table_print_low(table); @@ -317,7 +317,7 @@ dict_check_tablespaces_and_store_max_id( dict_index_t* sys_index; btr_pcur_t pcur; const rec_t* rec; - ulint max_space_id = 0; + ulint max_space_id; mtr_t mtr; mutex_enter(&(dict_sys->mutex)); @@ -328,6 +328,11 @@ dict_check_tablespaces_and_store_max_id( sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); ut_a(!dict_table_is_comp(sys_tables)); + max_space_id = mtr_read_ulint(dict_hdr_get(&mtr) + + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + fil_set_max_space_id_if_bigger(max_space_id); + btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); loop: @@ -974,6 +979,7 @@ err_exit: /* Try to open the tablespace */ if (!fil_open_single_table_tablespace( TRUE, space, + flags == DICT_TF_COMPACT ?
0 : flags & ~(~0 << DICT_TF_BITS), name)) { /* We failed to find a sensible tablespace file */ diff --git a/storage/xtradb/dict/dict0mem.c b/storage/xtradb/dict/dict0mem.c index 388d46568ac..f2d219bfd4f 100644 --- a/storage/xtradb/dict/dict0mem.c +++ b/storage/xtradb/dict/dict0mem.c @@ -68,7 +68,8 @@ dict_mem_table_create( table->heap = heap; table->flags = (unsigned int) flags; - table->name = mem_heap_strdup(heap, name); + table->name = ut_malloc(strlen(name) + 1); + memcpy(table->name, name, strlen(name) + 1); table->space = (unsigned int) space; table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS); @@ -108,6 +109,7 @@ dict_mem_table_free( #ifndef UNIV_HOTBACKUP mutex_free(&(table->autoinc_mutex)); #endif /* UNIV_HOTBACKUP */ + ut_free(table->name); mem_heap_free(table->heap); } diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index d69e5859d99..e9dc8185be6 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -43,8 +43,8 @@ Created 10/25/1995 Heikki Tuuri #include "trx0trx.h" #include "trx0sys.h" #include "pars0pars.h" -#include "row0row.h" #include "row0mysql.h" +#include "row0row.h" #include "que0que.h" #ifndef UNIV_HOTBACKUP # include "buf0lru.h" @@ -286,6 +286,10 @@ struct fil_system_struct { request */ UT_LIST_BASE_NODE_T(fil_space_t) space_list; /*!< list of all file spaces */ + ibool space_id_reuse_warned; + /* !< TRUE if fil_space_create() + has issued a warning about + potential space_id reuse */ }; /** The tablespace memory cache. This variable is NULL before the module is @@ -1200,7 +1204,19 @@ try_again: space->tablespace_version = fil_system->tablespace_version; space->mark = FALSE; - if (purpose == FIL_TABLESPACE && id > fil_system->max_assigned_id) { + if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on) + && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) { + if (!fil_system->space_id_reuse_warned) { + fil_system->space_id_reuse_warned = TRUE; + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: allocated tablespace %lu," + " old maximum was %lu\n", + (ulong) id, + (ulong) fil_system->max_assigned_id); + } + fil_system->max_assigned_id = id; } @@ -1240,19 +1256,25 @@ try_again: Assigns a new space id for a new single-table tablespace. This works simply by incrementing the global counter. If 4 billion id's is not enough, we may need to recycle id's. 
-@return new tablespace id; ULINT_UNDEFINED if could not assign an id */ -static -ulint -fil_assign_new_space_id(void) -/*=========================*/ +@return TRUE if assigned, FALSE if not */ +UNIV_INTERN +ibool +fil_assign_new_space_id( +/*====================*/ + ulint* space_id) /*!< in/out: space id */ { - ulint id; + ulint id; + ibool success; mutex_enter(&fil_system->mutex); - fil_system->max_assigned_id++; + id = *space_id; - id = fil_system->max_assigned_id; + if (id < fil_system->max_assigned_id) { + id = fil_system->max_assigned_id; + } + + id++; if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { ut_print_timestamp(stderr); @@ -1268,7 +1290,11 @@ fil_assign_new_space_id(void) (ulong) SRV_LOG_SPACE_FIRST_ID); } - if (id >= SRV_LOG_SPACE_FIRST_ID) { + success = (id < SRV_LOG_SPACE_FIRST_ID); + + if (success) { + *space_id = fil_system->max_assigned_id = id; + } else { ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: You have run out of single-table" @@ -1278,14 +1304,12 @@ fil_assign_new_space_id(void) " have to dump all your tables and\n" "InnoDB: recreate the whole InnoDB installation.\n", (ulong) id); - fil_system->max_assigned_id--; - - id = ULINT_UNDEFINED; + *space_id = ULINT_UNDEFINED; } mutex_exit(&fil_system->mutex); - return(id); + return(success); } /*******************************************************************//** @@ -1521,7 +1545,7 @@ fil_init( ut_a(hash_size > 0); ut_a(max_n_open > 0); - fil_system = mem_alloc(sizeof(fil_system_t)); + fil_system = mem_zalloc(sizeof(fil_system_t)); mutex_create(&fil_system->mutex, SYNC_ANY_LATCH); @@ -1530,16 +1554,9 @@ fil_init( UT_LIST_INIT(fil_system->LRU); - fil_system->n_open = 0; fil_system->max_n_open = max_n_open; - fil_system->modification_counter = 0; fil_system->max_assigned_id = TRX_SYS_SPACE_MAX; - - fil_system->tablespace_version = 0; - - UT_LIST_INIT(fil_system->unflushed_spaces); - UT_LIST_INIT(fil_system->space_list); } /*******************************************************************//** @@ -2124,7 +2141,7 @@ fil_op_log_parse_or_replay( fil_create_directory_for_tablename(name); if (fil_create_new_single_table_tablespace( - &space_id, name, FALSE, flags, + space_id, name, FALSE, flags, FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { ut_error; } @@ -2571,9 +2588,7 @@ UNIV_INTERN ulint fil_create_new_single_table_tablespace( /*===================================*/ - ulint* space_id, /*!< in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ + ulint space_id, /*!< in: space id */ const char* tablename, /*!< in: the table name in the usual databasename/tablename format of InnoDB, or a dir path to a temp @@ -2593,6 +2608,8 @@ fil_create_new_single_table_tablespace( ibool success; char* path; + ut_a(space_id > 0); + ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for ROW_FORMAT=COMPACT @@ -2649,38 +2666,21 @@ fil_create_new_single_table_tablespace( return(DB_ERROR); } - buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); - /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); - ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0); if (!ret) { - ut_free(buf2); - os_file_close(file); - os_file_delete(path); - - mem_free(path); - return(DB_OUT_OF_FILE_SPACE); - } - - if (*space_id == 0) { - *space_id = fil_assign_new_space_id(); - } - - /* printf("Creating tablespace %s id %lu\n", path, *space_id); */ - - if (*space_id == 
ULINT_UNDEFINED) { - ut_free(buf2); + err = DB_OUT_OF_FILE_SPACE; error_exit: os_file_close(file); error_exit2: os_file_delete(path); mem_free(path); - return(DB_ERROR); + return(err); } + /* printf("Creating tablespace %s id %lu\n", path, space_id); */ + /* We have to write the space id to the file immediately and flush the file to disk. This is because in crash recovery we must be aware what tablespaces exist and what are their space id's, so that we can apply @@ -2690,10 +2690,14 @@ error_exit2: with zeros from the call of os_file_set_size(), until a buffer pool flush would write to it. */ + buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); + /* Align the memory for file i/o if we might have O_DIRECT set */ + page = ut_align(buf2, UNIV_PAGE_SIZE); + memset(page, '\0', UNIV_PAGE_SIZE); - fsp_header_init_fields(page, *space_id, flags); - mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, *space_id); + fsp_header_init_fields(page, space_id, flags); + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); if (!(flags & DICT_TF_ZSSIZE_MASK)) { buf_flush_init_for_writing(page, NULL, 0); @@ -2724,6 +2728,7 @@ error_exit2: " to tablespace ", stderr); ut_print_filename(stderr, path); putc('\n', stderr); + err = DB_ERROR; goto error_exit; } @@ -2733,22 +2738,20 @@ error_exit2: fputs("InnoDB: Error: file flush of tablespace ", stderr); ut_print_filename(stderr, path); fputs(" failed\n", stderr); + err = DB_ERROR; goto error_exit; } os_file_close(file); - if (*space_id == ULINT_UNDEFINED) { - goto error_exit2; - } - - success = fil_space_create(path, *space_id, flags, FIL_TABLESPACE); + success = fil_space_create(path, space_id, flags, FIL_TABLESPACE); if (!success) { + err = DB_ERROR; goto error_exit2; } - fil_node_create(path, size, *space_id, FALSE); + fil_node_create(path, size, space_id, FALSE); #ifndef UNIV_HOTBACKUP { @@ -2759,7 +2762,7 @@ error_exit2: fil_op_write_log(flags ? MLOG_FILE_CREATE2 : MLOG_FILE_CREATE, - *space_id, + space_id, is_temp ? MLOG_FILE_FLAG_TEMP : 0, flags, tablename, NULL, &mtr); @@ -3124,7 +3127,7 @@ fil_open_single_table_tablespace( for (i = 0; i < n_index; i++) { new_id[i] = dict_table_get_index_on_name(table, - (page + (i + 1) * 512 + 12))->id; + (char*)(page + (i + 1) * 512 + 12))->id; old_id[i] = mach_read_from_8(page + (i + 1) * 512); root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8); } @@ -3148,7 +3151,7 @@ skip_info: /* over write space id of all pages */ rec_offs_init(offsets_); - fprintf(stderr, "%s", "InnoDB: Progress in %:"); + fprintf(stderr, "InnoDB: Progress in %%:"); for (offset = 0; offset < size_bytes; offset += UNIV_PAGE_SIZE) { ulint checksum_field; @@ -3890,39 +3893,6 @@ next_datadir_item: return(err); } -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. 
*/ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void) -/*================================*/ -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space) { - if (space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(space->id) - && !space->mark) { - fputs("InnoDB: Warning: tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, " of id %lu has no matching table in\n" - "InnoDB: the InnoDB data dictionary.\n", - (ulong) space->id); - } - - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&fil_system->mutex); -} - /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. diff --git a/storage/xtradb/ha/hash0hash.c b/storage/xtradb/ha/hash0hash.c index 30c304dafcd..bc058cd4729 100644 --- a/storage/xtradb/ha/hash0hash.c +++ b/storage/xtradb/ha/hash0hash.c @@ -128,6 +128,70 @@ hash_create( } /*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + return(offset + sizeof(hash_cell_t) * prime); +} + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((void*)table) + offset); + table->n_cells = prime; +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + table->adaptive = FALSE; +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + table->n_mutexes = 0; + table->mutexes = NULL; + table->heaps = NULL; + table->heap = NULL; + ut_d(table->magic_n = HASH_TABLE_MAGIC_N); + + /* Initialize the cell array */ + hash_table_clear(table); +} + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table) +{ + ulint offset; + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((void*)table) + offset); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); +} + +/*************************************************************//** Frees a hash table. */ UNIV_INTERN void diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 1eb0eb51f0a..602fbd59517 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -193,6 +193,7 @@ static my_bool innobase_overwrite_relay_log_info = FALSE; static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; +static my_bool innobase_use_sys_stats_table = FALSE; static char* internal_innobase_data_file_path = NULL; @@ -336,6 +337,12 @@ static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0); +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit_session, PLUGIN_VAR_RQCMDARG, + "Control innodb_flush_log_at_trx_commit for each sessions. " + "The value 0~2 are same meanings to innodb_flush_log_at_trx_commit. 
" + "The value 3 regards innodb_flush_log_at_trx_commit (default).", + NULL, NULL, 3, 0, 3, 0); + static handler *innobase_create_handler(handlerton *hton, TABLE_SHARE *table, @@ -716,6 +723,17 @@ thd_lock_wait_timeout( return(THDVAR((THD*) thd, lock_wait_timeout)); } +/******************************************************************//** +*/ +extern "C" UNIV_INTERN +ulong +thd_flush_log_at_trx_commit_session( +/*================================*/ + void* thd) +{ + return(THDVAR((THD*) thd, flush_log_at_trx_commit_session)); +} + /********************************************************************//** Obtain the InnoDB transaction of a MySQL thread. @return reference to transaction pointer */ @@ -1031,6 +1049,29 @@ innobase_get_charset( return(thd_charset((THD*) mysql_thd)); } +/**********************************************************************//** +Determines the current SQL statement. +@return SQL statement string */ +extern "C" UNIV_INTERN +const char* +innobase_get_stmt( +/*==============*/ + void* mysql_thd, /*!< in: MySQL thread handle */ + size_t* length) /*!< out: length of the SQL statement */ +{ +#if MYSQL_VERSION_ID >= 50142 + LEX_STRING* stmt; + + stmt = thd_query_string((THD*) mysql_thd); + *length = stmt->length; + return(stmt->str); +#else + const char* stmt_str = thd_query((THD*) mysql_thd); + *length = strlen(stmt_str); + return(stmt_str); +#endif +} + #if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list; /*******************************************************************//** @@ -1351,7 +1392,6 @@ innobase_trx_allocate( trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); innobase_trx_init(thd, trx); @@ -2035,12 +2075,12 @@ innobase_init( srv_page_size_shift = 0; if (innobase_page_size != (1 << 14)) { - int n_shift; + uint n_shift; fprintf(stderr, "InnoDB: Warning: innodb_page_size has been changed from default value 16384. (###EXPERIMENTAL### operation)\n"); for (n_shift = 12; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) { - if (innobase_page_size == (1u << n_shift)) { + if (innobase_page_size == ((ulong)1 << n_shift)) { srv_page_size_shift = n_shift; srv_page_size = (1 << srv_page_size_shift); fprintf(stderr, @@ -2231,6 +2271,8 @@ mem_free_and_error: srv_extra_undoslots = (ibool) innobase_extra_undoslots; + srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table; + /* -------------- Log files ---------------------------*/ /* The default dir for log files is the datadir of MySQL */ @@ -5701,6 +5743,9 @@ ha_innobase::index_read( prebuilt->index_usable = FALSE; DBUG_RETURN(HA_ERR_CRASHED); } + if (UNIV_UNLIKELY(!prebuilt->index_usable)) { + DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); + } /* Note that if the index for which the search template is built is not necessarily prebuilt->index, but can also be the clustered index */ @@ -6814,6 +6859,9 @@ ha_innobase::create( /* Cache the value of innodb_file_format, in case it is modified by another thread while the table is being created. */ const ulint file_format = srv_file_format; + const char* stmt; + size_t stmt_len; + enum row_type row_type; DBUG_ENTER("ha_innobase::create"); @@ -6934,94 +6982,94 @@ ha_innobase::create( } } - if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) { - if (flags) { - /* KEY_BLOCK_SIZE was specified. */ - if (form->s->row_type != ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT other than COMPRESSED - ignores KEY_BLOCK_SIZE. 
It does not - make sense to reject conflicting - KEY_BLOCK_SIZE and ROW_FORMAT, because - such combinations can be obtained - with ALTER TABLE anyway. */ - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" - " unless ROW_FORMAT=COMPRESSED.", - create_info->key_block_size); - flags = 0; - } - } else { - /* No KEY_BLOCK_SIZE */ - if (form->s->row_type == ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT=COMPRESSED without - KEY_BLOCK_SIZE implies half the - maximum KEY_BLOCK_SIZE. */ - flags = (DICT_TF_ZSSIZE_MAX - 1) - << DICT_TF_ZSSIZE_SHIFT - | DICT_TF_COMPACT - | DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT; + row_type = form->s->row_type; + + if (flags) { + /* KEY_BLOCK_SIZE was specified. */ + if (!(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { + /* ROW_FORMAT was not specified; + default to ROW_FORMAT=COMPRESSED */ + row_type = ROW_TYPE_COMPRESSED; + } else if (row_type != ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT other than COMPRESSED + ignores KEY_BLOCK_SIZE. It does not + make sense to reject conflicting + KEY_BLOCK_SIZE and ROW_FORMAT, because + such combinations can be obtained + with ALTER TABLE anyway. */ + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" + " unless ROW_FORMAT=COMPRESSED.", + create_info->key_block_size); + flags = 0; + } + } else { + /* No KEY_BLOCK_SIZE */ + if (row_type == ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT=COMPRESSED without + KEY_BLOCK_SIZE implies half the + maximum KEY_BLOCK_SIZE. */ + flags = (DICT_TF_ZSSIZE_MAX - 1) + << DICT_TF_ZSSIZE_SHIFT + | DICT_TF_COMPACT + | DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT; //#if DICT_TF_ZSSIZE_MAX < 1 //# error "DICT_TF_ZSSIZE_MAX < 1" //#endif - } } + } - switch (form->s->row_type) { - const char* row_format_name; - case ROW_TYPE_REDUNDANT: - break; - case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - row_format_name - = form->s->row_type == ROW_TYPE_COMPRESSED - ? "COMPRESSED" - : "DYNAMIC"; - - if (!srv_file_per_table) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - row_format_name); - } else if (file_format < DICT_TF_FORMAT_ZIP) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_format >" - " Antelope.", - row_format_name); - } else { - flags |= DICT_TF_COMPACT - | (DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT); - break; - } + switch (row_type) { + const char* row_format_name; + case ROW_TYPE_REDUNDANT: + break; + case ROW_TYPE_COMPRESSED: + case ROW_TYPE_DYNAMIC: + row_format_name + = row_type == ROW_TYPE_COMPRESSED + ? 
"COMPRESSED" + : "DYNAMIC"; - /* fall through */ - case ROW_TYPE_NOT_USED: - case ROW_TYPE_FIXED: - default: - push_warning(thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: assuming ROW_FORMAT=COMPACT."); - case ROW_TYPE_DEFAULT: - case ROW_TYPE_COMPACT: - flags = DICT_TF_COMPACT; + if (!srv_file_per_table) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_per_table.", + row_format_name); + } else if (file_format < DICT_TF_FORMAT_ZIP) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_format >" + " Antelope.", + row_format_name); + } else { + flags |= DICT_TF_COMPACT + | (DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT); break; } - } else if (!flags) { - /* No KEY_BLOCK_SIZE or ROW_FORMAT specified: - use ROW_FORMAT=COMPACT by default. */ + + /* fall through */ + case ROW_TYPE_NOT_USED: + case ROW_TYPE_FIXED: + default: + push_warning(thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: assuming ROW_FORMAT=COMPACT."); + case ROW_TYPE_DEFAULT: + case ROW_TYPE_COMPACT: flags = DICT_TF_COMPACT; + break; } /* Look for a primary key */ @@ -7030,7 +7078,7 @@ ha_innobase::create( (int) form->s->primary_key : -1); - /* Our function row_get_mysql_key_number_for_index assumes + /* Our function innobase_get_mysql_key_number_for_index assumes the primary key is always number 0, if it exists */ ut_a(primary_key_no == -1 || primary_key_no == 0); @@ -7090,9 +7138,11 @@ ha_innobase::create( } } - if (*trx->mysql_query_str) { - error = row_table_add_foreign_constraints(trx, - *trx->mysql_query_str, norm_name, + stmt = innobase_get_stmt(thd, &stmt_len); + + if (stmt) { + error = row_table_add_foreign_constraints( + trx, stmt, stmt_len, norm_name, create_info->options & HA_LEX_CREATE_TMP_TABLE); error = convert_error_code_to_mysql(error, flags, NULL); @@ -7385,7 +7435,6 @@ innobase_drop_database( /* In the Windows plugin, thd = current_thd is always NULL */ trx = trx_allocate_for_mysql(); trx->mysql_thd = NULL; - trx->mysql_query_str = NULL; #else trx = innobase_trx_allocate(thd); #endif @@ -7587,6 +7636,10 @@ ha_innobase::records_in_range( n_rows = HA_POS_ERROR; goto func_exit; } + if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) { + n_rows = HA_ERR_TABLE_DEF_CHANGED; + goto func_exit; + } heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t) + sizeof(dtuple_t))); @@ -7766,6 +7819,86 @@ ha_innobase::is_corrupt() const } /*********************************************************************//** +Calculates the key number used inside MySQL for an Innobase index. We will +first check the "index translation table" for a match of the index to get +the index number. If there does not exist an "index translation table", +or not able to find the index in the translation table, then we will fall back +to the traditional way of looping through dict_index_t list to find a +match. In this case, we have to take into account if we generated a +default clustered index for the table +@return the key number used inside MySQL */ +static +unsigned int +innobase_get_mysql_key_number_for_index( +/*====================================*/ + INNOBASE_SHARE* share, /*!< in: share structure for index + translation table. 
*/ + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + dict_table_t* ib_table,/*!< in: table in Innodb data + dictionary */ + const dict_index_t* index) /*!< in: index */ +{ + const dict_index_t* ind; + unsigned int i; + + ut_ad(index); + ut_ad(ib_table); + ut_ad(table); + ut_ad(share); + + /* If index does not belong to the table of share structure. Search + index->table instead */ + if (index->table != ib_table) { + i = 0; + ind = dict_table_get_first_index(index->table); + + while (index != ind) { + ind = dict_table_get_next_index(ind); + i++; + } + + if (row_table_got_default_clust_index(index->table)) { + ut_a(i > 0); + i--; + } + + return(i); + } + + /* If index translation table exists, we will first check + the index through index translation table for a match. */ + if (share->idx_trans_tbl.index_mapping) { + for (i = 0; i < share->idx_trans_tbl.index_count; i++) { + if (share->idx_trans_tbl.index_mapping[i] == index) { + return(i); + } + } + + /* Print an error message if we cannot find the index + ** in the "index translation table". */ + sql_print_error("Cannot find index %s in InnoDB index " + "translation table.", index->name); + } + + /* If we do not have an "index translation table", or not able + to find the index in the translation table, we'll directly find + matching index in the dict_index_t list */ + for (i = 0; i < table->s->keys; i++) { + ind = dict_table_get_index_on_name( + ib_table, table->key_info[i].name); + + if (index == ind) { + return(i); + } + } + + sql_print_error("Cannot find matching index number for index %s " + "in InnoDB index list.", index->name); + + return(0); +} +/*********************************************************************//** Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ UNIV_INTERN @@ -7822,9 +7955,30 @@ ha_innobase::info( /* In sql_show we call with this flag: update then statistics so that they are up-to-date */ + if (srv_use_sys_stats_table + && thd_sql_command(user_thd) == SQLCOM_ANALYZE) { + /* If the indexes on the table don't have enough rows in SYS_STATS system table, */ + /* they need to be created. */ + dict_index_t* index; + + prebuilt->trx->op_info = "confirming rows of SYS_STATS to store statistics"; + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + + for (index = dict_table_get_first_index(ib_table); + index != NULL; + index = dict_table_get_next_index(index)) { + row_insert_stats_for_mysql(index, prebuilt->trx); + innobase_commit_low(prebuilt->trx); + } + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + } + prebuilt->trx->op_info = "updating table statistics"; - dict_update_statistics(ib_table); + dict_update_statistics(ib_table, + (thd_sql_command(user_thd) == SQLCOM_ANALYZE)?TRUE:FALSE); prebuilt->trx->op_info = "returning various info to MySQL"; } @@ -8036,8 +8190,8 @@ ha_innobase::info( err_index = trx_get_error_info(prebuilt->trx); if (err_index) { - errkey = (unsigned int) - row_get_mysql_key_number_for_index(err_index); + errkey = innobase_get_mysql_key_number_for_index( + share, table, ib_table, err_index); } else { errkey = (unsigned int) prebuilt->trx->error_key_num; } @@ -10804,7 +10958,35 @@ innodb_old_blocks_pct_update( } /*************************************************************//** -Check if it is a valid value of innodb_change_buffering. This function is +Find the corresponding ibuf_use_t value that indexes into +innobase_change_buffering_values[] array for the input +change buffering option name. 
+@return corresponding IBUF_USE_* value for the input variable +name, or IBUF_USE_COUNT if not able to find a match */ +static +ibuf_use_t +innodb_find_change_buffering_value( +/*===============================*/ + const char* input_name) /*!< in: input change buffering + option name */ +{ + ulint use; + + for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); + use++) { + /* found a match */ + if (!innobase_strcasecmp( + input_name, innobase_change_buffering_values[use])) { + return((ibuf_use_t)use); + } + } + + /* Did not find any match */ + return(IBUF_USE_COUNT); +} + +/*************************************************************//** +Check if it is a valid value of innodb_change_buffering. This function is registered as a callback with MySQL. @return 0 for valid innodb_change_buffering */ static @@ -10828,19 +11010,22 @@ innodb_change_buffering_validate( change_buffering_input = value->val_str(value, buff, &len); if (change_buffering_input != NULL) { - ulint use; + ibuf_use_t use; - for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); - use++) { - if (!innobase_strcasecmp( - change_buffering_input, - innobase_change_buffering_values[use])) { - *(ibuf_use_t*) save = (ibuf_use_t) use; - return(0); - } + use = innodb_find_change_buffering_value( + change_buffering_input); + + if (use != IBUF_USE_COUNT) { + /* Find a matching change_buffering option value. */ + *static_cast<const char**>(save) = + innobase_change_buffering_values[use]; + + return(0); } } + /* No corresponding change buffering option for user supplied + "change_buffering_input" */ return(1); } @@ -10851,21 +11036,27 @@ static void innodb_change_buffering_update( /*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr, /*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ { + ibuf_use_t use; + ut_a(var_ptr != NULL); ut_a(save != NULL); - ut_a((*(ibuf_use_t*) save) < IBUF_USE_COUNT); - ibuf_use = *(const ibuf_use_t*) save; + use = innodb_find_change_buffering_value( + *static_cast<const char*const*>(save)); + + ut_a(use < IBUF_USE_COUNT); - *(const char**) var_ptr = innobase_change_buffering_values[ibuf_use]; + ibuf_use = use; + *static_cast<const char**>(var_ptr) = + *static_cast<const char*const*>(save); } static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff) @@ -11131,6 +11322,14 @@ static MYSQL_SYSVAR_ULINT(stats_update_need_lock, srv_stats_update_need_lock, "e.g. Data_free.", NULL, NULL, 1, 0, 1, 0); +static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Enable the SYS_STATS system table to store index statistics persistently, " + "and avoid recalculating statistics at every first open of a table. " + "This option reduces how often statistics are updated, " + "so run the ANALYZE TABLE command deliberately when statistics need to be refreshed.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled, PLUGIN_VAR_OPCMDARG, "Enable InnoDB adaptive hash index (enabled by default). 
" @@ -11156,7 +11355,12 @@ static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment, static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L); + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L); + +static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "[experimental] The key value of shared memory segment for the buffer pool. 0 means disable the feature (default).", + NULL, NULL, 0, 0, INT_MAX32, 0); static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency, PLUGIN_VAR_RQCMDARG, @@ -11287,7 +11491,7 @@ static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, "Buffer changes to reduce random access: " "OFF, ON, none, inserts.", innodb_change_buffering_validate, - innodb_change_buffering_update, NULL); + innodb_change_buffering_update, "inserts"); static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold, PLUGIN_VAR_RQCMDARG, @@ -11417,6 +11621,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), + MYSQL_SYSVAR(buffer_pool_shm_key), MYSQL_SYSVAR(checksums), MYSQL_SYSVAR(fast_checksum), MYSQL_SYSVAR(commit_concurrency), @@ -11461,6 +11666,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(stats_auto_update), MYSQL_SYSVAR(stats_update_need_lock), + MYSQL_SYSVAR(use_sys_stats_table), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), MYSQL_SYSVAR(replication_delay), @@ -11484,6 +11690,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(flush_neighbor_pages), MYSQL_SYSVAR(read_ahead), MYSQL_SYSVAR(adaptive_checkpoint), + MYSQL_SYSVAR(flush_log_at_trx_commit_session), MYSQL_SYSVAR(enable_unsafe_group_commit), MYSQL_SYSVAR(expand_import), MYSQL_SYSVAR(extra_rsegments), @@ -11503,7 +11710,7 @@ mysql_declare_plugin(xtradb) &innobase_storage_engine, innobase_hton_name, "Percona", - "XtraDB engine based on InnoDB plugin. Supports transactions, row-level locking, and foreign keys", + "Percona-XtraDB, Supports transactions, row-level locking, and foreign keys", PLUGIN_LICENSE_GPL, innobase_init, /* Plugin Init */ NULL, /* Plugin Deinit */ @@ -11528,6 +11735,7 @@ i_s_innodb_index_stats, i_s_innodb_admin_command, i_s_innodb_sys_tables, i_s_innodb_sys_indexes, +i_s_innodb_sys_stats, i_s_innodb_patches mysql_declare_plugin_end; diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 729abcbcd0a..04224277deb 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -233,7 +233,11 @@ the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */ extern "C" { struct charset_info_st *thd_charset(MYSQL_THD thd); +#if MYSQL_VERSION_ID >= 50142 +LEX_STRING *thd_query_string(MYSQL_THD thd); +#else char **thd_query(MYSQL_THD thd); +#endif /** Get the file name of the MySQL binlog. 
* @return the name of the binlog file diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 2eb9c9f474c..3a32ed9cf36 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -894,6 +894,8 @@ error: prebuilt->trx->error_info = NULL; /* fall through */ default: + trx->error_state = DB_SUCCESS; + if (new_primary) { if (indexed_table != innodb_table) { row_merge_drop_table(trx, indexed_table); diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 5b5d5ef7186..3336ea95096 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -43,20 +43,11 @@ extern "C" { #include "ha_prototypes.h" /* for innobase_convert_name() */ #include "srv0start.h" /* for srv_was_started */ #include "btr0btr.h" /* for btr_page_get_index_id */ -#include "dict0dict.h" /* for dict_index_get_if_in_cache */ #include "trx0rseg.h" /* for trx_rseg_struct */ #include "trx0sys.h" /* for trx_sys */ #include "dict0dict.h" /* for dict_sys */ #include "btr0pcur.h" #include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */ -/* from buf0buf.c */ -struct buf_chunk_struct{ - ulint mem_size; /* allocated size of the chunk */ - ulint size; /* size of frames[] and blocks[] */ - void* mem; /* pointer to the memory area which - was allocated for the frames */ - buf_block_t* blocks; /* array of buffer control blocks */ -}; } static const char plugin_author[] = "Innobase Oy"; @@ -450,27 +441,11 @@ static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_fields_info[] = static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_index_fields_info[] = { - {STRUCT_FLD(field_name, "schema_name"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "table_name"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "index_name"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + {STRUCT_FLD(field_name, "index_id"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, @@ -671,7 +646,6 @@ i_s_innodb_buffer_pool_pages_fill( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); buf_pool_mutex_enter(); - mutex_enter(&(dict_sys->mutex)); chunk = buf_pool->chunks; @@ -743,7 +717,6 @@ i_s_innodb_buffer_pool_pages_fill( } } - mutex_exit(&(dict_sys->mutex)); buf_pool_mutex_exit(); DBUG_RETURN(status); @@ -764,13 +737,8 @@ i_s_innodb_buffer_pool_pages_index_fill( int status = 0; ulint n_chunks, n_blocks; - dict_index_t* index; dulint index_id; - const char *p; - char db_name_raw[NAME_LEN*5+1], db_name[NAME_LEN+1]; - char table_name_raw[NAME_LEN*5+1], table_name[NAME_LEN+1]; - buf_chunk_t* chunk; DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_fill"); @@ -784,7 +752,6 @@ i_s_innodb_buffer_pool_pages_index_fill( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); buf_pool_mutex_enter(); - mutex_enter(&(dict_sys->mutex)); chunk = buf_pool->chunks; @@ -796,48 +763,28 @@ i_s_innodb_buffer_pool_pages_index_fill( if 
(fil_page_get_type(frame) == FIL_PAGE_INDEX) { index_id = btr_page_get_index_id(frame); - index = dict_index_get_if_in_cache_low(index_id); - if(index) - { - if((p = (char*) strchr(index->table_name, '/'))) - { - strncpy(db_name_raw, index->table_name, p-index->table_name); - db_name_raw[p-index->table_name] = 0; - filename_to_tablename(db_name_raw, db_name, sizeof(db_name)); - field_store_string(table->field[0], db_name); - p++; - } else { - field_store_string(table->field[0], NULL); - p = index->table_name; - } - strcpy(table_name_raw, (const char*)p); - filename_to_tablename(table_name_raw, table_name, sizeof(table_name)); - field_store_string(table->field[1], table_name); - field_store_string(table->field[2], index->name); + table->field[0]->store(ut_conv_dulint_to_longlong(index_id)); + table->field[1]->store(block->page.space); + table->field[2]->store(block->page.offset); + table->field[3]->store(page_get_n_recs(frame)); + table->field[4]->store(page_get_data_size(frame)); + table->field[5]->store(block->is_hashed); + table->field[6]->store(block->page.access_time); + table->field[7]->store(block->page.newest_modification != 0); + table->field[8]->store(block->page.oldest_modification != 0); + table->field[9]->store(block->page.old); + table->field[10]->store(0); + table->field[11]->store(block->page.buf_fix_count); + table->field[12]->store(block->page.flush_type); - table->field[3]->store(block->page.space); - table->field[4]->store(block->page.offset); - table->field[5]->store(page_get_n_recs(frame)); - table->field[6]->store(page_get_data_size(frame)); - table->field[7]->store(block->is_hashed); - table->field[8]->store(block->page.access_time); - table->field[9]->store(block->page.newest_modification != 0); - table->field[10]->store(block->page.oldest_modification != 0); - table->field[11]->store(block->page.old); - table->field[12]->store(0); - table->field[13]->store(block->page.buf_fix_count); - table->field[14]->store(block->page.flush_type); - - if (schema_table_store_record(thd, table)) { - status = 1; - break; - } + if (schema_table_store_record(thd, table)) { + status = 1; + break; } } } } - mutex_exit(&(dict_sys->mutex)); buf_pool_mutex_exit(); DBUG_RETURN(status); @@ -875,7 +822,6 @@ i_s_innodb_buffer_pool_pages_blob_fill( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); buf_pool_mutex_enter(); - mutex_enter(&(dict_sys->mutex)); chunk = buf_pool->chunks; @@ -931,7 +877,6 @@ i_s_innodb_buffer_pool_pages_blob_fill( } } - mutex_exit(&(dict_sys->mutex)); buf_pool_mutex_exit(); DBUG_RETURN(status); @@ -3309,6 +3254,35 @@ static ST_FIELD_INFO i_s_innodb_sys_indexes_info[] = END_OF_ST_FIELD_INFO }; +static ST_FIELD_INFO i_s_innodb_sys_stats_info[] = +{ + {STRUCT_FLD(field_name, "INDEX_ID"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "KEY_COLS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "DIFF_VALS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + 
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + static int copy_string_field( @@ -3565,6 +3539,40 @@ copy_sys_indexes_rec( static int +copy_sys_stats_rec( +/*===============*/ + TABLE* table, + const dict_index_t* index, + const rec_t* rec +) +{ + int status; + int field; + + /* INDEX_ID */ + field = dict_index_get_nth_col_pos(index, 0); + status = copy_id_field(table, 0, rec, field); + if (status) { + return status; + } + /* KEY_COLS */ + field = dict_index_get_nth_col_pos(index, 1); + status = copy_int_field(table, 1, rec, field); + if (status) { + return status; + } + /* DIFF_VALS */ + field = dict_index_get_nth_col_pos(index, 2); + status = copy_id_field(table, 2, rec, field); + if (status) { + return status; + } + + return 0; +} + +static +int i_s_innodb_schema_table_fill( /*=========================*/ THD* thd, @@ -3592,6 +3600,8 @@ i_s_innodb_schema_table_fill( id = 0; } else if (innobase_strcasecmp(table_name, "innodb_sys_indexes") == 0) { id = 1; + } else if (innobase_strcasecmp(table_name, "innodb_sys_stats") == 0) { + id = 2; } else { DBUG_RETURN(1); } @@ -3605,8 +3615,10 @@ i_s_innodb_schema_table_fill( if (id == 0) { innodb_table = dict_table_get_low("SYS_TABLES"); - } else { + } else if (id == 1) { innodb_table = dict_table_get_low("SYS_INDEXES"); + } else { + innodb_table = dict_table_get_low("SYS_STATS"); } index = UT_LIST_GET_FIRST(innodb_table->indexes); @@ -3631,8 +3643,10 @@ i_s_innodb_schema_table_fill( if (id == 0) { status = copy_sys_tables_rec(table, index, rec); - } else { + } else if (id == 1) { status = copy_sys_indexes_rec(table, index, rec); + } else { + status = copy_sys_stats_rec(table, index, rec); } if (status) { btr_pcur_close(&pcur); @@ -3697,6 +3711,21 @@ i_s_innodb_sys_indexes_init( DBUG_RETURN(0); } +static +int +i_s_innodb_sys_stats_init( +/*======================*/ + void* p) +{ + DBUG_ENTER("i_s_innodb_sys_stats_init"); + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = i_s_innodb_sys_stats_info; + schema->fill_table = i_s_innodb_schema_table_fill; + + DBUG_RETURN(0); +} + UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tables = { STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), @@ -3728,3 +3757,19 @@ UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_indexes = STRUCT_FLD(system_vars, NULL), STRUCT_FLD(__reserved1, NULL) }; + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_stats = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_SYS_STATS"), + STRUCT_FLD(author, plugin_author), + STRUCT_FLD(descr, "InnoDB SYS_STATS table"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_sys_stats_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL) +}; diff --git a/storage/xtradb/handler/i_s.h b/storage/xtradb/handler/i_s.h index 41c63a98d76..3905fdc7b06 100644 --- a/storage/xtradb/handler/i_s.h +++ b/storage/xtradb/handler/i_s.h @@ -43,5 +43,6 @@ extern struct st_mysql_plugin i_s_innodb_index_stats; extern struct st_mysql_plugin i_s_innodb_admin_command; extern struct st_mysql_plugin i_s_innodb_sys_tables; extern struct st_mysql_plugin i_s_innodb_sys_indexes; +extern struct st_mysql_plugin i_s_innodb_sys_stats; #endif /* i_s_h */ diff --git a/storage/xtradb/handler/innodb_patch_info.h b/storage/xtradb/handler/innodb_patch_info.h index 38b97411340..e68f12d0fec 100644 --- 
a/storage/xtradb/handler/innodb_patch_info.h +++ b/storage/xtradb/handler/innodb_patch_info.h @@ -47,5 +47,6 @@ struct innodb_enhancement { {"innodb_fast_checksum","Using the checksum on 32bit-unit calculation","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_files_extend","allow >4GB transaction log files, and can vary universal page size of datafiles","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_sys_tables_sys_indexes","Expose InnoDB SYS_TABLES and SYS_INDEXES schema tables","","http://www.percona.com/docs/wiki/percona-xtradb"}, +{"innodb_buffer_pool_shm","Put buffer pool contents to shared memory segment and reuse it at clean restart [experimental]","","http://www.percona.com/docs/wiki/percona-xtradb"}, {NULL, NULL, NULL, NULL} }; diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index f93510be6d6..9484146d8a3 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -36,6 +36,7 @@ Created 11/5/1995 Heikki Tuuri #include "ut0rbt.h" #ifndef UNIV_HOTBACKUP #include "os0proc.h" +#include "srv0srv.h" /** @name Modes for buf_page_get_gen */ /* @{ */ @@ -1301,11 +1302,23 @@ struct buf_block_struct{ /**********************************************************************//** Compute the hash fold value for blocks in buf_pool->zip_hash. */ /* @{ */ -#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE) +/* the fold should be relative when srv_buffer_pool_shm_key is enabled */ +#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\ + ?((ulint) (ptr) / UNIV_PAGE_SIZE)\ + :((ulint) ((void*)ptr - (void*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE)) #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ +/** A chunk of buffers. The buffer pool is allocated in chunks. */ +struct buf_chunk_struct{ + ulint mem_size; /*!< allocated size of the chunk */ + ulint size; /*!< size of frames[] and blocks[] */ + void* mem; /*!< pointer to the memory area which + was allocated for the frames */ + buf_block_t* blocks; /*!< array of buffer control blocks */ +}; + /** @brief The buffer pool statistics structure. */ struct buf_pool_stat_struct{ ulint n_page_gets; /*!< number of page gets performed; diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h index 0a7d01c95cf..d3b59e8b579 100644 --- a/storage/xtradb/include/buf0lru.h +++ b/storage/xtradb/include/buf0lru.h @@ -96,7 +96,7 @@ buf_LRU_insert_zip_clean( Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns BUF_LRU_FREED, it will not temporarily +NOTE: If this function returns BUF_LRU_FREED, it will temporarily release buf_pool_mutex. Furthermore, the page frame will no longer be accessible via bpage. 
diff --git a/storage/xtradb/include/buf0rea.h b/storage/xtradb/include/buf0rea.h index 71f62ff7b49..56d3d24a3b7 100644 --- a/storage/xtradb/include/buf0rea.h +++ b/storage/xtradb/include/buf0rea.h @@ -158,8 +158,7 @@ buf_read_recv_pages( /** The size in pages of the area which the read-ahead algorithms read if invoked */ -#define BUF_READ_AHEAD_AREA \ - ut_min(64, ut_2_power_up(buf_pool->curr_size / 32)) +#define BUF_READ_AHEAD_AREA 64 /** @name Modes used in read-ahead @{ */ /** read only pages belonging to the insert buffer tree */ diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h index 747e9b5364e..c841c2b4afe 100644 --- a/storage/xtradb/include/db0err.h +++ b/storage/xtradb/include/db0err.h @@ -28,6 +28,8 @@ Created 5/24/1996 Heikki Tuuri enum db_err { + DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new + explicit record lock was created */ DB_SUCCESS = 10, /* The following are error codes */ diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h index 1a13bd1503a..9239e031a7f 100644 --- a/storage/xtradb/include/dict0boot.h +++ b/storage/xtradb/include/dict0boot.h @@ -46,13 +46,14 @@ dict_hdr_get( /*=========*/ mtr_t* mtr); /*!< in: mtr */ /**********************************************************************//** -Returns a new row, table, index, or tree id. -@return the new id */ +Returns a new table, index, or space id. */ UNIV_INTERN -dulint +void dict_hdr_get_new_id( /*================*/ - ulint type); /*!< in: DICT_HDR_ROW_ID, ... */ + dulint* table_id, /*!< out: table id (not assigned if NULL) */ + dulint* index_id, /*!< out: index id (not assigned if NULL) */ + ulint* space_id); /*!< out: space id (not assigned if NULL) */ /**********************************************************************//** Returns a new row id. @return the new id */ @@ -100,6 +101,7 @@ dict_create(void); #define DICT_COLUMNS_ID ut_dulint_create(0, 2) #define DICT_INDEXES_ID ut_dulint_create(0, 3) #define DICT_FIELDS_ID ut_dulint_create(0, 4) +#define DICT_STATS_ID ut_dulint_create(0, 6) /* The following is a secondary index on SYS_TABLES */ #define DICT_TABLE_IDS_ID ut_dulint_create(0, 5) @@ -119,17 +121,21 @@ dict_create(void); #define DICT_HDR_ROW_ID 0 /* The latest assigned row id */ #define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ #define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ -#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. 
*/ +#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id, or 0*/ +#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID */ #define DICT_HDR_TABLES 32 /* Root of the table index tree */ #define DICT_HDR_TABLE_IDS 36 /* Root of the table index tree */ #define DICT_HDR_COLUMNS 40 /* Root of the column index tree */ #define DICT_HDR_INDEXES 44 /* Root of the index index tree */ #define DICT_HDR_FIELDS 48 /* Root of the index field index tree */ +#define DICT_HDR_STATS 52 /* Root of the stats tree */ #define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace segment into which the dictionary header is created */ + +#define DICT_HDR_XTRADB_MARK 256 /* Flag to distinguish expansion of XtraDB */ /*-------------------------------------------------------------*/ /* The field number of the page number field in the sys_indexes table @@ -139,11 +145,15 @@ clustered index */ #define DICT_SYS_INDEXES_TYPE_FIELD 6 #define DICT_SYS_INDEXES_NAME_FIELD 4 +#define DICT_SYS_STATS_DIFF_VALS_FIELD 4 + /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is updated */ #define DICT_HDR_ROW_ID_WRITE_MARGIN 256 +#define DICT_HDR_XTRADB_FLAG ut_dulint_create(0x58545241UL,0x44425F31UL) /* "XTRADB_1" */ + #ifndef UNIV_NONINL #include "dict0boot.ic" #endif diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h index cce1246b789..0249091a195 100644 --- a/storage/xtradb/include/dict0crea.h +++ b/storage/xtradb/include/dict0crea.h @@ -53,6 +53,14 @@ ind_create_graph_create( dict_index_t* index, /*!< in: index to create, built as a memory data structure */ mem_heap_t* heap); /*!< in: heap where created */ +/*********************************************************************//** +*/ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, + mem_heap_t* heap); /***********************************************************//** Creates a table. This is a high-level function used in SQL execution graphs. @return query thread to run next or NULL */ @@ -62,6 +70,13 @@ dict_create_table_step( /*===================*/ que_thr_t* thr); /*!< in: query thread */ /***********************************************************//** +*/ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr); +/***********************************************************//** Creates an index. This is a high-level function used in SQL execution graphs. 
@return query thread to run next or NULL */ @@ -170,6 +185,7 @@ struct ind_node_struct{ ins_node_t* field_def; /* child node which does the inserts of the field definitions; the row to be inserted is built by the parent node */ + ins_node_t* stats_def; commit_node_t* commit_node; /* child node which performs a commit after a successful index creation */ @@ -180,6 +196,7 @@ struct ind_node_struct{ dict_table_t* table; /*!< table which owns the index */ dtuple_t* ind_row;/* index definition row built */ ulint field_no;/* next field definition to insert */ + ulint stats_no; mem_heap_t* heap; /*!< memory heap used as auxiliary storage */ }; @@ -189,6 +206,7 @@ struct ind_node_struct{ #define INDEX_CREATE_INDEX_TREE 3 #define INDEX_COMMIT_WORK 4 #define INDEX_ADD_TO_CACHE 5 +#define INDEX_BUILD_STATS_COLS 6 #ifndef UNIV_NONINL #include "dict0crea.ic" diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 0879e91ab33..3c5e620d3c1 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -352,6 +352,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -1039,8 +1040,9 @@ void dict_update_statistics_low( /*=======================*/ dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex);/*!< in: TRUE if the caller has the + ibool has_dict_mutex, /*!< in: TRUE if the caller has the dictionary mutex */ + ibool sync); /*********************************************************************//** Calculates new estimates for table and index statistics. The statistics are used in query optimization. */ @@ -1048,7 +1050,8 @@ UNIV_INTERN void dict_update_statistics( /*===================*/ - dict_table_t* table); /*!< in/out: table */ + dict_table_t* table, /*!< in/out: table */ + ibool sync); /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ UNIV_INTERN @@ -1159,6 +1162,7 @@ struct dict_sys_struct{ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ dict_table_t* sys_fields; /*!< SYS_FIELDS table */ + dict_table_t* sys_stats; /*!< SYS_STATS table */ }; #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index ee3107a3be1..37c5a4a24fc 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -382,7 +382,7 @@ initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_struct{ dulint id; /*!< id of the table */ mem_heap_t* heap; /*!< memory heap */ - const char* name; /*!< table name */ + char* name; /*!< table name */ const char* dir_path_of_temp_table;/*!< NULL or the directory path where a TEMPORARY table that was explicitly created by a user should be placed if diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 163cacf2892..07c80ef8609 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -226,6 +226,16 @@ fil_space_create( 0 for uncompressed tablespaces */ ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ /*******************************************************************//** +Assigns a new space id for a new single-table tablespace. This works simply by +incrementing the global counter. 
If 4 billion id's is not enough, we may need +to recycle id's. +@return TRUE if assigned, FALSE if not */ +UNIV_INTERN +ibool +fil_assign_new_space_id( +/*====================*/ + ulint* space_id); /*!< in/out: space id */ +/*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. @return space size, 0 if space not found */ @@ -428,9 +438,7 @@ UNIV_INTERN ulint fil_create_new_single_table_tablespace( /*===================================*/ - ulint* space_id, /*!< in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ + ulint space_id, /*!< in: space id */ const char* tablename, /*!< in: the table name in the usual databasename/tablename format of InnoDB, or a dir path to a temp @@ -499,16 +507,6 @@ UNIV_INTERN ulint fil_load_single_table_tablespaces(void); /*===================================*/ -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void); -/*================================*/ /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index b737a00b3dc..445d94eeabb 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -215,11 +215,21 @@ innobase_casedn_str( /**********************************************************************//** Determines the connection character set. @return connection character set */ +UNIV_INTERN struct charset_info_st* innobase_get_charset( /*=================*/ void* mysql_thd); /*!< in: MySQL thread handle */ - +/**********************************************************************//** +Determines the current SQL statement. +@return SQL statement string */ +UNIV_INTERN +const char* +innobase_get_stmt( +/*==============*/ + void* mysql_thd, /*!< in: MySQL thread handle */ + size_t* length) /*!< out: length of the SQL statement */ + __attribute__((nonnull)); /******************************************************************//** This function is used to find the storage length in bytes of the first n characters for prefix indexes using a multibyte character set. 
The function @@ -258,4 +268,12 @@ thd_lock_wait_timeout( void* thd); /*!< in: thread handle (THD*), or NULL to query the global innodb_lock_wait_timeout */ +/******************************************************************//** +*/ + +ulong +thd_flush_log_at_trx_commit_session( +/*================================*/ + void* thd); + #endif diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h index b17c21a45ef..9cb410e2ad7 100644 --- a/storage/xtradb/include/hash0hash.h +++ b/storage/xtradb/include/hash0hash.h @@ -49,6 +49,28 @@ hash_table_t* hash_create( /*========*/ ulint n); /*!< in: number of array cells */ + +/*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n); + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n); + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table); + #ifndef UNIV_HOTBACKUP /*************************************************************//** Creates a mutex array to protect a hash table. */ @@ -328,6 +350,33 @@ do {\ }\ } while (0) +/********************************************************************//** +Align nodes with moving location.*/ +#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \ +do {\ + ulint i2222;\ + ulint cell_count2222;\ +\ + cell_count2222 = hash_get_n_cells(TABLE);\ +\ + for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ + NODE_TYPE* node2222;\ +\ + if ((TABLE)->array[i2222].node) \ + (TABLE)->array[i2222].node \ + += (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET);\ + node2222 = HASH_GET_FIRST((TABLE), i2222);\ +\ + while (node2222) {\ + if (node2222->PTR_NAME) \ + node2222->PTR_NAME = ((void*)node2222->PTR_NAME) \ + + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET);\ +\ + node2222 = node2222->PTR_NAME;\ + }\ + }\ +} while (0) + /************************************************************//** Gets the mutex index for a fold value in a hash table. @return mutex number */ diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h index 89a6977b589..73f885ecf04 100644 --- a/storage/xtradb/include/lock0lock.h +++ b/storage/xtradb/include/lock0lock.h @@ -341,11 +341,12 @@ lock_sec_rec_modify_check_and_lock( que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr); /*!< in/out: mini-transaction */ /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -372,9 +373,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. Sets the requested mode lock on the record. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic index 139f4041a36..1ce00fd7313 100644 --- a/storage/xtradb/include/log0log.ic +++ b/storage/xtradb/include/log0log.ic @@ -433,7 +433,10 @@ void log_free_check(void) /*================*/ { - /* ut_ad(sync_thread_levels_empty()); */ + +#ifdef UNIV_SYNC_DEBUG + ut_ad(sync_thread_levels_empty_gen(TRUE)); +#endif /* UNIV_SYNC_DEBUG */ if (log_sys->check_flush_or_checkpoint) { diff --git a/storage/xtradb/include/mach0data.ic b/storage/xtradb/include/mach0data.ic index ef20356bd31..96d2417ac81 100644 --- a/storage/xtradb/include/mach0data.ic +++ b/storage/xtradb/include/mach0data.ic @@ -36,7 +36,7 @@ mach_write_to_1( ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */ { ut_ad(b); - ut_ad(n <= 0xFFUL); + ut_ad((n | 0xFFUL) <= 0xFFUL); b[0] = (byte)n; } @@ -65,7 +65,7 @@ mach_write_to_2( ulint n) /*!< in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFUL); + ut_ad((n | 0xFFFFUL) <= 0xFFFFUL); b[0] = (byte)(n >> 8); b[1] = (byte)(n); @@ -81,10 +81,7 @@ mach_read_from_2( /*=============*/ const byte* b) /*!< in: pointer to 2 bytes */ { - ut_ad(b); - return( ((ulint)(b[0]) << 8) - + (ulint)(b[1]) - ); + return(((ulint)(b[0]) << 8) | (ulint)(b[1])); } /********************************************************//** @@ -129,7 +126,7 @@ mach_write_to_3( ulint n) /*!< in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFFFUL); + ut_ad((n | 0xFFFFFFUL) <= 0xFFFFFFUL); b[0] = (byte)(n >> 16); b[1] = (byte)(n >> 8); @@ -148,8 +145,8 @@ mach_read_from_3( { ut_ad(b); return( ((ulint)(b[0]) << 16) - + ((ulint)(b[1]) << 8) - + (ulint)(b[2]) + | ((ulint)(b[1]) << 8) + | (ulint)(b[2]) ); } @@ -183,9 +180,9 @@ mach_read_from_4( { ut_ad(b); return( ((ulint)(b[0]) << 24) - + ((ulint)(b[1]) << 16) - + ((ulint)(b[2]) << 8) - + (ulint)(b[3]) + | ((ulint)(b[1]) << 16) + | ((ulint)(b[2]) << 8) + | (ulint)(b[3]) ); } @@ -721,7 +718,7 @@ mach_read_from_2_little_endian( /*===========================*/ const byte* buf) /*!< in: from where to read */ { - return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256); + return((ulint)(buf[0]) | ((ulint)(buf[1]) << 8)); } /*********************************************************//** diff --git a/storage/xtradb/include/mtr0log.ic b/storage/xtradb/include/mtr0log.ic index db017c7d16e..63af02ba409 100644 --- a/storage/xtradb/include/mtr0log.ic +++ b/storage/xtradb/include/mtr0log.ic @@ -203,7 +203,7 @@ mlog_write_initial_log_record_fast( system tablespace */ if ((space == TRX_SYS_SPACE || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE)) - && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) { + && offset >= (ulint)FSP_EXTENT_SIZE && offset < 3 * (ulint)FSP_EXTENT_SIZE) { if (trx_doublewrite_buf_is_being_created) { /* Do nothing: we only come to this branch in an InnoDB database creation. 
We do not redo log diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h index fd46bd7db87..582cef6f803 100644 --- a/storage/xtradb/include/os0proc.h +++ b/storage/xtradb/include/os0proc.h @@ -32,6 +32,11 @@ Created 9/30/1995 Heikki Tuuri #ifdef UNIV_LINUX #include <sys/ipc.h> #include <sys/shm.h> +#else +# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H +#include <sys/ipc.h> +#include <sys/shm.h> +# endif #endif typedef void* os_process_t; @@ -70,6 +75,29 @@ os_mem_free_large( ulint size); /*!< in: size returned by os_mem_alloc_large() */ + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. +@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new); + +/****************************************************************//** +Detach shared memory segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size); /*!< in: size returned by + os_shm_alloc() */ #ifndef UNIV_NONINL #include "os0proc.ic" #endif diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h index e182c8f58be..5b2bcf7c054 100644 --- a/storage/xtradb/include/page0page.h +++ b/storage/xtradb/include/page0page.h @@ -500,7 +500,7 @@ ibool page_is_leaf( /*=========*/ const page_t* page) /*!< in: page */ - __attribute__((nonnull, pure)); + __attribute__((pure)); /************************************************************//** Gets the pointer to the next record on the page. @return pointer to next record */ diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic index 9655e6c7e27..dab9dc742e4 100644 --- a/storage/xtradb/include/page0page.ic +++ b/storage/xtradb/include/page0page.ic @@ -275,6 +275,9 @@ page_is_leaf( /*=========*/ const page_t* page) /*!< in: page */ { + if (!page) { + return(FALSE); + } return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL))); } diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h index 574809e5227..4d37302ed20 100644 --- a/storage/xtradb/include/page0zip.h +++ b/storage/xtradb/include/page0zip.h @@ -114,7 +114,7 @@ page_zip_compress( const page_t* page, /*!< in: uncompressed page */ dict_index_t* index, /*!< in: index of the B-tree node */ mtr_t* mtr) /*!< in: mini-transaction, or NULL */ - __attribute__((nonnull(1,2,3))); + __attribute__((nonnull(1,3))); /**********************************************************************//** Decompress a page. This function should tolerate errors on the compressed diff --git a/storage/xtradb/include/que0que.h b/storage/xtradb/include/que0que.h index 39f8d07af89..09a671f49b1 100644 --- a/storage/xtradb/include/que0que.h +++ b/storage/xtradb/include/que0que.h @@ -492,6 +492,8 @@ struct que_fork_struct{ #define QUE_NODE_CALL 31 #define QUE_NODE_EXIT 32 +#define QUE_NODE_INSERT_STATS 34 + /* Query thread states */ #define QUE_THR_RUNNING 1 #define QUE_THR_PROCEDURE_WAIT 2 diff --git a/storage/xtradb/include/rem0cmp.h b/storage/xtradb/include/rem0cmp.h index 421308af49b..fcea62ad486 100644 --- a/storage/xtradb/include/rem0cmp.h +++ b/storage/xtradb/include/rem0cmp.h @@ -148,7 +148,9 @@ cmp_rec_rec_simple( const rec_t* rec2, /*!< in: physical record */ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) 
*/ - const dict_index_t* index); /*!< in: data dictionary index */ + const dict_index_t* index, /*!< in: data dictionary index */ + ibool* null_eq);/*!< out: set to TRUE if + found matching null values */ /*************************************************************//** This function is used to compare two physical records. Only the common first fields are compared, and if an externally stored field is diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index d2a8734c61f..a604f6e3724 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -253,15 +253,6 @@ row_table_got_default_clust_index( /*==============================*/ const dict_table_t* table); /*!< in: table */ /*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. We have -to take into account if we generated a default clustered index for the table -@return the key number used inside MySQL */ -UNIV_INTERN -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - const dict_index_t* index); /*!< in: index */ -/*********************************************************************//** Does an update or delete of a row for MySQL. @return error code or DB_SUCCESS */ UNIV_INTERN @@ -273,27 +264,26 @@ row_update_for_mysql( row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL handle */ /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. -@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs);/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ + ibool has_latches_on_recs);/*!< in: TRUE if called + so that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. */ /*********************************************************************//** Creates an query graph node of 'update' type to be used in the MySQL interface. @@ -386,6 +376,14 @@ row_create_index_for_mysql( then checked for not being too large. 
*/ /*********************************************************************//** +*/ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx); +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. @@ -403,6 +401,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -710,18 +709,17 @@ struct row_prebuilt_struct { ulint new_rec_locks; /*!< normally 0; if srv_locks_unsafe_for_binlog is TRUE or session is using READ - COMMITTED isolation level, in a - cursor search, if we set a new - record lock on an index, this is - incremented; this is used in - releasing the locks under the - cursors if we are performing an - UPDATE and we determine after - retrieving the row that it does - not need to be locked; thus, - these can be used to implement a - 'mini-rollback' that releases - the latest record locks */ + COMMITTED or READ UNCOMMITTED + isolation level, set in + row_search_for_mysql() if we set a new + record lock on the secondary + or clustered index; this is + used in row_unlock_for_mysql() + when releasing the lock under + the cursor if we determine + after retrieving the row that + it does not need to be locked + ('mini-rollback') */ ulint mysql_prefix_len;/*!< byte offset of the end of the last requested column */ ulint mysql_row_len; /*!< length in bytes of a row in the diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index a463075f435..0904a5da1eb 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -156,6 +156,8 @@ extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */ extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; +extern uint srv_buffer_pool_shm_key; + extern ibool srv_thread_concurrency_timer_based; extern ulint srv_n_file_io_threads; @@ -207,6 +209,7 @@ extern ulong srv_stats_method; #define SRV_STATS_METHOD_IGNORE_NULLS 2 extern ulong srv_stats_auto_update; extern ulint srv_stats_update_need_lock; +extern ibool srv_use_sys_stats_table; extern ibool srv_use_doublewrite_buf; extern ibool srv_use_checksums; @@ -367,8 +370,9 @@ enum { when writing data files, but do flush after writing to log files */ SRV_UNIX_NOSYNC, /*!< do not flush after writing */ - SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on + SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on data files */ + SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */ }; /** Alternatives for file i/o in Windows */ diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h index 1fe517ab30a..4edf93f4042 100644 --- a/storage/xtradb/include/sync0rw.h +++ b/storage/xtradb/include/sync0rw.h @@ -556,11 +556,12 @@ struct rw_lock_struct { //unsigned cline:14; /*!< Line where created */ unsigned last_s_line:14; /*!< Line number where last time s-locked */ unsigned last_x_line:14; /*!< Line number where last time x-locked */ +#ifdef UNIV_DEBUG ulint magic_n; /*!< RW_LOCK_MAGIC_N */ -}; - /** Value of rw_lock_struct::magic_n */ #define RW_LOCK_MAGIC_N 22643 +#endif /* 
UNIV_DEBUG */ +}; #ifdef UNIV_SYNC_DEBUG /** The structure for storing debug info of an rw-lock */ diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index 7e210ea82f1..a500cf1da45 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -438,7 +438,7 @@ or row lock! */ #define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the file format tag */ #define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve - this in X-mode, implicit or backround + this in X-mode; implicit or backround operations purge, rollback, foreign key checks reserve this in S-mode */ #define SYNC_DICT 1000 diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h index 8b941cdd4e6..9ef9485b611 100644 --- a/storage/xtradb/include/trx0sys.h +++ b/storage/xtradb/include/trx0sys.h @@ -326,6 +326,7 @@ UNIV_INTERN void trx_sys_update_mysql_binlog_offset( /*===============================*/ + trx_sysf_t* sys_header, const char* file_name_in,/*!< in: MySQL log file name */ ib_int64_t offset, /*!< in: position in that log file */ ulint field, /*!< in: offset of the MySQL log info field in diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index 4a4b54b93a0..4c0ce392bcd 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -497,6 +497,7 @@ struct trx_struct{ FALSE, one can save CPU time and about 150 bytes in the undo log size as then we skip XA steps */ + ulint flush_log_at_trx_commit_session; ulint flush_log_later;/* In 2PC, we hold the prepare_commit mutex across both phases. In that case, we @@ -560,9 +561,6 @@ struct trx_struct{ /*------------------------------*/ void* mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ - char** mysql_query_str;/* pointer to the field in mysqld_thd - which contains the pointer to the - current SQL query string */ const char* mysql_log_file_name; /* if MySQL binlog is used, this field contains a pointer to the latest file diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 90ce618b9da..71476443964 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -46,8 +46,8 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 8 -#define PERCONA_INNODB_VERSION 11.2 +#define INNODB_VERSION_BUGFIX 10 +#define PERCONA_INNODB_VERSION 12.0 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -170,7 +170,7 @@ command. Not tested on Windows. */ #if defined(HAVE_valgrind)&& defined(HAVE_VALGRIND_MEMCHECK_H) # define UNIV_DEBUG_VALGRIND -#endif /* HAVE_valgrind */ +#endif #if 0 #define UNIV_DEBUG_VALGRIND /* Enable extra Valgrind instrumentation */ @@ -208,10 +208,6 @@ operations (very slow); also UNIV_DEBUG must be defined */ adaptive hash index */ #define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output in sync0sync.c */ -#define UNIV_BTR_AVOID_COPY /* when splitting B-tree nodes, - do not move any records when - all the records would - be moved */ #define UNIV_BTR_PRINT /* enable functions for printing B-trees */ #define UNIV_ZIP_DEBUG /* extensive consistency checks @@ -301,6 +297,12 @@ management to ensure correct alignment for doubles etc. */ /* Maximum number of parallel threads in a parallelized operation */ #define UNIV_MAX_PARALLELISM 32 +/* The maximum length of a table name. 
This is the MySQL limit and is +defined in mysql_com.h like NAME_CHAR_LEN*SYSTEM_CHARSET_MBMAXLEN, the +number does not include a terminating '\0'. InnoDB probably can handle +longer names internally */ +#define MAX_TABLE_NAME_LEN 192 + /* UNIVERSAL TYPE DEFINITIONS ========================== diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h index 7b15c052978..69809f22d36 100644 --- a/storage/xtradb/include/ut0lst.h +++ b/storage/xtradb/include/ut0lst.h @@ -257,5 +257,48 @@ do { \ ut_a(ut_list_node_313 == NULL); \ } while (0) +/********************************************************************//** +Align nodes with moving location. +@param NAME the name of the list +@param TYPE node type +@param BASE base node (not a pointer to it) +@param OFFSET offset moved */ +#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \ +do { \ + ulint ut_list_i_313; \ + TYPE* ut_list_node_313; \ + \ + if ((BASE).start) \ + (BASE).start = ((void*)((BASE).start) \ + + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((BASE).end) \ + (BASE).end = ((void*)((BASE).end) \ + + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\ + \ + ut_list_node_313 = (BASE).start; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + if ((ut_list_node_313->NAME).prev) \ + (ut_list_node_313->NAME).prev = ((void*)((ut_list_node_313->NAME).prev)\ + + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((ut_list_node_313->NAME).next) \ + (ut_list_node_313->NAME).next = ((void*)((ut_list_node_313->NAME).next)\ + + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\ + ut_list_node_313 = (ut_list_node_313->NAME).next; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ + \ + ut_list_node_313 = (BASE).end; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + ut_list_node_313 = (ut_list_node_313->NAME).prev; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ +} while (0) + #endif diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c index b103ee79578..7ec4a53e0ea 100644 --- a/storage/xtradb/lock/lock0lock.c +++ b/storage/xtradb/lock/lock0lock.c @@ -1733,11 +1733,11 @@ lock_rec_create( Enqueues a waiting request for a lock which cannot be granted immediately. Checks for deadlocks. 
@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or -DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another -transaction was chosen as a victim, and we got the lock immediately: -no need to wait then */ +DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that +there was a deadlock, but another transaction was chosen as a victim, +and we got the lock immediately: no need to wait then */ static -ulint +enum db_err lock_rec_enqueue_waiting( /*=====================*/ ulint type_mode,/*!< in: lock mode this @@ -1811,7 +1811,7 @@ lock_rec_enqueue_waiting( if (trx->wait_lock == NULL) { - return(DB_SUCCESS); + return(DB_SUCCESS_LOCKED_REC); } trx->que_state = TRX_QUE_LOCK_WAIT; @@ -1931,6 +1931,16 @@ somebody_waits: return(lock_rec_create(type_mode, block, heap_no, index, trx)); } +/** Record locking request status */ +enum lock_rec_req_status { + /** Failed to acquire a lock */ + LOCK_REC_FAIL, + /** Succeeded in acquiring a lock (implicit or already acquired) */ + LOCK_REC_SUCCESS, + /** Explicitly created a new lock */ + LOCK_REC_SUCCESS_CREATED +}; + /*********************************************************************//** This is a fast routine for locking a record in the most common cases: there are no explicit locks on the page, or there is just one lock, owned @@ -1938,9 +1948,9 @@ by this transaction, and of the right type_mode. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. -@return TRUE if locking succeeded */ +@return whether the locking succeeded */ UNIV_INLINE -ibool +enum lock_rec_req_status lock_rec_lock_fast( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -1979,19 +1989,19 @@ lock_rec_lock_fast( lock_rec_create(mode, block, heap_no, index, trx); } - return(TRUE); + return(LOCK_REC_SUCCESS_CREATED); } if (lock_rec_get_next_on_page(lock)) { - return(FALSE); + return(LOCK_REC_FAIL); } if (lock->trx != trx || lock->type_mode != (mode | LOCK_REC) || lock_rec_get_n_bits(lock) <= heap_no) { - return(FALSE); + return(LOCK_REC_FAIL); } if (!impl) { @@ -2000,10 +2010,11 @@ lock_rec_lock_fast( if (!lock_rec_get_nth_bit(lock, heap_no)) { lock_rec_set_nth_bit(lock, heap_no); + return(LOCK_REC_SUCCESS_CREATED); } } - return(TRUE); + return(LOCK_REC_SUCCESS); } /*********************************************************************//** @@ -2011,9 +2022,10 @@ This is the general, and slower, routine for locking a record. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock_slow( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2030,7 +2042,6 @@ lock_rec_lock_slow( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; - ulint err; ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S @@ -2049,27 +2060,23 @@ lock_rec_lock_slow( /* The trx already has a strong enough lock on rec: do nothing */ - err = DB_SUCCESS; } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) { /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. */ - err = lock_rec_enqueue_waiting(mode, block, heap_no, - index, thr); - } else { - if (!impl) { - /* Set the requested lock on the record */ - - lock_rec_add_to_queue(LOCK_REC | mode, block, - heap_no, index, trx); - } + return(lock_rec_enqueue_waiting(mode, block, heap_no, + index, thr)); + } else if (!impl) { + /* Set the requested lock on the record */ - err = DB_SUCCESS; + lock_rec_add_to_queue(LOCK_REC | mode, block, + heap_no, index, trx); + return(DB_SUCCESS_LOCKED_REC); } - return(err); + return(DB_SUCCESS); } /*********************************************************************//** @@ -2078,9 +2085,10 @@ possible, enqueues a waiting lock request. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. -@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock( /*==========*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2096,8 +2104,6 @@ lock_rec_lock( dict_index_t* index, /*!< in: index of record */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); @@ -2109,18 +2115,20 @@ lock_rec_lock( || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP || mode - (LOCK_MODE_MASK & mode) == 0); - if (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { - - /* We try a simplified and faster subroutine for the most - common cases */ - - err = DB_SUCCESS; - } else { - err = lock_rec_lock_slow(impl, mode, block, - heap_no, index, thr); + /* We try a simplified and faster subroutine for the most + common cases */ + switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { + case LOCK_REC_SUCCESS: + return(DB_SUCCESS); + case LOCK_REC_SUCCESS_CREATED: + return(DB_SUCCESS_LOCKED_REC); + case LOCK_REC_FAIL: + return(lock_rec_lock_slow(impl, mode, block, + heap_no, index, thr)); } - return(err); + ut_error; + return(DB_ERROR); } /*********************************************************************//** @@ -3948,8 +3956,8 @@ lock_rec_unlock( const rec_t* rec, /*!< in: record */ enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */ { + lock_t* first_lock; lock_t* lock; - lock_t* release_lock = NULL; ulint heap_no; ut_ad(trx && rec); @@ -3959,48 +3967,40 @@ lock_rec_unlock( mutex_enter(&kernel_mutex); - lock = lock_rec_get_first(block, heap_no); + first_lock = lock_rec_get_first(block, heap_no); /* Find the last lock with the same lock_mode 
and transaction from the record. */ - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock->trx == trx && lock_get_mode(lock) == lock_mode) { - release_lock = lock; ut_a(!lock_get_wait(lock)); + lock_rec_reset_nth_bit(lock, heap_no); + goto released; } - - lock = lock_rec_get_next(heap_no, lock); } - /* If a record lock is found, release the record lock */ - - if (UNIV_LIKELY(release_lock != NULL)) { - lock_rec_reset_nth_bit(release_lock, heap_no); - } else { - mutex_exit(&kernel_mutex); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: unlock row could not" - " find a %lu mode lock on the record\n", - (ulong) lock_mode); + mutex_exit(&kernel_mutex); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: unlock row could not" + " find a %lu mode lock on the record\n", + (ulong) lock_mode); - return; - } + return; +released: /* Check if we can now grant waiting lock requests */ - lock = lock_rec_get_first(block, heap_no); - - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) { /* Grant the lock */ lock_grant(lock); } - - lock = lock_rec_get_next(heap_no, lock); } mutex_exit(&kernel_mutex); @@ -5095,7 +5095,14 @@ lock_rec_insert_check_and_lock( lock_mutex_exit_kernel(); - if ((err == DB_SUCCESS) && !dict_index_is_clust(index)) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ + case DB_SUCCESS: + if (dict_index_is_clust(index)) { + break; + } /* Update the page max trx id field */ page_update_max_trx_id(block, buf_block_get_page_zip(block), @@ -5218,6 +5225,10 @@ lock_clust_rec_modify_check_and_lock( ut_ad(lock_rec_queue_validate(block, rec, index, offsets)); + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + return(err); } @@ -5284,22 +5295,27 @@ lock_sec_rec_modify_check_and_lock( } #endif /* UNIV_DEBUG */ - if (err == DB_SUCCESS) { + if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) { /* Update the page max trx id field */ + /* It might not be necessary to do this if + err == DB_SUCCESS (no new lock created), + but it should not cost too much performance. */ page_update_max_trx_id(block, buf_block_get_page_zip(block), thr_get_trx(thr)->id, mtr); + err = DB_SUCCESS; } return(err); } /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5320,8 +5336,8 @@ lock_sec_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(!dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5372,9 +5388,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. Sets the requested mode lock on the record. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5395,8 +5412,8 @@ lock_clust_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5467,17 +5484,22 @@ lock_clust_rec_read_check_and_lock_alt( mem_heap_t* tmp_heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ulint ret; + ulint err; rec_offs_init(offsets_); offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &tmp_heap); - ret = lock_clust_rec_read_check_and_lock(flags, block, rec, index, + err = lock_clust_rec_read_check_and_lock(flags, block, rec, index, offsets, mode, gap_mode, thr); if (tmp_heap) { mem_heap_free(tmp_heap); } - return(ret); + + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + + return(err); } /*******************************************************************//** diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index 03d097d1c12..fade31037b5 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -1111,6 +1111,7 @@ log_io_complete( group = (log_group_t*)((ulint)group - 1); if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { fil_flush(group->space_id); @@ -1132,6 +1133,7 @@ log_io_complete( logs and cannot end up here! */ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT && srv_unix_file_flush_method != SRV_UNIX_NOSYNC && srv_flush_log_at_trx_commit != 2) { @@ -1512,7 +1514,8 @@ loop: mutex_exit(&(log_sys->mutex)); - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC + || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { /* O_DSYNC means the OS did not buffer the log file at all: so we have also flushed to disk what we have written */ diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c index 1c9b4960ee4..bbb634addb0 100644 --- a/storage/xtradb/log/log0recv.c +++ b/storage/xtradb/log/log0recv.c @@ -2965,9 +2965,12 @@ recv_recovery_from_checkpoint_start_func( ib_uint64_t contiguous_lsn; ib_uint64_t archived_lsn; byte* buf; - byte log_hdr_buf[LOG_FILE_HDR_SIZE]; + byte* log_hdr_buf; + byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE]; ulint err; + log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE); + #ifdef UNIV_LOG_ARCHIVE ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX); /** TRUE when recovering from a checkpoint */ diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index c79a41626c9..48d796c38e1 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -214,7 +214,7 @@ static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */ /* Per thread buffer used for merged IO requests. Used by os_aio_simulated_handle so that a buffer doesn't have to be allocated for each request. 
*/ -static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS]; +static byte* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS]; static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS]; /** Number of asynchronous I/O segments. Set by os_aio_init(). */ @@ -1379,7 +1379,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? @@ -1466,7 +1470,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? @@ -1494,6 +1502,11 @@ try_again: os_file_set_nocache(file, name, mode_str); } + /* ALL_O_DIRECT: O_DIRECT also for transaction log file */ + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { + os_file_set_nocache(file, name, mode_str); + } + #ifdef USE_FILE_LOCK if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) { diff --git a/storage/xtradb/os/os0proc.c b/storage/xtradb/os/os0proc.c index 48922886f23..8f6c7f430f7 100644 --- a/storage/xtradb/os/os0proc.c +++ b/storage/xtradb/os/os0proc.c @@ -229,3 +229,173 @@ os_mem_free_large( } #endif } + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. 
+@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new) +{ + void* ptr; + ulint size; + int shmid; + + *is_new = FALSE; +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + fprintf(stderr, + "InnoDB: The shared memory key %#x (%d) is specified.\n", + key, key); +# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX + if (!os_use_large_pages || !os_large_page_size) { + goto skip; + } + + /* Align block size to os_large_page_size */ + ut_ad(ut_is_2pow(os_large_page_size)); + size = ut_2pow_round(*n + (os_large_page_size - 1), + os_large_page_size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: HugeTLB: The shared memory segment seems to exist already.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n", + size, errno); + goto skip; + } else { + fprintf(stderr, + "InnoDB: HugeTLB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(new) errno %d\n", + size, errno); + goto skip; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: HugeTLB: The new shared memory segment is created.\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + return(ptr); + } +skip: + *is_new = FALSE; +# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */ +# ifdef HAVE_GETPAGESIZE + size = getpagesize(); +# else + size = UNIV_PAGE_SIZE; +# endif + /* Align block size to system page size */ + ut_ad(ut_is_2pow(size)); + size = *n = ut_2pow_round(*n + (size - 1), size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: The shared memory segment seems to exist already.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } else { + fprintf(stderr, + "InnoDB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes.(new) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: The new shared memory segment is created.\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + } +end: +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); + ptr = NULL; +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + return(ptr); +} + +/****************************************************************//** +Detach shared memory 
segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size) /*!< in: size returned by + os_shm_alloc() */ +{ + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + os_fast_mutex_unlock(&ut_list_mutex); + +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + if (!shmdt(ptr)) { + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + ut_total_allocated_memory -= size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_FREE(ptr, size); + } +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ +} diff --git a/storage/xtradb/page/page0zip.c b/storage/xtradb/page/page0zip.c index 7ef44f3246f..a94d2d54417 100644 --- a/storage/xtradb/page/page0zip.c +++ b/storage/xtradb/page/page0zip.c @@ -571,7 +571,7 @@ page_zip_dir_encode( /* Traverse the list of stored records in the collation order, starting from the first user record. */ - rec = page + PAGE_NEW_INFIMUM, TRUE; + rec = page + PAGE_NEW_INFIMUM; i = 0; @@ -1153,6 +1153,10 @@ page_zip_compress( FILE* logfile = NULL; #endif + if (!page) { + return(FALSE); + } + ut_a(page_is_comp(page)); ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(page_simple_validate_new((page_t*) page)); @@ -1464,6 +1468,7 @@ page_zip_fields_free( dict_table_t* table = index->table; mem_heap_free(index->heap); mutex_free(&(table->autoinc_mutex)); + ut_free(table->name); mem_heap_free(table->heap); } } diff --git a/storage/xtradb/que/que0que.c b/storage/xtradb/que/que0que.c index 2fe046fa9b8..5c85a04d139 100644 --- a/storage/xtradb/que/que0que.c +++ b/storage/xtradb/que/que0que.c @@ -622,11 +622,21 @@ que_graph_free_recursive( que_graph_free_recursive(cre_ind->ind_def); que_graph_free_recursive(cre_ind->field_def); + if (srv_use_sys_stats_table) + que_graph_free_recursive(cre_ind->stats_def); que_graph_free_recursive(cre_ind->commit_node); mem_heap_free(cre_ind->heap); break; + case QUE_NODE_INSERT_STATS: + cre_ind = node; + + que_graph_free_recursive(cre_ind->stats_def); + que_graph_free_recursive(cre_ind->commit_node); + + mem_heap_free(cre_ind->heap); + break; case QUE_NODE_PROC: que_graph_free_stat_list(((proc_node_t*)node)->stat_list); @@ -1139,6 +1149,8 @@ que_node_print_info( str = "CREATE TABLE"; } else if (type == QUE_NODE_CREATE_INDEX) { str = "CREATE INDEX"; + } else if (type == QUE_NODE_INSERT_STATS) { + str = "INSERT TO SYS_STATS"; } else if (type == QUE_NODE_FOR) { str = "FOR LOOP"; } else if (type == QUE_NODE_RETURN) { @@ -1256,6 +1268,8 @@ que_thr_step( thr = dict_create_table_step(thr); } else if (type == QUE_NODE_CREATE_INDEX) { thr = dict_create_index_step(thr); + } else if (type == QUE_NODE_INSERT_STATS) { + thr = dict_insert_stats_step(thr); } else if (type == QUE_NODE_ROW_PRINTF) { thr = row_printf_step(thr); } else { diff --git a/storage/xtradb/rem/rem0cmp.c b/storage/xtradb/rem/rem0cmp.c index 45230f1d7b1..8ee434f85da 100644 --- a/storage/xtradb/rem/rem0cmp.c +++ b/storage/xtradb/rem/rem0cmp.c @@ -706,7 +706,9 @@ cmp_rec_rec_simple( const rec_t* rec2, /*!< in: physical record */ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) 
*/ - const dict_index_t* index) /*!< in: data dictionary index */ + const dict_index_t* index, /*!< in: data dictionary index */ + ibool* null_eq)/*!< out: set to TRUE if + found matching null values */ { ulint rec1_f_len; /*!< length of current field in rec1 */ const byte* rec1_b_ptr; /*!< pointer to the current byte @@ -753,6 +755,9 @@ cmp_rec_rec_simple( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { + if (null_eq) { + *null_eq = TRUE; + } goto next_field; diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index d7475d613ad..d4925e46f97 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -51,6 +51,15 @@ Created 4/20/1996 Heikki Tuuri #define ROW_INS_PREV 1 #define ROW_INS_NEXT 2 +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ /*********************************************************************//** Creates an insert node struct. @@ -1121,9 +1130,9 @@ nonstandard_exit_func: /*********************************************************************//** Sets a shared lock on a record. Used in locking possible duplicate key records and also in checking foreign key constraints. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_shared_rec_lock( /*========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1134,7 +1143,7 @@ row_ins_set_shared_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1152,9 +1161,9 @@ row_ins_set_shared_rec_lock( /*********************************************************************//** Sets a exclusive lock on a record. 
Used in locking possible duplicate key records -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_exclusive_rec_lock( /*===========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1165,7 +1174,7 @@ row_ins_set_exclusive_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1205,7 +1214,6 @@ row_ins_check_foreign_constraint( dict_index_t* check_index; ulint n_fields_cmp; btr_pcur_t pcur; - ibool moved; int cmp; ulint err; ulint i; @@ -1336,7 +1344,7 @@ run_again: /* Scan index records and check if there is a matching record */ - for (;;) { + do { const rec_t* rec = btr_pcur_get_rec(&pcur); const buf_block_t* block = btr_pcur_get_block(&pcur); @@ -1348,7 +1356,7 @@ run_again: if (page_rec_is_infimum(rec)) { - goto next_rec; + continue; } offsets = rec_get_offsets(rec, check_index, @@ -1359,12 +1367,13 @@ run_again: err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - - break; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + continue; + default: + goto end_scan; } - - goto next_rec; } cmp = cmp_dtuple_rec(entry, rec, offsets); @@ -1375,9 +1384,12 @@ run_again: err = row_ins_set_shared_rec_lock( LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } } else { /* Found a matching record. Lock only @@ -1388,15 +1400,18 @@ run_again: LOCK_REC_NOT_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } if (check_ref) { err = DB_SUCCESS; - break; + goto end_scan; } else if (foreign->type != 0) { /* There is an ON UPDATE or ON DELETE condition: check them in a separate @@ -1422,7 +1437,7 @@ run_again: err = DB_FOREIGN_DUPLICATE_KEY; } - break; + goto end_scan; } /* row_ins_foreign_check_on_constraint @@ -1435,49 +1450,41 @@ run_again: thr, foreign, rec, entry); err = DB_ROW_IS_REFERENCED; - break; + goto end_scan; } } - } + } else { + ut_a(cmp < 0); - if (cmp < 0) { err = row_ins_set_shared_rec_lock( LOCK_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - break; - } - - if (check_ref) { - err = DB_NO_REFERENCED_ROW; - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - } else { - err = DB_SUCCESS; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + row_ins_foreign_report_add_err( + trx, foreign, rec, entry); + } else { + err = DB_SUCCESS; + } } - break; + goto end_scan; } + } while (btr_pcur_move_to_next(&pcur, &mtr)); - ut_a(cmp == 0); -next_rec: - moved = btr_pcur_move_to_next(&pcur, &mtr); - - if (!moved) { - if (check_ref) { - rec = btr_pcur_get_rec(&pcur); - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - err = DB_NO_REFERENCED_ROW; - } else { - err = DB_SUCCESS; - } - - break; - } + if (check_ref) { + row_ins_foreign_report_add_err( + trx, foreign, btr_pcur_get_rec(&pcur), entry); + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; } +end_scan: btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -1725,9 +1732,13 @@ row_ins_scan_sec_index_for_duplicate( rec, index, offsets, thr); } - if (err != 
DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: break; + default: + goto end_scan; } if (page_rec_is_supremum(rec)) { @@ -1744,17 +1755,15 @@ row_ins_scan_sec_index_for_duplicate( thr_get_trx(thr)->error_info = index; - break; + goto end_scan; } + } else { + ut_a(cmp < 0); + goto end_scan; } - - if (cmp < 0) { - break; - } - - ut_a(cmp == 0); } while (btr_pcur_move_to_next(&pcur, &mtr)); +end_scan: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -1843,7 +1852,11 @@ row_ins_duplicate_error_in_clust( cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } @@ -1883,7 +1896,11 @@ row_ins_duplicate_error_in_clust( rec, cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c index 6ee93d24ed3..47c03c77850 100644 --- a/storage/xtradb/row/row0merge.c +++ b/storage/xtradb/row/row0merge.c @@ -717,14 +717,16 @@ row_merge_read( } /********************************************************************//** -Read a merge block from the file system. +Write a merge block to the file system. @return TRUE if request was successful, FALSE if fail */ static ibool row_merge_write( /*============*/ int fd, /*!< in: file descriptor */ - ulint offset, /*!< in: offset where to write */ + ulint offset, /*!< in: offset where to read + in number of row_merge_block_t + elements */ const void* buf) /*!< in: data */ { ib_uint64_t ofs = ((ib_uint64_t) offset) @@ -1075,11 +1077,14 @@ row_merge_cmp( record to be compared */ const ulint* offsets1, /*!< in: first record offsets */ const ulint* offsets2, /*!< in: second record offsets */ - const dict_index_t* index) /*!< in: index */ + const dict_index_t* index, /*!< in: index */ + ibool* null_eq) /*!< out: set to TRUE if + found matching null values */ { int cmp; - cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index); + cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index, + null_eq); #ifdef UNIV_DEBUG if (row_merge_print_cmp) { @@ -1452,11 +1457,13 @@ corrupt: } while (mrec0 && mrec1) { + ibool null_eq = FALSE; switch (row_merge_cmp(mrec0, mrec1, - offsets0, offsets1, index)) { + offsets0, offsets1, index, + &null_eq)) { case 0: if (UNIV_UNLIKELY - (dict_index_is_unique(index))) { + (dict_index_is_unique(index) && !null_eq)) { innobase_rec_to_mysql(table, mrec0, index, offsets0); mem_heap_free(heap); @@ -1578,22 +1585,28 @@ row_merge( const dict_index_t* index, /*!< in: index being created */ merge_file_t* file, /*!< in/out: file containing index entries */ - ulint* half, /*!< in/out: half the file */ row_merge_block_t* block, /*!< in/out: 3 buffers */ int* tmpfd, /*!< in/out: temporary file handle */ - TABLE* table) /*!< in/out: MySQL table, for + TABLE* table, /*!< in/out: MySQL table, for reporting erroneous key value if applicable */ + ulint* num_run,/*!< in/out: Number of runs remain + to be merged */ + ulint* run_offset) /*!< in/out: Array contains the + first offset number for each merge + run */ { ulint foffs0; /*!< first input offset */ ulint foffs1; /*!< second input offset */ ulint error; /*!< error code */ merge_file_t of; /*!< output file */ - const ulint ihalf = *half; + const ulint ihalf = run_offset[*num_run / 2]; /*!< half the input file */ - ulint ohalf; /*!< half the output file */ + 
ulint n_run = 0; + /*!< num of runs generated from this merge */ UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]); + ut_ad(ihalf < file->offset); of.fd = *tmpfd; @@ -1601,17 +1614,20 @@ row_merge( of.n_rec = 0; /* Merge blocks to the output file. */ - ohalf = 0; foffs0 = 0; foffs1 = ihalf; + UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset); + for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) { - ulint ahalf; /*!< arithmetic half the input file */ if (UNIV_UNLIKELY(trx_is_interrupted(trx))) { return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + error = row_merge_blocks(index, file, block, &foffs0, &foffs1, &of, table); @@ -1619,21 +1635,6 @@ row_merge( return(error); } - /* Record the offset of the output file when - approximately half the output has been generated. In - this way, the next invocation of row_merge() will - spend most of the time in this loop. The initial - estimate is ohalf==0. */ - ahalf = file->offset / 2; - ut_ad(ohalf <= of.offset); - - /* Improve the estimate until reaching half the input - file size, or we can not get any closer to it. All - comparands should be non-negative when !(ohalf < ahalf) - because ohalf <= of.offset. */ - if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) { - ohalf = of.offset; - } } /* Copy the last blocks, if there are any. */ @@ -1643,6 +1644,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) { return(DB_CORRUPTION); } @@ -1655,6 +1659,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) { return(DB_CORRUPTION); } @@ -1666,10 +1673,23 @@ row_merge( return(DB_CORRUPTION); } + ut_ad(n_run <= *num_run); + + *num_run = n_run; + + /* Each run can contain one or more offsets. As merge goes on, + the number of runs (to merge) will reduce until we have one + single run. So the number of runs will always be smaller than + the number of offsets in file */ + ut_ad((*num_run) <= file->offset); + + /* The number of offsets in output file is always equal or + smaller than input file */ + ut_ad(of.offset <= file->offset); + /* Swap file descriptors for the next pass. */ *tmpfd = file->fd; *file = of; - *half = ohalf; UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]); @@ -1694,27 +1714,44 @@ row_merge_sort( if applicable */ { ulint half = file->offset / 2; + ulint num_runs; + ulint* run_offset; + ulint error = DB_SUCCESS; + + /* Record the number of merge runs we need to perform */ + num_runs = file->offset; + + /* If num_runs are less than 1, nothing to merge */ + if (num_runs <= 1) { + return(error); + } + + /* "run_offset" records each run's first offset number */ + run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint)); + + /* This tells row_merge() where to start for the first round + of merge. */ + run_offset[half] = half; /* The file should always contain at least one byte (the end of file marker). Thus, it must be at least one block. 
*/ ut_ad(file->offset > 0); + /* Merge the runs until we have one big run */ do { - ulint error; + error = row_merge(trx, index, file, block, tmpfd, + table, &num_runs, run_offset); - error = row_merge(trx, index, file, &half, - block, tmpfd, table); + UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset); if (error != DB_SUCCESS) { - return(error); + break; } + } while (num_runs > 1); - /* half > 0 should hold except when the file consists - of one block. No need to merge further then. */ - ut_ad(half > 0 || file->offset == 1); - } while (half < file->offset && half > 0); + mem_free(run_offset); - return(DB_SUCCESS); + return(error); } /*************************************************************//** @@ -1986,6 +2023,8 @@ row_merge_drop_index( "UPDATE SYS_INDEXES SET NAME=CONCAT('" TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n" "COMMIT WORK;\n" + /* Drop the statistics of the index. */ + "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n" /* Drop the field definitions of the index. */ "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n" /* Drop the index definition and the B-tree. */ @@ -2094,13 +2133,16 @@ row_merge_drop_temp_indexes(void) btr_pcur_store_position(&pcur, &mtr); btr_pcur_commit_specify_mtr(&pcur, &mtr); - table = dict_load_table_on_id(table_id); + table = dict_table_get_on_id_low(table_id); if (table) { dict_index_t* index; + dict_index_t* next_index; for (index = dict_table_get_first_index(table); - index; index = dict_table_get_next_index(index)) { + index; index = next_index) { + + next_index = dict_table_get_next_index(index); if (*index->name == TEMP_INDEX_PREFIX) { row_merge_drop_index(index, table, trx); @@ -2303,7 +2345,7 @@ row_merge_rename_tables( { ulint err = DB_ERROR; pars_info_t* info; - const char* old_name= old_table->name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_ad(old_table != new_table); @@ -2311,6 +2353,17 @@ row_merge_rename_tables( ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); + /* store the old/current name to an automatic variable */ + if (strlen(old_table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, old_table->name, strlen(old_table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", old_table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } + trx->op_info = "renaming tables"; /* We use the private SQL parser of Innobase to generate the query diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c index e520065ea04..98e2d22c56b 100644 --- a/storage/xtradb/row/row0mysql.c +++ b/storage/xtradb/row/row0mysql.c @@ -522,6 +522,7 @@ handle_new_error: case DB_CANNOT_ADD_CONSTRAINT: case DB_TOO_MANY_CONCURRENT_TRXS: case DB_OUT_OF_FILE_SPACE: + case DB_INTERRUPTED: if (savept) { /* Roll back the latest, possibly incomplete insertion or update */ @@ -624,6 +625,8 @@ row_create_prebuilt( prebuilt->select_lock_type = LOCK_NONE; prebuilt->stored_select_lock_type = 99999999; + UNIV_MEM_INVALID(&prebuilt->stored_select_lock_type, + sizeof prebuilt->stored_select_lock_type); prebuilt->search_tuple = dtuple_create( heap, 2 * dict_table_get_n_cols(table)); @@ -864,7 +867,7 @@ row_update_statistics_if_needed( if (counter > 2000000000 || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) { - dict_update_statistics(table); + dict_update_statistics(table, TRUE); } } @@ -1124,6 +1127,13 @@ row_insert_for_mysql( thr = que_fork_get_first_thr(prebuilt->ins_graph); + if 
(!prebuilt->mysql_has_locked) { + fprintf(stderr, "InnoDB: Error: row_insert_for_mysql is called without ha_innobase::external_lock()\n"); + if (trx->mysql_thd != NULL) { + innobase_mysql_print_thd(stderr, trx->mysql_thd, 600); + } + } + if (prebuilt->sql_stat_start) { node->state = INS_NODE_SET_IX_LOCK; prebuilt->sql_stat_start = FALSE; @@ -1430,27 +1440,26 @@ run_again: } /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -this session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. -@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs)/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ + ibool has_latches_on_recs)/*!< in: TRUE if called so + that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. */ { btr_pcur_t* pcur = prebuilt->pcur; btr_pcur_t* clust_pcur = prebuilt->clust_pcur; @@ -1648,37 +1657,6 @@ row_table_got_default_clust_index( } /*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. We have -to take into account if we generated a default clustered index for the table -@return the key number used inside MySQL */ -UNIV_INTERN -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - const dict_index_t* index) /*!< in: index */ -{ - const dict_index_t* ind; - ulint i; - - ut_a(index); - - i = 0; - ind = dict_table_get_first_index(index->table); - - while (index != ind) { - ind = dict_table_get_next_index(ind); - i++; - } - - if (row_table_got_default_clust_index(index->table)) { - ut_a(i > 0); - i--; - } - - return(i); -} - -/*********************************************************************//** Locks the data dictionary in shared mode from modifications, for performing foreign key check, rollback, or other operation invisible to MySQL. 
*/ UNIV_INTERN @@ -2044,6 +2022,45 @@ error_handling: } /*********************************************************************//** +*/ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx) +{ + ind_node_t* node; + mem_heap_t* heap; + que_thr_t* thr; + ulint err; + + ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + + trx->op_info = "try to insert rows to SYS_STATS"; + + trx_start_if_not_started(trx); + trx->error_state = DB_SUCCESS; + + heap = mem_heap_create(512); + + node = ind_insert_stats_graph_create(index, heap); + + thr = pars_complete_graph_for_exec(node, trx, heap); + + ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); + que_run_threads(thr); + + err = trx->error_state; + + que_graph_free((que_t*) que_node_get_parent(thr)); + + trx->op_info = ""; + + return((int) err); +} + +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. @@ -2062,6 +2079,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -2083,8 +2101,8 @@ row_table_add_foreign_constraints( trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - err = dict_create_foreign_constraints(trx, sql_string, name, - reject_fks); + err = dict_create_foreign_constraints(trx, sql_string, sql_length, + name, reject_fks); if (err == DB_SUCCESS) { /* Check that also referencing constraints are ok */ err = dict_load_foreigns(name, TRUE); @@ -2428,7 +2446,7 @@ row_discard_tablespace_for_mysql( goto funct_exit; } - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + dict_hdr_get_new_id(&new_id, NULL, NULL); /* Remove all locks except the table-level S and X locks. 
*/ lock_remove_all_on_table(table, FALSE); @@ -2790,10 +2808,11 @@ row_truncate_table_for_mysql( dict_index_t* index; - space = 0; + dict_hdr_get_new_id(NULL, NULL, &space); - if (fil_create_new_single_table_tablespace( - &space, table->name, FALSE, flags, + if (space == ULINT_UNDEFINED + || fil_create_new_single_table_tablespace( + space, table->name, FALSE, flags, FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { ut_print_timestamp(stderr); fprintf(stderr, @@ -2898,7 +2917,7 @@ next_rec: mem_heap_free(heap); - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + dict_hdr_get_new_id(&new_id, NULL, NULL); info = pars_info_create(); @@ -2942,7 +2961,7 @@ next_rec: dict_table_autoinc_lock(table); dict_table_autoinc_initialize(table, 1); dict_table_autoinc_unlock(table); - dict_update_statistics(table); + dict_update_statistics(table, TRUE); trx_commit_for_mysql(trx); @@ -3244,6 +3263,8 @@ check_next_foreign: " IF (SQL % NOTFOUND) THEN\n" " found := 0;\n" " ELSE\n" + " DELETE FROM SYS_STATS\n" + " WHERE INDEX_ID = index_id;\n" " DELETE FROM SYS_FIELDS\n" " WHERE INDEX_ID = index_id;\n" " DELETE FROM SYS_INDEXES\n" diff --git a/storage/xtradb/row/row0purge.c b/storage/xtradb/row/row0purge.c index 500ebe571ab..835af990672 100644 --- a/storage/xtradb/row/row0purge.c +++ b/storage/xtradb/row/row0purge.c @@ -44,6 +44,16 @@ Created 3/14/1997 Heikki Tuuri #include "row0mysql.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /********************************************************************//** Creates a purge node to a query graph. @return own: purge node */ @@ -126,6 +136,7 @@ row_purge_remove_clust_if_poss_low( pcur = &(node->pcur); btr_cur = btr_pcur_get_btr_cur(pcur); + log_free_check(); mtr_start(&mtr); success = row_purge_reposition_pcur(mode, node, &mtr); diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 43e67ff6ded..0db4fb6f3db 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -863,8 +863,14 @@ row_sel_get_clust_rec( clust_rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + /* Declare the variable uninitialized in Valgrind. + It should be set to DB_SUCCESS at func_exit. */ + UNIV_MEM_INVALID(&err, sizeof err); + break; + default: goto err_exit; } } else { @@ -934,9 +940,9 @@ err_exit: /*********************************************************************//** Sets a lock on a record. 
-@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ UNIV_INLINE -ulint +enum db_err sel_set_rec_lock( /*=============*/ const buf_block_t* block, /*!< in: buffer block of rec */ @@ -948,8 +954,8 @@ sel_set_rec_lock( LOC_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - trx_t* trx; - ulint err; + trx_t* trx; + enum db_err err; trx = thr_get_trx(thr); @@ -1482,11 +1488,15 @@ rec_loop: node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: /* Note that in this case we will store in pcur the PREDECESSOR of the record we are waiting the lock for */ - goto lock_wait_or_error; } } @@ -1538,8 +1548,12 @@ skip_lock: rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -2498,6 +2512,7 @@ row_sel_field_store_in_mysql_format( byte* pad_ptr; ut_ad(len != UNIV_SQL_NULL); + UNIV_MEM_ASSERT_RW(data, len); switch (templ->type) { case DATA_INT: @@ -2752,6 +2767,9 @@ row_sel_store_mysql_rec( /* MySQL assumes that the field for an SQL NULL value is set to the default value. */ + UNIV_MEM_ASSERT_RW(prebuilt->default_rec + + templ->mysql_col_offset, + templ->mysql_col_len); mysql_rec[templ->mysql_null_byte_offset] |= (byte) templ->mysql_null_bit_mask; memcpy(mysql_rec + templ->mysql_col_offset, @@ -2803,9 +2821,9 @@ row_sel_build_prev_vers_for_mysql( Retrieves the clustered index record corresponding to a record in a non-clustered index. Does the necessary locking. Used in the MySQL interface. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_sel_get_clust_rec_for_mysql( /*============================*/ row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */ @@ -2832,7 +2850,7 @@ row_sel_get_clust_rec_for_mysql( dict_index_t* clust_index; const rec_t* clust_rec; rec_t* old_vers; - ulint err; + enum db_err err; trx_t* trx; *out_rec = NULL; @@ -2891,6 +2909,7 @@ row_sel_get_clust_rec_for_mysql( clust_rec = NULL; + err = DB_SUCCESS; goto func_exit; } @@ -2906,8 +2925,11 @@ row_sel_get_clust_rec_for_mysql( 0, btr_pcur_get_block(prebuilt->clust_pcur), clust_rec, clust_index, *offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + break; + default: goto err_exit; } } else { @@ -2967,6 +2989,8 @@ row_sel_get_clust_rec_for_mysql( rec, sec_index, clust_rec, clust_index)); #endif } + + err = DB_SUCCESS; } func_exit: @@ -2979,7 +3003,6 @@ func_exit: btr_pcur_store_position(prebuilt->clust_pcur, mtr); } - err = DB_SUCCESS; err_exit: return(err); } @@ -3076,6 +3099,11 @@ row_sel_pop_cached_row_for_mysql( for (i = 0; i < prebuilt->n_template; i++) { templ = prebuilt->mysql_template + i; +#if 0 /* Some of the cached_rec may legitimately be uninitialized. */ + UNIV_MEM_ASSERT_RW(cached_rec + + templ->mysql_col_offset, + templ->mysql_col_len); +#endif ut_memcpy(buf + templ->mysql_col_offset, cached_rec + templ->mysql_col_offset, templ->mysql_col_len); @@ -3090,6 +3118,11 @@ row_sel_pop_cached_row_for_mysql( } } else { +#if 0 /* Some of the cached_rec may legitimately be uninitialized. 
*/ + UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache + [prebuilt->fetch_cache_first], + prebuilt->mysql_prefix_len); +#endif ut_memcpy(buf, prebuilt->fetch_cache[prebuilt->fetch_cache_first], prebuilt->mysql_prefix_len); @@ -3140,6 +3173,8 @@ row_sel_push_cache_row_for_mysql( } ut_ad(prebuilt->fetch_cache_first == 0); + UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached], + prebuilt->mysql_row_len); if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( prebuilt->fetch_cache[ @@ -3286,6 +3321,7 @@ row_search_for_mysql( mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; + ibool problematic_use = FALSE; rec_offs_init(offsets_); @@ -3601,6 +3637,13 @@ shortcut_fails_too_big_rec: trx->has_search_latch = FALSE; } + ut_ad(prebuilt->sql_stat_start || trx->conc_state == TRX_ACTIVE); + ut_ad(trx->conc_state == TRX_NOT_STARTED + || trx->conc_state == TRX_ACTIVE); + ut_ad(prebuilt->sql_stat_start + || prebuilt->select_lock_type != LOCK_NONE + || trx->read_view); + trx_start_if_not_started(trx); if (trx->isolation_level <= TRX_ISO_READ_COMMITTED @@ -3685,8 +3728,12 @@ shortcut_fails_too_big_rec: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3702,6 +3749,15 @@ shortcut_fails_too_big_rec: } } + if (!prebuilt->mysql_has_locked) { + fprintf(stderr, "InnoDB: Error: row_search_for_mysql() is called without ha_innobase::external_lock()\n"); + if (trx->mysql_thd != NULL) { + innobase_mysql_print_thd(stderr, trx->mysql_thd, 600); + } + problematic_use = TRUE; + } +retry_check: + if (!prebuilt->sql_stat_start) { /* No need to set an intention lock or assign a read view */ @@ -3712,6 +3768,14 @@ shortcut_fails_too_big_rec: " perform a consistent read\n" "InnoDB: but the read view is not assigned!\n", stderr); + if (problematic_use) { + fprintf(stderr, "InnoDB: It may be caused by calling " + "without ha_innobase::external_lock()\n" + "InnoDB: For the first-aid, avoiding the crash. " + "But it should be fixed ASAP.\n"); + prebuilt->sql_stat_start = TRUE; + goto retry_check; + } trx_print(stderr, trx, 600); fputc('\n', stderr); ut_a(0); @@ -3791,8 +3855,12 @@ rec_loop: prebuilt->select_lock_type, LOCK_ORDINARY, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3922,8 +3990,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3958,8 +4029,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -4029,15 +4103,21 @@ no_gap_lock: switch (err) { const rec_t* old_vers; - case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { /* Note that a record of prebuilt->index was locked. */ prebuilt->new_rec_locks = 1; } + err = DB_SUCCESS; + case DB_SUCCESS: break; case DB_LOCK_WAIT: + /* Never unlock rows that were part of a conflict. 
*/ + prebuilt->new_rec_locks = 0; + if (UNIV_LIKELY(prebuilt->row_read_type != ROW_READ_TRY_SEMI_CONSISTENT) || unique_search @@ -4067,7 +4147,6 @@ no_gap_lock: if (UNIV_LIKELY(trx->wait_lock != NULL)) { lock_cancel_waiting_and_release( trx->wait_lock); - prebuilt->new_rec_locks = 0; } else { mutex_exit(&kernel_mutex); @@ -4079,9 +4158,6 @@ no_gap_lock: ULINT_UNDEFINED, &heap); err = DB_SUCCESS; - /* Note that a record of - prebuilt->index was locked. */ - prebuilt->new_rec_locks = 1; break; } mutex_exit(&kernel_mutex); @@ -4218,27 +4294,30 @@ requires_clust_rec: err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec, thr, &clust_rec, &offsets, &heap, &mtr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS: + if (clust_rec == NULL) { + /* The record did not exist in the read view */ + ut_ad(prebuilt->select_lock_type == LOCK_NONE); + goto next_rec; + } + break; + case DB_SUCCESS_LOCKED_REC: + ut_a(clust_rec != NULL); + if (srv_locks_unsafe_for_binlog + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { + /* Note that the clustered index record + was locked. */ + prebuilt->new_rec_locks = 2; + } + err = DB_SUCCESS; + break; + default: goto lock_wait_or_error; } - if (clust_rec == NULL) { - /* The record did not exist in the read view */ - ut_ad(prebuilt->select_lock_type == LOCK_NONE); - - goto next_rec; - } - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - /* Note that both the secondary index record - and the clustered index record were locked. */ - ut_ad(prebuilt->new_rec_locks == 1); - prebuilt->new_rec_locks = 2; - } - if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) { /* The record is delete marked: we can skip it */ diff --git a/storage/xtradb/row/row0uins.c b/storage/xtradb/row/row0uins.c index 9f9c814f1a5..930a5cf13b6 100644 --- a/storage/xtradb/row/row0uins.c +++ b/storage/xtradb/row/row0uins.c @@ -46,6 +46,16 @@ Created 2/25/1997 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***************************************************************//** Removes a clustered index record. The pcur in node was positioned on the record, now it is detached. @@ -152,7 +162,6 @@ row_undo_ins_remove_sec_low( ulint err; mtr_t mtr; - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, mode, &pcur, &mtr); @@ -335,6 +344,7 @@ row_undo_ins( transactions. 
*/ ut_a(trx_is_recv(node->trx)); } else { + log_free_check(); err = row_undo_ins_remove_sec(node->index, entry); if (err != DB_SUCCESS) { @@ -346,5 +356,6 @@ row_undo_ins( node->index = dict_table_get_next_index(node->index); } + log_free_check(); return(row_undo_ins_remove_clust_rec(node)); } diff --git a/storage/xtradb/row/row0umod.c b/storage/xtradb/row/row0umod.c index e7245dbee41..8464b0f95cc 100644 --- a/storage/xtradb/row/row0umod.c +++ b/storage/xtradb/row/row0umod.c @@ -58,12 +58,22 @@ delete marked clustered index record was delete unmarked and possibly also some of its fields were changed. Now, it is possible that the delete marked version has become obsolete at the time the undo is started. */ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if also the previous version of the clustered index record was modified or inserted by the same transaction, and its undo number is such that it should be undone in the same rollback. @return TRUE if also previous modify or insert of this row should be undone */ -UNIV_INLINE +static ibool row_undo_mod_undo_also_prev_vers( /*=============================*/ @@ -231,6 +241,8 @@ row_undo_mod_clust( ut_ad(node && thr); + log_free_check(); + /* Check if also the previous version of the clustered index record should be undone in this same rollback operation */ @@ -657,24 +669,55 @@ row_undo_mod_upd_exist_sec( /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, index, heap); - ut_a(entry); - /* NOTE that if we updated the fields of a - delete-marked secondary index record so that - alphabetically they stayed the same, e.g., - 'abc' -> 'aBc', we cannot return to the original - values because we do not know them. But this should - not cause problems because in row0sel.c, in queries - we always retrieve the clustered index record or an - earlier version of it, if the secondary index record - through which we do the search is delete-marked. */ - - err = row_undo_mod_del_mark_or_remove_sec(node, thr, - index, - entry); - if (err != DB_SUCCESS) { - mem_heap_free(heap); - - return(err); + if (UNIV_UNLIKELY(!entry)) { + /* The server must have crashed in + row_upd_clust_rec_by_insert(), in + row_ins_index_entry_low() before + btr_store_big_rec_extern_fields() + has written the externally stored columns + (BLOBs) of the new clustered index entry. */ + + /* The table must be in DYNAMIC or COMPRESSED + format. REDUNDANT and COMPACT formats + store a local 768-byte prefix of each + externally stored column. */ + ut_a(dict_table_get_format(index->table) + >= DICT_TF_FORMAT_ZIP); + + /* This is only legitimate when + rolling back an incomplete transaction + after crash recovery. */ + ut_a(thr_get_trx(thr)->is_recovered); + + /* The server must have crashed before + completing the insert of the new + clustered index entry and before + inserting to the secondary indexes. 
+ Because node->row was not yet written + to this index, we can ignore it. But + we must restore node->undo_row. */ + } else { + /* NOTE that if we updated the fields of a + delete-marked secondary index record so that + alphabetically they stayed the same, e.g., + 'abc' -> 'aBc', we cannot return to the + original values because we do not know them. + But this should not cause problems because + in row0sel.c, in queries we always retrieve + the clustered index record or an earlier + version of it, if the secondary index record + through which we do the search is + delete-marked. */ + + err = row_undo_mod_del_mark_or_remove_sec( + node, thr, index, entry); + if (err != DB_SUCCESS) { + mem_heap_free(heap); + + return(err); + } + + mem_heap_empty(heap); } /* We may have to update the delete mark in the @@ -683,7 +726,6 @@ row_undo_mod_upd_exist_sec( the secondary index record if we updated its fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. */ - mem_heap_empty(heap); entry = row_build_index_entry(node->undo_row, node->undo_ext, index, heap); diff --git a/storage/xtradb/row/row0undo.c b/storage/xtradb/row/row0undo.c index 3d739c9689a..9ef842b5114 100644 --- a/storage/xtradb/row/row0undo.c +++ b/storage/xtradb/row/row0undo.c @@ -297,7 +297,7 @@ row_undo( if (locked_data_dict) { - row_mysql_lock_data_dictionary(trx); + row_mysql_freeze_data_dictionary(trx); } if (node->state == UNDO_NODE_INSERT) { @@ -312,7 +312,7 @@ row_undo( if (locked_data_dict) { - row_mysql_unlock_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary(trx); } /* Do some cleanup */ diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c index 95d1d00aeef..d0aaecd3dae 100644 --- a/storage/xtradb/row/row0upd.c +++ b/storage/xtradb/row/row0upd.c @@ -92,6 +92,16 @@ the x-latch freed? The most efficient way for performing a searched delete is obviously to keep the x-latch for several steps of query graph execution. */ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if an update vector changes some of the first ordering fields of an index record. This is only used in foreign key checks and we can assume @@ -1453,7 +1463,6 @@ row_upd_sec_index_entry( entry = row_build_index_entry(node->row, node->ext, index, heap); ut_a(entry); - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, @@ -1529,7 +1538,7 @@ Updates the secondary index record if it is changed in the row update or deletes it if this is a delete. 
@return DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ -UNIV_INLINE +static ulint row_upd_sec_step( /*=============*/ @@ -2015,6 +2024,7 @@ row_upd( if (node->state == UPD_NODE_UPDATE_CLUSTERED || node->state == UPD_NODE_INSERT_CLUSTERED) { + log_free_check(); err = row_upd_clust_step(node, thr); if (err != DB_SUCCESS) { @@ -2029,6 +2039,8 @@ row_upd( } while (node->index != NULL) { + + log_free_check(); err = row_upd_sec_step(node, thr); if (err != DB_SUCCESS) { diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index bc2dd562697..b9905116603 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -211,6 +211,9 @@ UNIV_INTERN ulint srv_buf_pool_curr_size = 0; UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; +/* key value for shm */ +UNIV_INTERN uint srv_buffer_pool_shm_key = 0; + /* This parameter is deprecated. Use srv_n_io_[read|write]_threads instead. */ UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; @@ -380,6 +383,7 @@ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; UNIV_INTERN ulong srv_stats_method = 0; UNIV_INTERN ulong srv_stats_auto_update = 1; UNIV_INTERN ulint srv_stats_update_need_lock = 1; +UNIV_INTERN ibool srv_use_sys_stats_table = FALSE; UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; UNIV_INTERN ibool srv_use_checksums = TRUE; @@ -1758,12 +1762,16 @@ srv_suspend_mysql_thread( innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */ lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd); - if (trx_is_interrupted(trx) - || (lock_wait_timeout < 100000000 - && wait_time > (double) lock_wait_timeout)) { + if (lock_wait_timeout < 100000000 + && wait_time > (double) lock_wait_timeout) { trx->error_state = DB_LOCK_WAIT_TIMEOUT; } + + if (trx_is_interrupted(trx)) { + + trx->error_state = DB_INTERRUPTED; + } } /********************************************************************//** diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index 7b5581a24f0..62ffa366f18 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1302,6 +1302,9 @@ innobase_start_or_create_for_mysql(void) } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) { + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT; + } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; @@ -1716,6 +1719,8 @@ innobase_start_or_create_for_mysql(void) Note that this is not as heavy weight as it seems. At this point there will be only ONE page in the buf_LRU and there must be no page in the buf_flush list. */ + /* TODO: treat more correctly */ + if (!srv_buffer_pool_shm_key) buf_pool_invalidate(); /* We always try to do a recovery, even if the database had diff --git a/storage/xtradb/sync/sync0arr.c b/storage/xtradb/sync/sync0arr.c index cfa52cdcc88..223e1715944 100644 --- a/storage/xtradb/sync/sync0arr.c +++ b/storage/xtradb/sync/sync0arr.c @@ -498,7 +498,9 @@ sync_array_cell_print( || type == RW_LOCK_WAIT_EX || type == RW_LOCK_SHARED) { - fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file); + fputs(type == RW_LOCK_EX ? "X-lock on" + : type == RW_LOCK_WAIT_EX ? 
"X-lock (wait_ex) on" + : "S-lock on", file); rwlock = cell->old_wait_rw_lock; diff --git a/storage/xtradb/sync/sync0rw.c b/storage/xtradb/sync/sync0rw.c index 07eac403dfe..9e10f6e943b 100644 --- a/storage/xtradb/sync/sync0rw.c +++ b/storage/xtradb/sync/sync0rw.c @@ -268,7 +268,7 @@ rw_lock_create_func( lock->level = level; #endif /* UNIV_SYNC_DEBUG */ - lock->magic_n = RW_LOCK_MAGIC_N; + ut_d(lock->magic_n = RW_LOCK_MAGIC_N); lock->lock_name = cmutex_name; @@ -282,10 +282,8 @@ rw_lock_create_func( mutex_enter(&rw_lock_list_mutex); - if (UT_LIST_GET_LEN(rw_lock_list) > 0) { - ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n - == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_FIRST(rw_lock_list) == NULL + || UT_LIST_GET_FIRST(rw_lock_list)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_ADD_FIRST(list, rw_lock_list, lock); @@ -314,18 +312,16 @@ rw_lock_free( os_event_free(lock->wait_ex_event); - if (UT_LIST_GET_PREV(list, lock)) { - ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } - if (UT_LIST_GET_NEXT(list, lock)) { - ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_PREV(list, lock) == NULL + || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); + ut_ad(UT_LIST_GET_NEXT(list, lock) == NULL + || UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_REMOVE(list, rw_lock_list, lock); mutex_exit(&rw_lock_list_mutex); - lock->magic_n = 0; + ut_d(lock->magic_n = 0); } #ifdef UNIV_DEBUG @@ -344,7 +340,7 @@ rw_lock_validate( ulint waiters = rw_lock_get_waiters(lock); lint lock_word = lock->lock_word; - ut_a(lock->magic_n == RW_LOCK_MAGIC_N); + ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); ut_a(waiters == 0 || waiters == 1); ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0); diff --git a/storage/xtradb/sync/sync0sync.c b/storage/xtradb/sync/sync0sync.c index fe14cbf0886..225f28df78e 100644 --- a/storage/xtradb/sync/sync0sync.c +++ b/storage/xtradb/sync/sync0sync.c @@ -434,20 +434,19 @@ mutex_set_waiters( mutex_t* mutex, /*!< in: mutex */ ulint n) /*!< in: value to set */ { -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - volatile ulint* ptr; /* declared volatile to ensure that - the value is stored to memory */ -#endif - +#ifdef INNODB_RW_LOCKS_USE_ATOMICS ut_ad(mutex); -#ifdef INNODB_RW_LOCKS_USE_ATOMICS if (n) { os_compare_and_swap_ulint(&mutex->waiters, 0, 1); } else { os_compare_and_swap_ulint(&mutex->waiters, 1, 0); } #else + volatile ulint* ptr; /* declared volatile to ensure that + the value is stored to memory */ + ut_ad(mutex); + ptr = &(mutex->waiters); *ptr = n; /* Here we assume that the write of a single diff --git a/storage/xtradb/trx/trx0i_s.c b/storage/xtradb/trx/trx0i_s.c index c160eb2942a..5bc8302d0c0 100644 --- a/storage/xtradb/trx/trx0i_s.c +++ b/storage/xtradb/trx/trx0i_s.c @@ -429,6 +429,9 @@ fill_trx_row( which to copy volatile strings */ { + const char* stmt; + size_t stmt_len; + row->trx_id = trx_get_id(trx); row->trx_started = (ib_time_t) trx->start_time; row->trx_state = trx_get_que_state_str(trx); @@ -449,37 +452,32 @@ fill_trx_row( row->trx_weight = (ullint) ut_conv_dulint_to_longlong(TRX_WEIGHT(trx)); - if (trx->mysql_thd != NULL) { - row->trx_mysql_thread_id - = thd_get_thread_id(trx->mysql_thd); - } else { + if (trx->mysql_thd == NULL) { /* For internal transactions e.g., purge and transactions being recovered at startup there is no associated MySQL thread data structure. 
*/ row->trx_mysql_thread_id = 0; + row->trx_query = NULL; + return(TRUE); } - if (trx->mysql_query_str != NULL && *trx->mysql_query_str != NULL) { + row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd); + stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len); - if (strlen(*trx->mysql_query_str) - > TRX_I_S_TRX_QUERY_MAX_LEN) { + if (stmt != NULL) { - char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; + char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; - memcpy(query, *trx->mysql_query_str, - TRX_I_S_TRX_QUERY_MAX_LEN); - query[TRX_I_S_TRX_QUERY_MAX_LEN] = '\0'; + if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) { + stmt_len = TRX_I_S_TRX_QUERY_MAX_LEN; + } - row->trx_query = ha_storage_put_memlim( - cache->storage, query, - TRX_I_S_TRX_QUERY_MAX_LEN + 1, - MAX_ALLOWED_FOR_STORAGE(cache)); - } else { + memcpy(query, stmt, stmt_len); + query[stmt_len] = '\0'; - row->trx_query = ha_storage_put_str_memlim( - cache->storage, *trx->mysql_query_str, - MAX_ALLOWED_FOR_STORAGE(cache)); - } + row->trx_query = ha_storage_put_memlim( + cache->storage, stmt, stmt_len + 1, + MAX_ALLOWED_FOR_STORAGE(cache)); if (row->trx_query == NULL) { diff --git a/storage/xtradb/trx/trx0purge.c b/storage/xtradb/trx/trx0purge.c index 41e16b35e85..1c317665878 100644 --- a/storage/xtradb/trx/trx0purge.c +++ b/storage/xtradb/trx/trx0purge.c @@ -1148,8 +1148,7 @@ trx_purge(void) /* If we cannot advance the 'purge view' because of an old 'consistent read view', then the DML statements cannot be delayed. Also, srv_max_purge_lag <= 0 means 'infinity'. */ - if (srv_max_purge_lag > 0 - && !UT_LIST_GET_LAST(trx_sys->view_list)) { + if (srv_max_purge_lag > 0) { float ratio = (float) trx_sys->rseg_history_len / srv_max_purge_lag; if (ratio > ULINT_MAX / 10000) { diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c index 47a21c3a318..ad4471ada0b 100644 --- a/storage/xtradb/trx/trx0sys.c +++ b/storage/xtradb/trx/trx0sys.c @@ -840,13 +840,13 @@ UNIV_INTERN void trx_sys_update_mysql_binlog_offset( /*===============================*/ + trx_sysf_t* sys_header, const char* file_name_in,/*!< in: MySQL log file name */ ib_int64_t offset, /*!< in: position in that log file */ ulint field, /*!< in: offset of the MySQL log info field in the trx sys header */ mtr_t* mtr) /*!< in: mtr */ { - trx_sysf_t* sys_header; const char* file_name; if (ut_strlen(file_name_in) >= TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN) { @@ -860,8 +860,6 @@ trx_sys_update_mysql_binlog_offset( file_name = file_name_in; } - sys_header = trx_sysf_get(mtr); - if (mach_read_from_4(sys_header + field + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) != TRX_SYS_MYSQL_LOG_MAGIC_N) { @@ -1143,14 +1141,8 @@ trx_sysf_dummy_create( ulint space, mtr_t* mtr) { -#ifdef UNDEFINED - trx_sysf_t* sys_header; - ulint slot_no; - ulint page_no; - ulint i; -#endif - page_t* page; buf_block_t* block; + page_t* page; ut_ad(mtr); diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c index f150d64f8dc..9584f0c4c46 100644 --- a/storage/xtradb/trx/trx0trx.c +++ b/storage/xtradb/trx/trx0trx.c @@ -109,6 +109,8 @@ trx_create( trx->support_xa = TRUE; + trx->flush_log_at_trx_commit_session = 3; /* means to use innodb_flush_log_at_trx_commit value */ + trx->check_foreigns = TRUE; trx->check_unique_secondary = TRUE; @@ -119,7 +121,6 @@ trx_create( trx->table_id = ut_dulint_zero; trx->mysql_thd = NULL; - trx->mysql_query_str = NULL; trx->active_trans = 0; trx->duplicates = 0; @@ -736,6 +737,9 @@ trx_start( generated by the same transaction, doesn't. 
*/ trx->support_xa = thd_supports_xa(trx->mysql_thd); + trx->flush_log_at_trx_commit_session = + thd_flush_log_at_trx_commit_session(trx->mysql_thd); + mutex_enter(&kernel_mutex); ret = trx_start_low(trx, rseg_id); @@ -758,6 +762,7 @@ trx_commit_off_kernel( trx_rseg_t* rseg; trx_undo_t* undo; mtr_t mtr; + trx_sysf_t* sys_header = NULL; ut_ad(mutex_own(&kernel_mutex)); @@ -815,7 +820,11 @@ trx_commit_off_kernel( if (trx->mysql_log_file_name && trx->mysql_log_file_name[0] != '\0') { + if (!sys_header) { + sys_header = trx_sysf_get(&mtr); + } trx_sys_update_mysql_binlog_offset( + sys_header, trx->mysql_log_file_name, trx->mysql_log_offset, TRX_SYS_MYSQL_LOG_INFO, &mtr); @@ -824,11 +833,16 @@ trx_commit_off_kernel( if (trx->mysql_master_log_file_name[0] != '\0') { /* This database server is a MySQL replication slave */ + if (!sys_header) { + sys_header = trx_sysf_get(&mtr); + } trx_sys_update_mysql_binlog_offset( + sys_header, trx->mysql_relay_log_file_name, trx->mysql_relay_log_pos, TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr); trx_sys_update_mysql_binlog_offset( + sys_header, trx->mysql_master_log_file_name, trx->mysql_master_log_pos, TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr); @@ -907,6 +921,7 @@ trx_commit_off_kernel( trx->read_view = NULL; if (lsn) { + ulint flush_log_at_trx_commit; mutex_exit(&kernel_mutex); @@ -915,6 +930,12 @@ trx_commit_off_kernel( trx_undo_insert_cleanup(trx); } + if (trx->flush_log_at_trx_commit_session == 3) { + flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + } else { + flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + } + /* NOTE that we could possibly make a group commit more efficient here: call os_thread_yield here to allow also other trxs to come to commit! */ @@ -946,9 +967,9 @@ trx_commit_off_kernel( if (trx->flush_log_later) { /* Do nothing yet */ trx->must_flush_log_later = TRUE; - } else if (srv_flush_log_at_trx_commit == 0) { + } else if (flush_log_at_trx_commit == 0) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { + } else if (flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ @@ -960,7 +981,7 @@ trx_commit_off_kernel( log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } - } else if (srv_flush_log_at_trx_commit == 2) { + } else if (flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ @@ -983,7 +1004,6 @@ trx_commit_off_kernel( trx->rseg = NULL; trx->undo_no = ut_dulint_zero; trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; - trx->mysql_query_str = NULL; ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); @@ -1641,16 +1661,23 @@ trx_commit_complete_for_mysql( trx_t* trx) /*!< in: trx handle */ { ib_uint64_t lsn = trx->commit_lsn; + ulint flush_log_at_trx_commit; ut_a(trx); trx->op_info = "flushing log"; + if (trx->flush_log_at_trx_commit_session == 3) { + flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + } else { + flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + } + if (!trx->must_flush_log_later) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 0) { + } else if (flush_log_at_trx_commit == 0) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { + } else if (flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ @@ -1661,7 +1688,7 @@ trx_commit_complete_for_mysql( log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } - } else if 
(srv_flush_log_at_trx_commit == 2) { + } else if (flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ @@ -1922,6 +1949,8 @@ trx_prepare_off_kernel( /*--------------------------------------*/ if (lsn) { + ulint flush_log_at_trx_commit; + /* Depending on the my.cnf options, we may now write the log buffer to the log files, making the prepared state of the transaction durable if the OS does not crash. We may also @@ -1941,9 +1970,15 @@ trx_prepare_off_kernel( mutex_exit(&kernel_mutex); - if (srv_flush_log_at_trx_commit == 0) { + if (trx->flush_log_at_trx_commit_session == 3) { + flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + } else { + flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + } + + if (flush_log_at_trx_commit == 0) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { + } else if (flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ @@ -1955,7 +1990,7 @@ trx_prepare_off_kernel( log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } - } else if (srv_flush_log_at_trx_commit == 2) { + } else if (flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ diff --git a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_gcc.c b/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_gcc.c deleted file mode 100644 index 30de5aa6f17..00000000000 --- a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_gcc.c +++ /dev/null @@ -1,43 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles, then pthread_t objects can be used as arguments -to GCC atomic builtin functions. - -Created March 5, 2009 Vasil Dimov -*****************************************************************************/ - -#include <pthread.h> -#include <string.h> - -int -main(int argc, char** argv) -{ - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - __sync_bool_compare_and_swap(&x1, x2, x3); - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_solaris.c b/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_solaris.c deleted file mode 100644 index 310603c7503..00000000000 --- a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_solaris.c +++ /dev/null @@ -1,54 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles and returns 0, then pthread_t objects can be used as -arguments to Solaris libc atomic functions. - -Created April 18, 2009 Vasil Dimov -*****************************************************************************/ - -#include <pthread.h> -#include <string.h> - -int -main(int argc, char** argv) -{ - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - if (sizeof(pthread_t) == 4) { - - atomic_cas_32(&x1, x2, x3); - - } else if (sizeof(pthread_t) == 8) { - - atomic_cas_64(&x1, x2, x3); - - } else { - - return(1); - } - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_have_gcc_atomics.c b/storage/xtradb/ut/ut0auxconf_have_gcc_atomics.c deleted file mode 100644 index da5c13d7d79..00000000000 --- a/storage/xtradb/ut/ut0auxconf_have_gcc_atomics.c +++ /dev/null @@ -1,61 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles and returns 0, then GCC atomic funcions are available. 
- -Created September 12, 2009 Vasil Dimov -*****************************************************************************/ - -int -main(int argc, char** argv) -{ - long x; - long y; - long res; - char c; - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x, y); - if (!res || x != y) { - return(1); - } - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x + 1, y); - if (res || x != 10) { - return(1); - } - - x = 10; - y = 123; - res = __sync_add_and_fetch(&x, y); - if (res != 123 + 10 || x != 123 + 10) { - return(1); - } - - c = 10; - res = __sync_lock_test_and_set(&c, 123); - if (res != 10 || c != 123) { - return(1); - } - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_have_solaris_atomics.c b/storage/xtradb/ut/ut0auxconf_have_solaris_atomics.c deleted file mode 100644 index 7eb704edd4b..00000000000 --- a/storage/xtradb/ut/ut0auxconf_have_solaris_atomics.c +++ /dev/null @@ -1,39 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles, then Solaris libc atomic funcions are available. - -Created April 18, 2009 Vasil Dimov -*****************************************************************************/ -#include <atomic.h> - -int -main(int argc, char** argv) -{ - ulong_t ulong = 0; - uint32_t uint32 = 0; - uint64_t uint64 = 0; - - atomic_cas_ulong(&ulong, 0, 1); - atomic_cas_32(&uint32, 0, 1); - atomic_cas_64(&uint64, 0, 1); - atomic_add_long(&ulong, 0); - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_pause.c b/storage/xtradb/ut/ut0auxconf_pause.c deleted file mode 100644 index 54d63bdd9bc..00000000000 --- a/storage/xtradb/ut/ut0auxconf_pause.c +++ /dev/null @@ -1,32 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles and can be run and returns 0, then the pause -instruction is available. - -Created Jul 21, 2009 Vasil Dimov -*****************************************************************************/ - -int -main(int argc, char** argv) -{ - __asm__ __volatile__ ("pause"); - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_sizeof_pthread_t.c b/storage/xtradb/ut/ut0auxconf_sizeof_pthread_t.c deleted file mode 100644 index 96add4526ef..00000000000 --- a/storage/xtradb/ut/ut0auxconf_sizeof_pthread_t.c +++ /dev/null @@ -1,35 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -This program should compile and when run, print a single line like: -#define SIZEOF_PTHREAD_T %d - -Created April 18, 2009 Vasil Dimov -*****************************************************************************/ - -#include <stdio.h> -#include <pthread.h> - -int -main(int argc, char** argv) -{ - printf("#define SIZEOF_PTHREAD_T %d\n", (int) sizeof(pthread_t)); - - return(0); -} |
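Reviewer note (not part of the patch): the trx0trx.c hunks above repeat the same selection logic in trx_commit_off_kernel(), trx_commit_complete_for_mysql() and trx_prepare_off_kernel(): a per-session flush_log_at_trx_commit_session value of 3 is a sentinel meaning "use the global srv_flush_log_at_trx_commit". A minimal standalone C sketch of that fallback follows; the helper name and the demo main() are illustrative assumptions, only the sentinel value 3 and the variable names come from the diff.

#include <stdio.h>

/* global setting, as in srv0srv.c (legal values 0, 1 or 2) */
static unsigned long srv_flush_log_at_trx_commit = 1;

/* Resolve the value actually used at commit/prepare time.
   A session value of 3 mirrors trx->flush_log_at_trx_commit_session
   in the patch and means "defer to the global setting". */
static unsigned long
effective_flush_log_at_trx_commit(unsigned long session_value)
{
	return (session_value == 3)
		? srv_flush_log_at_trx_commit
		: session_value;
}

int
main(void)
{
	/* default sentinel 3: the global value (1) is used */
	printf("%lu\n", effective_flush_log_at_trx_commit(3));
	/* explicit per-session override: 2 wins over the global value */
	printf("%lu\n", effective_flush_log_at_trx_commit(2));
	return 0;
}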