summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergei Golubchik <sergii@pisem.net> 2014-02-01 09:33:26 +0100
committer Sergei Golubchik <sergii@pisem.net> 2014-02-01 09:33:26 +0100
commit 27d45e46968e4bd623565688a997b83b0a5cc1a8 (patch)
tree 8df9c87f8fd3855d81e3ae46078d34609668e63a
parent 27fbb637d36324992b270f0dc0472807ffa4ebc2 (diff)
download mariadb-git-27d45e46968e4bd623565688a997b83b0a5cc1a8.tar.gz
MDEV-5574 Set AUTO_INCREMENT below max value of column.
Update InnoDB to 5.6.14
Apply MySQL-5.6 hack for MySQL Bug#16434374
Move Aria-only HA_RTREE_INDEX from my_base.h to maria_def.h (breaks an assert in InnoDB)
Fix InnoDB memory leak
-rw-r--r--include/my_base.h4
-rw-r--r--mysql-test/r/alter_table_autoinc-5574.result11
-rw-r--r--mysql-test/suite/innodb/r/innodb.result4
-rw-r--r--mysql-test/suite/innodb/t/innodb.test10
-rw-r--r--mysql-test/suite/innodb_fts/r/fulltext.result17
-rw-r--r--mysql-test/suite/innodb_fts/r/fulltext_left_join.result6
-rw-r--r--mysql-test/suite/innodb_fts/r/fulltext_misc.result2
-rw-r--r--mysql-test/suite/innodb_fts/r/innodb_fts_misc.result3
-rw-r--r--mysql-test/suite/innodb_fts/r/innodb_fts_plugin.result29
-rw-r--r--mysql-test/suite/innodb_fts/r/innodb_fts_result_cache_limit.result31
-rw-r--r--mysql-test/suite/innodb_fts/r/innodb_fts_stopword_charset.result321
-rw-r--r--mysql-test/suite/innodb_fts/t/fulltext.test20
-rw-r--r--mysql-test/suite/innodb_fts/t/fulltext_left_join.test2
-rw-r--r--mysql-test/suite/innodb_fts/t/fulltext_misc.test2
-rw-r--r--mysql-test/suite/innodb_fts/t/fulltext_order_by.test2
-rw-r--r--mysql-test/suite/innodb_fts/t/innodb_fts_misc.test6
-rw-r--r--mysql-test/suite/innodb_fts/t/innodb_fts_plugin.test45
-rw-r--r--mysql-test/suite/innodb_fts/t/innodb_fts_result_cache_limit.test51
-rw-r--r--mysql-test/suite/innodb_fts/t/innodb_fts_stopword_charset.test421
-rw-r--r--mysql-test/t/alter_table_autoinc-5574.test12
-rw-r--r--sql/item_func.cc8
-rw-r--r--sql/item_func.h35
-rw-r--r--storage/innobase/api/api0api.cc137
-rw-r--r--storage/innobase/btr/btr0btr.cc486
-rw-r--r--storage/innobase/btr/btr0cur.cc593
-rw-r--r--storage/innobase/btr/btr0pcur.cc4
-rw-r--r--storage/innobase/buf/buf0buddy.cc446
-rw-r--r--storage/innobase/buf/buf0buf.cc54
-rw-r--r--storage/innobase/buf/buf0dblwr.cc200
-rw-r--r--storage/innobase/buf/buf0flu.cc306
-rw-r--r--storage/innobase/buf/buf0lru.cc236
-rw-r--r--storage/innobase/buf/buf0rea.cc35
-rw-r--r--storage/innobase/compile-innodb2
-rw-r--r--storage/innobase/dict/dict0boot.cc1
-rw-r--r--storage/innobase/dict/dict0crea.cc87
-rw-r--r--storage/innobase/dict/dict0dict.cc387
-rw-r--r--storage/innobase/dict/dict0load.cc148
-rw-r--r--storage/innobase/dict/dict0mem.cc6
-rw-r--r--storage/innobase/dict/dict0stats.cc166
-rw-r--r--storage/innobase/dict/dict0stats_bg.cc63
-rw-r--r--storage/innobase/dyn/dyn0dyn.cc4
-rw-r--r--storage/innobase/fil/fil0fil.cc434
-rw-r--r--storage/innobase/fsp/fsp0fsp.cc7
-rw-r--r--storage/innobase/fts/fts0ast.cc232
-rw-r--r--storage/innobase/fts/fts0blex.cc30
-rw-r--r--storage/innobase/fts/fts0config.cc6
-rw-r--r--storage/innobase/fts/fts0fts.cc312
-rw-r--r--storage/innobase/fts/fts0opt.cc67
-rw-r--r--storage/innobase/fts/fts0pars.cc636
-rw-r--r--storage/innobase/fts/fts0pars.y6
-rw-r--r--storage/innobase/fts/fts0que.cc1039
-rw-r--r--storage/innobase/fts/fts0tlex.cc81
-rw-r--r--storage/innobase/fts/fts0tlex.l2
-rw-r--r--storage/innobase/handler/ha_innodb.cc581
-rw-r--r--storage/innobase/handler/ha_innodb.h8
-rw-r--r--storage/innobase/handler/handler0alter.cc3056
-rw-r--r--storage/innobase/handler/i_s.cc215
-rw-r--r--storage/innobase/handler/i_s.h3
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.cc142
-rw-r--r--storage/innobase/include/api0api.h4
-rw-r--r--storage/innobase/include/btr0btr.h50
-rw-r--r--storage/innobase/include/btr0cur.h111
-rw-r--r--storage/innobase/include/buf0buf.h42
-rw-r--r--storage/innobase/include/buf0buf.ic22
-rw-r--r--storage/innobase/include/buf0dblwr.h50
-rw-r--r--storage/innobase/include/buf0flu.h27
-rw-r--r--storage/innobase/include/buf0lru.h14
-rw-r--r--storage/innobase/include/buf0rea.h4
-rw-r--r--storage/innobase/include/buf0types.h4
-rw-r--r--storage/innobase/include/db0err.h3
-rw-r--r--storage/innobase/include/dict0crea.h34
-rw-r--r--storage/innobase/include/dict0crea.ic75
-rw-r--r--storage/innobase/include/dict0dict.h60
-rw-r--r--storage/innobase/include/dict0load.h36
-rw-r--r--storage/innobase/include/dict0mem.h8
-rw-r--r--storage/innobase/include/dict0priv.h6
-rw-r--r--storage/innobase/include/dict0priv.ic8
-rw-r--r--storage/innobase/include/dict0stats.ic22
-rw-r--r--storage/innobase/include/dict0stats_bg.h59
-rw-r--r--storage/innobase/include/dict0stats_bg.ic45
-rw-r--r--storage/innobase/include/dict0types.h17
-rw-r--r--storage/innobase/include/dyn0dyn.h80
-rw-r--r--storage/innobase/include/dyn0dyn.ic137
-rw-r--r--storage/innobase/include/fil0fil.h17
-rw-r--r--storage/innobase/include/fts0ast.h14
-rw-r--r--storage/innobase/include/fts0blex.h6
-rw-r--r--storage/innobase/include/fts0fts.h52
-rw-r--r--storage/innobase/include/fts0pars.h26
-rw-r--r--storage/innobase/include/fts0tlex.h6
-rw-r--r--storage/innobase/include/fts0types.h6
-rw-r--r--storage/innobase/include/fts0types.ic3
-rw-r--r--storage/innobase/include/ha_prototypes.h37
-rw-r--r--storage/innobase/include/handler0alter.h2
-rw-r--r--storage/innobase/include/ibuf0ibuf.h6
-rw-r--r--storage/innobase/include/ibuf0ibuf.ic20
-rw-r--r--storage/innobase/include/lock0lock.h23
-rw-r--r--storage/innobase/include/log0log.h4
-rw-r--r--storage/innobase/include/mach0data.ic2
-rw-r--r--storage/innobase/include/mtr0mtr.h15
-rw-r--r--storage/innobase/include/os0file.h78
-rw-r--r--storage/innobase/include/os0file.ic60
-rw-r--r--storage/innobase/include/page0cur.h37
-rw-r--r--storage/innobase/include/page0cur.ic22
-rw-r--r--storage/innobase/include/page0page.h78
-rw-r--r--storage/innobase/include/page0page.ic34
-rw-r--r--storage/innobase/include/page0types.h6
-rw-r--r--storage/innobase/include/page0zip.h10
-rw-r--r--storage/innobase/include/page0zip.ic30
-rw-r--r--storage/innobase/include/rem0rec.ic14
-rw-r--r--storage/innobase/include/row0log.h31
-rw-r--r--storage/innobase/include/row0merge.h10
-rw-r--r--storage/innobase/include/srv0mon.h5
-rw-r--r--storage/innobase/include/srv0start.h9
-rw-r--r--storage/innobase/include/trx0rec.h4
-rw-r--r--storage/innobase/include/trx0trx.h21
-rw-r--r--storage/innobase/include/trx0types.h5
-rw-r--r--storage/innobase/include/trx0undo.h3
-rw-r--r--storage/innobase/include/univ.i20
-rw-r--r--storage/innobase/include/ut0dbg.h42
-rw-r--r--storage/innobase/include/ut0rbt.h2
-rw-r--r--storage/innobase/lock/lock0lock.cc262
-rw-r--r--storage/innobase/lock/lock0wait.cc29
-rw-r--r--storage/innobase/log/log0log.cc30
-rw-r--r--storage/innobase/log/log0recv.cc129
-rw-r--r--storage/innobase/mem/mem0mem.cc6
-rw-r--r--storage/innobase/mtr/mtr0mtr.cc155
-rw-r--r--storage/innobase/os/os0file.cc41
-rw-r--r--storage/innobase/page/page0cur.cc281
-rw-r--r--storage/innobase/page/page0page.cc125
-rw-r--r--storage/innobase/page/page0zip.cc117
-rw-r--r--storage/innobase/pars/pars0pars.cc6
-rw-r--r--storage/innobase/read/read0read.cc4
-rw-r--r--storage/innobase/rem/rem0rec.cc9
-rw-r--r--storage/innobase/row/row0ext.cc32
-rw-r--r--storage/innobase/row/row0ftsort.cc4
-rw-r--r--storage/innobase/row/row0import.cc6
-rw-r--r--storage/innobase/row/row0ins.cc58
-rw-r--r--storage/innobase/row/row0log.cc529
-rw-r--r--storage/innobase/row/row0merge.cc383
-rw-r--r--storage/innobase/row/row0mysql.cc82
-rw-r--r--storage/innobase/row/row0purge.cc104
-rw-r--r--storage/innobase/row/row0quiesce.cc11
-rw-r--r--storage/innobase/row/row0sel.cc12
-rw-r--r--storage/innobase/row/row0uins.cc28
-rw-r--r--storage/innobase/row/row0umod.cc48
-rw-r--r--storage/innobase/row/row0upd.cc4
-rw-r--r--storage/innobase/srv/srv0mon.cc8
-rw-r--r--storage/innobase/srv/srv0srv.cc46
-rw-r--r--storage/innobase/srv/srv0start.cc134
-rw-r--r--storage/innobase/sync/sync0arr.cc12
-rw-r--r--storage/innobase/trx/trx0purge.cc4
-rw-r--r--storage/innobase/trx/trx0roll.cc25
-rw-r--r--storage/innobase/trx/trx0sys.cc2
-rw-r--r--storage/innobase/trx/trx0trx.cc330
-rw-r--r--storage/innobase/trx/trx0undo.cc14
-rw-r--r--storage/innobase/ut/ut0crc32.cc2
-rw-r--r--storage/innobase/ut/ut0dbg.cc32
-rw-r--r--storage/innobase/ut/ut0ut.cc4
-rw-r--r--storage/maria/maria_def.h7
-rw-r--r--storage/xtradb/handler/ha_innodb.cc3
160 files changed, 10265 insertions, 5791 deletions
diff --git a/include/my_base.h b/include/my_base.h
index c9f9a8a4ed3..2880b52e6e2 100644
--- a/include/my_base.h
+++ b/include/my_base.h
@@ -264,13 +264,11 @@ enum ha_base_keytype {
#define HA_SPATIAL 1024 /* For spatial search */
#define HA_NULL_ARE_EQUAL 2048 /* NULL in key are cmp as equal */
#define HA_GENERATED_KEY 8192 /* Automaticly generated key */
-#define HA_RTREE_INDEX 16384 /* For RTREE search */
/* The combination of the above can be used for key type comparison. */
#define HA_KEYFLAG_MASK (HA_NOSAME | HA_PACK_KEY | HA_AUTO_KEY | \
HA_BINARY_PACK_KEY | HA_FULLTEXT | HA_UNIQUE_CHECK | \
- HA_SPATIAL | HA_NULL_ARE_EQUAL | HA_GENERATED_KEY | \
- HA_RTREE_INDEX)
+ HA_SPATIAL | HA_NULL_ARE_EQUAL | HA_GENERATED_KEY)
/*
Key contains partial segments.
diff --git a/mysql-test/r/alter_table_autoinc-5574.result b/mysql-test/r/alter_table_autoinc-5574.result
new file mode 100644
index 00000000000..9476313c773
--- /dev/null
+++ b/mysql-test/r/alter_table_autoinc-5574.result
@@ -0,0 +1,11 @@
+create table t1(a int(10)unsigned not null auto_increment primary key,
+b varchar(255) not null) engine=innodb default charset=utf8;
+insert into t1 values(1,'aaa'),(2,'bbb');
+alter table t1 auto_increment=1;
+insert into t1 values(NULL, 'ccc');
+select * from t1;
+a b
+1 aaa
+2 bbb
+3 ccc
+drop table t1;
diff --git a/mysql-test/suite/innodb/r/innodb.result b/mysql-test/suite/innodb/r/innodb.result
index 7d255e794c0..bc961ba1e46 100644
--- a/mysql-test/suite/innodb/r/innodb.result
+++ b/mysql-test/suite/innodb/r/innodb.result
@@ -1597,10 +1597,6 @@ select distinct concat(a, b) from t1;
concat(a, b)
11113333
drop table t1;
-CREATE TABLE t1 ( a char(10) ) ENGINE=InnoDB;
-SELECT a FROM t1 WHERE MATCH (a) AGAINST ('test' IN BOOLEAN MODE);
-ERROR HY000: The table does not have FULLTEXT index to support this query
-DROP TABLE t1;
CREATE TABLE t1 (a_id tinyint(4) NOT NULL default '0', PRIMARY KEY (a_id)) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO t1 VALUES (1),(2),(3);
CREATE TABLE t2 (b_id tinyint(4) NOT NULL default '0',b_a tinyint(4) NOT NULL default '0', PRIMARY KEY (b_id), KEY (b_a),
diff --git a/mysql-test/suite/innodb/t/innodb.test b/mysql-test/suite/innodb/t/innodb.test
index e2056d66855..157e2480740 100644
--- a/mysql-test/suite/innodb/t/innodb.test
+++ b/mysql-test/suite/innodb/t/innodb.test
@@ -1245,16 +1245,6 @@ select distinct concat(a, b) from t1;
drop table t1;
#
-# BUG#7709 test case - Boolean fulltext query against unsupported
-# engines does not fail
-#
-
-CREATE TABLE t1 ( a char(10) ) ENGINE=InnoDB;
---error 1764
-SELECT a FROM t1 WHERE MATCH (a) AGAINST ('test' IN BOOLEAN MODE);
-DROP TABLE t1;
-
-#
# check null values #1
#
diff --git a/mysql-test/suite/innodb_fts/r/fulltext.result b/mysql-test/suite/innodb_fts/r/fulltext.result
index f3c913110d2..e665fd1370f 100644
--- a/mysql-test/suite/innodb_fts/r/fulltext.result
+++ b/mysql-test/suite/innodb_fts/r/fulltext.result
@@ -337,9 +337,9 @@ insert into t2 values (1, 1, 'xxfoo');
insert into t2 values (2, 1, 'xxbar');
insert into t2 values (3, 1, 'xxbuz');
select * from t1 join t2 using(`t1_id`) where match (t1.name, t2.name) against('xxfoo' in boolean mode);
-ERROR HY000: The table does not have FULLTEXT index to support this query
+ERROR HY000: Incorrect arguments to MATCH
select * from t2 where match name against ('*a*b*c*d*e*f*' in boolean mode);
-ERROR HY000: The table does not have FULLTEXT index to support this query
+ERROR HY000: Can't find FULLTEXT index matching the column list
drop table t1,t2;
create table t1 (a text, fulltext key (a)) ENGINE = InnoDB;
insert into t1 select "xxxx yyyy zzzz";
@@ -479,16 +479,13 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 fulltext a a 0 1 Using where
EXPLAIN SELECT * FROM t1 IGNORE INDEX(a)
WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
-id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ALL b NULL NULL NULL 8 Using where
+ERROR HY000: Can't find FULLTEXT index matching the column list
EXPLAIN SELECT * FROM t1 USE INDEX(b)
WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
-id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ALL b NULL NULL NULL 8 Using where
+ERROR HY000: Can't find FULLTEXT index matching the column list
EXPLAIN SELECT * FROM t1 FORCE INDEX(b)
WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
-id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ref b b 5 const 5 Using where
+ERROR HY000: Can't find FULLTEXT index matching the column list
DROP TABLE t1;
CREATE TABLE t1(a CHAR(10), fulltext(a)) ENGINE = InnoDB;
INSERT INTO t1 VALUES('aaa15');
@@ -562,14 +559,12 @@ WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE)
);
count(*)
1
-# should return 0
SELECT count(*) FROM t1 WHERE
not exists(
SELECT 1 FROM t2 IGNORE INDEX (b2), t3
WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE)
);
-count(*)
-0
+ERROR HY000: Can't find FULLTEXT index matching the column list
DROP TABLE t1,t2,t3;
CREATE TABLE t1 (a VARCHAR(4), FULLTEXT(a)) ENGINE = InnoDB;
INSERT INTO t1 VALUES
diff --git a/mysql-test/suite/innodb_fts/r/fulltext_left_join.result b/mysql-test/suite/innodb_fts/r/fulltext_left_join.result
index f40d22caa92..fc8d29bbef2 100644
--- a/mysql-test/suite/innodb_fts/r/fulltext_left_join.result
+++ b/mysql-test/suite/innodb_fts/r/fulltext_left_join.result
@@ -66,7 +66,7 @@ create table t2 (m_id int not null, f char(200), key (m_id), fulltext (f)) engin
insert into t2 values (1, 'bword'), (3, 'aword'), (5, '');
ANALYZE TABLE t2;
select * from t1 left join t2 on m_id = id where match(d, e, f) against ('+aword +bword' in boolean mode);
-id d e m_id f
+ERROR HY000: Incorrect arguments to MATCH
drop table t1,t2;
CREATE TABLE t1 (
id int(10) NOT NULL auto_increment,
@@ -89,9 +89,7 @@ ANALYZE TABLE t2;
SELECT t1.*, MATCH(t1.name) AGAINST('string') AS relevance
FROM t1 LEFT JOIN t2 ON t1.link = t2.id
WHERE MATCH(t1.name, t2.name) AGAINST('string' IN BOOLEAN MODE);
-id link name relevance
-1 1 string 0.000000001885928302414186
-2 0 string 0.000000001885928302414186
+ERROR HY000: Incorrect arguments to MATCH
DROP TABLE t1,t2;
CREATE TABLE t1 (a INT) ENGINE = InnoDB;
CREATE TABLE t2 (b INT, c TEXT, KEY(b), FULLTEXT(c)) ENGINE = InnoDB;
diff --git a/mysql-test/suite/innodb_fts/r/fulltext_misc.result b/mysql-test/suite/innodb_fts/r/fulltext_misc.result
index 2e803d1f815..7c342475eeb 100644
--- a/mysql-test/suite/innodb_fts/r/fulltext_misc.result
+++ b/mysql-test/suite/innodb_fts/r/fulltext_misc.result
@@ -115,7 +115,7 @@ drop table t1, t2;
CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci) ENGINE = InnoDB;
INSERT INTO t1 VALUES('abcd');
SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
-ERROR HY000: The table does not have FULLTEXT index to support this query
+ERROR HY000: Can't find FULLTEXT index matching the column list
DROP TABLE t1;
create table t1 (a varchar(10), key(a), fulltext (a)) ENGINE = InnoDB;
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
diff --git a/mysql-test/suite/innodb_fts/r/innodb_fts_misc.result b/mysql-test/suite/innodb_fts/r/innodb_fts_misc.result
index 3f22e79a384..f6be36a24ef 100644
--- a/mysql-test/suite/innodb_fts/r/innodb_fts_misc.result
+++ b/mysql-test/suite/innodb_fts/r/innodb_fts_misc.result
@@ -710,8 +710,7 @@ CREATE FULLTEXT INDEX i ON t1 (char_column2);
Warnings:
Warning 124 InnoDB rebuilding table to add column FTS_DOC_ID
SELECT * FROM t1 WHERE MATCH(char_column) AGAINST ('abc*' IN BOOLEAN MODE);
-id char_column char_column2
-NULL abcde abcde
+ERROR HY000: Can't find FULLTEXT index matching the column list
DROP TABLE t1;
"----------Test22---------"
CREATE TABLE t1 ( id INT , char_column VARCHAR(60) CHARACTER SET UTF8) ENGINE = InnoDB;
diff --git a/mysql-test/suite/innodb_fts/r/innodb_fts_plugin.result b/mysql-test/suite/innodb_fts/r/innodb_fts_plugin.result
new file mode 100644
index 00000000000..569de081762
--- /dev/null
+++ b/mysql-test/suite/innodb_fts/r/innodb_fts_plugin.result
@@ -0,0 +1,29 @@
+INSTALL PLUGIN simple_parser SONAME 'mypluglib';
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200),
+body TEXT,
+FULLTEXT (title) WITH PARSER simple_parser
+) ENGINE=MyISAM;
+ALTER TABLE articles ENGINE=InnoDB;
+ERROR HY000: Cannot CREATE FULLTEXT INDEX WITH PARSER on InnoDB table
+DROP TABLE articles;
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200),
+body TEXT,
+FULLTEXT (title) WITH PARSER simple_parser
+) ENGINE=InnoDB;
+ERROR HY000: Cannot CREATE FULLTEXT INDEX WITH PARSER on InnoDB table
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200),
+body TEXT,
+FULLTEXT (title)
+) ENGINE=InnoDB;
+ALTER TABLE articles ADD FULLTEXT INDEX (body) WITH PARSER simple_parser;
+ERROR HY000: Cannot CREATE FULLTEXT INDEX WITH PARSER on InnoDB table
+CREATE FULLTEXT INDEX ft_index ON articles(body) WITH PARSER simple_parser;
+ERROR HY000: Cannot CREATE FULLTEXT INDEX WITH PARSER on InnoDB table
+DROP TABLE articles;
+UNINSTALL PLUGIN simple_parser;
diff --git a/mysql-test/suite/innodb_fts/r/innodb_fts_result_cache_limit.result b/mysql-test/suite/innodb_fts/r/innodb_fts_result_cache_limit.result
new file mode 100644
index 00000000000..b4fe5154cc3
--- /dev/null
+++ b/mysql-test/suite/innodb_fts/r/innodb_fts_result_cache_limit.result
@@ -0,0 +1,31 @@
+CREATE TABLE t1 (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+a VARCHAR(200),
+b TEXT
+) ENGINE= InnoDB;
+CREATE FULLTEXT INDEX idx on t1 (a,b);
+Warnings:
+Warning 124 InnoDB rebuilding table to add column FTS_DOC_ID
+INSERT INTO t1 (a,b) VALUES
+('MySQL from Tutorial','DBMS stands for DataBase ...') ,
+('when To Use MySQL Well','After that you went through a ...'),
+('where will Optimizing MySQL','what In this tutorial we will show ...'),
+('MySQL from Tutorial','DBMS stands for DataBase ...') ,
+('when To Use MySQL Well','After that you went through a ...'),
+('where will Optimizing MySQL','what In this tutorial we will show ...'),
+('MySQL from Tutorial','DBMS stands for DataBase ...') ,
+('when To Use MySQL Well','After that you went through a ...'),
+('where will Optimizing MySQL','what In this tutorial we will show ...');
+SET SESSION debug_dbug="+d,fts_instrument_result_cache_limit";
+SELECT COUNT(*) FROM t1 WHERE MATCH (a,b) AGAINST ('mysql' IN BOOLEAN MODE);
+COUNT(*)
+9
+SELECT COUNT(*) FROM t1 WHERE MATCH (a,b) AGAINST ('mysql' WITH QUERY EXPANSION);
+ERROR HY000: Table handler out of memory
+SELECT COUNT(*) FROM t1 WHERE MATCH (a,b) AGAINST ('"mysql database"' IN BOOLEAN MODE);
+ERROR HY000: Table handler out of memory
+SELECT COUNT(*) FROM t1 WHERE MATCH (a,b) AGAINST ('"mysql database" @ 5' IN BOOLEAN MODE);
+ERROR HY000: Table handler out of memory
+SET SESSION debug_dbug="-d,fts_instrument_result_cache_limit";
+DROP TABLE t1;
+SET GLOBAL innodb_ft_result_cache_limit=default;
diff --git a/mysql-test/suite/innodb_fts/r/innodb_fts_stopword_charset.result b/mysql-test/suite/innodb_fts/r/innodb_fts_stopword_charset.result
new file mode 100644
index 00000000000..5f8d5e37680
--- /dev/null
+++ b/mysql-test/suite/innodb_fts/r/innodb_fts_stopword_charset.result
@@ -0,0 +1,321 @@
+SELECT @@innodb_ft_server_stopword_table;
+@@innodb_ft_server_stopword_table
+NULL
+SELECT @@innodb_ft_enable_stopword;
+@@innodb_ft_enable_stopword
+1
+SELECT @@innodb_ft_user_stopword_table;
+@@innodb_ft_user_stopword_table
+NULL
+SET NAMES utf8;
+# Test 1 : default latin1_swedish_ci
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200)
+) ENGINE=InnoDB;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+1 love
+2 LOVE
+3 lòve
+4 LÒVE
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+DROP TABLE articles;
+DROP TABLE user_stopword;
+# Test 2 : latin1_general_ci
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200)
+) ENGINE=InnoDB DEFAULT CHARACTER SET latin1 COLLATE latin1_general_ci;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+3 lòve
+4 LÒVE
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+DEFAULT CHARACTER SET latin1 COLLATE latin1_general_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+1 love
+2 LOVE
+DROP TABLE articles;
+DROP TABLE user_stopword;
+# Test 3 : latin1_spanish_ci
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200)
+) ENGINE=InnoDB DEFAULT CHARACTER SET latin1 COLLATE latin1_spanish_ci;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+1 love
+2 LOVE
+3 lòve
+4 LÒVE
+5 löve
+6 LÖVE
+7 løve
+8 LØVE
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+DEFAULT CHARACTER SET latin1 COLLATE latin1_spanish_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+DROP TABLE articles;
+DROP TABLE user_stopword;
+# Test 4 : utf8_general_ci
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200)
+) ENGINE=InnoDB DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+1 love
+2 LOVE
+3 lòve
+4 LÒVE
+5 löve
+6 LÖVE
+9 lṓve
+10 LṒVE
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+DROP TABLE articles;
+DROP TABLE user_stopword;
+# Test 5 : utf8_unicode_ci
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200)
+) ENGINE=InnoDB DEFAULT CHARACTER SET utf8 COLLATE utf8_swedish_ci;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+1 love
+2 LOVE
+3 lòve
+4 LÒVE
+9 lṓve
+10 LṒVE
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+DEFAULT CHARACTER SET utf8 COLLATE utf8_swedish_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+DROP TABLE articles;
+DROP TABLE user_stopword;
+# Test 6 : utf8_unicode_ci
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200)
+) ENGINE=InnoDB DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+1 love
+2 LOVE
+3 lòve
+4 LÒVE
+5 löve
+6 LÖVE
+9 lṓve
+10 LṒVE
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+DROP TABLE articles;
+DROP TABLE user_stopword;
+# Test 7 : gb2312_chinese_ci
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200)
+) ENGINE=InnoDB DEFAULT CHARACTER SET gb2312 COLLATE gb2312_chinese_ci;
+INSERT INTO articles (title) VALUES
+('相亲相爱'),('怜香惜爱'),('充满å¯çˆ±'),('爱æ¨äº¤ç»‡');
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('相亲相爱' IN NATURAL LANGUAGE MODE);
+id title
+1 相亲相爱
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+DEFAULT CHARACTER SET gb2312 COLLATE gb2312_chinese_ci;
+INSERT INTO user_stopword VALUES('相亲相爱');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('相亲相爱' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('怜香惜爱' IN NATURAL LANGUAGE MODE);
+id title
+2 怜香惜爱
+DROP TABLE articles;
+DROP TABLE user_stopword;
+# Test 8 : test shutdown to check if stopword still works
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200)
+) ENGINE=InnoDB;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+1 love
+2 LOVE
+3 lòve
+4 LÒVE
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+# Shutdown and restart mysqld
+SET NAMES utf8;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+DROP TABLE articles;
+DROP TABLE user_stopword;
+# Test 9 : drop user stopwrod table,test shutdown to check if it works
+CREATE TABLE articles (
+id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+title VARCHAR(200)
+) ENGINE=InnoDB;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+1 love
+2 LOVE
+3 lòve
+4 LÒVE
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+DROP TABLE user_stopword;
+# Shutdown and restart mysqld
+SET NAMES utf8;
+INSERT INTO articles (title) VALUES
+('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+('lṓve'),('LṒVE');
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+id title
+11 love
+12 LOVE
+13 lòve
+14 LÒVE
+SELECT * FROM articles WHERE MATCH (title)
+AGAINST ('love' IN NATURAL LANGUAGE MODE);
+id title
+11 love
+12 LOVE
+13 lòve
+14 LÒVE
+DROP TABLE articles;
+SET SESSION innodb_ft_enable_stopword=1;
+SET GLOBAL innodb_ft_server_stopword_table=default;
+SET SESSION innodb_ft_user_stopword_table=default;
diff --git a/mysql-test/suite/innodb_fts/t/fulltext.test b/mysql-test/suite/innodb_fts/t/fulltext.test
index d75a650ca4d..90d5d5c71e0 100644
--- a/mysql-test/suite/innodb_fts/t/fulltext.test
+++ b/mysql-test/suite/innodb_fts/t/fulltext.test
@@ -4,11 +4,6 @@
--source include/have_innodb.inc
-if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`)
-{
- --skip Not fixed in InnoDB 5.6.10 or earlier
-}
-
--disable_warnings
drop table if exists t1,t2,t3;
--enable_warnings
@@ -281,17 +276,17 @@ create table t2 (t2_id int(11) primary key, t1_id int(11), name varchar(32)) ENG
insert into t2 values (1, 1, 'xxfoo');
insert into t2 values (2, 1, 'xxbar');
insert into t2 values (3, 1, 'xxbuz');
-# INNODB_FTS: Note there is no fulltext index on table. InnoDB do not support
-# Fulltext search in such case, will return 1739
---error ER_TABLE_HAS_NO_FT
+# INNODB_FTS: InnoDB do not support MATCH expressions with arguments from
+# different tables
+--error ER_WRONG_ARGUMENTS
select * from t1 join t2 using(`t1_id`) where match (t1.name, t2.name) against('xxfoo' in boolean mode);
#
# Bug #7858: bug with many short (< ft_min_word_len) words in boolean search
#
# INNODB_FTS: Note there is no fulltext index on table. InnoDB do not support
-# Fulltext search in such case, will return 1739
---error ER_TABLE_HAS_NO_FT
+# Fulltext search in such case
+--error ER_FT_MATCHING_KEY_NOT_FOUND
select * from t2 where match name against ('*a*b*c*d*e*f*' in boolean mode);
drop table t1,t2;
@@ -490,12 +485,15 @@ WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
EXPLAIN SELECT * FROM t1 FORCE INDEX(a)
WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
+--error ER_FT_MATCHING_KEY_NOT_FOUND
EXPLAIN SELECT * FROM t1 IGNORE INDEX(a)
WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
+--error ER_FT_MATCHING_KEY_NOT_FOUND
EXPLAIN SELECT * FROM t1 USE INDEX(b)
WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
+--error ER_FT_MATCHING_KEY_NOT_FOUND
EXPLAIN SELECT * FROM t1 FORCE INDEX(b)
WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
@@ -592,7 +590,7 @@ SELECT count(*) FROM t1 WHERE
WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE)
);
---echo # should return 0
+--error ER_FT_MATCHING_KEY_NOT_FOUND
SELECT count(*) FROM t1 WHERE
not exists(
SELECT 1 FROM t2 IGNORE INDEX (b2), t3
diff --git a/mysql-test/suite/innodb_fts/t/fulltext_left_join.test b/mysql-test/suite/innodb_fts/t/fulltext_left_join.test
index 54cec263bfa..23bbd5ddc10 100644
--- a/mysql-test/suite/innodb_fts/t/fulltext_left_join.test
+++ b/mysql-test/suite/innodb_fts/t/fulltext_left_join.test
@@ -77,6 +77,7 @@ insert into t2 values (1, 'bword'), (3, 'aword'), (5, '');
-- disable_result_log
ANALYZE TABLE t2;
-- enable_result_log
+--error ER_WRONG_ARGUMENTS
select * from t1 left join t2 on m_id = id where match(d, e, f) against ('+aword +bword' in boolean mode);
drop table t1,t2;
@@ -107,6 +108,7 @@ ANALYZE TABLE t1;
ANALYZE TABLE t2;
-- enable_result_log
+--error ER_WRONG_ARGUMENTS
SELECT t1.*, MATCH(t1.name) AGAINST('string') AS relevance
FROM t1 LEFT JOIN t2 ON t1.link = t2.id
WHERE MATCH(t1.name, t2.name) AGAINST('string' IN BOOLEAN MODE);
diff --git a/mysql-test/suite/innodb_fts/t/fulltext_misc.test b/mysql-test/suite/innodb_fts/t/fulltext_misc.test
index 18fbd7112fd..424cfda2f95 100644
--- a/mysql-test/suite/innodb_fts/t/fulltext_misc.test
+++ b/mysql-test/suite/innodb_fts/t/fulltext_misc.test
@@ -139,7 +139,7 @@ CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci) ENGINE = InnoDB;
INSERT INTO t1 VALUES('abcd');
# INNODB_FTS: Please Note this table do not have FTS. InnoDB return 1214 error
---error ER_TABLE_HAS_NO_FT
+--error ER_FT_MATCHING_KEY_NOT_FOUND
SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_fts/t/fulltext_order_by.test b/mysql-test/suite/innodb_fts/t/fulltext_order_by.test
index 5ea5c89a49c..b8f64a2fe78 100644
--- a/mysql-test/suite/innodb_fts/t/fulltext_order_by.test
+++ b/mysql-test/suite/innodb_fts/t/fulltext_order_by.test
@@ -152,7 +152,7 @@ order by
(select b.id, b.betreff from t3 b)
order by match(betreff) against ('+abc' in boolean mode) desc;
---error 1191
+--error ER_FT_MATCHING_KEY_NOT_FOUND
(select b.id, b.betreff from t3 b) union
(select b.id, b.betreff from t3 b)
order by match(betreff) against ('+abc') desc;
diff --git a/mysql-test/suite/innodb_fts/t/innodb_fts_misc.test b/mysql-test/suite/innodb_fts/t/innodb_fts_misc.test
index 934d52f764f..68ca8974512 100644
--- a/mysql-test/suite/innodb_fts/t/innodb_fts_misc.test
+++ b/mysql-test/suite/innodb_fts/t/innodb_fts_misc.test
@@ -9,11 +9,6 @@ let collation=UTF8_UNICODE_CI;
drop table if exists t1;
--enable_warnings
-if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`)
-{
- --skip Not fixed in InnoDB 5.6.10 or earlier
-}
-
# Create FTS table
CREATE TABLE t1 (
id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
@@ -643,6 +638,7 @@ CREATE TABLE t1 ( id INT , char_column VARCHAR(60) CHARACTER SET UTF32, char_col
INSERT INTO t1 (char_column) VALUES ('abcde'),('fghij'),('klmno'),('qrstu');
UPDATE t1 SET char_column2 = char_column;
CREATE FULLTEXT INDEX i ON t1 (char_column2);
+--error ER_FT_MATCHING_KEY_NOT_FOUND
SELECT * FROM t1 WHERE MATCH(char_column) AGAINST ('abc*' IN BOOLEAN MODE);
DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_fts/t/innodb_fts_plugin.test b/mysql-test/suite/innodb_fts/t/innodb_fts_plugin.test
new file mode 100644
index 00000000000..e800faed0f5
--- /dev/null
+++ b/mysql-test/suite/innodb_fts/t/innodb_fts_plugin.test
@@ -0,0 +1,45 @@
+--source include/have_simple_parser.inc
+--source include/have_innodb.inc
+
+# Install fts parser plugin
+INSTALL PLUGIN simple_parser SONAME 'mypluglib';
+
+# Create a MyISAM table with a parser-based fulltext index; altering it to InnoDB must fail
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200),
+ body TEXT,
+ FULLTEXT (title) WITH PARSER simple_parser
+ ) ENGINE=MyISAM;
+--error ER_INNODB_NO_FT_USES_PARSER
+ALTER TABLE articles ENGINE=InnoDB;
+
+DROP TABLE articles;
+
+# Create a table having a full text index with parser
+--error ER_INNODB_NO_FT_USES_PARSER
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200),
+ body TEXT,
+ FULLTEXT (title) WITH PARSER simple_parser
+ ) ENGINE=InnoDB;
+
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200),
+ body TEXT,
+ FULLTEXT (title)
+ ) ENGINE=InnoDB;
+
+# Alter table to add a full text index with parser
+--error ER_INNODB_NO_FT_USES_PARSER
+ALTER TABLE articles ADD FULLTEXT INDEX (body) WITH PARSER simple_parser;
+
+# Create a full text index with parser
+--error ER_INNODB_NO_FT_USES_PARSER
+CREATE FULLTEXT INDEX ft_index ON articles(body) WITH PARSER simple_parser;
+
+DROP TABLE articles;
+# Uninstall plugin
+UNINSTALL PLUGIN simple_parser;
diff --git a/mysql-test/suite/innodb_fts/t/innodb_fts_result_cache_limit.test b/mysql-test/suite/innodb_fts/t/innodb_fts_result_cache_limit.test
new file mode 100644
index 00000000000..dc55712b47c
--- /dev/null
+++ b/mysql-test/suite/innodb_fts/t/innodb_fts_result_cache_limit.test
@@ -0,0 +1,51 @@
+# This is a basic test for innodb fts result cache limit.
+
+-- source include/have_innodb.inc
+
+# Must have debug code to use SET SESSION debug
+--source include/have_debug.inc
+
+# Create FTS table
+CREATE TABLE t1 (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ a VARCHAR(200),
+ b TEXT
+ ) ENGINE= InnoDB;
+
+# Create the FTS index
+CREATE FULLTEXT INDEX idx on t1 (a,b);
+
+# Insert rows
+INSERT INTO t1 (a,b) VALUES
+ ('MySQL from Tutorial','DBMS stands for DataBase ...') ,
+ ('when To Use MySQL Well','After that you went through a ...'),
+ ('where will Optimizing MySQL','what In this tutorial we will show ...'),
+ ('MySQL from Tutorial','DBMS stands for DataBase ...') ,
+ ('when To Use MySQL Well','After that you went through a ...'),
+ ('where will Optimizing MySQL','what In this tutorial we will show ...'),
+ ('MySQL from Tutorial','DBMS stands for DataBase ...') ,
+ ('when To Use MySQL Well','After that you went through a ...'),
+ ('where will Optimizing MySQL','what In this tutorial we will show ...');
+
+SET SESSION debug_dbug="+d,fts_instrument_result_cache_limit";
+
+# Simple term search
+SELECT COUNT(*) FROM t1 WHERE MATCH (a,b) AGAINST ('mysql' IN BOOLEAN MODE);
+
+# Query expansion
+--error 128
+SELECT COUNT(*) FROM t1 WHERE MATCH (a,b) AGAINST ('mysql' WITH QUERY EXPANSION);
+
+# Simple phrase search
+--error 128
+SELECT COUNT(*) FROM t1 WHERE MATCH (a,b) AGAINST ('"mysql database"' IN BOOLEAN MODE);
+
+# Simple proximity search
+--error 128
+SELECT COUNT(*) FROM t1 WHERE MATCH (a,b) AGAINST ('"mysql database" @ 5' IN BOOLEAN MODE);
+
+SET SESSION debug_dbug="-d,fts_instrument_result_cache_limit";
+
+DROP TABLE t1;
+
+SET GLOBAL innodb_ft_result_cache_limit=default;
diff --git a/mysql-test/suite/innodb_fts/t/innodb_fts_stopword_charset.test b/mysql-test/suite/innodb_fts/t/innodb_fts_stopword_charset.test
new file mode 100644
index 00000000000..cb49ca0e39f
--- /dev/null
+++ b/mysql-test/suite/innodb_fts/t/innodb_fts_stopword_charset.test
@@ -0,0 +1,421 @@
+# This is the basic function tests for innodb FTS stopword charset
+
+-- source include/have_innodb.inc
+
+# Valgrind would complain about memory leaks when we crash on purpose.
+--source include/not_valgrind.inc
+# Embedded server does not support crashing
+--source include/not_embedded.inc
+# Avoid CrashReporter popup on Mac
+--source include/not_crashrep.inc
+
+let $innodb_ft_server_stopword_table_orig=`SELECT @@innodb_ft_server_stopword_table`;
+let $innodb_ft_enable_stopword_orig=`SELECT @@innodb_ft_enable_stopword`;
+let $innodb_ft_user_stopword_table_orig=`SELECT @@innodb_ft_user_stopword_table`;
+
+SELECT @@innodb_ft_server_stopword_table;
+SELECT @@innodb_ft_enable_stopword;
+SELECT @@innodb_ft_user_stopword_table;
+
+SET NAMES utf8;
+
+-- echo # Test 1 : default latin1_swedish_ci
+# Create FTS table with default charset latin1_swedish_ci
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200)
+ ) ENGINE=InnoDB;
+
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+
+# Build full text index with default stopword
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+--enable_warnings
+
+# We can find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+# Define a user stopword table and set to it
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+
+# Rebuild the full text index with user stopword
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+
+# Now we will not find 'lòve' and check result with 'love'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+DROP TABLE articles;
+DROP TABLE user_stopword;
+
+-- echo # Test 2 : latin1_general_ci
+# Create FTS table with default charset latin1_general_ci
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200)
+ ) ENGINE=InnoDB DEFAULT CHARACTER SET latin1 COLLATE latin1_general_ci;
+
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+
+# Build full text index with default stopword
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+--enable_warnings
+
+# We can find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+# Define a user stopword table and set to it
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+ DEFAULT CHARACTER SET latin1 COLLATE latin1_general_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+
+# Rebuild the full text index with user stopword
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+
+# Now we will not find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+DROP TABLE articles;
+DROP TABLE user_stopword;
+
+-- echo # Test 3 : latin1_spanish_ci
+# Create FTS table with default charset latin1_spanish_ci
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200)
+ ) ENGINE=InnoDB DEFAULT CHARACTER SET latin1 COLLATE latin1_spanish_ci;
+
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+
+# Build full text index with default stopword
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+--enable_warnings
+
+# We can find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+# Define a user stopword table and set to it
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+ DEFAULT CHARACTER SET latin1 COLLATE latin1_spanish_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+
+# Rebuild the full text index with user stopword
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+
+# Now we will not find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+DROP TABLE articles;
+DROP TABLE user_stopword;
+
+-- echo # Test 4 : utf8_general_ci
+# Create FTS table with default charset utf8_general_ci
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200)
+ ) ENGINE=InnoDB DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
+
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+
+# Build full text index with default stopword
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+--enable_warnings
+
+# We can find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+# Define a user stopword table and set to it
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+ DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+
+# Rebuild the full text index with user stopword
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+
+# Now we will not find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+DROP TABLE articles;
+DROP TABLE user_stopword;
+
+-- echo # Test 5 : utf8_unicode_ci
+# Create FTS table with default charset utf8_swedish_ci
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200)
+ ) ENGINE=InnoDB DEFAULT CHARACTER SET utf8 COLLATE utf8_swedish_ci;
+
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+
+# Build full text index with default stopword
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+--enable_warnings
+
+# We can find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+# Define a user stopword table and set to it
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+ DEFAULT CHARACTER SET utf8 COLLATE utf8_swedish_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+
+# Rebuild the full text index with user stopword
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+
+# Now we will not find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+DROP TABLE articles;
+DROP TABLE user_stopword;
+
+-- echo # Test 6 : utf8_unicode_ci
+# Create FTS table with default charset utf8_unicode_ci
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200)
+ ) ENGINE=InnoDB DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
+
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+
+# Build full text index with default stopword
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+--enable_warnings
+
+# We can find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+# Define a user stopword table and set to it
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+ DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+
+# Rebuild the full text index with user stopword
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+
+# Now we will not find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+DROP TABLE articles;
+DROP TABLE user_stopword;
+
+-- echo # Test 7 : gb2312_chinese_ci
+# Create FTS table with default charset gb2312_chinese_ci
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200)
+ ) ENGINE=InnoDB DEFAULT CHARACTER SET gb2312 COLLATE gb2312_chinese_ci;
+
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('相亲相爱'),('怜香惜爱'),('充满可爱'),('爱恨交织');
+
+# Build full text index with default stopword
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+--enable_warnings
+
+# We can find '相亲相爱'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('相亲相爱' IN NATURAL LANGUAGE MODE);
+
+# Define a user stopword table and set to it
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB
+ DEFAULT CHARACTER SET gb2312 COLLATE gb2312_chinese_ci;
+INSERT INTO user_stopword VALUES('相亲相爱');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+
+# Rebuild the full text index with user stopword
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+
+# Now we will not find '相亲相爱'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('相亲相爱' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('怜香惜爱' IN NATURAL LANGUAGE MODE);
+
+DROP TABLE articles;
+DROP TABLE user_stopword;
+
+-- echo # Test 8 : test shutdown to check if stopword still works
+# Create FTS table with default charset latin1_swedish_ci
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200)
+ ) ENGINE=InnoDB;
+
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+
+# Build full text index with default stopword
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+--enable_warnings
+
+# We can find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+# Define a user stopword table and set to it
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+
+# Rebuild the full text index with user stopword
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+
+# Now we will not find 'lòve' and check result with 'love'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+--echo # Shutdown and restart mysqld
+--source include/restart_mysqld.inc
+
+SET NAMES utf8;
+
+# Insert rows to check if it uses user stopword
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+--enable_warnings
+
+# Now we will not find 'lòve' and check result with 'love'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+DROP TABLE articles;
+DROP TABLE user_stopword;
+
+-- echo # Test 9 : drop user stopwrod table,test shutdown to check if it works
+# Create FTS table with default charset latin1_swedish_ci
+CREATE TABLE articles (
+ id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
+ title VARCHAR(200)
+ ) ENGINE=InnoDB;
+
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+
+# Build full text index with default stopword
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+--enable_warnings
+
+# We can find 'lòve'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+# Define a user stopword table and set to it
+CREATE TABLE user_stopword(value varchar(30)) ENGINE = InnoDB;
+INSERT INTO user_stopword VALUES('lòve');
+SET GLOBAL innodb_ft_server_stopword_table = 'test/user_stopword';
+
+# Rebuild the full text index with user stopword
+DROP INDEX ft_idx ON articles;
+CREATE FULLTEXT INDEX ft_idx ON articles(title);
+
+# Now we will not find 'lòve' and check result with 'love'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+# Drop user stopword table
+DROP TABLE user_stopword;
+
+--echo # Shutdown and restart mysqld
+--source include/restart_mysqld.inc
+
+SET NAMES utf8;
+
+# Insert rows to check if it uses user stopword
+--disable_warnings
+INSERT INTO articles (title) VALUES
+ ('love'),('LOVE'),('lòve'),('LÒVE'),('löve'),('LÖVE'),('løve'),('LØVE'),
+ ('lṓve'),('LṒVE');
+--enable_warnings
+
+# Now we will not find 'lòve' and check result with 'love'
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('lòve' IN NATURAL LANGUAGE MODE);
+
+SELECT * FROM articles WHERE MATCH (title)
+ AGAINST ('love' IN NATURAL LANGUAGE MODE);
+
+DROP TABLE articles;
+
+# Restore Values
+eval SET SESSION innodb_ft_enable_stopword=$innodb_ft_enable_stopword_orig;
+eval SET GLOBAL innodb_ft_server_stopword_table=default;
+eval SET SESSION innodb_ft_user_stopword_table=default;
diff --git a/mysql-test/t/alter_table_autoinc-5574.test b/mysql-test/t/alter_table_autoinc-5574.test
new file mode 100644
index 00000000000..95c2b8d81bb
--- /dev/null
+++ b/mysql-test/t/alter_table_autoinc-5574.test
@@ -0,0 +1,12 @@
+#
+# MDEV-5574 Set AUTO_INCREMENT below max value of column
+#
+--source include/have_innodb.inc
+create table t1(a int(10)unsigned not null auto_increment primary key,
+b varchar(255) not null) engine=innodb default charset=utf8;
+insert into t1 values(1,'aaa'),(2,'bbb');
+alter table t1 auto_increment=1;
+insert into t1 values(NULL, 'ccc');
+select * from t1;
+drop table t1;
+
diff --git a/sql/item_func.cc b/sql/item_func.cc
index f9f467f44f3..8858837188d 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -6214,6 +6214,7 @@ bool Item_func_match::fix_fields(THD *thd, Item **ref)
return TRUE;
}
+ bool allows_multi_table_search= true;
const_item_cache=0;
for (uint i=1 ; i < arg_count ; i++)
{
@@ -6225,7 +6226,10 @@ bool Item_func_match::fix_fields(THD *thd, Item **ref)
my_error(ER_WRONG_ARGUMENTS, MYF(0), "AGAINST");
return TRUE;
}
+ allows_multi_table_search &=
+ allows_search_on_non_indexed_columns(((Item_field *)item)->field->table);
}
+
/*
Check that all columns come from the same table.
We've already checked that columns in MATCH are fields so
@@ -6234,7 +6238,7 @@ bool Item_func_match::fix_fields(THD *thd, Item **ref)
if ((used_tables_cache & ~PARAM_TABLE_BIT) != item->used_tables())
key=NO_SUCH_KEY;
- if (key == NO_SUCH_KEY && !(flags & FT_BOOL))
+ if (key == NO_SUCH_KEY && !allows_multi_table_search)
{
my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
return TRUE;
@@ -6332,7 +6336,7 @@ bool Item_func_match::fix_index()
}
err:
- if (flags & FT_BOOL)
+ if (allows_search_on_non_indexed_columns(table))
{
key=NO_SUCH_KEY;
return 0;
diff --git a/sql/item_func.h b/sql/item_func.h
index 4b1516dcc4d..270c031955a 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -1909,6 +1909,41 @@ public:
/* TODO: consider adding in support for the MATCH-based virtual columns */
return trace_unsupported_by_check_vcol_func_processor(func_name());
}
+private:
+ /**
+ Check whether the storage engine for the given table
+ allows FTS Boolean search on non-indexed columns.
+
+ @todo A flag should be added to the extended fulltext API so that
+ it may be checked whether search on non-indexed columns is
+ supported. Currently, it is not possible to check for such a
+ flag since @c this->ft_handler is not yet set when this function is
+ called. The current hack is to assume that search on non-indexed
+ columns is supported for engines that do not support the extended
+ fulltext API (e.g., MyISAM), while it is not supported for other
+ engines (e.g., InnoDB).
+
+ @param table_arg Table whose storage engine is to be checked
+
+ @retval true if BOOLEAN search on non-indexed columns is supported
+ @retval false otherwise
+ */
+ bool allows_search_on_non_indexed_columns(TABLE* table_arg)
+ {
+ // Only Boolean search may support non-indexed columns
+ if (!(flags & FT_BOOL))
+ return false;
+
+ DBUG_ASSERT(table_arg && table_arg->file);
+
+ // Assume that if the extended fulltext API is not supported,
+ // non-indexed columns are allowed. This will be true for MyISAM.
+ if ((table_arg->file->ha_table_flags() & HA_CAN_FULLTEXT_EXT) == 0)
+ return true;
+
+ return false;
+ }
+ }
+
};
diff --git a/storage/innobase/api/api0api.cc b/storage/innobase/api/api0api.cc
index 5f9762a1846..647ebcde6f0 100644
--- a/storage/innobase/api/api0api.cc
+++ b/storage/innobase/api/api0api.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -245,7 +245,7 @@ ib_open_table_by_id(
dict_mutex_enter_for_mysql();
}
- table = dict_table_open_on_id(table_id, FALSE, FALSE);
+ table = dict_table_open_on_id(table_id, FALSE, DICT_TABLE_OP_NORMAL);
if (table != NULL && table->ibd_file_missing) {
table = NULL;
@@ -1183,7 +1183,7 @@ ib_cursor_open_index_using_name(
/* We want to increment the ref count, so we do a redundant search. */
table = dict_table_open_on_id(cursor->prebuilt->table->id,
- FALSE, FALSE);
+ FALSE, DICT_TABLE_OP_NORMAL);
ut_a(table != NULL);
/* The first index is always the cluster index. */
@@ -1630,6 +1630,8 @@ ib_cursor_insert_row(
src_tuple->index->table, q_proc->grph.ins, node->ins);
}
+ srv_active_wake_master_thread();
+
return(err);
}
@@ -1914,6 +1916,8 @@ ib_cursor_update_row(
err = ib_execute_update_query_graph(cursor, pcur);
}
+ srv_active_wake_master_thread();
+
return(err);
}
@@ -2039,6 +2043,8 @@ ib_cursor_delete_row(
err = DB_RECORD_NOT_FOUND;
}
+ srv_active_wake_master_thread();
+
return(err);
}
@@ -2296,12 +2302,14 @@ ib_col_set_value(
ib_tpl_t ib_tpl, /*!< in: tuple instance */
ib_ulint_t col_no, /*!< in: column index in tuple */
const void* src, /*!< in: data value */
- ib_ulint_t len) /*!< in: data value len */
+ ib_ulint_t len, /*!< in: data value len */
+ ib_bool_t need_cpy) /*!< in: if need memcpy */
{
const dtype_t* dtype;
dfield_t* dfield;
void* dst = NULL;
ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
+ ulint col_len;
dfield = ib_col_get_dfield(tuple, col_no);
@@ -2312,6 +2320,7 @@ ib_col_set_value(
}
dtype = dfield_get_type(dfield);
+ col_len = dtype_get_len(dtype);
/* Not allowed to update system columns. */
if (dtype_get_mtype(dtype) == DATA_SYS) {
@@ -2325,10 +2334,10 @@ ib_col_set_value(
for that. */
if (ib_col_is_capped(dtype)) {
- len = ut_min(len, dtype_get_len(dtype));
+ len = ut_min(len, col_len);
if (dst == NULL || len > dfield_get_len(dfield)) {
- dst = mem_heap_alloc(tuple->heap, dtype_get_len(dtype));
+ dst = mem_heap_alloc(tuple->heap, col_len);
ut_a(dst != NULL);
}
} else if (dst == NULL || len > dfield_get_len(dfield)) {
@@ -2342,7 +2351,7 @@ ib_col_set_value(
switch (dtype_get_mtype(dtype)) {
case DATA_INT: {
- if (dtype_get_len(dtype) == len) {
+ if (col_len == len) {
ibool usign;
usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
@@ -2387,22 +2396,96 @@ ib_col_set_value(
memset((byte*) dst + len,
pad_char,
- dtype_get_len(dtype) - len);
+ col_len - len);
memcpy(dst, src, len);
- len = dtype_get_len(dtype);
+ len = col_len;
break;
}
case DATA_BLOB:
case DATA_BINARY:
- case DATA_MYSQL:
case DATA_DECIMAL:
case DATA_VARCHAR:
- case DATA_VARMYSQL:
case DATA_FIXBINARY:
+ if (need_cpy) {
+ memcpy(dst, src, len);
+ } else {
+ dfield_set_data(dfield, src, len);
+ dst = dfield_get_data(dfield);
+ }
+ break;
+
+ case DATA_MYSQL:
+ case DATA_VARMYSQL: {
+ ulint cset;
+ CHARSET_INFO* cs;
+ int error = 0;
+ ulint true_len = len;
+
+ /* For multi byte character sets we need to
+ calculate the true length of the data. */
+ cset = dtype_get_charset_coll(
+ dtype_get_prtype(dtype));
+ cs = all_charsets[cset];
+ if (cs) {
+ uint pos = (uint)(col_len / cs->mbmaxlen);
+
+ if (len > 0 && cs->mbmaxlen > 1) {
+ true_len = (ulint)
+ cs->cset->well_formed_len(
+ cs,
+ (const char*)src,
+ (const char*)src + len,
+ pos,
+ &error);
+
+ if (true_len < len) {
+ len = true_len;
+ }
+ }
+ }
+
+ /* All invalid bytes in data need be truncated.
+ If len == 0, means all bytes of the data is invalid.
+ In this case, the data will be truncated to empty.*/
memcpy(dst, src, len);
+
+ /* For DATA_MYSQL, need to pad the unused
+ space with spaces. */
+ if (dtype_get_mtype(dtype) == DATA_MYSQL) {
+ ulint n_chars;
+
+ if (len < col_len) {
+ ulint pad_len = col_len - len;
+
+ ut_a(cs != NULL);
+ ut_a(!(pad_len % cs->mbminlen));
+
+ cs->cset->fill(cs, (char*)dst + len,
+ pad_len,
+ 0x20 /* space */);
+ }
+
+ /* Why we should do below? See function
+ row_mysql_store_col_in_innobase_format */
+
+ ut_a(!(dtype_get_len(dtype)
+ % dtype_get_mbmaxlen(dtype)));
+
+ n_chars = dtype_get_len(dtype)
+ / dtype_get_mbmaxlen(dtype);
+
+ /* Strip space padding. */
+ while (col_len > n_chars
+ && ((char*)dst)[col_len - 1] == 0x20) {
+ col_len--;
+ }
+
+ len = col_len;
+ }
break;
+ }
default:
ut_error;
@@ -2476,7 +2559,9 @@ ib_col_copy_value_low(
data_len, usign);
if (usign) {
- if (len == 2) {
+ if (len == 1) {
+ *(ib_i8_t*)dst = (ib_i8_t)ret;
+ } else if (len == 2) {
*(ib_i16_t*)dst = (ib_i16_t)ret;
} else if (len == 4) {
*(ib_i32_t*)dst = (ib_i32_t)ret;
@@ -2484,7 +2569,9 @@ ib_col_copy_value_low(
*(ib_i64_t*)dst = (ib_i64_t)ret;
}
} else {
- if (len == 2) {
+ if (len == 1) {
+ *(ib_u8_t*)dst = (ib_i8_t)ret;
+ } else if (len == 2) {
*(ib_u16_t*)dst = (ib_i16_t)ret;
} else if (len == 4) {
*(ib_u32_t*)dst = (ib_i32_t)ret;
@@ -3450,7 +3537,7 @@ ib_tuple_write_int(
return(DB_DATA_MISMATCH);
}
- return(ib_col_set_value(ib_tpl, col_no, value, type_len));
+ return(ib_col_set_value(ib_tpl, col_no, value, type_len, true));
}
/*****************************************************************//**
@@ -3465,7 +3552,7 @@ ib_tuple_write_i8(
int col_no, /*!< in: column number */
ib_i8_t val) /*!< in: value to write */
{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
}
/*****************************************************************//**
@@ -3480,7 +3567,7 @@ ib_tuple_write_i16(
int col_no, /*!< in: column number */
ib_i16_t val) /*!< in: value to write */
{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
}
/*****************************************************************//**
@@ -3495,7 +3582,7 @@ ib_tuple_write_i32(
int col_no, /*!< in: column number */
ib_i32_t val) /*!< in: value to write */
{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
}
/*****************************************************************//**
@@ -3510,7 +3597,7 @@ ib_tuple_write_i64(
int col_no, /*!< in: column number */
ib_i64_t val) /*!< in: value to write */
{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
}
/*****************************************************************//**
@@ -3525,7 +3612,7 @@ ib_tuple_write_u8(
int col_no, /*!< in: column number */
ib_u8_t val) /*!< in: value to write */
{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
}
/*****************************************************************//**
@@ -3540,7 +3627,7 @@ ib_tuple_write_u16(
int col_no, /*!< in: column number */
ib_u16_t val) /*!< in: value to write */
{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
}
/*****************************************************************//**
@@ -3555,7 +3642,7 @@ ib_tuple_write_u32(
int col_no, /*!< in: column number */
ib_u32_t val) /*!< in: value to write */
{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
}
/*****************************************************************//**
@@ -3570,7 +3657,7 @@ ib_tuple_write_u64(
int col_no, /*!< in: column number */
ib_u64_t val) /*!< in: value to write */
{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
}
/*****************************************************************//**
@@ -3603,7 +3690,8 @@ ib_tuple_write_double(
dfield = ib_col_get_dfield(tuple, col_no);
if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_DOUBLE) {
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no,
+ &val, sizeof(val), true));
} else {
return(DB_DATA_MISMATCH);
}
@@ -3653,7 +3741,8 @@ ib_tuple_write_float(
dfield = ib_col_get_dfield(tuple, col_no);
if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_FLOAT) {
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ return(ib_col_set_value(ib_tpl, col_no,
+ &val, sizeof(val), true));
} else {
return(DB_DATA_MISMATCH);
}
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index e3e127c3ace..63b01f65f4b 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -44,7 +44,21 @@ Created 6/2/1994 Heikki Tuuri
#include "trx0trx.h"
#include "srv0mon.h"
+/**************************************************************//**
+Checks if the page in the cursor can be merged with given page.
+If necessary, re-organize the merge_page.
+@return TRUE if possible to merge. */
+UNIV_INTERN
+ibool
+btr_can_merge_with_page(
+/*====================*/
+ btr_cur_t* cursor, /*!< in: cursor on the page to merge */
+ ulint page_no, /*!< in: a sibling page */
+ buf_block_t** merge_block, /*!< out: the merge block */
+ mtr_t* mtr); /*!< in: mini-transaction */
+
#endif /* UNIV_HOTBACKUP */
+
/**************************************************************//**
Report that an index page is corrupted. */
UNIV_INTERN
@@ -1032,7 +1046,7 @@ btr_page_create(
btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
if (page_zip) {
- page_create_zip(block, index, level, mtr);
+ page_create_zip(block, index, level, 0, mtr);
} else {
page_create(block, mtr, dict_table_is_comp(index->table));
/* Set the level of the new index page */
@@ -1602,7 +1616,7 @@ btr_create(
page_zip = buf_block_get_page_zip(block);
if (page_zip) {
- page = page_create_zip(block, index, 0, mtr);
+ page = page_create_zip(block, index, 0, 0, mtr);
} else {
page = page_create(block, mtr,
dict_table_is_comp(index->table));
@@ -1727,22 +1741,32 @@ btr_free_root(
#endif /* !UNIV_HOTBACKUP */
/*************************************************************//**
-Reorganizes an index page. */
-static
-ibool
+Reorganizes an index page.
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@retval true if the operation was successful
+@retval false if it is a compressed page, and recompression failed */
+UNIV_INTERN
+bool
btr_page_reorganize_low(
/*====================*/
- ibool recovery,/*!< in: TRUE if called in recovery:
+ bool recovery,/*!< in: true if called in recovery:
locks should not be updated, i.e.,
there cannot exist locks on the
page, and a hash index should not be
dropped: it cannot exist */
- ulint compression_level,/*!< in: compression level to be used
+ ulint z_level,/*!< in: compression level to be used
if dealing with compressed page */
- buf_block_t* block, /*!< in: page to be reorganized */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
+ page_cur_t* cursor, /*!< in/out: page cursor */
+ dict_index_t* index, /*!< in: the index tree of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
+ buf_block_t* block = page_cur_get_block(cursor);
#ifndef UNIV_HOTBACKUP
buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page);
#endif /* !UNIV_HOTBACKUP */
@@ -1755,9 +1779,9 @@ btr_page_reorganize_low(
ulint data_size2;
ulint max_ins_size1;
ulint max_ins_size2;
- ibool success = FALSE;
- byte type;
- byte* log_ptr;
+ bool success = false;
+ ulint pos;
+ bool log_compressed;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_assert_not_corrupted(block, index);
@@ -1767,27 +1791,6 @@ btr_page_reorganize_low(
data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
-#ifndef UNIV_HOTBACKUP
- /* Write the log record */
- if (page_zip) {
- type = MLOG_ZIP_PAGE_REORGANIZE;
- } else if (page_is_comp(page)) {
- type = MLOG_COMP_PAGE_REORGANIZE;
- } else {
- type = MLOG_PAGE_REORGANIZE;
- }
-
- log_ptr = mlog_open_and_write_index(
- mtr, page, index, type, page_zip ? 1 : 0);
-
- /* For compressed pages write the compression level. */
- if (log_ptr && page_zip) {
- mach_write_to_1(log_ptr, compression_level);
- mlog_close(mtr, log_ptr + 1);
- }
-
-#endif /* !UNIV_HOTBACKUP */
-
/* Turn logging off */
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
@@ -1811,6 +1814,9 @@ btr_page_reorganize_low(
#endif /* !UNIV_HOTBACKUP */
btr_blob_dbg_remove(page, index, "btr_page_reorganize");
+ /* Save the cursor position. */
+ pos = page_rec_get_n_recs_before(page_cur_get_rec(cursor));
+
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
@@ -1828,14 +1834,21 @@ btr_page_reorganize_low(
trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
page_set_max_trx_id(block, NULL, max_trx_id, mtr);
/* In crash recovery, dict_index_is_sec_or_ibuf() always
- returns TRUE, even for clustered indexes. max_trx_id is
+ holds, even for clustered indexes. max_trx_id is
unused in clustered index pages. */
ut_ad(max_trx_id != 0 || recovery);
}
+ /* If innodb_log_compressed_pages is ON, page reorganize should log the
+ compressed page image.*/
+ log_compressed = page_zip && page_zip_log_pages;
+
+ if (log_compressed) {
+ mtr_set_log_mode(mtr, log_mode);
+ }
+
if (page_zip
- && !page_zip_compress(page_zip, page, index,
- compression_level, NULL)) {
+ && !page_zip_compress(page_zip, page, index, z_level, mtr)) {
/* Restore the old page and exit. */
btr_blob_dbg_restore(page, temp_page, index,
@@ -1890,7 +1903,14 @@ btr_page_reorganize_low(
(unsigned long) max_ins_size2);
ut_ad(0);
} else {
- success = TRUE;
+ success = true;
+ }
+
+ /* Restore the cursor position. */
+ if (pos > 0) {
+ cursor->rec = page_rec_get_nth(page, pos);
+ } else {
+ ut_ad(cursor->rec == page_get_infimum_rec(page));
}
func_exit:
@@ -1904,27 +1924,92 @@ func_exit:
/* Restore logging mode */
mtr_set_log_mode(mtr, log_mode);
+#ifndef UNIV_HOTBACKUP
+ if (success) {
+ byte type;
+ byte* log_ptr;
+
+ /* Write the log record */
+ if (page_zip) {
+ ut_ad(page_is_comp(page));
+ type = MLOG_ZIP_PAGE_REORGANIZE;
+ } else if (page_is_comp(page)) {
+ type = MLOG_COMP_PAGE_REORGANIZE;
+ } else {
+ type = MLOG_PAGE_REORGANIZE;
+ }
+
+ log_ptr = log_compressed
+ ? NULL
+ : mlog_open_and_write_index(
+ mtr, page, index, type,
+ page_zip ? 1 : 0);
+
+ /* For compressed pages write the compression level. */
+ if (log_ptr && page_zip) {
+ mach_write_to_1(log_ptr, z_level);
+ mlog_close(mtr, log_ptr + 1);
+ }
+ }
+#endif /* !UNIV_HOTBACKUP */
+
return(success);
}
+/*************************************************************//**
+Reorganizes an index page.
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@retval true if the operation was successful
+@retval false if it is a compressed page, and recompression failed */
+static __attribute__((nonnull))
+bool
+btr_page_reorganize_block(
+/*======================*/
+ bool recovery,/*!< in: true if called in recovery:
+ locks should not be updated, i.e.,
+ there cannot exist locks on the
+ page, and a hash index should not be
+ dropped: it cannot exist */
+ ulint z_level,/*!< in: compression level to be used
+ if dealing with compressed page */
+ buf_block_t* block, /*!< in/out: B-tree page */
+ dict_index_t* index, /*!< in: the index tree of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ page_cur_t cur;
+ page_cur_set_before_first(block, &cur);
+
+ return(btr_page_reorganize_low(recovery, z_level, &cur, index, mtr));
+}
+
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Reorganizes an index page.
-IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
-page of a non-clustered index, the caller must update the insert
-buffer free bits in the same mini-transaction in such a way that the
-modification will be redo-logged.
-@return TRUE on success, FALSE on failure */
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@retval true if the operation was successful
+@retval false if it is a compressed page, and recompression failed */
UNIV_INTERN
-ibool
+bool
btr_page_reorganize(
/*================*/
- buf_block_t* block, /*!< in: page to be reorganized */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
+ page_cur_t* cursor, /*!< in/out: page cursor */
+ dict_index_t* index, /*!< in: the index tree of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- return(btr_page_reorganize_low(FALSE, page_compression_level,
- block, index, mtr));
+ return(btr_page_reorganize_low(false, page_zip_level,
+ cursor, index, mtr));
}
#endif /* !UNIV_HOTBACKUP */
@@ -1942,7 +2027,7 @@ btr_parse_page_reorganize(
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
{
- ulint level = page_compression_level;
+ ulint level;
ut_ad(ptr && end_ptr);
@@ -1954,14 +2039,16 @@ btr_parse_page_reorganize(
return(NULL);
}
- level = (ulint)mach_read_from_1(ptr);
+ level = mach_read_from_1(ptr);
ut_a(level <= 9);
++ptr;
+ } else {
+ level = page_zip_level;
}
if (block != NULL) {
- btr_page_reorganize_low(TRUE, level, block, index, mtr);
+ btr_page_reorganize_block(true, level, block, index, mtr);
}
return(ptr);
@@ -1995,7 +2082,7 @@ btr_page_empty(
segment headers, next page-field, etc.) is preserved intact */
if (page_zip) {
- page_create_zip(block, index, level, mtr);
+ page_create_zip(block, index, level, 0, mtr);
} else {
page_create(block, mtr, dict_table_is_comp(index->table));
btr_page_set_level(page, NULL, level, mtr);
@@ -2043,7 +2130,7 @@ btr_root_raise_and_insert(
root = btr_cur_get_page(cursor);
root_block = btr_cur_get_block(cursor);
root_page_zip = buf_block_get_page_zip(root_block);
- ut_ad(page_get_n_recs(root) > 0);
+ ut_ad(!page_is_empty(root));
index = btr_cur_get_index(cursor);
#ifdef UNIV_ZIP_DEBUG
ut_a(!root_page_zip || page_zip_validate(root_page_zip, root, index));
@@ -2091,8 +2178,8 @@ btr_root_raise_and_insert(
|| new_page_zip
#endif /* UNIV_ZIP_COPY */
|| !page_copy_rec_list_end(new_block, root_block,
- page_get_infimum_rec(root),
- index, mtr)) {
+ page_get_infimum_rec(root),
+ index, mtr)) {
ut_a(new_page_zip);
/* Copy the page byte for byte. */
@@ -2779,7 +2866,7 @@ func_start:
page_zip = buf_block_get_page_zip(block);
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_get_n_recs(page) >= 1);
+ ut_ad(!page_is_empty(page));
page_no = buf_block_get_page_no(block);
@@ -2909,7 +2996,7 @@ insert_empty:
|| page_zip
#endif /* UNIV_ZIP_COPY */
|| !page_move_rec_list_start(new_block, block, move_limit,
- cursor->index, mtr)) {
+ cursor->index, mtr)) {
/* For some reason, compressing new_page failed,
even though it should contain fewer records than
the original page. Copy the page byte for byte
@@ -2951,7 +3038,7 @@ insert_empty:
|| page_zip
#endif /* UNIV_ZIP_COPY */
|| !page_move_rec_list_end(new_block, block, move_limit,
- cursor->index, mtr)) {
+ cursor->index, mtr)) {
/* For some reason, compressing new_page failed,
even though it should contain fewer records than
the original page. Copy the page byte for byte
@@ -3033,15 +3120,16 @@ insert_empty:
goto func_exit;
}
- /* 8. If insert did not fit, try page reorganization */
+ /* 8. If insert did not fit, try page reorganization.
+ For compressed pages, page_cur_tuple_insert() will have
+ attempted this already. */
- if (!btr_page_reorganize(insert_block, cursor->index, mtr)) {
+ if (page_cur_get_page_zip(page_cursor)
+ || !btr_page_reorganize(page_cursor, cursor->index, mtr)) {
goto insert_failed;
}
- page_cur_search(insert_block, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
offsets, heap, n_ext, mtr);
@@ -3049,9 +3137,10 @@ insert_empty:
/* The insert did not fit on the page: loop back to the
start of the function for a new split */
insert_failed:
- /* We play safe and reset the free bits for new_page */
+ /* We play safe and reset the free bits */
if (!dict_index_is_clust(cursor->index)) {
ibuf_reset_free_bits(new_block);
+ ibuf_reset_free_bits(block);
}
/* fprintf(stderr, "Split second round %lu\n",
@@ -3461,7 +3550,7 @@ btr_compress(
ulint left_page_no;
ulint right_page_no;
buf_block_t* merge_block;
- page_t* merge_page;
+ page_t* merge_page = NULL;
page_zip_des_t* merge_page_zip;
ibool is_left;
buf_block_t* block;
@@ -3469,11 +3558,8 @@ btr_compress(
btr_cur_t father_cursor;
mem_heap_t* heap;
ulint* offsets;
- ulint data_size;
- ulint n_recs;
ulint nth_rec = 0; /* remove bogus warning */
- ulint max_ins_size;
- ulint max_ins_size_reorg;
+ DBUG_ENTER("btr_compress");
block = btr_cur_get_block(cursor);
page = btr_cur_get_page(cursor);
@@ -3490,10 +3576,13 @@ btr_compress(
left_page_no = btr_page_get_prev(page, mtr);
right_page_no = btr_page_get_next(page, mtr);
-#if 0
- fprintf(stderr, "Merge left page %lu right %lu \n",
- left_page_no, right_page_no);
-#endif
+#ifdef UNIV_DEBUG
+ if (!page_is_leaf(page) && left_page_no == FIL_NULL) {
+ ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ page_rec_get_next(page_get_infimum_rec(page)),
+ page_is_comp(page)));
+ }
+#endif /* UNIV_DEBUG */
heap = mem_heap_create(100);
offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
@@ -3504,30 +3593,7 @@ btr_compress(
ut_ad(nth_rec > 0);
}
- /* Decide the page to which we try to merge and which will inherit
- the locks */
-
- is_left = left_page_no != FIL_NULL;
-
- if (is_left) {
-
- merge_block = btr_block_get(space, zip_size, left_page_no,
- RW_X_LATCH, index, mtr);
- merge_page = buf_block_get_frame(merge_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(merge_page, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
- } else if (right_page_no != FIL_NULL) {
-
- merge_block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, index, mtr);
- merge_page = buf_block_get_frame(merge_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
- } else {
+ if (left_page_no == FIL_NULL && right_page_no == FIL_NULL) {
/* The page is the only one on the level, lift the records
to the father */
@@ -3535,66 +3601,34 @@ btr_compress(
goto func_exit;
}
- n_recs = page_get_n_recs(page);
- data_size = page_get_data_size(page);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(merge_page) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
+ /* Decide the page to which we try to merge and which will inherit
+ the locks */
- max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
- merge_page, n_recs);
- if (data_size > max_ins_size_reorg) {
+ is_left = btr_can_merge_with_page(cursor, left_page_no,
+ &merge_block, mtr);
- /* No space for merge */
-err_exit:
- /* We play it safe and reset the free bits. */
- if (zip_size
- && page_is_leaf(merge_page)
- && !dict_index_is_clust(index)) {
- ibuf_reset_free_bits(merge_block);
- }
+ DBUG_EXECUTE_IF("ib_always_merge_right", is_left = FALSE;);
- mem_heap_free(heap);
- return(FALSE);
+ if(!is_left
+ && !btr_can_merge_with_page(cursor, right_page_no, &merge_block,
+ mtr)) {
+ goto err_exit;
}
- /* If compression padding tells us that merging will result in
- too packed up page i.e.: which is likely to cause compression
- failure then don't merge the pages. */
- if (zip_size && page_is_leaf(merge_page)
- && (page_get_data_size(merge_page) + data_size
- >= dict_index_zip_pad_optimal_page_size(index))) {
+ merge_page = buf_block_get_frame(merge_block);
- goto err_exit;
+#ifdef UNIV_BTR_DEBUG
+ if (is_left) {
+ ut_a(btr_page_get_next(merge_page, mtr)
+ == buf_block_get_page_no(block));
+ } else {
+ ut_a(btr_page_get_prev(merge_page, mtr)
+ == buf_block_get_page_no(block));
}
+#endif /* UNIV_BTR_DEBUG */
ut_ad(page_validate(merge_page, index));
- max_ins_size = page_get_max_insert_size(merge_page, n_recs);
-
- if (data_size > max_ins_size) {
-
- /* We have to reorganize merge_page */
-
- if (!btr_page_reorganize(merge_block, index, mtr)) {
-
- goto err_exit;
- }
-
- max_ins_size = page_get_max_insert_size(merge_page, n_recs);
-
- ut_ad(page_validate(merge_page, index));
- ut_ad(max_ins_size == max_ins_size_reorg);
-
- if (data_size > max_ins_size) {
-
- /* Add fault tolerance, though this should
- never happen */
-
- goto err_exit;
- }
- }
-
merge_page_zip = buf_block_get_page_zip(merge_block);
#ifdef UNIV_ZIP_DEBUG
if (merge_page_zip) {
@@ -3629,11 +3663,19 @@ err_exit:
}
} else {
rec_t* orig_succ;
+ ibool compressed;
+ dberr_t err;
+ btr_cur_t cursor2;
+ /* father cursor pointing to node ptr
+ of the right sibling */
#ifdef UNIV_BTR_DEBUG
byte fil_page_prev[4];
#endif /* UNIV_BTR_DEBUG */
- if (merge_page_zip) {
+ btr_page_get_father(index, merge_block, mtr, &cursor2);
+
+ if (merge_page_zip && left_page_no == FIL_NULL) {
+
/* The function page_zip_compress(), which will be
invoked by page_copy_rec_list_end() below,
requires that FIL_PAGE_PREV be FIL_NULL.
@@ -3654,9 +3696,12 @@ err_exit:
if (!orig_succ) {
ut_a(merge_page_zip);
#ifdef UNIV_BTR_DEBUG
- /* FIL_PAGE_PREV was restored from merge_page_zip. */
- ut_a(!memcmp(fil_page_prev,
- merge_page + FIL_PAGE_PREV, 4));
+ if (left_page_no == FIL_NULL) {
+ /* FIL_PAGE_PREV was restored from
+ merge_page_zip. */
+ ut_a(!memcmp(fil_page_prev,
+ merge_page + FIL_PAGE_PREV, 4));
+ }
#endif /* UNIV_BTR_DEBUG */
goto err_exit;
}
@@ -3664,7 +3709,8 @@ err_exit:
btr_search_drop_page_hash_index(block);
#ifdef UNIV_BTR_DEBUG
- if (merge_page_zip) {
+ if (merge_page_zip && left_page_no == FIL_NULL) {
+
/* Restore FIL_PAGE_PREV in order to avoid an assertion
failure in btr_level_list_remove(), which will set
the field again to FIL_NULL. Even though this makes
@@ -3680,12 +3726,19 @@ err_exit:
/* Replace the address of the old child node (= page) with the
address of the merge page to the right */
-
btr_node_ptr_set_child_page_no(
btr_cur_get_rec(&father_cursor),
btr_cur_get_page_zip(&father_cursor),
offsets, right_page_no, mtr);
- btr_node_ptr_delete(index, merge_block, mtr);
+
+ compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor2,
+ BTR_CREATE_FLAG,
+ RB_NONE, mtr);
+ ut_a(err == DB_SUCCESS);
+
+ if (!compressed) {
+ btr_cur_compress_if_useful(&cursor2, FALSE, mtr);
+ }
lock_update_merge_right(merge_block, orig_succ, block);
}
@@ -3753,8 +3806,19 @@ func_exit:
page_rec_get_nth(merge_block->frame, nth_rec),
merge_block, cursor);
}
+ DBUG_RETURN(TRUE);
- return(TRUE);
+err_exit:
+ /* We play it safe and reset the free bits. */
+ if (zip_size
+ && merge_page
+ && page_is_leaf(merge_page)
+ && !dict_index_is_clust(index)) {
+ ibuf_reset_free_bits(merge_block);
+ }
+
+ mem_heap_free(heap);
+ DBUG_RETURN(FALSE);
}
/*************************************************************//**
@@ -3816,17 +3880,16 @@ btr_discard_only_page_on_level(
#endif /* UNIV_BTR_DEBUG */
btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
+ ut_ad(page_is_leaf(buf_block_get_frame(block)));
if (!dict_index_is_clust(index)) {
/* We play it safe and reset the free bits for the root */
ibuf_reset_free_bits(block);
- if (page_is_leaf(buf_block_get_frame(block))) {
- ut_a(max_trx_id);
- page_set_max_trx_id(block,
- buf_block_get_page_zip(block),
- max_trx_id, mtr);
- }
+ ut_a(max_trx_id);
+ page_set_max_trx_id(block,
+ buf_block_get_page_zip(block),
+ max_trx_id, mtr);
}
}
@@ -4489,9 +4552,9 @@ loop:
right_page_no = btr_page_get_next(page, &mtr);
left_page_no = btr_page_get_prev(page, &mtr);
- ut_a(page_get_n_recs(page) > 0 || (level == 0
- && page_get_page_no(page)
- == dict_index_get_page(index)));
+ ut_a(!page_is_empty(page)
+ || (level == 0
+ && page_get_page_no(page) == dict_index_get_page(index)));
if (right_page_no != FIL_NULL) {
const rec_t* right_rec;
@@ -4799,4 +4862,97 @@ btr_validate_index(
return(ok);
}
+/**************************************************************//**
+Checks if the page in the cursor can be merged with given page.
+If necessary, re-organize the merge_page.
+@return TRUE if possible to merge. */
+UNIV_INTERN
+ibool
+btr_can_merge_with_page(
+/*====================*/
+ btr_cur_t* cursor, /*!< in: cursor on the page to merge */
+ ulint page_no, /*!< in: a sibling page */
+ buf_block_t** merge_block, /*!< out: the merge block */
+ mtr_t* mtr) /*!< in: mini-transaction */
+{
+ dict_index_t* index;
+ page_t* page;
+ ulint space;
+ ulint zip_size;
+ ulint n_recs;
+ ulint data_size;
+ ulint max_ins_size_reorg;
+ ulint max_ins_size;
+ buf_block_t* mblock;
+ page_t* mpage;
+ DBUG_ENTER("btr_can_merge_with_page");
+
+ if (page_no == FIL_NULL) {
+ goto error;
+ }
+
+ index = btr_cur_get_index(cursor);
+ page = btr_cur_get_page(cursor);
+ space = dict_index_get_space(index);
+ zip_size = dict_table_zip_size(index->table);
+
+ mblock = btr_block_get(space, zip_size, page_no, RW_X_LATCH, index,
+ mtr);
+ mpage = buf_block_get_frame(mblock);
+
+ n_recs = page_get_n_recs(page);
+ data_size = page_get_data_size(page);
+
+ max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
+ mpage, n_recs);
+
+ if (data_size > max_ins_size_reorg) {
+ goto error;
+ }
+
+ /* If compression padding tells us that merging will result in
+ too packed up page i.e.: which is likely to cause compression
+ failure then don't merge the pages. */
+ if (zip_size && page_is_leaf(mpage)
+ && (page_get_data_size(mpage) + data_size
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ goto error;
+ }
+
+
+ max_ins_size = page_get_max_insert_size(mpage, n_recs);
+
+ if (data_size > max_ins_size) {
+
+ /* We have to reorganize mpage */
+
+ if (!btr_page_reorganize_block(
+ false, page_zip_level, mblock, index, mtr)) {
+
+ goto error;
+ }
+
+ max_ins_size = page_get_max_insert_size(mpage, n_recs);
+
+ ut_ad(page_validate(mpage, index));
+ ut_ad(max_ins_size == max_ins_size_reorg);
+
+ if (data_size > max_ins_size) {
+
+ /* Add fault tolerance, though this should
+ never happen */
+
+ goto error;
+ }
+ }
+
+ *merge_block = mblock;
+ DBUG_RETURN(TRUE);
+
+error:
+ *merge_block = NULL;
+ DBUG_RETURN(FALSE);
+}
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index ecc17188770..bbc189d1d39 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
@@ -1086,6 +1086,12 @@ Inserts a record if there is enough space, or if enough space can
be freed by reorganizing. Differs from btr_cur_optimistic_insert because
no heuristics is applied to whether it pays to use CPU time for
reorganizing the page or not.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to inserted record if succeed, else NULL */
static __attribute__((nonnull, warn_unused_result))
rec_t*
@@ -1098,35 +1104,29 @@ btr_cur_insert_if_possible(
ulint** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
page_cur_t* page_cursor;
- buf_block_t* block;
rec_t* rec;
ut_ad(dtuple_check_typed(tuple));
- block = btr_cur_get_block(cursor);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX));
page_cursor = btr_cur_get_page_cur(cursor);
/* Now, try the insert */
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
offsets, heap, n_ext, mtr);
- if (UNIV_UNLIKELY(!rec)) {
- /* If record did not fit, reorganize */
-
- if (btr_page_reorganize(block, cursor->index, mtr)) {
-
- page_cur_search(block, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
-
- rec = page_cur_tuple_insert(
- page_cursor, tuple, cursor->index,
- offsets, heap, n_ext, mtr);
- }
+ /* If the record did not fit, reorganize.
+ For compressed pages, page_cur_tuple_insert()
+ attempted this already. */
+ if (!rec && !page_cur_get_page_zip(page_cursor)
+ && btr_page_reorganize(page_cursor, cursor->index, mtr)) {
+ rec = page_cur_tuple_insert(
+ page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
}
ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
@@ -1240,10 +1240,11 @@ btr_cur_optimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr) /*!< in: mtr; if this function returns
- DB_SUCCESS on a leaf page of a secondary
- index in a compressed tablespace, the
- mtr must be committed before latching
+ mtr_t* mtr) /*!< in/out: mini-transaction;
+ if this function returns DB_SUCCESS on
+ a leaf page of a secondary index in a
+ compressed tablespace, the caller must
+ mtr_commit(mtr) before latching
any further pages */
{
big_rec_t* big_rec_vec = NULL;
@@ -1251,8 +1252,7 @@ btr_cur_optimistic_insert(
page_cur_t* page_cursor;
buf_block_t* block;
page_t* page;
- ulint max_size;
- rec_t* dummy_rec;
+ rec_t* dummy;
ibool leaf;
ibool reorg;
ibool inherit;
@@ -1265,9 +1265,13 @@ btr_cur_optimistic_insert(
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
index = cursor->index;
+
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(!dict_index_is_online_ddl(index)
|| dict_index_is_clust(index)
|| (flags & BTR_CREATE_FLAG));
+ ut_ad(dtuple_check_typed(entry));
+
zip_size = buf_block_get_zip_size(block);
#ifdef UNIV_DEBUG_VALGRIND
if (zip_size) {
@@ -1276,10 +1280,6 @@ btr_cur_optimistic_insert(
}
#endif /* UNIV_DEBUG_VALGRIND */
- if (!dtuple_check_typed_no_assert(entry)) {
- fputs("InnoDB: Error in a tuple to insert into ", stderr);
- dict_index_name_print(stderr, thr_get_trx(thr), index);
- }
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert ");
@@ -1287,8 +1287,6 @@ btr_cur_optimistic_insert(
}
#endif /* UNIV_DEBUG */
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- max_size = page_get_max_insert_size_after_reorganize(page, 1);
leaf = page_is_leaf(page);
/* Calculate the record size when entry is converted to a record */
@@ -1314,27 +1312,13 @@ btr_cur_optimistic_insert(
Subtract one byte for the encoded heap_no in the
modification log. */
ulint free_space_zip = page_zip_empty_size(
- cursor->index->n_fields, zip_size) - 1;
+ cursor->index->n_fields, zip_size);
ulint n_uniq = dict_index_get_n_unique_in_tree(index);
ut_ad(dict_table_is_comp(index->table));
- /* There should be enough room for two node pointer
- records on an empty non-leaf page. This prevents
- infinite page splits. */
-
- if (UNIV_LIKELY(entry->n_fields >= n_uniq)
- && UNIV_UNLIKELY(REC_NODE_PTR_SIZE
- + rec_get_converted_size_comp_prefix(
- index, entry->fields, n_uniq,
- NULL)
- /* On a compressed page, there is
- a two-byte entry in the dense
- page directory for every record.
- But there is no record header. */
- - (REC_N_NEW_EXTRA_BYTES - 2)
- > free_space_zip / 2)) {
-
+ if (free_space_zip == 0) {
+too_big:
if (big_rec_vec) {
dtuple_convert_back_big_rec(
index, entry, big_rec_vec);
@@ -1342,21 +1326,39 @@ btr_cur_optimistic_insert(
return(DB_TOO_BIG_RECORD);
}
+
+ /* Subtract one byte for the encoded heap_no in the
+ modification log. */
+ free_space_zip--;
+
+ /* There should be enough room for two node pointer
+ records on an empty non-leaf page. This prevents
+ infinite page splits. */
+
+ if (entry->n_fields >= n_uniq
+ && (REC_NODE_PTR_SIZE
+ + rec_get_converted_size_comp_prefix(
+ index, entry->fields, n_uniq, NULL)
+ /* On a compressed page, there is
+ a two-byte entry in the dense
+ page directory for every record.
+ But there is no record header. */
+ - (REC_N_NEW_EXTRA_BYTES - 2)
+ > free_space_zip / 2)) {
+ goto too_big;
+ }
}
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
goto fail);
- /* If there have been many consecutive inserts, and we are on the leaf
- level, check if we have to split the page to reserve enough free space
- for future updates of records. */
-
- if (dict_index_is_clust(index)
- && (page_get_n_recs(page) >= 2)
- && UNIV_LIKELY(leaf)
- && (dict_index_get_space_reserve() + rec_size > max_size)
- && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
- || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
+ if (leaf && zip_size
+ && (page_get_data_size(page) + rec_size
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+ /* If compression padding tells us that insertion will
+ result in too packed up page i.e.: which is likely to
+ cause compression failure then don't do an optimistic
+ insertion. */
fail:
err = DB_FAIL;
fail_err:
@@ -1368,23 +1370,33 @@ fail_err:
return(err);
}
- if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
- || max_size < rec_size)
- && UNIV_LIKELY(page_get_n_recs(page) > 1)
- && page_get_max_insert_size(page, 1) < rec_size) {
+ ulint max_size = page_get_max_insert_size_after_reorganize(page, 1);
+
+ if (page_has_garbage(page)) {
+ if ((max_size < rec_size
+ || max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT)
+ && page_get_n_recs(page) > 1
+ && page_get_max_insert_size(page, 1) < rec_size) {
+ goto fail;
+ }
+ } else if (max_size < rec_size) {
goto fail;
}
- /* If compression padding tells us that insertion will result in
- too packed up page i.e.: which is likely to cause compression
- failure then don't do an optimistic insertion. */
- if (zip_size && leaf
- && (page_get_data_size(page) + rec_size
- >= dict_index_zip_pad_optimal_page_size(index))) {
+ /* If there have been many consecutive inserts to the
+ clustered index leaf page of an uncompressed table, check if
+ we have to split the page to reserve enough free space for
+ future updates of records. */
+ if (leaf && !zip_size && dict_index_is_clust(index)
+ && page_get_n_recs(page) >= 2
+ && dict_index_get_space_reserve() + rec_size > max_size
+ && (btr_page_get_split_rec_to_right(cursor, &dummy)
+ || btr_page_get_split_rec_to_left(cursor, &dummy))) {
goto fail;
}
+
/* Check locks and write to the undo log, if specified */
err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
thr, mtr, &inherit);
@@ -1403,42 +1415,35 @@ fail_err:
*rec = page_cur_tuple_insert(page_cursor, entry, index,
offsets, heap, n_ext, mtr);
reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
+ }
- if (UNIV_UNLIKELY(reorg)) {
- ut_a(zip_size);
- /* It's possible for rec to be NULL if the
- page is compressed. This is because a
- reorganized page may become incompressible. */
- if (!*rec) {
- goto fail;
- }
+ if (*rec) {
+ } else if (zip_size) {
+ /* Reset the IBUF_BITMAP_FREE bits, because
+ page_cur_tuple_insert() will have attempted page
+ reorganize before failing. */
+ if (leaf && !dict_index_is_clust(index)) {
+ ibuf_reset_free_bits(block);
}
- }
- if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
- /* If the record did not fit, reorganize */
- if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
- ut_a(zip_size);
+ goto fail;
+ } else {
+ ut_ad(!reorg);
+ /* If the record did not fit, reorganize */
+ if (!btr_page_reorganize(page_cursor, index, mtr)) {
+ ut_ad(0);
goto fail;
}
- ut_ad(zip_size
- || page_get_max_insert_size(page, 1) == max_size);
+ ut_ad(page_get_max_insert_size(page, 1) == max_size);
reorg = TRUE;
- page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
-
*rec = page_cur_tuple_insert(page_cursor, entry, index,
offsets, heap, n_ext, mtr);
if (UNIV_UNLIKELY(!*rec)) {
- if (zip_size != 0) {
-
- goto fail;
- }
-
fputs("InnoDB: Error: cannot insert tuple ", stderr);
dtuple_print(stderr, entry);
fputs(" into ", stderr);
@@ -1462,12 +1467,6 @@ fail_err:
lock_update_insert(block, *rec);
}
-#if 0
- fprintf(stderr, "Insert into page %lu, max ins size %lu,"
- " rec %lu ind type %lu\n",
- buf_block_get_page_no(block), max_size,
- rec_size + PAGE_DIR_SLOT_SIZE, index->type);
-#endif
if (leaf && !dict_index_is_clust(index)) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. */
@@ -1528,7 +1527,7 @@ btr_cur_pessimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
dict_index_t* index = cursor->index;
ulint zip_size = dict_table_zip_size(index->table);
@@ -1536,8 +1535,7 @@ btr_cur_pessimistic_insert(
dberr_t err;
ibool dummy_inh;
ibool success;
- ulint n_extents = 0;
- ulint n_reserved;
+ ulint n_reserved = 0;
ut_ad(dtuple_check_typed(entry));
@@ -1569,7 +1567,7 @@ btr_cur_pessimistic_insert(
of the index tree, so that the insert will not fail because
of lack of space */
- n_extents = cursor->tree_height / 16 + 3;
+ ulint n_extents = cursor->tree_height / 16 + 3;
success = fsp_reserve_free_extents(&n_reserved, index->space,
n_extents, FSP_NORMAL, mtr);
@@ -1596,7 +1594,7 @@ btr_cur_pessimistic_insert(
if (big_rec_vec == NULL) {
- if (n_extents > 0) {
+ if (n_reserved > 0) {
fil_space_release_free_extents(index->space,
n_reserved);
}
@@ -1625,7 +1623,7 @@ btr_cur_pessimistic_insert(
lock_update_insert(btr_cur_get_block(cursor), *rec);
}
- if (n_extents > 0) {
+ if (n_reserved > 0) {
fil_space_release_free_extents(index->space, n_reserved);
}
@@ -1827,61 +1825,59 @@ func_exit:
/*************************************************************//**
See if there is enough place in the page modification log to log
an update-in-place.
-@return TRUE if enough place */
+
+@retval false if out of space; IBUF_BITMAP_FREE will be reset
+outside mtr if the page was recompressed
+@retval true if there is enough space
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
+a secondary index leaf page. This has to be done either within the
+same mini-transaction, or by invoking ibuf_reset_free_bits() before
+mtr_commit(mtr). */
UNIV_INTERN
-ibool
-btr_cur_update_alloc_zip(
-/*=====================*/
+bool
+btr_cur_update_alloc_zip_func(
+/*==========================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
- buf_block_t* block, /*!< in/out: buffer page */
- dict_index_t* index, /*!< in: the index corresponding to the block */
+ page_cur_t* cursor, /*!< in/out: B-tree page cursor */
+ dict_index_t* index, /*!< in: the index corresponding to cursor */
+#ifdef UNIV_DEBUG
+ ulint* offsets,/*!< in/out: offsets of the cursor record */
+#endif /* UNIV_DEBUG */
ulint length, /*!< in: size needed */
- ibool create, /*!< in: TRUE=delete-and-insert,
- FALSE=update-in-place */
- mtr_t* mtr) /*!< in: mini-transaction */
+ bool create, /*!< in: true=delete-and-insert,
+ false=update-in-place */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
+ const page_t* page = page_cur_get_page(cursor);
- /* Have a local copy of the variables as these can change
- dynamically. */
- bool log_compressed = page_log_compressed_pages;
- ulint compression_level = page_compression_level;
- page_t* page = buf_block_get_frame(block);
-
- ut_a(page_zip == buf_block_get_page_zip(block));
+ ut_ad(page_zip == page_cur_get_page_zip(cursor));
ut_ad(page_zip);
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets));
if (page_zip_available(page_zip, dict_index_is_clust(index),
length, create)) {
- return(TRUE);
+ return(true);
}
- if (!page_zip->m_nonempty) {
+ if (!page_zip->m_nonempty && !page_has_garbage(page)) {
/* The page has been freshly compressed, so
- recompressing it will not help. */
- return(FALSE);
+ reorganizing it will not help. */
+ return(false);
}
- page = buf_block_get_frame(block);
-
if (create && page_is_leaf(page)
&& (length + page_get_data_size(page)
>= dict_index_zip_pad_optimal_page_size(index))) {
-
- return(FALSE);
+ return(false);
}
- if (!page_zip_compress(
- page_zip, page, index, compression_level,
- log_compressed ? mtr : NULL)) {
- /* Unable to compress the page */
- return(FALSE);
+ if (!btr_page_reorganize(cursor, index, mtr)) {
+ goto out_of_space;
}
- if (mtr && !log_compressed) {
- page_zip_compress_write_log_no_data(
- compression_level, page, index, mtr);
- }
+ rec_offs_make_valid(page_cur_get_rec(cursor), index, offsets);
/* After recompressing a page, we must make sure that the free
bits in the insert buffer bitmap will not exceed the free
@@ -1890,25 +1886,32 @@ btr_cur_update_alloc_zip(
safe to reset the free bits if page_zip_available() fails
again, below. The free bits can safely be reset in a separate
mini-transaction. If page_zip_available() succeeds below, we
- can be sure that the page_zip_compress() above did not reduce
+ can be sure that the btr_page_reorganize() above did not reduce
the free space available on the page. */
- if (!page_zip_available(page_zip, dict_index_is_clust(index),
- length, create)) {
- /* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index) && page_is_leaf(page)) {
- ibuf_reset_free_bits(block);
- }
- return(FALSE);
+ if (page_zip_available(page_zip, dict_index_is_clust(index),
+ length, create)) {
+ return(true);
}
- return(TRUE);
+out_of_space:
+ ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets));
+
+ /* Out of space: reset the free bits. */
+ if (!dict_index_is_clust(index) && page_is_leaf(page)) {
+ ibuf_reset_free_bits(page_cur_get_block(cursor));
+ }
+
+ return(false);
}
/*************************************************************//**
Updates a record when the update causes no size changes in its fields.
We assume here that the ordering fields of the record do not change.
-@return DB_SUCCESS or error number */
+@return locking or undo log related error code, or
+@retval DB_SUCCESS on success
+@retval DB_ZIP_OVERFLOW if there is not enough space left
+on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
UNIV_INTERN
dberr_t
btr_cur_update_in_place(
@@ -1917,15 +1920,16 @@ btr_cur_update_in_place(
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- const ulint* offsets,/*!< in: offsets on cursor->page_cur.rec */
+ ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread, or NULL if
- appropriate flags are set */
+ que_thr_t* thr, /*!< in: query thread */
trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
+ mtr_t* mtr) /*!< in/out: mini-transaction; if this
+ is a secondary index, the caller must
+ mtr_commit(mtr) before latching any
+ further pages */
{
dict_index_t* index;
buf_block_t* block;
@@ -1944,8 +1948,8 @@ btr_cur_update_in_place(
ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(index));
- ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
- ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ ut_ad(thr_get_trx(thr)->id == trx_id
+ || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP))
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX);
@@ -1962,10 +1966,15 @@ btr_cur_update_in_place(
page_zip = buf_block_get_page_zip(block);
/* Check that enough space is available on the compressed page. */
- if (page_zip
- && !btr_cur_update_alloc_zip(page_zip, block, index,
- rec_offs_size(offsets), FALSE, mtr)) {
- return(DB_ZIP_OVERFLOW);
+ if (page_zip) {
+ if (!btr_cur_update_alloc_zip(
+ page_zip, btr_cur_get_page_cur(cursor),
+ index, offsets, rec_offs_size(offsets),
+ false, mtr)) {
+ return(DB_ZIP_OVERFLOW);
+ }
+
+ rec = btr_cur_get_rec(cursor);
}
/* Do lock checking and undo logging */
@@ -1973,8 +1982,10 @@ btr_cur_update_in_place(
update, cmpl_info,
thr, mtr, &roll_ptr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
-
- return(err);
+ /* We may need to update the IBUF_BITMAP_FREE
+ bits after a reorganize that was done in
+ btr_cur_update_alloc_zip(). */
+ goto func_exit;
}
if (!(flags & BTR_KEEP_SYS_FLAG)) {
@@ -2013,12 +2024,6 @@ btr_cur_update_in_place(
rw_lock_x_unlock(&btr_search_latch);
}
- if (page_zip && !dict_index_is_clust(index)
- && page_is_leaf(buf_block_get_frame(block))) {
- /* Update the free bits in the insert buffer. */
- ibuf_update_free_bits_zip(block, mtr);
- }
-
btr_cur_update_in_place_log(flags, rec, index, update,
trx_id, roll_ptr, mtr);
@@ -2032,7 +2037,18 @@ btr_cur_update_in_place(
rec, index, offsets, mtr);
}
- return(DB_SUCCESS);
+ ut_ad(err == DB_SUCCESS);
+
+func_exit:
+ if (page_zip
+ && !(flags & BTR_KEEP_IBUF_BITMAP)
+ && !dict_index_is_clust(index)
+ && page_is_leaf(buf_block_get_frame(block))) {
+ /* Update the free bits in the insert buffer. */
+ ibuf_update_free_bits_zip(block, mtr);
+ }
+
+ return(err);
}
/*************************************************************//**
@@ -2041,9 +2057,12 @@ holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended. We assume here that the ordering
fields of the record do not change.
-@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
-DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
-there is not enough space left on the compressed page */
+@return error code, including
+@retval DB_SUCCESS on success
+@retval DB_OVERFLOW if the updated record does not fit
+@retval DB_UNDERFLOW if the page would become too empty
+@retval DB_ZIP_OVERFLOW if there is not enough space left
+on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
UNIV_INTERN
dberr_t
btr_cur_optimistic_update(
@@ -2053,7 +2072,7 @@ btr_cur_optimistic_update(
cursor stays valid and positioned on the
same record */
ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
@@ -2061,8 +2080,10 @@ btr_cur_optimistic_update(
que_thr_t* thr, /*!< in: query thread, or NULL if
appropriate flags are set */
trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
+ mtr_t* mtr) /*!< in/out: mini-transaction; if this
+ is a secondary index, the caller must
+ mtr_commit(mtr) before latching any
+ further pages */
{
dict_index_t* index;
page_cur_t* page_cursor;
@@ -2089,8 +2110,8 @@ btr_cur_optimistic_update(
ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(index));
- ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
- ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ ut_ad(thr_get_trx(thr)->id == trx_id
+ || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP))
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
@@ -2163,26 +2184,36 @@ any_extern:
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- if (page_zip
- && !btr_cur_update_alloc_zip(page_zip, block, index,
- new_rec_size, TRUE, mtr)) {
- return(DB_ZIP_OVERFLOW);
+ if (page_zip) {
+ if (!btr_cur_update_alloc_zip(
+ page_zip, page_cursor, index, *offsets,
+ new_rec_size, true, mtr)) {
+ return(DB_ZIP_OVERFLOW);
+ }
+
+ rec = page_cur_get_rec(page_cursor);
}
if (UNIV_UNLIKELY(new_rec_size
>= (page_get_free_space_of_empty(page_is_comp(page))
/ 2))) {
-
- return(DB_OVERFLOW);
+ /* We may need to update the IBUF_BITMAP_FREE
+ bits after a reorganize that was done in
+ btr_cur_update_alloc_zip(). */
+ err = DB_OVERFLOW;
+ goto func_exit;
}
if (UNIV_UNLIKELY(page_get_data_size(page)
- old_rec_size + new_rec_size
< BTR_CUR_PAGE_COMPRESS_LIMIT)) {
+ /* We may need to update the IBUF_BITMAP_FREE
+ bits after a reorganize that was done in
+ btr_cur_update_alloc_zip(). */
/* The page would become too empty */
-
- return(DB_UNDERFLOW);
+ err = DB_UNDERFLOW;
+ goto func_exit;
}
/* We do not attempt to reorganize if the page is compressed.
@@ -2196,11 +2227,16 @@ any_extern:
&& (max_size >= new_rec_size))
|| (page_get_n_recs(page) <= 1))) {
+ /* We may need to update the IBUF_BITMAP_FREE
+ bits after a reorganize that was done in
+ btr_cur_update_alloc_zip(). */
+
/* There was not enough space, or it did not pay to
reorganize: for simplicity, we decide what to do assuming a
reorganization is needed, though it might not be necessary */
- return(DB_OVERFLOW);
+ err = DB_OVERFLOW;
+ goto func_exit;
}
/* Do lock checking and undo logging */
@@ -2208,8 +2244,10 @@ any_extern:
update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
-
- return(err);
+ /* We may need to update the IBUF_BITMAP_FREE
+ bits after a reorganize that was done in
+ btr_cur_update_alloc_zip(). */
+ goto func_exit;
}
/* Ok, we may do the replacement. Store on the page infimum the
@@ -2236,19 +2274,23 @@ any_extern:
cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
- if (page_zip && !dict_index_is_clust(index)
- && page_is_leaf(page)) {
- /* Update the free bits in the insert buffer. */
- ibuf_update_free_bits_zip(block, mtr);
- }
-
/* Restore the old explicit lock state on the record */
lock_rec_restore_from_page_infimum(block, rec, block);
page_cur_move_to_next(page_cursor);
+ ut_ad(err == DB_SUCCESS);
- return(DB_SUCCESS);
+func_exit:
+ if (page_zip
+ && !(flags & BTR_KEEP_IBUF_BITMAP)
+ && !dict_index_is_clust(index)
+ && page_is_leaf(page)) {
+ /* Update the free bits in the insert buffer. */
+ ibuf_update_free_bits_zip(block, mtr);
+ }
+
+ return(err);
}
/*************************************************************//**
@@ -2332,8 +2374,8 @@ btr_cur_pessimistic_update(
que_thr_t* thr, /*!< in: query thread, or NULL if
appropriate flags are set */
trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
+ mtr_t* mtr) /*!< in/out: mini-transaction; must be
+ committed before latching any further pages */
{
big_rec_t* big_rec_vec = NULL;
big_rec_t* dummy_big_rec;
@@ -2347,8 +2389,7 @@ btr_cur_pessimistic_update(
dberr_t optim_err;
roll_ptr_t roll_ptr;
ibool was_first;
- ulint n_extents = 0;
- ulint n_reserved;
+ ulint n_reserved = 0;
ulint n_ext;
*offsets = NULL;
@@ -2357,7 +2398,6 @@ btr_cur_pessimistic_update(
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
page_zip = buf_block_get_page_zip(block);
- rec = btr_cur_get_rec(cursor);
index = cursor->index;
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
@@ -2370,22 +2410,35 @@ btr_cur_pessimistic_update(
ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(index));
- ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
- ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ ut_ad(thr_get_trx(thr)->id == trx_id
+ || (flags & ~BTR_KEEP_POS_FLAG)
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
- optim_err = btr_cur_optimistic_update(
- flags, cursor, offsets, offsets_heap, update,
+ err = optim_err = btr_cur_optimistic_update(
+ flags | BTR_KEEP_IBUF_BITMAP,
+ cursor, offsets, offsets_heap, update,
cmpl_info, thr, trx_id, mtr);
- switch (optim_err) {
+ switch (err) {
+ case DB_ZIP_OVERFLOW:
case DB_UNDERFLOW:
case DB_OVERFLOW:
- case DB_ZIP_OVERFLOW:
break;
default:
- return(optim_err);
+ err_exit:
+ /* We suppressed this with BTR_KEEP_IBUF_BITMAP.
+ For DB_ZIP_OVERFLOW, the IBUF_BITMAP_FREE bits were
+ already reset by btr_cur_update_alloc_zip() if the
+ page was recompressed. */
+ if (page_zip
+ && optim_err != DB_ZIP_OVERFLOW
+ && !dict_index_is_clust(index)
+ && page_is_leaf(page)) {
+ ibuf_update_free_bits_zip(block, mtr);
+ }
+
+ return(err);
}
/* Do lock checking and undo logging */
@@ -2393,8 +2446,7 @@ btr_cur_pessimistic_update(
update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
-
- return(err);
+ goto err_exit;
}
if (optim_err == DB_OVERFLOW) {
@@ -2404,7 +2456,7 @@ btr_cur_pessimistic_update(
of the index tree, so that the update will not fail because
of lack of space */
- n_extents = cursor->tree_height / 16 + 3;
+ ulint n_extents = cursor->tree_height / 16 + 3;
if (flags & BTR_NO_UNDO_LOG_FLAG) {
reserve_flag = FSP_CLEANING;
@@ -2414,10 +2466,13 @@ btr_cur_pessimistic_update(
if (!fsp_reserve_free_extents(&n_reserved, index->space,
n_extents, reserve_flag, mtr)) {
- return(DB_OUT_OF_FILE_SPACE);
+ err = DB_OUT_OF_FILE_SPACE;
+ goto err_exit;
}
}
+ rec = btr_cur_get_rec(cursor);
+
*offsets = rec_get_offsets(
rec, index, *offsets, ULINT_UNDEFINED, offsets_heap);
@@ -2479,8 +2534,21 @@ make_external:
big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
+ /* We cannot goto return_after_reservations,
+ because we may need to update the
+ IBUF_BITMAP_FREE bits, which was suppressed by
+ BTR_KEEP_IBUF_BITMAP. */
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip
+ || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+ if (n_reserved > 0) {
+ fil_space_release_free_extents(
+ index->space, n_reserved);
+ }
+
err = DB_TOO_BIG_RECORD;
- goto return_after_reservations;
+ goto err_exit;
}
ut_ad(page_is_leaf(page));
@@ -2528,14 +2596,17 @@ make_external:
bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG);
- if (btr_cur_compress_if_useful(cursor, adjust, mtr)
- && adjust) {
- rec_offs_make_valid(page_cursor->rec, index, *offsets);
- }
-
- if (page_zip && !dict_index_is_clust(index)
- && page_is_leaf(page)) {
- /* Update the free bits in the insert buffer. */
+ if (btr_cur_compress_if_useful(cursor, adjust, mtr)) {
+ if (adjust) {
+ rec_offs_make_valid(
+ page_cursor->rec, index, *offsets);
+ }
+ } else if (page_zip &&
+ !dict_index_is_clust(index)
+ && page_is_leaf(page)) {
+ /* Update the free bits in the insert buffer.
+ This is the same block which was skipped by
+ BTR_KEEP_IBUF_BITMAP. */
ibuf_update_free_bits_zip(block, mtr);
}
@@ -2549,7 +2620,9 @@ make_external:
btr_cur_insert_if_possible() to return FALSE. */
ut_a(page_zip || optim_err != DB_UNDERFLOW);
- /* Out of space: reset the free bits. */
+ /* Out of space: reset the free bits.
+ This is the same block which was skipped by
+ BTR_KEEP_IBUF_BITMAP. */
if (!dict_index_is_clust(index) && page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
@@ -2637,7 +2710,7 @@ return_after_reservations:
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- if (n_extents > 0) {
+ if (n_reserved > 0) {
fil_space_release_free_extents(index->space, n_reserved);
}
@@ -2786,7 +2859,7 @@ btr_cur_del_mark_set_clust_rec(
dict_index_t* index, /*!< in: clustered index of the record */
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
roll_ptr_t roll_ptr;
dberr_t err;
@@ -2838,7 +2911,7 @@ btr_cur_del_mark_set_clust_rec(
if (dict_index_is_online_ddl(index)) {
row_log_table_delete(
- rec, index, offsets,
+ rec, index, offsets, false,
trx_read_trx_id(row_get_trx_id_offset(index, offsets)
+ rec));
}
@@ -2941,7 +3014,7 @@ btr_cur_del_mark_set_sec_rec(
btr_cur_t* cursor, /*!< in: cursor */
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
buf_block_t* block;
rec_t* rec;
@@ -3088,38 +3161,42 @@ btr_cur_optimistic_delete_func(
page_t* page = buf_block_get_frame(block);
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
- ulint max_ins = 0;
lock_update_delete(block, rec);
btr_search_update_hash_on_delete(cursor);
- if (!page_zip) {
- max_ins = page_get_max_insert_size_after_reorganize(
- page, 1);
- }
+ if (page_zip) {
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip
- || page_zip_validate(page_zip, page, cursor->index));
+ ut_a(page_zip_validate(page_zip, page, cursor->index));
#endif /* UNIV_ZIP_DEBUG */
- page_cur_delete_rec(btr_cur_get_page_cur(cursor),
- cursor->index, offsets, mtr);
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor),
+ cursor->index, offsets, mtr);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip
- || page_zip_validate(page_zip, page, cursor->index));
+ ut_a(page_zip_validate(page_zip, page, cursor->index));
#endif /* UNIV_ZIP_DEBUG */
- if (dict_index_is_clust(cursor->index)
- || dict_index_is_ibuf(cursor->index)
- || !page_is_leaf(page)) {
- /* The insert buffer does not handle
- inserts to clustered indexes, to
- non-leaf pages of secondary index B-trees,
- or to the insert buffer. */
- } else if (page_zip) {
- ibuf_update_free_bits_zip(block, mtr);
+ /* On compressed pages, the IBUF_BITMAP_FREE
+ space is not affected by deleting (purging)
+ records, because it is defined as the minimum
+ of space available *without* reorganize, and
+ space available in the modification log. */
} else {
- ibuf_update_free_bits_low(block, max_ins, mtr);
+ const ulint max_ins
+ = page_get_max_insert_size_after_reorganize(
+ page, 1);
+
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor),
+ cursor->index, offsets, mtr);
+
+ /* The change buffer does not handle inserts
+ into non-leaf pages, into clustered indexes,
+ or into the change buffer. */
+ if (page_is_leaf(page)
+ && !dict_index_is_clust(cursor->index)
+ && !dict_index_is_ibuf(cursor->index)) {
+ ibuf_update_free_bits_low(block, max_ins, mtr);
+ }
}
}
@@ -3164,8 +3241,7 @@ btr_cur_pessimistic_delete(
page_zip_des_t* page_zip;
dict_index_t* index;
rec_t* rec;
- ulint n_extents = 0;
- ulint n_reserved;
+ ulint n_reserved = 0;
ibool success;
ibool ret = FALSE;
ulint level;
@@ -3188,7 +3264,7 @@ btr_cur_pessimistic_delete(
of the index tree, so that the node pointer updates will
not fail because of lack of space */
- n_extents = cursor->tree_height / 32 + 1;
+ ulint n_extents = cursor->tree_height / 32 + 1;
success = fsp_reserve_free_extents(&n_reserved,
index->space,
@@ -3292,7 +3368,7 @@ return_after_reservations:
ret = btr_cur_compress_if_useful(cursor, FALSE, mtr);
}
- if (n_extents > 0) {
+ if (n_reserved > 0) {
fil_space_release_free_extents(index->space, n_reserved);
}
@@ -4250,12 +4326,12 @@ btr_blob_free(
&& buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) {
- if (!buf_LRU_free_block(&block->page, all)
+ if (!buf_LRU_free_page(&block->page, all)
&& all && block->page.zip.data) {
/* Attempt to deallocate the uncompressed page
if the whole block cannot be deallocted. */
- buf_LRU_free_block(&block->page, FALSE);
+ buf_LRU_free_page(&block->page, false);
}
}
@@ -4334,7 +4410,7 @@ btr_store_big_rec_extern_fields(
heap = mem_heap_create(250000);
page_zip_set_alloc(&c_stream, heap);
- err = deflateInit2(&c_stream, page_compression_level,
+ err = deflateInit2(&c_stream, page_zip_level,
Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
ut_a(err == Z_OK);
}
@@ -4485,6 +4561,8 @@ alloc_another:
page_no, MLOG_4BYTES, &mtr);
}
+ } else if (dict_index_is_online_ddl(index)) {
+ row_log_table_blob_alloc(index, page_no);
}
if (page_zip) {
@@ -4728,6 +4806,10 @@ func_exit:
for (i = 0; i < n_freed_pages; i++) {
btr_page_free_low(index, freed_pages[i], 0, alloc_mtr);
}
+
+ DBUG_EXECUTE_IF("btr_store_big_rec_extern",
+ error = DB_OUT_OF_FILE_SPACE;
+ goto func_exit;);
}
if (heap != NULL) {
@@ -4827,13 +4909,17 @@ btr_free_externally_stored_field(
X-latch to the index tree */
{
page_t* page;
- ulint space_id;
+ const ulint space_id = mach_read_from_4(
+ field_ref + BTR_EXTERN_SPACE_ID);
+ const ulint start_page = mach_read_from_4(
+ field_ref + BTR_EXTERN_PAGE_NO);
ulint rec_zip_size = dict_table_zip_size(index->table);
ulint ext_zip_size;
ulint page_no;
ulint next_page_no;
mtr_t mtr;
+ ut_ad(dict_index_is_clust(index));
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
@@ -4850,7 +4936,7 @@ btr_free_externally_stored_field(
return;
}
- space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
+ ut_ad(space_id == index->space);
if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
ext_zip_size = fil_space_get_zip_size(space_id);
@@ -4880,8 +4966,7 @@ btr_free_externally_stored_field(
btr_blob_dbg_t b;
- b.blob_page_no = mach_read_from_4(
- field_ref + BTR_EXTERN_PAGE_NO);
+ b.blob_page_no = start_page;
if (rec) {
/* Remove the reference from the record to the
@@ -4936,6 +5021,10 @@ btr_free_externally_stored_field(
return;
}
+ if (page_no == start_page && dict_index_is_online_ddl(index)) {
+ row_log_table_blob_free(index, start_page);
+ }
+
ext_block = buf_page_get(space_id, ext_zip_size, page_no,
RW_X_LATCH, &mtr);
buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index aceb6bd1d41..54ef2a243ba 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -126,7 +126,7 @@ btr_pcur_store_position(
|| mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_a(cursor->latch_mode != BTR_NO_LATCHES);
- if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
+ if (page_is_empty(page)) {
/* It must be an empty index tree; NOTE that in this case
we do not store the modify_clock, but always do a search
if we restore the cursor position */
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
index e34216dbc8f..ee400fcdf23 100644
--- a/storage/innobase/buf/buf0buddy.cc
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,12 +33,128 @@ Created December 2006 by Marko Makela
#include "buf0lru.h"
#include "buf0flu.h"
#include "page0zip.h"
+#include "srv0start.h"
+
+/** When freeing a buf we attempt to coalesce by looking at its buddy
+and deciding whether it is free or not. To ascertain if the buddy is
+free we look for BUF_BUDDY_STAMP_FREE at BUF_BUDDY_STAMP_OFFSET
+within the buddy. The question is how we can be sure that it is
+safe to look at BUF_BUDDY_STAMP_OFFSET.
+The answer lies in the following invariants:
+* All blocks allocated by the buddy allocator are used for compressed
+page frames.
+* A compressed table always has space_id < SRV_LOG_SPACE_FIRST_ID
+* BUF_BUDDY_STAMP_OFFSET always points to the space_id field in
+a frame.
+ -- The above is true because we look at these fields when the
+ corresponding buddy block is free which implies that:
+ * The block we are looking at must have an address aligned at
+ the same size that its free buddy has. For example, if we have
+ a free block of 8K then its buddy's address must be aligned at
+ 8K as well.
+ * It is possible that the block we are looking at may have been
+ further divided into smaller sized blocks but its starting
+ address must still remain the start of a page frame i.e.: it
+ cannot be in the middle of a block. For example, if we have a free
+ block of size 8K then its buddy may be divided into blocks
+ of, say, 1K, 1K, 2K, 4K but the buddy's address will still be
+ the starting address of first 1K compressed page.
+ * What is important to note is that for any given block, the
+ buddy's address cannot be in the middle of a larger block i.e.:
+ in above example, our 8K block cannot have a buddy whose address
+ is aligned on 8K but it is part of a larger 16K block.
+*/
+
+/** Offset within buf_buddy_free_t where free or non_free stamps
+are written.*/
+#define BUF_BUDDY_STAMP_OFFSET FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
+
+/** Value that we stamp on all buffers that are currently on the zip_free
+list. This value is stamped at BUF_BUDDY_STAMP_OFFSET offset */
+#define BUF_BUDDY_STAMP_FREE (SRV_LOG_SPACE_FIRST_ID)
+
+/** Stamp value for non-free buffers. Will be overwritten by a non-zero
+value by the consumer of the block */
+#define BUF_BUDDY_STAMP_NONFREE (0XFFFFFFFF)
+
+#if BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE
+# error "BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE"
+#endif
+
+/** Return type of buf_buddy_is_free() */
+enum buf_buddy_state_t {
+ BUF_BUDDY_STATE_FREE, /*!< If the buddy is completely free */
+ BUF_BUDDY_STATE_USED, /*!< Buddy currently in use */
+ BUF_BUDDY_STATE_PARTIALLY_USED/*!< Some sub-blocks in the buddy
+ are in use */
+};
+
+#ifdef UNIV_DEBUG_VALGRIND
+/**********************************************************************//**
+Invalidate memory area that we won't access while page is free */
+UNIV_INLINE
+void
+buf_buddy_mem_invalid(
+/*==================*/
+ buf_buddy_free_t* buf, /*!< in: block to invalidate */
+ ulint i) /*!< in: index of zip_free[] */
+{
+ const size_t size = BUF_BUDDY_LOW << i;
+ ut_ad(i <= BUF_BUDDY_SIZES);
+
+ UNIV_MEM_ASSERT_W(buf, size);
+ UNIV_MEM_INVALID(buf, size);
+}
+#else /* UNIV_DEBUG_VALGRIND */
+# define buf_buddy_mem_invalid(buf, i) ut_ad((i) <= BUF_BUDDY_SIZES)
+#endif /* UNIV_DEBUG_VALGRIND */
+
+/**********************************************************************//**
+Check if a buddy is stamped free.
+@return whether the buddy is free */
+UNIV_INLINE __attribute__((warn_unused_result))
+bool
+buf_buddy_stamp_is_free(
+/*====================*/
+ const buf_buddy_free_t* buf) /*!< in: block to check */
+{
+ return(mach_read_from_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET)
+ == BUF_BUDDY_STAMP_FREE);
+}
+
+/**********************************************************************//**
+Stamps a buddy free. */
+UNIV_INLINE
+void
+buf_buddy_stamp_free(
+/*=================*/
+ buf_buddy_free_t* buf, /*!< in/out: block to stamp */
+ ulint i) /*!< in: index of buf_pool->zip_free[] */
+{
+ ut_d(memset(buf, i, BUF_BUDDY_LOW << i));
+ buf_buddy_mem_invalid(buf, i);
+ mach_write_to_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET,
+ BUF_BUDDY_STAMP_FREE);
+ buf->stamp.size = i;
+}
+
+/**********************************************************************//**
+Stamps a buddy nonfree.
+@param[in,out] buf block to stamp
+@param[in] i index of buf_pool->zip_free[] */
+#define buf_buddy_stamp_nonfree(buf, i) do { \
+ buf_buddy_mem_invalid(buf, i); \
+ memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4); \
+} while (0)
+#if BUF_BUDDY_STAMP_NONFREE != 0xffffffff
+# error "BUF_BUDDY_STAMP_NONFREE != 0xffffffff"
+#endif
/**********************************************************************//**
Get the offset of the buddy of a compressed page frame.
@return the buddy relative of page */
UNIV_INLINE
-byte*
+void*
buf_buddy_get(
/*==========*/
byte* page, /*!< in: compressed page */
@@ -60,14 +176,96 @@ buf_buddy_get(
/** Validate a given zip_free list. */
struct CheckZipFree {
- void operator()(const buf_page_t* elem) const
+ ulint i;
+ CheckZipFree(ulint i) : i (i) {}
+
+ void operator()(const buf_buddy_free_t* elem) const
{
- ut_a(buf_page_get_state(elem) == BUF_BLOCK_ZIP_FREE);
+ ut_a(buf_buddy_stamp_is_free(elem));
+ ut_a(elem->stamp.size <= i);
}
};
#define BUF_BUDDY_LIST_VALIDATE(bp, i) \
- UT_LIST_VALIDATE(list, buf_page_t, bp->zip_free[i], CheckZipFree())
+ UT_LIST_VALIDATE(list, buf_buddy_free_t, \
+ bp->zip_free[i], CheckZipFree(i))
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Debug function to validate that a buffer is indeed free i.e.: in the
+zip_free[].
+@return true if free */
+UNIV_INLINE
+bool
+buf_buddy_check_free(
+/*=================*/
+ buf_pool_t* buf_pool,/*!< in: buffer pool instance */
+ const buf_buddy_free_t* buf, /*!< in: block to check */
+ ulint i) /*!< in: index of buf_pool->zip_free[] */
+{
+ const ulint size = BUF_BUDDY_LOW << i;
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!ut_align_offset(buf, size));
+ ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+
+ buf_buddy_free_t* itr;
+
+ for (itr = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+ itr && itr != buf;
+ itr = UT_LIST_GET_NEXT(list, itr)) {
+ }
+
+ return(itr == buf);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Checks if a buf is free i.e.: in the zip_free[].
+@retval BUF_BUDDY_STATE_FREE if fully free
+@retval BUF_BUDDY_STATE_USED if currently in use
+@retval BUF_BUDDY_STATE_PARTIALLY_USED if partially in use. */
+static __attribute__((warn_unused_result))
+buf_buddy_state_t
+buf_buddy_is_free(
+/*==============*/
+ buf_buddy_free_t* buf, /*!< in: block to check */
+ ulint i) /*!< in: index of
+ buf_pool->zip_free[] */
+{
+#ifdef UNIV_DEBUG
+ const ulint size = BUF_BUDDY_LOW << i;
+ ut_ad(!ut_align_offset(buf, size));
+ ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+#endif /* UNIV_DEBUG */
+
+ /* We assume that all memory from buf_buddy_alloc()
+ is used for compressed page frames. */
+
+ /* We look inside the allocated objects returned by
+ buf_buddy_alloc() and assume that each block is a compressed
+ page that contains one of the following in space_id.
+ * BUF_BUDDY_STAMP_FREE if the block is in a zip_free list or
+ * BUF_BUDDY_STAMP_NONFREE if the block has been allocated but
+ not initialized yet or
+ * A valid space_id of a compressed tablespace
+
+ The call below attempts to read from free memory. The memory
+ is "owned" by the buddy allocator (and it has been allocated
+ from the buffer pool), so there is nothing wrong about this. */
+ if (!buf_buddy_stamp_is_free(buf)) {
+ return(BUF_BUDDY_STATE_USED);
+ }
+
+ /* A block may be free but a fragment of it may still be in use.
+ To guard against that we write the free block size in terms of
+ zip_free index at start of stamped block. Note that we can
+ safely rely on this value only if the buf is free. */
+ ut_ad(buf->stamp.size <= i);
+ return(buf->stamp.size == i
+ ? BUF_BUDDY_STATE_FREE
+ : BUF_BUDDY_STATE_PARTIALLY_USED);
+}
/**********************************************************************//**
Add a block to the head of the appropriate buddy free list. */
@@ -75,15 +273,17 @@ UNIV_INLINE
void
buf_buddy_add_to_free(
/*==================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_page_t* bpage, /*!< in,own: block to be freed */
- ulint i) /*!< in: index of
- buf_pool->zip_free[] */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_buddy_free_t* buf, /*!< in,own: block to be freed */
+ ulint i) /*!< in: index of
+ buf_pool->zip_free[] */
{
ut_ad(buf_pool_mutex_own(buf_pool));
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
- ut_ad(buf_pool->zip_free[i].start != bpage);
- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
+ ut_ad(buf_pool->zip_free[i].start != buf);
+
+ buf_buddy_stamp_free(buf, i);
+ UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buf);
+ ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
}
/**********************************************************************//**
@@ -92,35 +292,29 @@ UNIV_INLINE
void
buf_buddy_remove_from_free(
/*=======================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_page_t* bpage, /*!< in: block to be removed */
- ulint i) /*!< in: index of
- buf_pool->zip_free[] */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_buddy_free_t* buf, /*!< in,own: block to be removed */
+ ulint i) /*!< in: index of
+ buf_pool->zip_free[] */
{
-#ifdef UNIV_DEBUG
- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
-
- ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
- ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
-#endif /* UNIV_DEBUG */
-
ut_ad(buf_pool_mutex_own(buf_pool));
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
+ ut_ad(buf_buddy_check_free(buf_pool, buf, i));
+
+ UT_LIST_REMOVE(list, buf_pool->zip_free[i], buf);
+ buf_buddy_stamp_nonfree(buf, i);
}
/**********************************************************************//**
Try to allocate a block from buf_pool->zip_free[].
@return allocated block, or NULL if buf_pool->zip_free[] was empty */
static
-void*
+buf_buddy_free_t*
buf_buddy_alloc_zip(
/*================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
ulint i) /*!< in: index of buf_pool->zip_free[] */
{
- buf_page_t* bpage;
+ buf_buddy_free_t* buf;
ut_ad(buf_pool_mutex_own(buf_pool));
ut_a(i < BUF_BUDDY_SIZES);
@@ -128,33 +322,38 @@ buf_buddy_alloc_zip(
ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
-
- if (bpage) {
- ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+ buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
- buf_buddy_remove_from_free(buf_pool, bpage, i);
+ if (buf) {
+ buf_buddy_remove_from_free(buf_pool, buf, i);
} else if (i + 1 < BUF_BUDDY_SIZES) {
/* Attempt to split. */
- bpage = (buf_page_t*) buf_buddy_alloc_zip(buf_pool, i + 1);
+ buf = buf_buddy_alloc_zip(buf_pool, i + 1);
- if (bpage) {
- buf_page_t* buddy = (buf_page_t*)
- (((char*) bpage) + (BUF_BUDDY_LOW << i));
+ if (buf) {
+ buf_buddy_free_t* buddy =
+ reinterpret_cast<buf_buddy_free_t*>(
+ buf->stamp.bytes
+ + (BUF_BUDDY_LOW << i));
ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
- ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
- buddy->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_add_to_free(buf_pool, buddy, i);
}
}
- if (bpage) {
- ut_d(memset(bpage, ~i, BUF_BUDDY_LOW << i));
- UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
+ if (buf) {
+ /* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */
+ UNIV_MEM_TRASH(buf, ~i, BUF_BUDDY_STAMP_OFFSET);
+ UNIV_MEM_TRASH(BUF_BUDDY_STAMP_OFFSET + 4
+ + buf->stamp.bytes, ~i,
+ (BUF_BUDDY_LOW << i)
+ - (BUF_BUDDY_STAMP_OFFSET + 4));
+ ut_ad(mach_read_from_4(buf->stamp.bytes
+ + BUF_BUDDY_STAMP_OFFSET)
+ == BUF_BUDDY_STAMP_NONFREE);
}
- return(bpage);
+ return(buf);
}
/**********************************************************************//**
@@ -246,18 +445,17 @@ buf_buddy_alloc_from(
/* Add the unused parts of the block to the free lists. */
while (j > i) {
- buf_page_t* bpage;
+ buf_buddy_free_t* zip_buf;
offs >>= 1;
j--;
- bpage = (buf_page_t*) ((byte*) buf + offs);
- ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
- bpage->state = BUF_BLOCK_ZIP_FREE;
- ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
- buf_buddy_add_to_free(buf_pool, bpage, j);
+ zip_buf = reinterpret_cast<buf_buddy_free_t*>(
+ reinterpret_cast<byte*>(buf) + offs);
+ buf_buddy_add_to_free(buf_pool, zip_buf, j);
}
+ buf_buddy_stamp_nonfree(reinterpret_cast<buf_buddy_free_t*>(buf), i);
return(buf);
}
@@ -322,9 +520,9 @@ func_exit:
/**********************************************************************//**
Try to relocate a block.
-@return TRUE if relocated */
+@return true if relocated */
static
-ibool
+bool
buf_buddy_relocate(
/*===============*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
@@ -337,7 +535,7 @@ buf_buddy_relocate(
const ulint size = BUF_BUDDY_LOW << i;
ib_mutex_t* mutex;
ulint space;
- ulint page_no;
+ ulint offset;
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(!mutex_own(&buf_pool->zip_mutex));
@@ -346,32 +544,19 @@ buf_buddy_relocate(
ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
UNIV_MEM_ASSERT_W(dst, size);
- /* We assume that all memory from buf_buddy_alloc()
- is used for compressed page frames. */
-
- /* We look inside the allocated objects returned by
- buf_buddy_alloc() and assume that each block is a compressed
- page that contains a valid space_id and page_no in the page
- header. Should the fields be invalid, we will be unable to
- relocate the block. */
-
- /* The src block may be split into smaller blocks,
- some of which may be free. Thus, the
- mach_read_from_4() calls below may attempt to read
- from free memory. The memory is "owned" by the buddy
- allocator (and it has been allocated from the buffer
- pool), so there is nothing wrong about this. The
- mach_read_from_4() calls here will only trigger bogus
- Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
space = mach_read_from_4((const byte*) src
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- page_no = mach_read_from_4((const byte*) src
+ offset = mach_read_from_4((const byte*) src
+ FIL_PAGE_OFFSET);
+
/* Suppress Valgrind warnings about conditional jump
on uninitialized value. */
UNIV_MEM_VALID(&space, sizeof space);
- UNIV_MEM_VALID(&page_no, sizeof page_no);
- bpage = buf_page_hash_get(buf_pool, space, page_no);
+ UNIV_MEM_VALID(&offset, sizeof offset);
+
+ ut_ad(space != BUF_BUDDY_STAMP_FREE);
+
+ bpage = buf_page_hash_get(buf_pool, space, offset);
if (!bpage || bpage->zip.data != src) {
/* The block has probably been freshly
@@ -379,7 +564,7 @@ buf_buddy_relocate(
added to buf_pool->page_hash yet. Obviously,
it cannot be relocated. */
- return(FALSE);
+ return(false);
}
if (page_zip_get_size(&bpage->zip) != size) {
@@ -388,7 +573,7 @@ buf_buddy_relocate(
For the sake of simplicity, give up. */
ut_ad(page_zip_get_size(&bpage->zip) < size);
- return(FALSE);
+ return(false);
}
/* The block must have been allocated, but it may
@@ -406,19 +591,17 @@ buf_buddy_relocate(
memcpy(dst, src, size);
bpage->zip.data = (page_zip_t*) dst;
mutex_exit(mutex);
- UNIV_MEM_INVALID(src, size);
- {
- buf_buddy_stat_t* buddy_stat
- = &buf_pool->buddy_stat[i];
- buddy_stat->relocated++;
- buddy_stat->relocated_usec
- += ut_time_us(NULL) - usec;
- }
- return(TRUE);
+ buf_buddy_mem_invalid(
+ reinterpret_cast<buf_buddy_free_t*>(src), i);
+
+ buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i];
+ buddy_stat->relocated++;
+ buddy_stat->relocated_usec += ut_time_us(NULL) - usec;
+ return(true);
}
mutex_exit(mutex);
- return(FALSE);
+ return(false);
}
/**********************************************************************//**
@@ -433,8 +616,7 @@ buf_buddy_free_low(
ulint i) /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
{
- buf_page_t* bpage;
- buf_page_t* buddy;
+ buf_buddy_free_t* buddy;
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(!mutex_own(&buf_pool->zip_mutex));
@@ -445,7 +627,6 @@ buf_buddy_free_low(
buf_pool->buddy_stat[i].used--;
recombine:
UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
- ((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
if (i == BUF_BUDDY_SIZES) {
buf_buddy_block_free(buf_pool, buf);
@@ -464,73 +645,54 @@ recombine:
}
/* Try to combine adjacent blocks. */
- buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
-
-#ifndef UNIV_DEBUG_VALGRIND
- /* When Valgrind instrumentation is not enabled, we can read
- buddy->state to quickly determine that a block is not free.
- When the block is not free, buddy->state belongs to a compressed
- page frame that may be flagged uninitialized in our Valgrind
- instrumentation. */
-
- if (buddy->state != BUF_BLOCK_ZIP_FREE) {
-
- goto buddy_nonfree;
- }
-#endif /* !UNIV_DEBUG_VALGRIND */
-
- for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
-
- if (bpage == buddy) {
- /* The buddy is free: recombine */
- buf_buddy_remove_from_free(buf_pool, bpage, i);
+ buddy = reinterpret_cast<buf_buddy_free_t*>(
+ buf_buddy_get(reinterpret_cast<byte*>(buf),
+ BUF_BUDDY_LOW << i));
+
+ switch (buf_buddy_is_free(buddy, i)) {
+ case BUF_BUDDY_STATE_FREE:
+ /* The buddy is free: recombine */
+ buf_buddy_remove_from_free(buf_pool, buddy, i);
buddy_is_free:
- ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
- ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
- i++;
- buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
+ ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
+ i++;
+ buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
- goto recombine;
- }
+ goto recombine;
- ut_a(bpage != buf);
- UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
- bpage = UT_LIST_GET_NEXT(list, bpage);
- }
-
-#ifndef UNIV_DEBUG_VALGRIND
-buddy_nonfree:
-#endif /* !UNIV_DEBUG_VALGRIND */
-
- ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+ case BUF_BUDDY_STATE_USED:
+ ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
- /* The buddy is not free. Is there a free block of this size? */
- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+ /* The buddy is not free. Is there a free block of
+ this size? */
+ if (buf_buddy_free_t* zip_buf =
+ UT_LIST_GET_FIRST(buf_pool->zip_free[i])) {
- if (bpage) {
+ /* Remove the block from the free list, because
+ a successful buf_buddy_relocate() will overwrite
+ zip_buf->list. */
+ buf_buddy_remove_from_free(buf_pool, zip_buf, i);
- /* Remove the block from the free list, because a successful
- buf_buddy_relocate() will overwrite bpage->list. */
- buf_buddy_remove_from_free(buf_pool, bpage, i);
+ /* Try to relocate the buddy of buf to the free
+ block. */
+ if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i)) {
- /* Try to relocate the buddy of buf to the free block. */
- if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
+ goto buddy_is_free;
+ }
- buddy->state = BUF_BLOCK_ZIP_FREE;
- goto buddy_is_free;
+ buf_buddy_add_to_free(buf_pool, zip_buf, i);
}
- buf_buddy_add_to_free(buf_pool, bpage, i);
+ break;
+ case BUF_BUDDY_STATE_PARTIALLY_USED:
+ /* Some sub-blocks in the buddy are still in use.
+ Relocation will fail. No need to try. */
+ break;
}
func_exit:
/* Free the block to the buddy list. */
- bpage = (buf_page_t*) buf;
-
- /* Fill large blocks with a constant pattern. */
- ut_d(memset(bpage, i, BUF_BUDDY_LOW << i));
- UNIV_MEM_INVALID(bpage, BUF_BUDDY_LOW << i);
- bpage->state = BUF_BLOCK_ZIP_FREE;
- buf_buddy_add_to_free(buf_pool, bpage, i);
+ buf_buddy_add_to_free(buf_pool,
+ reinterpret_cast<buf_buddy_free_t*>(buf),
+ i);
}
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 913f0a40ef4..a8e833b5fa3 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -496,14 +496,13 @@ buf_page_is_corrupted(
}
#ifndef UNIV_HOTBACKUP
- if (recv_lsn_checks_on) {
+ if (check_lsn && recv_lsn_checks_on) {
lsn_t current_lsn;
/* Since we are going to reset the page LSN during the import
phase it makes no sense to spam the log with error messages. */
- if (check_lsn
- && log_peek_lsn(&current_lsn)
+ if (log_peek_lsn(&current_lsn)
&& current_lsn
< mach_read_from_8(read_buf + FIL_PAGE_LSN)) {
ut_print_timestamp(stderr);
@@ -1167,7 +1166,7 @@ buf_chunk_not_freed(
ibool ready;
switch (buf_block_get_state(block)) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* The uncompressed buffer pool should never
@@ -1492,7 +1491,7 @@ buf_relocate(
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
#ifdef UNIV_DEBUG
switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_FILE_PAGE:
@@ -1964,7 +1963,7 @@ buf_block_try_discard_uncompressed(
bpage = buf_page_hash_get(buf_pool, space, offset);
if (bpage) {
- buf_LRU_free_block(bpage, FALSE);
+ buf_LRU_free_page(bpage, false);
}
buf_pool_mutex_exit(buf_pool);
@@ -2014,7 +2013,7 @@ lookup:
buf_read_page(space, zip_size, offset);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 37 || buf_validate());
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}
@@ -2030,11 +2029,11 @@ err_exit:
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
- case BUF_BLOCK_ZIP_FREE:
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
@@ -2240,7 +2239,7 @@ buf_block_align_instance(
mutex_enter(&block->mutex);
switch (buf_block_get_state(block)) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* These types should only be used in
@@ -2436,7 +2435,6 @@ buf_page_get_gen(
ibool must_read;
rw_lock_t* hash_lock;
ib_mutex_t* block_mutex;
- buf_page_t* hash_bpage;
ulint retries = 0;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
@@ -2489,7 +2487,6 @@ loop:
block = guess = NULL;
} else {
ut_ad(!block->page.in_zip_hash);
- ut_ad(block->page.in_page_hash);
}
}
@@ -2543,6 +2540,10 @@ loop:
retries = 0;
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
++retries;
+ DBUG_EXECUTE_IF(
+ "innodb_page_corruption_retries",
+ retries = BUF_PAGE_READ_MAX_RETRIES;
+ );
} else {
fprintf(stderr, "InnoDB: Error: Unable"
" to read tablespace %lu page no"
@@ -2564,7 +2565,7 @@ loop:
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 37 || buf_validate());
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
goto loop;
}
@@ -2590,6 +2591,7 @@ got_block:
/* The page is being read to buffer pool,
but we cannot wait around for the read to
complete. */
+null_exit:
mutex_exit(block_mutex);
return(NULL);
@@ -2603,6 +2605,14 @@ got_block:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
+ if (mode == BUF_PEEK_IF_IN_POOL) {
+ /* This mode is only used for dropping an
+ adaptive hash index. There cannot be an
+ adaptive hash index for a compressed-only
+ page, so do not bother decompressing the page. */
+ goto null_exit;
+ }
+
bpage = &block->page;
if (bpage->buf_fix_count
@@ -2735,7 +2745,7 @@ wait_until_unfixed:
break;
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
@@ -2780,7 +2790,7 @@ wait_until_unfixed:
relocated or enter or exit the buf_pool while we
are holding the buf_pool->mutex. */
- if (buf_LRU_free_block(&block->page, TRUE)) {
+ if (buf_LRU_free_page(&block->page, true)) {
buf_pool_mutex_exit(buf_pool);
rw_lock_x_lock(hash_lock);
@@ -3728,7 +3738,7 @@ buf_page_create(
memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 357 || buf_validate());
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
@@ -4196,7 +4206,7 @@ buf_pool_invalidate_instance(
pool invalidation to proceed we must ensure there is NO
write activity happening. */
if (buf_pool->n_flush[i] > 0) {
- enum buf_flush type = static_cast<enum buf_flush>(i);
+ buf_flush_t type = static_cast<buf_flush_t>(i);
buf_pool_mutex_exit(buf_pool);
buf_flush_wait_batch_end(buf_pool, type);
@@ -4285,7 +4295,7 @@ buf_pool_validate_instance(
mutex_enter(&block->mutex);
switch (buf_block_get_state(block)) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* These should only occur on
@@ -4378,7 +4388,7 @@ assert_s_latched:
/* All clean blocks should be I/O-unfixed. */
break;
case BUF_IO_READ:
- /* In buf_LRU_free_block(), we temporarily set
+ /* In buf_LRU_free_page(), we temporarily set
b->io_fix = BUF_IO_READ for a newly allocated
control block in order to prevent
buf_page_get_gen() from decompressing the block. */
@@ -4437,7 +4447,7 @@ assert_s_latched:
case BUF_BLOCK_FILE_PAGE:
/* uncompressed page */
break;
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
@@ -4720,7 +4730,7 @@ buf_get_latched_pages_number_instance(
case BUF_BLOCK_FILE_PAGE:
/* uncompressed page */
break;
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
@@ -5015,7 +5025,7 @@ buf_print_io_instance(
"Old database pages %lu\n"
"Modified db pages %lu\n"
"Pending reads %lu\n"
- "Pending writes: LRU %lu, flush list %lu single page %lu\n",
+ "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
pool_info->pool_size,
pool_info->free_list_len,
pool_info->lru_len,
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index fb853fe1543..2b2483fde6d 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,11 +38,6 @@ Created 2011/12/19
#ifndef UNIV_HOTBACKUP
-/** Time in milliseconds that we sleep when unable to find a slot in
-the doublewrite buffer or when we have to wait for a running batch
-to end. */
-#define TRX_DOUBLEWRITE_BATCH_POLL_DELAY 10000
-
#ifdef UNIV_PFS_MUTEX
/* Key to register the mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t buf_dblwr_mutex_key;
@@ -104,6 +99,25 @@ buf_dblwr_get(
return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE);
}
+/********************************************************************//**
+Flush a batch of writes to the datafiles that have already been
+written to the dblwr buffer on disk. */
+UNIV_INLINE
+void
+buf_dblwr_sync_datafiles()
+/*======================*/
+{
+ /* Wake possible simulated aio thread to actually post the
+ writes to the operating system */
+ os_aio_simulated_wake_handler_threads();
+
+ /* Wait until all async writes to tablespaces have been posted to
+ the OS */
+ os_aio_wait_until_no_pending_writes();
+
+ /* Now we flush the data to disk (for example, with fsync) */
+ fil_flush_file_spaces(FIL_TABLESPACE);
+}
/****************************************************************//**
Creates or initialializes the doublewrite buffer at a database start. */
@@ -131,6 +145,8 @@ buf_dblwr_init(
mutex_create(buf_dblwr_mutex_key,
&buf_dblwr->mutex, SYNC_DOUBLEWRITE);
+ buf_dblwr->b_event = os_event_create();
+ buf_dblwr->s_event = os_event_create();
buf_dblwr->first_free = 0;
buf_dblwr->s_reserved = 0;
buf_dblwr->b_reserved = 0;
@@ -140,8 +156,8 @@ buf_dblwr_init(
buf_dblwr->block2 = mach_read_from_4(
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
- buf_dblwr->in_use = static_cast<ibool*>(
- mem_zalloc(buf_size * sizeof(ibool)));
+ buf_dblwr->in_use = static_cast<bool*>(
+ mem_zalloc(buf_size * sizeof(bool)));
buf_dblwr->write_buf_unaligned = static_cast<byte*>(
ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE));
@@ -365,7 +381,7 @@ buf_dblwr_init_or_restore_pages(
/* Read the trx sys header to check if we are using the doublewrite
buffer */
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
+ fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
UNIV_PAGE_SIZE, read_buf, NULL);
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
@@ -400,10 +416,10 @@ buf_dblwr_init_or_restore_pages(
/* Read the pages from the doublewrite buffer to memory */
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
+ fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, block1, 0,
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
buf, NULL);
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
+ fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, block2, 0,
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
NULL);
@@ -433,7 +449,7 @@ buf_dblwr_init_or_restore_pages(
+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
}
- fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
+ fil_io(OS_FILE_WRITE, true, 0, 0, source_page_no, 0,
UNIV_PAGE_SIZE, page, NULL);
} else {
@@ -473,7 +489,7 @@ buf_dblwr_init_or_restore_pages(
ulint zip_size = fil_space_get_zip_size(space_id);
/* Read in the actual page from the file */
- fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
+ fil_io(OS_FILE_READ, true, space_id, zip_size,
page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
read_buf, NULL);
@@ -525,7 +541,7 @@ buf_dblwr_init_or_restore_pages(
doublewrite buffer to the intended
position */
- fil_io(OS_FILE_WRITE, TRUE, space_id,
+ fil_io(OS_FILE_WRITE, true, space_id,
zip_size, page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
page, NULL);
@@ -557,6 +573,8 @@ buf_dblwr_free(void)
ut_ad(buf_dblwr->s_reserved == 0);
ut_ad(buf_dblwr->b_reserved == 0);
+ os_event_free(buf_dblwr->b_event);
+ os_event_free(buf_dblwr->s_event);
ut_free(buf_dblwr->write_buf_unaligned);
buf_dblwr->write_buf_unaligned = NULL;
@@ -572,38 +590,68 @@ buf_dblwr_free(void)
}
/********************************************************************//**
-Updates the doublewrite buffer when an IO request that is part of an
-LRU or flush batch is completed. */
+Updates the doublewrite buffer when an IO request is completed. */
UNIV_INTERN
void
-buf_dblwr_update(void)
-/*==================*/
+buf_dblwr_update(
+/*=============*/
+ const buf_page_t* bpage, /*!< in: buffer block descriptor */
+ buf_flush_t flush_type)/*!< in: flush type */
{
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
return;
}
- mutex_enter(&buf_dblwr->mutex);
+ switch (flush_type) {
+ case BUF_FLUSH_LIST:
+ case BUF_FLUSH_LRU:
+ mutex_enter(&buf_dblwr->mutex);
- ut_ad(buf_dblwr->batch_running);
- ut_ad(buf_dblwr->b_reserved > 0);
- ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free);
+ ut_ad(buf_dblwr->batch_running);
+ ut_ad(buf_dblwr->b_reserved > 0);
+ ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free);
- buf_dblwr->b_reserved--;
- if (buf_dblwr->b_reserved == 0) {
+ buf_dblwr->b_reserved--;
+
+ if (buf_dblwr->b_reserved == 0) {
+ mutex_exit(&buf_dblwr->mutex);
+ /* This will finish the batch. Sync data files
+ to the disk. */
+ fil_flush_file_spaces(FIL_TABLESPACE);
+ mutex_enter(&buf_dblwr->mutex);
+
+ /* We can now reuse the doublewrite memory buffer: */
+ buf_dblwr->first_free = 0;
+ buf_dblwr->batch_running = false;
+ os_event_set(buf_dblwr->b_event);
+ }
mutex_exit(&buf_dblwr->mutex);
- /* This will finish the batch. Sync data files
- to the disk. */
- fil_flush_file_spaces(FIL_TABLESPACE);
- mutex_enter(&buf_dblwr->mutex);
+ break;
+ case BUF_FLUSH_SINGLE_PAGE:
+ {
+ const ulint size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+ ulint i;
+ mutex_enter(&buf_dblwr->mutex);
+ for (i = srv_doublewrite_batch_size; i < size; ++i) {
+ if (buf_dblwr->buf_block_arr[i] == bpage) {
+ buf_dblwr->s_reserved--;
+ buf_dblwr->buf_block_arr[i] = NULL;
+ buf_dblwr->in_use[i] = false;
+ break;
+ }
+ }
- /* We can now reuse the doublewrite memory buffer: */
- buf_dblwr->first_free = 0;
- buf_dblwr->batch_running = FALSE;
+ /* The block we are looking for must exist as a
+ reserved block. */
+ ut_a(i < size);
+ }
+ os_event_set(buf_dblwr->s_event);
+ mutex_exit(&buf_dblwr->mutex);
+ break;
+ case BUF_FLUSH_N_TYPES:
+ ut_error;
}
-
- mutex_exit(&buf_dblwr->mutex);
}
/********************************************************************//**
@@ -698,18 +746,19 @@ static
void
buf_dblwr_write_block_to_datafile(
/*==============================*/
- const buf_page_t* bpage) /*!< in: page to write */
+ const buf_page_t* bpage, /*!< in: page to write */
+ bool sync) /*!< in: true if sync IO
+ is requested */
{
ut_a(bpage);
ut_a(buf_page_in_file(bpage));
- /* Increment the counter of I/O operations used
- for selecting LRU policy. */
- buf_LRU_stat_inc_io();
+ const ulint flags = sync
+ ? OS_FILE_WRITE
+ : OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER;
if (bpage->zip.data) {
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_page_get_space(bpage),
+ fil_io(flags, sync, buf_page_get_space(bpage),
buf_page_get_zip_size(bpage),
buf_page_get_page_no(bpage), 0,
buf_page_get_zip_size(bpage),
@@ -724,8 +773,7 @@ buf_dblwr_write_block_to_datafile(
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_dblwr_check_page_lsn(block->frame);
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_block_get_space(block), 0,
+ fil_io(flags, sync, buf_block_get_space(block), 0,
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
(void*) block->frame, (void*) block);
}
@@ -747,12 +795,12 @@ buf_dblwr_flush_buffered_writes(void)
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
/* Sync the writes to the disk. */
- buf_flush_sync_datafiles();
+ buf_dblwr_sync_datafiles();
return;
}
try_again:
- mutex_enter(&(buf_dblwr->mutex));
+ mutex_enter(&buf_dblwr->mutex);
/* Write first to doublewrite buffer blocks. We use synchronous
aio and thus know that file write has been completed when the
@@ -760,17 +808,18 @@ try_again:
if (buf_dblwr->first_free == 0) {
- mutex_exit(&(buf_dblwr->mutex));
+ mutex_exit(&buf_dblwr->mutex);
return;
}
if (buf_dblwr->batch_running) {
- mutex_exit(&buf_dblwr->mutex);
-
/* Another thread is running the batch right now. Wait
for it to finish. */
- os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
+ ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event);
+ mutex_exit(&buf_dblwr->mutex);
+
+ os_event_wait_low(buf_dblwr->b_event, sig_count);
goto try_again;
}
@@ -779,7 +828,7 @@ try_again:
/* Disallow anyone else to post to doublewrite buffer or to
start another batch of flushing. */
- buf_dblwr->batch_running = TRUE;
+ buf_dblwr->batch_running = true;
first_free = buf_dblwr->first_free;
/* Now safe to release the mutex. Note that though no other
@@ -818,7 +867,7 @@ try_again:
len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
buf_dblwr->first_free) * UNIV_PAGE_SIZE;
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block1, 0, len,
(void*) write_buf, NULL);
@@ -834,7 +883,7 @@ try_again:
write_buf = buf_dblwr->write_buf
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block2, 0, len,
(void*) write_buf, NULL);
@@ -864,7 +913,7 @@ flush:
ut_ad(first_free == buf_dblwr->first_free);
for (ulint i = 0; i < first_free; i++) {
buf_dblwr_write_block_to_datafile(
- buf_dblwr->buf_block_arr[i]);
+ buf_dblwr->buf_block_arr[i], false);
}
/* Wake possible simulated aio thread to actually post the
@@ -889,12 +938,11 @@ buf_dblwr_add_to_batch(
ut_a(buf_page_in_file(bpage));
try_again:
- mutex_enter(&(buf_dblwr->mutex));
+ mutex_enter(&buf_dblwr->mutex);
ut_a(buf_dblwr->first_free <= srv_doublewrite_batch_size);
if (buf_dblwr->batch_running) {
- mutex_exit(&buf_dblwr->mutex);
/* This is not nearly as bad as it looks. There is only
page_cleaner thread which does background flushing
@@ -902,7 +950,10 @@ try_again:
point. The only exception is when a user thread is
forced to do a flush batch because of a sync
checkpoint. */
- os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
+ ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event);
+ mutex_exit(&buf_dblwr->mutex);
+
+ os_event_wait_low(buf_dblwr->b_event, sig_count);
goto try_again;
}
@@ -967,7 +1018,8 @@ UNIV_INTERN
void
buf_dblwr_write_single_page(
/*========================*/
- buf_page_t* bpage) /*!< in: buffer block to write */
+ buf_page_t* bpage, /*!< in: buffer block to write */
+ bool sync) /*!< in: true if sync IO requested */
{
ulint n_slots;
ulint size;
@@ -1004,11 +1056,12 @@ retry:
mutex_enter(&buf_dblwr->mutex);
if (buf_dblwr->s_reserved == n_slots) {
+ /* All slots are reserved. */
+ ib_int64_t sig_count =
+ os_event_reset(buf_dblwr->s_event);
mutex_exit(&buf_dblwr->mutex);
- /* All slots are reserved. Since it involves two IOs
- during the processing a sleep of 10ms should be
- enough. */
- os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
+ os_event_wait_low(buf_dblwr->s_event, sig_count);
+
goto retry;
}
@@ -1021,9 +1074,14 @@ retry:
/* We are guaranteed to find a slot. */
ut_a(i < size);
- buf_dblwr->in_use[i] = TRUE;
+ buf_dblwr->in_use[i] = true;
buf_dblwr->s_reserved++;
buf_dblwr->buf_block_arr[i] = bpage;
+
+ /* increment the doublewrite flushed pages counter */
+ srv_stats.dblwr_pages_written.inc();
+ srv_stats.dblwr_writes.inc();
+
mutex_exit(&buf_dblwr->mutex);
/* Let's see if we are going to write in the first or second
@@ -1053,14 +1111,14 @@ retry:
memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i
+ zip_size, 0, UNIV_PAGE_SIZE - zip_size);
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
(void*) (buf_dblwr->write_buf
+ UNIV_PAGE_SIZE * i), NULL);
} else {
/* It is a regular page. Write it directly to the
doublewrite buffer */
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
(void*) ((buf_block_t*) bpage)->frame,
NULL);
@@ -1072,22 +1130,6 @@ retry:
/* We know that the write has been flushed to disk now
and during recovery we will find it in the doublewrite buffer
blocks. Next do the write to the intended position. */
- buf_dblwr_write_block_to_datafile(bpage);
-
- /* Sync the writes to the disk. */
- buf_flush_sync_datafiles();
-
- mutex_enter(&buf_dblwr->mutex);
-
- buf_dblwr->s_reserved--;
- buf_dblwr->buf_block_arr[i] = NULL;
- buf_dblwr->in_use[i] = FALSE;
-
- /* increment the doublewrite flushed pages counter */
- srv_stats.dblwr_pages_written.inc();
- srv_stats.dblwr_writes.inc();
-
- mutex_exit(&(buf_dblwr->mutex));
-
+ buf_dblwr_write_block_to_datafile(bpage, sync);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 542c1669667..3af434b77f4 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -130,6 +130,60 @@ buf_flush_validate_skip(
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+/*******************************************************************//**
+Sets hazard pointer during flush_list iteration. */
+UNIV_INLINE
+void
+buf_flush_set_hp(
+/*=============*/
+ buf_pool_t* buf_pool,/*!< in/out: buffer pool instance */
+ const buf_page_t* bpage) /*!< in: buffer control block */
+{
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+ ut_ad(buf_pool->flush_list_hp == NULL || bpage == NULL);
+ ut_ad(!bpage || buf_page_in_file(bpage));
+ ut_ad(!bpage || bpage->in_flush_list);
+ ut_ad(!bpage || buf_pool_from_bpage(bpage) == buf_pool);
+
+ buf_pool->flush_list_hp = bpage;
+}
+
+/*******************************************************************//**
+Checks if the given block is a hazard pointer
+@return true if bpage is hazard pointer */
+UNIV_INLINE
+bool
+buf_flush_is_hp(
+/*============*/
+ buf_pool_t* buf_pool,/*!< in: buffer pool instance */
+ const buf_page_t* bpage) /*!< in: buffer control block */
+{
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+
+ return(buf_pool->flush_list_hp == bpage);
+}
+
+/*******************************************************************//**
+Whenever we move a block in flush_list (either to remove it or to
+relocate it) we check the hazard pointer set by some other thread
+doing the flush list scan. If the hazard pointer is the same as the
+one we are about to move then we set it to NULL to force a rescan
+in the thread doing the batch. */
+UNIV_INLINE
+void
+buf_flush_update_hp(
+/*================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_page_t* bpage) /*!< in: buffer control block */
+{
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+
+ if (buf_flush_is_hp(buf_pool, bpage)) {
+ buf_flush_set_hp(buf_pool, NULL);
+ MONITOR_INC(MONITOR_FLUSH_HP_RESCAN);
+ }
+}
+
/******************************************************************//**
Insert a block in the flush_rbt and returns a pointer to its
predecessor or NULL if no predecessor. The ordering is maintained
@@ -471,34 +525,35 @@ buf_flush_ready_for_replace(
}
/********************************************************************//**
-Returns TRUE if the block is modified and ready for flushing.
-@return TRUE if can flush immediately */
-UNIV_INLINE
-ibool
+Returns true if the block is modified and ready for flushing.
+@return true if can flush immediately */
+UNIV_INTERN
+bool
buf_flush_ready_for_flush(
/*======================*/
buf_page_t* bpage, /*!< in: buffer control block, must be
buf_page_in_file(bpage) */
- enum buf_flush flush_type)/*!< in: type of flush */
+ buf_flush_t flush_type)/*!< in: type of flush */
{
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
+
ut_a(buf_page_in_file(bpage));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(flush_type < BUF_FLUSH_N_TYPES);
if (bpage->oldest_modification == 0
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
- return(FALSE);
+ return(false);
}
ut_ad(bpage->in_flush_list);
switch (flush_type) {
case BUF_FLUSH_LIST:
- return(TRUE);
+ return(true);
case BUF_FLUSH_LRU:
case BUF_FLUSH_SINGLE_PAGE:
@@ -514,7 +569,7 @@ buf_flush_ready_for_flush(
}
ut_error;
- return(FALSE);
+ return(false);
}
/********************************************************************//**
@@ -535,9 +590,9 @@ buf_flush_remove(
buf_flush_list_mutex_enter(buf_pool);
switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
/* Clean compressed pages should not be on the flush list */
- case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
@@ -574,6 +629,7 @@ buf_flush_remove(
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+ buf_flush_update_hp(buf_pool, bpage);
buf_flush_list_mutex_exit(buf_pool);
}
@@ -652,6 +708,7 @@ buf_flush_relocate_on_flush_list(
ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+ buf_flush_update_hp(buf_pool, bpage);
buf_flush_list_mutex_exit(buf_pool);
}
@@ -663,7 +720,7 @@ buf_flush_write_complete(
/*=====================*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
- enum buf_flush flush_type;
+ buf_flush_t flush_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(bpage);
@@ -684,18 +741,7 @@ buf_flush_write_complete(
os_event_set(buf_pool->no_flush[flush_type]);
}
- switch (flush_type) {
- case BUF_FLUSH_LIST:
- case BUF_FLUSH_LRU:
- buf_dblwr_update();
- break;
- case BUF_FLUSH_SINGLE_PAGE:
- /* Single page flushes are synchronous. No need
- to update doublewrite */
- break;
- case BUF_FLUSH_N_TYPES:
- ut_error;
- }
+ buf_dblwr_update(bpage, flush_type);
}
#endif /* !UNIV_HOTBACKUP */
@@ -827,28 +873,6 @@ buf_flush_init_for_writing(
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
-Flush a batch of writes to the datafiles that have already been
-written by the OS. */
-UNIV_INTERN
-void
-buf_flush_sync_datafiles(void)
-/*==========================*/
-{
- /* Wake possible simulated aio thread to actually post the
- writes to the operating system */
- os_aio_simulated_wake_handler_threads();
-
- /* Wait that all async writes to tablespaces have been posted to
- the OS */
- os_aio_wait_until_no_pending_writes();
-
- /* Now we flush the data to disk (for example, with fsync) */
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- return;
-}
-
-/********************************************************************//**
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
also when the doublewrite buffer is used, we must call
buf_dblwr_flush_buffered_writes after we have posted a batch of
@@ -858,7 +882,8 @@ void
buf_flush_write_block_low(
/*======================*/
buf_page_t* bpage, /*!< in: buffer block to write */
- enum buf_flush flush_type) /*!< in: type of flush */
+ buf_flush_t flush_type, /*!< in: type of flush */
+ bool sync) /*!< in: true if sync IO request */
{
ulint zip_size = buf_page_get_zip_size(bpage);
page_t* frame = NULL;
@@ -903,7 +928,7 @@ buf_flush_write_block_low(
log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
#endif
switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
@@ -935,15 +960,29 @@ buf_flush_write_block_low(
if (!srv_use_doublewrite_buf || !buf_dblwr) {
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_page_get_space(bpage), zip_size,
+ sync, buf_page_get_space(bpage), zip_size,
buf_page_get_page_no(bpage), 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
frame, bpage);
} else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
- buf_dblwr_write_single_page(bpage);
+ buf_dblwr_write_single_page(bpage, sync);
} else {
+ ut_ad(!sync);
buf_dblwr_add_to_batch(bpage);
}
+
+ /* When doing single page flushing the IO is done synchronously
+ and we flush the changes to disk only for the tablespace we
+ are working on. */
+ if (sync) {
+ ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE);
+ fil_flush(buf_page_get_space(bpage));
+ buf_page_io_complete(bpage);
+ }
+
+ /* Increment the counter of I/O operations used
+ for selecting LRU policy. */
+ buf_LRU_stat_inc_io();
}
/********************************************************************//**
@@ -959,7 +998,8 @@ buf_flush_page(
/*===========*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_page_t* bpage, /*!< in: buffer control block */
- buf_flush flush_type) /*!< in: type of flush */
+ buf_flush_t flush_type, /*!< in: type of flush */
+ bool sync) /*!< in: true if sync IO request */
{
ib_mutex_t* block_mutex;
ibool is_uncompressed;
@@ -967,6 +1007,7 @@ buf_flush_page(
ut_ad(flush_type < BUF_FLUSH_N_TYPES);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_in_file(bpage));
+ ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE);
block_mutex = buf_page_get_mutex(bpage);
ut_ad(mutex_own(block_mutex));
@@ -1062,7 +1103,7 @@ buf_flush_page(
flush_type, bpage->space, bpage->offset);
}
#endif /* UNIV_DEBUG */
- buf_flush_write_block_low(bpage, flush_type);
+ buf_flush_write_block_low(bpage, flush_type, sync);
}
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
@@ -1089,8 +1130,7 @@ buf_flush_page_try(
/* The following call will release the buffer pool and
block mutex. */
- buf_flush_page(buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE);
- buf_flush_sync_datafiles();
+ buf_flush_page(buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true);
return(TRUE);
}
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
@@ -1103,7 +1143,7 @@ buf_flush_check_neighbor(
/*=====================*/
ulint space, /*!< in: space id */
ulint offset, /*!< in: page offset */
- enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or
+ buf_flush_t flush_type) /*!< in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST */
{
buf_page_t* bpage;
@@ -1153,7 +1193,7 @@ buf_flush_try_neighbors(
/*====================*/
ulint space, /*!< in: space id */
ulint offset, /*!< in: page offset */
- enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
+ buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST */
ulint n_flushed, /*!< in: number of pages
flushed so far in this batch */
@@ -1274,7 +1314,7 @@ buf_flush_try_neighbors(
doublewrite buffer before we start
waiting. */
- buf_flush_page(buf_pool, bpage, flush_type);
+ buf_flush_page(buf_pool, bpage, flush_type, false);
ut_ad(!mutex_own(block_mutex));
ut_ad(!buf_pool_mutex_own(buf_pool));
count++;
@@ -1311,7 +1351,7 @@ buf_flush_page_and_try_neighbors(
buf_page_t* bpage, /*!< in: buffer control block,
must be
buf_page_in_file(bpage) */
- enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU
+ buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
ulint n_to_flush, /*!< in: number of pages to
flush */
@@ -1396,7 +1436,7 @@ buf_free_from_unzip_LRU_list_batch(
&& lru_len > UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
++scanned;
- if (buf_LRU_free_block(&block->page, FALSE)) {
+ if (buf_LRU_free_page(&block->page, false)) {
/* Block was freed. buf_pool->mutex potentially
released and reacquired */
++count;
@@ -1473,7 +1513,7 @@ buf_flush_LRU_list_batch(
of the flushed pages then the scan becomes
O(n*n). */
if (evict) {
- if (buf_LRU_free_block(bpage, TRUE)) {
+ if (buf_LRU_free_page(bpage, true)) {
/* buf_pool->mutex was potentially
released and reacquired. */
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
@@ -1561,82 +1601,62 @@ buf_do_flush_list_batch(
their number does not exceed
min_n) */
{
- ulint len;
- buf_page_t* bpage;
ulint count = 0;
ulint scanned = 0;
ut_ad(buf_pool_mutex_own(buf_pool));
- /* If we have flushed enough, leave the loop */
- do {
- /* Start from the end of the list looking for a suitable
- block to be flushed. */
-
- buf_flush_list_mutex_enter(buf_pool);
-
- /* We use len here because theoretically insertions can
- happen in the flush_list below while we are traversing
- it for a suitable candidate for flushing. We'd like to
- set a limit on how farther we are willing to traverse
- the list. */
- len = UT_LIST_GET_LEN(buf_pool->flush_list);
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ /* Start from the end of the list looking for a suitable
+ block to be flushed. */
+ buf_flush_list_mutex_enter(buf_pool);
+ ulint len = UT_LIST_GET_LEN(buf_pool->flush_list);
- if (bpage) {
- ut_a(bpage->oldest_modification > 0);
- }
+ /* In order not to degenerate this scan to O(n*n) we attempt
+ to preserve pointer of previous block in the flush list. To do
+ so we declare it a hazard pointer. Any thread working on the
+ flush list must check the hazard pointer and if it is removing
+ the same block then it must reset it. */
+ for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ count < min_n && bpage != NULL && len > 0
+ && bpage->oldest_modification < lsn_limit;
+ ++scanned) {
- if (!bpage || bpage->oldest_modification >= lsn_limit) {
-
- /* We have flushed enough */
- buf_flush_list_mutex_exit(buf_pool);
- break;
- }
+ buf_page_t* prev;
ut_a(bpage->oldest_modification > 0);
-
ut_ad(bpage->in_flush_list);
- buf_flush_list_mutex_exit(buf_pool);
+ prev = UT_LIST_GET_PREV(list, bpage);
+ buf_flush_set_hp(buf_pool, prev);
- /* The list may change during the flushing and we cannot
- safely preserve within this function a pointer to a
- block in the list! */
- while (bpage != NULL
- && len > 0
- && !buf_flush_page_and_try_neighbors(
- bpage, BUF_FLUSH_LIST, min_n, &count)) {
-
- ++scanned;
- buf_flush_list_mutex_enter(buf_pool);
-
- /* If we are here that means that buf_pool->mutex
- was not released in buf_flush_page_and_try_neighbors()
- above and this guarantees that bpage didn't get
- relocated since we released the flush_list
- mutex above. There is a chance, however, that
- the bpage got removed from flush_list (not
- currently possible because flush_list_remove()
- also obtains buf_pool mutex but that may change
- in future). To avoid this scenario we check
- the oldest_modification and if it is zero
- we start all over again. */
- if (bpage->oldest_modification == 0) {
- buf_flush_list_mutex_exit(buf_pool);
- break;
- }
+ buf_flush_list_mutex_exit(buf_pool);
- bpage = UT_LIST_GET_PREV(list, bpage);
+#ifdef UNIV_DEBUG
+ bool flushed =
+#endif /* UNIV_DEBUG */
+ buf_flush_page_and_try_neighbors(
+ bpage, BUF_FLUSH_LIST, min_n, &count);
- ut_ad(!bpage || bpage->in_flush_list);
+ buf_flush_list_mutex_enter(buf_pool);
- buf_flush_list_mutex_exit(buf_pool);
+ ut_ad(flushed || buf_flush_is_hp(buf_pool, prev));
+ if (!buf_flush_is_hp(buf_pool, prev)) {
+ /* The hazard pointer was reset by some other
+ thread. Restart the scan. */
+ ut_ad(buf_flush_is_hp(buf_pool, NULL));
+ bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ len = UT_LIST_GET_LEN(buf_pool->flush_list);
+ } else {
+ bpage = prev;
--len;
+ buf_flush_set_hp(buf_pool, NULL);
}
- } while (count < min_n && bpage != NULL && len > 0);
+ ut_ad(!bpage || bpage->in_flush_list);
+ }
+
+ buf_flush_list_mutex_exit(buf_pool);
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BATCH_SCANNED,
MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
@@ -1660,7 +1680,7 @@ ulint
buf_flush_batch(
/*============*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
+ buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
then the caller must not own any
latches on pages */
@@ -1716,7 +1736,7 @@ static
void
buf_flush_common(
/*=============*/
- enum buf_flush flush_type, /*!< in: type of flush */
+ buf_flush_t flush_type, /*!< in: type of flush */
ulint page_count) /*!< in: number of pages flushed */
{
buf_dblwr_flush_buffered_writes();
@@ -1742,7 +1762,7 @@ ibool
buf_flush_start(
/*============*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
- enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
+ buf_flush_t flush_type) /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
{
buf_pool_mutex_enter(buf_pool);
@@ -1771,7 +1791,7 @@ void
buf_flush_end(
/*==========*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
- enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
+ buf_flush_t flush_type) /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
{
buf_pool_mutex_enter(buf_pool);
@@ -1797,7 +1817,7 @@ void
buf_flush_wait_batch_end(
/*=====================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
- enum buf_flush type) /*!< in: BUF_FLUSH_LRU
+ buf_flush_t type) /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
{
ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
@@ -1967,7 +1987,7 @@ buf_flush_single_page_from_LRU(
buf_page_t* bpage;
ib_mutex_t* block_mutex;
ibool freed;
- ibool evict_zip;
+ bool evict_zip;
buf_pool_mutex_enter(buf_pool);
@@ -2000,9 +2020,7 @@ buf_flush_single_page_from_LRU(
/* The following call will release the buffer pool and
block mutex. */
- buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE);
-
- buf_flush_sync_datafiles();
+ buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true);
/* At this point the page has been written to the disk.
As we are not holding buffer pool or block mutex therefore
@@ -2037,7 +2055,7 @@ buf_flush_single_page_from_LRU(
evict_zip = !buf_LRU_evict_from_unzip_LRU(buf_pool);;
- freed = buf_LRU_free_block(bpage, evict_zip);
+ freed = buf_LRU_free_page(bpage, evict_zip);
buf_pool_mutex_exit(buf_pool);
return(freed);
@@ -2060,12 +2078,21 @@ buf_flush_LRU_tail(void)
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
+ ulint scan_depth;
+
+ /* srv_LRU_scan_depth can be an arbitrarily large value.
+ We cap it with current LRU size. */
+ buf_pool_mutex_enter(buf_pool);
+ scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
+ buf_pool_mutex_exit(buf_pool);
+
+ scan_depth = ut_min(srv_LRU_scan_depth, scan_depth);
/* We divide LRU flush into smaller chunks because
there may be user threads waiting for the flush to
end in buf_LRU_get_free_block(). */
for (ulint j = 0;
- j < srv_LRU_scan_depth;
+ j < scan_depth;
j += PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE) {
ulint n_flushed = 0;
@@ -2074,11 +2101,22 @@ buf_flush_LRU_tail(void)
that can trigger an LRU flush. It is possible
that a batch triggered during last iteration is
still running, */
- buf_flush_LRU(buf_pool,
- PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE,
- &n_flushed);
+ if (buf_flush_LRU(buf_pool,
+ PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE,
+ &n_flushed)) {
+
+ /* Allowed only one batch per
+ buffer pool instance. */
+ buf_flush_wait_batch_end(
+ buf_pool, BUF_FLUSH_LRU);
+ }
- total_flushed += n_flushed;
+ if (n_flushed) {
+ total_flushed += n_flushed;
+ } else {
+ /* Nothing to flush */
+ break;
+ }
}
}
@@ -2272,9 +2310,9 @@ page_cleaner_flush_pages_if_needed(void)
oldest_lsn = buf_pool_get_oldest_modification();
- ut_ad(oldest_lsn <= cur_lsn);
+ ut_ad(oldest_lsn <= log_get_lsn());
- age = cur_lsn - oldest_lsn;
+ age = cur_lsn > oldest_lsn ? cur_lsn - oldest_lsn : 0;
pct_for_dirty = af_get_pct_for_dirty();
pct_for_lsn = af_get_pct_for_lsn(age);
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 270263d95f1..bc73119c227 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -137,19 +137,19 @@ The caller must hold buf_pool->mutex, the buf_page_get_mutex() mutex
and the appropriate hash_lock. This function will release the
buf_page_get_mutex() and the hash_lock.
-If a compressed page or a compressed-only block descriptor is freed,
-other compressed pages or compressed-only block descriptors may be
-relocated.
-@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state
-was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */
-static
-enum buf_page_state
-buf_LRU_block_remove_hashed_page(
-/*=============================*/
+If a compressed page is freed other compressed pages may be relocated.
+@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
+caller needs to free the page to the free list
+@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
+this case the block is already returned to the buddy allocator. */
+static __attribute__((nonnull, warn_unused_result))
+bool
+buf_LRU_block_remove_hashed(
+/*========================*/
buf_page_t* bpage, /*!< in: block, must contain a file page and
be in a state where it can be freed; there
may or may not be a hash index to the page */
- ibool zip); /*!< in: TRUE if should remove also the
+ bool zip); /*!< in: true if should remove also the
compressed page of an uncompressed page */
/******************************************************************//**
Puts a file page which has no hash index to the free list. */
@@ -460,14 +460,9 @@ buf_flush_or_remove_page(
don't remove else remove without
flushing to disk */
{
- ib_mutex_t* block_mutex;
- bool processed = false;
-
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_flush_list_mutex_own(buf_pool));
- block_mutex = buf_page_get_mutex(bpage);
-
/* bpage->space and bpage->io_fix are protected by
buf_pool->mutex and block_mutex. It is safe to check
them while holding buf_pool->mutex only. */
@@ -477,63 +472,60 @@ buf_flush_or_remove_page(
/* We cannot remove this page during this scan
yet; maybe the system is currently reading it
in, or flushing the modifications to the file */
+ return(false);
- } else {
-
- /* We have to release the flush_list_mutex to obey the
- latching order. We are however guaranteed that the page
- will stay in the flush_list because buf_flush_remove()
- needs buf_pool->mutex as well (for the non-flush case). */
-
- buf_flush_list_mutex_exit(buf_pool);
-
- mutex_enter(block_mutex);
-
- ut_ad(bpage->oldest_modification != 0);
+ }
- if (bpage->buf_fix_count > 0) {
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ bool processed = false;
- mutex_exit(block_mutex);
+ /* We have to release the flush_list_mutex to obey the
+ latching order. We are however guaranteed that the page
+ will stay in the flush_list and won't be relocated because
+ buf_flush_remove() and buf_flush_relocate_on_flush_list()
+ need buf_pool->mutex as well. */
- /* We cannot remove this page yet;
- maybe the system is currently reading
- it in, or flushing the modifications
- to the file */
+ buf_flush_list_mutex_exit(buf_pool);
- } else if (!flush) {
+ mutex_enter(block_mutex);
- buf_flush_remove(bpage);
+ ut_ad(bpage->oldest_modification != 0);
- mutex_exit(block_mutex);
+ if (!flush) {
- processed = true;
+ buf_flush_remove(bpage);
- } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ mutex_exit(block_mutex);
- /* Check the status again after releasing the flush
- list mutex and acquiring the block mutex. The background
- flush thread may be in the process of flushing this
- page when we released the flush list mutex. */
+ processed = true;
- /* The following call will release the buffer pool
- and block mutex. */
- buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE);
+ } else if (buf_flush_ready_for_flush(bpage,
+ BUF_FLUSH_SINGLE_PAGE)) {
- /* Wake possible simulated aio thread to actually
- post the writes to the operating system */
- os_aio_simulated_wake_handler_threads();
+ /* The following call will release the buffer pool
+ and block mutex. */
+ buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false);
+ ut_ad(!mutex_own(block_mutex));
- buf_pool_mutex_enter(buf_pool);
+ /* Wake possible simulated aio thread to actually
+ post the writes to the operating system */
+ os_aio_simulated_wake_handler_threads();
- processed = true;
- } else {
- mutex_exit(block_mutex);
- }
+ buf_pool_mutex_enter(buf_pool);
- buf_flush_list_mutex_enter(buf_pool);
+ processed = true;
+ } else {
+ /* Not ready for flush. It can't be IO fixed because we
+ checked for that at the start of the function. It must
+ be buffer fixed. */
+ ut_ad(bpage->buf_fix_count > 0);
+ mutex_exit(block_mutex);
}
+ buf_flush_list_mutex_enter(buf_pool);
+
ut_ad(!mutex_own(block_mutex));
+ ut_ad(buf_pool_mutex_own(buf_pool));
return(processed);
}
@@ -562,10 +554,12 @@ buf_flush_or_remove_pages(
buf_page_t* prev;
buf_page_t* bpage;
ulint processed = 0;
- bool all_freed = true;
buf_flush_list_mutex_enter(buf_pool);
+rescan:
+ bool all_freed = true;
+
for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
bpage != NULL;
bpage = prev) {
@@ -585,9 +579,33 @@ buf_flush_or_remove_pages(
} else if (!buf_flush_or_remove_page(buf_pool, bpage, flush)) {
/* Remove was unsuccessful, we have to try again
- by scanning the entire list from the end. */
+ by scanning the entire list from the end.
+ This also means that we never released the
+ buf_pool mutex. Therefore we can trust the prev
+ pointer.
+ buf_flush_or_remove_page() released the
+ flush list mutex but not the buf_pool mutex.
+ Therefore it is possible that a new page was
+ added to the flush list. For example, in case
+ where we are at the head of the flush list and
+ prev == NULL. That is OK because we have the
+ tablespace quiesced and no new pages for this
+ space-id should enter flush_list. This is
+ because the only callers of this function are
+ DROP TABLE and FLUSH TABLE FOR EXPORT.
+ We know that we'll have to do at least one more
+ scan but we don't break out of loop here and
+ try to do as much work as we can in this
+ iteration. */
all_freed = false;
+ } else if (flush) {
+
+ /* The processing was successful. And during the
+ processing we have released the buf_pool mutex
+ when calling buf_flush_page(). We cannot trust
+ prev pointer. */
+ goto rescan;
}
++processed;
@@ -649,7 +667,7 @@ buf_flush_dirty_pages(
ut_ad(buf_flush_validate(buf_pool));
if (err == DB_FAIL) {
- os_thread_sleep(20000);
+ os_thread_sleep(2000);
}
/* DB_FAIL is a soft error, it means that the task wasn't
@@ -658,6 +676,9 @@ buf_flush_dirty_pages(
ut_ad(buf_flush_validate(buf_pool));
} while (err == DB_FAIL);
+
+ ut_ad(err == DB_INTERRUPTED
+ || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
}
/******************************************************************//**
@@ -778,22 +799,16 @@ scan_again:
/* Remove from the LRU list. */
- if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
- != BUF_BLOCK_ZIP_FREE) {
-
+ if (buf_LRU_block_remove_hashed(bpage, true)) {
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
-
} else {
- /* The block_mutex should have been released
- by buf_LRU_block_remove_hashed_page() when it
- returns BUF_BLOCK_ZIP_FREE. */
ut_ad(block_mutex == &buf_pool->zip_mutex);
}
ut_ad(!mutex_own(block_mutex));
#ifdef UNIV_SYNC_DEBUG
- /* buf_LRU_block_remove_hashed_page() releases the hash_lock */
+ /* buf_LRU_block_remove_hashed() releases the hash_lock */
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
@@ -835,15 +850,11 @@ buf_LRU_remove_pages(
case BUF_REMOVE_FLUSH_NO_WRITE:
ut_a(trx == 0);
buf_flush_dirty_pages(buf_pool, id, false, NULL);
- ut_ad(trx_is_interrupted(trx)
- || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
break;
case BUF_REMOVE_FLUSH_WRITE:
ut_a(trx != 0);
buf_flush_dirty_pages(buf_pool, id, true, trx);
- ut_ad(trx_is_interrupted(trx)
- || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
/* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes();
fil_flush(id);
@@ -880,10 +891,14 @@ buf_LRU_flush_or_remove_pages(
switch (buf_remove) {
case BUF_REMOVE_ALL_NO_WRITE:
- case BUF_REMOVE_FLUSH_NO_WRITE:
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
break;
+ case BUF_REMOVE_FLUSH_NO_WRITE:
+ /* It is a DROP TABLE for a single table
+ tablespace. No AHI entries exist because
+ we already dealt with them when freeing up
+ extents. */
case BUF_REMOVE_FLUSH_WRITE:
/* We allow read-only queries against the
table, there is no need to drop the AHI entries. */
@@ -892,13 +907,6 @@ buf_LRU_flush_or_remove_pages(
buf_LRU_remove_pages(buf_pool, id, buf_remove, trx);
}
-
-#ifdef UNIV_DEBUG
- if (trx != 0 && id != 0) {
- ut_ad(trx_is_interrupted(trx)
- || buf_flush_get_dirty_pages_count(id) == 0);
- }
-#endif /* UNIV_DEBUG */
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -972,7 +980,7 @@ buf_LRU_free_from_unzip_LRU_list(
ut_ad(block->in_unzip_LRU_list);
ut_ad(block->page.in_LRU_list);
- freed = buf_LRU_free_block(&block->page, FALSE);
+ freed = buf_LRU_free_page(&block->page, false);
block = prev_block;
}
@@ -1017,7 +1025,7 @@ buf_LRU_free_from_common_LRU_list(
ut_ad(bpage->in_LRU_list);
accessed = buf_page_is_accessed(bpage);
- freed = buf_LRU_free_block(bpage, TRUE);
+ freed = buf_LRU_free_page(bpage, true);
if (freed && !accessed) {
/* Keep track of pages that are evicted without
ever being accessed. This gives us a measure of
@@ -1788,24 +1796,23 @@ buf_LRU_make_block_old(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
-NOTE: If this function returns TRUE, it will temporarily
+NOTE: If this function returns true, it will temporarily
release buf_pool->mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool->mutex and must not hold any
buf_page_get_mutex() when calling this function.
-@return TRUE if freed, FALSE otherwise. */
+@return true if freed, false otherwise. */
UNIV_INTERN
-ibool
-buf_LRU_free_block(
+bool
+buf_LRU_free_page(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */
- ibool zip) /*!< in: TRUE if should remove also the
+ bool zip) /*!< in: true if should remove also the
compressed page of an uncompressed page */
{
buf_page_t* b = NULL;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- enum buf_page_state page_state;
const ulint fold = buf_page_address_fold(bpage->space,
bpage->offset);
rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
@@ -1853,7 +1860,7 @@ buf_LRU_free_block(
func_exit:
rw_lock_x_unlock(hash_lock);
mutex_exit(block_mutex);
- return(FALSE);
+ return(false);
} else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
b = buf_page_alloc_descriptor();
@@ -1885,20 +1892,16 @@ func_exit:
#endif /* UNIV_SYNC_DEBUG */
ut_ad(buf_page_can_relocate(bpage));
- page_state = buf_LRU_block_remove_hashed_page(bpage, zip);
+ if (!buf_LRU_block_remove_hashed(bpage, zip)) {
+ return(true);
+ }
#ifdef UNIV_SYNC_DEBUG
- /* buf_LRU_block_remove_hashed_page() releases the hash_lock */
+ /* buf_LRU_block_remove_hashed() releases the hash_lock */
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
&& !rw_lock_own(hash_lock, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
- if (page_state == BUF_BLOCK_ZIP_FREE) {
- return(TRUE);
- }
-
- ut_ad(page_state == BUF_BLOCK_REMOVE_HASH);
-
/* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL
then it was a compressed page with an uncompressed frame and
we are interested in freeing only the uncompressed frame.
@@ -1926,7 +1929,7 @@ func_exit:
/* The fields in_page_hash and in_LRU_list of
the to-be-freed block descriptor should have
been cleared in
- buf_LRU_block_remove_hashed_page(), which
+ buf_LRU_block_remove_hashed(), which
invokes buf_LRU_remove_block(). */
ut_ad(!bpage->in_page_hash);
ut_ad(!bpage->in_LRU_list);
@@ -1935,7 +1938,7 @@ func_exit:
ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
/* The fields of bpage were copied to b before
- buf_LRU_block_remove_hashed_page() was invoked. */
+ buf_LRU_block_remove_hashed() was invoked. */
ut_ad(!b->in_zip_hash);
ut_ad(b->in_page_hash);
ut_ad(b->in_LRU_list);
@@ -2037,7 +2040,7 @@ func_exit:
/* Remove possible adaptive hash index on the page.
The page was declared uninitialized by
- buf_LRU_block_remove_hashed_page(). We need to flag
+ buf_LRU_block_remove_hashed(). We need to flag
the contents of the page valid (which it still is) in
order to avoid bogus Valgrind warnings.*/
@@ -2073,7 +2076,7 @@ func_exit:
mutex_exit(block_mutex);
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
- return(TRUE);
+ return(true);
}
/******************************************************************//**
@@ -2147,19 +2150,19 @@ The caller must hold buf_pool->mutex, the buf_page_get_mutex() mutex
and the appropriate hash_lock. This function will release the
buf_page_get_mutex() and the hash_lock.
-If a compressed page or a compressed-only block descriptor is freed,
-other compressed pages or compressed-only block descriptors may be
-relocated.
-@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state
-was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */
+If a compressed page is freed, other compressed pages may be relocated.
+@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
+caller needs to free the page to the free list
+@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
+this case the block is already returned to the buddy allocator. */
static
-enum buf_page_state
-buf_LRU_block_remove_hashed_page(
-/*=============================*/
+bool
+buf_LRU_block_remove_hashed(
+/*========================*/
buf_page_t* bpage, /*!< in: block, must contain a file page and
be in a state where it can be freed; there
may or may not be a hash index to the page */
- ibool zip) /*!< in: TRUE if should remove also the
+ bool zip) /*!< in: true if should remove also the
compressed page of an uncompressed page */
{
ulint fold;
@@ -2252,7 +2255,7 @@ buf_LRU_block_remove_hashed_page(
UNIV_MEM_ASSERT_W(bpage->zip.data,
page_zip_get_size(&bpage->zip));
break;
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_DIRTY:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
@@ -2319,7 +2322,7 @@ buf_LRU_block_remove_hashed_page(
buf_pool_mutex_exit_allow(buf_pool);
buf_page_free_descriptor(bpage);
- return(BUF_BLOCK_ZIP_FREE);
+ return(false);
case BUF_BLOCK_FILE_PAGE:
memset(((buf_block_t*) bpage)->frame
@@ -2370,9 +2373,9 @@ buf_LRU_block_remove_hashed_page(
page_zip_set_size(&bpage->zip, 0);
}
- return(BUF_BLOCK_REMOVE_HASH);
+ return(true);
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_DIRTY:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
@@ -2382,7 +2385,7 @@ buf_LRU_block_remove_hashed_page(
}
ut_error;
- return(BUF_BLOCK_ZIP_FREE);
+ return(false);
}
/******************************************************************//**
@@ -2427,12 +2430,11 @@ buf_LRU_free_one_page(
rw_lock_x_lock(hash_lock);
mutex_enter(block_mutex);
- if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
- != BUF_BLOCK_ZIP_FREE) {
+ if (buf_LRU_block_remove_hashed(bpage, true)) {
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
}
- /* buf_LRU_block_remove_hashed_page() releases hash_lock and block_mutex */
+ /* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
&& !rw_lock_own(hash_lock, RW_LOCK_SHARED));
@@ -2606,7 +2608,7 @@ buf_LRU_validate_instance(
bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 3a579e251ff..7c8369c0c09 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -107,7 +107,7 @@ buf_read_page_low(
dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
trying to read from a non-existent tablespace, or a
tablespace which is just now being dropped */
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ bool sync, /*!< in: true if synchronous aio is desired */
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
at read-ahead functions) */
@@ -152,7 +152,7 @@ buf_read_page_low(
syncronous i/o, to make sure they do not get involved in
thread deadlocks. */
- sync = TRUE;
+ sync = true;
}
/* The following call will also check if the tablespace does not exist
@@ -169,15 +169,17 @@ buf_read_page_low(
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr,
- "Posting read request for page %lu, sync %lu\n",
- (ulong) offset,
- (ulong) sync);
+ "Posting read request for page %lu, sync %s\n",
+ (ulong) offset, sync ? "true" : "false");
}
#endif
ut_ad(buf_page_in_file(bpage));
- thd_wait_begin(NULL, THD_WAIT_DISKIO);
+ if (sync) {
+ thd_wait_begin(NULL, THD_WAIT_DISKIO);
+ }
+
if (zip_size) {
*err = fil_io(OS_FILE_READ | wake_later
| ignore_nonexistent_pages,
@@ -191,7 +193,10 @@ buf_read_page_low(
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
((buf_block_t*) bpage)->frame, bpage);
}
- thd_wait_end(NULL);
+
+ if (sync) {
+ thd_wait_end(NULL);
+ }
if (*err != DB_SUCCESS) {
if (ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) {
@@ -337,7 +342,7 @@ read_ahead:
if (!ibuf_bitmap_page(zip_size, i)) {
count += buf_read_page_low(
- &err, FALSE,
+ &err, false,
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
space, zip_size, FALSE,
tablespace_version, i);
@@ -401,7 +406,7 @@ buf_read_page(
/* We do the i/o in the synchronous aio mode to save thread
switches: hence TRUE */
- count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+ count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space,
zip_size, FALSE,
tablespace_version, offset);
srv_stats.buf_pool_reads.add(count);
@@ -447,7 +452,7 @@ buf_read_page_async(
tablespace_version = fil_space_get_version(space);
- count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE
+ count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE
| OS_AIO_SIMULATED_WAKE_LATER
| BUF_READ_IGNORE_NONEXISTENT_PAGES,
space, zip_size, FALSE,
@@ -708,7 +713,7 @@ buf_read_ahead_linear(
if (!ibuf_bitmap_page(zip_size, i)) {
count += buf_read_page_low(
- &err, FALSE,
+ &err, false,
ibuf_mode,
space, zip_size, FALSE, tablespace_version, i);
if (err == DB_TABLESPACE_DELETED) {
@@ -754,7 +759,7 @@ UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
- ibool sync, /*!< in: TRUE if the caller
+ bool sync, /*!< in: true if the caller
wants this function to wait
for the highest address page
to get read in, before this
@@ -893,11 +898,11 @@ buf_read_recv_pages(
os_aio_print_debug = FALSE;
if ((i + 1 == n_stored) && sync) {
- buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+ buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space,
zip_size, TRUE, tablespace_version,
page_nos[i]);
} else {
- buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
+ buf_read_page_low(&err, false, BUF_READ_ANY_PAGE
| OS_AIO_SIMULATED_WAKE_LATER,
space, zip_size, TRUE,
tablespace_version, page_nos[i]);
diff --git a/storage/innobase/compile-innodb b/storage/innobase/compile-innodb
index 6b0b2df66da..77d2244f2bb 100644
--- a/storage/innobase/compile-innodb
+++ b/storage/innobase/compile-innodb
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2013, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc
index eea10759fcd..1a1dd29a202 100644
--- a/storage/innobase/dict/dict0boot.cc
+++ b/storage/innobase/dict/dict0boot.cc
@@ -32,7 +32,6 @@ Created 4/18/1996 Heikki Tuuri
#include "dict0crea.h"
#include "btr0btr.h"
#include "dict0load.h"
-#include "dict0load.h"
#include "trx0trx.h"
#include "srv0srv.h"
#include "ibuf0ibuf.h"
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index 864150b324a..c7cb3aa21bb 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -44,6 +44,7 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0vec.h"
#include "dict0priv.h"
#include "fts0priv.h"
+#include "ha_prototypes.h"
/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
@@ -891,11 +892,15 @@ create:
for (index = UT_LIST_GET_FIRST(table->indexes);
index;
index = UT_LIST_GET_NEXT(indexes, index)) {
- if (index->id == index_id && !(index->type & DICT_FTS)) {
- root_page_no = btr_create(type, space, zip_size,
- index_id, index, mtr);
- index->page = (unsigned int) root_page_no;
- return(root_page_no);
+ if (index->id == index_id) {
+ if (index->type & DICT_FTS) {
+ return(FIL_NULL);
+ } else {
+ root_page_no = btr_create(type, space, zip_size,
+ index_id, index, mtr);
+ index->page = (unsigned int) root_page_no;
+ return(root_page_no);
+ }
}
}
@@ -1453,11 +1458,11 @@ static __attribute__((nonnull, warn_unused_result))
dberr_t
dict_foreign_eval_sql(
/*==================*/
- pars_info_t* info, /*!< in: info struct, or NULL */
+ pars_info_t* info, /*!< in: info struct */
const char* sql, /*!< in: SQL string to evaluate */
- dict_table_t* table, /*!< in: table */
- dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx) /*!< in: transaction */
+ const char* name, /*!< in: table name (for diagnostics) */
+ const char* id, /*!< in: foreign key id */
+ trx_t* trx) /*!< in/out: transaction */
{
dberr_t error;
FILE* ef = dict_foreign_err_file;
@@ -1470,9 +1475,9 @@ dict_foreign_eval_sql(
ut_print_timestamp(ef);
fputs(" Error in foreign key constraint creation for table ",
ef);
- ut_print_name(ef, trx, TRUE, table->name);
+ ut_print_name(ef, trx, TRUE, name);
fputs(".\nA foreign key constraint of name ", ef);
- ut_print_name(ef, trx, TRUE, foreign->id);
+ ut_print_name(ef, trx, TRUE, id);
fputs("\nalready exists."
" (Note that internally InnoDB adds 'databasename'\n"
"in front of the user-defined constraint name.)\n"
@@ -1499,7 +1504,7 @@ dict_foreign_eval_sql(
ut_print_timestamp(ef);
fputs(" Internal error in foreign key constraint creation"
" for table ", ef);
- ut_print_name(ef, trx, TRUE, table->name);
+ ut_print_name(ef, trx, TRUE, name);
fputs(".\n"
"See the MySQL .err log in the datadir"
" for more information.\n", ef);
@@ -1519,10 +1524,10 @@ static __attribute__((nonnull, warn_unused_result))
dberr_t
dict_create_add_foreign_field_to_dictionary(
/*========================================*/
- ulint field_nr, /*!< in: foreign field number */
- dict_table_t* table, /*!< in: table */
- dict_foreign_t* foreign, /*!< in: foreign */
- trx_t* trx) /*!< in: transaction */
+ ulint field_nr, /*!< in: field number */
+ const char* table_name, /*!< in: table name */
+ const dict_foreign_t* foreign, /*!< in: foreign */
+ trx_t* trx) /*!< in/out: transaction */
{
pars_info_t* info = pars_info_create();
@@ -1543,48 +1548,26 @@ dict_create_add_foreign_field_to_dictionary(
"INSERT INTO SYS_FOREIGN_COLS VALUES"
"(:id, :pos, :for_col_name, :ref_col_name);\n"
"END;\n",
- table, foreign, trx));
+ table_name, foreign->id, trx));
}
/********************************************************************//**
-Add a single foreign key definition to the data dictionary tables in the
-database. We also generate names to constraints that were not named by the
-user. A generated constraint has a name of the format
-databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and
-are given locally for this table, that is, the number is not global, as in
-the old format constraints < 4.0.18 it used to be.
+Add a foreign key definition to the data dictionary tables.
@return error code or DB_SUCCESS */
UNIV_INTERN
dberr_t
dict_create_add_foreign_to_dictionary(
/*==================================*/
- ulint* id_nr, /*!< in/out: number to use in id generation;
- incremented if used */
- dict_table_t* table, /*!< in: table */
- dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx) /*!< in/out: dictionary transaction */
+ const char* name, /*!< in: table name */
+ const dict_foreign_t* foreign,/*!< in: foreign key */
+ trx_t* trx) /*!< in/out: dictionary transaction */
{
dberr_t error;
- ulint i;
-
pars_info_t* info = pars_info_create();
- if (foreign->id == NULL) {
- /* Generate a new constraint id */
- char* id;
- ulint namelen = strlen(table->name);
-
- id = static_cast<char*>(mem_heap_alloc(
- foreign->heap, namelen + 20));
-
- /* no overflow if number < 1e13 */
- sprintf(id, "%s_ibfk_%lu", table->name, (ulong) (*id_nr)++);
- foreign->id = id;
- }
-
pars_info_add_str_literal(info, "id", foreign->id);
- pars_info_add_str_literal(info, "for_name", table->name);
+ pars_info_add_str_literal(info, "for_name", name);
pars_info_add_str_literal(info, "ref_name",
foreign->referenced_table_name);
@@ -1598,16 +1581,16 @@ dict_create_add_foreign_to_dictionary(
"INSERT INTO SYS_FOREIGN VALUES"
"(:id, :for_name, :ref_name, :n_cols);\n"
"END;\n"
- , table, foreign, trx);
+ , name, foreign->id, trx);
if (error != DB_SUCCESS) {
return(error);
}
- for (i = 0; i < foreign->n_fields; i++) {
+ for (ulint i = 0; i < foreign->n_fields; i++) {
error = dict_create_add_foreign_field_to_dictionary(
- i, table, foreign, trx);
+ i, name, foreign, trx);
if (error != DB_SUCCESS) {
@@ -1654,7 +1637,15 @@ dict_create_add_foreigns_to_dictionary(
foreign;
foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
- error = dict_create_add_foreign_to_dictionary(&number, table,
+ error = dict_create_add_foreign_id(&number, table->name,
+ foreign);
+
+ if (error != DB_SUCCESS) {
+
+ return(error);
+ }
+
+ error = dict_create_add_foreign_to_dictionary(table->name,
foreign, trx);
if (error != DB_SUCCESS) {
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index a560dc54eac..1a1697b2ffc 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -413,7 +413,8 @@ dict_table_try_drop_aborted(
trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
if (table == NULL) {
- table = dict_table_open_on_id_low(table_id);
+ table = dict_table_open_on_id_low(
+ table_id, DICT_ERR_IGNORE_NONE);
} else {
ut_ad(table->id == table_id);
}
@@ -786,9 +787,7 @@ dict_table_open_on_id(
/*==================*/
table_id_t table_id, /*!< in: table id */
ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- ibool try_drop) /*!< in: TRUE=try to drop any orphan
- indexes after an aborted online
- index creation */
+ dict_table_op_t table_op) /*!< in: operation to perform */
{
dict_table_t* table;
@@ -798,7 +797,11 @@ dict_table_open_on_id(
ut_ad(mutex_own(&dict_sys->mutex));
- table = dict_table_open_on_id_low(table_id);
+ table = dict_table_open_on_id_low(
+ table_id,
+ table_op == DICT_TABLE_OP_LOAD_TABLESPACE
+ ? DICT_ERR_IGNORE_RECOVER_LOCK
+ : DICT_ERR_IGNORE_NONE);
if (table != NULL) {
@@ -812,7 +815,8 @@ dict_table_open_on_id(
}
if (!dict_locked) {
- dict_table_try_drop_aborted_and_mutex_exit(table, try_drop);
+ dict_table_try_drop_aborted_and_mutex_exit(
+ table, table_op == DICT_TABLE_OP_DROP_ORPHAN);
}
return(table);
@@ -1446,8 +1450,8 @@ dict_table_rename_in_cache(
ibool exists;
char* filepath;
- ut_ad(table->space != TRX_SYS_SPACE);
-
+ ut_ad(table->space != TRX_SYS_SPACE);
+
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
dict_get_and_save_data_dir_path(table, true);
@@ -1459,12 +1463,13 @@ dict_table_rename_in_cache(
filepath = fil_make_ibd_name(table->name, false);
}
- fil_delete_tablespace(table->space, BUF_REMOVE_FLUSH_NO_WRITE);
+ fil_delete_tablespace(table->space, BUF_REMOVE_ALL_NO_WRITE);
/* Delete any temp file hanging around. */
if (os_file_status(filepath, &exists, &type)
&& exists
- && !os_file_delete_if_exists(filepath)) {
+ && !os_file_delete_if_exists(innodb_file_temp_key,
+ filepath)) {
ib_logf(IB_LOG_LEVEL_INFO,
"Delete of %s failed.", filepath);
@@ -1606,22 +1611,78 @@ dict_table_rename_in_cache(
dict_mem_foreign_table_name_lookup_set(foreign, FALSE);
}
if (strchr(foreign->id, '/')) {
+ /* This is a >= 4.0.18 format id */
+
ulint db_len;
char* old_id;
+ char old_name_cs_filename[MAX_TABLE_NAME_LEN+20];
+ uint errors = 0;
+
+ /* All table names are internally stored in charset
+ my_charset_filename (except the temp tables and the
+ partition identifier suffix in partition tables). The
+ foreign key constraint names are internally stored
+ in UTF-8 charset. The variable fkid here is used
+ to store foreign key constraint name in charset
+ my_charset_filename for comparison further below. */
+ char fkid[MAX_TABLE_NAME_LEN+20];
+ ibool on_tmp = FALSE;
+
+ /* The old table name in my_charset_filename is stored
+ in old_name_cs_filename */
+
+ strncpy(old_name_cs_filename, old_name,
+ MAX_TABLE_NAME_LEN);
+ if (strstr(old_name, TEMP_TABLE_PATH_PREFIX) == NULL) {
+
+ innobase_convert_to_system_charset(
+ strchr(old_name_cs_filename, '/') + 1,
+ strchr(old_name, '/') + 1,
+ MAX_TABLE_NAME_LEN, &errors);
+
+ if (errors) {
+ /* Converting the old table name into
+ UTF-8 failed. This probably means
+ that the old table name is
+ actually in UTF-8 already. */
+ innobase_convert_to_filename_charset(
+ strchr(old_name_cs_filename,
+ '/') + 1,
+ strchr(old_name, '/') + 1,
+ MAX_TABLE_NAME_LEN);
+ } else {
+ /* Old name already in
+ my_charset_filename */
+ strncpy(old_name_cs_filename, old_name,
+ MAX_TABLE_NAME_LEN);
+ }
+ }
- /* This is a >= 4.0.18 format id */
+ strncpy(fkid, foreign->id, MAX_TABLE_NAME_LEN);
+
+ if (strstr(fkid, TEMP_TABLE_PATH_PREFIX) == NULL) {
+ innobase_convert_to_filename_charset(
+ strchr(fkid, '/') + 1,
+ strchr(foreign->id, '/') + 1,
+ MAX_TABLE_NAME_LEN+20);
+ } else {
+ on_tmp = TRUE;
+ }
old_id = mem_strdup(foreign->id);
- if (ut_strlen(foreign->id) > ut_strlen(old_name)
+ if (ut_strlen(fkid) > ut_strlen(old_name_cs_filename)
+ ((sizeof dict_ibfk) - 1)
- && !memcmp(foreign->id, old_name,
- ut_strlen(old_name))
- && !memcmp(foreign->id + ut_strlen(old_name),
+ && !memcmp(fkid, old_name_cs_filename,
+ ut_strlen(old_name_cs_filename))
+ && !memcmp(fkid + ut_strlen(old_name_cs_filename),
dict_ibfk, (sizeof dict_ibfk) - 1)) {
/* This is a generated >= 4.0.18 format id */
+ char table_name[MAX_TABLE_NAME_LEN] = "";
+ uint errors = 0;
+
if (strlen(table->name) > strlen(old_name)) {
foreign->id = static_cast<char*>(
mem_heap_alloc(
@@ -1630,11 +1691,36 @@ dict_table_rename_in_cache(
+ strlen(old_id) + 1));
}
+ /* Convert the table name to UTF-8 */
+ strncpy(table_name, table->name,
+ MAX_TABLE_NAME_LEN);
+ innobase_convert_to_system_charset(
+ strchr(table_name, '/') + 1,
+ strchr(table->name, '/') + 1,
+ MAX_TABLE_NAME_LEN, &errors);
+
+ if (errors) {
+ /* Table name could not be converted
+ from charset my_charset_filename to
+ UTF-8. This means that the table name
+ is already in UTF-8 (#mysql#50). */
+ strncpy(table_name, table->name,
+ MAX_TABLE_NAME_LEN);
+ }
+
/* Replace the prefix 'databasename/tablename'
with the new names */
- strcpy(foreign->id, table->name);
- strcat(foreign->id,
- old_id + ut_strlen(old_name));
+ strcpy(foreign->id, table_name);
+ if (on_tmp) {
+ strcat(foreign->id,
+ old_id + ut_strlen(old_name));
+ } else {
+ sprintf(strchr(foreign->id, '/') + 1,
+ "%s%s",
+ strchr(table_name, '/') +1,
+ strstr(old_id, "_ibfk_") );
+ }
+
} else {
/* This is a >= 4.0.18 format id where the user
gave the id name */
@@ -2033,6 +2119,10 @@ dict_index_too_big_for_tree(
return(false);
}
+ DBUG_EXECUTE_IF(
+ "ib_force_create_table",
+ return(FALSE););
+
comp = dict_table_is_comp(table);
zip_size = dict_table_zip_size(table);
@@ -2047,7 +2137,10 @@ dict_index_too_big_for_tree(
number in the page modification log. The maximum
allowed node pointer size is half that. */
page_rec_max = page_zip_empty_size(new_index->n_fields,
- zip_size) - 1;
+ zip_size);
+ if (page_rec_max) {
+ page_rec_max--;
+ }
page_ptr_max = page_rec_max / 2;
/* On a compressed page, there is a two-byte entry in
the dense page directory for every record. But there
@@ -3117,13 +3210,16 @@ dict_index_t*
dict_foreign_find_index(
/*====================*/
const dict_table_t* table, /*!< in: table */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
const char** columns,/*!< in: array of column names */
ulint n_cols, /*!< in: number of columns */
const dict_index_t* types_idx,
/*!< in: NULL or an index
whose types the column types
must match */
- ibool check_charsets,
+ bool check_charsets,
/*!< in: whether to check
charsets. only has an effect
if types_idx != NULL */
@@ -3139,20 +3235,16 @@ dict_foreign_find_index(
index = dict_table_get_first_index(table);
while (index != NULL) {
- /* Ignore matches that refer to the same instance
- (or the index is to be dropped) */
- if (types_idx == index || index->type & DICT_FTS
- || index->to_be_dropped) {
-
- goto next_rec;
-
- } else if (dict_foreign_qualify_index(
- table, columns, n_cols, index, types_idx,
- check_charsets, check_null)) {
+ if (types_idx != index
+ && !(index->type & DICT_FTS)
+ && !index->to_be_dropped
+ && dict_foreign_qualify_index(
+ table, col_names, columns, n_cols,
+ index, types_idx,
+ check_charsets, check_null)) {
return(index);
}
-next_rec:
index = dict_table_get_next_index(index);
}
@@ -3211,9 +3303,16 @@ UNIV_INTERN
dberr_t
dict_foreign_add_to_cache(
/*======================*/
- dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
- ibool check_charsets) /*!< in: TRUE=check charset
- compatibility */
+ dict_foreign_t* foreign,
+ /*!< in, own: foreign key constraint */
+ const char** col_names,
+ /*!< in: column names, or NULL to use
+ foreign->foreign_table->col_names */
+ bool check_charsets,
+ /*!< in: whether to check charset
+ compatibility */
+ dict_err_ignore_t ignore_err)
+ /*!< in: error to be ignored */
{
dict_table_t* for_table;
dict_table_t* ref_table;
@@ -3246,14 +3345,15 @@ dict_foreign_add_to_cache(
for_in_cache = foreign;
}
- if (for_in_cache->referenced_table == NULL && ref_table) {
+ if (ref_table && !for_in_cache->referenced_table) {
index = dict_foreign_find_index(
- ref_table,
+ ref_table, NULL,
for_in_cache->referenced_col_names,
for_in_cache->n_fields, for_in_cache->foreign_index,
- check_charsets, FALSE);
+ check_charsets, false);
- if (index == NULL) {
+ if (index == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
dict_foreign_error_report(
ef, for_in_cache,
"there is no index in referenced table"
@@ -3278,9 +3378,9 @@ dict_foreign_add_to_cache(
added_to_referenced_list = TRUE;
}
- if (for_in_cache->foreign_table == NULL && for_table) {
+ if (for_table && !for_in_cache->foreign_table) {
index = dict_foreign_find_index(
- for_table,
+ for_table, col_names,
for_in_cache->foreign_col_names,
for_in_cache->n_fields,
for_in_cache->referenced_index, check_charsets,
@@ -3288,7 +3388,8 @@ dict_foreign_add_to_cache(
& (DICT_FOREIGN_ON_DELETE_SET_NULL
| DICT_FOREIGN_ON_UPDATE_SET_NULL));
- if (index == NULL) {
+ if (index == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
dict_foreign_error_report(
ef, for_in_cache,
"there is no index in the table"
@@ -3350,14 +3451,27 @@ dict_scan_to(
const char* string) /*!< in: look for this */
{
char quote = '\0';
+ bool escape = false;
for (; *ptr; ptr++) {
if (*ptr == quote) {
/* Closing quote character: do not look for
starting quote or the keyword. */
- quote = '\0';
+
+ /* If the quote character is escaped by a
+ backslash, ignore it. */
+ if (escape) {
+ escape = false;
+ } else {
+ quote = '\0';
+ }
} else if (quote) {
/* Within quotes: do nothing. */
+ if (escape) {
+ escape = false;
+ } else if (*ptr == '\\') {
+ escape = true;
+ }
} else if (*ptr == '`' || *ptr == '"' || *ptr == '\'') {
/* Starting quote: remember the quote character. */
quote = *ptr;
@@ -3772,6 +3886,11 @@ dict_strip_comments(
char* ptr;
/* unclosed quote character (0 if none) */
char quote = 0;
+ bool escape = false;
+
+ DBUG_ENTER("dict_strip_comments");
+
+ DBUG_PRINT("dict_strip_comments", ("%s", sql_string));
str = static_cast<char*>(mem_alloc(sql_length + 1));
@@ -3786,16 +3905,29 @@ end_of_string:
ut_a(ptr <= str + sql_length);
- return(str);
+ DBUG_PRINT("dict_strip_comments", ("%s", str));
+ DBUG_RETURN(str);
}
if (*sptr == quote) {
/* Closing quote character: do not look for
starting quote or comments. */
- quote = 0;
+
+ /* If the quote character is escaped by a
+ backslash, ignore it. */
+ if (escape) {
+ escape = false;
+ } else {
+ quote = 0;
+ }
} else if (quote) {
/* Within quotes: do not look for
starting quotes or comments. */
+ if (escape) {
+ escape = false;
+ } else if (*sptr == '\\') {
+ escape = true;
+ }
} else if (*sptr == '"' || *sptr == '`' || *sptr == '\'') {
/* Starting quote: remember the quote character. */
quote = *sptr;
@@ -4164,10 +4296,13 @@ col_loop1:
}
/* Try to find an index which contains the columns
- as the first fields and in the right order */
+ as the first fields and in the right order. There is
+ no need to check column type match (on types_idx), since
+ the referenced table can be NULL if foreign_key_checks is
+ set to 0 */
- index = dict_foreign_find_index(table, column_names, i,
- NULL, TRUE, FALSE);
+ index = dict_foreign_find_index(
+ table, NULL, column_names, i, NULL, TRUE, FALSE);
if (!index) {
mutex_enter(&dict_foreign_err_mutex);
@@ -4439,7 +4574,7 @@ try_find_index:
foreign->foreign_index */
if (referenced_table) {
- index = dict_foreign_find_index(referenced_table,
+ index = dict_foreign_find_index(referenced_table, NULL,
column_names, i,
foreign->foreign_index,
TRUE, FALSE);
@@ -5635,38 +5770,42 @@ dict_table_get_index_on_name(
/**********************************************************************//**
Replace the index passed in with another equivalent index in the
-foreign key lists of the table. */
+foreign key lists of the table.
+@return whether all replacements were found */
UNIV_INTERN
-void
+bool
dict_foreign_replace_index(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
- const dict_index_t* index, /*!< in: index to be replaced */
- const trx_t* trx) /*!< in: transaction handle */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
+ const dict_index_t* index) /*!< in: index to be replaced */
{
+ bool found = true;
dict_foreign_t* foreign;
ut_ad(index->to_be_dropped);
+ ut_ad(index->table == table);
for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
foreign;
foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
- dict_index_t* new_index;
-
if (foreign->foreign_index == index) {
ut_ad(foreign->foreign_table == index->table);
- new_index = dict_foreign_find_index(
- foreign->foreign_table,
+ dict_index_t* new_index = dict_foreign_find_index(
+ foreign->foreign_table, col_names,
foreign->foreign_col_names,
foreign->n_fields, index,
/*check_charsets=*/TRUE, /*check_null=*/FALSE);
- /* There must exist an alternative index,
- since this must have been checked earlier. */
- ut_a(new_index || !trx->check_foreigns);
- ut_ad(!new_index || new_index->table == index->table);
- ut_ad(!new_index || !new_index->to_be_dropped);
+ if (new_index) {
+ ut_ad(new_index->table == index->table);
+ ut_ad(!new_index->to_be_dropped);
+ } else {
+ found = false;
+ }
foreign->foreign_index = new_index;
}
@@ -5676,25 +5815,28 @@ dict_foreign_replace_index(
foreign;
foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
- dict_index_t* new_index;
-
if (foreign->referenced_index == index) {
ut_ad(foreign->referenced_table == index->table);
- new_index = dict_foreign_find_index(
- foreign->referenced_table,
+ dict_index_t* new_index = dict_foreign_find_index(
+ foreign->referenced_table, NULL,
foreign->referenced_col_names,
foreign->n_fields, index,
/*check_charsets=*/TRUE, /*check_null=*/FALSE);
/* An alternative index may not exist; in that case
the caller is told through the return value. */
- ut_a(new_index || !trx->check_foreigns);
- ut_ad(!new_index || new_index->table == index->table);
- ut_ad(!new_index || !new_index->to_be_dropped);
+ if (new_index) {
+ ut_ad(new_index->table == index->table);
+ ut_ad(!new_index->to_be_dropped);
+ } else {
+ found = false;
+ }
foreign->referenced_index = new_index;
}
}
+
+ return(found);
}
/**********************************************************************//**
@@ -6118,7 +6260,7 @@ dict_close(void)
}
}
-# ifdef UNIV_DEBUG
+#ifdef UNIV_DEBUG
/**********************************************************************//**
Validate the dictionary table LRU list.
@return TRUE if valid */
@@ -6203,7 +6345,7 @@ dict_non_lru_find_table(
return(FALSE);
}
-# endif /* UNIV_DEBUG */
+#endif /* UNIV_DEBUG */
/*********************************************************************//**
Check an index to see whether its first fields are the columns in the array,
in the same order and is not marked for deletion and is not the same
@@ -6213,67 +6355,66 @@ UNIV_INTERN
bool
dict_foreign_qualify_index(
/*=======================*/
- const dict_table_t* table, /*!< in: table */
- const char** columns,/*!< in: array of column names */
- ulint n_cols, /*!< in: number of columns */
- const dict_index_t* index, /*!< in: index to check */
- const dict_index_t* types_idx,
- /*!< in: NULL or an index
- whose types the column types
- must match */
- ibool check_charsets,
- /*!< in: whether to check
- charsets. only has an effect
- if types_idx != NULL */
- ulint check_null)
- /*!< in: nonzero if none of
- the columns must be declared
- NOT NULL */
+ const dict_table_t* table, /*!< in: table */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* index, /*!< in: index to check */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ bool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
{
- ulint i;
-
- if (dict_index_get_n_fields(index) < n_cols) {
- return(false);
- }
-
- for (i= 0; i < n_cols; i++) {
- dict_field_t* field;
- const char* col_name;
-
- field = dict_index_get_nth_field(index, i);
-
- col_name = dict_table_get_col_name(
- table, dict_col_get_no(field->col));
+ if (dict_index_get_n_fields(index) < n_cols) {
+ return(false);
+ }
- if (field->prefix_len != 0) {
- /* We do not accept column prefix
- indexes here */
+ for (ulint i = 0; i < n_cols; i++) {
+ dict_field_t* field;
+ const char* col_name;
+ ulint col_no;
- break;
- }
+ field = dict_index_get_nth_field(index, i);
+ col_no = dict_col_get_no(field->col);
- if (0 != innobase_strcasecmp(columns[i],
- col_name)) {
- break;
- }
+ if (field->prefix_len != 0) {
+ /* We do not accept column prefix
+ indexes here */
+ return(false);
+ }
- if (check_null
- && (field->col->prtype & DATA_NOT_NULL)) {
+ if (check_null
+ && (field->col->prtype & DATA_NOT_NULL)) {
+ return(false);
+ }
- break;
- }
+ col_name = col_names
+ ? col_names[col_no]
+ : dict_table_get_col_name(table, col_no);
- if (types_idx && !cmp_cols_are_equal(
- dict_index_get_nth_col(index, i),
- dict_index_get_nth_col(types_idx,
- i),
- check_charsets)) {
+ if (0 != innobase_strcasecmp(columns[i], col_name)) {
+ return(false);
+ }
- break;
- }
- }
+ if (types_idx && !cmp_cols_are_equal(
+ dict_index_get_nth_col(index, i),
+ dict_index_get_nth_col(types_idx, i),
+ check_charsets)) {
+ return(false);
+ }
+ }
- return((i == n_cols) ? true : false);
+ return(true);
}
/*********************************************************************//**
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index d423a3b7304..6ffb6924402 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -960,7 +960,7 @@ UNIV_INTERN
void
dict_check_tablespaces_and_store_max_id(
/*====================================*/
- ibool in_crash_recovery) /*!< in: are we doing a crash recovery */
+ dict_check_t dict_check) /*!< in: how to check */
{
dict_table_t* sys_tables;
dict_index_t* sys_index;
@@ -1039,7 +1039,7 @@ loop:
ib_logf(IB_LOG_LEVEL_ERROR,
"Table '%s' in InnoDB data dictionary"
" has unknown type %lx", table_name, flags);
-
+ mem_free(name);
goto loop;
}
@@ -1085,15 +1085,36 @@ loop:
if (space_id == 0) {
/* The system tablespace always exists. */
ut_ad(!discarded);
- } else if (in_crash_recovery) {
+ goto next_tablespace;
+ }
+
+ switch (dict_check) {
+ case DICT_CHECK_ALL_LOADED:
/* All tablespaces should have been found in
fil_load_single_table_tablespaces(). */
fil_space_for_table_exists_in_mem(
space_id, name, TRUE, !(is_temp || discarded),
false, NULL, 0);
+ break;
- } else if (!discarded) {
+ case DICT_CHECK_SOME_LOADED:
+ /* Some tablespaces may have been opened in
+ trx_resurrect_table_locks(). */
+ if (fil_space_for_table_exists_in_mem(
+ space_id, name, FALSE, FALSE,
+ false, NULL, 0)) {
+ break;
+ }
+ /* fall through */
+ case DICT_CHECK_NONE_LOADED:
+ if (discarded) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "DISCARD flag set for table '%s',"
+ " ignored.",
+ table_name);
+ break;
+ }
/* It is a normal database startup: create the
space object and check that the .ibd file exists.
@@ -1127,18 +1148,16 @@ loop:
if (filepath) {
mem_free(filepath);
}
- } else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "DISCARD flag set for table '%s', ignored.",
- table_name);
- }
- mem_free(name);
+ break;
+ }
if (space_id > max_space_id) {
max_space_id = space_id;
}
+next_tablespace:
+ mem_free(name);
mtr_start(&mtr);
btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
@@ -1808,6 +1827,23 @@ dict_load_indexes(
rec = btr_pcur_get_rec(&pcur);
+ if ((ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)
+ && rec_get_n_fields_old(rec)
+ == DICT_NUM_FIELDS__SYS_INDEXES) {
+ const byte* field;
+ ulint len;
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__NAME, &len);
+
+ if (len != UNIV_SQL_NULL
+ && char(*field) == char(TEMP_INDEX_PREFIX)) {
+ /* Skip indexes whose name starts with
+ TEMP_INDEX_PREFIX, because they will
+ be dropped during crash recovery. */
+ goto next_rec;
+ }
+ }
+
err_msg = dict_load_index_low(buf, table->name, heap, rec,
TRUE, &index);
ut_ad((index == NULL && err_msg != NULL)
@@ -2317,11 +2353,14 @@ err_exit:
table->ibd_file_missing = TRUE;
} else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to find tablespace for table '%s' "
- "in the cache. Attempting to load the "
- "tablespace with space id %lu.",
- table_name, (ulong) table->space);
+ if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Failed to find tablespace for "
+ "table '%s' in the cache. "
+ "Attempting to load the tablespace "
+ "with space id %lu.",
+ table_name, (ulong) table->space);
+ }
/* Use the remote filepath if needed. */
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
@@ -2368,13 +2407,15 @@ err_exit:
/* If there is no tablespace for the table then we only need to
load the index definitions. So that we can IMPORT the tablespace
- later. */
- if (table->ibd_file_missing) {
- err = dict_load_indexes(
- table, heap, DICT_ERR_IGNORE_ALL);
- } else {
- err = dict_load_indexes(table, heap, ignore_err);
- }
+ later. When recovering table locks for resurrected incomplete
+ transactions, the tablespace should exist, because DDL operations
+ are not allowed while the table is locked by a transaction. */
+ dict_err_ignore_t index_load_err =
+ !(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)
+ && table->ibd_file_missing
+ ? DICT_ERR_IGNORE_ALL
+ : ignore_err;
+ err = dict_load_indexes(table, heap, index_load_err);
if (err == DB_INDEX_CORRUPT) {
/* Refuse to load the table if the table has a corrupted
@@ -2411,9 +2452,16 @@ err_exit:
if (!cached || table->ibd_file_missing) {
/* Don't attempt to load the indexes from disk. */
} else if (err == DB_SUCCESS) {
- err = dict_load_foreigns(table->name, TRUE, TRUE);
+ err = dict_load_foreigns(table->name, NULL, true, true,
+ ignore_err);
if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Load table '%s' failed, the table has missing "
+ "foreign key indexes. Turn off "
+ "'foreign_key_checks' and try again.",
+ table->name);
+
dict_table_remove_from_cache(table);
table = NULL;
} else {
@@ -2474,7 +2522,9 @@ UNIV_INTERN
dict_table_t*
dict_load_table_on_id(
/*==================*/
- table_id_t table_id) /*!< in: table id */
+ table_id_t table_id, /*!< in: table id */
+ dict_err_ignore_t ignore_err) /*!< in: errors to ignore
+ when loading the table */
{
byte id_buf[8];
btr_pcur_t pcur;
@@ -2551,7 +2601,7 @@ check_rec:
table = dict_load_table(
mem_heap_strdupl(
heap, (char*) field, len),
- TRUE, DICT_ERR_IGNORE_NONE);
+ TRUE, ignore_err);
}
}
}
@@ -2710,18 +2760,25 @@ dict_load_foreign_cols(
/***********************************************************************//**
Loads a foreign key constraint to the dictionary cache.
@return DB_SUCCESS or error code */
-static __attribute__((nonnull, warn_unused_result))
+static __attribute__((nonnull(1), warn_unused_result))
dberr_t
dict_load_foreign(
/*==============*/
- const char* id, /*!< in: foreign constraint id, must be
+ const char* id,
+ /*!< in: foreign constraint id, must be
'\0'-terminated */
- ibool check_charsets,
- /*!< in: TRUE=check charset compatibility */
- ibool check_recursive)
- /*!< in: Whether to record the foreign table
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use foreign->foreign_table->col_names */
+ bool check_recursive,
+ /*!< in: whether to record the foreign table
parent count to avoid unlimited recursive
load of chained foreign tables */
+ bool check_charsets,
+ /*!< in: whether to check charset
+ compatibility */
+ dict_err_ignore_t ignore_err)
+ /*!< in: error to be ignored */
{
dict_foreign_t* foreign;
dict_table_t* sys_foreign;
@@ -2890,7 +2947,8 @@ dict_load_foreign(
a new foreign key constraint but loading one from the data
dictionary. */
- return(dict_foreign_add_to_cache(foreign, check_charsets));
+ return(dict_foreign_add_to_cache(foreign, col_names, check_charsets,
+ ignore_err));
}
/***********************************************************************//**
@@ -2903,12 +2961,16 @@ already in the dictionary cache.
UNIV_INTERN
dberr_t
dict_load_foreigns(
+ const char* table_name, /*!< in: table name */
+ const char** col_names, /*!< in: column names, or NULL
+ to use table->col_names */
+ bool check_recursive,/*!< in: Whether to check
+ recursive load of tables
+ chained by FK */
+ bool check_charsets, /*!< in: whether to check
+ charset compatibility */
+ dict_err_ignore_t ignore_err) /*!< in: error to be ignored */
/*===============*/
- const char* table_name, /*!< in: table name */
- ibool check_recursive,/*!< in: Whether to check recursive
- load of tables chained by FK */
- ibool check_charsets) /*!< in: TRUE=check charset
- compatibility */
{
ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1)
/ sizeof(ulint)];
@@ -2992,13 +3054,12 @@ loop:
may not be the same case, but the previous comparison showed that they
match with no-case. */
- if ((innobase_get_lower_case_table_names() != 2)
- && (0 != ut_memcmp(field, table_name, len))) {
+ if (rec_get_deleted_flag(rec, 0)) {
goto next_rec;
}
- if (rec_get_deleted_flag(rec, 0)) {
-
+ if ((innobase_get_lower_case_table_names() != 2)
+ && (0 != ut_memcmp(field, table_name, len))) {
goto next_rec;
}
@@ -3020,7 +3081,8 @@ loop:
/* Load the foreign constraint definition to the dictionary cache */
- err = dict_load_foreign(fk_id, check_charsets, check_recursive);
+ err = dict_load_foreign(fk_id, col_names,
+ check_recursive, check_charsets, ignore_err);
if (err != DB_SUCCESS) {
btr_pcur_close(&pcur);
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index 116a6a6d96a..b060a79e75a 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -53,10 +53,6 @@ Created 1/8/1996 Heikki Tuuri
UNIV_INTERN mysql_pfs_key_t autoinc_mutex_key;
#endif /* UNIV_PFS_MUTEX */
-/** Prefix for tmp tables, adopted from sql/table.h */
-#define tmp_file_prefix "#sql"
-#define tmp_file_prefix_length 4
-
/**********************************************************************//**
Creates a table memory object.
@return own: table object */
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index 25bcc87d93c..bfd0542b8e2 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2009, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,7 +34,6 @@ Created Jan 06, 2010 Vasil Dimov
#include "dict0stats.h"
#include "data0type.h" /* dtype_t */
#include "db0err.h" /* dberr_t */
-#include "dyn0dyn.h" /* dyn_array* */
#include "page0page.h" /* page_align() */
#include "pars0pars.h" /* pars_info_create() */
#include "pars0types.h" /* pars_info_t */
@@ -47,6 +46,8 @@ Created Jan 06, 2010 Vasil Dimov
#include "ut0rnd.h" /* ut_rnd_interval() */
#include "ut0ut.h" /* ut_format_name(), ut_time() */
+#include <vector>
+
/* Sampling algorithm description @{
The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
@@ -135,12 +136,18 @@ descending to lower levels and fetch N_SAMPLE_PAGES(index) records
from that level */
#define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10)
+/* A dynamic array where we store the boundaries of each distinct group
+of keys. For example if a btree level is:
+index: 0,1,2,3,4,5,6,7,8,9,10,11,12
+data: b,b,b,b,b,b,g,g,j,j,j, x, y
+then we would store 5,7,10,11,12 in the array. */
+typedef std::vector<ib_uint64_t> boundaries_t;
+
/*********************************************************************//**
Checks whether an index should be ignored in stats manipulations:
* stats fetch
* stats recalc
* stats save
-dict_stats_should_ignore_index() @{
@return true if the index should be ignored
UNIV_INLINE
bool
@@ -153,12 +160,10 @@ dict_stats_should_ignore_index(
|| index->to_be_dropped
|| *index->name == TEMP_INDEX_PREFIX);
}
-/* @} */
/*********************************************************************//**
Checks whether the persistent statistics storage exists and that all
tables have the proper structure.
-dict_stats_persistent_storage_check() @{
@return true if exists and all tables are ok */
static
bool
@@ -260,7 +265,6 @@ dict_stats_persistent_storage_check(
return(true);
}
-/* @} */
/*********************************************************************//**
Executes a given SQL statement using the InnoDB internal SQL parser
@@ -463,8 +467,7 @@ dict_stats_table_clone_create(
/*********************************************************************//**
Free the resources occupied by an object returned by
-dict_stats_table_clone_create().
-dict_stats_table_clone_free() @{ */
+dict_stats_table_clone_create(). */
static
void
dict_stats_table_clone_free(
@@ -473,14 +476,12 @@ dict_stats_table_clone_free(
{
mem_heap_free(t->heap);
}
-/* @} */
/*********************************************************************//**
Write all zeros (or 1 where it makes sense) into an index
statistics members. The resulting stats correspond to an empty index.
The caller must own index's table stats latch in X mode
-(dict_table_stats_lock(table, RW_X_LATCH))
-dict_stats_empty_index() @{ */
+(dict_table_stats_lock(table, RW_X_LATCH)) */
static
void
dict_stats_empty_index(
@@ -501,12 +502,10 @@ dict_stats_empty_index(
index->stat_index_size = 1;
index->stat_n_leaf_pages = 1;
}
-/* @} */
/*********************************************************************//**
Write all zeros (or 1 where it makes sense) into a table and its indexes'
-statistics members. The resulting stats correspond to an empty table.
-dict_stats_empty_table() @{ */
+statistics members. The resulting stats correspond to an empty table. */
static
void
dict_stats_empty_table(
@@ -543,7 +542,6 @@ dict_stats_empty_table(
dict_table_stats_unlock(table, RW_X_LATCH);
}
-/* @} */
/*********************************************************************//**
Check whether index's stats are initialized (assert if they are not). */
@@ -573,6 +571,7 @@ dict_stats_assert_initialized_index(
&index->stat_n_leaf_pages,
sizeof(index->stat_n_leaf_pages));
}
+
/*********************************************************************//**
Check whether table's stats are initialized (assert if they are not). */
static
@@ -760,8 +759,7 @@ dict_stats_snapshot_create(
/*********************************************************************//**
Free the resources occupied by an object returned by
-dict_stats_snapshot_create().
-dict_stats_snapshot_free() @{ */
+dict_stats_snapshot_create(). */
static
void
dict_stats_snapshot_free(
@@ -770,14 +768,12 @@ dict_stats_snapshot_free(
{
dict_stats_table_clone_free(t);
}
-/* @} */
/*********************************************************************//**
Calculates new estimates for index statistics. This function is
relatively quick and is used to calculate transient statistics that
are not saved on disk. This was the only way to calculate statistics
-before the Persistent Statistics feature was introduced.
-dict_stats_update_transient_for_index() @{ */
+before the Persistent Statistics feature was introduced. */
static
void
dict_stats_update_transient_for_index(
@@ -826,15 +822,13 @@ dict_stats_update_transient_for_index(
dict_stats_empty_index(index);
}
}
-/* @} */
/*********************************************************************//**
Calculates new estimates for table and index statistics. This function
is relatively quick and is used to calculate transient statistics that
are not saved on disk.
This was the only way to calculate statistics before the
-Persistent Statistics feature was introduced.
-dict_stats_update_transient() @{ */
+Persistent Statistics feature was introduced. */
UNIV_INTERN
void
dict_stats_update_transient(
@@ -900,7 +894,6 @@ dict_stats_update_transient(
table->stat_initialized = TRUE;
}
-/* @} */
/* @{ Pseudo code about the relation between the following functions
@@ -938,7 +931,7 @@ dict_stats_analyze_index_level(
distinct keys for all prefixes */
ib_uint64_t* total_recs, /*!< out: total number of records */
ib_uint64_t* total_pages, /*!< out: total number of pages */
- dyn_array_t* n_diff_boundaries,/*!< out: boundaries of the groups
+ boundaries_t* n_diff_boundaries,/*!< out: boundaries of the groups
of distinct keys */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
@@ -983,9 +976,9 @@ dict_stats_analyze_index_level(
/* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
if (n_diff_boundaries != NULL) {
for (i = 0; i < n_uniq; i++) {
- dyn_array_free(&n_diff_boundaries[i]);
-
- dyn_array_create(&n_diff_boundaries[i]);
+ n_diff_boundaries[i].erase(
+ n_diff_boundaries[i].begin(),
+ n_diff_boundaries[i].end());
}
}
@@ -1113,7 +1106,6 @@ dict_stats_analyze_index_level(
record, that is - the last one from
a group of equal keys */
- void* p;
ib_uint64_t idx;
/* the index of the current record
@@ -1126,11 +1118,7 @@ dict_stats_analyze_index_level(
total_recs >= 2 */
idx = *total_recs - 2;
- p = dyn_array_push(
- &n_diff_boundaries[i],
- sizeof(ib_uint64_t));
-
- memcpy(p, &idx, sizeof(ib_uint64_t));
+ n_diff_boundaries[i].push_back(idx);
}
/* increment the number of different keys
@@ -1191,15 +1179,11 @@ dict_stats_analyze_index_level(
last one from the last group of equal keys; this holds for
all possible prefixes */
for (i = 0; i < n_uniq; i++) {
- void* p;
ib_uint64_t idx;
idx = *total_recs - 1;
- p = dyn_array_push(&n_diff_boundaries[i],
- sizeof(ib_uint64_t));
-
- memcpy(p, &idx, sizeof(ib_uint64_t));
+ n_diff_boundaries[i].push_back(idx);
}
}
@@ -1226,9 +1210,7 @@ dict_stats_analyze_index_level(
for (j = 0; j < n_diff[i]; j++) {
ib_uint64_t idx;
- idx = *(ib_uint64_t*) dyn_array_get_element(
- &n_diff_boundaries[i],
- j * sizeof(ib_uint64_t));
+ idx = n_diff_boundaries[i][j];
DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
j, idx);
@@ -1273,8 +1255,7 @@ to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
equal) or 2 (the function found a non-boring record and returned).
@return offsets1 or offsets2 (the offsets of *out_rec),
-or NULL if the page is empty and does not contain user records.
-dict_stats_scan_page() @{ */
+or NULL if the page is empty and does not contain user records. */
UNIV_INLINE __attribute__((nonnull))
ulint*
dict_stats_scan_page(
@@ -1382,13 +1363,11 @@ func_exit:
*out_rec = rec;
return(offsets_rec);
}
-/* @} */
/*********************************************************************//**
Dive below the current position of a cursor and calculate the number of
distinct records on the leaf page, when looking at the first n_prefix
columns.
-dict_stats_analyze_index_below_cur() @{
@return number of distinct records on the leaf page */
static
ib_uint64_t
@@ -1511,7 +1490,6 @@ dict_stats_analyze_index_below_cur(
return(n_diff);
}
-/* @} */
/*********************************************************************//**
For a given level in an index select N_SAMPLE_PAGES(index)
@@ -1536,7 +1514,7 @@ dict_stats_analyze_index_for_n_prefix(
records on the given level,
when looking at the first
n_prefix columns */
- dyn_array_t* boundaries, /*!< in: array that contains
+ boundaries_t* boundaries, /*!< in: array that contains
n_diff_for_this_prefix
integers each of which
represents the index (on the
@@ -1602,8 +1580,7 @@ dict_stats_analyze_index_for_n_prefix(
== !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
btr_pcur_get_rec(&pcur), page_is_comp(page))));
- last_idx_on_level = *(ib_uint64_t*) dyn_array_get_element(boundaries,
- (ulint) ((n_diff_for_this_prefix - 1) * sizeof(ib_uint64_t)));
+ last_idx_on_level = boundaries->at(n_diff_for_this_prefix - 1);
rec_idx = 0;
@@ -1619,7 +1596,7 @@ dict_stats_analyze_index_for_n_prefix(
ib_uint64_t dive_below_idx;
/* there are n_diff_for_this_prefix elements
- in the array boundaries[] and we divide those elements
+ in 'boundaries' and we divide those elements
into n_recs_to_dive_below segments, for example:
let n_diff_for_this_prefix=100, n_recs_to_dive_below=4, then:
@@ -1658,9 +1635,7 @@ dict_stats_analyze_index_for_n_prefix(
ib_uint64_t could be bigger than ulint */
rnd = ut_rnd_interval(0, (ulint) (right - left));
- dive_below_idx = *(ib_uint64_t*) dyn_array_get_element(
- boundaries, (ulint) ((left + rnd)
- * sizeof(ib_uint64_t)));
+ dive_below_idx = boundaries->at(left + rnd);
#if 0
DEBUG_PRINTF(" %s(): dive below record with index="
@@ -1769,9 +1744,13 @@ dict_stats_analyze_index(
ib_uint64_t* n_diff_on_level;
ib_uint64_t total_recs;
ib_uint64_t total_pages;
- dyn_array_t* n_diff_boundaries;
+ boundaries_t* n_diff_boundaries;
mtr_t mtr;
ulint size;
+ DBUG_ENTER("dict_stats_analyze_index");
+
+ DBUG_PRINT("info", ("index: %s, online status: %d", index->name,
+ dict_index_get_online_status(index)));
DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
@@ -1794,7 +1773,7 @@ dict_stats_analyze_index(
switch (size) {
case ULINT_UNDEFINED:
dict_stats_assert_initialized_index(index);
- return;
+ DBUG_VOID_RETURN;
case 0:
/* The root node of the tree is a leaf */
size = 1;
@@ -1848,20 +1827,14 @@ dict_stats_analyze_index(
mtr_commit(&mtr);
dict_stats_assert_initialized_index(index);
- return;
+ DBUG_VOID_RETURN;
}
/* set to zero */
n_diff_on_level = reinterpret_cast<ib_uint64_t*>
(mem_zalloc(n_uniq * sizeof(ib_uint64_t)));
- n_diff_boundaries = reinterpret_cast<dyn_array_t*>
- (mem_alloc(n_uniq * sizeof(dyn_array_t)));
-
- for (ulint i = 0; i < n_uniq; i++) {
- /* initialize the dynamic arrays */
- dyn_array_create(&n_diff_boundaries[i]);
- }
+ n_diff_boundaries = new boundaries_t[n_uniq];
/* total_recs is also used to estimate the number of pages on one
level below, so at the start we have 1 page (the root) */
@@ -2011,15 +1984,12 @@ found_level:
mtr_commit(&mtr);
- for (ulint i = 0; i < n_uniq; i++) {
- dyn_array_free(&n_diff_boundaries[i]);
- }
-
- mem_free(n_diff_boundaries);
+ delete[] n_diff_boundaries;
mem_free(n_diff_on_level);
dict_stats_assert_initialized_index(index);
+ DBUG_VOID_RETURN;
}
/*********************************************************************//**
@@ -2109,7 +2079,6 @@ dict_stats_update_persistent(
/*********************************************************************//**
Save an individual index's statistic into the persistent statistics
storage.
-dict_stats_save_index_stat() @{
@return DB_SUCCESS or error code */
static
dberr_t
@@ -2232,11 +2201,9 @@ dict_stats_save_index_stat(
return(ret);
}
-/* @} */
/*********************************************************************//**
Save the table's statistics into the persistent statistics storage.
-dict_stats_save() @{
@return DB_SUCCESS or error code */
static
dberr_t
@@ -2401,14 +2368,12 @@ end:
return(ret);
}
-/* @} */
/*********************************************************************//**
Called for the row that is selected by
SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
The second argument is a pointer to the table and the fetched stats are
written to it.
-dict_stats_fetch_table_stats_step() @{
@return non-NULL dummy */
static
ibool
@@ -2485,7 +2450,6 @@ dict_stats_fetch_table_stats_step(
/* XXX this is not used but returning non-NULL is necessary */
return(TRUE);
}
-/* @} */
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step(). */
@@ -2511,7 +2475,6 @@ This can be improved if we sort table->indexes in a temporary area just once
and then search in that sorted list. Then the complexity will be O(N*log(N)).
We assume a table will not have more than 100 indexes, so we go with the
simpler N^2 algorithm.
-dict_stats_fetch_index_stats_step() @{
@return non-NULL dummy */
static
ibool
@@ -2752,11 +2715,9 @@ dict_stats_fetch_index_stats_step(
/* XXX this is not used but returning non-NULL is necessary */
return(TRUE);
}
-/* @} */
/*********************************************************************//**
Read table's statistics from the persistent statistics storage.
-dict_stats_fetch_from_ps() @{
@return DB_SUCCESS or error code */
static
dberr_t
@@ -2877,17 +2838,17 @@ dict_stats_fetch_from_ps(
return(ret);
}
-/* @} */
/*********************************************************************//**
-Fetches or calculates new estimates for index statistics.
-dict_stats_update_for_index() @{ */
+Fetches or calculates new estimates for index statistics. */
UNIV_INTERN
void
dict_stats_update_for_index(
/*========================*/
dict_index_t* index) /*!< in/out: index */
{
+ DBUG_ENTER("dict_stats_update_for_index");
+
ut_ad(!mutex_own(&dict_sys->mutex));
if (dict_stats_is_persistent_enabled(index->table)) {
@@ -2897,7 +2858,7 @@ dict_stats_update_for_index(
dict_stats_analyze_index(index);
dict_table_stats_unlock(index->table, RW_X_LATCH);
dict_stats_save(index->table);
- return;
+ DBUG_VOID_RETURN;
}
/* else */
@@ -2920,8 +2881,9 @@ dict_stats_update_for_index(
dict_table_stats_lock(index->table, RW_X_LATCH);
dict_stats_update_transient_for_index(index);
dict_table_stats_unlock(index->table, RW_X_LATCH);
+
+ DBUG_VOID_RETURN;
}
-/* @} */
/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
@@ -2962,7 +2924,9 @@ dict_stats_update(
switch (stats_upd_option) {
case DICT_STATS_RECALC_PERSISTENT:
- ut_ad(!srv_read_only_mode);
+ if (srv_read_only_mode) {
+ goto transient;
+ }
/* Persistent recalculation requested, called from
1) ANALYZE TABLE, or
@@ -3063,8 +3027,6 @@ dict_stats_update(
dict_table_t* t;
- ut_ad(!srv_read_only_mode);
-
/* Create a dummy table object with the same name and
indexes, suitable for fetching the stats into it. */
t = dict_stats_table_clone_create(table);
@@ -3098,6 +3060,10 @@ dict_stats_update(
dict_stats_table_clone_free(t);
+ if (srv_read_only_mode) {
+ goto transient;
+ }
+
if (dict_stats_auto_recalc_is_enabled(table)) {
return(dict_stats_update(
table,
@@ -3162,7 +3128,6 @@ rolling back dict transactions.
marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
But we shouldn't open *.ibd files before we have rolled back dict
transactions and opened the SYS_* records for the *.ibd files.
-dict_stats_drop_index() @{
@return DB_SUCCESS or error code */
UNIV_INTERN
dberr_t
@@ -3244,14 +3209,12 @@ dict_stats_drop_index(
return(ret);
}
-/* @} */
/*********************************************************************//**
Executes
DELETE FROM mysql.innodb_table_stats
WHERE database_name = '...' AND table_name = '...';
Creates its own transaction and commits it.
-dict_stats_delete_from_table_stats() @{
@return DB_SUCCESS or error code */
UNIV_INLINE
dberr_t
@@ -3265,7 +3228,7 @@ dict_stats_delete_from_table_stats(
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_STAT */
+#endif /* UNIV_SYNC_DEBUG */
ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
@@ -3284,14 +3247,12 @@ dict_stats_delete_from_table_stats(
return(ret);
}
-/* @} */
/*********************************************************************//**
Executes
DELETE FROM mysql.innodb_index_stats
WHERE database_name = '...' AND table_name = '...';
Creates its own transaction and commits it.
-dict_stats_delete_from_index_stats() @{
@return DB_SUCCESS or error code */
UNIV_INLINE
dberr_t
@@ -3305,7 +3266,7 @@ dict_stats_delete_from_index_stats(
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_STAT */
+#endif /* UNIV_SYNC_DEBUG */
ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
@@ -3324,13 +3285,11 @@ dict_stats_delete_from_index_stats(
return(ret);
}
-/* @} */
/*********************************************************************//**
Removes the statistics for a table and all of its indexes from the
persistent statistics storage if it exists and if there is data stored for
the table. This function creates its own transaction and commits it.
-dict_stats_drop_table() @{
@return DB_SUCCESS or error code */
UNIV_INTERN
dberr_t
@@ -3347,7 +3306,7 @@ dict_stats_drop_table(
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_STAT */
+#endif /* UNIV_SYNC_DEBUG */
ut_ad(mutex_own(&dict_sys->mutex));
/* skip tables that do not contain a database name
@@ -3403,7 +3362,6 @@ dict_stats_drop_table(
return(ret);
}
-/* @} */
/*********************************************************************//**
Executes
@@ -3411,7 +3369,6 @@ UPDATE mysql.innodb_table_stats SET
database_name = '...', table_name = '...'
WHERE database_name = '...' AND table_name = '...';
Creates its own transaction and commits it.
-dict_stats_rename_in_table_stats() @{
@return DB_SUCCESS or error code */
UNIV_INLINE
dberr_t
@@ -3427,7 +3384,7 @@ dict_stats_rename_in_table_stats(
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_STAT */
+#endif /* UNIV_SYNC_DEBUG */
ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
@@ -3451,7 +3408,6 @@ dict_stats_rename_in_table_stats(
return(ret);
}
-/* @} */
/*********************************************************************//**
Executes
@@ -3459,7 +3415,6 @@ UPDATE mysql.innodb_index_stats SET
database_name = '...', table_name = '...'
WHERE database_name = '...' AND table_name = '...';
Creates its own transaction and commits it.
-dict_stats_rename_in_index_stats() @{
@return DB_SUCCESS or error code */
UNIV_INLINE
dberr_t
@@ -3475,7 +3430,7 @@ dict_stats_rename_in_index_stats(
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_STAT */
+#endif /* UNIV_SYNC_DEBUG */
ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
@@ -3499,12 +3454,10 @@ dict_stats_rename_in_index_stats(
return(ret);
}
-/* @} */
/*********************************************************************//**
Renames a table in InnoDB persistent stats storage.
This function creates its own transaction and commits it.
-dict_stats_rename_table() @{
@return DB_SUCCESS or error code */
UNIV_INTERN
dberr_t
@@ -3524,7 +3477,7 @@ dict_stats_rename_table(
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_STAT */
+#endif /* UNIV_SYNC_DEBUG */
ut_ad(!mutex_own(&dict_sys->mutex));
/* skip innodb_table_stats and innodb_index_stats themselves */
@@ -3658,7 +3611,6 @@ dict_stats_rename_table(
return(ret);
}
-/* @} */
/* tests @{ */
#ifdef UNIV_COMPILE_TEST_FUNCS
@@ -4050,5 +4002,3 @@ test_dict_stats_all()
/* @} */
#endif /* UNIV_HOTBACKUP */
-
-/* vim: set foldmethod=marker foldmarker=@{,@}: */
diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc
index 9a10d995360..ecd723ca39a 100644
--- a/storage/innobase/dict/dict0stats_bg.cc
+++ b/storage/innobase/dict/dict0stats_bg.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,6 +28,10 @@ Created Apr 25, 2012 Vasil Dimov
#include "dict0stats.h"
#include "dict0stats_bg.h"
+#ifdef UNIV_NONINL
+# include "dict0stats_bg.ic"
+#endif
+
#include <vector>
/** Minimum time interval between stats recalc for a given table */
@@ -95,8 +99,7 @@ Add a table to the recalc pool, which is processed by the
background stats gathering thread. Only the table id is added to the
list, so the table can be closed after being enqueued and it will be
opened when needed. If the table does not exist later (has been DROPped),
-then it will be removed from the pool and skipped.
-dict_stats_recalc_pool_add() @{ */
+then it will be removed from the pool and skipped. */
UNIV_INTERN
void
dict_stats_recalc_pool_add(
@@ -124,12 +127,10 @@ dict_stats_recalc_pool_add(
os_event_set(dict_stats_event);
}
-/* @} */
/*****************************************************************//**
Get a table from the auto recalc pool. The returned table id is removed
from the pool.
-dict_stats_recalc_pool_get() @{
@return true if the pool was non-empty and "id" was set, false otherwise */
static
bool
@@ -155,7 +156,6 @@ dict_stats_recalc_pool_get(
return(true);
}
-/* @} */
/*****************************************************************//**
Delete a given table from the auto recalc pool.
@@ -188,46 +188,30 @@ dict_stats_recalc_pool_del(
}
/*****************************************************************//**
-Wait until background stats thread has stopped using the specified table(s).
+Wait until background stats thread has stopped using the specified table.
The caller must have locked the data dictionary using
row_mysql_lock_data_dictionary() and this function may unlock it temporarily
and restore the lock before it exits.
-The background stats thead is guaranteed not to start using the specified
-tables after this function returns and before the caller unlocks the data
+The background stats thread is guaranteed not to start using the specified
+table after this function returns and before the caller unlocks the data
dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
-under dict_sys->mutex.
-dict_stats_wait_bg_to_stop_using_table() @{ */
+under dict_sys->mutex. */
UNIV_INTERN
void
-dict_stats_wait_bg_to_stop_using_tables(
-/*====================================*/
- dict_table_t* table1, /*!< in/out: table1 */
- dict_table_t* table2, /*!< in/out: table2, could be NULL */
+dict_stats_wait_bg_to_stop_using_table(
+/*===================================*/
+ dict_table_t* table, /*!< in/out: table */
trx_t* trx) /*!< in/out: transaction to use for
unlocking/locking the data dict */
{
- ut_ad(!srv_read_only_mode);
-
- while ((table1->stats_bg_flag & BG_STAT_IN_PROGRESS)
- || (table2 != NULL
- && (table2->stats_bg_flag & BG_STAT_IN_PROGRESS))) {
-
- table1->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
- if (table2 != NULL) {
- table2->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
- }
-
- row_mysql_unlock_data_dictionary(trx);
- os_thread_sleep(250000);
- row_mysql_lock_data_dictionary(trx);
+ while (!dict_stats_stop_bg(table)) {
+ DICT_STATS_BG_YIELD(trx);
}
}
-/* @} */
/*****************************************************************//**
Initialize global variables needed for the operation of dict_stats_thread()
-Must be called before dict_stats_thread() is started.
-dict_stats_thread_init() @{ */
+Must be called before dict_stats_thread() is started. */
UNIV_INTERN
void
dict_stats_thread_init()
@@ -255,12 +239,10 @@ dict_stats_thread_init()
dict_stats_recalc_pool_init();
}
-/* @} */
/*****************************************************************//**
Free resources allocated by dict_stats_thread_init(), must be called
-after dict_stats_thread() has exited.
-dict_stats_thread_deinit() @{ */
+after dict_stats_thread() has exited. */
UNIV_INTERN
void
dict_stats_thread_deinit()
@@ -277,12 +259,10 @@ dict_stats_thread_deinit()
os_event_free(dict_stats_event);
dict_stats_event = NULL;
}
-/* @} */
/*****************************************************************//**
Get the first table that has been added for auto recalc and eventually
-update its stats.
-dict_stats_process_entry_from_recalc_pool() @{ */
+update its stats. */
static
void
dict_stats_process_entry_from_recalc_pool()
@@ -302,7 +282,7 @@ dict_stats_process_entry_from_recalc_pool()
mutex_enter(&dict_sys->mutex);
- table = dict_table_open_on_id(table_id, TRUE, FALSE);
+ table = dict_table_open_on_id(table_id, TRUE, DICT_TABLE_OP_NORMAL);
if (table == NULL) {
/* table does not exist, must have been DROPped
@@ -351,13 +331,11 @@ dict_stats_process_entry_from_recalc_pool()
mutex_exit(&dict_sys->mutex);
}
-/* @} */
/*****************************************************************//**
This is the thread for background stats gathering. It pops tables, from
the auto recalc list and proceeds them, eventually recalculating their
statistics.
-dict_stats_thread() @{
@return this function does not return, it calls os_thread_exit() */
extern "C" UNIV_INTERN
os_thread_ret_t
@@ -397,6 +375,3 @@ DECLARE_THREAD(dict_stats_thread)(
OS_THREAD_DUMMY_RETURN;
}
-/* @} */
-
-/* vim: set foldmethod=marker foldmarker=@{,@}: */
diff --git a/storage/innobase/dyn/dyn0dyn.cc b/storage/innobase/dyn/dyn0dyn.cc
index b157c7707f4..3ef5297a7c9 100644
--- a/storage/innobase/dyn/dyn0dyn.cc
+++ b/storage/innobase/dyn/dyn0dyn.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -35,7 +35,7 @@ UNIV_INTERN
dyn_block_t*
dyn_array_add_block(
/*================*/
- dyn_array_t* arr) /*!< in: dyn array */
+ dyn_array_t* arr) /*!< in/out: dyn array */
{
mem_heap_t* heap;
dyn_block_t* block;
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 1779ae86c46..b8357951667 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -147,6 +147,8 @@ struct fil_node_t {
char* name; /*!< path to the file */
ibool open; /*!< TRUE if file open */
os_file_t handle; /*!< OS handle to the file, if file open */
+ os_event_t sync_event;/*!< Condition event to group and
+ serialize calls to fsync */
ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw
device or a raw disk partition */
ulint size; /*!< size of the file in database pages, 0 if
@@ -374,9 +376,10 @@ NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
Prepares a file node for i/o. Opens the file if it is closed. Updates the
pending i/o's field in the node and the system appropriately. Takes the node
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex. */
+mutex.
+@return false if the file can't be opened, otherwise true */
static
-void
+bool
fil_node_prepare_for_io(
/*====================*/
fil_node_t* node, /*!< in: file node */
@@ -416,7 +419,7 @@ UNIV_INLINE
dberr_t
fil_read(
/*=====*/
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ bool sync, /*!< in: true if synchronous aio is desired */
ulint space_id, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes;
0 for uncompressed pages */
@@ -445,7 +448,7 @@ UNIV_INLINE
dberr_t
fil_write(
/*======*/
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ bool sync, /*!< in: true if synchronous aio is desired */
ulint space_id, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes;
0 for uncompressed pages */
@@ -649,6 +652,7 @@ fil_node_create(
ut_a(!is_raw || srv_start_raw_disk_in_use);
+ node->sync_event = os_event_create();
node->is_raw_disk = is_raw;
node->size = size;
node->magic_n = FIL_NODE_MAGIC_N;
@@ -689,9 +693,10 @@ fil_node_create(
/********************************************************************//**
Opens a file of a node of a tablespace. The caller must own the fil_system
-mutex. */
+mutex.
+@return false if the file can't be opened, otherwise true */
static
-void
+bool
fil_node_open_file(
/*===============*/
fil_node_t* node, /*!< in: file node */
@@ -729,12 +734,12 @@ fil_node_open_file(
ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Fatal error: cannot open %s\n."
- "InnoDB: Have you deleted .ibd files"
- " under a running mysqld server?\n",
+ ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Error: cannot "
+ "open %s\n. InnoDB: Have you deleted .ibd "
+ "files under a running mysqld server?\n",
node->name);
- ut_a(0);
+
+ return(false);
}
size_bytes = os_file_get_size(node->handle);
@@ -879,6 +884,8 @@ add_size:
/* Put the node to the LRU list */
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
}
+
+ return(true);
}
/**********************************************************************//**
@@ -1150,6 +1157,7 @@ fil_node_free(
there are no unflushed modifications in the file */
node->modification_counter = node->flush_counter;
+ os_event_set(node->sync_event);
if (fil_buffering_disabled(space)) {
@@ -1173,6 +1181,7 @@ fil_node_free(
UT_LIST_REMOVE(chain, space->chain, node);
+ os_event_free(node->sync_event);
mem_free(node->name);
mem_free(node);
}
@@ -1242,7 +1251,8 @@ fil_space_create(
if (space != 0) {
ib_logf(IB_LOG_LEVEL_WARN,
"Tablespace '%s' exists in the cache "
- "with id %lu", name, (ulong) id);
+ "with id %lu != %lu",
+ name, (ulong) space->id, (ulong) id);
if (id == 0 || purpose != FIL_TABLESPACE) {
@@ -1481,6 +1491,21 @@ fil_space_get_space(
if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
ut_a(id != 0);
+ mutex_exit(&fil_system->mutex);
+
+ /* It is possible that the space gets evicted at this point
+ before the fil_mutex_enter_and_prepare_for_io() acquires
+ the fil_system->mutex. Check for this after completing the
+ call to fil_mutex_enter_and_prepare_for_io(). */
+ fil_mutex_enter_and_prepare_for_io(id);
+
+ /* We are still holding the fil_system->mutex. Check if
+ the space is still in memory cache. */
+ space = fil_space_get_by_id(id);
+ if (space == NULL) {
+ return(NULL);
+ }
+
/* The following code must change when InnoDB supports
multiple datafiles per tablespace. */
ut_a(1 == UT_LIST_GET_LEN(space->chain));
@@ -1491,7 +1516,11 @@ fil_space_get_space(
the file yet; the following calls will open it and update the
size fields */
- fil_node_prepare_for_io(node, fil_system, space);
+ if (!fil_node_prepare_for_io(node, fil_system, space)) {
+ /* The single-table tablespace can't be opened,
+ because the ibd file is missing. */
+ return(NULL);
+ }
fil_node_complete_io(node, fil_system, OS_FILE_READ);
}
@@ -1552,8 +1581,7 @@ fil_space_get_size(
ulint size;
ut_ad(fil_system);
-
- fil_mutex_enter_and_prepare_for_io(id);
+ mutex_enter(&fil_system->mutex);
space = fil_space_get_space(id);
@@ -1583,7 +1611,7 @@ fil_space_get_flags(
return(0);
}
- fil_mutex_enter_and_prepare_for_io(id);
+ mutex_enter(&fil_system->mutex);
space = fil_space_get_space(id);
@@ -1700,7 +1728,15 @@ fil_open_log_and_system_tablespace_files(void)
node = UT_LIST_GET_NEXT(chain, node)) {
if (!node->open) {
- fil_node_open_file(node, fil_system, space);
+ if (!fil_node_open_file(node, fil_system,
+ space)) {
+ /* This function is called during server
+ startup. If any file of the log or the
+ system tablespace is missing, the server
+ cannot start successfully, so we assert
+ here. */
+ ut_a(0);
+ }
}
if (fil_system->max_n_open < 10 + fil_system->n_open) {
@@ -1930,10 +1966,63 @@ fil_write_flushed_lsn_to_data_files(
}
/*******************************************************************//**
+Checks the consistency of the first data page of a tablespace at database
+startup; skipped when srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT.
+@retval NULL if the page passes every check, or if the checks are skipped
+@return otherwise, a string literal naming the first check that failed */
+static __attribute__((warn_unused_result))
+const char*
+fil_check_first_page(
+/*=================*/
+ const page_t* page) /*!< in: data page */
+{
+ ulint space_id; /* space id read from the space header */
+ ulint flags; /* tablespace flags read from the space header */
+
+ if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) { /* forced recovery: accept the page unchecked */
+ return(NULL);
+ }
+
+ space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page);
+ flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
+
+ if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { /* page size derived from the flags differs from UNIV_PAGE_SIZE */
+ return("innodb-page-size mismatch");
+ }
+
+ if (!space_id && !flags) { /* both header fields are zero: test whether the whole page is zero-filled */
+ ulint nonzero_bytes = UNIV_PAGE_SIZE; /* bytes left to scan; reaches 0 only if every byte is zero */
+ const byte* b = page;
+
+ while (!*b && --nonzero_bytes) { /* stop at the first nonzero byte or at the end of the page */
+ b++;
+ }
+
+ if (!nonzero_bytes) {
+ return("space header page consists of zero bytes");
+ }
+ }
+
+ if (buf_page_is_corrupted( /* a positive result is reported as a checksum mismatch */
+ false, page, fsp_flags_get_zip_size(flags))) {
+ return("checksum mismatch");
+ }
+
+ if (page_get_space_id(page) == space_id /* space id from page_get_space_id() must equal the space header's, */
+ && page_get_page_no(page) == 0) { /* and the page must identify itself as page 0 */
+ return(NULL);
+ }
+
+ return("inconsistent data in space header");
+}
+
+/*******************************************************************//**
Reads the flushed lsn, arch no, and tablespace flag fields from a data
-file at database startup. */
+file at database startup.
+@retval NULL on success, or if innodb_force_recovery is set
+@return pointer to an error message string */
UNIV_INTERN
-void
+const char*
fil_read_first_page(
/*================*/
os_file_t data_file, /*!< in: open data file */
@@ -1953,9 +2042,10 @@ fil_read_first_page(
lsn_t* max_flushed_lsn) /*!< out: max of flushed
lsn values in data files */
{
- byte* buf;
- byte* page;
- lsn_t flushed_lsn;
+ byte* buf;
+ byte* page;
+ lsn_t flushed_lsn;
+ const char* check_msg = NULL;
buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
@@ -1971,8 +2061,16 @@ fil_read_first_page(
flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+ if (!one_read_already) {
+ check_msg = fil_check_first_page(page);
+ }
+
ut_free(buf);
+ if (check_msg) {
+ return(check_msg);
+ }
+
if (!one_read_already) {
*min_flushed_lsn = flushed_lsn;
*max_flushed_lsn = flushed_lsn;
@@ -1980,7 +2078,7 @@ fil_read_first_page(
*min_arch_log_no = arch_log_no;
*max_arch_log_no = arch_log_no;
#endif /* UNIV_LOG_ARCHIVE */
- return;
+ return(NULL);
}
if (*min_flushed_lsn > flushed_lsn) {
@@ -1997,6 +2095,8 @@ fil_read_first_page(
*max_arch_log_no = arch_log_no;
}
#endif /* UNIV_LOG_ARCHIVE */
+
+ return(NULL);
}
/*================ SINGLE-TABLE TABLESPACES ==========================*/
@@ -2567,7 +2667,7 @@ fil_close_tablespace(
char* cfg_name = fil_make_cfg_name(path);
- os_file_delete_if_exists(cfg_name);
+ os_file_delete_if_exists(innodb_file_data_key, cfg_name);
mem_free(path);
mem_free(cfg_name);
@@ -2650,7 +2750,7 @@ fil_delete_tablespace(
when we drop the database the remove directory will fail. */
{
char* cfg_name = fil_make_cfg_name(path);
- os_file_delete_if_exists(cfg_name);
+ os_file_delete_if_exists(innodb_file_data_key, cfg_name);
mem_free(cfg_name);
}
@@ -2678,7 +2778,8 @@ fil_delete_tablespace(
if (err != DB_SUCCESS) {
rw_lock_x_unlock(&space->latch);
- } else if (!os_file_delete(path) && !os_file_delete_if_exists(path)) {
+ } else if (!os_file_delete(innodb_file_data_key, path)
+ && !os_file_delete_if_exists(innodb_file_data_key, path)) {
/* Note: This is because we have removed the
tablespace instance from the cache. */
@@ -3147,7 +3248,7 @@ fil_delete_link_file(
{
char* link_filepath = fil_make_isl_name(tablename);
- os_file_delete_if_exists(link_filepath);
+ os_file_delete_if_exists(innodb_file_data_key, link_filepath);
mem_free(link_filepath);
}
@@ -3458,7 +3559,7 @@ error_exit_1:
error_exit_2:
os_file_close(file);
if (err != DB_SUCCESS) {
- os_file_delete(path);
+ os_file_delete(innodb_file_data_key, path);
}
error_exit_3:
mem_free(path);
@@ -3473,12 +3574,25 @@ static
void
fil_report_bad_tablespace(
/*======================*/
- char* filepath, /*!< in: filepath */
+ const char* filepath, /*!< in: filepath */
+ const char* check_msg, /*!< in: fil_check_first_page() */
ulint found_id, /*!< in: found space ID */
ulint found_flags, /*!< in: found flags */
ulint expected_id, /*!< in: expected space id */
ulint expected_flags) /*!< in: expected flags */
{
+ if (check_msg) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error %s in file '%s',"
+ "tablespace id=%lu, flags=%lu. "
+ "Please refer to "
+ REFMAN "innodb-troubleshooting-datadict.html "
+ "for how to resolve the issue.",
+ check_msg, filepath,
+ (ulong) expected_id, (ulong) expected_flags);
+ return;
+ }
+
ib_logf(IB_LOG_LEVEL_ERROR,
"In file '%s', tablespace id and flags are %lu and %lu, "
"but in the InnoDB data dictionary they are %lu and %lu. "
@@ -3493,6 +3607,7 @@ fil_report_bad_tablespace(
struct fsp_open_info {
ibool success; /*!< Has the tablespace been opened? */
+ const char* check_msg; /*!< fil_check_first_page() message */
ibool valid; /*!< Is the tablespace valid? */
os_file_t file; /*!< File handle */
char* filepath; /*!< File path to open */
@@ -3635,48 +3750,50 @@ fil_open_single_table_tablespace(
/* Read the first page of the datadir tablespace, if found. */
if (def.success) {
- fil_read_first_page(
+ def.check_msg = fil_read_first_page(
def.file, FALSE, &def.flags, &def.id,
#ifdef UNIV_LOG_ARCHIVE
&space_arch_log_no, &space_arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
&def.lsn, &def.lsn);
+ def.valid = !def.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
but do not compare the DATA_DIR flag, in case the
tablespace was relocated. */
- ulint mod_def_flags = def.flags & ~FSP_FLAGS_MASK_DATA_DIR;
- if (def.id == id && mod_def_flags == mod_flags) {
+ if (def.valid && def.id == id
+ && (def.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
valid_tablespaces_found++;
- def.valid = TRUE;
} else {
+ def.valid = false;
/* Do not use this tablespace. */
fil_report_bad_tablespace(
- def.filepath, def.id,
+ def.filepath, def.check_msg, def.id,
def.flags, id, flags);
}
}
/* Read the first page of the remote tablespace */
if (remote.success) {
- fil_read_first_page(
+ remote.check_msg = fil_read_first_page(
remote.file, FALSE, &remote.flags, &remote.id,
#ifdef UNIV_LOG_ARCHIVE
&remote.arch_log_no, &remote.arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
&remote.lsn, &remote.lsn);
+ remote.valid = !remote.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
but do not compare the DATA_DIR flag, in case the
tablespace was relocated. */
- ulint mod_remote_flags = remote.flags & ~FSP_FLAGS_MASK_DATA_DIR;
- if (remote.id == id && mod_remote_flags == mod_flags) {
+ if (remote.valid && remote.id == id
+ && (remote.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
valid_tablespaces_found++;
- remote.valid = TRUE;
} else {
+ remote.valid = false;
/* Do not use this linked tablespace. */
fil_report_bad_tablespace(
- remote.filepath, remote.id,
+ remote.filepath, remote.check_msg, remote.id,
remote.flags, id, flags);
link_file_is_bad = true;
}
@@ -3684,24 +3801,25 @@ fil_open_single_table_tablespace(
/* Read the first page of the datadir tablespace, if found. */
if (dict.success) {
- fil_read_first_page(
+ dict.check_msg = fil_read_first_page(
dict.file, FALSE, &dict.flags, &dict.id,
#ifdef UNIV_LOG_ARCHIVE
&dict.arch_log_no, &dict.arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
&dict.lsn, &dict.lsn);
+ dict.valid = !dict.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
but do not compare the DATA_DIR flag, in case the
tablespace was relocated. */
- ulint mod_dict_flags = dict.flags & ~FSP_FLAGS_MASK_DATA_DIR;
- if (dict.id == id && mod_dict_flags == mod_flags) {
+ if (dict.valid && dict.id == id
+ && (dict.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
valid_tablespaces_found++;
- dict.valid = TRUE;
} else {
+ dict.valid = false;
/* Do not use this tablespace. */
fil_report_bad_tablespace(
- dict.filepath, dict.id,
+ dict.filepath, dict.check_msg, dict.id,
dict.flags, id, flags);
}
}
@@ -3914,17 +4032,23 @@ fil_validate_single_table_tablespace(
const char* tablename, /*!< in: database/tablename */
fsp_open_info* fsp) /*!< in/out: tablespace info */
{
- fil_read_first_page(
- fsp->file, FALSE, &fsp->flags, &fsp->id,
+ if (const char* check_msg = fil_read_first_page(
+ fsp->file, FALSE, &fsp->flags, &fsp->id,
#ifdef UNIV_LOG_ARCHIVE
- &fsp->arch_log_no, &fsp->arch_log_no,
+ &fsp->arch_log_no, &fsp->arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- &fsp->lsn, &fsp->lsn);
+ &fsp->lsn, &fsp->lsn)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "%s in tablespace %s (table %s)",
+ check_msg, fsp->filepath, tablename);
+ fsp->success = FALSE;
+ return;
+ }
if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
- fprintf(stderr,
- " InnoDB: Error: Tablespace is not sensible;"
- " Table: %s Space ID: %lu Filepath: %s\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tablespace is not sensible;"
+ " Table: %s Space ID: %lu Filepath: %s\n",
tablename, (ulong) fsp->id, fsp->filepath);
fsp->success = FALSE;
return;
@@ -4051,6 +4175,19 @@ fil_load_single_table_tablespace(
fprintf(stderr,
"InnoDB: Error: could not open single-table"
" tablespace file %s\n", def.filepath);
+
+ if (!strncmp(filename,
+ tmp_file_prefix, tmp_file_prefix_length)) {
+ /* Ignore errors for #sql tablespaces. */
+ mem_free(tablename);
+ if (remote.filepath) {
+ mem_free(remote.filepath);
+ }
+ if (def.filepath) {
+ mem_free(def.filepath);
+ }
+ return;
+ }
no_good_file:
fprintf(stderr,
"InnoDB: We do not continue the crash recovery,"
@@ -4075,10 +4212,12 @@ no_good_file:
" recovery here.\n");
will_not_choose:
mem_free(tablename);
- if (remote.success) {
+ if (remote.filepath) {
mem_free(remote.filepath);
}
- mem_free(def.filepath);
+ if (def.filepath) {
+ mem_free(def.filepath);
+ }
if (srv_force_recovery > 0) {
ib_logf(IB_LOG_LEVEL_INFO,
@@ -4089,9 +4228,6 @@ will_not_choose:
return;
}
- /* If debug code, cause a core dump and call stack. For
- release builds just exit and rely on the messages above. */
- ut_ad(0);
exit(1);
}
@@ -4167,7 +4303,7 @@ will_not_choose:
new_path = fil_make_ibbackup_old_name(fsp->filepath);
bool success = os_file_rename(
- innodb_file_data_key, fsp->filepath, new_path));
+ innodb_file_data_key, fsp->filepath, new_path);
ut_a(success);
@@ -4772,7 +4908,13 @@ retry:
goto retry;
}
- fil_node_prepare_for_io(node, fil_system, space);
+ if (!fil_node_prepare_for_io(node, fil_system, space)) {
+ /* The tablespace data file, such as .ibd file, is missing */
+ node->being_extended = false;
+ mutex_exit(&fil_system->mutex);
+
+ return(false);
+ }
/* At this point it is safe to release fil_system mutex. No
other thread can rename, delete or close the file because
@@ -5044,9 +5186,10 @@ NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
Prepares a file node for i/o. Opens the file if it is closed. Updates the
pending i/o's field in the node and the system appropriately. Takes the node
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex. */
+mutex.
+@return false if the file can't be opened, otherwise true */
static
-void
+bool
fil_node_prepare_for_io(
/*====================*/
fil_node_t* node, /*!< in: file node */
@@ -5068,7 +5211,10 @@ fil_node_prepare_for_io(
if (node->open == FALSE) {
/* File is closed: open it */
ut_a(node->n_pending == 0);
- fil_node_open_file(node, system, space);
+
+ if (!fil_node_open_file(node, system, space)) {
+ return(false);
+ }
}
if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) {
@@ -5080,6 +5226,8 @@ fil_node_prepare_for_io(
}
node->n_pending++;
+
+ return(true);
}
/********************************************************************//**
@@ -5177,7 +5325,7 @@ fil_io(
because i/os are not actually handled until
all have been posted: use with great
caution! */
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ bool sync, /*!< in: true if synchronous aio is desired */
ulint space_id, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes;
0 for uncompressed pages */
@@ -5296,7 +5444,7 @@ fil_io(
ut_error;
- } else if (fil_is_user_tablespace_id(space->id)
+ } else if (fil_is_user_tablespace_id(space->id)
&& node->size == 0) {
/* We do not know the size of a single-table tablespace
@@ -5312,7 +5460,28 @@ fil_io(
}
/* Open file if closed */
- fil_node_prepare_for_io(node, fil_system, space);
+ if (!fil_node_prepare_for_io(node, fil_system, space)) {
+ if (space->purpose == FIL_TABLESPACE
+ && fil_is_user_tablespace_id(space->id)) {
+ mutex_exit(&fil_system->mutex);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Trying to do i/o to a tablespace which "
+ "exists without .ibd data file. "
+ "i/o type %lu, space id %lu, page no %lu, "
+ "i/o length %lu bytes",
+ (ulong) type, (ulong) space_id,
+ (ulong) block_offset, (ulong) len);
+
+ return(DB_TABLESPACE_DELETED);
+ }
+
+ /* The tablespace is for the log. For now we simply assert here
+ rather than handle the error along every path on which fil_io()
+ returns. Besides, if the log files are missing, it is hard to
+ promise that the server can continue running. */
+ ut_a(0);
+ }
/* Check that at least the start offset is within the bounds of a
single-table tablespace, including rollback tablespaces. */
@@ -5475,7 +5644,7 @@ fil_flush(
fil_space_t* space;
fil_node_t* node;
os_file_t file;
- ib_int64_t old_mod_counter;
+
mutex_enter(&fil_system->mutex);
@@ -5511,87 +5680,88 @@ fil_flush(
space->n_pending_flushes++; /*!< prevent dropping of the space while
we are flushing */
- node = UT_LIST_GET_FIRST(space->chain);
+ for (node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
- while (node) {
- if (node->modification_counter > node->flush_counter) {
- ut_a(node->open);
+ ib_int64_t old_mod_counter = node->modification_counter;;
+
+ if (old_mod_counter <= node->flush_counter) {
+ continue;
+ }
- /* We want to flush the changes at least up to
- old_mod_counter */
- old_mod_counter = node->modification_counter;
+ ut_a(node->open);
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes++;
- } else {
- fil_n_pending_log_flushes++;
- fil_n_log_flushes++;
- }
+ if (space->purpose == FIL_TABLESPACE) {
+ fil_n_pending_tablespace_flushes++;
+ } else {
+ fil_n_pending_log_flushes++;
+ fil_n_log_flushes++;
+ }
#ifdef __WIN__
- if (node->is_raw_disk) {
+ if (node->is_raw_disk) {
- goto skip_flush;
- }
+ goto skip_flush;
+ }
#endif /* __WIN__ */
retry:
- if (node->n_pending_flushes > 0) {
- /* We want to avoid calling os_file_flush() on
- the file twice at the same time, because we do
- not know what bugs OS's may contain in file
- i/o; sleep for a while */
+ if (node->n_pending_flushes > 0) {
+ /* We want to avoid calling os_file_flush() on
+ the file twice at the same time, because we do
+ not know what bugs OS's may contain in file
+ i/o */
- mutex_exit(&fil_system->mutex);
+ ib_int64_t sig_count =
+ os_event_reset(node->sync_event);
- os_thread_sleep(20000);
+ mutex_exit(&fil_system->mutex);
- mutex_enter(&fil_system->mutex);
+ os_event_wait_low(node->sync_event, sig_count);
- if (node->flush_counter >= old_mod_counter) {
+ mutex_enter(&fil_system->mutex);
- goto skip_flush;
- }
+ if (node->flush_counter >= old_mod_counter) {
- goto retry;
+ goto skip_flush;
}
- ut_a(node->open);
- file = node->handle;
- node->n_pending_flushes++;
+ goto retry;
+ }
- mutex_exit(&fil_system->mutex);
+ ut_a(node->open);
+ file = node->handle;
+ node->n_pending_flushes++;
- /* fprintf(stderr, "Flushing to file %s\n",
- node->name); */
+ mutex_exit(&fil_system->mutex);
- os_file_flush(file);
+ os_file_flush(file);
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system->mutex);
- node->n_pending_flushes--;
-skip_flush:
- if (node->flush_counter < old_mod_counter) {
- node->flush_counter = old_mod_counter;
+ os_event_set(node->sync_event);
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
+ node->n_pending_flushes--;
+skip_flush:
+ if (node->flush_counter < old_mod_counter) {
+ node->flush_counter = old_mod_counter;
- space->is_in_unflushed_spaces = false;
+ if (space->is_in_unflushed_spaces
+ && fil_space_is_flushed(space)) {
- UT_LIST_REMOVE(
- unflushed_spaces,
- fil_system->unflushed_spaces,
- space);
- }
- }
+ space->is_in_unflushed_spaces = false;
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes--;
- } else {
- fil_n_pending_log_flushes--;
+ UT_LIST_REMOVE(
+ unflushed_spaces,
+ fil_system->unflushed_spaces,
+ space);
}
}
- node = UT_LIST_GET_NEXT(chain, node);
+ if (space->purpose == FIL_TABLESPACE) {
+ fil_n_pending_tablespace_flushes--;
+ } else {
+ fil_n_pending_log_flushes--;
+ }
}
space->n_pending_flushes--;
@@ -6131,11 +6301,11 @@ fil_delete_file(
ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name);
- os_file_delete_if_exists(ibd_name);
+ os_file_delete_if_exists(innodb_file_data_key, ibd_name);
char* cfg_name = fil_make_cfg_name(ibd_name);
- os_file_delete_if_exists(cfg_name);
+ os_file_delete_if_exists(innodb_file_data_key, cfg_name);
mem_free(cfg_name);
}
@@ -6198,15 +6368,17 @@ fil_mtr_rename_log(
ulint new_space_id, /*!< in: tablespace id of the new
table */
const char* new_name, /*!< in: new table name */
- const char* tmp_name) /*!< in: temp table name used while
+ const char* tmp_name, /*!< in: temp table name used while
swapping */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- mtr_t mtr;
- mtr_start(&mtr);
- fil_op_write_log(MLOG_FILE_RENAME, old_space_id,
- 0, 0, old_name, tmp_name, &mtr);
- fil_op_write_log(MLOG_FILE_RENAME, new_space_id,
- 0, 0, new_name, old_name, &mtr);
- mtr_commit(&mtr);
-}
+ if (old_space_id != TRX_SYS_SPACE) {
+ fil_op_write_log(MLOG_FILE_RENAME, old_space_id,
+ 0, 0, old_name, tmp_name, mtr);
+ }
+ if (new_space_id != TRX_SYS_SPACE) {
+ fil_op_write_log(MLOG_FILE_RENAME, new_space_id,
+ 0, 0, new_name, old_name, mtr);
+ }
+}
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index dc843a89fb9..d1bb22ed7a9 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1035,6 +1035,11 @@ fsp_try_extend_data_file(
success = fil_extend_space_to_desired_size(&actual_size, space,
size + size_increase);
+ if (!success) {
+
+ return(false);
+ }
+
/* We ignore any fragments of a full megabyte when storing the size
to the space header */
diff --git a/storage/innobase/fts/fts0ast.cc b/storage/innobase/fts/fts0ast.cc
index 972f5acf461..3a03fc63303 100644
--- a/storage/innobase/fts/fts0ast.cc
+++ b/storage/innobase/fts/fts0ast.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,18 @@ Created 2007/3/16 Sunny Bains.
#include "mem0mem.h"
#include "fts0ast.h"
#include "fts0pars.h"
+#include "fts0fts.h"
+
+/* The FTS ast visit pass. */
+enum fts_ast_visit_pass_t {
+ FTS_PASS_FIRST, /*!< First visit pass,
+ process operators excluding
+ FTS_EXIST and FTS_IGNORE */
+ FTS_PASS_EXIST, /*!< Exist visit pass,
+ process operator FTS_EXIST */
+ FTS_PASS_IGNORE /*!< Ignore visit pass,
+ process operator FTS_IGNORE */
+};
/******************************************************************//**
Create an empty fts_ast_node_t.
@@ -66,7 +78,7 @@ fts_ast_create_node_oper(
/******************************************************************//**
This function takes ownership of the ptr and is responsible
for free'ing it
-@return new node */
+@return new node or a node list with tokenized words */
UNIV_INTERN
fts_ast_node_t*
fts_ast_create_node_term(
@@ -74,17 +86,68 @@ fts_ast_create_node_term(
void* arg, /*!< in: ast state instance */
const char* ptr) /*!< in: ast term string */
{
- ulint len = strlen(ptr);
- fts_ast_node_t* node = fts_ast_node_create();
+ fts_ast_state_t* state = static_cast<fts_ast_state_t*>(arg);
+ ulint len = strlen(ptr);
+ ulint cur_pos = 0;
+ fts_ast_node_t* node = NULL;
+ fts_ast_node_t* node_list = NULL;
+ fts_ast_node_t* first_node = NULL;
+
+ /* Scan the incoming string and filter out any "non-word" characters */
+ while (cur_pos < len) {
+ fts_string_t str;
+ ulint offset;
+ ulint cur_len;
+
+ cur_len = innobase_mysql_fts_get_token(
+ state->charset,
+ reinterpret_cast<const byte*>(ptr) + cur_pos,
+ reinterpret_cast<const byte*>(ptr) + len, &str, &offset);
+
+ if (cur_len == 0) {
+ break;
+ }
- node->type = FTS_AST_TERM;
+ cur_pos += cur_len;
- node->term.ptr = static_cast<byte*>(ut_malloc(len + 1));
- memcpy(node->term.ptr, ptr, len + 1);
+ if (str.f_n_char > 0) {
+ /* If the size of a subsequent term (after the first one)
+ is less than fts_min_token_size, we shall ignore
+ it. This is to be consistent with MyISAM behavior */
+ if (first_node && (str.f_n_char < fts_min_token_size)) {
+ continue;
+ }
- fts_ast_state_add_node((fts_ast_state_t*) arg, node);
+ node = fts_ast_node_create();
- return(node);
+ node->type = FTS_AST_TERM;
+
+ node->term.ptr = static_cast<byte*>(ut_malloc(
+ str.f_len + 1));
+ memcpy(node->term.ptr, str.f_str, str.f_len);
+ node->term.ptr[str.f_len] = '\0';
+
+ fts_ast_state_add_node(
+ static_cast<fts_ast_state_t*>(arg), node);
+
+ if (first_node) {
+ /* There is more than one word, create
+ a list to organize them */
+ if (!node_list) {
+ node_list = fts_ast_create_node_list(
+ static_cast<fts_ast_state_t*>(
+ arg),
+ first_node);
+ }
+
+ fts_ast_add_node(node_list, node);
+ } else {
+ first_node = node;
+ }
+ }
+ }
+
+ return((node_list != NULL) ? node_list : first_node);
}
/******************************************************************//**
@@ -101,11 +164,19 @@ fts_ast_create_node_text(
ulint len = strlen(ptr);
fts_ast_node_t* node = NULL;
- ut_ad(len >= 2);
- if (len == 2) {
+ ut_ad(len >= 1);
+
+ if (len <= 2) {
+ /* There is a way to directly supply null terminator
+ in the query string (by using 0x220022) and get here,
+ and certainly it would not make a valid query text */
ut_ad(ptr[0] == '\"');
- ut_ad(ptr[1] == '\"');
+
+ if (len == 2) {
+ ut_ad(ptr[1] == '\"');
+ }
+
return(NULL);
}
@@ -297,6 +368,16 @@ fts_ast_term_set_wildcard(
fts_ast_node_t* node) /*!< in/out: set attribute of
a term node */
{
+ if (!node) {
+ return;
+ }
+
+ /* If it's a node list, the wildcard should be set to the tail node*/
+ if (node->type == FTS_AST_LIST) {
+ ut_ad(node->list.tail != NULL);
+ node = node->list.tail;
+ }
+
ut_a(node->type == FTS_AST_TERM);
ut_a(!node->term.wildcard);
@@ -393,9 +474,9 @@ fts_ast_node_print(
}
/******************************************************************//**
-Traverse the AST - in-order traversal, except for the FTS_IGNORE
-nodes, which will be ignored in the first pass of each level, and
-visited in a second pass after all other nodes in the same level are visited.
+Traverse the AST - in-order traversal, except for the FTS_EXIST and FTS_IGNORE
+nodes, which will be ignored in the first pass of each level, and visited in a
+second and third pass after all other nodes in the same level are visited.
@return DB_SUCCESS if all went well */
UNIV_INTERN
dberr_t
@@ -407,85 +488,142 @@ fts_ast_visit(
void* arg, /*!< in: arg for callback */
bool* has_ignore) /*!< out: true, if the operator
was ignored during processing,
- currently we only ignore
- FTS_IGNORE operator */
+ currently we ignore FTS_EXIST
+ and FTS_IGNORE operators */
{
dberr_t error = DB_SUCCESS;
fts_ast_node_t* oper_node = NULL;
fts_ast_node_t* start_node;
bool revisit = false;
bool will_be_ignored = false;
+ fts_ast_visit_pass_t visit_pass = FTS_PASS_FIRST;
start_node = node->list.head;
ut_a(node->type == FTS_AST_LIST
|| node->type == FTS_AST_SUBEXP_LIST);
+ if (oper == FTS_EXIST_SKIP) {
+ visit_pass = FTS_PASS_EXIST;
+ } else if (oper == FTS_IGNORE_SKIP) {
+ visit_pass = FTS_PASS_IGNORE;
+ }
+
/* In the first pass of the tree, at the leaf level of the
- tree, FTS_IGNORE operation will be ignored. It will be
- repeated at the level above the leaf level */
+ tree, FTS_EXIST and FTS_IGNORE operation will be ignored.
+ It will be repeated at the level above the leaf level.
+
+ The basic idea here is that when we encounter FTS_EXIST or
+ FTS_IGNORE, we will change the operator node into FTS_EXIST_SKIP
+ or FTS_IGNORE_SKIP, and term nodes & text nodes with those operators
+ are ignored in the first pass. We have two passes during the revisit:
+ We process nodes with FTS_EXIST_SKIP in the exist pass, and then
+ process nodes with FTS_IGNORE_SKIP in the ignore pass.
+
+ The order should be strictly followed, or we will get wrong results.
+ For example, we have a query 'a +b -c d +e -f'.
+ first pass: process 'a' and 'd' by union;
+ exist pass: process '+b' and '+e' by intersection;
+ ignore pass: process '-c' and '-f' by difference. */
+
for (node = node->list.head;
node && (error == DB_SUCCESS);
node = node->next) {
- if (node->type == FTS_AST_LIST) {
+ switch(node->type) {
+ case FTS_AST_LIST:
+ if (visit_pass != FTS_PASS_FIRST) {
+ break;
+ }
+
error = fts_ast_visit(oper, node, visitor,
arg, &will_be_ignored);
/* If will_be_ignored is set to true, then
- we encountered and ignored a FTS_IGNORE operator,
- and a second pass is needed to process FTS_IGNORE
- operator */
+ we encountered and ignored a FTS_EXIST or FTS_IGNORE
+ operator. */
if (will_be_ignored) {
revisit = true;
+ /* Remember oper for list in case '-abc&def',
+ ignored oper is from previous node of list.*/
+ node->oper = oper;
+ }
+
+ break;
+
+ case FTS_AST_SUBEXP_LIST:
+ if (visit_pass != FTS_PASS_FIRST) {
+ break;
}
- } else if (node->type == FTS_AST_SUBEXP_LIST) {
+
error = fts_ast_visit_sub_exp(node, visitor, arg);
- } else if (node->type == FTS_AST_OPER) {
+ break;
+
+ case FTS_AST_OPER:
oper = node->oper;
oper_node = node;
- } else {
+
+ /* Change the operator for revisit */
+ if (oper == FTS_EXIST) {
+ oper_node->oper = FTS_EXIST_SKIP;
+ } else if (oper == FTS_IGNORE) {
+ oper_node->oper = FTS_IGNORE_SKIP;
+ }
+
+ break;
+
+ default:
if (node->visited) {
continue;
}
ut_a(oper == FTS_NONE || !oper_node
- || oper_node->oper == oper);
+ || oper_node->oper == oper
+ || oper_node->oper == FTS_EXIST_SKIP
+ || oper_node->oper == FTS_IGNORE_SKIP);
- if (oper == FTS_IGNORE) {
+ if (oper== FTS_EXIST || oper == FTS_IGNORE) {
*has_ignore = true;
- /* Change the operator to FTS_IGNORE_SKIP,
- so that it is processed in the second pass */
- oper_node->oper = FTS_IGNORE_SKIP;
continue;
}
- if (oper == FTS_IGNORE_SKIP) {
- /* This must be the second pass, now we process
- the FTS_IGNORE operator */
- visitor(FTS_IGNORE, node, arg);
- } else {
- visitor(oper, node, arg);
+ /* Process leaf node according to its pass.*/
+ if (oper == FTS_EXIST_SKIP
+ && visit_pass == FTS_PASS_EXIST) {
+ error = visitor(FTS_EXIST, node, arg);
+ node->visited = true;
+ } else if (oper == FTS_IGNORE_SKIP
+ && visit_pass == FTS_PASS_IGNORE) {
+ error = visitor(FTS_IGNORE, node, arg);
+ node->visited = true;
+ } else if (visit_pass == FTS_PASS_FIRST) {
+ error = visitor(oper, node, arg);
+ node->visited = true;
}
-
- node->visited = true;
}
}
- /* Second pass to process the skipped FTS_IGNORE operation.
- It is only performed at the level above leaf level */
if (revisit) {
+ /* Exist pass processes the skipped FTS_EXIST operation. */
+ for (node = start_node;
+ node && error == DB_SUCCESS;
+ node = node->next) {
+
+ if (node->type == FTS_AST_LIST
+ && node->oper != FTS_IGNORE) {
+ error = fts_ast_visit(FTS_EXIST_SKIP, node,
+ visitor, arg, &will_be_ignored);
+ }
+ }
+
+ /* Ignore pass processes the skipped FTS_IGNORE operation. */
for (node = start_node;
node && error == DB_SUCCESS;
node = node->next) {
if (node->type == FTS_AST_LIST) {
- /* In this pass, it will process all those
- operators ignored in the first pass, and those
- whose operators are set to FTS_IGNORE_SKIP */
- error = fts_ast_visit(
- oper, node, visitor, arg,
- &will_be_ignored);
+ error = fts_ast_visit(FTS_IGNORE_SKIP, node,
+ visitor, arg, &will_be_ignored);
}
}
}
diff --git a/storage/innobase/fts/fts0blex.cc b/storage/innobase/fts/fts0blex.cc
index 1abd737ec06..dccedac0212 100644
--- a/storage/innobase/fts/fts0blex.cc
+++ b/storage/innobase/fts/fts0blex.cc
@@ -52,7 +52,7 @@ typedef uint32_t flex_uint32_t;
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
+typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
@@ -185,7 +185,7 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE;
#define EOB_ACT_LAST_MATCH 2
#define YY_LESS_LINENO(n)
-
+
/* Return all but the first "n" matched characters back to the input stream. */
#define yyless(n) \
do \
@@ -247,7 +247,7 @@ struct yy_buffer_state
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
@@ -305,9 +305,9 @@ YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner
YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
-void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
-void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
-void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
#define yy_new_buffer fts0b_create_buffer
@@ -347,7 +347,7 @@ typedef int yy_state_type;
static yy_state_type yy_get_previous_state (yyscan_t yyscanner );
static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner);
static int yy_get_next_buffer (yyscan_t yyscanner );
-static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
/* Done after the current pattern has been matched and before the
* corresponding action - sets up yytext.
@@ -579,11 +579,11 @@ extern int fts0bwrap (yyscan_t yyscanner );
#endif
#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
+static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
#endif
#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
+static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
#endif
#ifndef YY_NO_INPUT
@@ -1609,7 +1609,7 @@ YY_BUFFER_STATE fts0b_scan_bytes (yyconst char * yybytes, int _yybytes_len , y
#define YY_EXIT_FAILURE 2
#endif
-static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
+static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
(void) fprintf( stderr, "%s\n", msg );
exit( YY_EXIT_FAILURE );
@@ -1910,7 +1910,7 @@ int fts0blex_destroy (yyscan_t yyscanner)
*/
#ifndef yytext_ptr
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
+static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
register int i;
for ( i = 0; i < n; ++i )
@@ -1919,7 +1919,7 @@ static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t y
#endif
#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
+static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
register int n;
for ( n = 0; s[n]; ++n )
@@ -1929,12 +1929,12 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute
}
#endif
-void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
+void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
return (void *) malloc( size );
}
-void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
+void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
/* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
@@ -1946,7 +1946,7 @@ void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attr
return (void *) realloc( (char *) ptr, size );
}
-void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
+void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
free( (char *) ptr ); /* see fts0brealloc() for (char *) cast */
}
diff --git a/storage/innobase/fts/fts0config.cc b/storage/innobase/fts/fts0config.cc
index 9cac680101c..29d6771f9e7 100644
--- a/storage/innobase/fts/fts0config.cc
+++ b/storage/innobase/fts/fts0config.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -221,7 +221,7 @@ fts_config_set_value(
graph = fts_parse_sql(
fts_table, info,
- "BEGIN UPDATE %s SET value = :value WHERE key = :name;");
+ "BEGIN UPDATE \"%s\" SET value = :value WHERE key = :name;");
trx->op_info = "setting FTS config value";
@@ -246,7 +246,7 @@ fts_config_set_value(
graph = fts_parse_sql(
fts_table, info,
"BEGIN\n"
- "INSERT INTO %s VALUES(:name, :value);");
+ "INSERT INTO \"%s\" VALUES(:name, :value);");
trx->op_info = "inserting FTS config value";
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index a81d3043e9c..9b06497c8ae 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -48,6 +48,17 @@ Full Text Search interface
a configurable variable */
UNIV_INTERN ulong fts_max_cache_size;
+/** Whether the total memory used for FTS cache is exhausted, and we will
+need a sync to free some memory */
+UNIV_INTERN bool fts_need_sync = false;
+
+/** Variable specifying the total memory allocated for FTS cache */
+UNIV_INTERN ulong fts_max_total_cache_size;
+
+/** This is FTS result cache limit for each query and would be
+a configurable variable */
+UNIV_INTERN ulong fts_result_cache_limit;
+
/** Variable specifying the maximum FTS max token size */
UNIV_INTERN ulong fts_max_token_size;
@@ -146,74 +157,65 @@ struct fts_aux_table_t {
static const char* fts_create_common_tables_sql = {
"BEGIN\n"
""
- "CREATE TABLE %s_ADDED (\n"
+ "CREATE TABLE \"%s_DELETED\" (\n"
" doc_id BIGINT UNSIGNED\n"
") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON %s_ADDED(doc_id);\n"
+ "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DELETED\"(doc_id);\n"
""
- "CREATE TABLE %s_DELETED (\n"
- " doc_id BIGINT UNSIGNED\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON %s_DELETED(doc_id);\n"
- ""
- "CREATE TABLE %s_DELETED_CACHE (\n"
+ "CREATE TABLE \"%s_DELETED_CACHE\" (\n"
" doc_id BIGINT UNSIGNED\n"
") COMPACT;\n"
"CREATE UNIQUE CLUSTERED INDEX IND "
- "ON %s_DELETED_CACHE(doc_id);\n"
+ "ON \"%s_DELETED_CACHE\"(doc_id);\n"
""
- "CREATE TABLE %s_BEING_DELETED (\n"
+ "CREATE TABLE \"%s_BEING_DELETED\" (\n"
" doc_id BIGINT UNSIGNED\n"
") COMPACT;\n"
"CREATE UNIQUE CLUSTERED INDEX IND "
- "ON %s_BEING_DELETED(doc_id);\n"
+ "ON \"%s_BEING_DELETED\"(doc_id);\n"
""
- "CREATE TABLE %s_BEING_DELETED_CACHE (\n"
+ "CREATE TABLE \"%s_BEING_DELETED_CACHE\" (\n"
" doc_id BIGINT UNSIGNED\n"
") COMPACT;\n"
"CREATE UNIQUE CLUSTERED INDEX IND "
- "ON %s_BEING_DELETED_CACHE(doc_id);\n"
+ "ON \"%s_BEING_DELETED_CACHE\"(doc_id);\n"
""
- "CREATE TABLE %s_CONFIG (\n"
+ "CREATE TABLE \"%s_CONFIG\" (\n"
" key CHAR(50),\n"
" value CHAR(50) NOT NULL\n"
") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON %s_CONFIG(key);\n"
- ""
- "CREATE TABLE %s_STOPWORDS (\n"
- " word CHAR\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON %s_STOPWORDS(word);\n",
+ "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_CONFIG\"(key);\n"
};
-/** Template for creating the FTS auxiliary index specific tables. */
+#ifdef FTS_DOC_STATS_DEBUG
+/** Template for creating the FTS auxiliary index specific tables. This is
+mainly designed for the statistics work in the future */
static const char* fts_create_index_tables_sql = {
"BEGIN\n"
""
- "CREATE TABLE %s_DOC_ID (\n"
+ "CREATE TABLE \"%s_DOC_ID\" (\n"
" doc_id BIGINT UNSIGNED,\n"
" word_count INTEGER UNSIGNED NOT NULL\n"
") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON %s_DOC_ID(doc_id);\n"
+ "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DOC_ID\"(doc_id);\n"
};
+#endif
/** Template for creating the ancillary FTS tables word index tables. */
static const char* fts_create_index_sql = {
"BEGIN\n"
""
"CREATE UNIQUE CLUSTERED INDEX FTS_INDEX_TABLE_IND "
- "ON %s(word, first_doc_id);\n"
+ "ON \"%s\"(word, first_doc_id);\n"
};
/** FTS auxiliary table suffixes that are common to all FT indexes. */
static const char* fts_common_tables[] = {
- "ADDED",
"BEING_DELETED",
"BEING_DELETED_CACHE",
"CONFIG",
"DELETED",
"DELETED_CACHE",
- "STOPWORDS",
NULL
};
@@ -232,19 +234,19 @@ const fts_index_selector_t fts_index_selector[] = {
static const char* fts_config_table_insert_values_sql =
"BEGIN\n"
"\n"
- "INSERT INTO %s VALUES('"
+ "INSERT INTO \"%s\" VALUES('"
FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
""
- "INSERT INTO %s VALUES('"
+ "INSERT INTO \"%s\" VALUES('"
FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n"
""
- "INSERT INTO %s VALUES ('"
+ "INSERT INTO \"%s\" VALUES ('"
FTS_SYNCED_DOC_ID "', '0');\n"
""
- "INSERT INTO %s VALUES ('"
+ "INSERT INTO \"%s\" VALUES ('"
FTS_TOTAL_DELETED_COUNT "', '0');\n"
"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
- "INSERT INTO %s VALUES ('"
+ "INSERT INTO \"%s\" VALUES ('"
FTS_TABLE_STATE "', '0');\n";
/****************************************************************//**
@@ -355,6 +357,13 @@ fts_load_default_stopword(
allocator = stopword_info->heap;
heap = static_cast<mem_heap_t*>(allocator->arg);
+
+ if (!stopword_info->cached_stopword) {
+ /* For default stopword, we always use fts_utf8_string_cmp() */
+ stopword_info->cached_stopword = rbt_create(
+ sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
+ }
+
stop_words = stopword_info->cached_stopword;
str.f_n_char = 0;
@@ -468,9 +477,17 @@ fts_load_user_stopword(
/* Validate the user table existence and in the right
format */
- if (!fts_valid_stopword_table(stopword_table_name)) {
+ stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
+ if (!stopword_info->charset) {
ret = FALSE;
goto cleanup;
+ } else if (!stopword_info->cached_stopword) {
+ /* Create the stopword RB tree with the stopword column
+ charset. All comparison will use this charset */
+ stopword_info->cached_stopword = rbt_create_arg_cmp(
+ sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
+ (void*)stopword_info->charset);
+
}
info = pars_info_create();
@@ -638,6 +655,8 @@ fts_cache_create(
cache->sync_heap = ib_heap_allocator_create(heap);
cache->sync_heap->arg = NULL;
+ fts_need_sync = false;
+
cache->sync = static_cast<fts_sync_t*>(
mem_heap_zalloc(heap, sizeof(fts_sync_t)));
@@ -649,10 +668,8 @@ fts_cache_create(
fts_cache_init(cache);
- /* Create stopword RB tree. The stopword tree will
- remain in cache for the duration of FTS cache's lifetime */
- cache->stopword_info.cached_stopword = rbt_create(
- sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
+ cache->stopword_info.cached_stopword = NULL;
+ cache->stopword_info.charset = NULL;
cache->stopword_info.heap = cache->self_heap;
@@ -922,6 +939,8 @@ fts_que_graph_free_check_lock(
mutex_enter(&dict_sys->mutex);
}
+ ut_ad(mutex_own(&dict_sys->mutex));
+
que_graph_free(graph);
if (!has_dict) {
@@ -1199,7 +1218,10 @@ fts_cache_destroy(
mutex_free(&cache->optimize_lock);
mutex_free(&cache->deleted_lock);
mutex_free(&cache->doc_id_lock);
- rbt_free(cache->stopword_info.cached_stopword);
+
+ if (cache->stopword_info.cached_stopword) {
+ rbt_free(cache->stopword_info.cached_stopword);
+ }
if (cache->sync_heap->arg) {
mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
@@ -1501,6 +1523,112 @@ fts_drop_table(
}
/****************************************************************//**
+Rename a single auxiliary table due to database name change.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+fts_rename_one_aux_table(
+/*=====================*/
+ const char* new_name, /*!< in: new parent tbl name */
+ const char* fts_table_old_name, /*!< in: old aux tbl name */
+ trx_t* trx) /*!< in: transaction */
+{
+ char fts_table_new_name[MAX_TABLE_NAME_LEN];
+ ulint new_db_name_len = dict_get_db_name_len(new_name);
+ ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
+ ulint table_new_name_len = strlen(fts_table_old_name)
+ + new_db_name_len - old_db_name_len;
+
+ /* Check if the new and old database names are the same, if so,
+ nothing to do */
+ ut_ad((new_db_name_len != old_db_name_len)
+ || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
+
+ /* Get the database name from "new_name", and table name
+ from the fts_table_old_name */
+ strncpy(fts_table_new_name, new_name, new_db_name_len);
+ strncpy(fts_table_new_name + new_db_name_len,
+ strchr(fts_table_old_name, '/'),
+ table_new_name_len - new_db_name_len);
+ fts_table_new_name[table_new_name_len] = 0;
+
+ return(row_rename_table_for_mysql(
+ fts_table_old_name, fts_table_new_name, trx, false));
+}
+
+/****************************************************************//**
+Rename auxiliary tables for all FTS indexes of a table. This rename
+is due to a database name change.
+@return DB_SUCCESS or error code */
+
+dberr_t
+fts_rename_aux_tables(
+/*==================*/
+ dict_table_t* table, /*!< in: user Table */
+ const char* new_name, /*!< in: new table name */
+ trx_t* trx) /*!< in: transaction */
+{
+ ulint i;
+ fts_table_t fts_table;
+
+ FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
+
+ /* Rename common auxiliary tables */
+ for (i = 0; fts_common_tables[i] != NULL; ++i) {
+ char* old_table_name;
+ dberr_t err = DB_SUCCESS;
+
+ fts_table.suffix = fts_common_tables[i];
+
+ old_table_name = fts_get_table_name(&fts_table);
+
+ err = fts_rename_one_aux_table(new_name, old_table_name, trx);
+
+ mem_free(old_table_name);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ fts_t* fts = table->fts;
+
+ /* Rename index specific auxiliary tables */
+ for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
+ ++i) {
+ dict_index_t* index;
+
+ index = static_cast<dict_index_t*>(
+ ib_vector_getp(fts->indexes, i));
+
+ FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
+
+ for (ulint j = 0; fts_index_selector[j].value; ++j) {
+ dberr_t err;
+ char* old_table_name;
+
+ fts_table.suffix = fts_get_suffix(j);
+
+ old_table_name = fts_get_table_name(&fts_table);
+
+ err = fts_rename_one_aux_table(
+ new_name, old_table_name, trx);
+
+ DBUG_EXECUTE_IF("fts_rename_failure",
+ err = DB_DEADLOCK;);
+
+ mem_free(old_table_name);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/****************************************************************//**
Drops the common ancillary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been called
before this.
@@ -1586,13 +1714,15 @@ fts_drop_index_tables(
trx_t* trx, /*!< in: transaction */
dict_index_t* index) /*!< in: Index to drop */
{
- fts_table_t fts_table;
dberr_t error = DB_SUCCESS;
+#ifdef FTS_DOC_STATS_DEBUG
+ fts_table_t fts_table;
static const char* index_tables[] = {
"DOC_ID",
NULL
};
+#endif /* FTS_DOC_STATS_DEBUG */
dberr_t err = fts_drop_index_split_tables(trx, index);
@@ -1601,6 +1731,7 @@ fts_drop_index_tables(
error = err;
}
+#ifdef FTS_DOC_STATS_DEBUG
FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
for (ulint i = 0; index_tables[i] != NULL; ++i) {
@@ -1619,6 +1750,7 @@ fts_drop_index_tables(
mem_free(table_name);
}
+#endif /* FTS_DOC_STATS_DEBUG */
return(error);
}
@@ -1884,7 +2016,6 @@ fts_create_index_tables_low(
{
ulint i;
- char* sql;
que_t* graph;
fts_table_t fts_table;
dberr_t error = DB_SUCCESS;
@@ -1896,6 +2027,9 @@ fts_create_index_tables_low(
fts_table.parent = table_name;
fts_table.table = NULL;
+#ifdef FTS_DOC_STATS_DEBUG
+ char* sql;
+
/* Create the FTS auxiliary tables that are specific
to an FTS index. */
sql = fts_prepare_sql(&fts_table, fts_create_index_tables_sql);
@@ -1905,6 +2039,7 @@ fts_create_index_tables_low(
error = fts_eval_sql(trx, graph);
que_graph_free(graph);
+#endif /* FTS_DOC_STATS_DEBUG */
for (i = 0; fts_index_selector[i].value && error == DB_SUCCESS; ++i) {
dict_table_t* new_table;
@@ -2501,12 +2636,14 @@ fts_get_next_doc_id(
/* Otherwise, simply increment the value in cache */
mutex_enter(&cache->doc_id_lock);
- ++cache->next_doc_id;
+ *doc_id = ++cache->next_doc_id;
+ mutex_exit(&cache->doc_id_lock);
+ } else {
+ mutex_enter(&cache->doc_id_lock);
+ *doc_id = cache->next_doc_id;
mutex_exit(&cache->doc_id_lock);
}
- *doc_id = cache->next_doc_id;
-
return(DB_SUCCESS);
}
@@ -2555,7 +2692,7 @@ retry:
graph = fts_parse_sql(
&fts_table, info,
"DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS SELECT value FROM %s"
+ "DECLARE CURSOR c IS SELECT value FROM \"%s\""
" WHERE key = 'synced_doc_id' FOR UPDATE;\n"
"BEGIN\n"
""
@@ -2841,7 +2978,7 @@ fts_delete(
graph = fts_parse_sql(
&fts_table,
info,
- "BEGIN INSERT INTO %s VALUES (:doc_id);");
+ "BEGIN INSERT INTO \"%s\" VALUES (:doc_id);");
error = fts_eval_sql(trx, graph);
@@ -3404,7 +3541,13 @@ fts_add_doc_by_id(
rw_lock_x_unlock(&table->fts->cache->lock);
- if (cache->total_size > fts_max_cache_size) {
+ DBUG_EXECUTE_IF(
+ "fts_instrument_sync",
+ fts_sync(cache->sync);
+ );
+
+ if (cache->total_size > fts_max_cache_size
+ || fts_need_sync) {
fts_sync(cache->sync);
}
@@ -3492,7 +3635,7 @@ fts_get_max_doc_id(
btr_pcur_open_at_index_side(
false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
- if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
+ if (!page_is_empty(btr_pcur_get_page(&pcur))) {
const rec_t* rec = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
@@ -3711,7 +3854,7 @@ fts_write_node(
fts_table,
info,
"BEGIN\n"
- "INSERT INTO %s VALUES "
+ "INSERT INTO \"%s\" VALUES "
"(:token, :first_doc_id,"
" :last_doc_id, :doc_count, :ilist);");
}
@@ -3756,7 +3899,7 @@ fts_sync_add_deleted_cache(
graph = fts_parse_sql(
&fts_table,
info,
- "BEGIN INSERT INTO %s VALUES (:doc_id);");
+ "BEGIN INSERT INTO \"%s\" VALUES (:doc_id);");
for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
fts_update_t* update;
@@ -3937,7 +4080,7 @@ fts_sync_write_doc_stat(
*graph = fts_parse_sql(
&fts_table,
info,
- "BEGIN INSERT INTO %s VALUES (:doc_id, :count);");
+ "BEGIN INSERT INTO \"%s\" VALUES (:doc_id, :count);");
}
for (;;) {
@@ -4303,6 +4446,10 @@ fts_sync(
}
}
+ DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
+ sync->interrupted = true;
+ );
+
if (error == DB_SUCCESS && !sync->interrupted) {
error = fts_sync_commit(sync);
} else {
@@ -4553,7 +4700,7 @@ fts_get_docs_clear(
}
/*********************************************************************//**
-Get the initial Doc ID by consulting the ADDED and the CONFIG table
+Get the initial Doc ID by consulting the CONFIG table
@return initial Doc ID */
UNIV_INTERN
doc_id_t
@@ -4656,7 +4803,7 @@ fts_get_rows_count(
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
" SELECT COUNT(*) "
- " FROM %s;\n"
+ " FROM \"%s\";\n"
"BEGIN\n"
"\n"
"OPEN c;\n"
@@ -4892,20 +5039,20 @@ fts_get_doc_id_from_rec(
ulint len;
const byte* data;
ulint col_no;
- ulint* offsets;
doc_id_t doc_id = 0;
dict_index_t* clust_index;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ mem_heap_t* my_heap = heap;
ut_a(table->fts->doc_col != ULINT_UNDEFINED);
- offsets = offsets_;
clust_index = dict_table_get_first_index(table);
- offsets_[0] = UT_ARR_SIZE(offsets_);
+ rec_offs_init(offsets_);
offsets = rec_get_offsets(
- rec, clust_index, offsets, ULINT_UNDEFINED, &heap);
+ rec, clust_index, offsets, ULINT_UNDEFINED, &my_heap);
col_no = dict_col_get_clust_pos(
&table->cols[table->fts->doc_col], clust_index);
@@ -4917,6 +5064,10 @@ fts_get_doc_id_from_rec(
ut_ad(8 == sizeof(doc_id));
doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
+ if (my_heap && !heap) {
+ mem_heap_free(my_heap);
+ }
+
return(doc_id);
}
@@ -5794,7 +5945,7 @@ fts_check_and_drop_orphaned_tables(
ib_vector_get(tables, i));
table = dict_table_open_on_id(
- aux_table->parent_id, TRUE, FALSE);
+ aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
if (table == NULL || table->fts == NULL) {
@@ -5844,7 +5995,8 @@ fts_check_and_drop_orphaned_tables(
path = fil_make_ibd_name(
aux_table->name, false);
- os_file_delete_if_exists(path);
+ os_file_delete_if_exists(innodb_file_data_key,
+ path);
mem_free(path);
}
@@ -5995,18 +6147,19 @@ fts_drop_orphaned_tables(void)
/**********************************************************************//**
Check whether user supplied stopword table is of the right format.
Caller is responsible to hold dictionary locks.
-@return TRUE if the table qualifies */
+@return the stopword column charset if the table qualifies, NULL otherwise */
UNIV_INTERN
-ibool
+CHARSET_INFO*
fts_valid_stopword_table(
/*=====================*/
const char* stopword_table_name) /*!< in: Stopword table
name */
{
dict_table_t* table;
+ dict_col_t* col = NULL;
if (!stopword_table_name) {
- return(FALSE);
+ return(NULL);
}
table = dict_table_get_low(stopword_table_name);
@@ -6016,9 +6169,8 @@ fts_valid_stopword_table(
"InnoDB: user stopword table %s does not exist.\n",
stopword_table_name);
- return(FALSE);
+ return(NULL);
} else {
- dict_col_t* col;
const char* col_name;
col_name = dict_table_get_col_name(table, 0);
@@ -6029,22 +6181,27 @@ fts_valid_stopword_table(
"table %s. Its first column must be named as "
"'value'.\n", stopword_table_name);
- return(FALSE);
+ return(NULL);
}
col = dict_table_get_nth_col(table, 0);
- if (col->mtype != DATA_VARCHAR) {
+ if (col->mtype != DATA_VARCHAR
+ && col->mtype != DATA_VARMYSQL) {
fprintf(stderr,
"InnoDB: invalid column type for stopword "
"table %s. Its first column must be of "
"varchar type\n", stopword_table_name);
- return(FALSE);
+ return(NULL);
}
}
- return(TRUE);
+ ut_ad(col);
+
+ return(innobase_get_fts_charset(
+ static_cast<int>(col->prtype & DATA_MYSQL_TYPE_MASK),
+ static_cast<ulint>(dtype_get_charset_coll(col->prtype))));
}
/**********************************************************************//**
@@ -6109,7 +6266,7 @@ fts_load_stopword(
}
/* If stopword is turned off, no need to continue to load the
- stopword into cache */
+ stopword into cache, but we still need to do initialization */
if (!use_stopword) {
cache->stopword_info.status = STOPWORD_OFF;
goto cleanup;
@@ -6166,6 +6323,11 @@ cleanup:
trx_free_for_background(trx);
}
+ if (!cache->stopword_info.cached_stopword) {
+ cache->stopword_info.cached_stopword = rbt_create(
+ sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
+ }
+
return(error == DB_SUCCESS);
}
@@ -6329,7 +6491,6 @@ fts_init_index(
dict_index_t* index;
doc_id_t start_doc;
fts_get_doc_t* get_doc = NULL;
- ibool has_fts = TRUE;
fts_cache_t* cache = table->fts->cache;
bool need_init = false;
@@ -6367,11 +6528,15 @@ fts_init_index(
ut_a(index);
- has_fts = FALSE;
fts_doc_fetch_by_doc_id(NULL, start_doc, index,
FTS_FETCH_DOC_BY_ID_LARGE,
fts_init_get_doc_id, cache);
} else {
+ if (table->fts->cache->stopword_info.status
+ & STOPWORD_NOT_INIT) {
+ fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
+ }
+
for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
get_doc = static_cast<fts_get_doc_t*>(
ib_vector_get(cache->get_docs, i));
@@ -6384,13 +6549,6 @@ fts_init_index(
}
}
- if (has_fts) {
- if (table->fts->cache->stopword_info.status
- & STOPWORD_NOT_INIT) {
- fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
- }
- }
-
table->fts->fts_status |= ADDED_TABLE_SYNCED;
fts_get_docs_clear(cache->get_docs);
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
index 9abeeccac91..0fd577c5767 100644
--- a/storage/innobase/fts/fts0opt.cc
+++ b/storage/innobase/fts/fts0opt.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -54,6 +54,9 @@ static const ulint FTS_OPTIMIZE_INTERVAL_IN_SECS = 300;
/** Server is shutting down, so does we exiting the optimize thread */
static bool fts_opt_start_shutdown = false;
+/** Last time we checked whether the system needs a sync */
+static ib_time_t last_check_sync_time;
+
#if 0
/** Check each table in round robin to see whether they'd
need to be "optimized" */
@@ -242,22 +245,22 @@ static const char* fts_init_delete_sql =
"BEGIN\n"
"\n"
"INSERT INTO %s_BEING_DELETED\n"
- "SELECT doc_id FROM %s_DELETED;\n"
+ "SELECT doc_id FROM \"%s_DELETED\";\n"
"\n"
"INSERT INTO %s_BEING_DELETED_CACHE\n"
- "SELECT doc_id FROM %s_DELETED_CACHE;\n";
+ "SELECT doc_id FROM \"%s_DELETED_CACHE\";\n";
static const char* fts_delete_doc_ids_sql =
"BEGIN\n"
"\n"
- "DELETE FROM %s_DELETED WHERE doc_id = :doc_id1;\n"
- "DELETE FROM %s_DELETED_CACHE WHERE doc_id = :doc_id2;\n";
+ "DELETE FROM \"%s_DELETED\" WHERE doc_id = :doc_id1;\n"
+ "DELETE FROM \"%s_DELETED_CACHE\" WHERE doc_id = :doc_id2;\n";
static const char* fts_end_delete_sql =
"BEGIN\n"
"\n"
- "DELETE FROM %s_BEING_DELETED;\n"
- "DELETE FROM %s_BEING_DELETED_CACHE;\n";
+ "DELETE FROM \"%s_BEING_DELETED\";\n"
+ "DELETE FROM \"%s_BEING_DELETED_CACHE\";\n";
/**********************************************************************//**
Initialize fts_zip_t. */
@@ -500,7 +503,7 @@ fts_index_fetch_nodes(
"DECLARE CURSOR c IS"
" SELECT word, doc_count, first_doc_id, last_doc_id, "
"ilist\n"
- " FROM %s\n"
+ " FROM \"%s\"\n"
" WHERE word LIKE :word\n"
" ORDER BY first_doc_id;\n"
"BEGIN\n"
@@ -824,7 +827,7 @@ fts_index_fetch_words(
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
" SELECT word\n"
- " FROM %s\n"
+ " FROM \"%s\"\n"
" WHERE word > :word\n"
" ORDER BY word;\n"
"BEGIN\n"
@@ -984,7 +987,7 @@ fts_table_fetch_doc_ids(
info,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT doc_id FROM %s;\n"
+ " SELECT doc_id FROM \"%s\";\n"
"BEGIN\n"
"\n"
"OPEN c;\n"
@@ -1457,7 +1460,7 @@ fts_optimize_write_word(
graph = fts_parse_sql(
fts_table,
info,
- "BEGIN DELETE FROM %s WHERE word = :word;");
+ "BEGIN DELETE FROM \"%s\" WHERE word = :word;");
error = fts_eval_sql(trx, graph);
@@ -2813,6 +2816,43 @@ fts_optimize_how_many(
return(n_tables);
}
+/**********************************************************************//**
+Check if the total cache size of all registered FTS tables exceeds fts_max_total_cache_size.
+@return true if the limit is exceeded and a sync is needed, false otherwise */
+static
+bool
+fts_is_sync_needed(
+/*===============*/
+ const ib_vector_t* tables) /*!< in: registered tables
+ vector*/
+{
+ ulint total_memory = 0;
+ double time_diff = difftime(ut_time(), last_check_sync_time);
+
+ if (fts_need_sync || time_diff < 5) { /* a sync is already requested, or the last check ran under 5 seconds ago */
+ return(false);
+ }
+
+ last_check_sync_time = ut_time(); /* restart the throttle interval */
+
+ for (ulint i = 0; i < ib_vector_size(tables); ++i) {
+ const fts_slot_t* slot;
+
+ slot = static_cast<const fts_slot_t*>(
+ ib_vector_get_const(tables, i));
+
+ if (slot->table && slot->table->fts) { /* skip slots with no table or no FTS data */
+ total_memory += slot->table->fts->cache->total_size;
+ }
+
+ if (total_memory > fts_max_total_cache_size) { /* stop summing as soon as the limit is exceeded */
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
#if 0
/*********************************************************************//**
Check whether a table needs to be optimized. */
@@ -2933,6 +2973,10 @@ fts_optimize_thread(
/* Timeout ? */
if (msg == NULL) {
+ if (fts_is_sync_needed(tables)) {
+ fts_need_sync = true;
+ }
+
continue;
}
@@ -3055,6 +3099,7 @@ fts_optimize_init(void)
fts_optimize_wq = ib_wqueue_create();
ut_a(fts_optimize_wq != NULL);
+ last_check_sync_time = ut_time();
os_thread_create(fts_optimize_thread, fts_optimize_wq, NULL);
}
diff --git a/storage/innobase/fts/fts0pars.cc b/storage/innobase/fts/fts0pars.cc
index dd2984b1beb..a4009106c83 100644
--- a/storage/innobase/fts/fts0pars.cc
+++ b/storage/innobase/fts/fts0pars.cc
@@ -1,21 +1,19 @@
+/* A Bison parser, made by GNU Bison 2.5. */
-/* A Bison parser, made by GNU Bison 2.4.1. */
-
-/* Skeleton implementation for Bison's Yacc-like parsers in C
-
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
- Free Software Foundation, Inc.
-
+/* Bison implementation for Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
@@ -28,7 +26,7 @@
special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public
License without this special exception.
-
+
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
@@ -46,7 +44,7 @@
#define YYBISON 1
/* Bison version. */
-#define YYBISON_VERSION "2.4.1"
+#define YYBISON_VERSION "2.5"
/* Skeleton name. */
#define YYSKELETON_NAME "yacc.c"
@@ -75,7 +73,7 @@
/* Copy the first part of user declarations. */
-/* Line 189 of yacc.c */
+/* Line 268 of yacc.c */
#line 26 "fts0pars.y"
@@ -112,8 +110,8 @@ struct fts_lexer_t {
-/* Line 189 of yacc.c */
-#line 117 "fts0pars.cc"
+/* Line 268 of yacc.c */
+#line 115 "fts0pars.cc"
/* Enabling traces. */
#ifndef YYDEBUG
@@ -153,7 +151,7 @@ struct fts_lexer_t {
typedef union YYSTYPE
{
-/* Line 214 of yacc.c */
+/* Line 293 of yacc.c */
#line 61 "fts0pars.y"
int oper;
@@ -162,8 +160,8 @@ typedef union YYSTYPE
-/* Line 214 of yacc.c */
-#line 167 "fts0pars.cc"
+/* Line 293 of yacc.c */
+#line 165 "fts0pars.cc"
} YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
@@ -174,8 +172,8 @@ typedef union YYSTYPE
/* Copy the second part of user declarations. */
-/* Line 264 of yacc.c */
-#line 179 "fts0pars.cc"
+/* Line 343 of yacc.c */
+#line 177 "fts0pars.cc"
#ifdef short
# undef short
@@ -225,7 +223,7 @@ typedef short int yytype_int16;
#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
#ifndef YY_
-# if YYENABLE_NLS
+# if defined YYENABLE_NLS && YYENABLE_NLS
# if ENABLE_NLS
# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
# define YY_(msgid) dgettext ("bison-runtime", msgid)
@@ -278,11 +276,11 @@ YYID (yyi)
# define alloca _alloca
# else
# define YYSTACK_ALLOC alloca
-# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
+# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
|| defined __cplusplus || defined _MSC_VER)
# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef _STDLIB_H
-# define _STDLIB_H 1
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
# endif
# endif
# endif
@@ -305,26 +303,26 @@ YYID (yyi)
# ifndef YYSTACK_ALLOC_MAXIMUM
# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
# endif
-# if (defined __cplusplus && ! defined _STDLIB_H \
+# if (defined __cplusplus && ! defined EXIT_SUCCESS \
&& ! ((defined YYMALLOC || defined malloc) \
&& (defined YYFREE || defined free)))
# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef _STDLIB_H
-# define _STDLIB_H 1
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
# endif
# endif
# ifndef YYMALLOC
# define YYMALLOC malloc
-# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
+# if ! defined malloc && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
|| defined __cplusplus || defined _MSC_VER)
void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
# endif
# endif
# ifndef YYFREE
# define YYFREE free
-# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
+# if ! defined free && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
|| defined __cplusplus || defined _MSC_VER)
-void free (void*); /* INFRINGES ON USER NAME SPACE */
+void free (void *); /* INFRINGES ON USER NAME SPACE */
# endif
# endif
# endif
@@ -351,23 +349,7 @@ union yyalloc
((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
+ YYSTACK_GAP_MAXIMUM)
-/* Copy COUNT objects from FROM to TO. The source and destination do
- not overlap. */
-# ifndef YYCOPY
-# if defined __GNUC__ && 1 < __GNUC__
-# define YYCOPY(To, From, Count) \
- __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
-# else
-# define YYCOPY(To, From, Count) \
- do \
- { \
- YYSIZE_T yyi; \
- for (yyi = 0; yyi < (Count); yyi++) \
- (To)[yyi] = (From)[yyi]; \
- } \
- while (YYID (0))
-# endif
-# endif
+# define YYCOPY_NEEDED 1
/* Relocate STACK from its old location to the new one. The
local variables YYSIZE and YYSTACKSIZE give the old and new number of
@@ -387,19 +369,39 @@ union yyalloc
#endif
+#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
+/* Copy COUNT objects from FROM to TO. The source and destination do
+ not overlap. */
+# ifndef YYCOPY
+# if defined __GNUC__ && 1 < __GNUC__
+# define YYCOPY(To, From, Count) \
+ __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
+# else
+# define YYCOPY(To, From, Count) \
+ do \
+ { \
+ YYSIZE_T yyi; \
+ for (yyi = 0; yyi < (Count); yyi++) \
+ (To)[yyi] = (From)[yyi]; \
+ } \
+ while (YYID (0))
+# endif
+# endif
+#endif /* !YYCOPY_NEEDED */
+
/* YYFINAL -- State number of the termination state. */
#define YYFINAL 3
/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 43
+#define YYLAST 52
/* YYNTOKENS -- Number of terminals. */
#define YYNTOKENS 16
/* YYNNTS -- Number of nonterminals. */
#define YYNNTS 8
/* YYNRULES -- Number of rules. */
-#define YYNRULES 23
+#define YYNRULES 24
/* YYNRULES -- Number of states. */
-#define YYNSTATES 31
+#define YYNSTATES 33
/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
#define YYUNDEFTOK 2
@@ -447,7 +449,7 @@ static const yytype_uint8 yyprhs[] =
{
0, 0, 3, 5, 6, 9, 12, 16, 21, 23,
25, 28, 32, 36, 39, 44, 47, 49, 51, 53,
- 55, 57, 59, 61
+ 55, 57, 59, 61, 64
};
/* YYRHS -- A `-1'-separated list of the rules' RHS. */
@@ -459,7 +461,7 @@ static const yytype_int8 yyrhs[] =
6, -1, 21, 22, 14, -1, 21, 22, -1, 21,
23, 15, 6, -1, 21, 23, -1, 8, -1, 7,
-1, 9, -1, 10, -1, 11, -1, 5, -1, 6,
- -1, 4, -1
+ -1, 14, 22, -1, 4, -1
};
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
@@ -467,7 +469,7 @@ static const yytype_uint8 yyrline[] =
{
0, 79, 79, 85, 89, 99, 111, 115, 124, 128,
132, 136, 141, 147, 152, 159, 165, 169, 173, 177,
- 181, 186, 191, 198
+ 181, 186, 191, 197, 202
};
#endif
@@ -498,7 +500,7 @@ static const yytype_uint8 yyr1[] =
{
0, 16, 17, 18, 18, 18, 19, 19, 20, 20,
20, 20, 20, 20, 20, 20, 21, 21, 21, 21,
- 21, 22, 22, 23
+ 21, 22, 22, 22, 23
};
/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
@@ -506,24 +508,24 @@ static const yytype_uint8 yyr2[] =
{
0, 2, 1, 0, 2, 2, 3, 4, 1, 1,
2, 3, 3, 2, 4, 2, 1, 1, 1, 1,
- 1, 1, 1, 1
+ 1, 1, 1, 2, 1
};
-/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
- STATE-NUM when YYTABLE doesn't specify something else to do. Zero
+/* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
+ Performed when YYTABLE doesn't specify something else to do. Zero
means the default is an error. */
static const yytype_uint8 yydefact[] =
{
- 3, 0, 2, 1, 23, 21, 22, 17, 16, 18,
- 19, 20, 3, 5, 4, 0, 8, 9, 0, 3,
- 13, 15, 10, 0, 6, 0, 12, 0, 11, 7,
- 14
+ 3, 0, 2, 1, 24, 21, 22, 17, 16, 18,
+ 19, 20, 3, 0, 5, 4, 0, 8, 9, 0,
+ 23, 3, 13, 15, 10, 0, 6, 0, 12, 0,
+ 11, 7, 14
};
/* YYDEFGOTO[NTERM-NUM]. */
static const yytype_int8 yydefgoto[] =
{
- -1, 1, 2, 13, 14, 15, 16, 17
+ -1, 1, 2, 14, 15, 16, 17, 18
};
/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
@@ -531,39 +533,46 @@ static const yytype_int8 yydefgoto[] =
#define YYPACT_NINF -5
static const yytype_int8 yypact[] =
{
- -5, 33, 16, -5, -5, -5, -5, -5, -5, -5,
- -5, -5, -5, -5, -5, 25, 21, 19, -4, -5,
- 22, 23, -5, 34, -5, 6, -5, 35, -5, -5,
- -5
+ -5, 38, 18, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, 31, -5, -5, 29, 30, 32, -4,
+ -5, -5, 34, 35, -5, 40, -5, 7, -5, 43,
+ -5, -5, -5
};
/* YYPGOTO[NTERM-NUM]. */
static const yytype_int8 yypgoto[] =
{
- -5, -5, 20, -5, -5, -5, 27, 28
+ -5, -5, 19, -5, -5, -5, 26, 36
};
/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
positive, shift that token. If negative, reduce the rule which
- number is the opposite. If zero, do what YYDEFACT says.
- If YYTABLE_NINF, syntax error. */
+ number is the opposite. If YYTABLE_NINF, syntax error. */
#define YYTABLE_NINF -1
static const yytype_uint8 yytable[] =
{
- 4, 5, 6, 7, 8, 9, 10, 11, 12, 24,
- 4, 5, 6, 7, 8, 9, 10, 11, 12, 29,
- 4, 5, 6, 7, 8, 9, 10, 11, 12, 4,
- 5, 6, 18, 3, 23, 22, 26, 19, 27, 25,
- 28, 30, 20, 21
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 26,
+ 13, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 31, 13, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 19, 13, 4, 5, 6, 5, 6, 3, 20,
+ 27, 21, 22, 13, 24, 13, 30, 25, 28, 32,
+ 29, 0, 23
};
-static const yytype_uint8 yycheck[] =
+#define yypact_value_is_default(yystate) \
+ ((yystate) == (-5))
+
+#define yytable_value_is_error(yytable_value) \
+ YYID (0)
+
+static const yytype_int8 yycheck[] =
{
4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
- 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
- 4, 5, 6, 7, 8, 9, 10, 11, 12, 4,
- 5, 6, 12, 0, 15, 14, 14, 12, 15, 19,
- 6, 6, 15, 15
+ 14, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 12, 14, 4, 5, 6, 5, 6, 0, 13,
+ 21, 12, 16, 14, 14, 14, 6, 15, 14, 6,
+ 15, -1, 16
};
/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
@@ -571,9 +580,9 @@ static const yytype_uint8 yycheck[] =
static const yytype_uint8 yystos[] =
{
0, 17, 18, 0, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 19, 20, 21, 22, 23, 18, 12,
- 22, 23, 14, 15, 13, 18, 14, 15, 6, 13,
- 6
+ 10, 11, 12, 14, 19, 20, 21, 22, 23, 18,
+ 22, 12, 22, 23, 14, 15, 13, 18, 14, 15,
+ 6, 13, 6
};
#define yyerrok (yyerrstatus = 0)
@@ -588,9 +597,18 @@ static const yytype_uint8 yystos[] =
/* Like YYERROR except do call yyerror. This remains here temporarily
to ease the transition to the new meaning of YYERROR, for GCC.
- Once GCC version 2 has supplanted version 1, this can go. */
+ Once GCC version 2 has supplanted version 1, this can go. However,
+ YYFAIL appears to be in use. Nevertheless, it is formally deprecated
+ in Bison 2.4.2's NEWS entry, where a plan to phase it out is
+ discussed. */
#define YYFAIL goto yyerrlab
+#if defined YYFAIL
+ /* This is here to suppress warnings from the GCC cpp's
+ -Wunused-macros. Normally we don't worry about that warning, but
+ some users do, and we want to make it easy for users to remove
+ YYFAIL uses, which will produce warnings from Bison 2.5. */
+#endif
#define YYRECOVERING() (!!yyerrstatus)
@@ -600,7 +618,6 @@ do \
{ \
yychar = (Token); \
yylval = (Value); \
- yytoken = YYTRANSLATE (yychar); \
YYPOPSTACK (1); \
goto yybackup; \
} \
@@ -642,19 +659,10 @@ while (YYID (0))
#endif
-/* YY_LOCATION_PRINT -- Print the location on the stream.
- This macro was not mandated originally: define only if we know
- we won't break user code: when these are the locations we know. */
+/* This macro is provided for backward compatibility. */
#ifndef YY_LOCATION_PRINT
-# if YYLTYPE_IS_TRIVIAL
-# define YY_LOCATION_PRINT(File, Loc) \
- fprintf (File, "%d.%d-%d.%d", \
- (Loc).first_line, (Loc).first_column, \
- (Loc).last_line, (Loc).last_column)
-# else
-# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
-# endif
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
#endif
@@ -847,7 +855,6 @@ int yydebug;
#endif
-
#if YYERROR_VERBOSE
# ifndef yystrlen
@@ -949,116 +956,143 @@ yytnamerr (char *yyres, const char *yystr)
}
# endif
-/* Copy into YYRESULT an error message about the unexpected token
- YYCHAR while in state YYSTATE. Return the number of bytes copied,
- including the terminating null byte. If YYRESULT is null, do not
- copy anything; just return the number of bytes that would be
- copied. As a special case, return 0 if an ordinary "syntax error"
- message will do. Return YYSIZE_MAXIMUM if overflow occurs during
- size calculation. */
-static YYSIZE_T
-yysyntax_error (char *yyresult, int yystate, int yychar)
-{
- int yyn = yypact[yystate];
+/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
+ about the unexpected token YYTOKEN for the state stack whose top is
+ YYSSP.
- if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
- return 0;
- else
+ Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is
+ not large enough to hold the message. In that case, also set
+ *YYMSG_ALLOC to the required number of bytes. Return 2 if the
+ required number of bytes is too large to store. */
+static int
+yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
+ yytype_int16 *yyssp, int yytoken)
+{
+ YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
+ YYSIZE_T yysize = yysize0;
+ YYSIZE_T yysize1;
+ enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
+ /* Internationalized format string. */
+ const char *yyformat = 0;
+ /* Arguments of yyformat. */
+ char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+ /* Number of reported tokens (one for the "unexpected", one per
+ "expected"). */
+ int yycount = 0;
+
+ /* There are many possibilities here to consider:
+ - Assume YYFAIL is not used. It's too flawed to consider. See
+ <http://lists.gnu.org/archive/html/bison-patches/2009-12/msg00024.html>
+ for details. YYERROR is fine as it does not invoke this
+ function.
+ - If this state is a consistent state with a default action, then
+ the only way this function was invoked is if the default action
+ is an error action. In that case, don't check for expected
+ tokens because there are none.
+ - The only way there can be no lookahead present (in yychar) is if
+ this state is a consistent state with a default action. Thus,
+ detecting the absence of a lookahead is sufficient to determine
+ that there is no unexpected or expected token to report. In that
+ case, just report a simple "syntax error".
+ - Don't assume there isn't a lookahead just because this state is a
+ consistent state with a default action. There might have been a
+ previous inconsistent state, consistent state with a non-default
+ action, or user semantic action that manipulated yychar.
+ - Of course, the expected token list depends on states to have
+ correct lookahead information, and it depends on the parser not
+ to perform extra reductions after fetching a lookahead from the
+ scanner and before detecting a syntax error. Thus, state merging
+ (from LALR or IELR) and default reductions corrupt the expected
+ token list. However, the list is correct for canonical LR with
+ one exception: it will still contain any token that will not be
+ accepted due to an error action in a later state.
+ */
+ if (yytoken != YYEMPTY)
{
- int yytype = YYTRANSLATE (yychar);
- YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]);
- YYSIZE_T yysize = yysize0;
- YYSIZE_T yysize1;
- int yysize_overflow = 0;
- enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
- char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
- int yyx;
-
-# if 0
- /* This is so xgettext sees the translatable formats that are
- constructed on the fly. */
- YY_("syntax error, unexpected %s");
- YY_("syntax error, unexpected %s, expecting %s");
- YY_("syntax error, unexpected %s, expecting %s or %s");
- YY_("syntax error, unexpected %s, expecting %s or %s or %s");
- YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s");
-# endif
- char *yyfmt;
- char const *yyf;
- static char const yyunexpected[] = "syntax error, unexpected %s";
- static char const yyexpecting[] = ", expecting %s";
- static char const yyor[] = " or %s";
- char yyformat[sizeof yyunexpected
- + sizeof yyexpecting - 1
- + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2)
- * (sizeof yyor - 1))];
- char const *yyprefix = yyexpecting;
-
- /* Start YYX at -YYN if negative to avoid negative indexes in
- YYCHECK. */
- int yyxbegin = yyn < 0 ? -yyn : 0;
-
- /* Stay within bounds of both yycheck and yytname. */
- int yychecklim = YYLAST - yyn + 1;
- int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
- int yycount = 1;
-
- yyarg[0] = yytname[yytype];
- yyfmt = yystpcpy (yyformat, yyunexpected);
-
- for (yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
- {
- if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
- {
- yycount = 1;
- yysize = yysize0;
- yyformat[sizeof yyunexpected - 1] = '\0';
- break;
- }
- yyarg[yycount++] = yytname[yyx];
- yysize1 = yysize + yytnamerr (0, yytname[yyx]);
- yysize_overflow |= (yysize1 < yysize);
- yysize = yysize1;
- yyfmt = yystpcpy (yyfmt, yyprefix);
- yyprefix = yyor;
- }
+ int yyn = yypact[*yyssp];
+ yyarg[yycount++] = yytname[yytoken];
+ if (!yypact_value_is_default (yyn))
+ {
+ /* Start YYX at -YYN if negative to avoid negative indexes in
+ YYCHECK. In other words, skip the first -YYN actions for
+ this state because they are default actions. */
+ int yyxbegin = yyn < 0 ? -yyn : 0;
+ /* Stay within bounds of both yycheck and yytname. */
+ int yychecklim = YYLAST - yyn + 1;
+ int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
+ int yyx;
+
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR
+ && !yytable_value_is_error (yytable[yyx + yyn]))
+ {
+ if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
+ {
+ yycount = 1;
+ yysize = yysize0;
+ break;
+ }
+ yyarg[yycount++] = yytname[yyx];
+ yysize1 = yysize + yytnamerr (0, yytname[yyx]);
+ if (! (yysize <= yysize1
+ && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+ return 2;
+ yysize = yysize1;
+ }
+ }
+ }
- yyf = YY_(yyformat);
- yysize1 = yysize + yystrlen (yyf);
- yysize_overflow |= (yysize1 < yysize);
- yysize = yysize1;
+ switch (yycount)
+ {
+# define YYCASE_(N, S) \
+ case N: \
+ yyformat = S; \
+ break
+ YYCASE_(0, YY_("syntax error"));
+ YYCASE_(1, YY_("syntax error, unexpected %s"));
+ YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s"));
+ YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s"));
+ YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s"));
+ YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"));
+# undef YYCASE_
+ }
- if (yysize_overflow)
- return YYSIZE_MAXIMUM;
+ yysize1 = yysize + yystrlen (yyformat);
+ if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+ return 2;
+ yysize = yysize1;
- if (yyresult)
- {
- /* Avoid sprintf, as that infringes on the user's name space.
- Don't have undefined behavior even if the translation
- produced a string with the wrong number of "%s"s. */
- char *yyp = yyresult;
- int yyi = 0;
- while ((*yyp = *yyf) != '\0')
- {
- if (*yyp == '%' && yyf[1] == 's' && yyi < yycount)
- {
- yyp += yytnamerr (yyp, yyarg[yyi++]);
- yyf += 2;
- }
- else
- {
- yyp++;
- yyf++;
- }
- }
- }
- return yysize;
+ if (*yymsg_alloc < yysize)
+ {
+ *yymsg_alloc = 2 * yysize;
+ if (! (yysize <= *yymsg_alloc
+ && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
+ *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
+ return 1;
}
+
+ /* Avoid sprintf, as that infringes on the user's name space.
+ Don't have undefined behavior even if the translation
+ produced a string with the wrong number of "%s"s. */
+ {
+ char *yyp = *yymsg;
+ int yyi = 0;
+ while ((*yyp = *yyformat) != '\0')
+ if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
+ {
+ yyp += yytnamerr (yyp, yyarg[yyi++]);
+ yyformat += 2;
+ }
+ else
+ {
+ yyp++;
+ yyformat++;
+ }
+ }
+ return 0;
}
#endif /* YYERROR_VERBOSE */
-
/*-----------------------------------------------.
| Release the memory associated to this symbol. |
`-----------------------------------------------*/
@@ -1090,6 +1124,7 @@ yydestruct (yymsg, yytype, yyvaluep)
}
}
+
/* Prevent warnings from -Wmissing-prototypes. */
#ifdef YYPARSE_PARAM
#if defined __STDC__ || defined __cplusplus
@@ -1106,12 +1141,9 @@ int yyparse ();
#endif /* ! YYPARSE_PARAM */
-
-
-
-/*-------------------------.
-| yyparse or yypush_parse. |
-`-------------------------*/
+/*----------.
+| yyparse. |
+`----------*/
#ifdef YYPARSE_PARAM
#if (defined __STDC__ || defined __C99__FUNC__ \
@@ -1259,7 +1291,7 @@ YYSTYPE yylval;
{
yytype_int16 *yyss1 = yyss;
union yyalloc *yyptr =
- (union yyalloc*) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
+ (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
if (! yyptr)
goto yyexhaustedlab;
YYSTACK_RELOCATE (yyss_alloc, yyss);
@@ -1298,7 +1330,7 @@ yybackup:
/* First try to decide what to do without reference to lookahead token. */
yyn = yypact[yystate];
- if (yyn == YYPACT_NINF)
+ if (yypact_value_is_default (yyn))
goto yydefault;
/* Not known => get a lookahead token if don't already have one. */
@@ -1329,8 +1361,8 @@ yybackup:
yyn = yytable[yyn];
if (yyn <= 0)
{
- if (yyn == 0 || yyn == YYTABLE_NINF)
- goto yyerrlab;
+ if (yytable_value_is_error (yyn))
+ goto yyerrlab;
yyn = -yyn;
goto yyreduce;
}
@@ -1385,26 +1417,26 @@ yyreduce:
{
case 2:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 79 "fts0pars.y"
{
(yyval.node) = (yyvsp[(1) - (1)].node);
((fts_ast_state_t*) state)->root = (yyval.node);
- ;}
+ }
break;
case 3:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 85 "fts0pars.y"
{
(yyval.node) = NULL;
- ;}
+ }
break;
case 4:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 89 "fts0pars.y"
{
(yyval.node) = (yyvsp[(1) - (2)].node);
@@ -1414,12 +1446,12 @@ yyreduce:
} else {
fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
}
- ;}
+ }
break;
case 5:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 99 "fts0pars.y"
{
(yyval.node) = (yyvsp[(1) - (2)].node);
@@ -1430,21 +1462,21 @@ yyreduce:
} else {
fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
}
- ;}
+ }
break;
case 6:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 111 "fts0pars.y"
{
(yyval.node) = (yyvsp[(2) - (3)].node);
- ;}
+ }
break;
case 7:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 115 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_subexp_list(state, (yyvsp[(1) - (4)].node));
@@ -1452,170 +1484,190 @@ yyreduce:
if ((yyvsp[(3) - (4)].node)) {
fts_ast_add_node((yyval.node), (yyvsp[(3) - (4)].node));
}
- ;}
+ }
break;
case 8:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 124 "fts0pars.y"
{
(yyval.node) = (yyvsp[(1) - (1)].node);
- ;}
+ }
break;
case 9:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 128 "fts0pars.y"
{
(yyval.node) = (yyvsp[(1) - (1)].node);
- ;}
+ }
break;
case 10:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 132 "fts0pars.y"
{
fts_ast_term_set_wildcard((yyvsp[(1) - (2)].node));
- ;}
+ }
break;
case 11:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 136 "fts0pars.y"
{
fts_ast_term_set_distance((yyvsp[(1) - (3)].node), strtoul((yyvsp[(3) - (3)].token), NULL, 10));
free((yyvsp[(3) - (3)].token));
- ;}
+ }
break;
case 12:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 141 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (3)].node));
fts_ast_add_node((yyval.node), (yyvsp[(2) - (3)].node));
fts_ast_term_set_wildcard((yyvsp[(2) - (3)].node));
- ;}
+ }
break;
case 13:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 147 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node));
fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
- ;}
+ }
break;
case 14:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 152 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node));
fts_ast_add_node((yyval.node), (yyvsp[(2) - (4)].node));
fts_ast_term_set_distance((yyvsp[(2) - (4)].node), strtoul((yyvsp[(4) - (4)].token), NULL, 10));
free((yyvsp[(4) - (4)].token));
- ;}
+ }
break;
case 15:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 159 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node));
fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
- ;}
+ }
break;
case 16:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 165 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_oper(state, FTS_IGNORE);
- ;}
+ }
break;
case 17:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 169 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_oper(state, FTS_EXIST);
- ;}
+ }
break;
case 18:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 173 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_oper(state, FTS_NEGATE);
- ;}
+ }
break;
case 19:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 177 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_oper(state, FTS_DECR_RATING);
- ;}
+ }
break;
case 20:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 181 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_oper(state, FTS_INCR_RATING);
- ;}
+ }
break;
case 21:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 186 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token));
free((yyvsp[(1) - (1)].token));
- ;}
+ }
break;
case 22:
-/* Line 1455 of yacc.c */
+/* Line 1806 of yacc.c */
#line 191 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token));
free((yyvsp[(1) - (1)].token));
- ;}
+ }
break;
case 23:
-/* Line 1455 of yacc.c */
-#line 198 "fts0pars.y"
+/* Line 1806 of yacc.c */
+#line 197 "fts0pars.y"
+ {
+ (yyval.node) = (yyvsp[(2) - (2)].node);
+ }
+ break;
+
+ case 24:
+
+/* Line 1806 of yacc.c */
+#line 202 "fts0pars.y"
{
(yyval.node) = fts_ast_create_node_text(state, (yyvsp[(1) - (1)].token));
free((yyvsp[(1) - (1)].token));
- ;}
+ }
break;
-/* Line 1455 of yacc.c */
-#line 1617 "fts0pars.cc"
+/* Line 1806 of yacc.c */
+#line 1658 "fts0pars.cc"
default: break;
}
+ /* User semantic actions sometimes alter yychar, and that requires
+ that yytoken be updated with the new translation. We take the
+ approach of translating immediately before every use of yytoken.
+ One alternative is translating here after every semantic action,
+ but that translation would be missed if the semantic action invokes
+ YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
+ if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an
+ incorrect destructor might then be invoked immediately. In the
+ case of YYERROR or YYBACKUP, subsequent parser actions might lead
+ to an incorrect destructor call or verbose syntax error message
+ before the lookahead is translated. */
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
YYPOPSTACK (yylen);
@@ -1643,6 +1695,10 @@ yyreduce:
| yyerrlab -- here on detecting error |
`------------------------------------*/
yyerrlab:
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
+
/* If not already recovering from an error, report this error. */
if (!yyerrstatus)
{
@@ -1650,37 +1706,36 @@ yyerrlab:
#if ! YYERROR_VERBOSE
yyerror (YY_("syntax error"));
#else
+# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \
+ yyssp, yytoken)
{
- YYSIZE_T yysize = yysyntax_error (0, yystate, yychar);
- if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
- {
- YYSIZE_T yyalloc = 2 * yysize;
- if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
- yyalloc = YYSTACK_ALLOC_MAXIMUM;
- if (yymsg != yymsgbuf)
- YYSTACK_FREE (yymsg);
- yymsg = (char*) YYSTACK_ALLOC (yyalloc);
- if (yymsg)
- yymsg_alloc = yyalloc;
- else
- {
- yymsg = yymsgbuf;
- yymsg_alloc = sizeof yymsgbuf;
- }
- }
-
- if (0 < yysize && yysize <= yymsg_alloc)
- {
- (void) yysyntax_error (yymsg, yystate, yychar);
- yyerror (yymsg);
- }
- else
- {
- yyerror (YY_("syntax error"));
- if (yysize != 0)
- goto yyexhaustedlab;
- }
+ char const *yymsgp = YY_("syntax error");
+ int yysyntax_error_status;
+ yysyntax_error_status = YYSYNTAX_ERROR;
+ if (yysyntax_error_status == 0)
+ yymsgp = yymsg;
+ else if (yysyntax_error_status == 1)
+ {
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+ yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc);
+ if (!yymsg)
+ {
+ yymsg = yymsgbuf;
+ yymsg_alloc = sizeof yymsgbuf;
+ yysyntax_error_status = 2;
+ }
+ else
+ {
+ yysyntax_error_status = YYSYNTAX_ERROR;
+ yymsgp = yymsg;
+ }
+ }
+ yyerror (yymsgp);
+ if (yysyntax_error_status == 2)
+ goto yyexhaustedlab;
}
+# undef YYSYNTAX_ERROR
#endif
}
@@ -1739,7 +1794,7 @@ yyerrlab1:
for (;;)
{
yyn = yypact[yystate];
- if (yyn != YYPACT_NINF)
+ if (!yypact_value_is_default (yyn))
{
yyn += YYTERROR;
if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
@@ -1798,8 +1853,13 @@ yyexhaustedlab:
yyreturn:
if (yychar != YYEMPTY)
- yydestruct ("Cleanup: discarding lookahead",
- yytoken, &yylval);
+ {
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = YYTRANSLATE (yychar);
+ yydestruct ("Cleanup: discarding lookahead",
+ yytoken, &yylval);
+ }
/* Do not reclaim the symbols of the rule which action triggered
this YYABORT or YYACCEPT. */
YYPOPSTACK (yylen);
@@ -1824,8 +1884,8 @@ yyreturn:
-/* Line 1675 of yacc.c */
-#line 203 "fts0pars.y"
+/* Line 2067 of yacc.c */
+#line 207 "fts0pars.y"
/********************************************************************
@@ -1835,7 +1895,7 @@ ftserror(
/*=====*/
const char* p)
{
- fprintf(stderr, "%s\n", p);
+ my_printf_error(ER_PARSE_ERROR, "%s", MYF(0), p);
return(0);
}
diff --git a/storage/innobase/fts/fts0pars.y b/storage/innobase/fts/fts0pars.y
index fe644d84eea..73d71bc87c5 100644
--- a/storage/innobase/fts/fts0pars.y
+++ b/storage/innobase/fts/fts0pars.y
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -193,6 +193,10 @@ term : FTS_TERM {
free($1);
}
+ /* Ignore leading '*' */
+ | '*' term {
+ $$ = $2;
+ }
;
text : FTS_TEXT {
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
index 5c757b4f176..72901d193eb 100644
--- a/storage/innobase/fts/fts0que.cc
+++ b/storage/innobase/fts/fts0que.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,6 +40,10 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#include "fts0vlc.ic"
#endif
+#include <string>
+#include <vector>
+#include <map>
+
#define FTS_ELEM(t, n, i, j) (t[(i) * n + (j)])
#define RANK_DOWNGRADE (-1.0F)
@@ -50,13 +54,20 @@ FIXME, this limitation can be removed easily. Need to see
if we want to enforce such limitation */
#define MAX_PROXIMITY_ITEM 128
+/* Memory used by rbt itself for create and node add. The replacement
+lists are parenthesized so that the macros expand correctly when they are
+used inside a larger expression (e.g. multiplied or subtracted). */
+#define SIZEOF_RBT_CREATE (sizeof(ib_rbt_t) + sizeof(ib_rbt_node_t) * 2)
+#define SIZEOF_RBT_NODE_ADD (sizeof(ib_rbt_node_t))
+
+/*Initial byte length for 'words' in fts_ranking_t */
+#define RANKING_WORDS_INIT_LEN 4
+
/* Coefficient used to normalize relevance ranking. */
static const double FTS_NORMALIZE_COEFF = 0.0115F;
// FIXME: Need to have a generic iterator that traverses the ilist.
-/* For parsing the search phrase */
-static const char* FTS_PHRASE_DELIMITER = "\t ";
+typedef std::map<std::string, ulint> word_map_t;
+typedef std::vector<std::string> word_vector_t;
struct fts_word_freq_t;
@@ -72,6 +83,8 @@ struct fts_query_t {
fts_table_t fts_index_table;/*!< FTS auxiliary index table def */
+ ulint total_size; /*!< total memory size used by query */
+
fts_doc_ids_t* deleted; /*!< Deleted doc ids that need to be
filtered from the output */
@@ -79,6 +92,12 @@ struct fts_query_t {
fts_ast_node_t* cur_node; /*!< Current tree node */
+ word_map_t* word_map; /*!< Matched word map for
+ searching by word*/
+
+ word_vector_t* word_vector; /*!< Matched word vector for
+ searching by index */
+
ib_rbt_t* doc_ids; /*!< The current set of matching
doc ids, elements are of
type fts_ranking_t */
@@ -113,7 +132,7 @@ struct fts_query_t {
doc_id_t upper_doc_id; /*!< Highest doc id in doc_ids */
- ibool boolean_mode; /*!< TRUE if boolean mode query */
+ bool boolean_mode; /*!< TRUE if boolean mode query */
ib_vector_t* matched; /*!< Array of matching documents
(fts_match_t) to search for a phrase */
@@ -133,9 +152,7 @@ struct fts_query_t {
document, its elements are of type
fts_word_freq_t */
- ibool inited; /*!< Flag to test whether the query
- processing has started or not */
- ibool multi_exist; /*!< multiple FTS_EXIST oper */
+ bool multi_exist; /*!< multiple FTS_EXIST oper */
};
/** For phrase matching, first we collect the documents and the positions
@@ -237,7 +254,7 @@ fts_query_index_fetch_nodes(
Read and filter nodes.
@return fts_node_t instance */
static
-void
+dberr_t
fts_query_filter_doc_ids(
/*=====================*/
fts_query_t* query, /*!< in: query instance */
@@ -318,7 +335,7 @@ static
ulint
fts_query_terms_in_document(
/*========================*/
- /*!< out: DB_SUCCESS if all went well
+ /*!< out: DB_SUCCESS if all go well
else error code */
fts_query_t* query, /*!< in: FTS query state */
doc_id_t doc_id, /*!< in: the word to check */
@@ -430,22 +447,6 @@ fts_query_lcs(
#endif
/*******************************************************************//**
-Compare two byte* arrays.
-@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */
-static
-int
-fts_query_strcmp(
-/*=============*/
- const void* p1, /*!< in: pointer to elem */
- const void* p2) /*!< in: pointer to elem */
-{
- void* temp = const_cast<void*>(p2);
-
- return(strcmp(static_cast<const char*>(p1),
- *(static_cast <char**>(temp))));
-}
-
-/*******************************************************************//**
Compare two fts_ranking_t instance on their rank value and doc ids in
descending order on the rank and ascending order on doc id.
@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */
@@ -537,6 +538,127 @@ fts_utf8_strcmp(
#endif
/*******************************************************************//**
+Create words in ranking: allocate the initial 'words' bitmap of
+RANKING_WORDS_INIT_LEN bytes from query->heap with mem_heap_zalloc() and
+record that length in ranking->words_len. */
+static
+void
+fts_ranking_words_create(
+/*=====================*/
+ fts_query_t* query, /*!< in: query instance; its heap
+ supplies the bitmap memory */
+ fts_ranking_t* ranking) /*!< in/out: ranking instance whose
+ words and words_len are set */
+{
+ ranking->words = static_cast<byte*>(
+ mem_heap_zalloc(query->heap, RANKING_WORDS_INIT_LEN));
+ ranking->words_len = RANKING_WORDS_INIT_LEN;
+}
+
+/*
+The optimization here is using a char array (bitmap) to replace the words rb tree
+in fts_ranking_t.
+
+It can save lots of memory except in some cases of QUERY EXPANSION.
+
+'word_map' is used as a word dictionary, in which the key is a word and the value
+is a number. In 'fts_ranking_words_add', we first check if the word is in 'word_map'.
+If not, we add it into 'word_map' and give it a position (actually a number).
+Then we set the corresponding bit to '1' at that position in the char array 'words'.
+
+'word_vector' is a useful backup of 'word_map': we can get a word by its position
+more quickly than by searching by value in 'word_map'. We use 'word_vector'
+in 'fts_query_calculate_ranking' and 'fts_expand_query'. In these two functions, we need
+to scan the bitmap 'words' and get a word when a bit is '1'; then we get word_freq
+by the word.
+*/
+
+/*******************************************************************//**
+Add a word into ranking: look the word up in query->word_map, registering
+it in word_map and word_vector under the next free position if it is new,
+then set the bit for that position in the ranking's 'words' bitmap. The
+bitmap is grown first if the position does not fit in it. */
+static
+void
+fts_ranking_words_add(
+/*==================*/
+ fts_query_t* query, /*!< in/out: query instance; word_map and
+ word_vector may gain an entry */
+ fts_ranking_t* ranking, /*!< in/out: ranking instance whose
+ bitmap is updated */
+ const char* word) /*!< in: term/word to add */
+{
+ ulint pos;
+ ulint byte_offset;
+ ulint bit_offset;
+ word_map_t::iterator it;
+
+ /* Note: we suppose the word map and vector are append-only */
+ /* Check if need to add it to word map. lower_bound() yields the
+ first entry whose key is not less than 'word'; it is an exact match
+ only if 'word' is not less than that key either. Otherwise the
+ iterator is reused as the insertion hint and the new position is
+ the current size of the map. */
+ it = query->word_map->lower_bound(word);
+ if (it != query->word_map->end()
+ && !query->word_map->key_comp()(word, it->first)) {
+ pos = it->second;
+ } else {
+ pos = query->word_map->size();
+ query->word_map->insert(it,
+ std::pair<std::string, ulint>(word, pos));
+
+ query->word_vector->push_back(word);
+ }
+
+ /* Check words len: if the byte holding bit 'pos' lies beyond the
+ current bitmap, double the length until it fits, allocate a new
+ bitmap from query->heap and copy the old contents into it. */
+ byte_offset = pos / CHAR_BIT;
+ if (byte_offset >= ranking->words_len) {
+ byte* words = ranking->words;
+ ulint words_len = ranking->words_len;
+
+ while (byte_offset >= words_len) {
+ words_len *= 2;
+ }
+
+ ranking->words = static_cast<byte*>(
+ mem_heap_zalloc(query->heap, words_len));
+ ut_memcpy(ranking->words, words, ranking->words_len);
+ ranking->words_len = words_len;
+ }
+
+ /* Set ranking words: turn on the bit that represents 'pos' */
+ ut_ad(byte_offset < ranking->words_len);
+ bit_offset = pos % CHAR_BIT;
+ ranking->words[byte_offset] |= 1 << bit_offset;
+}
+
+/*******************************************************************//**
+Get a word from a ranking: scan the 'words' bitmap starting at *pos for
+the next set bit and return the word stored at that index in
+query->word_vector.
+@return true if it's successful, false if no set bit was found */
+static
+bool
+fts_ranking_words_get_next(
+/*=======================*/
+ const fts_query_t* query, /*!< in: query instance */
+ fts_ranking_t* ranking,/*!< in: ranking instance */
+ ulint* pos, /*!< in/out: in: bit position to start
+ searching from; out: one past the
+ position of the word that was found */
+ byte** word) /*!< out: the word found; points at the
+ string held in query->word_vector and
+ is not written when nothing is found */
+{
+ bool ret = false;
+ ulint max_pos = ranking->words_len * CHAR_BIT;
+
+ /* Search for next word: advance *pos until a set bit is seen or
+ the end of the bitmap is reached */
+ while (*pos < max_pos) {
+ ulint byte_offset = *pos / CHAR_BIT;
+ ulint bit_offset = *pos % CHAR_BIT;
+
+ if (ranking->words[byte_offset] & (1 << bit_offset)) {
+ ret = true;
+ break;
+ }
+
+ *pos += 1;
+ };
+
+ /* Get next word from word vector, then step past it so that the
+ next call continues the scan after this word */
+ if (ret) {
+ ut_ad(*pos < query->word_vector->size());
+ *word = (byte*)query->word_vector->at((size_t)*pos).c_str();
+ *pos += 1;
+ }
+
+ return ret;
+}
+
+/*******************************************************************//**
Add a word if it doesn't exist, to the term freq RB tree. We store
a pointer to the word that is passed in as the argument.
@return pointer to word */
@@ -569,6 +691,11 @@ fts_query_add_word_freq(
parent.last = rbt_add_node(
query->word_freqs, &parent, &word_freq);
+
+ query->total_size += len
+ + SIZEOF_RBT_CREATE
+ + SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_word_freq_t);
}
return(rbt_value(fts_word_freq_t, parent.last));
@@ -581,6 +708,7 @@ static
fts_doc_freq_t*
fts_query_add_doc_freq(
/*===================*/
+ fts_query_t* query, /*!< in: query instance */
ib_rbt_t* doc_freqs, /*!< in: rb tree of fts_doc_freq_t */
doc_id_t doc_id) /*!< in: doc id to add */
{
@@ -596,6 +724,9 @@ fts_query_add_doc_freq(
doc_freq.doc_id = doc_id;
parent.last = rbt_add_node(doc_freqs, &parent, &doc_freq);
+
+ query->total_size += SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_doc_freq_t);
}
return(rbt_value(fts_doc_freq_t, parent.last));
@@ -625,9 +756,12 @@ fts_query_union_doc_id(
ranking.rank = rank;
ranking.doc_id = doc_id;
- ranking.words = rbt_create(sizeof(byte*), fts_query_strcmp);
+ fts_ranking_words_create(query, &ranking);
rbt_add_node(query->doc_ids, &parent, &ranking);
+
+ query->total_size += SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t) + RANKING_WORDS_INIT_LEN;
}
}
@@ -648,13 +782,12 @@ fts_query_remove_doc_id(
/* Check if the doc id is deleted and it's in our set. */
if (fts_bsearch(array, 0, size, doc_id) < 0
&& rbt_search(query->doc_ids, &parent, &doc_id) == 0) {
-
- fts_ranking_t* ranking;
-
- ranking = rbt_value(fts_ranking_t, parent.last);
- rbt_free(ranking->words);
-
ut_free(rbt_remove_node(query->doc_ids, parent.last));
+
+ ut_ad(query->total_size >
+ SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t));
+ query->total_size -= SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t);
}
}
@@ -712,57 +845,69 @@ fts_query_intersect_doc_id(
ib_rbt_bound_t parent;
ulint size = ib_vector_size(query->deleted->doc_ids);
fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data;
- fts_ranking_t* ranking;
+ fts_ranking_t* ranking= NULL;
+
+ /* There are three types of intersect:
+ 1. '+a': doc_ids is empty, add doc into intersect if it matches 'a'.
+ 2. 'a +b': docs matching 'a' are in doc_ids, add doc into intersect
+ if it matches 'b'. If the doc is also in doc_ids, then change the
+ doc's rank, and add 'a' in doc's words.
+ 3. '+a +b': docs matching '+a' are in doc_ids, add doc into intersect
+ if it matches 'b' and it's in doc_ids (multi_exist = true). */
/* Check if the doc id is deleted and it's in our set */
if (fts_bsearch(array, 0, size, doc_id) < 0) {
- /* If this is the first FTS_EXIST we encountered, all of its
- value must be in intersect list */
- if (!query->multi_exist) {
- fts_ranking_t new_ranking;
-
- if (rbt_search(query->doc_ids, &parent, &doc_id) == 0) {
- ranking = rbt_value(fts_ranking_t, parent.last);
- rank += (ranking->rank > 0)
- ? ranking->rank : RANK_UPGRADE;
- if (rank >= 1.0F) {
- rank = 1.0F;
- }
- }
-
- new_ranking.rank = rank;
- new_ranking.doc_id = doc_id;
- new_ranking.words = rbt_create(
- sizeof(byte*), fts_query_strcmp);
- ranking = &new_ranking;
+ fts_ranking_t new_ranking;
- if (rbt_search(query->intersection, &parent,
- ranking) != 0) {
- rbt_add_node(query->intersection,
- &parent, ranking);
+ if (rbt_search(query->doc_ids, &parent, &doc_id) != 0) {
+ if (query->multi_exist) {
+ return;
} else {
- rbt_free(new_ranking.words);
+ new_ranking.words = NULL;
}
} else {
+ ranking = rbt_value(fts_ranking_t, parent.last);
- if (rbt_search(query->doc_ids, &parent, &doc_id) != 0) {
+ /* We've just checked the doc id before */
+ if (ranking->words == NULL) {
+ ut_ad(rbt_search(query->intersection, &parent,
+ ranking) == 0);
return;
}
- ranking = rbt_value(fts_ranking_t, parent.last);
+ /* Merge rank */
+ rank += ranking->rank;
+ if (rank >= 1.0F) {
+ rank = 1.0F;
+ } else if (rank <= -1.0F) {
+ rank = -1.0F;
+ }
+
+ /* Take words */
+ new_ranking.words = ranking->words;
+ new_ranking.words_len = ranking->words_len;
+ }
- ranking->rank = rank;
+ new_ranking.rank = rank;
+ new_ranking.doc_id = doc_id;
- if (ranking->words != NULL
- && rbt_search(query->intersection, &parent,
- ranking) != 0) {
- rbt_add_node(query->intersection, &parent,
- ranking);
+ if (rbt_search(query->intersection, &parent,
+ &new_ranking) != 0) {
+ if (new_ranking.words == NULL) {
+ fts_ranking_words_create(query, &new_ranking);
+ query->total_size += RANKING_WORDS_INIT_LEN;
+ } else {
/* Note that the intersection has taken
ownership of the ranking data. */
ranking->words = NULL;
}
+
+ rbt_add_node(query->intersection,
+ &parent, &new_ranking);
+
+ query->total_size += SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t);
}
}
}
@@ -773,6 +918,7 @@ static
void
fts_query_free_doc_ids(
/*===================*/
+ fts_query_t* query, /*!< in: query instance */
ib_rbt_t* doc_ids) /*!< in: rb tree to free */
{
const ib_rbt_node_t* node;
@@ -784,14 +930,21 @@ fts_query_free_doc_ids(
ranking = rbt_value(fts_ranking_t, node);
if (ranking->words) {
- rbt_free(ranking->words);
ranking->words = NULL;
}
ut_free(rbt_remove_node(doc_ids, node));
+
+ ut_ad(query->total_size >
+ SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t));
+ query->total_size -= SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t);
}
rbt_free(doc_ids);
+
+ ut_ad(query->total_size > SIZEOF_RBT_CREATE);
+ query->total_size -= SIZEOF_RBT_CREATE;
}
/*******************************************************************//**
@@ -808,6 +961,10 @@ fts_query_add_word_to_document(
ib_rbt_bound_t parent;
fts_ranking_t* ranking = NULL;
+ if (query->flags == FTS_OPT_RANKING) {
+ return;
+ }
+
/* First we search the intersection RB tree as it could have
taken ownership of the words rb tree instance. */
if (query->intersection
@@ -823,23 +980,7 @@ fts_query_add_word_to_document(
}
if (ranking != NULL) {
- ulint len;
- byte* term;
-
- len = ut_strlen((char*) word) + 1;
-
- term = static_cast<byte*>(mem_heap_alloc(query->heap, len));
-
- /* Need to copy the NUL character too. */
- memcpy(term, (char*) word, len);
-
- /* The current set must have ownership of the RB tree. */
- ut_a(ranking->words != NULL);
-
- /* If the word doesn't exist in the words "list" we add it. */
- if (rbt_search(ranking->words, &parent, term) != 0) {
- rbt_add_node(ranking->words, &parent, &term);
- }
+ fts_ranking_words_add(query, ranking, (char*)word);
}
}
@@ -874,9 +1015,9 @@ fts_query_check_node(
word_freqs = rbt_value(fts_word_freq_t, parent.last);
- fts_query_filter_doc_ids(
- query, token->f_str, word_freqs, node,
- node->ilist, ilist_size, TRUE);
+ query->error = fts_query_filter_doc_ids(
+ query, token->f_str, word_freqs, node,
+ node->ilist, ilist_size, TRUE);
}
}
@@ -940,10 +1081,14 @@ fts_cache_find_wildcard(
fts_word_freq_t,
freq_parent.last);
- fts_query_filter_doc_ids(
+ query->error = fts_query_filter_doc_ids(
query, srch_text.f_str,
word_freqs, node,
node->ilist, node->ilist_size, TRUE);
+
+ if (query->error != DB_SUCCESS) {
+ return(0);
+ }
}
num_word++;
@@ -976,7 +1121,7 @@ cont_search:
/*****************************************************************//**
Set difference.
-@return DB_SUCCESS if all went well */
+@return DB_SUCCESS if all go well */
static __attribute__((nonnull, warn_unused_result))
dberr_t
fts_query_difference(
@@ -1007,6 +1152,7 @@ fts_query_difference(
const fts_index_cache_t*index_cache;
que_t* graph = NULL;
fts_cache_t* cache = table->fts->cache;
+ dberr_t error;
rw_lock_x_lock(&cache->lock);
@@ -1023,7 +1169,8 @@ fts_query_difference(
} else {
nodes = fts_cache_find_word(index_cache, token);
- for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
+ for (i = 0; nodes && i < ib_vector_size(nodes)
+ && query->error == DB_SUCCESS; ++i) {
const fts_node_t* node;
node = static_cast<const fts_node_t*>(
@@ -1035,14 +1182,26 @@ fts_query_difference(
rw_lock_x_unlock(&cache->lock);
+ /* error is passed by 'query->error' */
+ if (query->error != DB_SUCCESS) {
+ ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ return(query->error);
+ }
+
/* Setup the callback args for filtering and
consolidating the ilist. */
fetch.read_arg = query;
fetch.read_record = fts_query_index_fetch_nodes;
- query->error = fts_index_fetch_nodes(
+ error = fts_index_fetch_nodes(
trx, &graph, &query->fts_index_table, token, &fetch);
+ /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
+ ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
+ if (error != DB_SUCCESS) {
+ query->error = error;
+ }
+
fts_que_graph_free(graph);
}
@@ -1054,7 +1213,7 @@ fts_query_difference(
/*****************************************************************//**
Intersect the token doc ids with the current set.
-@return DB_SUCCESS if all went well */
+@return DB_SUCCESS if all go well */
static __attribute__((nonnull, warn_unused_result))
dberr_t
fts_query_intersect(
@@ -1062,7 +1221,6 @@ fts_query_intersect(
fts_query_t* query, /*!< in: query instance */
const fts_string_t* token) /*!< in: the token to search */
{
- ulint n_doc_ids = 0;
trx_t* trx = query->trx;
dict_table_t* table = query->index->table;
@@ -1073,41 +1231,28 @@ fts_query_intersect(
(int) token->f_len, token->f_str);
#endif
- if (!query->inited) {
-
- ut_a(rbt_empty(query->doc_ids));
-
- /* Since this is the first time we need to convert this
- intersection query into a union query. Otherwise we
- will end up with an empty set. */
- query->oper = FTS_NONE;
- query->inited = TRUE;
- }
-
- if (query->doc_ids) {
- n_doc_ids = rbt_size(query->doc_ids);
- }
-
- /* If the words set is not empty or this is the first time. */
-
- if (!rbt_empty(query->doc_ids) || query->oper == FTS_NONE) {
+ /* If the current doc id set is empty and multi_exist is true,
+ we know the intersection set is empty in advance. */
+ if (!(rbt_empty(query->doc_ids) && query->multi_exist)) {
+ ulint n_doc_ids = 0;
ulint i;
fts_fetch_t fetch;
const ib_vector_t* nodes;
const fts_index_cache_t*index_cache;
que_t* graph = NULL;
fts_cache_t* cache = table->fts->cache;
+ dberr_t error;
ut_a(!query->intersection);
- /* Only if this is not the first time. */
- if (query->oper != FTS_NONE) {
+ n_doc_ids = rbt_size(query->doc_ids);
- /* Create the rb tree that will hold the doc ids of
- the intersection. */
- query->intersection = rbt_create(
- sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
- }
+ /* Create the rb tree that will hold the doc ids of
+ the intersection. */
+ query->intersection = rbt_create(
+ sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+
+ query->total_size += SIZEOF_RBT_CREATE;
/* This is to avoid decompressing the ilist if the
node's ilist doc ids are out of range. */
@@ -1144,7 +1289,8 @@ fts_query_intersect(
} else {
nodes = fts_cache_find_word(index_cache, token);
- for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
+ for (i = 0; nodes && i < ib_vector_size(nodes)
+ && query->error == DB_SUCCESS; ++i) {
const fts_node_t* node;
node = static_cast<const fts_node_t*>(
@@ -1156,48 +1302,48 @@ fts_query_intersect(
rw_lock_x_unlock(&cache->lock);
+ /* error is passed by 'query->error' */
+ if (query->error != DB_SUCCESS) {
+ ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ return(query->error);
+ }
+
/* Setup the callback args for filtering and
consolidating the ilist. */
fetch.read_arg = query;
fetch.read_record = fts_query_index_fetch_nodes;
- query->error = fts_index_fetch_nodes(
+ error = fts_index_fetch_nodes(
trx, &graph, &query->fts_index_table, token, &fetch);
+ /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
+ ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
+ if (error != DB_SUCCESS) {
+ query->error = error;
+ }
+
fts_que_graph_free(graph);
if (query->error == DB_SUCCESS) {
- if (query->oper == FTS_EXIST) {
-
- /* The size can't increase. */
- ut_a(rbt_size(query->doc_ids) <= n_doc_ids);
- }
-
/* Make the intersection (rb tree) the current doc id
set and free the old set. */
- if (query->intersection) {
- fts_query_free_doc_ids(query->doc_ids);
- query->doc_ids = query->intersection;
- query->intersection = NULL;
- }
+ fts_query_free_doc_ids(query, query->doc_ids);
+ query->doc_ids = query->intersection;
+ query->intersection = NULL;
- /* Reset the set operation to intersect. */
- query->oper = FTS_EXIST;
+ ut_a(!query->multi_exist || (query->multi_exist
+ && rbt_size(query->doc_ids) <= n_doc_ids));
}
}
- if (!query->multi_exist) {
- query->multi_exist = TRUE;
- }
-
return(query->error);
}
/*****************************************************************//**
Query index cache.
-@return DB_SUCCESS if all went well */
+@return DB_SUCCESS if all go well */
static
-ulint
+dberr_t
fts_query_cache(
/*============*/
fts_query_t* query, /*!< in/out: query instance */
@@ -1227,7 +1373,8 @@ fts_query_cache(
nodes = fts_cache_find_word(index_cache, token);
- for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
+ for (i = 0; nodes && i < ib_vector_size(nodes)
+ && query->error == DB_SUCCESS; ++i) {
const fts_node_t* node;
node = static_cast<const fts_node_t*>(
@@ -1239,12 +1386,12 @@ fts_query_cache(
rw_lock_x_unlock(&cache->lock);
- return(DB_SUCCESS);
+ return(query->error);
}
/*****************************************************************//**
Set union.
-@return DB_SUCCESS if all went well */
+@return DB_SUCCESS if all go well */
static __attribute__((nonnull, warn_unused_result))
dberr_t
fts_query_union(
@@ -1256,6 +1403,7 @@ fts_query_union(
ulint n_doc_ids = 0;
trx_t* trx = query->trx;
que_t* graph = NULL;
+ dberr_t error;
ut_a(query->oper == FTS_NONE || query->oper == FTS_DECR_RATING ||
query->oper == FTS_NEGATE || query->oper == FTS_INCR_RATING);
@@ -1265,8 +1413,6 @@ fts_query_union(
(int) token->f_len, token->f_str);
#endif
- query->error = DB_SUCCESS;
-
if (query->doc_ids) {
n_doc_ids = rbt_size(query->doc_ids);
}
@@ -1287,9 +1433,15 @@ fts_query_union(
fetch.read_record = fts_query_index_fetch_nodes;
/* Read the nodes from disk. */
- query->error = fts_index_fetch_nodes(
+ error = fts_index_fetch_nodes(
trx, &graph, &query->fts_index_table, token, &fetch);
+ /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
+ ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
+ if (error != DB_SUCCESS) {
+ query->error = error;
+ }
+
fts_que_graph_free(graph);
if (query->error == DB_SUCCESS) {
@@ -1302,22 +1454,17 @@ fts_query_union(
if (query->doc_ids) {
n_doc_ids = rbt_size(query->doc_ids) - n_doc_ids;
}
-
- /* In case there were no matching docs then we reset the
- state, otherwise intersection will not be able to detect
- that it's being called for the first time. */
- if (!rbt_empty(query->doc_ids)) {
- query->inited = TRUE;
- }
}
return(query->error);
}
/*****************************************************************//**
-Depending upon the current query operator process the doc id. */
+Depending upon the current query operator process the doc id.
+return DB_SUCCESS if all go well
+or return DB_FTS_EXCEED_RESULT_CACHE_LIMIT */
static
-void
+dberr_t
fts_query_process_doc_id(
/*=====================*/
fts_query_t* query, /*!< in: query instance */
@@ -1325,6 +1472,10 @@ fts_query_process_doc_id(
fts_rank_t rank) /*!< in: if non-zero, it is the
rank associated with the doc_id */
{
+ if (query->flags == FTS_OPT_RANKING) {
+ return(DB_SUCCESS);
+ }
+
switch (query->oper) {
case FTS_NONE:
fts_query_union_doc_id(query, doc_id, rank);
@@ -1355,12 +1506,18 @@ fts_query_process_doc_id(
default:
ut_error;
}
+
+ if (query->total_size > fts_result_cache_limit) {
+ return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ } else {
+ return(DB_SUCCESS);
+ }
}
/*****************************************************************//**
Merge two result sets. */
static
-void
+dberr_t
fts_merge_doc_ids(
/*==============*/
fts_query_t* query, /*!< in,out: query instance */
@@ -1377,25 +1534,42 @@ fts_merge_doc_ids(
query->intersection = rbt_create(
sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+
+ query->total_size += SIZEOF_RBT_CREATE;
}
/* Merge the elements to the result set. */
for (node = rbt_first(doc_ids); node; node = rbt_next(doc_ids, node)) {
fts_ranking_t* ranking;
+ ulint pos = 0;
+ byte* word = NULL;
ranking = rbt_value(fts_ranking_t, node);
- fts_query_process_doc_id(
- query, ranking->doc_id, ranking->rank);
+ query->error = fts_query_process_doc_id(
+ query, ranking->doc_id, ranking->rank);
+
+ if (query->error != DB_SUCCESS) {
+ return(query->error);
+ }
+
+ /* Merge words. Don't need to take operator into account. */
+ ut_a(ranking->words);
+ while (fts_ranking_words_get_next(query, ranking, &pos, &word)) {
+ fts_query_add_word_to_document(query, ranking->doc_id,
+ word);
+ }
}
/* If it is an intersection operation, reset query->doc_ids
to query->intersection and free the old result list. */
if (query->oper == FTS_EXIST && query->intersection != NULL) {
- fts_query_free_doc_ids(query->doc_ids);
+ fts_query_free_doc_ids(query, query->doc_ids);
query->doc_ids = query->intersection;
query->intersection = NULL;
}
+
+ return(DB_SUCCESS);
}
/*****************************************************************//**
@@ -1827,7 +2001,7 @@ fts_query_select(
/********************************************************************
Read the rows from the FTS index, that match word and where the
doc id is between first and last doc id.
-@return DB_SUCCESS if all went well else error code */
+@return DB_SUCCESS if all go well else error code */
static __attribute__((nonnull, warn_unused_result))
dberr_t
fts_query_find_term(
@@ -1967,7 +2141,7 @@ fts_query_sum(
/********************************************************************
Calculate the total documents that contain a particular word (term).
-@return DB_SUCCESS if all went well else error code */
+@return DB_SUCCESS if all go well else error code */
static __attribute__((nonnull, warn_unused_result))
dberr_t
fts_query_total_docs_containing_term(
@@ -2046,7 +2220,7 @@ fts_query_total_docs_containing_term(
/********************************************************************
Get the total number of words in a documents.
-@return DB_SUCCESS if all went well else error code */
+@return DB_SUCCESS if all go well else error code */
static __attribute__((nonnull, warn_unused_result))
dberr_t
fts_query_terms_in_document(
@@ -2233,8 +2407,14 @@ static __attribute__((nonnull, warn_unused_result))
dberr_t
fts_query_search_phrase(
/*====================*/
- fts_query_t* query, /*!< in: query instance */
- ib_vector_t* tokens) /*!< in: tokens to search */
+ fts_query_t* query, /*!< in: query instance */
+ ib_vector_t* orig_tokens, /*!< in: tokens to search,
+ with any stopwords in the
+ original phrase */
+ ib_vector_t* tokens) /*!< in: tokens that does
+ not include stopwords and
+ can be used to calculate
+ ranking */
{
ulint i;
fts_get_doc_t get_doc;
@@ -2275,14 +2455,18 @@ fts_query_search_phrase(
if (match->doc_id != 0) {
query->error = fts_query_match_document(
- tokens, &get_doc,
+ orig_tokens, &get_doc,
match, query->distance, &found);
if (query->error == DB_SUCCESS && found) {
ulint z;
- fts_query_process_doc_id(query,
+ query->error = fts_query_process_doc_id(query,
match->doc_id, 0);
+ if (query->error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
for (z = 0; z < ib_vector_size(tokens); z++) {
fts_string_t* token;
token = static_cast<fts_string_t*>(
@@ -2295,6 +2479,7 @@ fts_query_search_phrase(
}
}
+func_exit:
/* Free the prepared statement. */
if (get_doc.get_document_graph) {
fts_que_graph_free(get_doc.get_document_graph);
@@ -2314,17 +2499,21 @@ fts_query_phrase_search(
fts_query_t* query, /*!< in: query instance */
const fts_string_t* phrase) /*!< in: token to search */
{
- char* src;
- char* state; /* strtok_r internal state */
ib_vector_t* tokens;
+ ib_vector_t* orig_tokens;
mem_heap_t* heap = mem_heap_create(sizeof(fts_string_t));
- char* utf8 = strdup((char*) phrase->f_str);
+ ulint len = phrase->f_len;
+ ulint cur_pos = 0;
ib_alloc_t* heap_alloc;
ulint num_token;
+ CHARSET_INFO* charset;
+
+ charset = query->fts_index_table.charset;
heap_alloc = ib_heap_allocator_create(heap);
tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
+ orig_tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
if (query->distance != ULINT_UNDEFINED && query->distance > 0) {
query->flags = FTS_PROXIMITY;
@@ -2333,26 +2522,65 @@ fts_query_phrase_search(
}
/* Split the phrase into tokens. */
- for (src = utf8; /* No op */; src = NULL) {
+ while (cur_pos < len) {
+ fts_cache_t* cache = query->index->table->fts->cache;
+ ib_rbt_bound_t parent;
+ ulint offset;
+ ulint cur_len;
+ fts_string_t result_str;
+
+ cur_len = innobase_mysql_fts_get_token(
+ charset,
+ reinterpret_cast<const byte*>(phrase->f_str) + cur_pos,
+ reinterpret_cast<const byte*>(phrase->f_str) + len,
+ &result_str, &offset);
+
+ if (cur_len == 0) {
+ break;
+ }
+
+ cur_pos += cur_len;
+
+ if (result_str.f_n_char == 0) {
+ continue;
+ }
+
fts_string_t* token = static_cast<fts_string_t*>(
ib_vector_push(tokens, NULL));
- token->f_str = (byte*) strtok_r(
- src, FTS_PHRASE_DELIMITER, &state);
+ token->f_str = static_cast<byte*>(
+ mem_heap_alloc(heap, result_str.f_len + 1));
+ ut_memcpy(token->f_str, result_str.f_str, result_str.f_len);
- if (token->f_str) {
+ token->f_len = result_str.f_len;
+ token->f_str[token->f_len] = 0;
+
+ if (cache->stopword_info.cached_stopword
+ && rbt_search(cache->stopword_info.cached_stopword,
+ &parent, token) != 0
+ && result_str.f_n_char >= fts_min_token_size
+ && result_str.f_n_char <= fts_max_token_size) {
/* Add the word to the RB tree so that we can
calculate it's frequencey within a document. */
fts_query_add_word_freq(query, token->f_str);
-
- token->f_len = ut_strlen((char*) token->f_str);
} else {
ib_vector_pop(tokens);
- break;
+ }
+
+ /* we will start to store all words including stopwords
+ in the "orig_tokens" vector, but skip any leading words
+ that are stopwords */
+ if (!ib_vector_is_empty(tokens)) {
+ fts_string_t* orig_token = static_cast<fts_string_t*>(
+ ib_vector_push(orig_tokens, NULL));
+
+ orig_token->f_str = token->f_str;
+ orig_token->f_len = token->f_len;
}
}
num_token = ib_vector_size(tokens);
+ ut_ad(ib_vector_size(orig_tokens) >= num_token);
/* Ignore empty strings. */
if (num_token > 0) {
@@ -2362,19 +2590,7 @@ fts_query_phrase_search(
fts_ast_oper_t oper = query->oper;
que_t* graph = NULL;
ulint i;
-
- /* Create the rb tree for storing the words read form disk. */
- if (!query->inited) {
-
- /* Since this is the first time, we need to convert
- this intersection query into a union query. Otherwise
- we will end up with an empty set. */
- if (query->oper == FTS_EXIST) {
- query->oper = FTS_NONE;
- }
-
- query->inited = TRUE;
- }
+ dberr_t error;
/* Create the vector for storing matching document ids
and the positions of the first token of the phrase. */
@@ -2422,10 +2638,16 @@ fts_query_phrase_search(
query->matched = query->match_array[i];
}
- fts_index_fetch_nodes(
+ error = fts_index_fetch_nodes(
trx, &graph, &query->fts_index_table,
token, &fetch);
+ /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
+ ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
+ if (error != DB_SUCCESS) {
+ query->error = error;
+ }
+
fts_que_graph_free(graph);
graph = NULL;
@@ -2438,12 +2660,15 @@ fts_query_phrase_search(
/* If any of the token can't be found,
no need to continue match */
- if (ib_vector_is_empty(query->match_array[i])) {
+ if (ib_vector_is_empty(query->match_array[i])
+ || query->error != DB_SUCCESS) {
goto func_exit;
}
}
- if (num_token == 1
+ /* Just a single word, no need to fetch the original
+ documents to do phrase matching */
+ if (ib_vector_size(orig_tokens) == 1
&& !ib_vector_is_empty(query->match_array[0])) {
fts_match_t* match;
ulint n_matched;
@@ -2455,8 +2680,11 @@ fts_query_phrase_search(
ib_vector_get(
query->match_array[0], i));
- fts_query_process_doc_id(
- query, match->doc_id, 0);
+ query->error = fts_query_process_doc_id(
+ query, match->doc_id, 0);
+ if (query->error != DB_SUCCESS) {
+ goto func_exit;
+ }
fts_query_add_word_to_document(
query, match->doc_id, token->f_str);
@@ -2484,18 +2712,21 @@ fts_query_phrase_search(
/* Read the actual text in and search for the phrase. */
if (matched) {
- query->error = DB_SUCCESS;
+ ut_ad(query->error == DB_SUCCESS);
query->error = fts_query_search_phrase(
- query, tokens);
+ query, orig_tokens, tokens);
}
}
/* Restore original operation. */
query->oper = oper;
+
+ if (query->error != DB_SUCCESS) {
+ goto func_exit;
+ }
}
func_exit:
- free(utf8);
mem_heap_free(heap);
/* Don't need it anymore. */
@@ -2506,7 +2737,7 @@ func_exit:
/*****************************************************************//**
Find the word and evaluate.
-@return DB_SUCCESS if all went well */
+@return DB_SUCCESS if all go well */
static __attribute__((nonnull, warn_unused_result))
dberr_t
fts_query_execute(
@@ -2578,7 +2809,7 @@ fts_query_get_token(
/*****************************************************************//**
Visit every node of the AST. */
static
-ulint
+dberr_t
fts_query_visitor(
/*==============*/
fts_ast_oper_t oper, /*!< in: current operator */
@@ -2602,11 +2833,8 @@ fts_query_visitor(
token.f_str = node->text.ptr;
token.f_len = ut_strlen((char*) token.f_str);
- /* "first second third" is treated as first & second
- & third. Create the rb tree that will hold the doc ids
- of the intersection. */
- if (!query->intersection && query->oper == FTS_EXIST) {
-
+ if (query->oper == FTS_EXIST) {
+ ut_ad(query->intersection == NULL);
query->intersection = rbt_create(
sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
}
@@ -2621,10 +2849,8 @@ fts_query_visitor(
query->collect_positions = FALSE;
- /* Make the intesection (rb tree) the current doc id
- set and free the old set. */
- if (query->intersection) {
- fts_query_free_doc_ids(query->doc_ids);
+ if (query->oper == FTS_EXIST) {
+ fts_query_free_doc_ids(query, query->doc_ids);
query->doc_ids = query->intersection;
query->intersection = NULL;
}
@@ -2649,6 +2875,10 @@ fts_query_visitor(
ut_error;
}
+ if (query->oper == FTS_EXIST) {
+ query->multi_exist = true;
+ }
+
return(query->error);
}
@@ -2656,7 +2886,7 @@ fts_query_visitor(
Process (nested) sub-expression, create a new result set to store the
sub-expression result by processing nodes under current sub-expression
list. Merge the sub-expression result with that of parent expression list.
-@return DB_SUCCESS if all went well */
+@return DB_SUCCESS if all well */
UNIV_INTERN
dberr_t
fts_ast_visit_sub_exp(
@@ -2670,8 +2900,8 @@ fts_ast_visit_sub_exp(
ib_rbt_t* parent_doc_ids;
ib_rbt_t* subexpr_doc_ids;
dberr_t error = DB_SUCCESS;
- ibool inited = query->inited;
bool will_be_ignored = false;
+ bool multi_exist;
ut_a(node->type == FTS_AST_SUBEXP_LIST);
@@ -2691,45 +2921,34 @@ fts_ast_visit_sub_exp(
query->doc_ids = rbt_create(sizeof(fts_ranking_t),
fts_ranking_doc_id_cmp);
- /* Reset the query start flag because the sub-expression result
- set is independent of any previous results. The state flag
- reset is needed for not making an intersect operation on an empty
- set in the first call to fts_query_intersect() for the first term. */
- query->inited = FALSE;
+ query->total_size += SIZEOF_RBT_CREATE;
+ multi_exist = query->multi_exist;
+ query->multi_exist = false;
/* Process nodes in current sub-expression and store its
result set in query->doc_ids we created above. */
error = fts_ast_visit(FTS_NONE, node->next, visitor,
arg, &will_be_ignored);
/* Reinstate parent node state and prepare for merge. */
- query->inited = inited;
+ query->multi_exist = multi_exist;
query->oper = cur_oper;
subexpr_doc_ids = query->doc_ids;
/* Restore current result set. */
query->doc_ids = parent_doc_ids;
- if (query->oper == FTS_EXIST && !query->inited) {
- ut_a(rbt_empty(query->doc_ids));
- /* Since this is the first time we need to convert this
- intersection query into a union query. Otherwise we
- will end up with an empty set. */
- query->oper = FTS_NONE;
- query->inited = TRUE;
- }
-
/* Merge the sub-expression result with the parent result set. */
if (error == DB_SUCCESS && !rbt_empty(subexpr_doc_ids)) {
- fts_merge_doc_ids(query, subexpr_doc_ids);
+ error = fts_merge_doc_ids(query, subexpr_doc_ids);
}
if (query->oper == FTS_EXIST) {
- query->multi_exist = TRUE;
+ query->multi_exist = true;
}
/* Free current result set. Result already merged into parent. */
- fts_query_free_doc_ids(subexpr_doc_ids);
+ fts_query_free_doc_ids(query, subexpr_doc_ids);
return(error);
}
@@ -2808,9 +3027,10 @@ fts_query_find_doc_id(
/*****************************************************************//**
Read and filter nodes.
-@return fts_node_t instance */
+@return DB_SUCCESS if all go well,
+or return DB_FTS_EXCEED_RESULT_CACHE_LIMIT */
static
-void
+dberr_t
fts_query_filter_doc_ids(
/*=====================*/
fts_query_t* query, /*!< in: query instance */
@@ -2863,6 +3083,10 @@ fts_query_filter_doc_ids(
parent container. */
match->positions = ib_vector_create(
heap_alloc, sizeof(ulint), 64);
+
+ query->total_size += sizeof(fts_match_t)
+ + sizeof(ib_vector_t)
+ + sizeof(ulint) * 64;
}
/* Unpack the positions within the document. */
@@ -2888,7 +3112,7 @@ fts_query_filter_doc_ids(
/* Add the doc id to the doc freq rb tree, if the doc id
doesn't exist it will be created. */
- doc_freq = fts_query_add_doc_freq(doc_freqs, doc_id);
+ doc_freq = fts_query_add_doc_freq(query, doc_freqs, doc_id);
/* Avoid duplicating frequency tally. */
if (doc_freq->freq == 0) {
@@ -2904,21 +3128,29 @@ fts_query_filter_doc_ids(
/* We simply collect the matching documents and the
positions here and match later. */
if (!query->collect_positions) {
+ /* We ignore error here and will check it later */
fts_query_process_doc_id(query, doc_id, 0);
- }
- /* Add the word to the document's matched RB tree. */
- fts_query_add_word_to_document(query, doc_id, word);
+ /* Add the word to the document's matched RB tree. */
+ fts_query_add_word_to_document(query, doc_id, word);
+ }
}
/* Some sanity checks. */
ut_a(doc_id == node->last_doc_id);
+
+ if (query->total_size > fts_result_cache_limit) {
+ return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ } else {
+ return(DB_SUCCESS);
+ }
}
/*****************************************************************//**
-Read the FTS INDEX row. */
+Read the FTS INDEX row.
+@return DB_SUCCESS if all go well. */
static
-void
+dberr_t
fts_query_read_node(
/*================*/
fts_query_t* query, /*!< in: query instance */
@@ -2932,6 +3164,7 @@ fts_query_read_node(
fts_word_freq_t* word_freq;
ibool skip = FALSE;
byte term[FTS_MAX_WORD_LEN + 1];
+ dberr_t error = DB_SUCCESS;
ut_a(query->cur_node->type == FTS_AST_TERM ||
query->cur_node->type == FTS_AST_TEXT);
@@ -3005,9 +3238,9 @@ fts_query_read_node(
case 4: /* ILIST */
- fts_query_filter_doc_ids(
- query, word_freq->word, word_freq,
- &node, data, len, FALSE);
+ error = fts_query_filter_doc_ids(
+ query, word_freq->word, word_freq,
+ &node, data, len, FALSE);
break;
@@ -3021,6 +3254,8 @@ fts_query_read_node(
ut_a(i == 5);
}
+
+ return error;
}
/*****************************************************************//**
@@ -3047,9 +3282,15 @@ fts_query_index_fetch_nodes(
ut_a(dfield_len <= FTS_MAX_WORD_LEN);
- fts_query_read_node(query, &key, que_node_get_next(exp));
+ /* Note: we pass error out by 'query->error' */
+ query->error = fts_query_read_node(query, &key, que_node_get_next(exp));
- return(TRUE);
+ if (query->error != DB_SUCCESS) {
+ ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ return(FALSE);
+ } else {
+ return(TRUE);
+ }
}
/*****************************************************************//**
@@ -3107,27 +3348,22 @@ fts_query_calculate_ranking(
const fts_query_t* query, /*!< in: query state */
fts_ranking_t* ranking) /*!< in: Document to rank */
{
- const ib_rbt_node_t* node;
+ ulint pos = 0;
+ byte* word = NULL;
/* At this stage, ranking->rank should not exceed the 1.0
bound */
ut_ad(ranking->rank <= 1.0 && ranking->rank >= -1.0);
+ ut_ad(query->word_map->size() == query->word_vector->size());
- for (node = rbt_first(ranking->words);
- node;
- node = rbt_first(ranking->words)) {
-
+ while (fts_ranking_words_get_next(query, ranking, &pos, &word)) {
int ret;
- const byte* word;
- const byte** wordp;
ib_rbt_bound_t parent;
double weight;
fts_doc_freq_t* doc_freq;
fts_word_freq_t* word_freq;
- wordp = rbt_value(const byte*, node);
- word = *wordp;
-
+ ut_ad(word != NULL);
ret = rbt_search(query->word_freqs, &parent, word);
/* It must exist. */
@@ -3146,8 +3382,6 @@ fts_query_calculate_ranking(
weight = (double) doc_freq->freq * word_freq->idf;
ranking->rank += (fts_rank_t) (weight * word_freq->idf);
-
- ut_free(rbt_remove_node(ranking->words, node));
}
}
@@ -3157,6 +3391,7 @@ static
void
fts_query_add_ranking(
/*==================*/
+ fts_query_t* query, /*!< in: query state */
ib_rbt_t* ranking_tree, /*!< in: ranking tree */
const fts_ranking_t* new_ranking) /*!< in: ranking of a document */
{
@@ -3173,6 +3408,9 @@ fts_query_add_ranking(
ut_a(ranking->words == NULL);
} else {
rbt_add_node(ranking_tree, &parent, new_ranking);
+
+ query->total_size += SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t);
}
}
@@ -3213,14 +3451,13 @@ static
fts_result_t*
fts_query_prepare_result(
/*=====================*/
- const fts_query_t* query, /*!< in: Query state */
- fts_result_t* result) /*!< in: result this can contain
- data from a previous search on
- another FTS index */
+ fts_query_t* query, /*!< in: Query state */
+ fts_result_t* result) /*!< in: result this can contain
+ data from a previous search on
+ another FTS index */
{
const ib_rbt_node_t* node;
-
- ut_a(rbt_size(query->doc_ids) > 0);
+ bool result_is_null = false;
if (result == NULL) {
result = static_cast<fts_result_t*>(ut_malloc(sizeof(*result)));
@@ -3229,8 +3466,55 @@ fts_query_prepare_result(
result->rankings_by_id = rbt_create(
sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+
+ query->total_size += sizeof(fts_result_t) + SIZEOF_RBT_CREATE;
+ result_is_null = true;
+ }
+
+ if (query->flags == FTS_OPT_RANKING) {
+ fts_word_freq_t* word_freq;
+ ulint size = ib_vector_size(query->deleted->doc_ids);
+ fts_update_t* array =
+ (fts_update_t*) query->deleted->doc_ids->data;
+
+ node = rbt_first(query->word_freqs);
+ ut_ad(node);
+ word_freq = rbt_value(fts_word_freq_t, node);
+
+ for (node = rbt_first(word_freq->doc_freqs);
+ node;
+ node = rbt_next(word_freq->doc_freqs, node)) {
+ fts_doc_freq_t* doc_freq;
+ fts_ranking_t ranking;
+
+ doc_freq = rbt_value(fts_doc_freq_t, node);
+
+ /* Don't put deleted docs into result */
+ if (fts_bsearch(array, 0, size, doc_freq->doc_id)
+ >= 0) {
+ continue;
+ }
+
+ ranking.doc_id = doc_freq->doc_id;
+ ranking.rank = doc_freq->freq * word_freq->idf
+ * word_freq->idf;
+ ranking.words = NULL;
+
+ fts_query_add_ranking(query, result->rankings_by_id,
+ &ranking);
+
+ if (query->total_size > fts_result_cache_limit) {
+ query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
+ fts_query_free_result(result);
+ return(NULL);
+ }
+ }
+
+ return(result);
}
+ ut_a(rbt_size(query->doc_ids) > 0);
+
for (node = rbt_first(query->doc_ids);
node;
node = rbt_next(query->doc_ids, node)) {
@@ -3245,11 +3529,24 @@ fts_query_prepare_result(
// different FTS indexes.
/* We don't need these anymore free the resources. */
- ut_a(rbt_empty(ranking->words));
- rbt_free(ranking->words);
ranking->words = NULL;
- fts_query_add_ranking(result->rankings_by_id, ranking);
+ if (!result_is_null) {
+ fts_query_add_ranking(query, result->rankings_by_id, ranking);
+
+ if (query->total_size > fts_result_cache_limit) {
+ query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
+ fts_query_free_result(result);
+ return(NULL);
+ }
+ }
+ }
+
+ if (result_is_null) {
+ /* Use doc_ids directly */
+ rbt_free(result->rankings_by_id);
+ result->rankings_by_id = query->doc_ids;
+ query->doc_ids = NULL;
}
return(result);
@@ -3261,10 +3558,10 @@ static
fts_result_t*
fts_query_get_result(
/*=================*/
- const fts_query_t* query, /*!< in: query instance */
+ fts_query_t* query, /*!< in: query instance */
fts_result_t* result) /*!< in: result */
{
- if (rbt_size(query->doc_ids) > 0) {
+ if (rbt_size(query->doc_ids) > 0 || query->flags == FTS_OPT_RANKING) {
/* Copy the doc ids to the result. */
result = fts_query_prepare_result(query, result);
} else {
@@ -3298,7 +3595,7 @@ fts_query_free(
}
if (query->doc_ids) {
- fts_query_free_doc_ids(query->doc_ids);
+ fts_query_free_doc_ids(query, query->doc_ids);
}
if (query->word_freqs) {
@@ -3327,6 +3624,14 @@ fts_query_free(
mem_heap_free(query->heap);
}
+ if (query->word_map) {
+ delete query->word_map;
+ }
+
+ if (query->word_vector) {
+ delete query->word_vector;
+ }
+
memset(query, 0, sizeof(*query));
}
@@ -3342,12 +3647,13 @@ fts_query_parse(
{
int error;
fts_ast_state_t state;
- ibool mode = query->boolean_mode;
+ bool mode = query->boolean_mode;
memset(&state, 0x0, sizeof(state));
/* Setup the scanner to use, this depends on the mode flag. */
state.lexer = fts_lexer_create(mode, query_str, query_len);
+ state.charset = query->fts_index_table.charset;
error = fts_parse(&state);
fts_lexer_free(state.lexer);
state.lexer = NULL;
@@ -3363,6 +3669,112 @@ fts_query_parse(
return(state.root);
}
+/*******************************************************************//**
+FTS Query optimization: set query->flags to FTS_OPT_RANKING when the query
+is a single term and query expansion (FTS_EXPAND) was not requested */
+static
+void
+fts_query_can_optimize(
+/*===================*/
+ fts_query_t* query, /*!< in/out: query instance; only query->flags is written */
+ uint flags) /*!< in: FTS search mode flags; FTS_EXPAND disables the optimization */
+{
+ fts_ast_node_t* node = query->root;
+
+ if (flags & FTS_EXPAND) {
+ return;
+ }
+
+ /* Applies only when the root list holds exactly one node and it is a term */
+ ut_ad(node->type == FTS_AST_LIST);
+ node = node->list.head;
+ if (node != NULL && node->type == FTS_AST_TERM && node->next == NULL) {
+ query->flags = FTS_OPT_RANKING;
+ }
+}
+
+/*******************************************************************//**
+Pre-process the query string
+1) make it lower case
+2) in boolean mode, if there is '-' or '+' that is immediately preceded
+and followed by a valid word, make it a space
+@return the processed, NUL-terminated string, allocated with ut_malloc() */
+static
+byte*
+fts_query_str_preprocess(
+/*=====================*/
+ const byte* query_str, /*!< in: FTS query */
+ ulint query_len, /*!< in: FTS query string len */
+ ulint *result_len, /*!< out: result string length */
+ CHARSET_INFO* charset, /*!< in: string charset */
+ bool boolean_mode) /*!< in: is boolean mode */
+{
+ ulint cur_pos = 0;
+ ulint str_len;
+ byte* str_ptr;
+ bool in_phrase = false;
+
+ /* Convert the query string to lower case before parsing. The buffer is
+ ut_malloc'ed and returned, so the caller is responsible for freeing it. */
+
+ str_len = query_len * charset->casedn_multiply + 1;
+ str_ptr = static_cast<byte*>(ut_malloc(str_len));
+
+ *result_len = innobase_fts_casedn_str(
+ charset, const_cast<char*>(reinterpret_cast<const char*>(
+ query_str)), query_len,
+ reinterpret_cast<char*>(str_ptr), str_len);
+
+ ut_ad(*result_len < str_len);
+
+ str_ptr[*result_len] = 0;
+
+ /* If it is not boolean mode, no need to check for '-/+' */
+ if (!boolean_mode) {
+ return(str_ptr);
+ }
+
+ /* Otherwise, we traverse the string to find any '-/+' that are
+ immediately preceded and followed by a valid search word.
+ NOTE: we should not do so for CJK languages, this should
+ be taken care of in our CJK implementation */
+ while (cur_pos < *result_len) {
+ fts_string_t str;
+ ulint offset;
+ ulint cur_len;
+
+ cur_len = innobase_mysql_fts_get_token(
+ charset, str_ptr + cur_pos, str_ptr + *result_len,
+ &str, &offset);
+
+ if (cur_len == 0) {
+ break;
+ }
+
+ /* Track phrase state: each '"' skipped before this token toggles
+ in_phrase; inside a phrase '-/+' are never replaced. */
+ for (byte* ptr = str_ptr + cur_pos; ptr < str.f_str; ptr++) {
+ if ((char) (*ptr) == '"' ) {
+ in_phrase = !in_phrase;
+ }
+ }
+
+ /* Find those that are not leading '-/+' and are also not in a phrase */
+ if (cur_pos > 0 && str.f_str - str_ptr - cur_pos == 1
+ && !in_phrase) {
+ char* last_op = reinterpret_cast<char*>(
+ str_ptr + cur_pos);
+
+ if (*last_op == '-' || *last_op == '+') {
+ *last_op = ' ';
+ }
+ }
+
+ cur_pos += cur_len;
+ }
+
+ return(str_ptr);
+}
/*******************************************************************//**
FTS Query entry point.
@@ -3382,9 +3794,8 @@ fts_query(
fts_query_t query;
dberr_t error = DB_SUCCESS;
byte* lc_query_str;
- ulint lc_query_str_len;
ulint result_len;
- ibool boolean_mode;
+ bool boolean_mode;
trx_t* query_trx;
CHARSET_INFO* charset;
ulint start_time_ms;
@@ -3401,7 +3812,6 @@ fts_query(
query.trx = query_trx;
query.index = index;
- query.inited = FALSE;
query.boolean_mode = boolean_mode;
query.deleted = fts_doc_ids_create();
query.cur_node = NULL;
@@ -3418,6 +3828,9 @@ fts_query(
query.fts_index_table.parent = index->table->name;
query.fts_index_table.charset = charset;
+ query.word_map = new word_map_t;
+ query.word_vector = new word_vector_t;
+ query.error = DB_SUCCESS;
/* Setup the RB tree that will be used to collect per term
statistics. */
@@ -3425,6 +3838,8 @@ fts_query(
sizeof(fts_word_freq_t), innobase_fts_string_cmp,
(void*) charset);
+ query.total_size += SIZEOF_RBT_CREATE;
+
query.total_docs = dict_table_get_n_rows(index->table);
#ifdef FTS_DOC_STATS_DEBUG
@@ -3467,6 +3882,7 @@ fts_query(
/* Sort the vector so that we can do a binary search over the ids. */
ib_vector_sort(query.deleted->doc_ids, fts_update_doc_id_cmp);
+#if 0
/* Convert the query string to lower case before parsing. We own
the ut_malloc'ed result and so remember to free it before return. */
@@ -3481,16 +3897,30 @@ fts_query(
lc_query_str[result_len] = 0;
+#endif
+
+ lc_query_str = fts_query_str_preprocess(
+ query_str, query_len, &result_len, charset, boolean_mode);
+
query.heap = mem_heap_create(128);
/* Create the rb tree for the doc id (current) set. */
query.doc_ids = rbt_create(
sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+ query.total_size += SIZEOF_RBT_CREATE;
+
/* Parse the input query string. */
if (fts_query_parse(&query, lc_query_str, result_len)) {
fts_ast_node_t* ast = query.root;
+ /* Optimize query to check if it's a single term */
+ fts_query_can_optimize(&query, flags);
+
+ DBUG_EXECUTE_IF("fts_instrument_result_cache_limit",
+ fts_result_cache_limit = 2048;
+ );
+
/* Traverse the Abstract Syntax Tree (AST) and execute
the query. */
query.error = fts_ast_visit(
@@ -3500,11 +3930,13 @@ fts_query(
/* If query expansion is requested, extend the search
with first search pass result */
if (query.error == DB_SUCCESS && (flags & FTS_EXPAND)) {
- query.error = fts_expand_query(index, &query);
+ query.error = fts_expand_query(index, &query);
}
/* Calculate the inverse document frequency of the terms. */
- fts_query_calculate_idf(&query);
+ if (query.error == DB_SUCCESS) {
+ fts_query_calculate_idf(&query);
+ }
/* Copy the result from the query state, so that we can
return it to the caller. */
@@ -3530,6 +3962,15 @@ fts_query(
(*result)->rankings_by_id
? (int) rbt_size((*result)->rankings_by_id)
: -1);
+
+ /* Log memory consumption & result size */
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Full Search Memory: "
+ "%lu (bytes), Row: %lu .",
+ query.total_size,
+ (*result)->rankings_by_id
+ ? rbt_size((*result)->rankings_by_id)
+ : 0);
}
func_exit:
@@ -3608,30 +4049,24 @@ static
void
fts_print_doc_id(
/*=============*/
- ib_rbt_t* doc_ids) /*!< in : tree that stores doc_ids.*/
+ fts_query_t* query) /*!< in : query whose doc_ids tree is printed */
{
const ib_rbt_node_t* node;
- const ib_rbt_node_t* node_word;
/* Iterate each member of the doc_id set */
- for (node = rbt_first(doc_ids);
+ for (node = rbt_first(query->doc_ids);
node;
- node = rbt_next(doc_ids, node)) {
+ node = rbt_next(query->doc_ids, node)) {
fts_ranking_t* ranking;
ranking = rbt_value(fts_ranking_t, node);
fprintf(stderr, "doc_ids info, doc_id: %ld \n",
(ulint) ranking->doc_id);
- for (node_word = rbt_first(ranking->words);
- node_word;
- node_word = rbt_next(ranking->words, node_word)) {
-
- const byte** value;
-
- value = rbt_value(const byte*, node_word);
-
- fprintf(stderr, "doc_ids info, value: %s \n", *value);
+ ulint pos = 0;
+ byte* value = NULL;
+ while (fts_ranking_words_get_next(query, ranking, &pos, &value)) {
+ fprintf(stderr, "doc_ids info, value: %s \n", value);
}
}
}
@@ -3676,8 +4111,9 @@ fts_expand_query(
result_doc.charset = index_cache->charset;
+ query->total_size += SIZEOF_RBT_CREATE;
#ifdef UNIV_DEBUG
- fts_print_doc_id(query->doc_ids);
+ fts_print_doc_id(query);
#endif
for (node = rbt_first(query->doc_ids);
@@ -3685,7 +4121,12 @@ fts_expand_query(
node = rbt_next(query->doc_ids, node)) {
fts_ranking_t* ranking;
- const ib_rbt_node_t* node_word;
+ ulint pos;
+ byte* word;
+ ulint prev_token_size;
+ ulint estimate_size;
+
+ prev_token_size = rbt_size(result_doc.tokens);
ranking = rbt_value(fts_ranking_t, node);
@@ -3702,16 +4143,15 @@ fts_expand_query(
/* Remove words that have already been searched in the
first pass */
- for (node_word = rbt_first(ranking->words);
- node_word;
- node_word = rbt_next(ranking->words, node_word)) {
+ pos = 0;
+ word = NULL;
+ while (fts_ranking_words_get_next(query, ranking, &pos,
+ &word)) {
fts_string_t str;
ibool ret;
- const byte** strp;
- strp = rbt_value(const byte*, node_word);
/* FIXME: We are discarding a const qualifier here. */
- str.f_str = (byte*) *strp;
+ str.f_str = word;
str.f_len = ut_strlen((const char*) str.f_str);
ret = rbt_delete(result_doc.tokens, &str);
@@ -3723,6 +4163,18 @@ fts_expand_query(
(ulint) ranking->doc_id);
}
}
+
+ /* Estimate memory used, see fts_process_token and fts_token_t.
+ We ignore token size here. */
+ estimate_size = (rbt_size(result_doc.tokens) - prev_token_size)
+ * (SIZEOF_RBT_NODE_ADD + sizeof(fts_token_t)
+ + sizeof(ib_vector_t) + sizeof(ulint) * 32);
+ query->total_size += estimate_size;
+
+ if (query->total_size > fts_result_cache_limit) {
+ error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
+ goto func_exit;
+ }
}
/* Search the table the second time with expanded search list */
@@ -3740,6 +4192,7 @@ fts_expand_query(
}
}
+func_exit:
fts_doc_free(&result_doc);
return(error);
@@ -3857,8 +4310,12 @@ fts_phrase_or_proximity_search(
if (fts_query_is_in_proximity_range(
query, match, &qualified_pos)) {
/* If so, mark we find a matching doc */
- fts_query_process_doc_id(
+ query->error = fts_query_process_doc_id(
query, match[0]->doc_id, 0);
+ if (query->error != DB_SUCCESS) {
+ matched = FALSE;
+ goto func_exit;
+ }
matched = TRUE;
for (ulint z = 0; z < num_token; z++) {
diff --git a/storage/innobase/fts/fts0tlex.cc b/storage/innobase/fts/fts0tlex.cc
index 44434c4ea25..717ddb8a77e 100644
--- a/storage/innobase/fts/fts0tlex.cc
+++ b/storage/innobase/fts/fts0tlex.cc
@@ -52,7 +52,7 @@ typedef uint32_t flex_uint32_t;
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
+typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
@@ -185,7 +185,7 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE;
#define EOB_ACT_LAST_MATCH 2
#define YY_LESS_LINENO(n)
-
+
/* Return all but the first "n" matched characters back to the input stream. */
#define yyless(n) \
do \
@@ -247,7 +247,7 @@ struct yy_buffer_state
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
@@ -305,9 +305,9 @@ YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner
YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
-void *fts0talloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) );
-void *fts0trealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) );
-void fts0tfree (void * , yyscan_t yyscanner __attribute__((unused)) );
+void *fts0talloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+void *fts0trealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+void fts0tfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
#define yy_new_buffer fts0t_create_buffer
@@ -347,7 +347,7 @@ typedef int yy_state_type;
static yy_state_type yy_get_previous_state (yyscan_t yyscanner );
static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner);
static int yy_get_next_buffer (yyscan_t yyscanner );
-static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) );
+static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
/* Done after the current pattern has been matched and before the
* corresponding action - sets up yytext.
@@ -359,8 +359,8 @@ static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute_
*yy_cp = '\0'; \
yyg->yy_c_buf_p = yy_cp;
-#define YY_NUM_RULES 6
-#define YY_END_OF_BUFFER 7
+#define YY_NUM_RULES 7
+#define YY_END_OF_BUFFER 8
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
@@ -370,7 +370,7 @@ struct yy_trans_info
};
static yyconst flex_int16_t yy_accept[17] =
{ 0,
- 4, 4, 7, 4, 1, 5, 1, 6, 6, 2,
+ 4, 4, 8, 4, 1, 6, 1, 5, 5, 2,
4, 1, 1, 0, 3, 0
} ;
@@ -575,11 +575,11 @@ extern int fts0twrap (yyscan_t yyscanner );
#endif
#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)));
+static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
#endif
#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)));
+static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
#endif
#ifndef YY_NO_INPUT
@@ -816,17 +816,22 @@ YY_RULE_SETUP
}
YY_BREAK
case 5:
-/* rule 5 can match eol */
+YY_RULE_SETUP
+#line 65 "fts0tlex.l"
+;
+ YY_BREAK
+case 6:
+/* rule 6 can match eol */
YY_RULE_SETUP
#line 66 "fts0tlex.l"
YY_BREAK
-case 6:
+case 7:
YY_RULE_SETUP
#line 68 "fts0tlex.l"
ECHO;
YY_BREAK
-#line 829 "fts0tlex.cc"
+#line 834 "fts0tlex.cc"
case YY_STATE_EOF(INITIAL):
yyterminate();
@@ -1307,7 +1312,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
YY_BUFFER_STATE fts0t_create_buffer (FILE * file, int size , yyscan_t yyscanner)
{
YY_BUFFER_STATE b;
-
+
b = (YY_BUFFER_STATE) fts0talloc(sizeof( struct yy_buffer_state ) ,yyscanner );
if ( ! b )
YY_FATAL_ERROR( "out of dynamic memory in fts0t_create_buffer()" );
@@ -1373,7 +1378,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
}
b->yy_is_interactive = 0;
-
+
errno = oerrno;
}
@@ -1479,9 +1484,9 @@ static void fts0tensure_buffer_stack (yyscan_t yyscanner)
, yyscanner);
if ( ! yyg->yy_buffer_stack )
YY_FATAL_ERROR( "out of dynamic memory in fts0tensure_buffer_stack()" );
-
+
memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
+
yyg->yy_buffer_stack_max = num_to_alloc;
yyg->yy_buffer_stack_top = 0;
return;
@@ -1515,7 +1520,7 @@ static void fts0tensure_buffer_stack (yyscan_t yyscanner)
YY_BUFFER_STATE fts0t_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
{
YY_BUFFER_STATE b;
-
+
if ( size < 2 ||
base[size-2] != YY_END_OF_BUFFER_CHAR ||
base[size-1] != YY_END_OF_BUFFER_CHAR )
@@ -1551,7 +1556,7 @@ YY_BUFFER_STATE fts0t_scan_buffer (char * base, yy_size_t size , yyscan_t yysc
*/
YY_BUFFER_STATE fts0t_scan_string (yyconst char * yystr , yyscan_t yyscanner)
{
-
+
return fts0t_scan_bytes(yystr,strlen(yystr) ,yyscanner);
}
@@ -1568,7 +1573,7 @@ YY_BUFFER_STATE fts0t_scan_bytes (yyconst char * yybytes, int _yybytes_len , y
char *buf;
yy_size_t n;
int i;
-
+
/* Get memory for full buffer, including space for trailing EOB's. */
n = _yybytes_len + 2;
buf = (char *) fts0talloc(n ,yyscanner );
@@ -1596,7 +1601,7 @@ YY_BUFFER_STATE fts0t_scan_bytes (yyconst char * yybytes, int _yybytes_len , y
#define YY_EXIT_FAILURE 2
#endif
-static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)))
+static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
(void) fprintf( stderr, "%s\n", msg );
exit( YY_EXIT_FAILURE );
@@ -1636,10 +1641,10 @@ YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner)
int fts0tget_lineno (yyscan_t yyscanner)
{
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
+
if (! YY_CURRENT_BUFFER)
return 0;
-
+
return yylineno;
}
@@ -1649,10 +1654,10 @@ int fts0tget_lineno (yyscan_t yyscanner)
int fts0tget_column (yyscan_t yyscanner)
{
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
+
if (! YY_CURRENT_BUFFER)
return 0;
-
+
return yycolumn;
}
@@ -1714,7 +1719,7 @@ void fts0tset_lineno (int line_number , yyscan_t yyscanner)
/* lineno is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
yy_fatal_error( "fts0tset_lineno called with no buffer" , yyscanner);
-
+
yylineno = line_number;
}
@@ -1729,7 +1734,7 @@ void fts0tset_column (int column_no , yyscan_t yyscanner)
/* column is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
yy_fatal_error( "fts0tset_column called with no buffer" , yyscanner);
-
+
yycolumn = column_no;
}
@@ -1812,9 +1817,9 @@ int fts0tlex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
errno = EINVAL;
return 1;
}
-
+
*ptr_yy_globals = (yyscan_t) fts0talloc ( sizeof( struct yyguts_t ), &dummy_yyguts );
-
+
if (*ptr_yy_globals == NULL){
errno = ENOMEM;
return 1;
@@ -1823,9 +1828,9 @@ int fts0tlex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
/* By setting to 0xAA, we expose bugs in
yy_init_globals. Leave at 0x00 for releases. */
memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
-
+
fts0tset_extra (yy_user_defined, *ptr_yy_globals);
-
+
return yy_init_globals ( *ptr_yy_globals );
}
@@ -1897,7 +1902,7 @@ int fts0tlex_destroy (yyscan_t yyscanner)
*/
#ifndef yytext_ptr
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)))
+static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
register int i;
for ( i = 0; i < n; ++i )
@@ -1906,7 +1911,7 @@ static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yysc
#endif
#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)))
+static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
register int n;
for ( n = 0; s[n]; ++n )
@@ -1916,12 +1921,12 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__(
}
#endif
-void *fts0talloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
+void *fts0talloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
return (void *) malloc( size );
}
-void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
+void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
/* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
@@ -1933,7 +1938,7 @@ void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribu
return (void *) realloc( (char *) ptr, size );
}
-void fts0tfree (void * ptr , yyscan_t yyscanner __attribute__((unused)))
+void fts0tfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
free( (char *) ptr ); /* see fts0trealloc() for (char *) cast */
}
diff --git a/storage/innobase/fts/fts0tlex.l b/storage/innobase/fts/fts0tlex.l
index 8c42678ac7a..a18c2a55081 100644
--- a/storage/innobase/fts/fts0tlex.l
+++ b/storage/innobase/fts/fts0tlex.l
@@ -62,7 +62,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
return(FTS_TERM);
}
-
+. ;
\n
%%
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index ee7ea4246f9..b5abc89601d 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2012, Facebook Inc.
@@ -154,8 +154,6 @@ static uint innobase_old_blocks_pct;
of the buffer pool. */
static uint innobase_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE;
-static ulong innobase_compression_level = DEFAULT_COMPRESSION_LEVEL;
-
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
@@ -396,7 +394,7 @@ static PSI_rwlock_info all_innodb_rwlocks[] = {
{&index_tree_rw_lock_key, "index_tree_rw_lock", 0},
{&index_online_log_key, "index_online_log", 0},
{&dict_table_stats_latch_key, "dict_table_stats", 0},
- {&hash_table_rw_lock_key, "hash table locks", 0}
+ {&hash_table_rw_lock_key, "hash_table_locks", 0}
};
# endif /* UNIV_PFS_RWLOCK */
@@ -413,7 +411,7 @@ static PSI_thread_info all_innodb_threads[] = {
{&srv_master_thread_key, "srv_master_thread", 0},
{&srv_purge_thread_key, "srv_purge_thread", 0},
{&buf_page_cleaner_thread_key, "page_cleaner_thread", 0},
- {&recv_writer_thread_key, "recovery writer thread", 0}
+ {&recv_writer_thread_key, "recv_writer_thread", 0}
};
# endif /* UNIV_PFS_THREAD */
@@ -453,10 +451,18 @@ ib_cb_t innodb_api_cb[] = {
(ib_cb_t) ib_clust_read_tuple_create,
(ib_cb_t) ib_tuple_delete,
(ib_cb_t) ib_tuple_copy,
+ (ib_cb_t) ib_tuple_read_u8,
+ (ib_cb_t) ib_tuple_write_u8,
+ (ib_cb_t) ib_tuple_read_u16,
+ (ib_cb_t) ib_tuple_write_u16,
(ib_cb_t) ib_tuple_read_u32,
(ib_cb_t) ib_tuple_write_u32,
(ib_cb_t) ib_tuple_read_u64,
(ib_cb_t) ib_tuple_write_u64,
+ (ib_cb_t) ib_tuple_read_i8,
+ (ib_cb_t) ib_tuple_write_i8,
+ (ib_cb_t) ib_tuple_read_i16,
+ (ib_cb_t) ib_tuple_write_i16,
(ib_cb_t) ib_tuple_read_i32,
(ib_cb_t) ib_tuple_write_i32,
(ib_cb_t) ib_tuple_read_i64,
@@ -520,6 +526,15 @@ innobase_map_isolation_level(
/*=========================*/
enum_tx_isolation iso); /*!< in: MySQL isolation level code */
+/******************************************************************//**
+Maps a MySQL trx isolation level code to the InnoDB isolation level code
+@return InnoDB isolation level */
+static inline
+ulint
+innobase_map_isolation_level(
+/*=========================*/
+ enum_tx_isolation iso); /*!< in: MySQL isolation level code */
+
static const char innobase_hton_name[]= "InnoDB";
static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
@@ -1506,7 +1521,8 @@ convert_error_code_to_mysql(
case DB_FTS_INVALID_DOCID:
return(HA_FTS_INVALID_DOCID);
-
+ case DB_FTS_EXCEED_RESULT_CACHE_LIMIT:
+ return(HA_ERR_OUT_OF_MEM);
case DB_TOO_MANY_CONCURRENT_TRXS:
return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
case DB_UNSUPPORTED:
@@ -1519,6 +1535,8 @@ convert_error_code_to_mysql(
return(HA_ERR_OUT_OF_MEM);
case DB_TABLESPACE_EXISTS:
return(HA_ERR_TABLESPACE_EXISTS);
+ case DB_IDENTIFIER_TOO_LONG:
+ return(HA_ERR_INTERNAL_ERROR);
}
}
@@ -1611,6 +1629,31 @@ innobase_convert_from_table_id(
strconvert(cs, from, FN_REFLEN, &my_charset_filename, to, (uint) len, &errors);
}
+/**********************************************************************
+Check if the length of the identifier exceeds the maximum allowed.
+The limit is NAME_CHAR_LEN characters, counted in system_charset_info.
+An identifier containing an ill-formed byte sequence is rejected too.
+On failure ER_TOO_LONG_IDENT is reported through my_error().
+@return true when the identifier is too long or ill-formed */
+UNIV_INTERN
+my_bool
+innobase_check_identifier_length(
+/*=============================*/
+	const char*	id)	/* in: FK identifier to check excluding the
+				database portion. */
+{
+	int		well_formed_error = 0;
+	CHARSET_INFO*	cs = system_charset_info;
+	const size_t	id_len = strlen(id);
+	DBUG_ENTER("innobase_check_identifier_length");
+
+	/* well_formed_len() returns the length in BYTES of the well-formed
+	prefix that holds at most NAME_CHAR_LEN characters. The identifier
+	fits only if that prefix is the whole string. Comparing the result
+	with NAME_CHAR_LEN itself would mix bytes with characters: it would
+	reject a valid identifier of exactly NAME_CHAR_LEN single-byte
+	characters and accept an over-long multi-byte one. */
+	size_t		len = cs->cset->well_formed_len(cs, id, id + id_len,
+							NAME_CHAR_LEN,
+							&well_formed_error);
+
+	if (well_formed_error || len != id_len) {
+		my_error(ER_TOO_LONG_IDENT, MYF(0), id);
+		DBUG_RETURN(true);
+	}
+	DBUG_RETURN(false);
+}
+
/******************************************************************//**
Converts an identifier to UTF-8. */
UNIV_INTERN
@@ -1754,9 +1797,14 @@ innobase_mysql_tmpfile(void)
/*========================*/
{
int fd2 = -1;
- File fd = mysql_tmpfile("ib");
+ File fd;
+
+ DBUG_EXECUTE_IF(
+ "innobase_tmpfile_creation_failure",
+ return(-1);
+ );
- DBUG_EXECUTE_IF("innobase_tmpfile_creation_failure", return(-1););
+ fd = mysql_tmpfile("ib");
if (fd >= 0) {
/* Copy the file descriptor, so that the additional resources
@@ -2124,13 +2172,13 @@ UNIV_INTERN
void
innobase_copy_frm_flags_from_create_info(
/*=====================================*/
- dict_table_t* innodb_table, /*!< in/out: InnoDB table */
- HA_CREATE_INFO* create_info) /*!< in: create info */
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ const HA_CREATE_INFO* create_info) /*!< in: create info */
{
ibool ps_on;
ibool ps_off;
- if (dict_table_is_temporary(innodb_table) || srv_read_only_mode) {
+ if (dict_table_is_temporary(innodb_table)) {
/* Temp tables do not use persistent stats. */
ps_on = FALSE;
ps_off = TRUE;
@@ -2160,13 +2208,13 @@ UNIV_INTERN
void
innobase_copy_frm_flags_from_table_share(
/*=====================================*/
- dict_table_t* innodb_table, /*!< in/out: InnoDB table */
- TABLE_SHARE* table_share) /*!< in: table share */
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ const TABLE_SHARE* table_share) /*!< in: table share */
{
ibool ps_on;
ibool ps_off;
- if (dict_table_is_temporary(innodb_table) || srv_read_only_mode) {
+ if (dict_table_is_temporary(innodb_table)) {
/* Temp tables do not use persistent stats */
ps_on = FALSE;
ps_off = TRUE;
@@ -2229,6 +2277,10 @@ ha_innobase::update_thd(
{
trx_t* trx;
+ DBUG_ENTER("ha_innobase::update_thd");
+ DBUG_PRINT("ha_innobase::update_thd", ("user_thd: %p -> %p",
+ user_thd, thd));
+
/* The table should have been opened in ha_innobase::open(). */
DBUG_ASSERT(prebuilt->table->n_ref_count > 0);
@@ -2240,6 +2292,7 @@ ha_innobase::update_thd(
}
user_thd = thd;
+ DBUG_VOID_RETURN;
}
/*********************************************************************//**
@@ -2479,21 +2532,18 @@ innobase_convert_identifier(
ibool file_id)/*!< in: TRUE=id is a table or database name;
FALSE=id is an UTF-8 string */
{
- char nz[NAME_LEN + 1];
- char nz2[NAME_LEN + 1 + EXPLAIN_FILENAME_MAX_EXTRA_LENGTH];
-
const char* s = id;
int q;
if (file_id) {
+
+ char nz[MAX_TABLE_NAME_LEN + 1];
+ char nz2[MAX_TABLE_NAME_LEN + 1];
+
/* Decode the table name. The MySQL function expects
a NUL-terminated string. The input and output strings
buffers must not be shared. */
-
- if (UNIV_UNLIKELY(idlen > (sizeof nz) - 1)) {
- idlen = (sizeof nz) - 1;
- }
-
+ ut_a(idlen <= MAX_TABLE_NAME_LEN);
memcpy(nz, id, idlen);
nz[idlen] = 0;
@@ -2751,7 +2801,8 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/****************************************************************//**
Gives the file extension of an InnoDB single-table tablespace. */
static const char* ha_innobase_exts[] = {
- ".ibd",
+ ".ibd",
+ ".isl",
NullS
};
@@ -2922,14 +2973,33 @@ mem_free_and_error:
srv_normalize_path_for_win(srv_log_group_home_dir);
- if (strchr(srv_log_group_home_dir, ';')
- || innobase_mirrored_log_groups != 1) {
- sql_print_error("syntax error in innodb_log_group_home_dir, "
- "or a wrong number of mirrored log groups");
+ if (strchr(srv_log_group_home_dir, ';')) {
+ sql_print_error("syntax error in innodb_log_group_home_dir");
+ goto mem_free_and_error;
+ }
+ if (innobase_mirrored_log_groups == 1) {
+ sql_print_warning(
+ "innodb_mirrored_log_groups is an unimplemented "
+ "feature and the variable will be completely "
+ "removed in a future version.");
+ }
+
+ if (innobase_mirrored_log_groups > 1) {
+ sql_print_error(
+ "innodb_mirrored_log_groups is an unimplemented feature and "
+ "the variable will be completely removed in a future version. "
+ "Using values other than 1 is not supported.");
goto mem_free_and_error;
}
+ if (innobase_mirrored_log_groups == 0) {
+ /* To throw a deprecation warning message when the option is
+ passed, the default was changed to '0' (as a workaround). Since
+ the only value accepted for this option is '1', reset it to 1 */
+ innobase_mirrored_log_groups = 1;
+ }
+
/* Validate the file format by animal name */
if (innobase_file_format_name != NULL) {
@@ -3134,8 +3204,6 @@ innobase_change_buffering_inited_ok:
srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
- page_compression_level = (ulint) innobase_compression_level;
-
if (!innobase_use_checksums) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -3366,6 +3434,9 @@ innobase_end(
mysql_mutex_destroy(&pending_checkpoint_mutex);
}
+ my_free(fts_server_stopword_table);
+ fts_server_stopword_table= NULL;
+
DBUG_RETURN(err);
}
@@ -4902,17 +4973,17 @@ UNIV_INTERN
int
ha_innobase::open(
/*==============*/
- const char* name, /*!< in: table name */
- int mode, /*!< in: not used */
- uint test_if_locked) /*!< in: not used */
-{
- dict_table_t* ib_table;
- char norm_name[FN_REFLEN];
- THD* thd;
- ulint retries = 0;
- char* is_part = NULL;
- ibool par_case_name_set = FALSE;
- char par_case_name[FN_REFLEN];
+ const char* name, /*!< in: table name */
+ int mode, /*!< in: not used */
+ uint test_if_locked) /*!< in: not used */
+{
+ dict_table_t* ib_table;
+ char norm_name[FN_REFLEN];
+ THD* thd;
+ char* is_part = NULL;
+ ibool par_case_name_set = FALSE;
+ char par_case_name[FN_REFLEN];
+ dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE;
DBUG_ENTER("ha_innobase::open");
@@ -4942,20 +5013,22 @@ ha_innobase::open(
upd_buf_size = 0;
/* We look for pattern #P# to see if the table is partitioned
- MySQL table. The retry logic for partitioned tables is a
- workaround for http://bugs.mysql.com/bug.php?id=33349. Look
- at support issue https://support.mysql.com/view.php?id=21080
- for more details. */
+ MySQL table. */
#ifdef __WIN__
is_part = strstr(norm_name, "#p#");
#else
is_part = strstr(norm_name, "#P#");
#endif /* __WIN__ */
-retry:
+ /* Check whether FOREIGN_KEY_CHECKS is set to 0. If so, the table
+ can be opened even if some FK indexes are missing. If not, the table
+ can't be opened in the same situation */
+ if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
+ ignore_err = DICT_ERR_IGNORE_FK_NOKEY;
+ }
+
/* Get pointer to a table object in InnoDB dictionary cache */
- ib_table = dict_table_open_on_name(norm_name, FALSE, TRUE,
- DICT_ERR_IGNORE_NONE);
+ ib_table = dict_table_open_on_name(norm_name, FALSE, TRUE, ignore_err);
if (ib_table
&& ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
@@ -4981,7 +5054,7 @@ retry:
}
if (NULL == ib_table) {
- if (is_part && retries < 10) {
+ if (is_part) {
/* MySQL partition engine hard codes the file name
separator as "#P#". The text case is fixed even if
lower_case_table_names is set to 1 or 2. This is true
@@ -5021,14 +5094,10 @@ retry:
ib_table = dict_table_open_on_name(
par_case_name, FALSE, TRUE,
- DICT_ERR_IGNORE_NONE);
+ ignore_err);
}
- if (!ib_table) {
- ++retries;
- os_thread_sleep(100000);
- goto retry;
- } else {
+ if (ib_table) {
#ifndef __WIN__
sql_print_warning("Partition table %s opened "
"after converting to lower "
@@ -5054,9 +5123,8 @@ retry:
}
if (is_part) {
- sql_print_error("Failed to open table %s after "
- "%lu attempts.\n", norm_name,
- retries);
+ sql_print_error("Failed to open table %s.\n",
+ norm_name);
}
ib_logf(IB_LOG_LEVEL_WARN,
@@ -5299,8 +5367,6 @@ ha_innobase::clone(
mem_root));
if (new_handler) {
DBUG_ASSERT(new_handler->prebuilt != NULL);
- DBUG_ASSERT(new_handler->user_thd == user_thd);
- DBUG_ASSERT(new_handler->prebuilt->trx == prebuilt->trx);
new_handler->prebuilt->select_lock_type
= prebuilt->select_lock_type;
@@ -5680,8 +5746,8 @@ ulint
innobase_mysql_fts_get_token(
/*=========================*/
CHARSET_INFO* cs, /*!< in: Character set */
- byte* start, /*!< in: start of text */
- byte* end, /*!< in: one character past end of
+ const byte* start, /*!< in: start of text */
+ const byte* end, /*!< in: one character past end of
text */
fts_string_t* token, /*!< out: token's text */
ulint* offset) /*!< out: offset to token,
@@ -5689,64 +5755,56 @@ innobase_mysql_fts_get_token(
'start' */
{
int mbl;
- uchar* doc = start;
+ const uchar* doc = start;
ut_a(cs);
token->f_n_char = token->f_len = 0;
- do {
- for (;;) {
-
- if (doc >= end) {
- return(doc - start);
- }
+ for (;;) {
- int ctype;
+ if (doc >= end) {
+ return(doc - start);
+ }
- mbl = cs->cset->ctype(
- cs, &ctype, (uchar*) doc, (uchar*) end);
+ int ctype;
- if (true_word_char(ctype, *doc)) {
- break;
- }
+ mbl = cs->cset->ctype(
+ cs, &ctype, doc, (const uchar*) end);
- doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
+ if (true_word_char(ctype, *doc)) {
+ break;
}
- ulint mwc = 0;
- ulint length = 0;
+ doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
+ }
- token->f_str = doc;
+ ulint mwc = 0;
+ ulint length = 0;
- while (doc < end) {
+ token->f_str = const_cast<byte*>(doc);
- int ctype;
+ while (doc < end) {
- mbl = cs->cset->ctype(
- cs, &ctype, (uchar*) doc, (uchar*) end);
+ int ctype;
- if (true_word_char(ctype, *doc)) {
- mwc = 0;
- } else if (!misc_word_char(*doc) || mwc) {
- break;
- } else {
- ++mwc;
- }
-
- ++length;
-
- doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
+ mbl = cs->cset->ctype(
+ cs, &ctype, (uchar*) doc, (uchar*) end);
+ if (true_word_char(ctype, *doc)) {
+ mwc = 0;
+ } else if (!misc_word_char(*doc) || mwc) {
+ break;
+ } else {
+ ++mwc;
}
- token->f_len = (uint) (doc - token->f_str) - mwc;
- token->f_n_char = length;
-
- return(doc - start);
+ ++length;
- } while (doc < end);
+ doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
+ }
- token->f_str[token->f_len] = 0;
+ token->f_len = (uint) (doc - token->f_str) - mwc;
+ token->f_n_char = length;
return(doc - start);
}
@@ -7476,6 +7534,12 @@ ha_innobase::unlock_row(void)
DBUG_VOID_RETURN;
}
+ /* Ideally, this assert must be in the beginning of the function.
+ But there are some calls to this function from the SQL layer when the
+ transaction is in state TRX_STATE_NOT_STARTED. The check on
+ prebuilt->select_lock_type above gets around this issue. */
+ ut_ad(trx_state_eq(prebuilt->trx, TRX_STATE_ACTIVE));
+
switch (prebuilt->row_read_type) {
case ROW_READ_WITH_LOCKS:
if (!srv_locks_unsafe_for_binlog
@@ -8310,7 +8374,7 @@ ha_innobase::ft_init_ext(
{
trx_t* trx;
dict_table_t* table;
- ulint error;
+ dberr_t error;
byte* query = (byte*) key->ptr();
ulint query_len = key->length();
const CHARSET_INFO* char_set = key->charset();
@@ -8387,23 +8451,24 @@ ha_innobase::ft_init_ext(
error = fts_query(trx, index, flags, query, query_len, &result);
- // FIXME: Proper error handling and diagnostic
if (error != DB_SUCCESS) {
- fprintf(stderr, "Error processing query\n");
- } else {
- /* Allocate FTS handler, and instantiate it before return */
- fts_hdl = (NEW_FT_INFO*) my_malloc(sizeof(NEW_FT_INFO),
- MYF(0));
+ my_error(convert_error_code_to_mysql(error, 0, NULL),
+ MYF(0));
+ return(NULL);
+ }
- fts_hdl->please = (struct _ft_vft*)(&ft_vft_result);
- fts_hdl->could_you = (struct _ft_vft_ext*)(&ft_vft_ext_result);
- fts_hdl->ft_prebuilt = prebuilt;
- fts_hdl->ft_result = result;
+ /* Allocate FTS handler, and instantiate it before return */
+ fts_hdl = static_cast<NEW_FT_INFO*>(my_malloc(sizeof(NEW_FT_INFO),
+ MYF(0)));
- /* FIXME: Re-evluate the condition when Bug 14469540
- is resolved */
- prebuilt->in_fts_query = true;
- }
+ fts_hdl->please = const_cast<_ft_vft*>(&ft_vft_result);
+ fts_hdl->could_you = const_cast<_ft_vft_ext*>(&ft_vft_ext_result);
+ fts_hdl->ft_prebuilt = prebuilt;
+ fts_hdl->ft_result = result;
+
+ /* FIXME: Re-evaluate the condition when Bug 14469540
+ is resolved */
+ prebuilt->in_fts_query = true;
return((FT_INFO*) fts_hdl);
}
@@ -8922,6 +8987,9 @@ err_col:
mem_heap_free(heap);
+ DBUG_EXECUTE_IF("ib_create_err_tablespace_exist",
+ err = DB_TABLESPACE_EXISTS;);
+
if (err == DB_DUPLICATE_KEY || err == DB_TABLESPACE_EXISTS) {
char display_name[FN_REFLEN];
char* buf_end = innobase_convert_identifier(
@@ -9515,6 +9583,11 @@ innobase_table_flags(
DBUG_RETURN(false);
}
+ if (key->flags & HA_USES_PARSER) {
+ my_error(ER_INNODB_NO_FT_USES_PARSER, MYF(0));
+ DBUG_RETURN(false);
+ }
+
if (fts_doc_id_index_bad) {
goto index_bad;
}
@@ -9837,7 +9910,7 @@ ha_innobase::create(
/* Check whether there already exists FTS_DOC_ID_INDEX */
ret = innobase_fts_check_doc_id_index_in_def(
- form->s->keys, form->s->key_info);
+ form->s->keys, form->key_info);
switch (ret) {
case FTS_INCORRECT_DOC_ID_INDEX:
@@ -9893,6 +9966,16 @@ ha_innobase::create(
}
}
+ /* Cache all the FTS indexes on this table in the FTS specific
+ structure. They are used for FTS indexed column update handling. */
+ if (flags2 & DICT_TF2_FTS) {
+ fts_t* fts = innobase_table->fts;
+
+ ut_a(fts != NULL);
+
+ dict_table_get_all_fts_indexes(innobase_table, fts->indexes);
+ }
+
stmt = innobase_get_stmt(thd, &stmt_len);
if (stmt) {
@@ -9931,15 +10014,6 @@ ha_innobase::create(
goto cleanup;
}
}
- /* Cache all the FTS indexes on this table in the FTS specific
- structure. They are used for FTS indexed column update handling. */
- if (flags2 & DICT_TF2_FTS) {
- fts_t* fts = innobase_table->fts;
-
- ut_a(fts != NULL);
-
- dict_table_get_all_fts_indexes(innobase_table, fts->indexes);
- }
innobase_commit_low(trx);
@@ -10407,8 +10481,10 @@ innobase_rename_table(
DEBUG_SYNC_C("innodb_rename_table_ready");
+ trx_start_if_not_started(trx);
+
/* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
+ no deadlocks can occur then in these operations. */
row_mysql_lock_data_dictionary(trx);
@@ -10446,6 +10522,7 @@ innobase_rename_table(
normalize_table_name_low(
par_case_name, from, FALSE);
#endif
+ trx_start_if_not_started(trx);
error = row_rename_table_for_mysql(
par_case_name, norm_to, trx, TRUE);
}
@@ -11054,8 +11131,6 @@ ha_innobase::info_low(
if (dict_stats_is_persistent_enabled(ib_table)) {
- ut_ad(!srv_read_only_mode);
-
if (is_analyze) {
opt = DICT_STATS_RECALC_PERSISTENT;
} else {
@@ -11602,14 +11677,15 @@ ha_innobase::check(
index_name, sizeof index_name,
index->name, TRUE);
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NOT_KEYFILE,
- "InnoDB: The B-tree of"
- " index %s is corrupted.",
- index_name);
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE,
+ "InnoDB: The B-tree of"
+ " index %s is corrupted.",
+ index_name);
is_ok = FALSE;
dict_set_corrupted(
- index, prebuilt->trx, "CHECK TABLE");
+ index, prebuilt->trx, "CHECK TABLE-check index");
}
if (thd_kill_level(user_thd)) {
@@ -11625,15 +11701,18 @@ ha_innobase::check(
n_rows_in_table = n_rows;
} else if (!(index->type & DICT_FTS)
&& (n_rows != n_rows_in_table)) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NOT_KEYFILE,
- "InnoDB: Index '%-.200s'"
- " contains %lu entries,"
- " should be %lu.",
- index->name,
- (ulong) n_rows,
- (ulong) n_rows_in_table);
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE,
+ "InnoDB: Index '%-.200s' contains %lu"
+ " entries, should be %lu.",
+ index->name,
+ (ulong) n_rows,
+ (ulong) n_rows_in_table);
is_ok = FALSE;
+ dict_set_corrupted(
+ index, prebuilt->trx,
+ "CHECK TABLE; Wrong count");
}
}
@@ -12301,11 +12380,10 @@ ha_innobase::external_lock(
&& !(table_flags() & HA_BINLOG_STMT_CAPABLE)
&& thd_binlog_format(thd) == BINLOG_FORMAT_STMT
&& thd_binlog_filter_ok(thd)
- && thd_sqlcom_can_generate_row_events(thd))
- {
- int skip = 0;
+ && thd_sqlcom_can_generate_row_events(thd)) {
+ bool skip = 0;
/* used by test case */
- DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = 1;);
+ DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = true;);
if (!skip) {
my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0),
" InnoDB is limited to row-logging when "
@@ -12323,14 +12401,23 @@ ha_innobase::external_lock(
|| thd_sql_command(thd) == SQLCOM_DROP_TABLE
|| thd_sql_command(thd) == SQLCOM_ALTER_TABLE
|| thd_sql_command(thd) == SQLCOM_OPTIMIZE
- || thd_sql_command(thd) == SQLCOM_CREATE_TABLE
+ || (thd_sql_command(thd) == SQLCOM_CREATE_TABLE
+ && lock_type == F_WRLCK)
|| thd_sql_command(thd) == SQLCOM_CREATE_INDEX
|| thd_sql_command(thd) == SQLCOM_DROP_INDEX
|| thd_sql_command(thd) == SQLCOM_DELETE)) {
- ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+ if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE)
+ {
+ ib_senderrf(thd, IB_LOG_LEVEL_WARN,
+ ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ } else {
+ ib_senderrf(thd, IB_LOG_LEVEL_WARN,
+ ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
}
trx = prebuilt->trx;
@@ -13051,7 +13138,9 @@ ha_innobase::store_lock(
|| sql_command == SQLCOM_DROP_TABLE
|| sql_command == SQLCOM_ALTER_TABLE
|| sql_command == SQLCOM_OPTIMIZE
- || sql_command == SQLCOM_CREATE_TABLE
+ || (sql_command == SQLCOM_CREATE_TABLE
+ && (lock_type >= TL_WRITE_CONCURRENT_INSERT
+ && lock_type <= TL_WRITE))
|| sql_command == SQLCOM_CREATE_INDEX
|| sql_command == SQLCOM_DROP_INDEX
|| sql_command == SQLCOM_DELETE)) {
@@ -15165,6 +15254,80 @@ innodb_srv_buf_dump_filename_validate(
# define innodb_srv_buf_dump_filename_validate NULL
#endif /* __WIN__ */
+#ifdef UNIV_DEBUG
+static char* srv_buffer_pool_evict; /* storage bound to the buffer_pool_evict system variable */
+
+/****************************************************************//**
+Evict all uncompressed pages of compressed tables from the buffer pool.
+Keep the compressed pages in the buffer pool.
+Every buffer pool instance (0 .. srv_buf_pool_instances - 1) is visited
+in turn. While holding that instance's buffer pool mutex, its unzip_LRU
+list is walked from the last block towards the first, and
+buf_LRU_free_page(&block->page, false) is attempted on every block.
+The predecessor is fetched before the free attempt (presumably because
+a successfully freed block is unlinked from unzip_LRU; confirm against
+buf_LRU_free_page()). A block that cannot be freed does not stop the
+walk; it only causes the function to return false.
+@return whether all uncompressed pages were evicted */
+static __attribute__((warn_unused_result))
+bool
+innodb_buffer_pool_evict_uncompressed(void)
+/*=======================================*/
+{
+ bool all_evicted = true; /* cleared on the first failed free */
+
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool = &buf_pool_ptr[i];
+
+ buf_pool_mutex_enter(buf_pool);
+
+ for (buf_block_t* block = UT_LIST_GET_LAST(
+ buf_pool->unzip_LRU);
+ block != NULL; ) {
+ buf_block_t* prev_block = UT_LIST_GET_PREV(
+ unzip_LRU, block);
+ ut_ad(buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->in_unzip_LRU_list);
+ ut_ad(block->page.in_LRU_list);
+
+ if (!buf_LRU_free_page(&block->page, false)) {
+ all_evicted = false;
+ }
+
+ block = prev_block; /* continue with the saved predecessor */
+ }
+
+ buf_pool_mutex_exit(buf_pool);
+ }
+
+ return(all_evicted);
+}
+
+/****************************************************************//**
+Called on SET GLOBAL innodb_buffer_pool_evict=...
+Handles some values specially, to evict pages from the buffer pool.
+SET GLOBAL innodb_buffer_pool_evict='uncompressed'
+evicts all uncompressed page frames of compressed tablespaces.
+The eviction is attempted up to 10000 times, calling
+os_thread_sleep(10000) after each unsuccessful attempt. If no attempt
+succeeds, a debug assertion (ut_ad(0)) fires.
+A NULL string, or any value other than 'uncompressed', is ignored.
+The parameters thd, var and var_ptr are not used by this function. */
+static
+void
+innodb_buffer_pool_evict_update(
+/*============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var*var, /*!< in: pointer to system variable */
+ void* var_ptr,/*!< out: ignored */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ if (const char* op = *static_cast<const char*const*>(save)) {
+ if (!strcmp(op, "uncompressed")) {
+ for (uint tries = 0; tries < 10000; tries++) {
+ if (innodb_buffer_pool_evict_uncompressed()) {
+ return;
+ }
+
+ os_thread_sleep(10000); /* presumably microseconds (10 ms) -- TODO confirm */
+ }
+
+ /* We failed to evict all uncompressed pages. */
+ ut_ad(0);
+ }
+ }
+}
+#endif /* UNIV_DEBUG */
+
/****************************************************************//**
Update the system variable innodb_monitor_enable and enable
specified monitor counter.
@@ -15242,29 +15405,6 @@ innodb_reset_all_monitor_update(
}
/****************************************************************//**
-Update the system variable innodb_compression_level using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_compression_level_update(
-/*============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- /* We have this call back just to avoid confusion between
- ulong and ulint datatypes. */
- innobase_compression_level =
- (*static_cast<const ulong*>(save));
- page_compression_level =
- (static_cast<const ulint>(innobase_compression_level));
-}
-
-/****************************************************************//**
Parse and enable InnoDB monitor counters during server startup.
User can list the monitor counters/groups to be enable by specifying
"loose-innodb_monitor_enable=monitor_name1;monitor_name2..."
@@ -15441,6 +15581,7 @@ innobase_fts_find_ranking(
#ifdef UNIV_DEBUG
static my_bool innodb_purge_run_now = TRUE;
static my_bool innodb_purge_stop_now = TRUE;
+static my_bool innodb_log_checkpoint_now = TRUE;
/****************************************************************//**
Set the purge state to RUN. If purge is disabled then it
@@ -15487,6 +15628,33 @@ purge_stop_now_set(
trx_purge_stop();
}
}
+
+/****************************************************************//**
+Force innodb to checkpoint.
+Update callback registered for the log_checkpoint_now system variable
+(compiled only under UNIV_DEBUG). When the value pointed to by save is
+true, log_make_checkpoint_at(LSN_MAX, TRUE) followed by
+fil_flush_file_spaces(FIL_LOG) is repeated for as long as
+log_sys->last_checkpoint_lsn is below log_sys->lsn. Afterwards
+log_sys->lsn is passed to fil_write_flushed_lsn_to_data_files() and
+fil_flush_file_spaces(FIL_TABLESPACE) is called.
+When the value is false the function does nothing. */
+static
+void
+checkpoint_now_set(
+/*===============*/
+ THD* thd /*!< in: thread handle */
+ __attribute__((unused)),
+ struct st_mysql_sys_var* var /*!< in: pointer to system
+ variable */
+ __attribute__((unused)),
+ void* var_ptr /*!< out: where the formal
+ string goes */
+ __attribute__((unused)),
+ const void* save) /*!< in: immediate result from
+ check function */
+{
+ if (*(my_bool*) save) {
+ while (log_sys->last_checkpoint_lsn < log_sys->lsn) {
+ log_make_checkpoint_at(LSN_MAX, TRUE);
+ fil_flush_file_spaces(FIL_LOG);
+ }
+ fil_write_flushed_lsn_to_data_files(log_sys->lsn, 0);
+ fil_flush_file_spaces(FIL_TABLESPACE);
+ }
+}
#endif /* UNIV_DEBUG */
/***********************************************************************
@@ -15725,6 +15893,11 @@ static MYSQL_SYSVAR_BOOL(purge_stop_now, innodb_purge_stop_now,
PLUGIN_VAR_OPCMDARG,
"Set purge state to STOP",
NULL, purge_stop_now_set, FALSE);
+
+static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now,
+ PLUGIN_VAR_OPCMDARG,
+ "Force checkpoint now",
+ NULL, checkpoint_now_set, FALSE);
#endif /* UNIV_DEBUG */
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
@@ -15744,7 +15917,7 @@ static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
32, 0); /* Maximum value */
static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
- PLUGIN_VAR_OPCMDARG,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
"Size of the mutex/lock wait array.",
NULL, NULL,
1, /* Default setting */
@@ -15952,12 +16125,20 @@ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
"innodb_thread_concurrency is reached (0 by default)",
NULL, NULL, 0, 0, ~0UL, 0);
-static MYSQL_SYSVAR_ULONG(compression_level, innobase_compression_level,
+static MYSQL_SYSVAR_UINT(compression_level, page_zip_level,
PLUGIN_VAR_RQCMDARG,
"Compression level used for compressed row format. 0 is no compression"
", 1 is fastest, 9 is best compression and default is 6.",
- NULL, innodb_compression_level_update,
- DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);
+ NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);
+
+static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages,
+ PLUGIN_VAR_OPCMDARG,
+ "Enables/disables the logging of entire compressed page images."
+ " InnoDB logs the compressed pages to prevent corruption if"
+ " the zlib compression algorithm changes."
+ " When turned OFF, InnoDB will assume that the zlib"
+ " compression algorithm doesn't change.",
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -16009,6 +16190,13 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_s
"Dump the buffer pool into a file named @@innodb_buffer_pool_filename",
NULL, NULL, FALSE);
+#ifdef UNIV_DEBUG
+static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict,
+ PLUGIN_VAR_RQCMDARG,
+ "Evict pages from the buffer pool",
+ NULL, innodb_buffer_pool_evict_update, "");
+#endif /* UNIV_DEBUG */
+
static MYSQL_SYSVAR_BOOL(buffer_pool_load_now, innodb_buffer_pool_load_now,
PLUGIN_VAR_RQCMDARG,
"Trigger an immediate load of the buffer pool from a file named @@innodb_buffer_pool_filename",
@@ -16074,6 +16262,16 @@ static MYSQL_SYSVAR_ULONG(ft_cache_size, fts_max_cache_size,
"InnoDB Fulltext search cache size in bytes",
NULL, NULL, 8000000, 1600000, 80000000, 0);
+static MYSQL_SYSVAR_ULONG(ft_total_cache_size, fts_max_total_cache_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Total memory allocated for InnoDB Fulltext Search cache",
+ NULL, NULL, 640000000, 32000000, 1600000000, 0);
+
+static MYSQL_SYSVAR_ULONG(ft_result_cache_limit, fts_result_cache_limit,
+ PLUGIN_VAR_RQCMDARG,
+ "InnoDB Fulltext search query result cache limit in bytes",
+ NULL, NULL, 2000000000L, 1000000L, ~0UL, 0);
+
static MYSQL_SYSVAR_ULONG(ft_min_token_size, fts_min_token_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"InnoDB Fulltext search minimum token size in characters",
@@ -16082,7 +16280,7 @@ static MYSQL_SYSVAR_ULONG(ft_min_token_size, fts_min_token_size,
static MYSQL_SYSVAR_ULONG(ft_max_token_size, fts_max_token_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"InnoDB Fulltext search maximum token size in characters",
- NULL, NULL, HA_FT_MAXCHARLEN, 10, FTS_MAX_WORD_LEN , 0);
+ NULL, NULL, FTS_MAX_WORD_LEN_IN_CHAR, 10, FTS_MAX_WORD_LEN_IN_CHAR, 0);
static MYSQL_SYSVAR_ULONG(ft_num_word_optimize, fts_num_word_optimize,
@@ -16153,10 +16351,12 @@ static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files,
"Number of log files in the log group. InnoDB writes to the files in a circular fashion.",
NULL, NULL, 2, 2, SRV_N_LOG_FILES_MAX, 0);
+/* Note that the default and minimum values are set to 0 to
+detect if the option is passed and print deprecation message */
static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
- NULL, NULL, 1, 1, 10, 0);
+ NULL, NULL, 0, 0, 10, 0);
static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
PLUGIN_VAR_RQCMDARG,
@@ -16432,6 +16632,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(buffer_pool_filename),
MYSQL_SYSVAR(buffer_pool_dump_now),
MYSQL_SYSVAR(buffer_pool_dump_at_shutdown),
+#ifdef UNIV_DEBUG
+ MYSQL_SYSVAR(buffer_pool_evict),
+#endif /* UNIV_DEBUG */
MYSQL_SYSVAR(buffer_pool_load_now),
MYSQL_SYSVAR(buffer_pool_load_abort),
MYSQL_SYSVAR(buffer_pool_load_at_startup),
@@ -16466,6 +16669,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(force_recovery_crash),
#endif /* !DBUG_OFF */
MYSQL_SYSVAR(ft_cache_size),
+ MYSQL_SYSVAR(ft_total_cache_size),
+ MYSQL_SYSVAR(ft_result_cache_limit),
MYSQL_SYSVAR(ft_enable_stopword),
MYSQL_SYSVAR(ft_max_token_size),
MYSQL_SYSVAR(ft_min_token_size),
@@ -16484,6 +16689,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(log_file_size),
MYSQL_SYSVAR(log_files_in_group),
MYSQL_SYSVAR(log_group_home_dir),
+ MYSQL_SYSVAR(log_compressed_pages),
MYSQL_SYSVAR(max_dirty_pages_pct),
MYSQL_SYSVAR(max_dirty_pages_pct_lwm),
MYSQL_SYSVAR(adaptive_flushing_lwm),
@@ -16548,6 +16754,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#ifdef UNIV_DEBUG
MYSQL_SYSVAR(purge_run_now),
MYSQL_SYSVAR(purge_stop_now),
+ MYSQL_SYSVAR(log_checkpoint_now),
#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
MYSQL_SYSVAR(page_hash_locks),
@@ -16600,7 +16807,6 @@ i_s_innodb_buffer_page_lru,
i_s_innodb_buffer_stats,
i_s_innodb_metrics,
i_s_innodb_ft_default_stopword,
-i_s_innodb_ft_inserted,
i_s_innodb_ft_deleted,
i_s_innodb_ft_being_deleted,
i_s_innodb_ft_config,
@@ -17060,6 +17266,23 @@ ib_logf(
Converts an identifier from system_charset_info to my_charset_filename.
@return result string length, as returned by strconvert() */
uint
+innobase_convert_to_filename_charset(
+/*=================================*/
+ char* to, /* out: identifier converted to my_charset_filename */
+ const char* from, /* in: identifier to convert; NUL-terminated, its length is taken with strlen() */
+ ulint len) /* in: length of 'to', in bytes */
+{
+ uint errors; /* filled in by strconvert(); not inspected here */
+ CHARSET_INFO* cs_to = &my_charset_filename; /* target charset */
+ CHARSET_INFO* cs_from = system_charset_info; /* source charset */
+
+ return(strconvert(cs_from, from, strlen(from), cs_to, to, len, &errors));
+}
+
+/**********************************************************************
+Converts an identifier from my_charset_filename to UTF-8 charset.
+@return result string length, as returned by strconvert() */
+uint
innobase_convert_to_system_charset(
/*===============================*/
char* to, /* out: converted identifier */
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 1fb071f5765..97f26f93225 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -633,8 +633,8 @@ UNIV_INTERN
void
innobase_copy_frm_flags_from_create_info(
/*=====================================*/
- dict_table_t* innodb_table, /*!< in/out: InnoDB table */
- HA_CREATE_INFO* create_info); /*!< in: create info */
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ const HA_CREATE_INFO* create_info); /*!< in: create info */
/*********************************************************************//**
Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
@@ -645,5 +645,5 @@ UNIV_INTERN
void
innobase_copy_frm_flags_from_table_share(
/*=====================================*/
- dict_table_t* innodb_table, /*!< in/out: InnoDB table */
- TABLE_SHARE* table_share); /*!< in: table share */
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ const TABLE_SHARE* table_share); /*!< in: table share */
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index a120534b36d..8cfd6928376 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -49,16 +49,17 @@ Smart ALTER TABLE
#include "ha_innodb.h"
-/** Operations for creating an index in place */
+/** Operations for creating secondary indexes (no rebuild needed) */
static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_CREATE
= Alter_inplace_info::ADD_INDEX
| Alter_inplace_info::ADD_UNIQUE_INDEX;
/** Operations for rebuilding a table in place */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_REBUILD
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_REBUILD
= Alter_inplace_info::ADD_PK_INDEX
| Alter_inplace_info::DROP_PK_INDEX
| Alter_inplace_info::CHANGE_CREATE_OPTION
+ /* CHANGE_CREATE_OPTION needs to check innobase_need_rebuild() */
| Alter_inplace_info::ALTER_COLUMN_NULLABLE
| Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE
| Alter_inplace_info::ALTER_COLUMN_ORDER
@@ -70,9 +71,9 @@ static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_REBUILD
*/
;
-/** Operations for creating indexes or rebuilding a table */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_CREATE
- = INNOBASE_ONLINE_CREATE | INNOBASE_INPLACE_REBUILD;
+/** Operations that require changes to data */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_DATA
+ = INNOBASE_ONLINE_CREATE | INNOBASE_ALTER_REBUILD;
/** Operations for altering a table that InnoDB does not care about */
static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_IGNORE
@@ -81,15 +82,18 @@ static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_IGNORE
| Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE
| Alter_inplace_info::ALTER_RENAME;
-/** Operations that InnoDB can perform online */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_OPERATIONS
- = INNOBASE_INPLACE_IGNORE
- | INNOBASE_ONLINE_CREATE
+/** Operations on foreign key definitions (changing the schema only) */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_FOREIGN_OPERATIONS
+ = Alter_inplace_info::DROP_FOREIGN_KEY
+ | Alter_inplace_info::ADD_FOREIGN_KEY;
+
+/** Operations that InnoDB cares about and can perform without rebuild */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_NOREBUILD
+ = INNOBASE_ONLINE_CREATE
+ | INNOBASE_FOREIGN_OPERATIONS
| Alter_inplace_info::DROP_INDEX
| Alter_inplace_info::DROP_UNIQUE_INDEX
- | Alter_inplace_info::DROP_FOREIGN_KEY
- | Alter_inplace_info::ALTER_COLUMN_NAME
- | Alter_inplace_info::ADD_FOREIGN_KEY;
+ | Alter_inplace_info::ALTER_COLUMN_NAME;
/* Report an InnoDB error to the client by invoking my_error(). */
static UNIV_COLD __attribute__((nonnull))
@@ -162,16 +166,16 @@ my_error_innodb(
}
/** Determine if fulltext indexes exist in a given table.
-@param table_share MySQL table
+@param table MySQL table
@return whether fulltext indexes exist on the table */
static
bool
innobase_fulltext_exist(
/*====================*/
- const TABLE_SHARE* table_share)
+ const TABLE* table)
{
- for (uint i = 0; i < table_share->keys; i++) {
- if (table_share->key_info[i].flags & HA_FULLTEXT) {
+ for (uint i = 0; i < table->s->keys; i++) {
+ if (table->key_info[i].flags & HA_FULLTEXT) {
return(true);
}
}
@@ -199,7 +203,7 @@ innobase_need_rebuild(
return(false);
}
- return(!!(ha_alter_info->handler_flags & INNOBASE_INPLACE_REBUILD));
+ return(!!(ha_alter_info->handler_flags & INNOBASE_ALTER_REBUILD));
}
/** Check if InnoDB supports a particular alter table in-place
@@ -249,7 +253,10 @@ ha_innobase::check_if_supported_inplace_alter(
trx_search_latch_release_if_reserved(prebuilt->trx);
if (ha_alter_info->handler_flags
- & ~(INNOBASE_ONLINE_OPERATIONS | INNOBASE_INPLACE_REBUILD)) {
+ & ~(INNOBASE_INPLACE_IGNORE
+ | INNOBASE_ALTER_NOREBUILD
+ | INNOBASE_ALTER_REBUILD)) {
+
if (ha_alter_info->handler_flags
& (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
| Alter_inplace_info::ALTER_COLUMN_TYPE))
@@ -307,29 +314,6 @@ ha_innobase::check_if_supported_inplace_alter(
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
- /* ADD FOREIGN KEY does not currently work properly in combination
- with renaming columns. (Bug#14105491) */
- if ((ha_alter_info->handler_flags
- & (Alter_inplace_info::ADD_FOREIGN_KEY
- | Alter_inplace_info::ALTER_COLUMN_NAME))
- == (Alter_inplace_info::ADD_FOREIGN_KEY
- | Alter_inplace_info::ALTER_COLUMN_NAME)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_RENAME);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- /* DROP FOREIGN KEY may not currently work properly in combination
- with other operations. (Work-around for 5.6.10 only.) */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_FOREIGN_KEY)
- && (ha_alter_info->handler_flags
- & (Alter_inplace_info::DROP_FOREIGN_KEY
- | INNOBASE_INPLACE_REBUILD))
- != Alter_inplace_info::DROP_FOREIGN_KEY) {
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
/* If a column change from NOT NULL to NULL,
and there's a implict pk on this column. the
table should be rebuild. The change should
@@ -423,7 +407,7 @@ ha_innobase::check_if_supported_inplace_alter(
/* We cannot replace a hidden FTS_DOC_ID
with a user-visible FTS_DOC_ID. */
if (prebuilt->table->fts
- && innobase_fulltext_exist(altered_table->s)
+ && innobase_fulltext_exist(altered_table)
&& !my_strcasecmp(
system_charset_info,
key_part->field->field_name,
@@ -456,7 +440,7 @@ ha_innobase::check_if_supported_inplace_alter(
< dict_table_get_n_user_cols(prebuilt->table));
if (prebuilt->table->fts
- && innobase_fulltext_exist(altered_table->s)) {
+ && innobase_fulltext_exist(altered_table)) {
/* FULLTEXT indexes are supposed to remain. */
/* Disallow DROP INDEX FTS_DOC_ID_INDEX */
@@ -500,7 +484,7 @@ ha_innobase::check_if_supported_inplace_alter(
} else if (((ha_alter_info->handler_flags
& Alter_inplace_info::ADD_PK_INDEX)
|| innobase_need_rebuild(ha_alter_info))
- && (innobase_fulltext_exist(altered_table->s)
+ && (innobase_fulltext_exist(altered_table)
|| (prebuilt->table->flags2
& DICT_TF2_FTS_HAS_DOC_ID))) {
/* Refuse to rebuild the table online, if
@@ -571,6 +555,8 @@ innobase_init_foreign(
ulint referenced_num_field) /*!< in: number of referenced
columns */
{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
if (constraint_name) {
ulint db_len;
@@ -587,22 +573,21 @@ innobase_init_foreign(
ut_memcpy(foreign->id, table->name, db_len);
foreign->id[db_len] = '/';
strcpy(foreign->id + db_len + 1, constraint_name);
- }
-
- ut_ad(mutex_own(&dict_sys->mutex));
- /* Check if any existing foreign key has the same id */
+ /* Check if any existing foreign key has the same id,
+ this is needed only if user supplies the constraint name */
- for (const dict_foreign_t* existing_foreign
- = UT_LIST_GET_FIRST(table->foreign_list);
- existing_foreign != 0;
- existing_foreign = UT_LIST_GET_NEXT(
- foreign_list, existing_foreign)) {
+ for (const dict_foreign_t* existing_foreign
+ = UT_LIST_GET_FIRST(table->foreign_list);
+ existing_foreign != 0;
+ existing_foreign = UT_LIST_GET_NEXT(
+ foreign_list, existing_foreign)) {
- if (ut_strcmp(existing_foreign->id, foreign->id) == 0) {
- return(false);
+ if (ut_strcmp(existing_foreign->id, foreign->id) == 0) {
+ return(false);
+ }
}
- }
+ }
foreign->foreign_table = table;
foreign->foreign_table_name = mem_heap_strdup(
@@ -647,16 +632,19 @@ static __attribute__((nonnull, warn_unused_result))
bool
innobase_check_fk_option(
/*=====================*/
- dict_foreign_t* foreign) /*!< in:InnoDB Foreign key */
+ const dict_foreign_t* foreign) /*!< in: foreign key */
{
+ if (!foreign->foreign_index) {
+ return(true);
+ }
+
if (foreign->type & (DICT_FOREIGN_ON_UPDATE_SET_NULL
- | DICT_FOREIGN_ON_DELETE_SET_NULL)
- && foreign->foreign_index) {
+ | DICT_FOREIGN_ON_DELETE_SET_NULL)) {
for (ulint j = 0; j < foreign->n_fields; j++) {
if ((dict_index_get_nth_col(
- foreign->foreign_index, j)->prtype)
- & DATA_NOT_NULL) {
+ foreign->foreign_index, j)->prtype)
+ & DATA_NOT_NULL) {
/* It is not sensible to define
SET NULL if the column is not
@@ -770,63 +758,57 @@ no_match:
}
/*************************************************************//**
-Found an index whose first fields are the columns in the array
+Find an index whose first fields are the columns in the array
in the same order and is not marked for deletion
@return matching index, NULL if not found */
-static
+static __attribute__((nonnull(1,2,6), warn_unused_result))
dict_index_t*
innobase_find_fk_index(
/*===================*/
Alter_inplace_info* ha_alter_info,
/*!< in: alter table info */
dict_table_t* table, /*!< in: table */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
+ dict_index_t** drop_index,
+ /*!< in: indexes to be dropped */
+ ulint n_drop_index,
+ /*!< in: size of drop_index[] */
const char** columns,/*!< in: array of column names */
ulint n_cols) /*!< in: number of columns */
-
{
- dict_index_t* index;
- dict_index_t* found_index = NULL;
-
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (index->type & DICT_FTS) {
- goto next_rec;
- } else if (dict_foreign_qualify_index(
- table, columns, n_cols, index, NULL, TRUE, FALSE)) {
- /* Check if this index is in the drop list */
- if (index) {
- KEY** drop_key;
-
- drop_key = ha_alter_info->index_drop_buffer;
-
- for (uint i = 0;
- i < ha_alter_info->index_drop_count;
- i++) {
- if (innobase_strcasecmp(
- drop_key[i]->name,
- index->name) == 0) {
- goto next_rec;
- }
+ dict_index_t* index;
+
+ index = dict_table_get_first_index(table);
+
+ while (index != NULL) {
+ if (!(index->type & DICT_FTS)
+ && dict_foreign_qualify_index(
+ table, col_names, columns, n_cols,
+ index, NULL, true, 0)) {
+ for (ulint i = 0; i < n_drop_index; i++) {
+ if (index == drop_index[i]) {
+ /* Skip to-be-dropped indexes. */
+ goto next_rec;
}
}
- found_index = index;
- break;
+ return(index);
}
next_rec:
- index = dict_table_get_next_index(index);
+ index = dict_table_get_next_index(index);
}
- return(found_index);
+ return(NULL);
}
/*************************************************************//**
Create InnoDB foreign key structure from MySQL alter_info
@retval true if successful
@retval false on error (will call my_error()) */
-static
+static __attribute__((nonnull(1,2,3,7,8), warn_unused_result))
bool
innobase_get_foreign_key_info(
/*==========================*/
@@ -835,15 +817,17 @@ innobase_get_foreign_key_info(
const TABLE_SHARE*
table_share, /*!< in: the TABLE_SHARE */
dict_table_t* table, /*!< in: table */
+ const char** col_names, /*!< in: column names, or NULL
+ to use table->col_names */
+ dict_index_t** drop_index, /*!< in: indexes to be dropped */
+ ulint n_drop_index, /*!< in: size of drop_index[] */
dict_foreign_t**add_fk, /*!< out: foreign constraint added */
ulint* n_add_fk, /*!< out: number of foreign
constraints added */
- mem_heap_t* heap, /*!< in: memory heap */
const trx_t* trx) /*!< in: user transaction */
{
Key* key;
Foreign_key* fk_key;
- ulint i = 0;
dict_table_t* referenced_table = NULL;
char* referenced_table_name = NULL;
ulint num_fk = 0;
@@ -854,203 +838,205 @@ innobase_get_foreign_key_info(
List_iterator<Key> key_iterator(alter_info->key_list);
while ((key=key_iterator++)) {
- if (key->type == Key::FOREIGN_KEY) {
- const char* column_names[MAX_NUM_FK_COLUMNS];
- dict_index_t* index = NULL;
- const char* referenced_column_names[MAX_NUM_FK_COLUMNS];
- dict_index_t* referenced_index = NULL;
- ulint num_col = 0;
- ulint referenced_num_col = 0;
- bool correct_option;
- char* db_namep = NULL;
- char* tbl_namep = NULL;
- ulint db_name_len = 0;
- ulint tbl_name_len = 0;
+ if (key->type != Key::FOREIGN_KEY) {
+ continue;
+ }
+
+ const char* column_names[MAX_NUM_FK_COLUMNS];
+ dict_index_t* index = NULL;
+ const char* referenced_column_names[MAX_NUM_FK_COLUMNS];
+ dict_index_t* referenced_index = NULL;
+ ulint num_col = 0;
+ ulint referenced_num_col = 0;
+ bool correct_option;
+ char* db_namep = NULL;
+ char* tbl_namep = NULL;
+ ulint db_name_len = 0;
+ ulint tbl_name_len = 0;
#ifdef __WIN__
- char db_name[MAX_DATABASE_NAME_LEN];
- char tbl_name[MAX_TABLE_NAME_LEN];
+ char db_name[MAX_DATABASE_NAME_LEN];
+ char tbl_name[MAX_TABLE_NAME_LEN];
#endif
- fk_key= static_cast<Foreign_key*>(key);
-
- if (fk_key->columns.elements > 0) {
- Key_part_spec* column;
- List_iterator<Key_part_spec> key_part_iterator(
- fk_key->columns);
-
- /* Get all the foreign key column info for the
- current table */
- while ((column = key_part_iterator++)) {
- column_names[i] =
- column->field_name.str;
- ut_ad(i < MAX_NUM_FK_COLUMNS);
- i++;
- }
-
- index = innobase_find_fk_index(
- ha_alter_info, table, column_names, i);
+ fk_key = static_cast<Foreign_key*>(key);
- /* MySQL would add a index in the creation
- list if no such index for foreign table,
- so we have to use DBUG_EXECUTE_IF to simulate
- the scenario */
- DBUG_EXECUTE_IF("innodb_test_no_foreign_idx",
- index = NULL;);
+ if (fk_key->columns.elements > 0) {
+ ulint i = 0;
+ Key_part_spec* column;
+ List_iterator<Key_part_spec> key_part_iterator(
+ fk_key->columns);
- /* Check whether there exist such
- index in the the index create clause */
- if (!index && !innobase_find_equiv_index(
- column_names, i,
- ha_alter_info->key_info_buffer,
- ha_alter_info->index_add_buffer,
- ha_alter_info->index_add_count)) {
- my_error(
- ER_FK_NO_INDEX_CHILD,
- MYF(0),
- fk_key->name.str,
- table_share->table_name.str);
- goto err_exit;
- }
+ /* Get all the foreign key column info for the
+ current table */
+ while ((column = key_part_iterator++)) {
+ column_names[i] = column->field_name.str;
+ ut_ad(i < MAX_NUM_FK_COLUMNS);
+ i++;
+ }
- num_col = i;
+ index = innobase_find_fk_index(
+ ha_alter_info,
+ table, col_names,
+ drop_index, n_drop_index,
+ column_names, i);
+
+ /* MySQL would add an index to the creation
+ list if no such index existed for the foreign table,
+ so we have to use DBUG_EXECUTE_IF to simulate
+ the scenario */
+ DBUG_EXECUTE_IF("innodb_test_no_foreign_idx",
+ index = NULL;);
+
+ /* Check whether such an index exists
+ in the index create clause */
+ if (!index && !innobase_find_equiv_index(
+ column_names, i,
+ ha_alter_info->key_info_buffer,
+ ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count)) {
+ my_error(
+ ER_FK_NO_INDEX_CHILD,
+ MYF(0),
+ fk_key->name.str
+ ? fk_key->name.str : "",
+ table_share->table_name.str);
+ goto err_exit;
}
- add_fk[num_fk] = dict_mem_foreign_create();
+ num_col = i;
+ }
+
+ add_fk[num_fk] = dict_mem_foreign_create();
#ifndef __WIN__
- tbl_namep = fk_key->ref_table.str;
- tbl_name_len = fk_key->ref_table.length;
- db_namep = fk_key->ref_db.str;
- db_name_len = fk_key->ref_db.length;
+ tbl_namep = fk_key->ref_table.str;
+ tbl_name_len = fk_key->ref_table.length;
+ db_namep = fk_key->ref_db.str;
+ db_name_len = fk_key->ref_db.length;
#else
- ut_ad(fk_key->ref_table.str);
-
- memcpy(tbl_name, fk_key->ref_table.str,
- fk_key->ref_table.length);
- tbl_name[fk_key->ref_table.length] = 0;
- innobase_casedn_str(tbl_name);
- tbl_name_len = strlen(tbl_name);
- tbl_namep = &tbl_name[0];
-
- if (fk_key->ref_db.str != NULL) {
- memcpy(db_name, fk_key->ref_db.str,
- fk_key->ref_db.length);
- db_name[fk_key->ref_db.length] = 0;
- innobase_casedn_str(db_name);
- db_name_len = strlen(db_name);
- db_namep = &db_name[0];
- }
+ ut_ad(fk_key->ref_table.str);
+
+ memcpy(tbl_name, fk_key->ref_table.str,
+ fk_key->ref_table.length);
+ tbl_name[fk_key->ref_table.length] = 0;
+ innobase_casedn_str(tbl_name);
+ tbl_name_len = strlen(tbl_name);
+ tbl_namep = &tbl_name[0];
+
+ if (fk_key->ref_db.str != NULL) {
+ memcpy(db_name, fk_key->ref_db.str,
+ fk_key->ref_db.length);
+ db_name[fk_key->ref_db.length] = 0;
+ innobase_casedn_str(db_name);
+ db_name_len = strlen(db_name);
+ db_namep = &db_name[0];
+ }
#endif
- mutex_enter(&dict_sys->mutex);
-
- referenced_table_name = dict_get_referenced_table(
- table->name,
- db_namep,
- db_name_len,
- tbl_namep,
- tbl_name_len,
- &referenced_table,
- add_fk[num_fk]->heap);
-
- /* Test the case when referenced_table failed to
- open, if trx->check_foreigns is not set, we should
- still be able to add the foreign key */
- DBUG_EXECUTE_IF("innodb_test_open_ref_fail",
- referenced_table = NULL;);
-
- if (!referenced_table && trx->check_foreigns) {
- mutex_exit(&dict_sys->mutex);
- my_error(ER_FK_CANNOT_OPEN_PARENT,
- MYF(0), tbl_namep);
+ mutex_enter(&dict_sys->mutex);
+
+ referenced_table_name = dict_get_referenced_table(
+ table->name,
+ db_namep,
+ db_name_len,
+ tbl_namep,
+ tbl_name_len,
+ &referenced_table,
+ add_fk[num_fk]->heap);
+
+ /* Test the case when referenced_table failed to
+ open, if trx->check_foreigns is not set, we should
+ still be able to add the foreign key */
+ DBUG_EXECUTE_IF("innodb_test_open_ref_fail",
+ referenced_table = NULL;);
+
+ if (!referenced_table && trx->check_foreigns) {
+ mutex_exit(&dict_sys->mutex);
+ my_error(ER_FK_CANNOT_OPEN_PARENT,
+ MYF(0), tbl_namep);
- goto err_exit;
- }
+ goto err_exit;
+ }
- i = 0;
+ if (fk_key->ref_columns.elements > 0) {
+ ulint i = 0;
+ Key_part_spec* column;
+ List_iterator<Key_part_spec> key_part_iterator(
+ fk_key->ref_columns);
- if (fk_key->ref_columns.elements > 0) {
- Key_part_spec* column;
- List_iterator<Key_part_spec> key_part_iterator(
- fk_key->ref_columns);
+ while ((column = key_part_iterator++)) {
+ referenced_column_names[i] =
+ column->field_name.str;
+ ut_ad(i < MAX_NUM_FK_COLUMNS);
+ i++;
+ }
- while ((column = key_part_iterator++)) {
- referenced_column_names[i] =
- column->field_name.str;
- ut_ad(i < MAX_NUM_FK_COLUMNS);
- i++;
- }
+ if (referenced_table) {
+ referenced_index =
+ dict_foreign_find_index(
+ referenced_table, 0,
+ referenced_column_names,
+ i, index,
+ TRUE, FALSE);
- if (referenced_table) {
- referenced_index =
- dict_foreign_find_index(
- referenced_table,
- referenced_column_names,
- i, NULL,
- TRUE, FALSE);
-
- DBUG_EXECUTE_IF(
- "innodb_test_no_reference_idx",
- referenced_index = NULL;);
-
- /* Check whether there exist such
- index in the the index create clause */
- if (!referenced_index) {
- mutex_exit(&dict_sys->mutex);
- my_error(
- ER_FK_NO_INDEX_PARENT,
- MYF(0),
- fk_key->name.str,
- tbl_namep);
- goto err_exit;
- }
- } else {
- ut_a(!trx->check_foreigns);
- }
-
- referenced_num_col = i;
- }
+ DBUG_EXECUTE_IF(
+ "innodb_test_no_reference_idx",
+ referenced_index = NULL;);
- if (!innobase_init_foreign(
- add_fk[num_fk], fk_key->name.str,
- table, index, column_names,
- num_col, referenced_table_name,
- referenced_table, referenced_index,
- referenced_column_names, referenced_num_col)) {
+ /* Fail if no suitable index was found
+ in the referenced table */
+ if (!referenced_index) {
mutex_exit(&dict_sys->mutex);
- my_error(
- ER_FK_DUP_NAME,
- MYF(0),
- add_fk[num_fk]->id);
+ my_error(ER_FK_NO_INDEX_PARENT, MYF(0),
+ fk_key->name.str
+ ? fk_key->name.str : "",
+ tbl_namep);
goto err_exit;
+ }
+ } else {
+ ut_a(!trx->check_foreigns);
}
+ referenced_num_col = i;
+ }
+
+ if (!innobase_init_foreign(
+ add_fk[num_fk], fk_key->name.str,
+ table, index, column_names,
+ num_col, referenced_table_name,
+ referenced_table, referenced_index,
+ referenced_column_names, referenced_num_col)) {
mutex_exit(&dict_sys->mutex);
+ my_error(
+ ER_FK_DUP_NAME,
+ MYF(0),
+ add_fk[num_fk]->id);
+ goto err_exit;
+ }
- correct_option = innobase_set_foreign_key_option(
- add_fk[num_fk], fk_key);
+ mutex_exit(&dict_sys->mutex);
- DBUG_EXECUTE_IF("innodb_test_wrong_fk_option",
- correct_option = false;);
+ correct_option = innobase_set_foreign_key_option(
+ add_fk[num_fk], fk_key);
- if (!correct_option) {
- my_error(ER_FK_INCORRECT_OPTION,
- MYF(0),
- table_share->table_name.str,
- add_fk[num_fk]->id);
- goto err_exit;
- }
+ DBUG_EXECUTE_IF("innodb_test_wrong_fk_option",
+ correct_option = false;);
- num_fk++;
- i = 0;
+ if (!correct_option) {
+ my_error(ER_FK_INCORRECT_OPTION,
+ MYF(0),
+ table_share->table_name.str,
+ add_fk[num_fk]->id);
+ goto err_exit;
}
+ num_fk++;
}
*n_add_fk = num_fk;
return(true);
err_exit:
- for (i = 0; i <= num_fk; i++) {
+ for (ulint i = 0; i <= num_fk; i++) {
if (add_fk[i]) {
dict_foreign_free(add_fk[i]);
}
@@ -1651,7 +1637,7 @@ innobase_fts_check_doc_id_index(
FTS_DOC_ID_INDEX_NAME is being created. */
for (uint i = 0; i < altered_table->s->keys; i++) {
- const KEY& key = altered_table->s->key_info[i];
+ const KEY& key = altered_table->key_info[i];
if (innobase_strcasecmp(
key.name, FTS_DOC_ID_INDEX_NAME)) {
@@ -2004,16 +1990,18 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
{
/** Dummy query graph */
que_thr_t* thr;
+ /** reference to the prebuilt struct of the creating instance */
+ row_prebuilt_t*&prebuilt;
/** InnoDB indexes being created */
- dict_index_t** add;
+ dict_index_t** add_index;
/** MySQL key numbers for the InnoDB indexes that are being created */
const ulint* add_key_numbers;
/** number of InnoDB indexes being created */
- const ulint num_to_add;
+ ulint num_to_add_index;
/** InnoDB indexes being dropped */
- dict_index_t** drop;
+ dict_index_t** drop_index;
/** number of InnoDB indexes being dropped */
- const ulint num_to_drop;
+ const ulint num_to_drop_index;
/** InnoDB foreign key constraints being dropped */
dict_foreign_t** drop_fk;
/** number of InnoDB foreign key constraints being dropped */
@@ -2028,21 +2016,26 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
mem_heap_t* heap;
/** dictionary transaction */
trx_t* trx;
+ /** original table (if rebuilt, differs from new_table) */
+ dict_table_t* old_table;
/** table where the indexes are being created or dropped */
- dict_table_t* indexed_table;
+ dict_table_t* new_table;
/** mapping of old column numbers to new ones, or NULL */
const ulint* col_map;
+ /** new column names, or NULL if nothing was renamed */
+ const char** col_names;
/** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */
const ulint add_autoinc;
/** default values of ADD COLUMN, or NULL */
const dtuple_t* add_cols;
/** autoinc sequence to use */
ib_sequence_t sequence;
+ /** maximum auto-increment value */
+ ulonglong max_autoinc;
+ /** temporary table name to use for old table when renaming tables */
+ const char* tmp_name;
- ha_innobase_inplace_ctx(trx_t* user_trx,
- dict_index_t** add_arg,
- const ulint* add_key_numbers_arg,
- ulint num_to_add_arg,
+ ha_innobase_inplace_ctx(row_prebuilt_t*& prebuilt_arg,
dict_index_t** drop_arg,
ulint num_to_drop_arg,
dict_foreign_t** drop_fk_arg,
@@ -2051,36 +2044,38 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
ulint num_to_add_fk_arg,
bool online_arg,
mem_heap_t* heap_arg,
- trx_t* trx_arg,
- dict_table_t* indexed_table_arg,
- const ulint* col_map_arg,
+ dict_table_t* new_table_arg,
+ const char** col_names_arg,
ulint add_autoinc_arg,
ulonglong autoinc_col_min_value_arg,
- ulonglong autoinc_col_max_value_arg,
- const dtuple_t* add_cols_arg) :
+ ulonglong autoinc_col_max_value_arg) :
inplace_alter_handler_ctx(),
- add (add_arg), add_key_numbers (add_key_numbers_arg),
- num_to_add (num_to_add_arg),
- drop (drop_arg), num_to_drop (num_to_drop_arg),
+ prebuilt (prebuilt_arg),
+ add_index (0), add_key_numbers (0), num_to_add_index (0),
+ drop_index (drop_arg), num_to_drop_index (num_to_drop_arg),
drop_fk (drop_fk_arg), num_to_drop_fk (num_to_drop_fk_arg),
add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg),
- online (online_arg), heap (heap_arg), trx (trx_arg),
- indexed_table (indexed_table_arg),
- col_map (col_map_arg), add_autoinc (add_autoinc_arg),
- add_cols (add_cols_arg),
- sequence(user_trx ? user_trx->mysql_thd : 0,
- autoinc_col_min_value_arg, autoinc_col_max_value_arg)
+ online (online_arg), heap (heap_arg), trx (0),
+ old_table (prebuilt_arg->table),
+ new_table (new_table_arg),
+ col_map (0), col_names (col_names_arg),
+ add_autoinc (add_autoinc_arg),
+ add_cols (0),
+ sequence(prebuilt->trx->mysql_thd,
+ autoinc_col_min_value_arg, autoinc_col_max_value_arg),
+ max_autoinc (0),
+ tmp_name (0)
{
#ifdef UNIV_DEBUG
- for (ulint i = 0; i < num_to_add; i++) {
- ut_ad(!add[i]->to_be_dropped);
+ for (ulint i = 0; i < num_to_add_index; i++) {
+ ut_ad(!add_index[i]->to_be_dropped);
}
- for (ulint i = 0; i < num_to_drop; i++) {
- ut_ad(drop[i]->to_be_dropped);
+ for (ulint i = 0; i < num_to_drop_index; i++) {
+ ut_ad(drop_index[i]->to_be_dropped);
}
#endif /* UNIV_DEBUG */
- thr = pars_complete_graph_for_exec(NULL, user_trx, heap);
+ thr = pars_complete_graph_for_exec(NULL, prebuilt->trx, heap);
}
~ha_innobase_inplace_ctx()
@@ -2088,6 +2083,10 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
mem_heap_free(heap);
}
+ /** Determine if the table will be rebuilt.
+ @return whether the table will be rebuilt */
+ bool need_rebuild () const { return(old_table != new_table); }
+
private:
// Disable copying
ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&);
@@ -2523,27 +2522,59 @@ innobase_drop_fts_index_table(
return(ret_err);
}
+/** Get the new column names if any columns were renamed
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param user_table InnoDB table as it is before the ALTER operation
+@param heap Memory heap for the allocation
+@return array of user_table->n_def column names: the new user column names followed by the internal column names
+static __attribute__((nonnull, warn_unused_result))
+const char**
+innobase_get_col_names(
+/*===================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const dict_table_t* user_table,
+ mem_heap_t* heap)
+{
+ const char** cols;
+ uint i;
+
+ DBUG_ENTER("innobase_get_col_names");
+ DBUG_ASSERT(user_table->n_def > altered_table->s->fields);
+ DBUG_ASSERT(ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME);
+
+ cols = static_cast<const char**>(
+ mem_heap_alloc(heap, user_table->n_def * sizeof *cols));
+
+ for (i = 0; i < altered_table->s->fields; i++) {
+ const Field* field = altered_table->field[i];
+ cols[i] = field->field_name;
+ }
+
+ /* Copy the internal column names. */
+ cols[i] = dict_table_get_col_name(user_table, i);
+
+ while (++i < user_table->n_def) {
+ cols[i] = cols[i - 1] + strlen(cols[i - 1]) + 1;
+ }
+
+ DBUG_RETURN(cols);
+}
+
/** Update internal structures with concurrent writes blocked,
while preparing ALTER TABLE.
@param ha_alter_info Data used during in-place alter
@param altered_table MySQL table that is being altered
@param old_table MySQL table as it is before the ALTER operation
-@param user_table InnoDB table that is being altered
-@param user_trx User transaction, for locking the table
@param table_name Table name in MySQL
@param flags Table and tablespace flags
@param flags2 Additional table flags
-@param heap Memory heap, or NULL
-@param drop_index Indexes to be dropped, or NULL
-@param n_drop_index Number of indexes to drop
-@param drop_foreign Foreign key constraints to be dropped, or NULL
-@param n_drop_foreign Number of foreign key constraints to drop
@param fts_doc_id_col The column number of FTS_DOC_ID
-@param add_autoinc_col The number of an added AUTO_INCREMENT column,
- or ULINT_UNDEFINED if none was added
@param add_fts_doc_id Flag: add column FTS_DOC_ID?
-@param add_fts_doc_id_idx Flag: add index (FTS_DOC_ID)?
+@param add_fts_doc_id_idx Flag: add index FTS_DOC_ID_INDEX (FTS_DOC_ID)?
@retval true Failure
@retval false Success
@@ -2555,92 +2586,83 @@ prepare_inplace_alter_table_dict(
Alter_inplace_info* ha_alter_info,
const TABLE* altered_table,
const TABLE* old_table,
- dict_table_t* user_table,
- trx_t* user_trx,
const char* table_name,
ulint flags,
ulint flags2,
- mem_heap_t* heap,
- dict_index_t** drop_index,
- ulint n_drop_index,
- dict_foreign_t** drop_foreign,
- ulint n_drop_foreign,
- dict_foreign_t** add_foreign,
- ulint n_add_foreign,
ulint fts_doc_id_col,
- ulint add_autoinc_col,
- ulonglong autoinc_col_max_value,
bool add_fts_doc_id,
bool add_fts_doc_id_idx)
{
- trx_t* trx;
bool dict_locked = false;
- dict_index_t** add_index; /* indexes to be created */
ulint* add_key_nums; /* MySQL key numbers */
- ulint n_add_index;
index_def_t* index_defs; /* index definitions */
+ dict_table_t* user_table;
dict_index_t* fts_index = NULL;
- dict_table_t* indexed_table = user_table;
ulint new_clustered = 0;
dberr_t error;
- THD* user_thd = user_trx->mysql_thd;
- const ulint* col_map = NULL;
- dtuple_t* add_cols = NULL;
ulint num_fts_index;
uint sql_idx;
+ ha_innobase_inplace_ctx*ctx;
DBUG_ENTER("prepare_inplace_alter_table_dict");
- DBUG_ASSERT((add_autoinc_col != ULINT_UNDEFINED)
- == (autoinc_col_max_value > 0));
- DBUG_ASSERT(!n_drop_index == !drop_index);
- DBUG_ASSERT(!n_drop_foreign == !drop_foreign);
+
+ ctx = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
+
+ DBUG_ASSERT((ctx->add_autoinc != ULINT_UNDEFINED)
+ == (ctx->sequence.m_max_value > 0));
+ DBUG_ASSERT(!ctx->num_to_drop_index == !ctx->drop_index);
+ DBUG_ASSERT(!ctx->num_to_drop_fk == !ctx->drop_fk);
DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_id_idx);
DBUG_ASSERT(!add_fts_doc_id_idx
- || innobase_fulltext_exist(altered_table->s));
+ || innobase_fulltext_exist(altered_table));
+ DBUG_ASSERT(!ctx->add_cols);
+ DBUG_ASSERT(!ctx->add_index);
+ DBUG_ASSERT(!ctx->add_key_numbers);
+ DBUG_ASSERT(!ctx->num_to_add_index);
+
+ user_table = ctx->new_table;
- trx_start_if_not_started_xa(user_trx);
+ trx_start_if_not_started_xa(ctx->prebuilt->trx);
/* Create a background transaction for the operations on
the data dictionary tables. */
- trx = innobase_trx_allocate(user_thd);
+ ctx->trx = innobase_trx_allocate(ctx->prebuilt->trx->mysql_thd);
- trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
-
- if (!heap) {
- heap = mem_heap_create(1024);
- }
+ trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
/* Create table containing all indexes to be built in this
ALTER TABLE ADD INDEX so that they are in the correct order
in the table. */
- n_add_index = ha_alter_info->index_add_count;
+ ctx->num_to_add_index = ha_alter_info->index_add_count;
index_defs = innobase_create_key_defs(
- heap, ha_alter_info, altered_table, n_add_index,
- num_fts_index, row_table_got_default_clust_index(indexed_table),
+ ctx->heap, ha_alter_info, altered_table, ctx->num_to_add_index,
+ num_fts_index,
+ row_table_got_default_clust_index(ctx->new_table),
fts_doc_id_col, add_fts_doc_id, add_fts_doc_id_idx);
new_clustered = DICT_CLUSTERED & index_defs[0].ind_type;
- const bool locked =
- !ha_alter_info->online
- || add_autoinc_col != ULINT_UNDEFINED
- || num_fts_index > 0
- || (innobase_need_rebuild(ha_alter_info)
- && innobase_fulltext_exist(altered_table->s));
-
if (num_fts_index > 1) {
my_error(ER_INNODB_FT_LIMIT, MYF(0));
goto error_handled;
}
- if (locked && ha_alter_info->online) {
+ if (!ctx->online) {
+ /* This is not an online operation (LOCK=NONE). */
+ } else if (ctx->add_autoinc == ULINT_UNDEFINED
+ && num_fts_index == 0
+ && (!innobase_need_rebuild(ha_alter_info)
+ || !innobase_fulltext_exist(altered_table))) {
+ /* InnoDB can perform an online operation (LOCK=NONE). */
+ } else {
/* This should have been blocked in
check_if_supported_inplace_alter(). */
ut_ad(0);
my_error(ER_NOT_SUPPORTED_YET, MYF(0),
- thd_query_string(user_thd)->str);
+ thd_query_string(ctx->prebuilt->trx->mysql_thd)->str);
goto error_handled;
}
@@ -2654,34 +2676,36 @@ prepare_inplace_alter_table_dict(
/* Allocate memory for dictionary index definitions */
- add_index = (dict_index_t**) mem_heap_alloc(
- heap, n_add_index * sizeof *add_index);
- add_key_nums = (ulint*) mem_heap_alloc(
- heap, n_add_index * sizeof *add_key_nums);
+ ctx->add_index = static_cast<dict_index_t**>(
+ mem_heap_alloc(ctx->heap, ctx->num_to_add_index
+ * sizeof *ctx->add_index));
+ ctx->add_key_numbers = add_key_nums = static_cast<ulint*>(
+ mem_heap_alloc(ctx->heap, ctx->num_to_add_index
+ * sizeof *ctx->add_key_numbers));
/* This transaction should be dictionary operation, so that
the data dictionary will be locked during crash recovery. */
- ut_ad(trx->dict_operation == TRX_DICT_OP_INDEX);
+ ut_ad(ctx->trx->dict_operation == TRX_DICT_OP_INDEX);
/* Acquire a lock on the table before creating any indexes. */
- if (locked) {
+ if (ctx->online) {
+ error = DB_SUCCESS;
+ } else {
error = row_merge_lock_table(
- user_trx, indexed_table, LOCK_S);
+ ctx->prebuilt->trx, ctx->new_table, LOCK_S);
if (error != DB_SUCCESS) {
goto error_handling;
}
- } else {
- error = DB_SUCCESS;
}
/* Latch the InnoDB data dictionary exclusively so that no deadlocks
or lock waits can happen in it during an index create operation. */
- row_mysql_lock_data_dictionary(trx);
+ row_mysql_lock_data_dictionary(ctx->trx);
dict_locked = true;
/* Wait for background stats processing to stop using the table that
@@ -2690,24 +2714,28 @@ prepare_inplace_alter_table_dict(
at least until checking ut_ad(user_table->n_ref_count == 1) below.
XXX what may happen if bg stats opens the table after we
have unlocked data dictionary below? */
- dict_stats_wait_bg_to_stop_using_tables(user_table, NULL, trx);
+ dict_stats_wait_bg_to_stop_using_table(user_table, ctx->trx);
- online_retry_drop_indexes_low(indexed_table, trx);
+ online_retry_drop_indexes_low(ctx->new_table, ctx->trx);
ut_d(dict_table_check_for_dup_indexes(
- indexed_table, CHECK_ABORTED_OK));
+ ctx->new_table, CHECK_ABORTED_OK));
/* If a new clustered index is defined for the table we need
- to drop the original table and rebuild all indexes. */
+ to rebuild the table with a temporary name. */
if (new_clustered) {
- char* new_table_name = dict_mem_create_temporary_tablename(
- heap, indexed_table->name, indexed_table->id);
- ulint n_cols;
+ const char* new_table_name
+ = dict_mem_create_temporary_tablename(
+ ctx->heap,
+ ctx->new_table->name,
+ ctx->new_table->id);
+ ulint n_cols;
+ dtuple_t* add_cols;
if (innobase_check_foreigns(
ha_alter_info, altered_table, old_table,
- user_table, drop_foreign, n_drop_foreign)) {
+ user_table, ctx->drop_fk, ctx->num_to_drop_fk)) {
goto new_clustered_failed;
}
@@ -2725,7 +2753,7 @@ prepare_inplace_alter_table_dict(
DBUG_ASSERT(!add_fts_doc_id_idx || (flags2 & DICT_TF2_FTS));
/* Create the table. */
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ trx_set_dict_operation(ctx->trx, TRX_DICT_OP_TABLE);
if (dict_table_get_low(new_table_name)) {
my_error(ER_TABLE_EXISTS_ERROR, MYF(0),
@@ -2734,12 +2762,15 @@ prepare_inplace_alter_table_dict(
}
/* The initial space id 0 may be overridden later. */
- indexed_table = dict_mem_table_create(
+ ctx->new_table = dict_mem_table_create(
new_table_name, 0, n_cols, flags, flags2);
+ /* The rebuilt table (ctx->new_table) will use the renamed
+ column names. */
+ ctx->col_names = NULL;
if (DICT_TF_HAS_DATA_DIR(flags)) {
- indexed_table->data_dir_path =
- mem_heap_strdup(indexed_table->heap,
+ ctx->new_table->data_dir_path =
+ mem_heap_strdup(ctx->new_table->heap,
user_table->data_dir_path);
}
@@ -2778,7 +2809,8 @@ prepare_inplace_alter_table_dict(
charset_no = (ulint) field->charset()->number;
if (charset_no > MAX_CHAR_COLL_NUM) {
- dict_mem_table_free(indexed_table);
+ dict_mem_table_free(
+ ctx->new_table);
my_error(ER_WRONG_KEY_COLUMN, MYF(0),
field->field_name);
goto new_clustered_failed;
@@ -2808,14 +2840,14 @@ prepare_inplace_alter_table_dict(
}
if (dict_col_name_is_reserved(field->field_name)) {
- dict_mem_table_free(indexed_table);
+ dict_mem_table_free(ctx->new_table);
my_error(ER_WRONG_COLUMN_NAME, MYF(0),
field->field_name);
goto new_clustered_failed;
}
dict_mem_table_add_col(
- indexed_table, heap,
+ ctx->new_table, ctx->heap,
field->field_name,
col_type,
dtype_form_prtype(field_type, charset_no),
@@ -2823,14 +2855,15 @@ prepare_inplace_alter_table_dict(
}
if (add_fts_doc_id) {
- fts_add_doc_id_column(indexed_table, heap);
- indexed_table->fts->doc_col = fts_doc_id_col;
+ fts_add_doc_id_column(ctx->new_table, ctx->heap);
+ ctx->new_table->fts->doc_col = fts_doc_id_col;
ut_ad(fts_doc_id_col == altered_table->s->stored_fields);
- } else if (indexed_table->fts) {
- indexed_table->fts->doc_col = fts_doc_id_col;
+ } else if (ctx->new_table->fts) {
+ ctx->new_table->fts->doc_col = fts_doc_id_col;
}
- error = row_create_table_for_mysql(indexed_table, trx, false);
+ error = row_create_table_for_mysql(
+ ctx->new_table, ctx->trx, false);
switch (error) {
dict_table_t* temp_table;
@@ -2842,13 +2875,13 @@ prepare_inplace_alter_table_dict(
the dict_sys->mutex. */
ut_ad(mutex_own(&dict_sys->mutex));
temp_table = dict_table_open_on_name(
- indexed_table->name, TRUE, FALSE,
+ ctx->new_table->name, TRUE, FALSE,
DICT_ERR_IGNORE_NONE);
- ut_a(indexed_table == temp_table);
+ ut_a(ctx->new_table == temp_table);
/* n_ref_count must be 1, because purge cannot
be executing on this very table as we are
holding dict_operation_lock X-latch. */
- DBUG_ASSERT(indexed_table->n_ref_count == 1);
+ DBUG_ASSERT(ctx->new_table->n_ref_count == 1);
break;
case DB_TABLESPACE_EXISTS:
my_error(ER_TABLESPACE_EXISTS, MYF(0),
@@ -2861,64 +2894,69 @@ prepare_inplace_alter_table_dict(
default:
my_error_innodb(error, table_name, flags);
new_clustered_failed:
- DBUG_ASSERT(trx != user_trx);
- trx_rollback_to_savepoint(trx, NULL);
+ DBUG_ASSERT(ctx->trx != ctx->prebuilt->trx);
+ trx_rollback_to_savepoint(ctx->trx, NULL);
ut_ad(user_table->n_ref_count == 1);
- online_retry_drop_indexes_with_trx(user_table, trx);
-
+ online_retry_drop_indexes_with_trx(
+ user_table, ctx->trx);
goto err_exit;
}
if (ha_alter_info->handler_flags
& Alter_inplace_info::ADD_COLUMN) {
-
add_cols = dtuple_create(
- heap, dict_table_get_n_cols(indexed_table));
+ ctx->heap,
+ dict_table_get_n_cols(ctx->new_table));
- dict_table_copy_types(add_cols, indexed_table);
+ dict_table_copy_types(add_cols, ctx->new_table);
+ } else {
+ add_cols = NULL;
}
- col_map = innobase_build_col_map(
+ ctx->col_map = innobase_build_col_map(
ha_alter_info, altered_table, old_table,
- indexed_table, user_table,
- add_cols, heap);
+ ctx->new_table, user_table,
+ add_cols, ctx->heap);
+ ctx->add_cols = add_cols;
} else {
DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info));
- if (!indexed_table->fts
- && innobase_fulltext_exist(altered_table->s)) {
- indexed_table->fts = fts_create(indexed_table);
- indexed_table->fts->doc_col = fts_doc_id_col;
+ if (!ctx->new_table->fts
+ && innobase_fulltext_exist(altered_table)) {
+ ctx->new_table->fts = fts_create(
+ ctx->new_table);
+ ctx->new_table->fts->doc_col = fts_doc_id_col;
}
}
/* Assign table_id, so that no table id of
fts_create_index_tables() will be written to the undo logs. */
- DBUG_ASSERT(indexed_table->id != 0);
- trx->table_id = indexed_table->id;
+ DBUG_ASSERT(ctx->new_table->id != 0);
+ ctx->trx->table_id = ctx->new_table->id;
/* Create the indexes in SYS_INDEXES and load into dictionary. */
- for (ulint num_created = 0; num_created < n_add_index; num_created++) {
+ for (ulint a = 0; a < ctx->num_to_add_index; a++) {
- add_index[num_created] = row_merge_create_index(
- trx, indexed_table, &index_defs[num_created]);
+ ctx->add_index[a] = row_merge_create_index(
+ ctx->trx, ctx->new_table,
+ &index_defs[a]);
- add_key_nums[num_created] = index_defs[num_created].key_number;
+ add_key_nums[a] = index_defs[a].key_number;
- if (!add_index[num_created]) {
- error = trx->error_state;
+ if (!ctx->add_index[a]) {
+ error = ctx->trx->error_state;
DBUG_ASSERT(error != DB_SUCCESS);
goto error_handling;
}
- if (add_index[num_created]->type & DICT_FTS) {
+ if (ctx->add_index[a]->type & DICT_FTS) {
DBUG_ASSERT(num_fts_index);
DBUG_ASSERT(!fts_index);
- DBUG_ASSERT(add_index[num_created]->type == DICT_FTS);
- fts_index = add_index[num_created];
+ DBUG_ASSERT(ctx->add_index[a]->type == DICT_FTS);
+ fts_index = ctx->add_index[a];
}
/* If only online ALTER TABLE operations have been
@@ -2928,22 +2966,22 @@ prepare_inplace_alter_table_dict(
(new_clustered), we will allocate the log for the
clustered index of the old table, later. */
if (new_clustered
- || locked
+ || !ctx->online
|| user_table->ibd_file_missing
|| dict_table_is_discarded(user_table)) {
/* No need to allocate a modification log. */
- ut_ad(!add_index[num_created]->online_log);
- } else if (add_index[num_created]->type & DICT_FTS) {
+ ut_ad(!ctx->add_index[a]->online_log);
+ } else if (ctx->add_index[a]->type & DICT_FTS) {
/* Fulltext indexes are not covered
by a modification log. */
} else {
DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
error = DB_OUT_OF_MEMORY;
goto error_handling;);
- rw_lock_x_lock(&add_index[num_created]->lock);
- bool ok = row_log_allocate(add_index[num_created],
+ rw_lock_x_lock(&ctx->add_index[a]->lock);
+ bool ok = row_log_allocate(ctx->add_index[a],
NULL, true, NULL, NULL);
- rw_lock_x_unlock(&add_index[num_created]->lock);
+ rw_lock_x_unlock(&ctx->add_index[a]->lock);
if (!ok) {
error = DB_OUT_OF_MEMORY;
@@ -2952,39 +2990,41 @@ prepare_inplace_alter_table_dict(
}
}
- ut_ad(new_clustered == (indexed_table != user_table));
+ ut_ad(new_clustered == ctx->need_rebuild());
DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
error = DB_OUT_OF_MEMORY;
goto error_handling;);
- if (new_clustered && !locked) {
+ if (new_clustered && ctx->online) {
/* Allocate a log for online table rebuild. */
dict_index_t* clust_index = dict_table_get_first_index(
user_table);
rw_lock_x_lock(&clust_index->lock);
bool ok = row_log_allocate(
- clust_index, indexed_table,
+ clust_index, ctx->new_table,
!(ha_alter_info->handler_flags
& Alter_inplace_info::ADD_PK_INDEX),
- add_cols, col_map);
+ ctx->add_cols, ctx->col_map);
rw_lock_x_unlock(&clust_index->lock);
if (!ok) {
error = DB_OUT_OF_MEMORY;
goto error_handling;
}
+ }
+ if (ctx->online) {
/* Assign a consistent read view for
row_merge_read_clustered_index(). */
- trx_assign_read_view(user_trx);
+ trx_assign_read_view(ctx->prebuilt->trx);
}
if (fts_index) {
/* Ensure that the dictionary operation mode will
not change while creating the auxiliary tables. */
- trx_dict_op_t op = trx_get_dict_operation(trx);
+ trx_dict_op_t op = trx_get_dict_operation(ctx->trx);
#ifdef UNIV_DEBUG
switch (op) {
@@ -2997,18 +3037,18 @@ prepare_inplace_alter_table_dict(
ut_error;
op_ok:
#endif /* UNIV_DEBUG */
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(ctx->trx->dict_operation_lock_mode == RW_X_LATCH);
ut_ad(mutex_own(&dict_sys->mutex));
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- DICT_TF2_FLAG_SET(indexed_table, DICT_TF2_FTS);
+ DICT_TF2_FLAG_SET(ctx->new_table, DICT_TF2_FTS);
/* This function will commit the transaction and reset
the trx_t::dict_operation flag on success. */
- error = fts_create_index_tables(trx, fts_index);
+ error = fts_create_index_tables(ctx->trx, fts_index);
DBUG_EXECUTE_IF("innodb_test_fail_after_fts_index_table",
error = DB_LOCK_WAIT_TIMEOUT;
@@ -3018,34 +3058,38 @@ op_ok:
goto error_handling;
}
- trx_start_for_ddl(trx, op);
+ trx_start_for_ddl(ctx->trx, op);
- if (!indexed_table->fts
- || ib_vector_size(indexed_table->fts->indexes) == 0) {
+ if (!ctx->new_table->fts
+ || ib_vector_size(ctx->new_table->fts->indexes) == 0) {
error = fts_create_common_tables(
- trx, indexed_table, user_table->name, TRUE);
+ ctx->trx, ctx->new_table,
+ user_table->name, TRUE);
- DBUG_EXECUTE_IF("innodb_test_fail_after_fts_common_table",
- error = DB_LOCK_WAIT_TIMEOUT;
- goto error_handling;);
+ DBUG_EXECUTE_IF(
+ "innodb_test_fail_after_fts_common_table",
+ error = DB_LOCK_WAIT_TIMEOUT;);
if (error != DB_SUCCESS) {
goto error_handling;
}
- indexed_table->fts->fts_status |= TABLE_DICT_LOCKED;
+ ctx->new_table->fts->fts_status
+ |= TABLE_DICT_LOCKED;
error = innobase_fts_load_stopword(
- indexed_table, trx, user_thd)
+ ctx->new_table, ctx->trx,
+ ctx->prebuilt->trx->mysql_thd)
? DB_SUCCESS : DB_ERROR;
- indexed_table->fts->fts_status &= ~TABLE_DICT_LOCKED;
+ ctx->new_table->fts->fts_status
+ &= ~TABLE_DICT_LOCKED;
if (error != DB_SUCCESS) {
goto error_handling;
}
}
- ut_ad(trx_get_dict_operation(trx) == op);
+ ut_ad(trx_get_dict_operation(ctx->trx) == op);
}
DBUG_ASSERT(error == DB_SUCCESS);
@@ -3053,15 +3097,15 @@ op_ok:
/* Commit the data dictionary transaction in order to release
the table locks on the system tables. This means that if
MySQL crashes while creating a new primary key inside
- row_merge_build_indexes(), indexed_table will not be dropped
+ row_merge_build_indexes(), ctx->new_table will not be dropped
by trx_rollback_active(). It will have to be recovered or
dropped by the database administrator. */
- trx_commit_for_mysql(trx);
+ trx_commit_for_mysql(ctx->trx);
- row_mysql_unlock_data_dictionary(trx);
+ row_mysql_unlock_data_dictionary(ctx->trx);
dict_locked = false;
- ut_a(trx->lock.n_active_thrs == 0);
+ ut_a(ctx->trx->lock.n_active_thrs == 0);
error_handling:
/* After an error, remove all those index definitions from the
@@ -3075,16 +3119,6 @@ error_handling:
ut_d(dict_table_check_for_dup_indexes(
user_table, CHECK_PARTIAL_OK));
ut_d(mutex_exit(&dict_sys->mutex));
- ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx(
- user_trx, add_index, add_key_nums, n_add_index,
- drop_index, n_drop_index,
- drop_foreign, n_drop_foreign,
- add_foreign, n_add_foreign,
- !locked, heap, trx, indexed_table, col_map,
- add_autoinc_col,
- ha_alter_info->create_info->auto_increment_value,
- autoinc_col_max_value,
- add_cols);
DBUG_RETURN(false);
case DB_TABLESPACE_EXISTS:
my_error(ER_TABLESPACE_EXISTS, MYF(0), "(unknown)");
@@ -3098,32 +3132,33 @@ error_handling:
error_handled:
- user_trx->error_info = NULL;
- trx->error_state = DB_SUCCESS;
+ ctx->prebuilt->trx->error_info = NULL;
+ ctx->trx->error_state = DB_SUCCESS;
if (!dict_locked) {
- row_mysql_lock_data_dictionary(trx);
+ row_mysql_lock_data_dictionary(ctx->trx);
}
if (new_clustered) {
- if (indexed_table != user_table) {
+ if (ctx->need_rebuild()) {
- if (DICT_TF2_FLAG_IS_SET(indexed_table, DICT_TF2_FTS)) {
+ if (DICT_TF2_FLAG_IS_SET(
+ ctx->new_table, DICT_TF2_FTS)) {
innobase_drop_fts_index_table(
- indexed_table, trx);
+ ctx->new_table, ctx->trx);
}
- dict_table_close(indexed_table, TRUE, FALSE);
+ dict_table_close(ctx->new_table, TRUE, FALSE);
-#ifdef UNIV_DDL_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
/* Nobody should have initialized the stats of the
newly created table yet. When this is the case, we
know that it has not been added for background stats
gathering. */
- ut_a(!indexed_table->stat_initialized);
-#endif /* UNIV_DDL_DEBUG */
+ ut_a(!ctx->new_table->stat_initialized);
+#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
- row_merge_drop_table(trx, indexed_table);
+ row_merge_drop_table(ctx->trx, ctx->new_table);
/* Free the log for online table rebuild, if
one was allocated. */
@@ -3134,7 +3169,7 @@ error_handled:
rw_lock_x_lock(&clust_index->lock);
if (clust_index->online_log) {
- ut_ad(!locked);
+ ut_ad(ctx->online);
row_log_abort_sec(clust_index);
clust_index->online_status
= ONLINE_INDEX_COMPLETE;
@@ -3143,17 +3178,17 @@ error_handled:
rw_lock_x_unlock(&clust_index->lock);
}
- trx_commit_for_mysql(trx);
+ trx_commit_for_mysql(ctx->trx);
/* n_ref_count must be 1, because purge cannot
be executing on this very table as we are
holding dict_operation_lock X-latch. */
- DBUG_ASSERT(user_table->n_ref_count == 1 || !locked);
+ DBUG_ASSERT(user_table->n_ref_count == 1 || ctx->online);
- online_retry_drop_indexes_with_trx(user_table, trx);
+ online_retry_drop_indexes_with_trx(user_table, ctx->trx);
} else {
- ut_ad(indexed_table == user_table);
- row_merge_drop_indexes(trx, user_table, TRUE);
- trx_commit_for_mysql(trx);
+ ut_ad(!ctx->need_rebuild());
+ row_merge_drop_indexes(ctx->trx, user_table, TRUE);
+ trx_commit_for_mysql(ctx->trx);
}
ut_d(dict_table_check_for_dup_indexes(user_table, CHECK_ALL_COMPLETE));
@@ -3161,18 +3196,19 @@ error_handled:
err_exit:
/* Clear the to_be_dropped flag in the data dictionary cache. */
- for (ulint i = 0; i < n_drop_index; i++) {
- DBUG_ASSERT(*drop_index[i]->name != TEMP_INDEX_PREFIX);
- DBUG_ASSERT(drop_index[i]->to_be_dropped);
- drop_index[i]->to_be_dropped = 0;
+ for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
+ DBUG_ASSERT(*ctx->drop_index[i]->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(ctx->drop_index[i]->to_be_dropped);
+ ctx->drop_index[i]->to_be_dropped = 0;
}
- row_mysql_unlock_data_dictionary(trx);
+ row_mysql_unlock_data_dictionary(ctx->trx);
- trx_free_for_mysql(trx);
- mem_heap_free(heap);
+ trx_free_for_mysql(ctx->trx);
+ trx_commit_for_mysql(ctx->prebuilt->trx);
- trx_commit_for_mysql(user_trx);
+ delete ctx;
+ ha_alter_info->handler_ctx = NULL;
/* There might be work for utility threads.*/
srv_active_wake_master_thread();
@@ -3183,7 +3219,7 @@ err_exit:
/* Check whether an index is needed for the foreign key constraint.
If so, if it is dropped, is there an equivalent index can play its role.
@return true if the index is needed and can't be dropped */
-static __attribute__((warn_unused_result))
+static __attribute__((nonnull(1,2,3,5), warn_unused_result))
bool
innobase_check_foreign_key_index(
/*=============================*/
@@ -3193,6 +3229,8 @@ innobase_check_foreign_key_index(
dict_index_t* index, /*!< in: index to check */
dict_table_t* indexed_table, /*!< in: table that owns the
foreign keys */
+ const char** col_names, /*!< in: column names, or NULL
+ for indexed_table->col_names */
trx_t* trx, /*!< in/out: transaction */
dict_foreign_t** drop_fk, /*!< in: Foreign key constraints
to drop */
@@ -3201,8 +3239,6 @@ innobase_check_foreign_key_index(
{
dict_foreign_t* foreign;
- ut_ad(!index->to_be_dropped);
-
/* Check if the index is referenced. */
foreign = dict_table_get_referenced_constraint(indexed_table, index);
@@ -3211,7 +3247,7 @@ innobase_check_foreign_key_index(
if (foreign
&& !dict_foreign_find_index(
- indexed_table,
+ indexed_table, col_names,
foreign->referenced_col_names,
foreign->n_fields, index,
/*check_charsets=*/TRUE,
@@ -3239,7 +3275,7 @@ innobase_check_foreign_key_index(
&& !innobase_dropping_foreign(
foreign, drop_fk, n_drop_fk)
&& !dict_foreign_find_index(
- indexed_table,
+ indexed_table, col_names,
foreign->foreign_col_names,
foreign->n_fields, index,
/*check_charsets=*/TRUE,
@@ -3285,6 +3321,7 @@ ha_innobase::prepare_inplace_alter_table(
ulint n_add_fk; /*!< Number of foreign keys to drop */
dict_table_t* indexed_table; /*!< Table where indexes are created */
mem_heap_t* heap;
+ const char** col_names;
int error;
ulint flags;
ulint flags2;
@@ -3298,10 +3335,7 @@ ha_innobase::prepare_inplace_alter_table(
DBUG_ENTER("prepare_inplace_alter_table");
DBUG_ASSERT(!ha_alter_info->handler_ctx);
DBUG_ASSERT(ha_alter_info->create_info);
-
- if (srv_read_only_mode) {
- DBUG_RETURN(false);
- }
+ DBUG_ASSERT(!srv_read_only_mode);
MONITOR_ATOMIC_INC(MONITOR_PENDING_ALTER_TABLE);
@@ -3324,12 +3358,6 @@ ha_innobase::prepare_inplace_alter_table(
}
if (ha_alter_info->handler_flags
- == Alter_inplace_info::CHANGE_CREATE_OPTION
- && !innobase_need_rebuild(ha_alter_info)) {
- goto func_exit;
- }
-
- if (ha_alter_info->handler_flags
& Alter_inplace_info::CHANGE_CREATE_OPTION) {
if (const char* invalid_opt = create_options_are_invalid(
user_thd, altered_table,
@@ -3392,7 +3420,7 @@ check_if_ok_to_rename:
if (!my_strcasecmp(system_charset_info,
(*fp)->field_name,
FTS_DOC_ID_COL_NAME)
- && innobase_fulltext_exist(altered_table->s)) {
+ && innobase_fulltext_exist(altered_table)) {
my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN,
MYF(0), name);
goto err_exit_no_heap;
@@ -3484,11 +3512,26 @@ check_if_ok_to_rename:
n_drop_fk = 0;
if (ha_alter_info->handler_flags
+ & (INNOBASE_ALTER_NOREBUILD | INNOBASE_ALTER_REBUILD)) {
+ heap = mem_heap_create(1024);
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME) {
+ col_names = innobase_get_col_names(
+ ha_alter_info, altered_table, indexed_table,
+ heap);
+ } else {
+ col_names = NULL;
+ }
+ } else {
+ heap = NULL;
+ col_names = NULL;
+ }
+
+ if (ha_alter_info->handler_flags
& Alter_inplace_info::DROP_FOREIGN_KEY) {
DBUG_ASSERT(ha_alter_info->alter_info->drop_list.elements > 0);
- heap = mem_heap_create(1024);
-
drop_fk = static_cast<dict_foreign_t**>(
mem_heap_alloc(
heap,
@@ -3535,7 +3578,6 @@ found_fk:
== ha_alter_info->alter_info->drop_list.elements);
} else {
drop_fk = NULL;
- heap = NULL;
}
if (ha_alter_info->index_drop_count) {
@@ -3546,9 +3588,6 @@ found_fk:
| Alter_inplace_info::DROP_UNIQUE_INDEX
| Alter_inplace_info::DROP_PK_INDEX));
/* Check which indexes to drop. */
- if (!heap) {
- heap = mem_heap_create(1024);
- }
drop_index = static_cast<dict_index_t**>(
mem_heap_alloc(
heap, (ha_alter_info->index_drop_count + 1)
@@ -3582,8 +3621,8 @@ found_fk:
internal FTS_DOC_ID_INDEX as well, unless it exists in
the table. */
- if (innobase_fulltext_exist(table->s)
- && !innobase_fulltext_exist(altered_table->s)
+ if (innobase_fulltext_exist(table)
+ && !innobase_fulltext_exist(altered_table)
&& !DICT_TF2_FLAG_IS_SET(
indexed_table, DICT_TF2_FTS_HAS_DOC_ID)) {
dict_index_t* fts_doc_index
@@ -3601,7 +3640,7 @@ found_fk:
if (!my_strcasecmp(
system_charset_info,
FTS_DOC_ID_INDEX_NAME,
- table->s->key_info[i].name)) {
+ table->key_info[i].name)) {
/* The index exists in the MySQL
data dictionary. Do not drop it,
even though it is no longer needed
@@ -3620,12 +3659,23 @@ check_if_can_drop_indexes:
CREATE TABLE adding FOREIGN KEY constraints. */
row_mysql_lock_data_dictionary(prebuilt->trx);
+ if (!n_drop_index) {
+ drop_index = NULL;
+ } else {
+ /* Flag all indexes that are to be dropped. */
+ for (ulint i = 0; i < n_drop_index; i++) {
+ ut_ad(!drop_index[i]->to_be_dropped);
+ drop_index[i]->to_be_dropped = 1;
+ }
+ }
+
if (prebuilt->trx->check_foreigns) {
for (uint i = 0; i < n_drop_index; i++) {
dict_index_t* index = drop_index[i];
if (innobase_check_foreign_key_index(
- ha_alter_info, index, indexed_table,
+ ha_alter_info, index,
+ indexed_table, col_names,
prebuilt->trx, drop_fk, n_drop_fk)) {
row_mysql_unlock_data_dictionary(
prebuilt->trx);
@@ -3640,7 +3690,8 @@ check_if_can_drop_indexes:
any depending foreign constraints get affected */
if (drop_primary
&& innobase_check_foreign_key_index(
- ha_alter_info, drop_primary, indexed_table,
+ ha_alter_info, drop_primary,
+ indexed_table, col_names,
prebuilt->trx, drop_fk, n_drop_fk)) {
row_mysql_unlock_data_dictionary(prebuilt->trx);
print_error(HA_ERR_DROP_INDEX_FK, MYF(0));
@@ -3648,16 +3699,6 @@ check_if_can_drop_indexes:
}
}
- if (!n_drop_index) {
- drop_index = NULL;
- } else {
- /* Flag all indexes that are to be dropped. */
- for (ulint i = 0; i < n_drop_index; i++) {
- ut_ad(!drop_index[i]->to_be_dropped);
- drop_index[i]->to_be_dropped = 1;
- }
- }
-
row_mysql_unlock_data_dictionary(prebuilt->trx);
} else {
drop_index = NULL;
@@ -3669,10 +3710,6 @@ check_if_can_drop_indexes:
& Alter_inplace_info::ADD_FOREIGN_KEY) {
ut_ad(!prebuilt->trx->check_foreigns);
- if (!heap) {
- heap = mem_heap_create(1024);
- }
-
add_fk = static_cast<dict_foreign_t**>(
mem_heap_zalloc(
heap,
@@ -3680,8 +3717,10 @@ check_if_can_drop_indexes:
* sizeof(dict_foreign_t*)));
if (!innobase_get_foreign_key_info(
- ha_alter_info, table_share, prebuilt->table,
- add_fk, &n_add_fk, heap, prebuilt->trx)) {
+ ha_alter_info, table_share,
+ prebuilt->table, col_names,
+ drop_index, n_drop_index,
+ add_fk, &n_add_fk, prebuilt->trx)) {
err_exit:
if (n_drop_index) {
row_mysql_lock_data_dictionary(prebuilt->trx);
@@ -3700,21 +3739,26 @@ err_exit:
if (heap) {
mem_heap_free(heap);
}
+
goto err_exit_no_heap;
}
}
- if (!(ha_alter_info->handler_flags & INNOBASE_INPLACE_CREATE)) {
+ if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
+ || (ha_alter_info->handler_flags
+ == Alter_inplace_info::CHANGE_CREATE_OPTION
+ && !innobase_need_rebuild(ha_alter_info))) {
+
if (heap) {
ha_alter_info->handler_ctx
= new ha_innobase_inplace_ctx(
- prebuilt->trx, 0, 0, 0,
+ prebuilt,
drop_index, n_drop_index,
drop_fk, n_drop_fk,
add_fk, n_add_fk,
ha_alter_info->online,
- heap, 0, indexed_table, 0,
- ULINT_UNDEFINED, 0, 0, 0);
+ heap, indexed_table,
+ col_names, ULINT_UNDEFINED, 0, 0);
}
func_exit:
@@ -3728,7 +3772,7 @@ func_exit:
/* If we are to build a full-text search index, check whether
the table already has a DOC ID column. If not, we will need to
add a Doc ID hidden column and rebuild the primary index */
- if (innobase_fulltext_exist(altered_table->s)) {
+ if (innobase_fulltext_exist(altered_table)) {
ulint doc_col_no;
if (!innobase_fts_check_doc_id_col(
@@ -3816,16 +3860,25 @@ found_col:
innodb_idx++;
}
+ DBUG_ASSERT(heap);
DBUG_ASSERT(user_thd == prebuilt->trx->mysql_thd);
+ DBUG_ASSERT(!ha_alter_info->handler_ctx);
+
+ ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx(
+ prebuilt,
+ drop_index, n_drop_index,
+ drop_fk, n_drop_fk, add_fk, n_add_fk,
+ ha_alter_info->online,
+ heap, prebuilt->table, col_names,
+ add_autoinc_col_no,
+ ha_alter_info->create_info->auto_increment_value,
+ autoinc_col_max_value);
+
DBUG_RETURN(prepare_inplace_alter_table_dict(
ha_alter_info, altered_table, table,
- prebuilt->table, prebuilt->trx,
table_share->table_name.str,
flags, flags2,
- heap, drop_index, n_drop_index,
- drop_fk, n_drop_fk, add_fk, n_add_fk,
- fts_doc_col_no, add_autoinc_col_no,
- autoinc_col_max_value, add_fts_doc_id,
+ fts_doc_col_no, add_fts_doc_id,
add_fts_doc_id_idx));
}
@@ -3851,10 +3904,7 @@ ha_innobase::inplace_alter_table(
dberr_t error;
DBUG_ENTER("inplace_alter_table");
-
- if (srv_read_only_mode) {
- DBUG_RETURN(false);
- }
+ DBUG_ASSERT(!srv_read_only_mode);
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
@@ -3863,7 +3913,7 @@ ha_innobase::inplace_alter_table(
DEBUG_SYNC(user_thd, "innodb_inplace_alter_table_enter");
- if (!(ha_alter_info->handler_flags & INNOBASE_INPLACE_CREATE)) {
+ if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)) {
ok_exit:
DEBUG_SYNC(user_thd, "innodb_after_inplace_alter_table");
DBUG_RETURN(false);
@@ -3881,6 +3931,7 @@ ok_exit:
DBUG_ASSERT(ctx);
DBUG_ASSERT(ctx->trx);
+ DBUG_ASSERT(ctx->prebuilt == prebuilt);
if (prebuilt->table->ibd_file_missing
|| dict_table_is_discarded(prebuilt->table)) {
@@ -3894,16 +3945,15 @@ ok_exit:
error = DB_OUT_OF_MEMORY; goto oom;);
error = row_merge_build_indexes(
prebuilt->trx,
- prebuilt->table, ctx->indexed_table,
+ prebuilt->table, ctx->new_table,
ctx->online,
- ctx->add, ctx->add_key_numbers, ctx->num_to_add,
+ ctx->add_index, ctx->add_key_numbers, ctx->num_to_add_index,
altered_table, ctx->add_cols, ctx->col_map,
ctx->add_autoinc, ctx->sequence);
#ifndef DBUG_OFF
oom:
#endif /* !DBUG_OFF */
- if (error == DB_SUCCESS && ctx->online
- && ctx->indexed_table != prebuilt->table) {
+ if (error == DB_SUCCESS && ctx->online && ctx->need_rebuild()) {
DEBUG_SYNC_C("row_log_table_apply1_before");
error = row_log_table_apply(
ctx->thr, prebuilt->table, altered_table);
@@ -4005,27 +4055,29 @@ innobase_online_rebuild_log_free(
/** Rollback a secondary index creation, drop the indexes with
temparary index prefix
-@param prebuilt the prebuilt struct
-@param table_share the TABLE_SHARE
+@param user_table InnoDB table
+@param table the TABLE
+@param locked TRUE=table locked, FALSE=may need to do a lazy drop
@param trx the transaction
*/
-static
+static __attribute__((nonnull))
void
innobase_rollback_sec_index(
/*========================*/
- row_prebuilt_t* prebuilt,
- const TABLE_SHARE* table_share,
+ dict_table_t* user_table,
+ const TABLE* table,
+ ibool locked,
trx_t* trx)
{
- row_merge_drop_indexes(trx, prebuilt->table, FALSE);
+ row_merge_drop_indexes(trx, user_table, locked);
/* Free the table->fts only if there is no FTS_DOC_ID
in the table */
- if (prebuilt->table->fts
- && !DICT_TF2_FLAG_IS_SET(prebuilt->table,
+ if (user_table->fts
+ && !DICT_TF2_FLAG_IS_SET(user_table,
DICT_TF2_FTS_HAS_DOC_ID)
- && !innobase_fulltext_exist(table_share)) {
- fts_free(prebuilt->table);
+ && !innobase_fulltext_exist(table)) {
+ fts_free(user_table);
}
}
@@ -4037,17 +4089,17 @@ prepare_inplace_alter_table(). (E.g concurrent writes were blocked
during prepare, but might not be during commit).
@param ha_alter_info Data used during in-place alter.
-@param table_share the TABLE_SHARE
+@param table the TABLE
@param prebuilt the prebuilt struct
@retval true Failure
@retval false Success
*/
-inline
+inline __attribute__((nonnull, warn_unused_result))
bool
rollback_inplace_alter_table(
/*=========================*/
Alter_inplace_info* ha_alter_info,
- const TABLE_SHARE* table_share,
+ const TABLE* table,
row_prebuilt_t* prebuilt)
{
bool fail = false;
@@ -4066,65 +4118,66 @@ rollback_inplace_alter_table(
row_mysql_lock_data_dictionary(ctx->trx);
- if (prebuilt->table != ctx->indexed_table) {
+ if (ctx->need_rebuild()) {
dberr_t err;
- ulint flags = ctx->indexed_table->flags;
+ ulint flags = ctx->new_table->flags;
- /* DML threads can access ctx->indexed_table via the
+ /* DML threads can access ctx->new_table via the
online rebuild log. Free it first. */
innobase_online_rebuild_log_free(prebuilt->table);
/* Since the FTS index specific auxiliary tables has
not yet registered with "table->fts" by fts_add_index(),
we will need explicitly delete them here */
- if (DICT_TF2_FLAG_IS_SET(ctx->indexed_table, DICT_TF2_FTS)) {
+ if (DICT_TF2_FLAG_IS_SET(ctx->new_table, DICT_TF2_FTS)) {
err = innobase_drop_fts_index_table(
- ctx->indexed_table, ctx->trx);
+ ctx->new_table, ctx->trx);
if (err != DB_SUCCESS) {
my_error_innodb(
- err, table_share->table_name.str,
+ err, table->s->table_name.str,
flags);
fail = true;
}
}
/* Drop the table. */
- dict_table_close(ctx->indexed_table, TRUE, FALSE);
+ dict_table_close(ctx->new_table, TRUE, FALSE);
-#ifdef UNIV_DDL_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
/* Nobody should have initialized the stats of the
newly created table yet. When this is the case, we
know that it has not been added for background stats
gathering. */
- ut_a(!ctx->indexed_table->stat_initialized);
-#endif /* UNIV_DDL_DEBUG */
+ ut_a(!ctx->new_table->stat_initialized);
+#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
- err = row_merge_drop_table(ctx->trx, ctx->indexed_table);
+ err = row_merge_drop_table(ctx->trx, ctx->new_table);
switch (err) {
case DB_SUCCESS:
break;
default:
- my_error_innodb(err, table_share->table_name.str,
+ my_error_innodb(err, table->s->table_name.str,
flags);
fail = true;
}
} else {
DBUG_ASSERT(!(ha_alter_info->handler_flags
& Alter_inplace_info::ADD_PK_INDEX));
+ DBUG_ASSERT(ctx->new_table == prebuilt->table);
trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
- innobase_rollback_sec_index(prebuilt, table_share, ctx->trx);
+ innobase_rollback_sec_index(
+ prebuilt->table, table, FALSE, ctx->trx);
}
trx_commit_for_mysql(ctx->trx);
row_mysql_unlock_data_dictionary(ctx->trx);
trx_free_for_mysql(ctx->trx);
-
func_exit:
#ifndef DBUG_OFF
dict_index_t* clust_index = dict_table_get_first_index(
@@ -4135,13 +4188,15 @@ func_exit:
#endif /* !DBUG_OFF */
if (ctx) {
+ DBUG_ASSERT(ctx->prebuilt == prebuilt);
+
if (ctx->num_to_add_fk) {
for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
dict_foreign_free(ctx->add_fk[i]);
}
}
- if (ctx->num_to_drop) {
+ if (ctx->num_to_drop_index) {
row_mysql_lock_data_dictionary(prebuilt->trx);
/* Clear the to_be_dropped flags
@@ -4149,8 +4204,8 @@ func_exit:
The flags may already have been cleared,
in case an error was detected in
commit_inplace_alter_table(). */
- for (ulint i = 0; i < ctx->num_to_drop; i++) {
- dict_index_t* index = ctx->drop[i];
+ for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
+ dict_index_t* index = ctx->drop_index[i];
DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
index->to_be_dropped = 0;
@@ -4166,21 +4221,21 @@ func_exit:
DBUG_RETURN(fail);
}
-/** Drop a FOREIGN KEY constraint.
-@param table_share the TABLE_SHARE
+/** Drop a FOREIGN KEY constraint from the data dictionary tables.
@param trx data dictionary transaction
-@param foreign the foreign key constraint, will be freed
+@param table_name Table name in MySQL
+@param foreign_id Foreign key constraint identifier
@retval true Failure
@retval false Success */
static __attribute__((nonnull, warn_unused_result))
bool
-innobase_drop_foreign(
-/*==================*/
- const TABLE_SHARE* table_share,
+innobase_drop_foreign_try(
+/*======================*/
trx_t* trx,
- dict_foreign_t* foreign)
+ const char* table_name,
+ const char* foreign_id)
{
- DBUG_ENTER("innobase_drop_foreign");
+ DBUG_ENTER("innobase_drop_foreign_try");
DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
@@ -4201,7 +4256,7 @@ innobase_drop_foreign(
pars_info_t* info;
info = pars_info_create();
- pars_info_add_str_literal(info, "id", foreign->id);
+ pars_info_add_str_literal(info, "id", foreign_id);
trx->op_info = "dropping foreign key constraint from dictionary";
error = que_eval_sql(info, sql, FALSE, trx);
@@ -4211,20 +4266,18 @@ innobase_drop_foreign(
error = DB_OUT_OF_FILE_SPACE;);
if (error != DB_SUCCESS) {
- my_error_innodb(error, table_share->table_name.str, 0);
+ my_error_innodb(error, table_name, 0);
trx->error_state = DB_SUCCESS;
DBUG_RETURN(true);
}
- /* Drop the foreign key constraint from the data dictionary cache. */
- dict_foreign_remove_from_cache(foreign);
DBUG_RETURN(false);
}
-/** Rename a column.
-@param table_share the TABLE_SHARE
-@param prebuilt the prebuilt struct
+/** Rename a column in the data dictionary tables.
+@param user_table InnoDB table that was being altered
@param trx data dictionary transaction
+@param table_name Table name in MySQL
@param nth_col 0-based index of the column
@param from old column name
@param to new column name
@@ -4233,11 +4286,11 @@ innobase_drop_foreign(
@retval false Success */
static __attribute__((nonnull, warn_unused_result))
bool
-innobase_rename_column(
-/*===================*/
- const TABLE_SHARE* table_share,
- row_prebuilt_t* prebuilt,
+innobase_rename_column_try(
+/*=======================*/
+ const dict_table_t* user_table,
trx_t* trx,
+ const char* table_name,
ulint nth_col,
const char* from,
const char* to,
@@ -4246,10 +4299,9 @@ innobase_rename_column(
pars_info_t* info;
dberr_t error;
- DBUG_ENTER("innobase_rename_column");
+ DBUG_ENTER("innobase_rename_column_try");
- DBUG_ASSERT(trx_get_dict_operation(trx)
- == new_clustered ? TRX_DICT_OP_TABLE : TRX_DICT_OP_INDEX);
+ DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
ut_ad(mutex_own(&dict_sys->mutex));
#ifdef UNIV_SYNC_DEBUG
@@ -4262,7 +4314,7 @@ innobase_rename_column(
info = pars_info_create();
- pars_info_add_ull_literal(info, "tableid", prebuilt->table->id);
+ pars_info_add_ull_literal(info, "tableid", user_table->id);
pars_info_add_int4_literal(info, "nth", nth_col);
pars_info_add_str_literal(info, "old", from);
pars_info_add_str_literal(info, "new", to);
@@ -4284,7 +4336,7 @@ innobase_rename_column(
if (error != DB_SUCCESS) {
err_exit:
- my_error_innodb(error, table_share->table_name.str, 0);
+ my_error_innodb(error, table_name, 0);
trx->error_state = DB_SUCCESS;
trx->op_info = "";
DBUG_RETURN(true);
@@ -4292,7 +4344,8 @@ err_exit:
trx->op_info = "renaming column in SYS_FIELDS";
- for (dict_index_t* index = dict_table_get_first_index(prebuilt->table);
+ for (const dict_index_t* index = dict_table_get_first_index(
+ user_table);
index != NULL;
index = dict_table_get_next_index(index)) {
@@ -4337,8 +4390,8 @@ err_exit:
rename_foreign:
trx->op_info = "renaming column in SYS_FOREIGN_COLS";
- for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
- prebuilt->table->foreign_list);
+ for (const dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ user_table->foreign_list);
foreign != NULL;
foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
for (unsigned i = 0; i < foreign->n_fields; i++) {
@@ -4370,8 +4423,8 @@ rename_foreign:
}
}
- for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
- prebuilt->table->referenced_list);
+ for (const dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ user_table->referenced_list);
foreign != NULL;
foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
for (unsigned i = 0; i < foreign->n_fields; i++) {
@@ -4404,37 +4457,35 @@ rename_foreign:
}
trx->op_info = "";
- if (!new_clustered) {
- /* Rename the column in the data dictionary cache. */
- dict_mem_table_col_rename(prebuilt->table, nth_col, from, to);
- }
DBUG_RETURN(false);
}
-/** Rename columns.
+/** Rename columns in the data dictionary tables.
@param ha_alter_info Data used during in-place alter.
-@param new_clustered whether the table has been rebuilt
+@param ctx In-place ALTER TABLE context
@param table the TABLE
-@param table_share the TABLE_SHARE
-@param prebuilt the prebuilt struct
@param trx data dictionary transaction
+@param table_name Table name in MySQL
@retval true Failure
@retval false Success */
static __attribute__((nonnull, warn_unused_result))
bool
-innobase_rename_columns(
-/*====================*/
+innobase_rename_columns_try(
+/*========================*/
Alter_inplace_info* ha_alter_info,
- bool new_clustered,
+ ha_innobase_inplace_ctx*ctx,
const TABLE* table,
- const TABLE_SHARE* table_share,
- row_prebuilt_t* prebuilt,
- trx_t* trx)
+ trx_t* trx,
+ const char* table_name)
{
List_iterator_fast<Create_field> cf_it(
ha_alter_info->alter_info->create_list);
uint i = 0;
+ DBUG_ASSERT(ctx);
+ DBUG_ASSERT(ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME);
+
for (Field** fp = table->field; *fp; fp++, i++) {
if (!((*fp)->flags & FIELD_IS_RENAMED) || !((*fp)->stored_in_db)) {
continue;
@@ -4443,11 +4494,11 @@ innobase_rename_columns(
cf_it.rewind();
while (Create_field* cf = cf_it++) {
if (cf->field == *fp) {
- if (innobase_rename_column(
- table_share,
- prebuilt, trx, i,
+ if (innobase_rename_column_try(
+ ctx->old_table, trx, table_name, i,
cf->field->field_name,
- cf->field_name, new_clustered)) {
+ cf->field_name,
+ ctx->need_rebuild())) {
return(true);
}
goto processed_field;
@@ -4462,87 +4513,73 @@ processed_field:
return(false);
}
-/** Undo the in-memory addition of foreign key on table->foreign_list
-and table->referenced_list.
-@param ctx saved alter table context
-@param table the foreign table */
+/** Rename columns in the data dictionary cache
+as part of commit_cache_norebuild().
+@param ha_alter_info Data used during in-place alter.
+@param table the TABLE
+@param user_table InnoDB table that was being altered */
static __attribute__((nonnull))
void
-innobase_undo_add_fk(
-/*=================*/
- ha_innobase_inplace_ctx* ctx,
- dict_table_t* fk_table)
+innobase_rename_columns_cache(
+/*==========================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* table,
+ dict_table_t* user_table)
{
- for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
- UT_LIST_REMOVE(
- foreign_list,
- fk_table->foreign_list,
- ctx->add_fk[i]);
+ if (!(ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME)) {
+ return;
+ }
+
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+ uint i = 0;
+
+ for (Field** fp = table->field; *fp; fp++, i++) {
+ if (!((*fp)->flags & FIELD_IS_RENAMED)) {
+ continue;
+ }
- if (ctx->add_fk[i]->referenced_table) {
- UT_LIST_REMOVE(
- referenced_list,
- ctx->add_fk[i]->referenced_table
- ->referenced_list,
- ctx->add_fk[i]);
+ cf_it.rewind();
+ while (Create_field* cf = cf_it++) {
+ if (cf->field == *fp) {
+ dict_mem_table_col_rename(user_table, i,
+ cf->field->field_name,
+ cf->field_name);
+ goto processed_field;
+ }
}
+
+ ut_error;
+processed_field:
+ continue;
}
}
-/** Commit or rollback the changes made during
-prepare_inplace_alter_table() and inplace_alter_table() inside
-the storage engine. Note that the allowed level of concurrency
-during this operation will be the same as for
-inplace_alter_table() and thus might be higher than during
-prepare_inplace_alter_table(). (E.g concurrent writes were
-blocked during prepare, but might not be during commit).
-@param altered_table TABLE object for new version of table.
-@param ha_alter_info Structure describing changes to be done
-by ALTER TABLE and holding data used during in-place alter.
-@param commit true => Commit, false => Rollback.
-@retval true Failure
-@retval false Success
-*/
-UNIV_INTERN
-bool
-ha_innobase::commit_inplace_alter_table(
-/*====================================*/
- TABLE* altered_table,
+/** Get the auto-increment value of the table on commit.
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@return the next auto-increment value (0 if not present) */
+static __attribute__((nonnull, warn_unused_result))
+ulonglong
+commit_get_autoinc(
+/*===============*/
Alter_inplace_info* ha_alter_info,
- bool commit)
+ ha_innobase_inplace_ctx*ctx,
+ const TABLE* altered_table,
+ const TABLE* old_table)
{
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>
- (ha_alter_info->handler_ctx);
- trx_t* trx;
- trx_t* fk_trx = NULL;
- int err = 0;
- bool new_clustered;
- dict_table_t* fk_table = NULL;
- ulonglong max_autoinc;
-
- ut_ad(!srv_read_only_mode);
+ ulonglong max_autoinc;
- DBUG_ENTER("commit_inplace_alter_table");
-
- DEBUG_SYNC_C("innodb_commit_inplace_alter_table_enter");
-
- DEBUG_SYNC_C("innodb_commit_inplace_alter_table_wait");
-
- if (!commit) {
- /* A rollback is being requested. So far we may at
- most have created some indexes. If any indexes were to
- be dropped, they would actually be dropped in this
- method if commit=true. */
- DBUG_RETURN(rollback_inplace_alter_table(
- ha_alter_info, table_share, prebuilt));
- }
+ DBUG_ENTER("commit_get_autoinc");
if (!altered_table->found_next_number_field) {
/* There is no AUTO_INCREMENT column in the table
after the ALTER operation. */
max_autoinc = 0;
- } else if (ctx && ctx->add_autoinc != ULINT_UNDEFINED) {
+ } else if (ctx->add_autoinc != ULINT_UNDEFINED) {
/* An AUTO_INCREMENT column was added. Get the last
value from the sequence, which may be based on a
supplied AUTO_INCREMENT value. */
@@ -4552,546 +4589,954 @@ ha_innobase::commit_inplace_alter_table(
&& (ha_alter_info->create_info->used_fields
& HA_CREATE_USED_AUTO)) {
/* An AUTO_INCREMENT value was supplied, but the table
- was not rebuilt. Get the user-supplied value. */
+ was not rebuilt. Get the user-supplied value or the
+ last value from the sequence. */
+ ut_ad(old_table->found_next_number_field);
+
max_autoinc = ha_alter_info->create_info->auto_increment_value;
+
+ dict_table_autoinc_lock(ctx->old_table);
+ if (max_autoinc < ctx->old_table->autoinc) {
+ max_autoinc = ctx->old_table->autoinc;
+ }
+ dict_table_autoinc_unlock(ctx->old_table);
} else {
/* An AUTO_INCREMENT value was not specified.
Read the old counter value from the table. */
- ut_ad(table->found_next_number_field);
- dict_table_autoinc_lock(prebuilt->table);
- max_autoinc = dict_table_autoinc_read(prebuilt->table);
- dict_table_autoinc_unlock(prebuilt->table);
+ ut_ad(old_table->found_next_number_field);
+ dict_table_autoinc_lock(ctx->old_table);
+ max_autoinc = ctx->old_table->autoinc;
+ dict_table_autoinc_unlock(ctx->old_table);
}
- if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
- DBUG_ASSERT(!ctx);
- /* We may want to update table attributes. */
- goto func_exit;
- }
+ DBUG_RETURN(max_autoinc);
+}
- trx_start_if_not_started_xa(prebuilt->trx);
+/** Add or drop foreign key constraints to the data dictionary tables,
+but do not touch the data dictionary cache.
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param trx Data dictionary transaction
+@param table_name Table name in MySQL
+@retval true Failure
+@retval false Success
+*/
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_update_foreign_try(
+/*========================*/
+ ha_innobase_inplace_ctx*ctx,
+ trx_t* trx,
+ const char* table_name)
+{
+ ulint foreign_id;
+ ulint i;
- {
- /* Exclusively lock the table, to ensure that no other
- transaction is holding locks on the table while we
- change the table definition. The MySQL meta-data lock
- should normally guarantee that no conflicting locks
- exist. However, FOREIGN KEY constraints checks and any
- transactions collected during crash recovery could be
- holding InnoDB locks only, not MySQL locks. */
- dberr_t error = row_merge_lock_table(
- prebuilt->trx, prebuilt->table, LOCK_X);
+ DBUG_ENTER("innobase_update_foreign_try");
+ DBUG_ASSERT(ctx);
+
+ foreign_id = dict_table_get_highest_foreign_id(ctx->new_table);
+
+ foreign_id++;
+
+ for (i = 0; i < ctx->num_to_add_fk; i++) {
+ dict_foreign_t* fk = ctx->add_fk[i];
+
+ ut_ad(fk->foreign_table == ctx->new_table
+ || fk->foreign_table == ctx->old_table);
+
+ dberr_t error = dict_create_add_foreign_id(
+ &foreign_id, ctx->old_table->name, fk);
if (error != DB_SUCCESS) {
- my_error_innodb(error, table_share->table_name.str, 0);
+ my_error(ER_TOO_LONG_IDENT, MYF(0),
+ fk->id);
DBUG_RETURN(true);
}
- DEBUG_SYNC(user_thd, "innodb_alter_commit_after_lock_table");
+ if (!fk->foreign_index) {
+ fk->foreign_index = dict_foreign_find_index(
+ ctx->new_table, ctx->col_names,
+ fk->foreign_col_names,
+ fk->n_fields, fk->referenced_index, TRUE,
+ fk->type
+ & (DICT_FOREIGN_ON_DELETE_SET_NULL
+ | DICT_FOREIGN_ON_UPDATE_SET_NULL));
+ if (!fk->foreign_index) {
+ my_error(ER_FK_INCORRECT_OPTION,
+ MYF(0), table_name, fk->id);
+ DBUG_RETURN(true);
+ }
+ }
+
+ /* The fk->foreign_col_names[] uses renamed column
+ names, while the columns in ctx->old_table have not
+ been renamed yet. */
+ error = dict_create_add_foreign_to_dictionary(
+ ctx->old_table->name, fk, trx);
+
+ DBUG_EXECUTE_IF(
+ "innodb_test_cannot_add_fk_system",
+ error = DB_ERROR;);
+
+ if (error != DB_SUCCESS) {
+ my_error(ER_FK_FAIL_ADD_SYSTEM, MYF(0),
+ fk->id);
+ DBUG_RETURN(true);
+ }
}
- if (ctx) {
- if (ctx->indexed_table != prebuilt->table) {
- for (dict_index_t* index = dict_table_get_first_index(
- ctx->indexed_table);
- index;
- index = dict_table_get_next_index(index)) {
- DBUG_ASSERT(dict_index_get_online_status(index)
- == ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- if (dict_index_is_corrupted(index)) {
- my_error(ER_INDEX_CORRUPT, MYF(0),
- index->name);
- DBUG_RETURN(true);
- }
- }
- } else {
- for (ulint i = 0; i < ctx->num_to_add; i++) {
- dict_index_t* index = ctx->add[i];
- DBUG_ASSERT(dict_index_get_online_status(index)
- == ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX);
- if (dict_index_is_corrupted(index)) {
- /* Report a duplicate key
- error for the index that was
- flagged corrupted, most likely
- because a duplicate value was
- inserted (directly or by
- rollback) after
- ha_innobase::inplace_alter_table()
- completed. */
- my_error(ER_DUP_UNKNOWN_IN_INDEX,
- MYF(0), index->name + 1);
- DBUG_RETURN(true);
- }
- }
+ for (i = 0; i < ctx->num_to_drop_fk; i++) {
+ dict_foreign_t* fk = ctx->drop_fk[i];
+
+ DBUG_ASSERT(fk->foreign_table == ctx->old_table);
+
+ if (innobase_drop_foreign_try(trx, table_name, fk->id)) {
+ DBUG_RETURN(true);
}
}
- if (!ctx || !ctx->trx) {
- /* Create a background transaction for the operations on
- the data dictionary tables. */
- trx = innobase_trx_allocate(user_thd);
+ DBUG_RETURN(false);
+}
- trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+/** Update the foreign key constraint definitions in the data dictionary cache
+after the changes to data dictionary tables were committed.
+@param ctx In-place ALTER TABLE context
+@return InnoDB error code (should always be DB_SUCCESS) */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+innobase_update_foreign_cache(
+/*==========================*/
+ ha_innobase_inplace_ctx* ctx)
+{
+ dict_table_t* user_table;
+
+ DBUG_ENTER("innobase_update_foreign_cache");
+
+ user_table = ctx->old_table;
- new_clustered = false;
+ /* Discard the added foreign keys, because we will
+ load them from the data dictionary. */
+ for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
+ dict_foreign_t* fk = ctx->add_fk[i];
+ dict_foreign_free(fk);
+ }
+
+ if (ctx->need_rebuild()) {
+ /* The rebuilt table is already using the renamed
+ column names. No need to pass col_names or to drop
+ constraints from the data dictionary cache. */
+ DBUG_ASSERT(!ctx->col_names);
+ DBUG_ASSERT(UT_LIST_GET_LEN(user_table->foreign_list) == 0);
+ DBUG_ASSERT(UT_LIST_GET_LEN(user_table->referenced_list) == 0);
+ user_table = ctx->new_table;
} else {
- trx_dict_op_t op;
+ /* Drop the foreign key constraints if the
+ table was not rebuilt. If the table is rebuilt,
+ there would not be any foreign key contraints for
+ it yet in the data dictionary cache. */
+ for (ulint i = 0; i < ctx->num_to_drop_fk; i++) {
+ dict_foreign_t* fk = ctx->drop_fk[i];
+ dict_foreign_remove_from_cache(fk);
+ }
+ }
+
+ /* Load the old or added foreign keys from the data dictionary
+ and prevent the table from being evicted from the data
+ dictionary cache (work around the lack of WL#6049). */
+ DBUG_RETURN(dict_load_foreigns(user_table->name,
+ ctx->col_names, false, true,
+ DICT_ERR_IGNORE_NONE));
+}
- trx = ctx->trx;
+/** Commit the changes made during prepare_inplace_alter_table()
+and inplace_alter_table() inside the data dictionary tables,
+when rebuilding the table.
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@param trx Data dictionary transaction
+@param table_name Table name in MySQL
+@retval true Failure
+@retval false Success
+*/
+inline __attribute__((nonnull, warn_unused_result))
+bool
+commit_try_rebuild(
+/*===============*/
+ Alter_inplace_info* ha_alter_info,
+ ha_innobase_inplace_ctx*ctx,
+ TABLE* altered_table,
+ const TABLE* old_table,
+ trx_t* trx,
+ const char* table_name)
+{
+ dict_table_t* rebuilt_table = ctx->new_table;
+ dict_table_t* user_table = ctx->old_table;
+
+ DBUG_ENTER("commit_try_rebuild");
+ DBUG_ASSERT(ctx->need_rebuild());
+ DBUG_ASSERT(trx->dict_operation_lock_mode == RW_X_LATCH);
+ DBUG_ASSERT(!(ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_FOREIGN_KEY)
+ || ctx->num_to_drop_fk > 0);
+ DBUG_ASSERT(ctx->num_to_drop_fk
+ == ha_alter_info->alter_info->drop_list.elements);
+
+ for (dict_index_t* index = dict_table_get_first_index(rebuilt_table);
+ index;
+ index = dict_table_get_next_index(index)) {
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ if (dict_index_is_corrupted(index)) {
+ my_error(ER_INDEX_CORRUPT, MYF(0),
+ index->name);
+ DBUG_RETURN(true);
+ }
+ }
- new_clustered = ctx->indexed_table != prebuilt->table;
+ if (innobase_update_foreign_try(ctx, trx, table_name)) {
+ DBUG_RETURN(true);
+ }
- op = (new_clustered) ? TRX_DICT_OP_TABLE : TRX_DICT_OP_INDEX;
+ dberr_t error;
- trx_start_for_ddl(trx, op);
+ /* Clear the to_be_dropped flag in the data dictionary cache
+ of user_table. */
+ for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
+ dict_index_t* index = ctx->drop_index[i];
+ DBUG_ASSERT(index->table == user_table);
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->to_be_dropped);
+ index->to_be_dropped = 0;
}
- if (new_clustered) {
- if (prebuilt->table->fts) {
- ut_ad(!prebuilt->table->fts->add_wq);
- fts_optimize_remove_table(prebuilt->table);
- }
+ /* We copied the table. Any indexes that were requested to be
+ dropped were not created in the copy of the table. Apply any
+ last bit of the rebuild log and then rename the tables. */
- if (ctx->indexed_table->fts) {
- ut_ad(!ctx->indexed_table->fts->add_wq);
- fts_optimize_remove_table(ctx->indexed_table);
+ if (ctx->online) {
+ DEBUG_SYNC_C("row_log_table_apply2_before");
+ error = row_log_table_apply(
+ ctx->thr, user_table, altered_table);
+ ulint err_key = thr_get_trx(ctx->thr)->error_key_num;
+
+ switch (error) {
+ KEY* dup_key;
+ case DB_SUCCESS:
+ break;
+ case DB_DUPLICATE_KEY:
+ if (err_key == ULINT_UNDEFINED) {
+ /* This should be the hidden index on
+ FTS_DOC_ID. */
+ dup_key = NULL;
+ } else {
+ DBUG_ASSERT(err_key <
+ ha_alter_info->key_count);
+ dup_key = &ha_alter_info
+ ->key_info_buffer[err_key];
+ }
+ print_keydup_error(altered_table, dup_key, MYF(0));
+ DBUG_RETURN(true);
+ case DB_ONLINE_LOG_TOO_BIG:
+ my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
+ ha_alter_info->key_info_buffer[0].name);
+ DBUG_RETURN(true);
+ case DB_INDEX_CORRUPT:
+ my_error(ER_INDEX_CORRUPT, MYF(0),
+ (err_key == ULINT_UNDEFINED)
+ ? FTS_DOC_ID_INDEX_NAME
+ : ha_alter_info->key_info_buffer[err_key]
+ .name);
+ DBUG_RETURN(true);
+ default:
+ my_error_innodb(error, table_name, user_table->flags);
+ DBUG_RETURN(true);
}
}
- /* Latch the InnoDB data dictionary exclusively so that no deadlocks
- or lock waits can happen in it during the data dictionary operation. */
- row_mysql_lock_data_dictionary(trx);
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME)
+ && innobase_rename_columns_try(ha_alter_info, ctx, old_table,
+ trx, table_name)) {
+ DBUG_RETURN(true);
+ }
- /* Wait for background stats processing to stop using the
- indexes that we are going to drop (if any). */
- if (ctx) {
- dict_stats_wait_bg_to_stop_using_tables(
- prebuilt->table, ctx->indexed_table, trx);
+ DBUG_EXECUTE_IF("ib_ddl_crash_before_rename", DBUG_SUICIDE(););
+
+ /* The new table must inherit the flag from the
+ "parent" table. */
+ if (dict_table_is_discarded(user_table)) {
+ rebuilt_table->ibd_file_missing = true;
+ rebuilt_table->flags2 |= DICT_TF2_DISCARDED;
}
- /* Final phase of add foreign key processing */
- if (ctx && ctx->num_to_add_fk > 0) {
- ulint highest_id_so_far;
- dberr_t error;
+ /* We can now rename the old table as a temporary table,
+ rename the new temporary table as the old table and drop the
+ old table. First, we only do this in the data dictionary
+ tables. The actual renaming will be performed in
+ commit_cache_rebuild(), once the data dictionary transaction
+ has been successfully committed. */
- /* If it runs concurrently with create index or table
- rebuild, we will need a separate trx to do the system
- table change, since in the case of failure to rebuild/create
- index, it will need to commit the trx that drops the newly
- created table/index, while for FK, it needs to rollback
- the metadata change */
- if (new_clustered || ctx->num_to_add) {
- fk_trx = innobase_trx_allocate(user_thd);
+ error = row_merge_rename_tables_dict(
+ user_table, rebuilt_table, ctx->tmp_name, trx);
- trx_start_for_ddl(fk_trx, TRX_DICT_OP_INDEX);
+ /* We must be still holding a table handle. */
+ DBUG_ASSERT(user_table->n_ref_count >= 1);
- fk_trx->dict_operation_lock_mode =
- trx->dict_operation_lock_mode;
- } else {
- fk_trx = trx;
- }
+ DBUG_EXECUTE_IF("ib_ddl_crash_after_rename", DBUG_SUICIDE(););
+ DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;);
- ut_ad(ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_FOREIGN_KEY);
+ if (user_table->n_ref_count > 1) {
+ /* This should only occur when an innodb_memcached
+ connection with innodb_api_enable_mdl=off was started
+ before commit_inplace_alter_table() locked the data
+ dictionary. We must roll back the ALTER TABLE, because
+ we cannot drop a table while it is being used. */
- highest_id_so_far = dict_table_get_highest_foreign_id(
- prebuilt->table);
+ /* Normally, n_ref_count must be 1, because purge
+ cannot be executing on this very table as we are
+ holding dict_operation_lock X-latch. */
- highest_id_so_far++;
+ error = DB_LOCK_WAIT_TIMEOUT;
+ }
- fk_table = ctx->indexed_table;
+ switch (error) {
+ case DB_SUCCESS:
+ DBUG_RETURN(false);
+ case DB_TABLESPACE_EXISTS:
+ ut_a(rebuilt_table->n_ref_count == 1);
+ my_error(ER_TABLESPACE_EXISTS, MYF(0), ctx->tmp_name);
+ DBUG_RETURN(true);
+ case DB_DUPLICATE_KEY:
+ ut_a(rebuilt_table->n_ref_count == 1);
+ my_error(ER_TABLE_EXISTS_ERROR, MYF(0), ctx->tmp_name);
+ DBUG_RETURN(true);
+ default:
+ my_error_innodb(error, table_name, user_table->flags);
+ DBUG_RETURN(true);
+ }
+}
- for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
+/** Apply the changes made during commit_try_rebuild(),
+to the data dictionary cache and the file system.
+@param ctx In-place ALTER TABLE context */
+inline __attribute__((nonnull))
+void
+commit_cache_rebuild(
+/*=================*/
+ ha_innobase_inplace_ctx* ctx)
+{
+ dberr_t error;
- /* Get the new dict_table_t */
- if (new_clustered) {
- ctx->add_fk[i]->foreign_table
- = fk_table;
- }
+ DBUG_ENTER("commit_cache_rebuild");
+ DBUG_ASSERT(ctx->need_rebuild());
+ DBUG_ASSERT(dict_table_is_discarded(ctx->old_table)
+ == dict_table_is_discarded(ctx->new_table));
- /* Add Foreign Key info to in-memory metadata */
- UT_LIST_ADD_LAST(foreign_list,
- fk_table->foreign_list,
- ctx->add_fk[i]);
+ const char* old_name = mem_heap_strdup(
+ ctx->heap, ctx->old_table->name);
- if (ctx->add_fk[i]->referenced_table) {
- UT_LIST_ADD_LAST(
- referenced_list,
- ctx->add_fk[i]->referenced_table->referenced_list,
- ctx->add_fk[i]);
- }
+ /* We already committed and redo logged the renames,
+ so this must succeed. */
+ error = dict_table_rename_in_cache(
+ ctx->old_table, ctx->tmp_name, FALSE);
+ ut_a(error == DB_SUCCESS);
- if (!ctx->add_fk[i]->foreign_index) {
- ctx->add_fk[i]->foreign_index
- = dict_foreign_find_index(
- fk_table,
- ctx->add_fk[i]->foreign_col_names,
- ctx->add_fk[i]->n_fields, NULL,
- TRUE, FALSE);
-
- ut_ad(ctx->add_fk[i]->foreign_index);
-
- if (!innobase_check_fk_option(
- ctx->add_fk[i])) {
- my_error(ER_FK_INCORRECT_OPTION,
- MYF(0),
- table_share->table_name.str,
- ctx->add_fk[i]->id);
- goto undo_add_fk;
- }
- }
+ error = dict_table_rename_in_cache(
+ ctx->new_table, old_name, FALSE);
+ ut_a(error == DB_SUCCESS);
- /* System table change */
- error = dict_create_add_foreign_to_dictionary(
- &highest_id_so_far, prebuilt->table,
- ctx->add_fk[i], fk_trx);
+ DBUG_VOID_RETURN;
+}
- DBUG_EXECUTE_IF(
- "innodb_test_cannot_add_fk_system",
- error = DB_ERROR;);
+/** Commit the changes made during prepare_inplace_alter_table()
+and inplace_alter_table() inside the data dictionary tables,
+when not rebuilding the table.
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param old_table MySQL table as it is before the ALTER operation
+@param trx Data dictionary transaction
+@param table_name Table name in MySQL
+@retval true Failure
+@retval false Success
+*/
+inline __attribute__((nonnull, warn_unused_result))
+bool
+commit_try_norebuild(
+/*=================*/
+ Alter_inplace_info* ha_alter_info,
+ ha_innobase_inplace_ctx*ctx,
+ const TABLE* old_table,
+ trx_t* trx,
+ const char* table_name)
+{
+ DBUG_ENTER("commit_try_norebuild");
+ DBUG_ASSERT(!ctx->need_rebuild());
+ DBUG_ASSERT(trx->dict_operation_lock_mode == RW_X_LATCH);
+ DBUG_ASSERT(!(ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_FOREIGN_KEY)
+ || ctx->num_to_drop_fk > 0);
+ DBUG_ASSERT(ctx->num_to_drop_fk
+ == ha_alter_info->alter_info->drop_list.elements);
+
+ for (ulint i = 0; i < ctx->num_to_add_index; i++) {
+ dict_index_t* index = ctx->add_index[i];
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX);
+ if (dict_index_is_corrupted(index)) {
+ /* Report a duplicate key
+ error for the index that was
+ flagged corrupted, most likely
+ because a duplicate value was
+ inserted (directly or by
+ rollback) after
+ ha_innobase::inplace_alter_table()
+ completed.
+ TODO: report this as a corruption
+ with a detailed reason once
+ WL#6379 has been implemented. */
+ my_error(ER_DUP_UNKNOWN_IN_INDEX,
+ MYF(0), index->name + 1);
+ DBUG_RETURN(true);
+ }
+ }
- if (error != DB_SUCCESS) {
- my_error(ER_FK_FAIL_ADD_SYSTEM, MYF(0),
- ctx->add_fk[i]->id);
- goto undo_add_fk;
- }
+ if (innobase_update_foreign_try(ctx, trx, table_name)) {
+ DBUG_RETURN(true);
+ }
+
+ dberr_t error;
+
+ /* We altered the table in place. */
+ /* Lose the TEMP_INDEX_PREFIX. */
+ for (ulint i = 0; i < ctx->num_to_add_index; i++) {
+ dict_index_t* index = ctx->add_index[i];
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name
+ == TEMP_INDEX_PREFIX);
+ error = row_merge_rename_index_to_add(
+ trx, ctx->new_table->id, index->id);
+ if (error != DB_SUCCESS) {
+ sql_print_error(
+ "InnoDB: rename index to add: %lu\n",
+ (ulong) error);
+ DBUG_ASSERT(0);
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "rename index to add");
+ DBUG_RETURN(true);
}
+ }
+
+ /* Drop any indexes that were requested to be dropped.
+ Rename them to TEMP_INDEX_PREFIX in the data
+ dictionary first. We do not bother to rename
+ index->name in the dictionary cache, because the index
+ is about to be freed after row_merge_drop_indexes_dict(). */
- /* Make sure the tables are moved to non-lru side of
- dictionary list */
- error = dict_load_foreigns(prebuilt->table->name, FALSE, TRUE);
+ for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
+ dict_index_t* index = ctx->drop_index[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->table == ctx->new_table);
+ DBUG_ASSERT(index->to_be_dropped);
+ error = row_merge_rename_index_to_drop(
+ trx, index->table->id, index->id);
if (error != DB_SUCCESS) {
- my_error(ER_CANNOT_ADD_FOREIGN, MYF(0));
+ sql_print_error(
+ "InnoDB: rename index to drop: %lu\n",
+ (ulong) error);
+ DBUG_ASSERT(0);
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "rename index to drop");
+ DBUG_RETURN(true);
+ }
+ }
-undo_add_fk:
- err = -1;
+ if (!(ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME)) {
+ DBUG_RETURN(false);
+ }
- if (new_clustered) {
- goto drop_new_clustered;
- } else if (ctx->num_to_add > 0) {
- ut_ad(trx != fk_trx);
+ DBUG_RETURN(innobase_rename_columns_try(ha_alter_info, ctx,
+ old_table, trx, table_name));
+}
- innobase_rollback_sec_index(
- prebuilt, table_share, trx);
- innobase_undo_add_fk(ctx, fk_table);
- trx_rollback_for_mysql(fk_trx);
+/** Commit the changes to the data dictionary cache
+after a successful commit_try_norebuild() call.
+@param ctx In-place ALTER TABLE context
+@param table the TABLE before the ALTER
+@param trx Data dictionary transaction object
+(will be started and committed)
+@return whether all replacements were found for dropped indexes */
+inline __attribute__((nonnull, warn_unused_result))
+bool
+commit_cache_norebuild(
+/*===================*/
+ ha_innobase_inplace_ctx*ctx,
+ const TABLE* table,
+ trx_t* trx)
+{
+ DBUG_ENTER("commit_cache_norebuild");
- goto trx_commit;
- } else {
- goto trx_rollback;
- }
- }
+ bool found = true;
+
+ DBUG_ASSERT(!ctx->need_rebuild());
+
+ for (ulint i = 0; i < ctx->num_to_add_index; i++) {
+ dict_index_t* index = ctx->add_index[i];
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX);
+ index->name++;
}
- if (new_clustered) {
- dberr_t error;
- char* tmp_name;
+ if (ctx->num_to_drop_index) {
+ /* Really drop the indexes that were dropped.
+ The transaction had to be committed first
+ (after renaming the indexes), so that in the
+ event of a crash, crash recovery will drop the
+ indexes, because it drops all indexes whose
+ names start with TEMP_INDEX_PREFIX. Once we
+ have started dropping an index tree, there is
+ no way to roll it back. */
- /* Clear the to_be_dropped flag in the data dictionary. */
- for (ulint i = 0; i < ctx->num_to_drop; i++) {
- dict_index_t* index = ctx->drop[i];
+ for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
+ dict_index_t* index = ctx->drop_index[i];
DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->table == ctx->new_table);
DBUG_ASSERT(index->to_be_dropped);
- index->to_be_dropped = 0;
+
+ /* Replace the indexes in foreign key
+ constraints if needed. */
+
+ if (!dict_foreign_replace_index(
+ index->table, ctx->col_names, index)) {
+ found = false;
+ }
+
+ /* Mark the index dropped
+ in the data dictionary cache. */
+ rw_lock_x_lock(dict_index_get_lock(index));
+ index->page = FIL_NULL;
+ rw_lock_x_unlock(dict_index_get_lock(index));
}
- /* We copied the table. Any indexes that were
- requested to be dropped were not created in the copy
- of the table. Apply any last bit of the rebuild log
- and then rename the tables. */
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+ row_merge_drop_indexes_dict(trx, ctx->new_table->id);
- if (ctx->online) {
- DEBUG_SYNC_C("row_log_table_apply2_before");
- error = row_log_table_apply(
- ctx->thr, prebuilt->table, altered_table);
+ for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
+ dict_index_t* index = ctx->drop_index[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->table == ctx->new_table);
- switch (error) {
- KEY* dup_key;
- case DB_SUCCESS:
- break;
- case DB_DUPLICATE_KEY:
- if (prebuilt->trx->error_key_num
- == ULINT_UNDEFINED) {
- /* This should be the hidden index on
- FTS_DOC_ID. */
- dup_key = NULL;
- } else {
- DBUG_ASSERT(
- prebuilt->trx->error_key_num
- < ha_alter_info->key_count);
- dup_key = &ha_alter_info
- ->key_info_buffer[
- prebuilt->trx
- ->error_key_num];
- }
- print_keydup_error(altered_table, dup_key, MYF(0));
- break;
- case DB_ONLINE_LOG_TOO_BIG:
- my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
- ha_alter_info->key_info_buffer[0]
- .name);
- break;
- case DB_INDEX_CORRUPT:
- my_error(ER_INDEX_CORRUPT, MYF(0),
- (prebuilt->trx->error_key_num
- == ULINT_UNDEFINED)
- ? FTS_DOC_ID_INDEX_NAME
- : ha_alter_info->key_info_buffer[
- prebuilt->trx->error_key_num]
- .name);
- break;
- default:
- my_error_innodb(error,
- table_share->table_name.str,
- prebuilt->table->flags);
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(index->type == DICT_FTS
+ || (index->type
+ & DICT_CORRUPT));
+ DBUG_ASSERT(index->table->fts);
+ fts_drop_index(index->table, index, trx);
}
- if (error != DB_SUCCESS) {
- err = -1;
- goto drop_new_clustered;
- }
+ dict_index_remove_from_cache(index->table, index);
}
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME)
- && innobase_rename_columns(ha_alter_info, true, table,
- table_share, prebuilt, trx)) {
- err = -1;
- goto drop_new_clustered;
- }
-
- /* A new clustered index was defined for the table
- and there was no error at this point. We can
- now rename the old table as a temporary table,
- rename the new temporary table as the old
- table and drop the old table. */
- tmp_name = dict_mem_create_temporary_tablename(
- ctx->heap, ctx->indexed_table->name,
- ctx->indexed_table->id);
-
- /* Rename table will reload and refresh the in-memory
- foreign key constraint metadata. This is a rename operation
- in preparing for dropping the old table. Set the table
- to_be_dropped bit here, so to make sure DML foreign key
- constraint check does not use the stale dict_foreign_t.
- This is done because WL#6049 (FK MDL) has not been
- implemented yet */
- prebuilt->table->to_be_dropped = true;
-
- DBUG_EXECUTE_IF("ib_ddl_crash_before_rename",
- DBUG_SUICIDE(););
+ trx_commit_for_mysql(trx);
+ }
- /* The new table must inherit the flag from the
- "parent" table. */
- if (dict_table_is_discarded(prebuilt->table)) {
- ctx->indexed_table->ibd_file_missing = true;
- ctx->indexed_table->flags2 |= DICT_TF2_DISCARDED;
+ DBUG_RETURN(found);
+}
+
+/** Adjust the persistent statistics after non-rebuilding ALTER TABLE.
+Remove statistics for dropped indexes, add statistics for created indexes
+and rename statistics for renamed indexes.
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param altered_table MySQL table that is being altered
+@param table_name Table name in MySQL
+@param thd MySQL connection
+*/
+static
+void
+alter_stats_norebuild(
+/*==================*/
+ Alter_inplace_info* ha_alter_info,
+ ha_innobase_inplace_ctx* ctx,
+ TABLE* altered_table,
+ const char* table_name,
+ THD* thd)
+{
+ ulint i;
+
+ DBUG_ENTER("alter_stats_norebuild");
+ DBUG_ASSERT(!ctx->need_rebuild());
+
+ if (!dict_stats_is_persistent_enabled(ctx->new_table)) {
+ DBUG_VOID_RETURN;
+ }
+
+ /* TODO: This will not drop the (unused) statistics for
+ FTS_DOC_ID_INDEX if it was a hidden index, dropped together
+ with the last renamining FULLTEXT index. */
+ for (i = 0; i < ha_alter_info->index_drop_count; i++) {
+ const KEY* key = ha_alter_info->index_drop_buffer[i];
+
+ if (key->flags & HA_FULLTEXT) {
+ /* There are no index cardinality
+ statistics for FULLTEXT indexes. */
+ continue;
}
- error = row_merge_rename_tables(
- prebuilt->table, ctx->indexed_table,
- tmp_name, trx);
+ char errstr[1024];
- DBUG_EXECUTE_IF("ib_ddl_crash_after_rename",
- DBUG_SUICIDE(););
+ if (dict_stats_drop_index(
+ ctx->new_table->name, key->name,
+ errstr, sizeof errstr) != DB_SUCCESS) {
+ push_warning(thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_LOCK_WAIT_TIMEOUT, errstr);
+ }
+ }
- /* n_ref_count must be 1, because purge cannot
- be executing on this very table as we are
- holding dict_operation_lock X-latch. */
- ut_a(prebuilt->table->n_ref_count == 1);
+ for (i = 0; i < ctx->num_to_add_index; i++) {
+ dict_index_t* index = ctx->add_index[i];
+ DBUG_ASSERT(index->table == ctx->new_table);
- switch (error) {
- dict_table_t* old_table;
- case DB_SUCCESS:
- old_table = prebuilt->table;
+ if (!(index->type & DICT_FTS)) {
+ dict_stats_init(ctx->new_table);
+ dict_stats_update_for_index(index);
+ }
+ }
- DBUG_EXECUTE_IF("ib_ddl_crash_before_commit",
- DBUG_SUICIDE(););
+ DBUG_VOID_RETURN;
+}
- trx_commit_for_mysql(prebuilt->trx);
+/** Adjust the persistent statistics after rebuilding ALTER TABLE.
+Remove statistics for dropped indexes, add statistics for created indexes
+and rename statistics for renamed indexes.
+@param table InnoDB table that was rebuilt by ALTER TABLE
+@param table_name Table name in MySQL
+@param thd MySQL connection
+*/
+static
+void
+alter_stats_rebuild(
+/*================*/
+ dict_table_t* table,
+ const char* table_name,
+ THD* thd)
+{
+ DBUG_ENTER("alter_stats_rebuild");
- DBUG_EXECUTE_IF("ib_ddl_crash_after_commit",
- DBUG_SUICIDE(););
+ if (dict_table_is_discarded(table)
+ || !dict_stats_is_persistent_enabled(table)) {
+ DBUG_VOID_RETURN;
+ }
- if (fk_trx) {
- ut_ad(fk_trx != trx);
- trx_commit_for_mysql(fk_trx);
- }
+ dberr_t ret;
- row_prebuilt_free(prebuilt, TRUE);
- error = row_merge_drop_table(trx, old_table);
- prebuilt = row_create_prebuilt(
- ctx->indexed_table, table->s->reclength);
- err = 0;
- break;
- case DB_TABLESPACE_EXISTS:
- ut_a(ctx->indexed_table->n_ref_count == 1);
- my_error(ER_TABLESPACE_EXISTS, MYF(0), tmp_name);
- err = HA_ERR_TABLESPACE_EXISTS;
- goto drop_new_clustered;
- case DB_DUPLICATE_KEY:
- ut_a(ctx->indexed_table->n_ref_count == 1);
- my_error(ER_TABLE_EXISTS_ERROR, MYF(0), tmp_name);
- err = HA_ERR_TABLE_EXIST;
- goto drop_new_clustered;
- default:
- my_error_innodb(error,
- table_share->table_name.str,
- prebuilt->table->flags);
- err = -1;
-
-drop_new_clustered:
- /* Reset the to_be_dropped bit for the old table,
- since we are aborting the operation and dropping
- the new table due to some error conditions */
- prebuilt->table->to_be_dropped = false;
-
- /* Need to drop the added foreign key first */
- if (fk_trx) {
- ut_ad(fk_trx != trx);
- innobase_undo_add_fk(ctx, fk_table);
- trx_rollback_for_mysql(fk_trx);
- }
+ ret = dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT);
- dict_table_close(ctx->indexed_table, TRUE, FALSE);
+ if (ret != DB_SUCCESS) {
+ push_warning_printf(
+ thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_ALTER_INFO,
+ "Error updating stats for table '%s' "
+ "after table rebuild: %s",
+ table_name, ut_strerr(ret));
+ }
-#ifdef UNIV_DDL_DEBUG
- /* Nobody should have initialized the stats of the
- newly created table yet. When this is the case, we
- know that it has not been added for background stats
- gathering. */
- ut_a(!ctx->indexed_table->stat_initialized);
-#endif /* UNIV_DDL_DEBUG */
-
- row_merge_drop_table(trx, ctx->indexed_table);
- ctx->indexed_table = NULL;
- goto trx_commit;
- }
- } else if (ctx) {
- dberr_t error;
-
- /* We altered the table in place. */
- /* Lose the TEMP_INDEX_PREFIX. */
- for (ulint i = 0; i < ctx->num_to_add; i++) {
- dict_index_t* index = ctx->add[i];
- DBUG_ASSERT(dict_index_get_online_status(index)
- == ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name
- == TEMP_INDEX_PREFIX);
- index->name++;
- error = row_merge_rename_index_to_add(
- trx, prebuilt->table->id,
- index->id);
- if (error != DB_SUCCESS) {
- sql_print_error(
- "InnoDB: rename index to add: %lu\n",
- (ulong) error);
- DBUG_ASSERT(0);
- }
+ DBUG_VOID_RETURN;
+}
+
+#ifndef DBUG_OFF
+# define DBUG_INJECT_CRASH(prefix, count) \
+do { \
+ char buf[32]; \
+ ut_snprintf(buf, sizeof buf, prefix "_%u", count); \
+ DBUG_EXECUTE_IF(buf, DBUG_SUICIDE();); \
+} while (0)
+#else
+# define DBUG_INJECT_CRASH(prefix, count)
+#endif
+
+/** Commit or rollback the changes made during
+prepare_inplace_alter_table() and inplace_alter_table() inside
+the storage engine. Note that the allowed level of concurrency
+during this operation will be the same as for
+inplace_alter_table() and thus might be higher than during
+prepare_inplace_alter_table(). (E.g concurrent writes were
+blocked during prepare, but might not be during commit).
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+@param commit true => Commit, false => Rollback.
+@retval true Failure
+@retval false Success
+*/
+UNIV_INTERN
+bool
+ha_innobase::commit_inplace_alter_table(
+/*====================================*/
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info,
+ bool commit)
+{
+ ha_innobase_inplace_ctx* ctx0
+ = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
+#ifndef DBUG_OFF
+ uint crash_inject_count = 1;
+ uint crash_fail_inject_count = 1;
+ uint failure_inject_count = 1;
+#endif
+
+ DBUG_ENTER("commit_inplace_alter_table");
+ DBUG_ASSERT(!srv_read_only_mode);
+ DBUG_ASSERT(!ctx0 || ctx0->prebuilt == prebuilt);
+ DBUG_ASSERT(!ctx0 || ctx0->old_table == prebuilt->table);
+
+ DEBUG_SYNC_C("innodb_commit_inplace_alter_table_enter");
+
+ DEBUG_SYNC_C("innodb_commit_inplace_alter_table_wait");
+
+ if (!commit) {
+ /* A rollback is being requested. So far we may at
+ most have created some indexes. If any indexes were to
+ be dropped, they would actually be dropped in this
+ method if commit=true. */
+ DBUG_RETURN(rollback_inplace_alter_table(
+ ha_alter_info, table, prebuilt));
+ }
+
+ if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
+ DBUG_ASSERT(!ctx0);
+ MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
+ DBUG_RETURN(false);
+ }
+
+ DBUG_ASSERT(ctx0);
+
+ inplace_alter_handler_ctx** ctx_array;
+ inplace_alter_handler_ctx* ctx_single[2];
+
+ ctx_single[0] = ctx0;
+ ctx_single[1] = NULL;
+ ctx_array = ctx_single;
+
+ DBUG_ASSERT(ctx0 == ctx_array[0]);
+ ut_ad(prebuilt->table == ctx0->old_table);
+
+ /* Free the ctx->trx of other partitions, if any. We will only
+ use the ctx0->trx here. Others may have been allocated in
+ the prepare stage. */
+
+ for (inplace_alter_handler_ctx** pctx = &ctx_array[1]; *pctx;
+ pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>(*pctx);
+
+ if (ctx->trx) {
+ trx_free_for_mysql(ctx->trx);
+ ctx->trx = NULL;
}
+ }
- /* Drop any indexes that were requested to be dropped.
- Rename them to TEMP_INDEX_PREFIX in the data
- dictionary first. We do not bother to rename
- index->name in the dictionary cache, because the index
- is about to be freed after row_merge_drop_indexes_dict(). */
+ trx_start_if_not_started_xa(prebuilt->trx);
- for (ulint i = 0; i < ctx->num_to_drop; i++) {
- dict_index_t* index = ctx->drop[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- DBUG_ASSERT(index->table == prebuilt->table);
- DBUG_ASSERT(index->to_be_dropped);
+ for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>(*pctx);
+ DBUG_ASSERT(ctx->prebuilt->trx == prebuilt->trx);
- error = row_merge_rename_index_to_drop(
- trx, index->table->id, index->id);
- if (error != DB_SUCCESS) {
- sql_print_error(
- "InnoDB: rename index to drop: %lu\n",
- (ulong) error);
- DBUG_ASSERT(0);
- }
+ /* Exclusively lock the table, to ensure that no other
+ transaction is holding locks on the table while we
+ change the table definition. The MySQL meta-data lock
+ should normally guarantee that no conflicting locks
+ exist. However, FOREIGN KEY constraints checks and any
+ transactions collected during crash recovery could be
+ holding InnoDB locks only, not MySQL locks. */
+
+ dberr_t error = row_merge_lock_table(
+ prebuilt->trx, ctx->old_table, LOCK_X);
+
+ if (error != DB_SUCCESS) {
+ my_error_innodb(
+ error, table_share->table_name.str, 0);
+ DBUG_RETURN(true);
}
}
- if (err == 0
- && (ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_FOREIGN_KEY)) {
- DBUG_ASSERT(ctx->num_to_drop_fk > 0);
- DBUG_ASSERT(ctx->num_to_drop_fk
- == ha_alter_info->alter_info->drop_list.elements);
- for (ulint i = 0; i < ctx->num_to_drop_fk; i++) {
- DBUG_ASSERT(prebuilt->table
- == ctx->drop_fk[i]->foreign_table);
+ DEBUG_SYNC(user_thd, "innodb_alter_commit_after_lock_table");
+
+ const bool new_clustered = ctx0->need_rebuild();
+ trx_t* trx = ctx0->trx;
+ bool fail = false;
+
+ if (new_clustered) {
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>(*pctx);
+ DBUG_ASSERT(ctx->need_rebuild());
+
+ if (ctx->old_table->fts) {
+ ut_ad(!ctx->old_table->fts->add_wq);
+ fts_optimize_remove_table(
+ ctx->old_table);
+ }
- if (innobase_drop_foreign(
- table_share, trx, ctx->drop_fk[i])) {
- err = -1;
+ if (ctx->new_table->fts) {
+ ut_ad(!ctx->new_table->fts->add_wq);
+ fts_optimize_remove_table(
+ ctx->new_table);
}
}
}
- if (err == 0 && !new_clustered
- && (ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME)
- && innobase_rename_columns(ha_alter_info, false, table,
- table_share, prebuilt, trx)) {
- err = -1;
+ if (!trx) {
+ DBUG_ASSERT(!new_clustered);
+ trx = innobase_trx_allocate(user_thd);
}
- if (err == 0) {
- if (fk_trx && fk_trx != trx) {
- /* This needs to be placed before "trx_commit" marker,
- since anyone called "goto trx_commit" has committed
- or rolled back fk_trx before jumping here */
- trx_commit_for_mysql(fk_trx);
- }
-trx_commit:
- trx_commit_for_mysql(trx);
- } else {
-trx_rollback:
- /* undo the addition of foreign key */
- if (fk_trx) {
- innobase_undo_add_fk(ctx, fk_table);
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+ /* Latch the InnoDB data dictionary exclusively so that no deadlocks
+ or lock waits can happen in it during the data dictionary operation. */
+ row_mysql_lock_data_dictionary(trx);
- if (fk_trx != trx) {
- trx_rollback_for_mysql(fk_trx);
- }
- }
+ /* Prevent the background statistics collection from accessing
+ the tables. */
+ for (;;) {
+ bool retry = false;
- trx_rollback_for_mysql(trx);
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>(*pctx);
- /* If there are newly added secondary indexes, above
- rollback will revert the rename operation and put the
- new indexes with the temp index prefix, we can drop
- them here */
- if (ctx && !new_clustered) {
- ulint i;
-
- /* Need to drop the in-memory dict_index_t first
- to avoid dict_table_check_for_dup_indexes()
- assertion in row_merge_drop_indexes() in the case
- of add and drop the same index */
- for (i = 0; i < ctx->num_to_add; i++) {
- dict_index_t* index = ctx->add[i];
- dict_index_remove_from_cache(
- prebuilt->table, index);
- }
+ DBUG_ASSERT(new_clustered == ctx->need_rebuild());
- if (ctx->num_to_add) {
- trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
- row_merge_drop_indexes(trx, prebuilt->table,
- FALSE);
- trx_commit_for_mysql(trx);
+ if (new_clustered
+ && !dict_stats_stop_bg(ctx->old_table)) {
+ retry = true;
}
- for (i = 0; i < ctx->num_to_drop; i++) {
- dict_index_t* index = ctx->drop[i];
- index->to_be_dropped = false;
+ if (!dict_stats_stop_bg(ctx->new_table)) {
+ retry = true;
}
}
+
+ if (!retry) {
+ break;
+ }
+
+ DICT_STATS_BG_YIELD(trx);
+ }
+
+ /* Apply the changes to the data dictionary tables, for all
+ partitions. */
+
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx && !fail; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>(*pctx);
+
+ DBUG_ASSERT(new_clustered == ctx->need_rebuild());
+
+ ctx->max_autoinc = commit_get_autoinc(
+ ha_alter_info, ctx, altered_table, table);
+
+ if (ctx->need_rebuild()) {
+ ctx->tmp_name = dict_mem_create_temporary_tablename(
+ ctx->heap, ctx->new_table->name,
+ ctx->new_table->id);
+
+ fail = commit_try_rebuild(
+ ha_alter_info, ctx, altered_table, table,
+ trx, table_share->table_name.str);
+ } else {
+ fail = commit_try_norebuild(
+ ha_alter_info, ctx, table, trx,
+ table_share->table_name.str);
+ }
+ DBUG_INJECT_CRASH("ib_commit_inplace_crash",
+ crash_inject_count++);
+#ifndef DBUG_OFF
+ {
+ /* Generate a dynamic dbug text. */
+ char buf[32];
+ ut_snprintf(buf, sizeof buf, "ib_commit_inplace_fail_%u",
+ failure_inject_count++);
+ DBUG_EXECUTE_IF(buf,
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "Injected error!");
+ fail = true;
+ );
+ }
+#endif
+ }
+
+ /* Commit or roll back the changes to the data dictionary. */
+
+ if (fail) {
+ trx_rollback_for_mysql(trx);
+ } else if (!new_clustered) {
+ trx_commit_for_mysql(trx);
+ } else {
+ mtr_t mtr;
+ mtr_start(&mtr);
+
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>(*pctx);
+
+ DBUG_ASSERT(ctx->need_rebuild());
+ /* Generate the redo log for the file
+ operations that will be performed in
+ commit_cache_rebuild(). */
+ fil_mtr_rename_log(ctx->old_table->space,
+ ctx->old_table->name,
+ ctx->new_table->space,
+ ctx->new_table->name,
+ ctx->tmp_name, &mtr);
+ DBUG_INJECT_CRASH("ib_commit_inplace_crash",
+ crash_inject_count++);
+ }
+
+ /* Test what happens on crash if the redo logs
+ are flushed to disk here. The log records
+ about the rename should not be committed, and
+ the data dictionary transaction should be
+ rolled back, restoring the old table. */
+ DBUG_EXECUTE_IF("innodb_alter_commit_crash_before_commit",
+ log_buffer_flush_to_disk();
+ DBUG_SUICIDE(););
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_ad(!trx->fts_trx);
+ ut_ad(trx->insert_undo || trx->update_undo);
+
+ /* The following call commits the
+ mini-transaction, making the data dictionary
+ transaction committed at mtr.end_lsn. The
+ transaction becomes 'durable' by the time when
+ log_buffer_flush_to_disk() returns. In the
+ logical sense the commit in the file-based
+ data structures happens here. */
+ trx_commit_low(trx, &mtr);
+
+ /* If server crashes here, the dictionary in
+ InnoDB and MySQL will differ. The .ibd files
+ and the .frm files must be swapped manually by
+ the administrator. No loss of data. */
+ DBUG_EXECUTE_IF("innodb_alter_commit_crash_after_commit",
+ log_buffer_flush_to_disk();
+ DBUG_SUICIDE(););
}
/* Flush the log to reduce probability that the .frm files and
@@ -5100,236 +5545,316 @@ trx_rollback:
log_buffer_flush_to_disk();
- if (new_clustered) {
- innobase_online_rebuild_log_free(prebuilt->table);
- }
+ /* At this point, the changes to the persistent storage have
+ been committed or rolled back. What remains to be done is to
+ update the in-memory structures, close some handles, release
+ temporary files, and (unless we rolled back) update persistent
+ statistics. */
+ dberr_t error = DB_SUCCESS;
- if (err == 0 && ctx) {
- /* The changes were successfully performed. */
- bool add_fts = false;
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>(*pctx);
- /* Rebuild the index translation table.
- This should only be needed when !new_clustered. */
- share->idx_trans_tbl.index_count = 0;
+ DBUG_ASSERT(ctx->need_rebuild() == new_clustered);
- /* Publish the created fulltext index, if any.
- Note that a fulltext index can be created without
- creating the clustered index, if there already exists
- a suitable FTS_DOC_ID column. If not, one will be
- created, implying new_clustered */
- for (ulint i = 0; i < ctx->num_to_add; i++) {
- dict_index_t* index = ctx->add[i];
+ if (new_clustered) {
+ innobase_online_rebuild_log_free(ctx->old_table);
+ }
- if (index->type & DICT_FTS) {
- DBUG_ASSERT(index->type == DICT_FTS);
- fts_add_index(index, prebuilt->table);
- add_fts = true;
+ if (fail) {
+ if (new_clustered) {
+ dict_table_close(ctx->new_table,
+ TRUE, FALSE);
+
+#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
+ /* Nobody should have initialized the
+ stats of the newly created table
+ yet. When this is the case, we know
+ that it has not been added for
+ background stats gathering. */
+ ut_a(!ctx->new_table->stat_initialized);
+#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
+
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+ row_merge_drop_table(trx, ctx->new_table);
+ trx_commit_for_mysql(trx);
+ ctx->new_table = NULL;
+ } else {
+ /* We failed, but did not rebuild the table.
+ Roll back any ADD INDEX, or get rid of garbage
+ ADD INDEX that was left over from a previous
+ ALTER TABLE statement. */
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+ innobase_rollback_sec_index(
+ ctx->new_table, table, TRUE, trx);
+ trx_commit_for_mysql(trx);
}
- }
+ DBUG_INJECT_CRASH("ib_commit_inplace_crash_fail",
+ crash_fail_inject_count++);
- if (!new_clustered && ha_alter_info->index_drop_count) {
+ continue;
+ }
- /* Really drop the indexes that were dropped.
- The transaction had to be committed first
- (after renaming the indexes), so that in the
- event of a crash, crash recovery will drop the
- indexes, because it drops all indexes whose
- names start with TEMP_INDEX_PREFIX. Once we
- have started dropping an index tree, there is
- no way to roll it back. */
+ innobase_copy_frm_flags_from_table_share(
+ ctx->new_table, altered_table->s);
- trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+ if (new_clustered) {
+ /* We will reload and refresh the
+ in-memory foreign key constraint
+ metadata. This is a rename operation
+ in preparing for dropping the old
+ table. Set the table to_be_dropped bit
+ here, to make sure the DML foreign key
+ constraint check does not use the
+ stale dict_foreign_t. This is done
+ because WL#6049 (FK MDL) has not been
+ implemented yet. */
+ ctx->old_table->to_be_dropped = true;
- for (ulint i = 0; i < ctx->num_to_drop; i++) {
- dict_index_t* index = ctx->drop[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- DBUG_ASSERT(index->table == prebuilt->table);
- DBUG_ASSERT(index->to_be_dropped);
+ /* Rename the tablespace files. */
+ commit_cache_rebuild(ctx);
- /* Replace the indexes in foreign key
- constraints if needed. */
+ error = innobase_update_foreign_cache(ctx);
+ if (error != DB_SUCCESS) {
+ goto foreign_fail;
+ }
+ } else {
+ error = innobase_update_foreign_cache(ctx);
- dict_foreign_replace_index(
- prebuilt->table, index, prebuilt->trx);
+ if (error != DB_SUCCESS) {
+foreign_fail:
+ /* The data dictionary cache
+ is probably corrupted now. The
+ best solution would be to
+ kill and restart the server,
+ but the *.frm file has not
+ been replaced yet. */
+ my_error(ER_CANNOT_ADD_FOREIGN,
+ MYF(0));
+ sql_print_error(
+ "InnoDB: dict_load_foreigns()"
+ " returned %u for %s",
+ (unsigned) error,
+ thd_query_string(user_thd)
+ ->str);
+ ut_ad(0);
+ } else {
+ if (!commit_cache_norebuild(
+ ctx, table, trx)) {
+ ut_a(!prebuilt->trx->check_foreigns);
+ }
- /* Mark the index dropped
- in the data dictionary cache. */
- rw_lock_x_lock(dict_index_get_lock(index));
- index->page = FIL_NULL;
- rw_lock_x_unlock(dict_index_get_lock(index));
+ innobase_rename_columns_cache(
+ ha_alter_info, table,
+ ctx->new_table);
}
+ }
+ DBUG_INJECT_CRASH("ib_commit_inplace_crash",
+ crash_inject_count++);
+ }
- row_merge_drop_indexes_dict(trx, prebuilt->table->id);
+ /* Invalidate the index translation table. In partitioned
+ tables, there is one TABLE_SHARE (and also only one TABLE)
+ covering all partitions. */
+ share->idx_trans_tbl.index_count = 0;
- for (ulint i = 0; i < ctx->num_to_drop; i++) {
- dict_index_t* index = ctx->drop[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- DBUG_ASSERT(index->table == prebuilt->table);
-
- if (index->type & DICT_FTS) {
- DBUG_ASSERT(index->type == DICT_FTS
- || (index->type
- & DICT_CORRUPT));
- DBUG_ASSERT(prebuilt->table->fts);
- fts_drop_index(
- prebuilt->table, index, trx);
- }
+ if (trx == ctx0->trx) {
+ ctx0->trx = NULL;
+ }
- dict_index_remove_from_cache(
- prebuilt->table, index);
- }
+ /* Tell the InnoDB server that there might be work for
+ utility threads: */
- trx_commit_for_mysql(trx);
- }
+ srv_active_wake_master_thread();
- ut_d(dict_table_check_for_dup_indexes(
- prebuilt->table, CHECK_ALL_COMPLETE));
- DBUG_ASSERT(new_clustered == !prebuilt->trx);
+ if (fail) {
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (*pctx);
+ DBUG_ASSERT(ctx->need_rebuild() == new_clustered);
- if (add_fts) {
- fts_optimize_add_table(prebuilt->table);
+ ut_d(dict_table_check_for_dup_indexes(
+ ctx->old_table,
+ CHECK_ABORTED_OK));
+ ut_a(fts_check_cached_index(ctx->old_table));
+ DBUG_INJECT_CRASH("ib_commit_inplace_crash_fail",
+ crash_fail_inject_count++);
}
+
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_mysql(trx);
+ DBUG_RETURN(true);
}
- if (!prebuilt->trx) {
- /* We created a new clustered index and committed the
- user transaction already, so that we were able to
- drop the old table. */
- update_thd();
- prebuilt->trx->will_lock++;
+ /* Release the table locks. */
+ trx_commit_for_mysql(prebuilt->trx);
- DBUG_EXECUTE_IF("ib_ddl_crash_after_user_trx_commit",
- DBUG_SUICIDE(););
+ DBUG_EXECUTE_IF("ib_ddl_crash_after_user_trx_commit", DBUG_SUICIDE(););
- trx_start_if_not_started_xa(prebuilt->trx);
- }
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (*pctx);
+ DBUG_ASSERT(ctx->need_rebuild() == new_clustered);
- ut_d(dict_table_check_for_dup_indexes(
- prebuilt->table, CHECK_ABORTED_OK));
- ut_a(fts_check_cached_index(prebuilt->table));
- row_mysql_unlock_data_dictionary(trx);
- if (fk_trx && fk_trx != trx) {
- fk_trx->dict_operation_lock_mode = 0;
- trx_free_for_mysql(fk_trx);
- }
- trx_free_for_mysql(trx);
+ if (altered_table->found_next_number_field) {
+ dict_table_t* t = ctx->new_table;
- if (ctx && trx == ctx->trx) {
- ctx->trx = NULL;
- }
+ dict_table_autoinc_lock(t);
+ dict_table_autoinc_initialize(t, ctx->max_autoinc);
+ dict_table_autoinc_unlock(t);
+ }
- if (err == 0) {
- /* Delete corresponding rows from the stats table. We update
- the statistics in a separate transaction from trx, because
- lock waits are not allowed in a data dictionary transaction.
- (Lock waits are possible on the statistics table, because it
- is directly accessible by users, not covered by the
- dict_operation_lock.)
+ bool add_fts = false;
- Because the data dictionary changes were already committed,
- orphaned rows may be left in the statistics table if the
- system crashes. */
+ /* Publish the created fulltext index, if any.
+ Note that a fulltext index can be created without
+ creating the clustered index, if there already exists
+ a suitable FTS_DOC_ID column. If not, one will be
+ created, implying new_clustered */
+ for (ulint i = 0; i < ctx->num_to_add_index; i++) {
+ dict_index_t* index = ctx->add_index[i];
- for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
- const KEY* key
- = ha_alter_info->index_drop_buffer[i];
- dberr_t ret;
- char errstr[1024];
-
- ret = dict_stats_drop_index(
- prebuilt->table->name, key->name,
- errstr, sizeof(errstr));
-
- if (ret != DB_SUCCESS) {
- push_warning(user_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_LOCK_WAIT_TIMEOUT,
- errstr);
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(index->type == DICT_FTS);
+ fts_add_index(index, ctx->new_table);
+ add_fts = true;
}
}
- if (ctx && !dict_table_is_discarded(prebuilt->table)) {
- bool stats_init_called = false;
+ ut_d(dict_table_check_for_dup_indexes(
+ ctx->new_table, CHECK_ALL_COMPLETE));
- for (uint i = 0; i < ctx->num_to_add; i++) {
- dict_index_t* index = ctx->add[i];
+ if (add_fts) {
+ fts_optimize_add_table(ctx->new_table);
+ }
- if (!(index->type & DICT_FTS)) {
+ ut_d(dict_table_check_for_dup_indexes(
+ ctx->new_table, CHECK_ABORTED_OK));
+ ut_a(fts_check_cached_index(ctx->new_table));
- if (!stats_init_called) {
- innobase_copy_frm_flags_from_table_share(
- index->table,
- altered_table->s);
+ if (new_clustered) {
+ /* Since the table has been rebuilt, we remove
+ all persistent statistics corresponding to the
+ old copy of the table (which was renamed to
+ ctx->tmp_name). */
- dict_stats_init(index->table);
+ char errstr[1024];
- stats_init_called = true;
- }
+ DBUG_ASSERT(0 == strcmp(ctx->old_table->name,
+ ctx->tmp_name));
- dict_stats_update_for_index(index);
- }
+ if (dict_stats_drop_table(
+ ctx->new_table->name,
+ errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ push_warning_printf(
+ user_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_ALTER_INFO,
+ "Deleting persistent statistics"
+ " for rebuilt table '%s' in"
+ " InnoDB failed: %s",
+ table->s->table_name.str,
+ errstr);
}
- }
- }
- trx_commit_for_mysql(prebuilt->trx);
+ DBUG_EXECUTE_IF("ib_ddl_crash_before_commit",
+ DBUG_SUICIDE(););
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0 */
+ trx_t* const user_trx = prebuilt->trx;
- log_buffer_flush_to_disk();
+ row_prebuilt_free(ctx->prebuilt, TRUE);
- /* Tell the InnoDB server that there might be work for
- utility threads: */
+ /* Drop the copy of the old table, which was
+ renamed to ctx->tmp_name at the atomic DDL
+ transaction commit. If the system crashes
+ before this is completed, some orphan tables
+ with ctx->tmp_name may be recovered. */
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+ row_merge_drop_table(trx, ctx->old_table);
+ trx_commit_for_mysql(trx);
- srv_active_wake_master_thread();
+ /* Rebuild the prebuilt object. */
+ ctx->prebuilt = row_create_prebuilt(
+ ctx->new_table, altered_table->s->reclength);
+ trx_start_if_not_started(user_trx);
+ user_trx->will_lock++;
+ prebuilt->trx = user_trx;
+ }
+ DBUG_INJECT_CRASH("ib_commit_inplace_crash",
+ crash_inject_count++);
+ }
-func_exit:
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_mysql(trx);
+
+ /* TODO: The following code could be executed
+ while allowing concurrent access to the table
+ (MDL downgrade). */
- if (err == 0 && altered_table->found_next_number_field != 0) {
- dict_table_autoinc_lock(prebuilt->table);
- dict_table_autoinc_initialize(prebuilt->table, max_autoinc);
- dict_table_autoinc_unlock(prebuilt->table);
+ if (new_clustered) {
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (*pctx);
+ DBUG_ASSERT(ctx->need_rebuild());
+
+ alter_stats_rebuild(
+ ctx->new_table, table->s->table_name.str,
+ user_thd);
+ DBUG_INJECT_CRASH("ib_commit_inplace_crash",
+ crash_inject_count++);
+ }
+ } else {
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (*pctx);
+ DBUG_ASSERT(!ctx->need_rebuild());
+
+ alter_stats_norebuild(
+ ha_alter_info, ctx, altered_table,
+ table->s->table_name.str, user_thd);
+ DBUG_INJECT_CRASH("ib_commit_inplace_crash",
+ crash_inject_count++);
+ }
}
+ /* TODO: Also perform DROP TABLE and DROP INDEX after
+ the MDL downgrade. */
+
#ifndef DBUG_OFF
dict_index_t* clust_index = dict_table_get_first_index(
prebuilt->table);
DBUG_ASSERT(!clust_index->online_log);
DBUG_ASSERT(dict_index_get_online_status(clust_index)
== ONLINE_INDEX_COMPLETE);
-#endif /* !DBUG_OFF */
-#ifdef UNIV_DEBUG
for (dict_index_t* index = dict_table_get_first_index(
prebuilt->table);
index;
index = dict_table_get_next_index(index)) {
- ut_ad(!index->to_be_dropped);
- }
-#endif /* UNIV_DEBUG */
-
- if (err == 0) {
- MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
-
-#ifdef UNIV_DDL_DEBUG
- /* Invoke CHECK TABLE atomically after a successful
- ALTER TABLE. */
- TABLE* old_table = table;
- table = altered_table;
- ut_a(check(user_thd, 0) == HA_ADMIN_OK);
- table = old_table;
-#endif /* UNIV_DDL_DEBUG */
+ DBUG_ASSERT(!index->to_be_dropped);
}
+#endif /* !DBUG_OFF */
- DBUG_RETURN(err != 0);
+ MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
+ DBUG_RETURN(false);
}
/**
@param thd - the session
@param start_value - the lower bound
@param max_value - the upper bound (inclusive) */
+UNIV_INTERN
ib_sequence_t::ib_sequence_t(
THD* thd,
ulonglong start_value,
@@ -5366,6 +5891,7 @@ ib_sequence_t::ib_sequence_t(
/**
Postfix increment
@return the next value to insert */
+UNIV_INTERN
ulonglong
ib_sequence_t::operator++(int) UNIV_NOTHROW
{
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 9a95e620af7..cafa745129b 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,7 +24,7 @@ Created July 18, 2007 Vasil Dimov
*******************************************************/
#include <mysqld_error.h>
-#include <sql_acl.h> // PROCESS_ACL
+#include <sql_acl.h>
#include <m_ctype.h>
#include <hash.h>
@@ -35,18 +35,19 @@ Created July 18, 2007 Vasil Dimov
#include <sql_plugin.h>
#include <innodb_priv.h>
-#include "btr0pcur.h" /* for file sys_tables related info. */
+#include "btr0pcur.h"
#include "btr0types.h"
-#include "buf0buddy.h" /* for i_s_cmpmem */
-#include "buf0buf.h" /* for buf_pool */
-#include "dict0dict.h" /* for dict_table_stats_lock() */
-#include "dict0load.h" /* for file sys_tables related info. */
+#include "dict0dict.h"
+#include "dict0load.h"
+#include "buf0buddy.h"
+#include "buf0buf.h"
+#include "ibuf0ibuf.h"
#include "dict0mem.h"
#include "dict0types.h"
-#include "ha_prototypes.h" /* for innobase_convert_name() */
-#include "srv0start.h" /* for srv_was_started */
+#include "ha_prototypes.h"
+#include "srv0start.h"
#include "trx0i_s.h"
-#include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */
+#include "trx0trx.h"
#include "srv0mon.h"
#include "fut0fut.h"
#include "pars0pars.h"
@@ -64,8 +65,12 @@ struct buf_page_desc_t{
ulint type_value; /*!< Page type or page state */
};
-/** Any states greater than FIL_PAGE_TYPE_LAST would be treated as unknown. */
-#define I_S_PAGE_TYPE_UNKNOWN (FIL_PAGE_TYPE_LAST + 1)
+/** Change buffer B-tree page */
+#define I_S_PAGE_TYPE_IBUF (FIL_PAGE_TYPE_LAST + 1)
+
+/** Reported for any page whose on-disk type is greater than
+FIL_PAGE_TYPE_LAST (unknown page type). */
+#define I_S_PAGE_TYPE_UNKNOWN (I_S_PAGE_TYPE_IBUF + 1)
/** We also define I_S_PAGE_TYPE_INDEX as the Index Page's position
in i_s_page_type[] array */
@@ -86,6 +91,7 @@ static buf_page_desc_t i_s_page_type[] = {
{"BLOB", FIL_PAGE_TYPE_BLOB},
{"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB},
{"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2},
+ {"IBUF_INDEX", I_S_PAGE_TYPE_IBUF},
{"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN}
};
@@ -2900,8 +2906,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_default_stopword =
};
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED
-INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED and
-INFORMATION_SCHEMA.INNODB_FT_INSERTED */
+and INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED */
static ST_FIELD_INFO i_s_fts_doc_fields_info[] =
{
#define I_S_FTS_DOC_ID 0
@@ -3151,139 +3156,6 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_being_deleted =
STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
};
-/*******************************************************************//**
-Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INSERTED.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_inserted_fill(
-/*==================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ) /*!< in: condition (ignored) */
-{
- Field** fields;
- TABLE* table = (TABLE*) tables->table;
- trx_t* trx;
- fts_table_t fts_table;
- fts_doc_ids_t* inserted;
- dict_table_t* user_table;
-
- DBUG_ENTER("i_s_fts_inserted_fill");
-
- /* deny access to non-superusers */
- if (check_global_access(thd, PROCESS_ACL)) {
- DBUG_RETURN(0);
- }
-
- if (!fts_internal_tbl_name) {
- DBUG_RETURN(0);
- }
-
- user_table = dict_table_open_on_name(
- fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
-
- if (!user_table) {
- DBUG_RETURN(0);
- }
-
- inserted = fts_doc_ids_create();
-
- trx = trx_allocate_for_background();
- trx->op_info = "Select for FTS ADDED Table";
-
- FTS_INIT_FTS_TABLE(&fts_table, "ADDED", FTS_COMMON_TABLE, user_table);
-
- fts_table_fetch_doc_ids(trx, &fts_table, inserted);
-
- fields = table->field;
-
- for (ulint j = 0; j < ib_vector_size(inserted->doc_ids); ++j) {
- doc_id_t doc_id;
-
- doc_id = *(doc_id_t*) ib_vector_get_const(inserted->doc_ids, j);
-
- OK(fields[I_S_FTS_DOC_ID]->store((longlong) doc_id, true));
-
- OK(schema_table_store_record(thd, table));
- }
-
- trx_free_for_background(trx);
-
- fts_doc_ids_free(inserted);
-
- dict_table_close(user_table, FALSE, FALSE);
-
- DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_INSERTED
-@return 0 on success */
-static
-int
-i_s_fts_inserted_init(
-/*==================*/
- void* p) /*!< in/out: table schema object */
-{
- DBUG_ENTER("i_s_fts_inserted_init");
- ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
-
- schema->fields_info = i_s_fts_doc_fields_info;
- schema->fill_table = i_s_fts_inserted_fill;
-
- DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_inserted =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_FT_INSERTED"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "INNODB AUXILIARY FTS INSERTED TABLE"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_fts_inserted_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED and
INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE */
static ST_FIELD_INFO i_s_fts_index_fields_info[] =
@@ -3875,14 +3747,8 @@ static ST_FIELD_INFO i_s_fts_config_fields_info[] =
static const char* fts_config_key[] = {
FTS_OPTIMIZE_LIMIT_IN_SECS,
FTS_SYNCED_DOC_ID,
- FTS_LAST_OPTIMIZED_WORD,
- FTS_TOTAL_DELETED_COUNT,
- FTS_TOTAL_WORD_COUNT,
- FTS_OPTIMIZE_START_TIME,
- FTS_OPTIMIZE_END_TIME,
FTS_STOPWORD_TABLE_NAME,
FTS_USE_STOPWORD,
- FTS_TABLE_STATE,
NULL
};
@@ -4466,6 +4332,7 @@ i_s_innodb_buffer_stats_fill_table(
buf_pool_info_t* pool_info;
DBUG_ENTER("i_s_innodb_buffer_fill_general");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* Only allow the PROCESS privilege holder to access the stats */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -4879,7 +4746,7 @@ i_s_innodb_buffer_page_fill(
/* First three states are for compression pages and
are not states we would get as we scan pages through
buffer blocks */
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
state_str = NULL;
@@ -4951,14 +4818,21 @@ i_s_innodb_set_page_type(
if (page_type == FIL_PAGE_INDEX) {
const page_t* page = (const page_t*) frame;
+ page_info->index_id = btr_page_get_index_id(page);
+
/* FIL_PAGE_INDEX is a bit special, its value
is defined as 17855, so we cannot use FIL_PAGE_INDEX
to index into i_s_page_type[] array, its array index
in the i_s_page_type[] array is I_S_PAGE_TYPE_INDEX
- (1) */
- page_info->page_type = I_S_PAGE_TYPE_INDEX;
-
- page_info->index_id = btr_page_get_index_id(page);
+ (1) for index pages or I_S_PAGE_TYPE_IBUF for
+ change buffer index pages */
+ if (page_info->index_id
+ == static_cast<index_id_t>(DICT_IBUF_ID_MIN
+ + IBUF_SPACE_ID)) {
+ page_info->page_type = I_S_PAGE_TYPE_IBUF;
+ } else {
+ page_info->page_type = I_S_PAGE_TYPE_INDEX;
+ }
page_info->data_size = (ulint)(page_header_get_field(
page, PAGE_HEAP_TOP) - (page_is_comp(page)
@@ -4967,7 +4841,7 @@ i_s_innodb_set_page_type(
- page_header_get_field(page, PAGE_GARBAGE));
page_info->num_recs = page_get_n_recs(page);
- } else if (page_type >= I_S_PAGE_TYPE_UNKNOWN) {
+ } else if (page_type > FIL_PAGE_TYPE_LAST) {
/* Encountered an unknown page type */
page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
} else {
@@ -5039,6 +4913,16 @@ i_s_innodb_buffer_page_get_info(
page_info->freed_page_clock = bpage->freed_page_clock;
+ switch (buf_page_get_io_fix(bpage)) {
+ case BUF_IO_NONE:
+ case BUF_IO_WRITE:
+ case BUF_IO_PIN:
+ break;
+ case BUF_IO_READ:
+ page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
+ return;
+ }
+
if (page_info->page_state == BUF_BLOCK_FILE_PAGE) {
const buf_block_t*block;
@@ -5075,6 +4959,7 @@ i_s_innodb_fill_buffer_pool(
mem_heap_t* heap;
DBUG_ENTER("i_s_innodb_fill_buffer_pool");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
heap = mem_heap_create(10000);
@@ -5574,7 +5459,7 @@ i_s_innodb_buf_page_lru_fill(
state_str = "NO";
break;
/* We should not see following states */
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_MEMORY:
@@ -5640,6 +5525,7 @@ i_s_innodb_fill_buffer_lru(
ulint lru_len;
DBUG_ENTER("i_s_innodb_fill_buffer_lru");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* Obtain buf_pool mutex before allocate info_buffer, since
UT_LIST_GET_LEN(buf_pool->LRU) could change */
@@ -5966,6 +5852,7 @@ i_s_sys_tables_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_tables_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -6262,6 +6149,7 @@ i_s_sys_tables_fill_table_stats(
mtr_t mtr;
DBUG_ENTER("i_s_sys_tables_fill_table_stats");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -6511,6 +6399,7 @@ i_s_sys_indexes_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_indexes_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -6748,6 +6637,7 @@ i_s_sys_columns_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_columns_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -6951,6 +6841,7 @@ i_s_sys_fields_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_fields_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -7182,6 +7073,7 @@ i_s_sys_foreign_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_foreign_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -7396,6 +7288,7 @@ i_s_sys_foreign_cols_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_foreign_cols_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -7660,6 +7553,7 @@ i_s_sys_tablespaces_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_tablespaces_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -7850,6 +7744,7 @@ i_s_sys_datafiles_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_datafiles_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
diff --git a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
index 05f6fd8ecd2..a2b324cb314 100644
--- a/storage/innobase/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,7 +39,6 @@ extern struct st_maria_plugin i_s_innodb_cmpmem;
extern struct st_maria_plugin i_s_innodb_cmpmem_reset;
extern struct st_maria_plugin i_s_innodb_metrics;
extern struct st_maria_plugin i_s_innodb_ft_default_stopword;
-extern struct st_maria_plugin i_s_innodb_ft_inserted;
extern struct st_maria_plugin i_s_innodb_ft_deleted;
extern struct st_maria_plugin i_s_innodb_ft_being_deleted;
extern struct st_maria_plugin i_s_innodb_ft_index_cache;
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index af0699f8c67..fd3b13d2cd3 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -61,6 +61,7 @@ UNIV_INTERN my_bool srv_ibuf_disable_background_merge;
#include "que0que.h"
#include "srv0start.h" /* srv_shutdown_state */
#include "ha_prototypes.h"
+#include "rem0cmp.h"
/* STRUCTURE OF AN INSERT BUFFER RECORD
@@ -416,7 +417,7 @@ ibuf_tree_root_get(
ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
- ut_ad(ibuf->empty == (page_get_n_recs(root) == 0));
+ ut_ad(ibuf->empty == page_is_empty(root));
return(root);
}
@@ -564,7 +565,7 @@ ibuf_init_at_db_start(void)
ibuf_size_update(root, &mtr);
mutex_exit(&ibuf_mutex);
- ibuf->empty = (page_get_n_recs(root) == 0);
+ ibuf->empty = page_is_empty(root);
ibuf_mtr_commit(&mtr);
heap = mem_heap_create(450);
@@ -2567,7 +2568,7 @@ ulint
ibuf_merge_pages(
/*=============*/
ulint* n_pages, /*!< out: number of pages to which merged */
- bool sync) /*!< in: TRUE if the caller wants to wait for
+ bool sync) /*!< in: true if the caller wants to wait for
the issued read with the highest tablespace
address to complete */
{
@@ -2589,7 +2590,7 @@ ibuf_merge_pages(
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
- if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
+ if (page_is_empty(btr_pcur_get_page(&pcur))) {
/* If a B-tree page is empty, it must be the root page
and the whole B-tree must be empty. InnoDB does not
allow empty B-tree pages other than the root. */
@@ -2633,7 +2634,8 @@ ibuf_get_table(
{
rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
- dict_table_t* table = dict_table_open_on_id(table_id, FALSE, FALSE);
+ dict_table_t* table = dict_table_open_on_id(
+ table_id, FALSE, DICT_TABLE_OP_NORMAL);
rw_lock_s_unlock_gen(&dict_operation_lock, 0);
@@ -2674,7 +2676,7 @@ ibuf_merge_space(
ulint spaces[IBUF_MAX_N_PAGES_MERGED];
ib_int64_t versions[IBUF_MAX_N_PAGES_MERGED];
- if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
+ if (page_is_empty(btr_pcur_get_page(&pcur))) {
/* If a B-tree page is empty, it must be the root page
and the whole B-tree must be empty. InnoDB does not
allow empty B-tree pages other than the root. */
@@ -2712,7 +2714,7 @@ ibuf_merge_space(
#endif /* UNIV_DEBUG */
buf_read_ibuf_merge_pages(
- TRUE, spaces, versions, pages, *n_pages);
+ true, spaces, versions, pages, *n_pages);
}
return(sum_sizes);
@@ -3697,7 +3699,7 @@ fail_exit:
ut_ad(page_get_page_no(root)
== FSP_IBUF_TREE_ROOT_PAGE_NO);
- ibuf->empty = (page_get_n_recs(root) == 0);
+ ibuf->empty = page_is_empty(root);
}
} else {
ut_ad(mode == BTR_MODIFY_TREE);
@@ -3726,7 +3728,7 @@ fail_exit:
mutex_exit(&ibuf_pessimistic_insert_mutex);
ibuf_size_update(root, &mtr);
mutex_exit(&ibuf_mutex);
- ibuf->empty = (page_get_n_recs(root) == 0);
+ ibuf->empty = page_is_empty(root);
block = btr_cur_get_block(cursor);
ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
@@ -3768,7 +3770,7 @@ func_exit:
#ifdef UNIV_IBUF_DEBUG
ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
#endif
- buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions,
+ buf_read_ibuf_merge_pages(false, space_ids, space_versions,
page_nos, n_stored);
}
@@ -3798,6 +3800,10 @@ ibuf_insert(
/* Read the settable global variable ibuf_use only once in
this function, so that we will have a consistent view of it. */
ibuf_use_t use = ibuf_use;
+ DBUG_ENTER("ibuf_insert");
+
+ DBUG_PRINT("ibuf", ("op: %d, space: %ld, page_no: %ld",
+ op, space, page_no));
ut_ad(dtuple_check_typed(entry));
ut_ad(ut_is_2pow(zip_size));
@@ -3812,7 +3818,7 @@ ibuf_insert(
case IBUF_USE_NONE:
case IBUF_USE_DELETE:
case IBUF_USE_DELETE_MARK:
- return(FALSE);
+ DBUG_RETURN(FALSE);
case IBUF_USE_INSERT:
case IBUF_USE_INSERT_DELETE_MARK:
case IBUF_USE_ALL:
@@ -3825,7 +3831,7 @@ ibuf_insert(
switch (use) {
case IBUF_USE_NONE:
case IBUF_USE_INSERT:
- return(FALSE);
+ DBUG_RETURN(FALSE);
case IBUF_USE_DELETE_MARK:
case IBUF_USE_DELETE:
case IBUF_USE_INSERT_DELETE_MARK:
@@ -3841,7 +3847,7 @@ ibuf_insert(
case IBUF_USE_NONE:
case IBUF_USE_INSERT:
case IBUF_USE_INSERT_DELETE_MARK:
- return(FALSE);
+ DBUG_RETURN(FALSE);
case IBUF_USE_DELETE_MARK:
case IBUF_USE_DELETE:
case IBUF_USE_ALL:
@@ -3883,7 +3889,7 @@ check_watch:
is being buffered, have this request executed
directly on the page in the buffer pool after the
buffered entries for this page have been merged. */
- return(FALSE);
+ DBUG_RETURN(FALSE);
}
}
@@ -3894,7 +3900,7 @@ skip_watch:
>= page_get_free_space_of_empty(dict_table_is_comp(index->table))
/ 2) {
- return(FALSE);
+ DBUG_RETURN(FALSE);
}
err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
@@ -3911,20 +3917,21 @@ skip_watch:
/* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
page_no, index->name); */
#endif
- return(TRUE);
+ DBUG_RETURN(TRUE);
} else {
ut_a(err == DB_STRONG_FAIL || err == DB_TOO_BIG_RECORD);
- return(FALSE);
+ DBUG_RETURN(FALSE);
}
}
/********************************************************************//**
During merge, inserts to an index page a secondary index entry extracted
-from the insert buffer. */
+from the insert buffer.
+@return newly inserted record */
static __attribute__((nonnull))
-void
+rec_t*
ibuf_insert_to_index_page_low(
/*==========================*/
const dtuple_t* entry, /*!< in: buffered entry to insert */
@@ -3943,22 +3950,31 @@ ibuf_insert_to_index_page_low(
ulint zip_size;
const page_t* bitmap_page;
ulint old_bits;
+ rec_t* rec;
+ DBUG_ENTER("ibuf_insert_to_index_page_low");
- if (page_cur_tuple_insert(
- page_cur, entry, index, offsets, &heap, 0, mtr) != NULL) {
- return;
+ rec = page_cur_tuple_insert(page_cur, entry, index,
+ offsets, &heap, 0, mtr);
+ if (rec != NULL) {
+ DBUG_RETURN(rec);
}
+ /* Page reorganization or recompression should already have
+ been attempted by page_cur_tuple_insert(). Besides, per
+ ibuf_index_page_calc_free_zip() the page should not have been
+ recompressed or reorganized. */
+ ut_ad(!buf_block_get_page_zip(block));
+
/* If the record did not fit, reorganize */
- btr_page_reorganize(block, index, mtr);
- page_cur_search(block, index, entry, PAGE_CUR_LE, page_cur);
+ btr_page_reorganize(page_cur, index, mtr);
/* This time the record must fit */
- if (page_cur_tuple_insert(page_cur, entry, index,
- offsets, &heap, 0, mtr) != NULL) {
- return;
+ rec = page_cur_tuple_insert(page_cur, entry, index,
+ offsets, &heap, 0, mtr);
+ if (rec != NULL) {
+ DBUG_RETURN(rec);
}
page = buf_block_get_frame(block);
@@ -3992,6 +4008,7 @@ ibuf_insert_to_index_page_low(
fputs("InnoDB: Submit a detailed bug report"
" to http://bugs.mysql.com\n", stderr);
ut_ad(0);
+ DBUG_RETURN(NULL);
}
/************************************************************************
@@ -4014,6 +4031,13 @@ ibuf_insert_to_index_page(
ulint* offsets;
mem_heap_t* heap;
+ DBUG_ENTER("ibuf_insert_to_index_page");
+
+ DBUG_PRINT("ibuf", ("page_no: %ld", buf_block_get_page_no(block)));
+ DBUG_PRINT("ibuf", ("index name: %s", index->name));
+ DBUG_PRINT("ibuf", ("online status: %d",
+ dict_index_get_online_status(index)));
+
ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
ut_ad(!buf_block_align(page)->index);
@@ -4057,7 +4081,7 @@ dump:
"InnoDB: Submit a detailed bug report to"
" http://bugs.mysql.com!\n", stderr);
- return;
+ DBUG_VOID_RETURN;
}
low_match = page_cur_search(block, index, entry,
@@ -4105,15 +4129,19 @@ dump:
if (!row_upd_changes_field_size_or_external(index, offsets,
update)
&& (!page_zip || btr_cur_update_alloc_zip(
- page_zip, block, index,
- rec_offs_size(offsets), FALSE, mtr))) {
+ page_zip, &page_cur, index, offsets,
+ rec_offs_size(offsets), false, mtr))) {
/* This is the easy case. Do something similar
to btr_cur_update_in_place(). */
+ rec = page_cur_get_rec(&page_cur);
row_upd_rec_in_place(rec, index, offsets,
update, page_zip);
goto updated_in_place;
}
+ /* btr_cur_update_alloc_zip() may have changed this */
+ rec = page_cur_get_rec(&page_cur);
+
/* A collation may identify values that differ in
storage length.
Some examples (1 or 2 bytes):
@@ -4136,10 +4164,11 @@ dump:
lock_rec_store_on_page_infimum(block, rec);
page_cur_delete_rec(&page_cur, index, offsets, mtr);
page_cur_move_to_prev(&page_cur);
+ rec = ibuf_insert_to_index_page_low(entry, block, index,
+ &offsets, heap, mtr,
+ &page_cur);
- ibuf_insert_to_index_page_low(entry, block, index,
- &offsets, heap, mtr,
- &page_cur);
+ ut_ad(!cmp_dtuple_rec(entry, rec, offsets));
lock_rec_restore_from_page_infimum(block, rec, block);
} else {
offsets = NULL;
@@ -4147,9 +4176,10 @@ dump:
&offsets, heap, mtr,
&page_cur);
}
-
updated_in_place:
mem_heap_free(heap);
+
+ DBUG_VOID_RETURN;
}
/****************************************************************//**
@@ -4378,7 +4408,7 @@ Deletes from ibuf the record on which pcur is positioned. If we have to
resort to a pessimistic delete, this function commits mtr and closes
the cursor.
@return TRUE if mtr was committed and pcur closed in this operation */
-static
+static __attribute__((warn_unused_result))
ibool
ibuf_delete_rec(
/*============*/
@@ -4411,7 +4441,7 @@ ibuf_delete_rec(
btr_cur_set_deleted_flag_for_ibuf(
btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
ibuf_mtr_commit(mtr);
- log_write_up_to(IB_ULONGLONG_MAX, LOG_WAIT_ALL_GROUPS, TRUE);
+ log_write_up_to(LSN_MAX, LOG_WAIT_ALL_GROUPS, TRUE);
DBUG_SUICIDE();
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
@@ -4420,7 +4450,7 @@ ibuf_delete_rec(
0, mtr);
if (success) {
- if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) {
+ if (page_is_empty(btr_pcur_get_page(pcur))) {
/* If a B-tree page is empty, it must be the root page
and the whole B-tree must be empty. InnoDB does not
allow empty B-tree pages other than the root. */
@@ -4433,7 +4463,7 @@ ibuf_delete_rec(
/* ibuf->empty is protected by the root page latch.
Before the deletion, it had to be FALSE. */
ut_ad(!ibuf->empty);
- ibuf->empty = TRUE;
+ ibuf->empty = true;
}
#ifdef UNIV_IBUF_COUNT_DEBUG
@@ -4484,7 +4514,7 @@ ibuf_delete_rec(
ibuf_size_update(root, mtr);
mutex_exit(&ibuf_mutex);
- ibuf->empty = (page_get_n_recs(root) == 0);
+ ibuf->empty = page_is_empty(root);
ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
func_exit:
@@ -4677,6 +4707,12 @@ ibuf_merge_or_delete_for_page(
loop:
ibuf_mtr_start(&mtr);
+ /* Position pcur in the insert buffer at the first entry for this
+ index page */
+ btr_pcur_open_on_user_rec(
+ ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
+
if (block) {
ibool success;
@@ -4695,12 +4731,6 @@ loop:
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
}
- /* Position pcur in the insert buffer at the first entry for this
- index page */
- btr_pcur_open_on_user_rec(
- ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
- &pcur, &mtr);
-
if (!btr_pcur_is_on_user_rec(&pcur)) {
ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
@@ -4785,6 +4815,16 @@ loop:
== page_no);
ut_ad(ibuf_rec_get_space(&mtr, rec) == space);
+ /* Mark the change buffer record processed,
+ so that it will not be merged again in case
+ the server crashes between the following
+ mtr_commit() and the subsequent mtr_commit()
+ of deleting the change buffer record. */
+
+ btr_cur_set_deleted_flag_for_ibuf(
+ btr_pcur_get_rec(&pcur), NULL,
+ TRUE, &mtr);
+
btr_pcur_store_position(&pcur, &mtr);
ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr);
@@ -4832,6 +4872,7 @@ loop:
/* Deletion was pessimistic and mtr was committed:
we start from the beginning again */
+ ut_ad(mtr.state == MTR_COMMITTED);
goto loop;
} else if (btr_pcur_is_after_last_on_page(&pcur)) {
ibuf_mtr_commit(&mtr);
@@ -4962,6 +5003,7 @@ loop:
/* Deletion was pessimistic and mtr was committed:
we start from the beginning again */
+ ut_ad(mtr.state == MTR_COMMITTED);
goto loop;
}
@@ -4991,13 +5033,13 @@ leave_loop:
/******************************************************************//**
Looks if the insert buffer is empty.
-@return TRUE if empty */
+@return true if empty */
UNIV_INTERN
-ibool
+bool
ibuf_is_empty(void)
/*===============*/
{
- ibool is_empty;
+ bool is_empty;
const page_t* root;
mtr_t mtr;
@@ -5007,7 +5049,7 @@ ibuf_is_empty(void)
root = ibuf_tree_root_get(&mtr);
mutex_exit(&ibuf_mutex);
- is_empty = (page_get_n_recs(root) == 0);
+ is_empty = page_is_empty(root);
ut_a(is_empty == ibuf->empty);
ibuf_mtr_commit(&mtr);
diff --git a/storage/innobase/include/api0api.h b/storage/innobase/include/api0api.h
index 5b7bfdbdde5..1d6aaab60bc 100644
--- a/storage/innobase/include/api0api.h
+++ b/storage/innobase/include/api0api.h
@@ -728,7 +728,9 @@ ib_col_set_value(
ib_tpl_t ib_tpl, /*!< in: tuple instance */
ib_ulint_t col_no, /*!< in: column index in tuple */
const void* src, /*!< in: data value */
- ib_ulint_t len); /*!< in: data value len */
+ ib_ulint_t len, /*!< in: data value len */
+ ib_bool_t need_cpy); /*!< in: if need memcpy */
+
/*****************************************************************//**
Get the size of the data available in the column the tuple.
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index b99b0c0cd7b..305acf7e322 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -450,18 +450,48 @@ btr_root_raise_and_insert(
__attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Reorganizes an index page.
-IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
-page of a non-clustered index, the caller must update the insert
-buffer free bits in the same mini-transaction in such a way that the
-modification will be redo-logged.
-@return TRUE on success, FALSE on failure */
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@retval true if the operation was successful
+@retval false if it is a compressed page, and recompression failed */
UNIV_INTERN
-ibool
+bool
+btr_page_reorganize_low(
+/*====================*/
+ bool recovery,/*!< in: true if called in recovery:
+ locks should not be updated, i.e.,
+ there cannot exist locks on the
+ page, and a hash index should not be
+ dropped: it cannot exist */
+ ulint z_level,/*!< in: compression level to be used
+ if dealing with compressed page */
+ page_cur_t* cursor, /*!< in/out: page cursor */
+ dict_index_t* index, /*!< in: the index tree of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Reorganizes an index page.
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@retval true if the operation was successful
+@retval false if it is a compressed page, and recompression failed */
+UNIV_INTERN
+bool
btr_page_reorganize(
/*================*/
- buf_block_t* block, /*!< in: page to be reorganized */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
+ page_cur_t* cursor, /*!< in/out: page cursor */
+ dict_index_t* index, /*!< in: the index tree of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
__attribute__((nonnull));
/*************************************************************//**
Decides if the page should be split at the convergence point of
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index edba1d1d77f..e2bc599d598 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -45,7 +45,11 @@ enum {
BTR_KEEP_POS_FLAG = 8,
/** the caller is creating the index or wants to bypass the
index->info.online creation log */
- BTR_CREATE_FLAG = 16
+ BTR_CREATE_FLAG = 16,
+ /** the caller of btr_cur_optimistic_update() or
+ btr_cur_update_in_place() will take care of
+ updating IBUF_BITMAP_FREE */
+ BTR_KEEP_IBUF_BITMAP = 32
};
#ifndef UNIV_HOTBACKUP
@@ -225,10 +229,11 @@ btr_cur_optimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr) /*!< in: mtr; if this function returns
- DB_SUCCESS on a leaf page of a secondary
- index in a compressed tablespace, the
- mtr must be committed before latching
+ mtr_t* mtr) /*!< in/out: mini-transaction;
+ if this function returns DB_SUCCESS on
+ a leaf page of a secondary index in a
+ compressed tablespace, the caller must
+ mtr_commit(mtr) before latching
any further pages */
__attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
@@ -260,27 +265,48 @@ btr_cur_pessimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
__attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
See if there is enough place in the page modification log to log
an update-in-place.
-@return TRUE if enough place */
+
+@retval false if out of space; IBUF_BITMAP_FREE will be reset
+outside mtr if the page was recompressed
+@retval true if enough place;
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
+a secondary index leaf page. This has to be done either within the
+same mini-transaction, or by invoking ibuf_reset_free_bits() before
+mtr_commit(mtr). */
UNIV_INTERN
-ibool
-btr_cur_update_alloc_zip(
-/*=====================*/
+bool
+btr_cur_update_alloc_zip_func(
+/*==========================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
- buf_block_t* block, /*!< in/out: buffer page */
- dict_index_t* index, /*!< in: the index corresponding to the block */
+ page_cur_t* cursor, /*!< in/out: B-tree page cursor */
+ dict_index_t* index, /*!< in: the index corresponding to cursor */
+#ifdef UNIV_DEBUG
+ ulint* offsets,/*!< in/out: offsets of the cursor record */
+#endif /* UNIV_DEBUG */
ulint length, /*!< in: size needed */
- ibool create, /*!< in: TRUE=delete-and-insert,
- FALSE=update-in-place */
- mtr_t* mtr) /*!< in: mini-transaction */
+ bool create, /*!< in: true=delete-and-insert,
+ false=update-in-place */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
__attribute__((nonnull, warn_unused_result));
+#ifdef UNIV_DEBUG
+# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr) \
+ btr_cur_update_alloc_zip_func(page_zip,cursor,index,offsets,len,cr,mtr)
+#else /* UNIV_DEBUG */
+# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr) \
+ btr_cur_update_alloc_zip_func(page_zip,cursor,index,len,cr,mtr)
+#endif /* UNIV_DEBUG */
/*************************************************************//**
Updates a record when the update causes no size changes in its fields.
-@return DB_SUCCESS or error number */
+@return locking or undo log related error code, or
+@retval DB_SUCCESS on success
+@retval DB_ZIP_OVERFLOW if there is not enough space left
+on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
UNIV_INTERN
dberr_t
btr_cur_update_in_place(
@@ -289,24 +315,28 @@ btr_cur_update_in_place(
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- const ulint* offsets,/*!< in: offsets on cursor->page_cur.rec */
+ ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread, or NULL if
- appropriate flags are set */
+ que_thr_t* thr, /*!< in: query thread */
trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
- __attribute__((warn_unused_result, nonnull(2,3,4,8)));
+ mtr_t* mtr) /*!< in/out: mini-transaction; if this
+ is a secondary index, the caller must
+ mtr_commit(mtr) before latching any
+ further pages */
+ __attribute__((warn_unused_result, nonnull));
/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended.
-@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
-DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
-there is not enough space left on the compressed page */
+@return error code, including
+@retval DB_SUCCESS on success
+@retval DB_OVERFLOW if the updated record does not fit
+@retval DB_UNDERFLOW if the page would become too empty
+@retval DB_ZIP_OVERFLOW if there is not enough space left
+on the compressed page */
UNIV_INTERN
dberr_t
btr_cur_optimistic_update(
@@ -316,17 +346,18 @@ btr_cur_optimistic_update(
cursor stays valid and positioned on the
same record */
ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread, or NULL if
- appropriate flags are set */
+ que_thr_t* thr, /*!< in: query thread */
trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
- __attribute__((warn_unused_result, nonnull(2,3,4,5,9)));
+ mtr_t* mtr) /*!< in/out: mini-transaction; if this
+ is a secondary index, the caller must
+ mtr_commit(mtr) before latching any
+ further pages */
+ __attribute__((warn_unused_result, nonnull));
/*************************************************************//**
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
@@ -356,12 +387,11 @@ btr_cur_pessimistic_update(
the values in update vector have no effect */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread, or NULL if
- appropriate flags are set */
+ que_thr_t* thr, /*!< in: query thread */
trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
- __attribute__((warn_unused_result, nonnull(2,3,4,5,6,7,11)));
+ mtr_t* mtr) /*!< in/out: mini-transaction; must be committed
+ before latching any further pages */
+ __attribute__((warn_unused_result, nonnull));
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -377,8 +407,8 @@ btr_cur_del_mark_set_clust_rec(
dict_index_t* index, /*!< in: clustered index of the record */
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull));
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
@@ -390,7 +420,8 @@ btr_cur_del_mark_set_sec_rec(
btr_cur_t* cursor, /*!< in: cursor */
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 74a6e203808..7ad6339e63b 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -104,9 +104,7 @@ extern buf_block_t* back_block2; /*!< second block, for page reorganize */
The enumeration values must be 0..7. */
enum buf_page_state {
- BUF_BLOCK_ZIP_FREE = 0, /*!< contains a free
- compressed page */
- BUF_BLOCK_POOL_WATCH = 0, /*!< a sentinel for the buffer pool
+ BUF_BLOCK_POOL_WATCH, /*!< a sentinel for the buffer pool
watch, element of buf_pool->watch[] */
BUF_BLOCK_ZIP_PAGE, /*!< contains a clean
compressed page */
@@ -897,7 +895,7 @@ buf_page_get_mutex(
Get the flush type of a page.
@return flush type */
UNIV_INLINE
-enum buf_flush
+buf_flush_t
buf_page_get_flush_type(
/*====================*/
const buf_page_t* bpage) /*!< in: buffer page */
@@ -909,7 +907,7 @@ void
buf_page_set_flush_type(
/*====================*/
buf_page_t* bpage, /*!< in: buffer page */
- enum buf_flush flush_type); /*!< in: flush type */
+ buf_flush_t flush_type); /*!< in: flush type */
/*********************************************************************//**
Map a block to a file page. */
UNIV_INLINE
@@ -1451,7 +1449,7 @@ struct buf_page_t{
unsigned flush_type:2; /*!< if this block is currently being
flushed to disk, this tells the
flush_type.
- @see enum buf_flush */
+ @see buf_flush_t */
unsigned io_fix:2; /*!< type of pending I/O operation;
also protected by buf_pool->mutex
@see enum buf_io_fix */
@@ -1495,7 +1493,6 @@ struct buf_page_t{
- BUF_BLOCK_FILE_PAGE: flush_list
- BUF_BLOCK_ZIP_DIRTY: flush_list
- BUF_BLOCK_ZIP_PAGE: zip_clean
- - BUF_BLOCK_ZIP_FREE: zip_free[]
If bpage is part of flush_list
then the node pointers are
@@ -1729,6 +1726,26 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
+/** Struct that is embedded in the free zip blocks */
+struct buf_buddy_free_t {
+ union {
+ ulint size; /*!< size of the block */
+ byte bytes[FIL_PAGE_DATA];
+ /*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID]
+ == BUF_BUDDY_FREE_STAMP denotes a free
+ block. If the space_id field of buddy
+ block != BUF_BUDDY_FREE_STAMP, the block
+ is not in any zip_free list. If the
+ space_id is BUF_BUDDY_FREE_STAMP then
+ stamp[0] will contain the
+ buddy block size. */
+ } stamp;
+
+ buf_page_t bpage; /*!< Embedded bpage descriptor */
+ UT_LIST_NODE_T(buf_buddy_free_t) list;
+ /*!< Node of zip_free list */
+};
+
/** @brief The buffer pool statistics structure. */
struct buf_pool_stat_t{
ulint n_page_gets; /*!< number of page gets performed;
@@ -1839,7 +1856,12 @@ struct buf_pool_t{
and bpage::list pointers when
the bpage is on flush_list. It
also protects writes to
- bpage::oldest_modification */
+ bpage::oldest_modification and
+ flush_list_hp */
+ const buf_page_t* flush_list_hp;/*!< "hazard pointer"
+ used during scan of flush_list
+ while doing flush list batch.
+ Protected by flush_list_mutex */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
/*!< base node of the modified block
list */
@@ -1925,7 +1947,7 @@ struct buf_pool_t{
UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
/*!< unmodified compressed pages */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES_MAX];
+ UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX];
/*!< buddy free lists */
buf_page_t* watch;
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index b310efdf451..08b31a59da3 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -205,7 +205,7 @@ buf_page_get_state(
#ifdef UNIV_DEBUG
switch (state) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
case BUF_BLOCK_NOT_USED:
@@ -245,7 +245,7 @@ buf_page_set_state(
enum buf_page_state old_state = buf_page_get_state(bpage);
switch (old_state) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
ut_error;
break;
case BUF_BLOCK_ZIP_PAGE:
@@ -300,9 +300,7 @@ buf_page_in_file(
const buf_page_t* bpage) /*!< in: pointer to control block */
{
switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
- /* This is a free page in buf_pool->zip_free[].
- Such pages should only be accessed by the buddy allocator. */
+ case BUF_BLOCK_POOL_WATCH:
ut_error;
break;
case BUF_BLOCK_ZIP_PAGE:
@@ -347,7 +345,7 @@ buf_page_get_mutex(
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
ut_error;
return(NULL);
case BUF_BLOCK_ZIP_PAGE:
@@ -362,12 +360,12 @@ buf_page_get_mutex(
Get the flush type of a page.
@return flush type */
UNIV_INLINE
-enum buf_flush
+buf_flush_t
buf_page_get_flush_type(
/*====================*/
const buf_page_t* bpage) /*!< in: buffer page */
{
- enum buf_flush flush_type = (enum buf_flush) bpage->flush_type;
+ buf_flush_t flush_type = (buf_flush_t) bpage->flush_type;
#ifdef UNIV_DEBUG
switch (flush_type) {
@@ -389,7 +387,7 @@ void
buf_page_set_flush_type(
/*====================*/
buf_page_t* bpage, /*!< in: buffer page */
- enum buf_flush flush_type) /*!< in: flush type */
+ buf_flush_t flush_type) /*!< in: flush type */
{
bpage->flush_type = flush_type;
ut_ad(buf_page_get_flush_type(bpage) == flush_type);
@@ -666,7 +664,7 @@ buf_block_get_frame(
ut_ad(block);
switch (buf_block_get_state(block)) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
case BUF_BLOCK_NOT_USED:
@@ -1311,7 +1309,7 @@ buf_page_release_zip(
bpage->buf_fix_count--;
mutex_exit(&block->mutex);
return;
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index 357ba697f6a..1b9336f4002 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -63,12 +63,13 @@ void
buf_dblwr_free(void);
/*================*/
/********************************************************************//**
-Updates the doublewrite buffer when an IO request that is part of an
-LRU or flush batch is completed. */
+Updates the doublewrite buffer when an IO request is completed. */
UNIV_INTERN
void
-buf_dblwr_update(void);
-/*==================*/
+buf_dblwr_update(
+/*=============*/
+ const buf_page_t* bpage, /*!< in: buffer block descriptor */
+ buf_flush_t flush_type);/*!< in: flush type */
/****************************************************************//**
Determines if a page number is located inside the doublewrite buffer.
@return TRUE if the location is inside the two blocks of the
@@ -109,36 +110,41 @@ UNIV_INTERN
void
buf_dblwr_write_single_page(
/*========================*/
- buf_page_t* bpage); /*!< in: buffer block to write */
+ buf_page_t* bpage, /*!< in: buffer block to write */
+ bool sync); /*!< in: true if sync IO requested */
/** Doublewrite control struct */
struct buf_dblwr_t{
- ib_mutex_t mutex; /*!< mutex protecting the first_free field and
- write_buf */
- ulint block1; /*!< the page number of the first
+ ib_mutex_t mutex; /*!< mutex protecting the first_free
+ field and write_buf */
+ ulint block1; /*!< the page number of the first
doublewrite block (64 pages) */
- ulint block2; /*!< page number of the second block */
- ulint first_free; /*!< first free position in write_buf measured
- in units of UNIV_PAGE_SIZE */
- ulint s_reserved; /*!< number of slots currently reserved
- for single page flushes. */
- ulint b_reserved; /*!< number of slots currently reserved
+ ulint block2; /*!< page number of the second block */
+ ulint first_free;/*!< first free position in write_buf
+ measured in units of UNIV_PAGE_SIZE */
+ ulint b_reserved;/*!< number of slots currently reserved
for batch flush. */
- ibool* in_use; /*!< flag used to indicate if a slot is
+ os_event_t b_event;/*!< event where threads wait for a
+ batch flush to end. */
+ ulint s_reserved;/*!< number of slots currently
+ reserved for single page flushes. */
+ os_event_t s_event;/*!< event where threads wait for a
+ single page flush slot. */
+ bool* in_use; /*!< flag used to indicate if a slot is
in use. Only used for single page
flushes. */
- ibool batch_running; /*!< set to TRUE if currently a batch
+ bool batch_running;/*!< set to TRUE if currently a batch
is being written from the doublewrite
buffer. */
- byte* write_buf; /*!< write buffer used in writing to the
+ byte* write_buf;/*!< write buffer used in writing to the
doublewrite buffer, aligned to an
address divisible by UNIV_PAGE_SIZE
(which is required by Windows aio) */
- byte* write_buf_unaligned;
- /*!< pointer to write_buf, but unaligned */
- buf_page_t**
- buf_block_arr; /*!< array to store pointers to the buffer
- blocks which have been cached to write_buf */
+ byte* write_buf_unaligned;/*!< pointer to write_buf,
+ but unaligned */
+ buf_page_t** buf_block_arr;/*!< array to store pointers to
+ the buffer blocks which have been
+ cached to write_buf */
};
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 94f4e6dedd1..6fee9afcc91 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -87,13 +87,6 @@ buf_flush_page_try(
buf_block_t* block) /*!< in/out: buffer control block */
__attribute__((nonnull, warn_unused_result));
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/********************************************************************//**
-Flush a batch of writes to the datafiles that have already been
-written by the OS. */
-UNIV_INTERN
-void
-buf_flush_sync_datafiles(void);
-/*==========================*/
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
@@ -136,7 +129,7 @@ void
buf_flush_wait_batch_end(
/*=====================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- enum buf_flush type); /*!< in: BUF_FLUSH_LRU
+ buf_flush_t type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/******************************************************************//**
Waits until a flush batch of the given type ends. This is called by
@@ -147,7 +140,7 @@ void
buf_flush_wait_batch_end_wait_only(
/*===============================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- enum buf_flush type); /*!< in: BUF_FLUSH_LRU
+ buf_flush_t type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
@@ -248,8 +241,20 @@ buf_flush_page(
/*===========*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_page_t* bpage, /*!< in: buffer control block */
- buf_flush flush_type) /*!< in: type of flush */
+ buf_flush_t flush_type, /*!< in: type of flush */
+ bool sync) /*!< in: true if sync IO request */
__attribute__((nonnull));
+/********************************************************************//**
+Returns true if the block is modified and ready for flushing.
+@return true if can flush immediately */
+UNIV_INTERN
+bool
+buf_flush_ready_for_flush(
+/*======================*/
+ buf_page_t* bpage, /*!< in: buffer control block, must be
+ buf_page_in_file(bpage) */
+ buf_flush_t flush_type)/*!< in: type of flush */
+ __attribute__((warn_unused_result));
#ifdef UNIV_DEBUG
/******************************************************************//**
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index f7a69e1c9e4..ecdaef685a1 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -79,19 +79,19 @@ buf_LRU_insert_zip_clean(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
-NOTE: If this function returns TRUE, it will temporarily
+NOTE: If this function returns true, it will temporarily
release buf_pool->mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool->mutex and must not hold any
buf_page_get_mutex() when calling this function.
-@return TRUE if freed, FALSE otherwise. */
+@return true if freed, false otherwise. */
UNIV_INTERN
-ibool
-buf_LRU_free_block(
-/*===============*/
+bool
+buf_LRU_free_page(
+/*==============*/
buf_page_t* bpage, /*!< in: block to be freed */
- ibool zip) /*!< in: TRUE if should remove also the
+ bool zip) /*!< in: true if should remove also the
compressed page of an uncompressed page */
__attribute__((nonnull));
/******************************************************************//**
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
index b98ff121209..d2a1f264ff5 100644
--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -119,7 +119,7 @@ UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
- ibool sync, /*!< in: TRUE if the caller
+ bool sync, /*!< in: true if the caller
wants this function to wait
for the highest address page
to get read in, before this
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 5ed210d3b90..307ef18f0c2 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -45,7 +45,7 @@ struct buf_dblwr_t;
typedef byte buf_frame_t;
/** Flags for flush types */
-enum buf_flush {
+enum buf_flush_t {
BUF_FLUSH_LRU = 0, /*!< flush via the LRU list */
BUF_FLUSH_LIST, /*!< flush via the flush list
of dirty blocks */
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index 12e9f543e94..1f5ab0d7923 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -124,6 +124,9 @@ enum dberr_t {
during online index creation */
DB_IO_ERROR, /*!< Generic IO error */
+ DB_IDENTIFIER_TOO_LONG, /*!< Identifier name too long */
+ DB_FTS_EXCEED_RESULT_CACHE_LIMIT, /*!< FTS query memory
+ exceeds result cache limit */
/* The following are partial failure codes */
DB_FAIL = 1000,
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index 217da0142ee..6ec1079957b 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -111,6 +111,20 @@ dberr_t
dict_create_or_check_foreign_constraint_tables(void);
/*================================================*/
/********************************************************************//**
+Generate a foreign key constraint name when it was not named by the user.
+A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER,
+where the numbers start from 1, and are given locally for this table, that is,
+the number is not global, as it used to be before MySQL 4.0.18. */
+UNIV_INLINE
+dberr_t
+dict_create_add_foreign_id(
+/*=======================*/
+ ulint* id_nr, /*!< in/out: number to use in id generation;
+ incremented if used */
+ const char* name, /*!< in: table name */
+ dict_foreign_t* foreign)/*!< in/out: foreign key */
+ __attribute__((nonnull));
+/********************************************************************//**
Adds foreign key definitions to data dictionary tables in the database. We
look at table->foreign_list, and also generate names to constraints that were
not named by the user. A generated constraint has a name of the format
@@ -158,25 +172,15 @@ dict_create_add_tablespace_to_dictionary(
bool commit); /*!< in: if true then commit the
transaction */
/********************************************************************//**
-Table create node structure */
-
-/********************************************************************//**
-Add a single foreign key definition to the data dictionary tables in the
-database. We also generate names to constraints that were not named by the
-user. A generated constraint has a name of the format
-databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and
-are given locally for this table, that is, the number is not global, as in
-the old format constraints < 4.0.18 it used to be.
-@return error code or DB_SUCCESS */
+Add a foreign key definition to the data dictionary tables.
+@return error code or DB_SUCCESS */
UNIV_INTERN
dberr_t
dict_create_add_foreign_to_dictionary(
/*==================================*/
- ulint* id_nr, /*!< in/out: number to use in id generation;
- incremented if used */
- dict_table_t* table, /*!< in: table */
- dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx) /*!< in/out: dictionary transaction */
+ const char* name, /*!< in: table name */
+ const dict_foreign_t* foreign,/*!< in: foreign key */
+ trx_t* trx) /*!< in/out: dictionary transaction */
__attribute__((nonnull, warn_unused_result));
/* Table create node structure */
diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic
index 98cbbf28208..2d0d9dcb858 100644
--- a/storage/innobase/include/dict0crea.ic
+++ b/storage/innobase/include/dict0crea.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,3 +23,76 @@ Database object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
+#include "mem0mem.h"
+
+/*********************************************************************//**
+Checks if a table name contains the string "/#sql" which denotes temporary
+tables in MySQL.
+@return true if temporary table */
+UNIV_INTERN
+bool
+row_is_mysql_tmp_table_name(
+/*========================*/
+ const char* name) __attribute__((warn_unused_result));
+ /*!< in: table name in the form
+ 'database/tablename' */
+
+
+/********************************************************************//**
+Generate a foreign key constraint name when it was not named by the user.
+A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER,
+where the numbers start from 1, and are given locally for this table, that is,
+the number is not global, as it used to be before MySQL 4.0.18.
+@return DB_SUCCESS, or DB_IDENTIFIER_TOO_LONG if the new id is too long */
+UNIV_INLINE
+dberr_t
+dict_create_add_foreign_id(
+/*=======================*/
+	ulint*		id_nr,	/*!< in/out: number to use in id generation;
+				incremented if used */
+	const char*	name,	/*!< in: table name */
+	dict_foreign_t*	foreign)/*!< in/out: foreign key */
+{
+	if (foreign->id == NULL) {
+		/* Generate a new constraint id */
+		ulint	namelen	= strlen(name);
+		char*	id	= static_cast<char*>(
+			mem_heap_alloc(foreign->heap, namelen + 20));
+
+		if (row_is_mysql_tmp_table_name(name)) {
+			/* no overflow if number < 1e13 */
+			sprintf(id, "%s_ibfk_%lu", name,
+				(ulong) (*id_nr)++);
+		} else {
+			char	table_name[MAX_TABLE_NAME_LEN + 20];
+			uint	errors = 0;
+
+			/* strncpy() does not NUL-terminate on truncation */
+			strncpy(table_name, name, sizeof(table_name) - 1);
+			table_name[sizeof(table_name) - 1] = '\0';
+
+			innobase_convert_to_system_charset(
+				strchr(table_name, '/') + 1,
+				strchr(name, '/') + 1,
+				MAX_TABLE_NAME_LEN, &errors);
+
+			if (errors) {	/* conversion failed: restore raw name */
+				strncpy(table_name, name, sizeof(table_name) - 1);
+				table_name[sizeof(table_name) - 1] = '\0';
+			}
+
+			/* no overflow if number < 1e13 */
+			sprintf(id, "%s_ibfk_%lu", table_name,
+				(ulong) (*id_nr)++);
+
+			if (innobase_check_identifier_length(
+				strchr(id,'/') + 1)) {
+				return(DB_IDENTIFIER_TOO_LONG);
+			}
+		}
+		foreign->id = id;
+	}
+
+	return(DB_SUCCESS);
+}
+
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index af0a5b31cc4..f740c427006 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -108,6 +108,18 @@ dict_remove_db_name(
const char* name) /*!< in: table name in the form
dbname '/' tablename */
__attribute__((nonnull, warn_unused_result));
+
+/** Operation to perform when opening a table */
+enum dict_table_op_t {
+ /** Expect the tablespace to exist. */
+ DICT_TABLE_OP_NORMAL = 0,
+ /** Drop any orphan indexes after an aborted online index creation */
+ DICT_TABLE_OP_DROP_ORPHAN,
+ /** Silently load the tablespace if it does not exist,
+ and do not load the definitions of incomplete indexes. */
+ DICT_TABLE_OP_LOAD_TABLESPACE
+};
+
/**********************************************************************//**
Returns a table object based on table id.
@return table, NULL if does not exist */
@@ -117,9 +129,7 @@ dict_table_open_on_id(
/*==================*/
table_id_t table_id, /*!< in: table id */
ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- ibool try_drop) /*!< in: TRUE=try to drop any orphan
- indexes after an aborted online
- index creation */
+ dict_table_op_t table_op) /*!< in: operation to perform */
__attribute__((warn_unused_result));
/********************************************************************//**
Decrements the count of open handles to a table. */
@@ -408,10 +418,17 @@ UNIV_INTERN
dberr_t
dict_foreign_add_to_cache(
/*======================*/
- dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
- ibool check_charsets) /*!< in: TRUE=check charset
- compatibility */
- __attribute__((nonnull, warn_unused_result));
+ dict_foreign_t* foreign,
+ /*!< in, own: foreign key constraint */
+ const char** col_names,
+ /*!< in: column names, or NULL to use
+ foreign->foreign_table->col_names */
+ bool check_charsets,
+ /*!< in: whether to check charset
+ compatibility */
+ dict_err_ignore_t ignore_err)
+ /*!< in: error to be ignored */
+ __attribute__((nonnull(1), warn_unused_result));
/*********************************************************************//**
Check if the index is referenced by a foreign key, if TRUE return the
matching instance NULL otherwise.
@@ -435,15 +452,18 @@ dict_table_is_referenced_by_foreign_key(
__attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Replace the index passed in with another equivalent index in the
-foreign key lists of the table. */
+foreign key lists of the table.
+@return whether all replacements were found */
UNIV_INTERN
-void
+bool
dict_foreign_replace_index(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
- const dict_index_t* index, /*!< in: index to be replaced */
- const trx_t* trx) /*!< in: transaction handle */
- __attribute__((nonnull));
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
+ const dict_index_t* index) /*!< in: index to be replaced */
+ __attribute__((nonnull(1,3), warn_unused_result));
/**********************************************************************//**
Determines whether a string starts with the specified keyword.
@return TRUE if str starts with keyword */
@@ -544,13 +564,16 @@ dict_index_t*
dict_foreign_find_index(
/*====================*/
const dict_table_t* table, /*!< in: table */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
const char** columns,/*!< in: array of column names */
ulint n_cols, /*!< in: number of columns */
const dict_index_t* types_idx,
/*!< in: NULL or an index
whose types the column types
must match */
- ibool check_charsets,
+ bool check_charsets,
/*!< in: whether to check
charsets. only has an effect
if types_idx != NULL */
@@ -558,7 +581,7 @@ dict_foreign_find_index(
/*!< in: nonzero if none of
the columns must be declared
NOT NULL */
- __attribute__((nonnull(1,2), warn_unused_result));
+ __attribute__((nonnull(1,3), warn_unused_result));
/**********************************************************************//**
Returns a column's name.
@return column name. NOTE: not guaranteed to stay valid if table is
@@ -624,6 +647,9 @@ bool
dict_foreign_qualify_index(
/*====================*/
const dict_table_t* table, /*!< in: table */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
const char** columns,/*!< in: array of column names */
ulint n_cols, /*!< in: number of columns */
const dict_index_t* index, /*!< in: index to check */
@@ -631,7 +657,7 @@ dict_foreign_qualify_index(
/*!< in: NULL or an index
whose types the column types
must match */
- ibool check_charsets,
+ bool check_charsets,
/*!< in: whether to check
charsets. only has an effect
if types_idx != NULL */
@@ -639,7 +665,7 @@ dict_foreign_qualify_index(
/*!< in: nonzero if none of
the columns must be declared
NOT NULL */
- __attribute__((nonnull(1,2), warn_unused_result));
+ __attribute__((nonnull(1,3), warn_unused_result));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the first index on the table (the clustered index).
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index 5991d58a686..030190b1a8e 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -58,6 +58,18 @@ enum dict_table_info_t {
is in the cache, if so, return it */
};
+/** Check type for dict_check_tablespaces_and_store_max_id() */
+enum dict_check_t {
+ /** No user tablespaces have been opened
+ (no crash recovery, no transactions recovered). */
+ DICT_CHECK_NONE_LOADED = 0,
+ /** Some user tablespaces may have been opened
+ (no crash recovery; recovered table locks for transactions). */
+ DICT_CHECK_SOME_LOADED,
+ /** All user tablespaces have been opened (crash recovery). */
+ DICT_CHECK_ALL_LOADED
+};
+
/********************************************************************//**
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
@@ -70,7 +82,7 @@ UNIV_INTERN
void
dict_check_tablespaces_and_store_max_id(
/*====================================*/
- ibool in_crash_recovery); /*!< in: are we doing a crash recovery */
+ dict_check_t dict_check); /*!< in: how to check */
/********************************************************************//**
Finds the first table name in the given database.
@return own: table name, NULL if does not exist; the caller must free
@@ -199,7 +211,9 @@ UNIV_INTERN
dict_table_t*
dict_load_table_on_id(
/*==================*/
- table_id_t table_id); /*!< in: table id */
+ table_id_t table_id, /*!< in: table id */
+ dict_err_ignore_t ignore_err); /*!< in: errors to ignore
+ when loading the table */
/********************************************************************//**
This function is called when the database is booted.
Loads system table index definitions except for the clustered index which
@@ -220,12 +234,16 @@ UNIV_INTERN
dberr_t
dict_load_foreigns(
/*===============*/
- const char* table_name, /*!< in: table name */
- ibool check_recursive,/*!< in: Whether to check recursive
- load of tables chained by FK */
- ibool check_charsets) /*!< in: TRUE=check charsets
- compatibility */
- __attribute__((nonnull, warn_unused_result));
+ const char* table_name, /*!< in: table name */
+ const char** col_names, /*!< in: column names, or NULL
+ to use table->col_names */
+ bool check_recursive,/*!< in: Whether to check
+ recursive load of tables
+ chained by FK */
+ bool check_charsets, /*!< in: whether to check
+ charset compatibility */
+ dict_err_ignore_t ignore_err) /*!< in: error to be ignored */
+ __attribute__((nonnull(1), warn_unused_result));
/********************************************************************//**
Prints to the standard output information on all tables found in the data
dictionary system table. */
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 671f67eb1f8..bc90e2ddfaf 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -226,7 +226,7 @@ This could result in rescursive calls and out of stack error eventually.
DICT_FK_MAX_RECURSIVE_LOAD defines the maximum number of recursive loads,
when exceeded, the child table will not be loaded. It will be loaded when
the foreign constraint check needs to be run. */
-#define DICT_FK_MAX_RECURSIVE_LOAD 255
+#define DICT_FK_MAX_RECURSIVE_LOAD 20
/** Similarly, when tables are chained together with foreign key constraints
with on cascading delete/update clause, delete from parent table could
@@ -916,7 +916,9 @@ struct dict_table_t{
the background stats thread will detect this
and will eventually quit sooner */
byte stats_bg_flag;
- /*!< see BG_STAT_* above */
+ /*!< see BG_STAT_* above.
+ Writes are covered by dict_sys->mutex.
+ Dirty reads are possible. */
/* @} */
/*----------------------*/
/**!< The following fields are used by the
diff --git a/storage/innobase/include/dict0priv.h b/storage/innobase/include/dict0priv.h
index 69eeb835885..9a3c8e22992 100644
--- a/storage/innobase/include/dict0priv.h
+++ b/storage/innobase/include/dict0priv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -52,7 +52,9 @@ UNIV_INLINE
dict_table_t*
dict_table_open_on_id_low(
/*=====================*/
- table_id_t table_id); /*!< in: table id */
+ table_id_t table_id, /*!< in: table id */
+ dict_err_ignore_t ignore_err); /*!< in: errors to ignore
+ when loading the table */
#ifndef UNIV_NONINL
#include "dict0priv.ic"
diff --git a/storage/innobase/include/dict0priv.ic b/storage/innobase/include/dict0priv.ic
index e15fbc65a63..30ba8fb60aa 100644
--- a/storage/innobase/include/dict0priv.ic
+++ b/storage/innobase/include/dict0priv.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -73,7 +73,9 @@ UNIV_INLINE
dict_table_t*
dict_table_open_on_id_low(
/*======================*/
- table_id_t table_id) /*!< in: table id */
+ table_id_t table_id, /*!< in: table id */
+ dict_err_ignore_t ignore_err) /*!< in: errors to ignore
+ when loading the table */
{
dict_table_t* table;
ulint fold;
@@ -87,7 +89,7 @@ dict_table_open_on_id_low(
dict_table_t*, table, ut_ad(table->cached),
table->id == table_id);
if (table == NULL) {
- table = dict_load_table_on_id(table_id);
+ table = dict_load_table_on_id(table_id, ignore_err);
}
ut_ad(!table || table->cached);
diff --git a/storage/innobase/include/dict0stats.ic b/storage/innobase/include/dict0stats.ic
index 04763f174d0..8fb31678af9 100644
--- a/storage/innobase/include/dict0stats.ic
+++ b/storage/innobase/include/dict0stats.ic
@@ -31,8 +31,7 @@ Created Jan 23, 2012 Vasil Dimov
/*********************************************************************//**
Set the persistent statistics flag for a given table. This is set only
in the in-memory table object and is not saved on disk. It will be read
-from the .frm file upon first open from MySQL after a server restart.
-dict_stats_set_persistent() @{ */
+from the .frm file upon first open from MySQL after a server restart. */
UNIV_INLINE
void
dict_stats_set_persistent(
@@ -61,11 +60,9 @@ dict_stats_set_persistent(
/* we rely on this assignment to be atomic */
table->stat_persistent = stat_persistent;
}
-/* @} */
/*********************************************************************//**
Check whether persistent statistics is enabled for a given table.
-dict_stats_is_persistent_enabled() @{
@return TRUE if enabled, FALSE otherwise */
UNIV_INLINE
ibool
@@ -100,14 +97,12 @@ dict_stats_is_persistent_enabled(
return(srv_stats_persistent);
}
}
-/* @} */
/*********************************************************************//**
Set the auto recalc flag for a given table (only honored for a persistent
stats enabled table). The flag is set only in the in-memory table object
and is not saved in InnoDB files. It will be read from the .frm file upon
-first open from MySQL after a server restart.
-dict_stats_auto_recalc_set() @{ */
+first open from MySQL after a server restart. */
UNIV_INLINE
void
dict_stats_auto_recalc_set(
@@ -131,11 +126,9 @@ dict_stats_auto_recalc_set(
/* we rely on this assignment to be atomic */
table->stats_auto_recalc = stats_auto_recalc;
}
-/* @} */
/*********************************************************************//**
Check whether auto recalc is enabled for a given table.
-dict_stats_auto_recalc_is_enabled() @{
@return TRUE if enabled, FALSE otherwise */
UNIV_INLINE
ibool
@@ -155,11 +148,9 @@ dict_stats_auto_recalc_is_enabled(
return(srv_stats_auto_recalc);
}
}
-/* @} */
/*********************************************************************//**
-Initialize table's stats for the first time when opening a table.
-dict_stats_init() @{ */
+Initialize table's stats for the first time when opening a table. */
UNIV_INLINE
void
dict_stats_init(
@@ -182,12 +173,10 @@ dict_stats_init(
dict_stats_update(table, opt);
}
-/* @} */
/*********************************************************************//**
Deinitialize table's stats after the last close of the table. This is
-used to detect "FLUSH TABLE" and refresh the stats upon next open.
-dict_stats_deinit() @{ */
+used to detect "FLUSH TABLE" and refresh the stats upon next open. */
UNIV_INLINE
void
dict_stats_deinit(
@@ -245,6 +234,3 @@ dict_stats_deinit(
dict_table_stats_unlock(table, RW_X_LATCH);
}
-/* @} */
-
-/* vim: set foldmethod=marker foldmarker=@{,@}: */
diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h
index dd85088c7ba..e866ab419fe 100644
--- a/storage/innobase/include/dict0stats_bg.h
+++ b/storage/innobase/include/dict0stats_bg.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,14 +40,12 @@ Add a table to the recalc pool, which is processed by the
background stats gathering thread. Only the table id is added to the
list, so the table can be closed after being enqueued and it will be
opened when needed. If the table does not exist later (has been DROPped),
-then it will be removed from the pool and skipped.
-dict_stats_recalc_pool_add() @{ */
+then it will be removed from the pool and skipped. */
UNIV_INTERN
void
dict_stats_recalc_pool_add(
/*=======================*/
const dict_table_t* table); /*!< in: table to add */
-/* @} */
/*****************************************************************//**
Delete a given table from the auto recalc pool.
@@ -57,53 +55,63 @@ void
dict_stats_recalc_pool_del(
/*=======================*/
const dict_table_t* table); /*!< in: table to remove */
-/* @} */
+
+/** Yield the data dictionary latch while waiting for the background thread
+to stop accessing a table: unlock, sleep 250000 (presumably usec), relock.
+@param trx transaction holding the data dictionary locks (evaluated twice) */
+#define DICT_STATS_BG_YIELD(trx) do { \
+ row_mysql_unlock_data_dictionary(trx); \
+ os_thread_sleep(250000); \
+ row_mysql_lock_data_dictionary(trx); \
+} while (0)
+
+/*****************************************************************//**
+Request the background collection of statistics to stop for a table.
+@retval true when no background process is active
+@retval false when it is not safe to modify the table definition */
+UNIV_INLINE
+bool
+dict_stats_stop_bg(
+/*===============*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((warn_unused_result));
/*****************************************************************//**
-Wait until background stats thread has stopped using the specified table(s).
+Wait until background stats thread has stopped using the specified table.
The caller must have locked the data dictionary using
row_mysql_lock_data_dictionary() and this function may unlock it temporarily
and restore the lock before it exits.
-The background stats thead is guaranteed not to start using the specified
-tables after this function returns and before the caller unlocks the data
+The background stats thread is guaranteed not to start using the specified
+table after this function returns and before the caller unlocks the data
dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
-under dict_sys->mutex.
-dict_stats_wait_bg_to_stop_using_table() @{ */
+under dict_sys->mutex. */
UNIV_INTERN
void
-dict_stats_wait_bg_to_stop_using_tables(
-/*====================================*/
- dict_table_t* table1, /*!< in/out: table1 */
- dict_table_t* table2, /*!< in/out: table2, could be NULL */
+dict_stats_wait_bg_to_stop_using_table(
+/*===================================*/
+ dict_table_t* table, /*!< in/out: table */
trx_t* trx); /*!< in/out: transaction to use for
unlocking/locking the data dict */
-/* @} */
-
/*****************************************************************//**
Initialize global variables needed for the operation of dict_stats_thread().
-Must be called before dict_stats_thread() is started.
-dict_stats_thread_init() @{ */
+Must be called before dict_stats_thread() is started. */
UNIV_INTERN
void
dict_stats_thread_init();
/*====================*/
-/* @} */
/*****************************************************************//**
Free resources allocated by dict_stats_thread_init(), must be called
-after dict_stats_thread() has exited.
-dict_stats_thread_deinit() @{ */
+after dict_stats_thread() has exited. */
UNIV_INTERN
void
dict_stats_thread_deinit();
/*======================*/
-/* @} */
/*****************************************************************//**
This is the thread for background stats gathering. It pops tables, from
the auto recalc list and proceeds them, eventually recalculating their
statistics.
-dict_stats_thread() @{
@return this function does not return, it calls os_thread_exit() */
extern "C" UNIV_INTERN
os_thread_ret_t
@@ -111,6 +119,9 @@ DECLARE_THREAD(dict_stats_thread)(
/*==============================*/
void* arg); /*!< in: a dummy parameter
required by os_thread_create */
-/* @} */
+
+# ifndef UNIV_NONINL
+# include "dict0stats_bg.ic"
+# endif
#endif /* dict0stats_bg_h */
diff --git a/storage/innobase/include/dict0stats_bg.ic b/storage/innobase/include/dict0stats_bg.ic
new file mode 100644
index 00000000000..87e3225de58
--- /dev/null
+++ b/storage/innobase/include/dict0stats_bg.ic
@@ -0,0 +1,45 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats_bg.ic
+Code used for background table and index stats gathering.
+
+Created Feb 8, 2013 Marko Makela
+*******************************************************/
+
+/*****************************************************************//**
+Request the background collection of statistics to stop for a table.
+@retval true when no background process is active
+@retval false when it is not safe to modify the table definition */
+UNIV_INLINE
+bool
+dict_stats_stop_bg(
+/*===============*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex)); /* caller holds dict_sys->mutex */
+
+ if (!(table->stats_bg_flag & BG_STAT_IN_PROGRESS)) {
+ return(true); /* IN_PROGRESS flag is clear */
+ }
+
+ table->stats_bg_flag |= BG_STAT_SHOULD_QUIT; /* flag a quit request */
+ return(false);
+}
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index b7f7c2d9df9..6acb6a2dcbe 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -57,6 +57,14 @@ enum dict_err_ignore_t {
DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
page is FIL_NULL or incorrect value */
DICT_ERR_IGNORE_CORRUPT = 2, /*!< skip corrupted indexes */
+ DICT_ERR_IGNORE_FK_NOKEY = 4, /*!< ignore error if any foreign
+ key is missing */
+ DICT_ERR_IGNORE_RECOVER_LOCK = 8,
+ /*!< Used when recovering table locks
+ for resurrected transactions.
+ Silently load a missing
+ tablespace, and do not load
+ incomplete index definitions. */
DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
};
@@ -67,4 +75,11 @@ enum ib_quiesce_t {
QUIESCE_COMPLETE /*!< All done */
};
+/** Prefix for tmp tables, adopted from sql/table.h */
+#define tmp_file_prefix "#sql"
+#define tmp_file_prefix_length 4
+
+#define TEMP_TABLE_PREFIX "#sql"
+#define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX
+
#endif
diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h
index ffb4f270d0e..7f23302d1ff 100644
--- a/storage/innobase/include/dyn0dyn.h
+++ b/storage/innobase/include/dyn0dyn.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -46,15 +46,17 @@ UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
- dyn_array_t* arr); /*!< in: pointer to a memory buffer of
+ dyn_array_t* arr) /*!< in/out: memory buffer of
size sizeof(dyn_array_t) */
+ __attribute__((nonnull));
/************************************************************//**
Frees a dynamic array. */
UNIV_INLINE
void
dyn_array_free(
/*===========*/
- dyn_array_t* arr); /*!< in: dyn array */
+ dyn_array_t* arr) /*!< in,own: dyn array */
+ __attribute__((nonnull));
/*********************************************************************//**
Makes room on top of a dyn array and returns a pointer to a buffer in it.
After copying the elements, the caller must close the buffer using
@@ -65,8 +67,9 @@ byte*
dyn_array_open(
/*===========*/
dyn_array_t* arr, /*!< in: dynamic array */
- ulint size); /*!< in: size in bytes of the buffer; MUST be
+ ulint size) /*!< in: size in bytes of the buffer; MUST be
smaller than DYN_ARRAY_DATA_SIZE! */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Closes the buffer returned by dyn_array_open. */
UNIV_INLINE
@@ -74,7 +77,8 @@ void
dyn_array_close(
/*============*/
dyn_array_t* arr, /*!< in: dynamic array */
- byte* ptr); /*!< in: buffer space from ptr up was not used */
+ const byte* ptr) /*!< in: end of used space */
+ __attribute__((nonnull));
/*********************************************************************//**
Makes room on top of a dyn array and returns a pointer to
the added element. The caller must copy the element to
@@ -84,8 +88,9 @@ UNIV_INLINE
void*
dyn_array_push(
/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
- ulint size); /*!< in: size in bytes of the element */
+ dyn_array_t* arr, /*!< in/out: dynamic array */
+ ulint size) /*!< in: size in bytes of the element */
+ __attribute__((nonnull, warn_unused_result));
/************************************************************//**
Returns pointer to an element in dyn array.
@return pointer to element */
@@ -93,9 +98,10 @@ UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
- dyn_array_t* arr, /*!< in: dyn array */
- ulint pos); /*!< in: position of element as bytes
- from array start */
+ const dyn_array_t* arr, /*!< in: dyn array */
+ ulint pos) /*!< in: position of element
+ in bytes from array start */
+ __attribute__((nonnull, warn_unused_result));
/************************************************************//**
Returns the size of stored data in a dyn array.
@return data size in bytes */
@@ -103,30 +109,33 @@ UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
- dyn_array_t* arr); /*!< in: dyn array */
+ const dyn_array_t* arr) /*!< in: dyn array */
+ __attribute__((nonnull, warn_unused_result, pure));
/************************************************************//**
-Gets the first block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_first_block(
-/*======================*/
- dyn_array_t* arr); /*!< in: dyn array */
+Gets the first block in a dyn array.
+@param arr dyn array
+@return first block */
+#define dyn_array_get_first_block(arr) (arr)
/************************************************************//**
-Gets the last block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_last_block(
-/*=====================*/
- dyn_array_t* arr); /*!< in: dyn array */
+Gets the last block in a dyn array.
+@param arr dyn array
+@return last block */
+#define dyn_array_get_last_block(arr) \
+ ((arr)->heap ? UT_LIST_GET_LAST((arr)->base) : (arr))
/********************************************************************//**
Gets the next block in a dyn array.
-@return pointer to next, NULL if end of list */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_next_block(
-/*=====================*/
- dyn_array_t* arr, /*!< in: dyn array */
- dyn_block_t* block); /*!< in: dyn array block */
+@param arr dyn array
+@param block dyn array block
+@return pointer to next, NULL if end of list */
+#define dyn_array_get_next_block(arr, block) \
+ ((arr)->heap ? UT_LIST_GET_NEXT(list, block) : NULL)
+/********************************************************************//**
+Gets the previous block in a dyn array.
+@param arr dyn array
+@param block dyn array block
+@return pointer to previous, NULL if start of list */
+#define dyn_array_get_prev_block(arr, block) \
+ ((arr)->heap ? UT_LIST_GET_PREV(list, block) : NULL)
/********************************************************************//**
Gets the number of used bytes in a dyn array block.
@return number of bytes used */
@@ -134,7 +143,8 @@ UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
- dyn_block_t* block); /*!< in: dyn array block */
+ const dyn_block_t* block) /*!< in: dyn array block */
+ __attribute__((nonnull, warn_unused_result, pure));
/********************************************************************//**
Gets pointer to the start of data in a dyn array block.
@return pointer to data */
@@ -142,16 +152,18 @@ UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
- dyn_block_t* block); /*!< in: dyn array block */
+ const dyn_block_t* block) /*!< in: dyn array block */
+ __attribute__((nonnull, warn_unused_result, pure));
/********************************************************//**
Pushes n bytes to a dyn array. */
UNIV_INLINE
void
dyn_push_string(
/*============*/
- dyn_array_t* arr, /*!< in: dyn array */
+ dyn_array_t* arr, /*!< in/out: dyn array */
const byte* str, /*!< in: string to write */
- ulint len); /*!< in: string length */
+ ulint len) /*!< in: string length */
+ __attribute__((nonnull));
/*#################################################################*/
diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic
index 39254e632a8..0296554e2ee 100644
--- a/storage/innobase/include/dyn0dyn.ic
+++ b/storage/innobase/include/dyn0dyn.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -35,56 +35,8 @@ UNIV_INTERN
dyn_block_t*
dyn_array_add_block(
/*================*/
- dyn_array_t* arr); /*!< in: dyn array */
-
-
-/************************************************************//**
-Gets the first block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_first_block(
-/*======================*/
- dyn_array_t* arr) /*!< in: dyn array */
-{
- return(arr);
-}
-
-/************************************************************//**
-Gets the last block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_last_block(
-/*=====================*/
- dyn_array_t* arr) /*!< in: dyn array */
-{
- if (arr->heap == NULL) {
-
- return(arr);
- }
-
- return(UT_LIST_GET_LAST(arr->base));
-}
-
-/********************************************************************//**
-Gets the next block in a dyn array.
-@return pointer to next, NULL if end of list */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_next_block(
-/*=====================*/
- dyn_array_t* arr, /*!< in: dyn array */
- dyn_block_t* block) /*!< in: dyn array block */
-{
- ut_ad(arr && block);
-
- if (arr->heap == NULL) {
- ut_ad(arr == block);
-
- return(NULL);
- }
-
- return(UT_LIST_GET_NEXT(list, block));
-}
+ dyn_array_t* arr) /*!< in/out: dyn array */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of used bytes in a dyn array block.
@@ -93,7 +45,7 @@ UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
- dyn_block_t* block) /*!< in: dyn array block */
+ const dyn_block_t* block) /*!< in: dyn array block */
{
ut_ad(block);
@@ -107,11 +59,11 @@ UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
- dyn_block_t* block) /*!< in: dyn array block */
+ const dyn_block_t* block) /*!< in: dyn array block */
{
ut_ad(block);
- return(block->data);
+ return(const_cast<byte*>(block->data));
}
/*********************************************************************//**
@@ -121,7 +73,7 @@ UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
- dyn_array_t* arr) /*!< in: pointer to a memory buffer of
+ dyn_array_t* arr) /*!< in/out: memory buffer of
size sizeof(dyn_array_t) */
{
ut_ad(arr);
@@ -132,10 +84,9 @@ dyn_array_create(
arr->heap = NULL;
arr->used = 0;
-#ifdef UNIV_DEBUG
- arr->buf_end = 0;
- arr->magic_n = DYN_BLOCK_MAGIC_N;
-#endif
+ ut_d(arr->buf_end = 0);
+ ut_d(arr->magic_n = DYN_BLOCK_MAGIC_N);
+
return(arr);
}
@@ -151,9 +102,7 @@ dyn_array_free(
mem_heap_free(arr->heap);
}
-#ifdef UNIV_DEBUG
- arr->magic_n = 0;
-#endif
+ ut_d(arr->magic_n = 0);
}
/*********************************************************************//**
@@ -164,7 +113,7 @@ UNIV_INLINE
void*
dyn_array_push(
/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
+ dyn_array_t* arr, /*!< in/out: dynamic array */
ulint size) /*!< in: size in bytes of the element */
{
dyn_block_t* block;
@@ -176,24 +125,23 @@ dyn_array_push(
ut_ad(size);
block = arr;
- used = block->used;
- if (used + size > DYN_ARRAY_DATA_SIZE) {
+ if (block->used + size > DYN_ARRAY_DATA_SIZE) {
/* Get the last array block */
block = dyn_array_get_last_block(arr);
- used = block->used;
- if (used + size > DYN_ARRAY_DATA_SIZE) {
+ if (block->used + size > DYN_ARRAY_DATA_SIZE) {
block = dyn_array_add_block(arr);
- used = block->used;
}
}
+ used = block->used;
+
block->used = used + size;
ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
- return((block->data) + used);
+ return(block->data + used);
}
/*********************************************************************//**
@@ -210,7 +158,6 @@ dyn_array_open(
smaller than DYN_ARRAY_DATA_SIZE! */
{
dyn_block_t* block;
- ulint used;
ut_ad(arr);
ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
@@ -218,28 +165,23 @@ dyn_array_open(
ut_ad(size);
block = arr;
- used = block->used;
- if (used + size > DYN_ARRAY_DATA_SIZE) {
+ if (block->used + size > DYN_ARRAY_DATA_SIZE) {
/* Get the last array block */
block = dyn_array_get_last_block(arr);
- used = block->used;
- if (used + size > DYN_ARRAY_DATA_SIZE) {
+ if (block->used + size > DYN_ARRAY_DATA_SIZE) {
block = dyn_array_add_block(arr);
- used = block->used;
ut_a(size <= DYN_ARRAY_DATA_SIZE);
}
}
ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-#ifdef UNIV_DEBUG
ut_ad(arr->buf_end == 0);
+ ut_d(arr->buf_end = block->used + size);
- arr->buf_end = used + size;
-#endif
- return((block->data) + used);
+ return(block->data + block->used);
}
/*********************************************************************//**
@@ -248,8 +190,8 @@ UNIV_INLINE
void
dyn_array_close(
/*============*/
- dyn_array_t* arr, /*!< in: dynamic array */
- byte* ptr) /*!< in: buffer space from ptr up was not used */
+ dyn_array_t* arr, /*!< in/out: dynamic array */
+ const byte* ptr) /*!< in: end of used space */
{
dyn_block_t* block;
@@ -264,9 +206,7 @@ dyn_array_close(
ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-#ifdef UNIV_DEBUG
- arr->buf_end = 0;
-#endif
+ ut_d(arr->buf_end = 0);
}
/************************************************************//**
@@ -276,12 +216,11 @@ UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
- dyn_array_t* arr, /*!< in: dyn array */
- ulint pos) /*!< in: position of element as bytes
- from array start */
+ const dyn_array_t* arr, /*!< in: dyn array */
+ ulint pos) /*!< in: position of element
+ in bytes from array start */
{
- dyn_block_t* block;
- ulint used;
+ const dyn_block_t* block;
ut_ad(arr);
ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
@@ -290,21 +229,23 @@ dyn_array_get_element(
block = dyn_array_get_first_block(arr);
if (arr->heap != NULL) {
- used = dyn_block_get_used(block);
+ for (;;) {
+ ulint used = dyn_block_get_used(block);
+
+ if (pos < used) {
+ break;
+ }
- while (pos >= used) {
pos -= used;
block = UT_LIST_GET_NEXT(list, block);
ut_ad(block);
-
- used = dyn_block_get_used(block);
}
}
ut_ad(block);
ut_ad(dyn_block_get_used(block) >= pos);
- return(block->data + pos);
+ return(const_cast<byte*>(block->data) + pos);
}
/************************************************************//**
@@ -314,10 +255,10 @@ UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
- dyn_array_t* arr) /*!< in: dyn array */
+ const dyn_array_t* arr) /*!< in: dyn array */
{
- dyn_block_t* block;
- ulint sum = 0;
+ const dyn_block_t* block;
+ ulint sum = 0;
ut_ad(arr);
ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
@@ -344,7 +285,7 @@ UNIV_INLINE
void
dyn_push_string(
/*============*/
- dyn_array_t* arr, /*!< in: dyn array */
+ dyn_array_t* arr, /*!< in/out: dyn array */
const byte* str, /*!< in: string to write */
ulint len) /*!< in: string length */
{
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 56fda8b39b1..daeca1d8e44 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -360,9 +360,11 @@ fil_write_flushed_lsn_to_data_files(
ulint arch_log_no); /*!< in: latest archived log file number */
/*******************************************************************//**
Reads the flushed lsn, arch no, and tablespace flag fields from a data
-file at database startup. */
+file at database startup.
+@retval NULL on success, or if innodb_force_recovery is set
+@return pointer to an error message string */
UNIV_INTERN
-void
+const char*
fil_read_first_page(
/*================*/
os_file_t data_file, /*!< in: open data file */
@@ -379,8 +381,9 @@ fil_read_first_page(
#endif /* UNIV_LOG_ARCHIVE */
lsn_t* min_flushed_lsn, /*!< out: min of flushed
lsn values in data files */
- lsn_t* max_flushed_lsn); /*!< out: max of flushed
+ lsn_t* max_flushed_lsn) /*!< out: max of flushed
lsn values in data files */
+ __attribute__((warn_unused_result));
/*******************************************************************//**
Increments the count of pending operation, if space is not being deleted.
@return TRUE if being deleted, and operation should be skipped */
@@ -728,7 +731,7 @@ fil_io(
because i/os are not actually handled until
all have been posted: use with great
caution! */
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ bool sync, /*!< in: true if synchronous aio is desired */
ulint space_id, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes;
0 for uncompressed pages */
@@ -977,8 +980,10 @@ fil_mtr_rename_log(
ulint new_space_id, /*!< in: tablespace id of the new
table */
const char* new_name, /*!< in: new table name */
- const char* tmp_name); /*!< in: temp table name used while
+ const char* tmp_name, /*!< in: temp table name used while
swapping */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
#endif /* !UNIV_INNOCHECKSUM */
#endif /* fil0fil_h */
diff --git a/storage/innobase/include/fts0ast.h b/storage/innobase/include/fts0ast.h
index 7f2525dc450..c0aac6d8e4c 100644
--- a/storage/innobase/include/fts0ast.h
+++ b/storage/innobase/include/fts0ast.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,6 +27,7 @@ Created 2007/03/16/03 Sunny Bains
#define INNOBASE_FST0AST_H
#include "mem0mem.h"
+#include "ha_prototypes.h"
/* The type of AST Node */
enum fts_ast_type_t {
@@ -59,11 +60,16 @@ enum fts_ast_oper_t {
word*/
FTS_DISTANCE, /*!< Proximity distance */
- FTS_IGNORE_SKIP /*!< Transient node operator
+ FTS_IGNORE_SKIP, /*!< Transient node operator
signifies that this is a
FTS_IGNORE node, and ignored in
the first pass of
fts_ast_visit() */
+ FTS_EXIST_SKIP /*!< Transient node operator
+ signifies that this is a
+ FTS_EXIST node, and ignored in
+ the first pass of
+ fts_ast_visit() */
};
/* Data types used by the FTS parser */
@@ -71,7 +77,7 @@ struct fts_lexer_t;
struct fts_ast_node_t;
struct fts_ast_state_t;
-typedef ulint (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
+typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
/********************************************************************
Parse the string using the lexer setup within state.*/
@@ -268,6 +274,8 @@ struct fts_ast_state_t {
fts_ast_list_t list; /*!< List of nodes allocated */
fts_lexer_t* lexer; /*!< Lexer callback + arg */
+ CHARSET_INFO* charset; /*!< charset used for
+ tokenization */
};
#endif /* INNOBASE_FSTS0AST_H */
diff --git a/storage/innobase/include/fts0blex.h b/storage/innobase/include/fts0blex.h
index 6f8d6eaeb29..d0e4cae0678 100644
--- a/storage/innobase/include/fts0blex.h
+++ b/storage/innobase/include/fts0blex.h
@@ -38,7 +38,7 @@
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
@@ -55,7 +55,7 @@ typedef uint32_t flex_uint32_t;
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
+typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
@@ -193,7 +193,7 @@ struct yy_buffer_state
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index f2f8617012a..f94112ef4d4 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -87,6 +87,7 @@ those defined in mysql file ft_global.h */
#define FTS_EXPAND 4
#define FTS_PROXIMITY 8
#define FTS_PHRASE 16
+#define FTS_OPT_RANKING 32
#define FTS_INDEX_TABLE_IND_NAME "FTS_INDEX_TABLE_IND"
@@ -240,9 +241,10 @@ struct fts_ranking_t {
fts_rank_t rank; /*!< Rank is between 0 .. 1 */
- ib_rbt_t* words; /*!< RB Tree of type byte*, this
- contains the words that were queried
+ byte* words; /*!< this contains the words
+ that were queried
and found in this document */
+ ulint words_len; /*!< words len */
};
/** Query result. */
@@ -345,14 +347,27 @@ extern const char* fts_default_stopword[];
/** Variable specifying the maximum FTS cache size for each table */
extern ulong fts_max_cache_size;
+/** Variable specifying the total memory allocated for FTS cache */
+extern ulong fts_max_total_cache_size;
+
+/** Variable specifying the FTS result cache limit for each query */
+extern ulong fts_result_cache_limit;
+
/** Variable specifying the maximum FTS max token size */
extern ulong fts_max_token_size;
/** Variable specifying the minimum FTS max token size */
extern ulong fts_min_token_size;
+/** Whether the total memory used for FTS cache is exhausted, and we will
+need a sync to free some memory */
+extern bool fts_need_sync;
+
/** Maximum possible Fulltext word length */
-#define FTS_MAX_WORD_LEN 3 * HA_FT_MAXCHARLEN
+#define FTS_MAX_WORD_LEN HA_FT_MAXBYTELEN
+
+/** Maximum possible Fulltext word length (in characters) */
+#define FTS_MAX_WORD_LEN_IN_CHAR HA_FT_MAXCHARLEN
/** Variable specifying the table that has Fulltext index to display its
content through information schema table */
@@ -844,7 +859,7 @@ fts_index_get_charset(
dict_index_t* index); /*!< in: FTS index */
/*********************************************************************//**
-Get the initial Doc ID by consulting the ADDED and the CONFIG table
+Get the initial Doc ID by consulting the CONFIG table
@return initial Doc ID */
UNIV_INTERN
doc_id_t
@@ -894,8 +909,8 @@ ulint
innobase_mysql_fts_get_token(
/*=========================*/
CHARSET_INFO* charset, /*!< in: Character set */
- byte* start, /*!< in: start of text */
- byte* end, /*!< in: one character past
+ const byte* start, /*!< in: start of text */
+ const byte* end, /*!< in: one character past
end of text */
fts_string_t* token, /*!< out: token's text */
ulint* offset); /*!< out: offset to token,
@@ -923,9 +938,9 @@ fts_get_max_doc_id(
/******************************************************************//**
Check whether user supplied stopword table exists and is of
the right format.
-@return TRUE if the table qualifies */
+@return the stopword column charset if the table qualifies */
UNIV_INTERN
-ibool
+CHARSET_INFO*
fts_valid_stopword_table(
/*=====================*/
const char* stopword_table_name); /*!< in: Stopword table
@@ -970,9 +985,11 @@ fts_table_fetch_doc_ids(
fts_doc_ids_t* doc_ids); /*!< in: For collecting
doc ids */
/****************************************************************//**
-This function loads the documents in "ADDED" table into FTS cache,
-it also loads the stopword info into the FTS cache.
-@return DB_SUCCESS if all OK */
+This function brings the FTS index in sync when the FTS index is first
+used. If there are documents that were not yet synced to the auxiliary
+tables before the last abnormal server shutdown, they must be brought
+into the FTS cache before any further operations
+@return TRUE if all OK */
UNIV_INTERN
ibool
fts_init_index(
@@ -1001,6 +1018,17 @@ fts_drop_index(
trx_t* trx) /*!< in: Transaction for the drop */
__attribute__((nonnull));
+/****************************************************************//**
+Rename the auxiliary tables of all FTS indexes of a table
+@return DB_SUCCESS or error code */
+
+dberr_t
+fts_rename_aux_tables(
+/*==================*/
+ dict_table_t* table, /*!< in: user Table */
+ const char* new_name, /*!< in: new table name */
+ trx_t* trx); /*!< in: transaction */
+
/*******************************************************************//**
Check indexes in the fts->indexes is also present in index cache and
table->indexes list
diff --git a/storage/innobase/include/fts0pars.h b/storage/innobase/include/fts0pars.h
index ae5a55b2455..50f636944e5 100644
--- a/storage/innobase/include/fts0pars.h
+++ b/storage/innobase/include/fts0pars.h
@@ -1,21 +1,19 @@
+/* A Bison parser, made by GNU Bison 2.5. */
-/* A Bison parser, made by GNU Bison 2.4.1. */
-
-/* Skeleton interface for Bison's Yacc-like parsers in C
-
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
- Free Software Foundation, Inc.
-
+/* Bison interface for Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
@@ -28,7 +26,7 @@
special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public
License without this special exception.
-
+
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
@@ -52,8 +50,8 @@
typedef union YYSTYPE
{
-/* Line 1676 of yacc.c */
-#line 36 "fts0pars.y"
+/* Line 2068 of yacc.c */
+#line 61 "fts0pars.y"
int oper;
char* token;
@@ -61,8 +59,8 @@ typedef union YYSTYPE
-/* Line 1676 of yacc.c */
-#line 66 "fts0pars.h"
+/* Line 2068 of yacc.c */
+#line 64 "fts0pars.hh"
} YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
diff --git a/storage/innobase/include/fts0tlex.h b/storage/innobase/include/fts0tlex.h
index c0fed0efa71..f91533803e8 100644
--- a/storage/innobase/include/fts0tlex.h
+++ b/storage/innobase/include/fts0tlex.h
@@ -38,7 +38,7 @@
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
@@ -55,7 +55,7 @@ typedef uint32_t flex_uint32_t;
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
+typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
@@ -193,7 +193,7 @@ struct yy_buffer_state
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h
index 8fc52c9fc5e..b714d326487 100644
--- a/storage/innobase/include/fts0types.h
+++ b/storage/innobase/include/fts0types.h
@@ -137,11 +137,11 @@ struct fts_cache_t {
intialization, it has different
SYNC level as above cache lock */
- ib_mutex_t optimize_lock; /*!< Lock for OPTIMIZE */
+ ib_mutex_t optimize_lock; /*!< Lock for OPTIMIZE */
- ib_mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */
+ ib_mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */
- ib_mutex_t doc_id_lock; /*!< Lock covering Doc ID */
+ ib_mutex_t doc_id_lock; /*!< Lock covering Doc ID */
ib_vector_t* deleted_doc_ids;/*!< Array of deleted doc ids, each
element is of type fts_update_t */
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
index b96c3f9dac8..f0dfd023a70 100644
--- a/storage/innobase/include/fts0types.ic
+++ b/storage/innobase/include/fts0types.ic
@@ -47,7 +47,8 @@ fts_utf8_string_dup(
const fts_string_t* src, /*!< in: src string */
mem_heap_t* heap) /*!< in: heap to use */
{
- dst->f_str = (byte*) mem_heap_dup(heap, src->f_str, src->f_len + 1);
+ dst->f_str = (byte*)mem_heap_alloc(heap, src->f_len + 1);
+ memcpy(dst->f_str, src->f_str, src->f_len);
dst->f_len = src->f_len;
dst->f_str[src->f_len] = 0;
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index a16ce656f04..6fdad6a0b89 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -396,8 +396,8 @@ ulint
innobase_mysql_fts_get_token(
/*=========================*/
CHARSET_INFO* charset, /*!< in: Character set */
- byte* start, /*!< in: start of text */
- byte* end, /*!< in: one character past end of
+ const byte* start, /*!< in: start of text */
+ const byte* end, /*!< in: one character past end of
text */
fts_string_t* token, /*!< out: token's text */
ulint* offset); /*!< out: offset to token,
@@ -595,4 +595,35 @@ innobase_convert_to_system_charset(
ulint len, /* in: length of 'to', in bytes */
uint* errors); /* out: error return */
+/**********************************************************************
+Check if the length of the identifier exceeds the maximum allowed.
+The input to this function is an identifier in charset my_charset_filename.
+return true when length of identifier is too long. */
+UNIV_INTERN
+my_bool
+innobase_check_identifier_length(
+/*=============================*/
+ const char* id); /* in: identifier to check. it must belong
+ to charset my_charset_filename */
+
+/**********************************************************************
+Converts an identifier from my_charset_filename to UTF-8 charset. */
+uint
+innobase_convert_to_system_charset(
+/*===============================*/
+ char* to, /* out: converted identifier */
+ const char* from, /* in: identifier to convert */
+ ulint len, /* in: length of 'to', in bytes */
+ uint* errors); /* out: error return */
+
+/**********************************************************************
+Converts an identifier from UTF-8 charset to my_charset_filename. */
+uint
+innobase_convert_to_filename_charset(
+/*=================================*/
+ char* to, /* out: converted identifier */
+ const char* from, /* in: identifier to convert */
+ ulint len); /* in: length of 'to', in bytes */
+
+
#endif /* HA_INNODB_PROTOTYPES_H */
diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h
index 52aaf2d25ef..66b963ae39a 100644
--- a/storage/innobase/include/handler0alter.h
+++ b/storage/innobase/include/handler0alter.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index e64f067d364..0c5a336a1f0 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -413,9 +413,9 @@ ibuf_count_get(
#endif
/******************************************************************//**
Looks if the insert buffer is empty.
-@return TRUE if empty */
+@return true if empty */
UNIV_INTERN
-ibool
+bool
ibuf_is_empty(void);
/*===============*/
/******************************************************************//**
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index 92ca2cbb9a2..21747fdceac 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -67,10 +67,10 @@ struct ibuf_t{
ulint seg_size; /*!< allocated pages of the file
segment containing ibuf header and
tree */
- ibool empty; /*!< Protected by the page
+ bool empty; /*!< Protected by the page
latch of the root page of the
insert buffer tree
- (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
+ (FSP_IBUF_TREE_ROOT_PAGE_NO). true
if and only if the insert
buffer tree is empty. */
ulint free_list_len; /*!< length of the free list */
@@ -253,7 +253,15 @@ ibuf_index_page_calc_free_zip(
ut_ad(zip_size == buf_block_get_zip_size(block));
ut_ad(zip_size);
- max_ins_size = page_get_max_insert_size_after_reorganize(
+ /* Consider the maximum insert size on the uncompressed page
+ without reorganizing the page. We must not assume anything
+ about the compression ratio. If zip_max_ins > max_ins_size and
+ there is 1/4 garbage on the page, recompression after the
+ reorganize could fail, in theory. So, let us guarantee that
+ merging a buffered insert to a compressed page will always
+ succeed without reorganizing or recompressing the page, just
+ by using the page modification log. */
+ max_ins_size = page_get_max_insert_size(
buf_block_get_frame(block), 1);
page_zip = buf_block_get_page_zip(block);
@@ -331,8 +339,8 @@ ibuf_update_free_bits_if_full(
before = ibuf_index_page_calc_free_bits(0, max_ins_size);
if (max_ins_size >= increase) {
-#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE
-# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE"
+#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX
+# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX"
#endif
after = ibuf_index_page_calc_free_bits(0, max_ins_size
- increase);
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 8e6fdaed3d5..2c8bff47952 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -466,6 +466,14 @@ lock_table(
enum lock_mode mode, /*!< in: lock mode */
que_thr_t* thr) /*!< in: query thread */
__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Creates a table IX lock object for a resurrected transaction. */
+UNIV_INTERN
+void
+lock_table_ix_resurrect(
+/*====================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx); /*!< in/out: transaction */
/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
@@ -824,6 +832,19 @@ lock_trx_has_sys_table_locks(
/*=========================*/
const trx_t* trx) /*!< in: transaction to check */
__attribute__((warn_unused_result));
+
+/*******************************************************************//**
+Check if the transaction holds an exclusive lock on a record.
+@return whether the locks are held */
+UNIV_INTERN
+bool
+lock_trx_has_rec_x_lock(
+/*====================*/
+ const trx_t* trx, /*!< in: transaction to check */
+ const dict_table_t* table, /*!< in: table to check */
+ const buf_block_t* block, /*!< in: buffer block of the record */
+ ulint heap_no)/*!< in: record heap number */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/** Lock modes and types */
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index dd5e37012b7..1e61ea54e7b 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -794,12 +794,14 @@ struct log_t{
ulint max_buf_free; /*!< recommended maximum value of
buf_free, after which the buffer is
flushed */
+ #ifdef UNIV_LOG_DEBUG
ulint old_buf_free; /*!< value of buf free when log was
last time opened; only in the debug
version */
ib_uint64_t old_lsn; /*!< value of lsn when log was
last time opened; only in the
debug version */
+#endif /* UNIV_LOG_DEBUG */
ibool check_flush_or_checkpoint;
/*!< this is set to TRUE when there may
be need to flush the log buffer, or
diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic
index fffef87f09d..7449d2da2b8 100644
--- a/storage/innobase/include/mach0data.ic
+++ b/storage/innobase/include/mach0data.ic
@@ -873,6 +873,8 @@ mach_read_ulint(
default:
ut_error;
}
+
+ return(0);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index f8c1874412c..3257402d8aa 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -299,14 +299,16 @@ mtr_x_lock_func(
#endif /* !UNIV_HOTBACKUP */
/***************************************************//**
-Releases an object in the memo stack. */
+Releases an object in the memo stack.
+@return true if released */
UNIV_INTERN
-void
+bool
mtr_memo_release(
/*=============*/
- mtr_t* mtr, /*!< in: mtr */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
void* object, /*!< in: object */
- ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
+ ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
+ __attribute__((nonnull));
#ifdef UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
/**********************************************************//**
@@ -318,7 +320,8 @@ mtr_memo_contains(
/*==============*/
mtr_t* mtr, /*!< in: mtr */
const void* object, /*!< in: object to search */
- ulint type); /*!< in: type of object */
+ ulint type) /*!< in: type of object */
+ __attribute__((warn_unused_result, nonnull));
/**********************************************************//**
Checks if memo contains the given page.
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 4a744c1b268..ef7503ad45f 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -213,7 +213,9 @@ various file I/O operations with performance schema.
1) register_pfs_file_open_begin() and register_pfs_file_open_end() are
used to register file creation, opening, closing and renaming.
2) register_pfs_file_io_begin() and register_pfs_file_io_end() are
-used to register actual file read, write and flush */
+used to register actual file read, write and flush
+3) register_pfs_file_close_begin() and register_pfs_file_close_end()
+are used to register file deletion operations */
# define register_pfs_file_open_begin(state, locker, key, op, name, \
src_file, src_line) \
do { \
@@ -233,6 +235,25 @@ do { \
} \
} while (0)
+# define register_pfs_file_close_begin(state, locker, key, op, name, \
+ src_file, src_line) \
+do { \
+ locker = PSI_FILE_CALL(get_thread_file_name_locker)( \
+ state, key, op, name, &locker); \
+ if (UNIV_LIKELY(locker != NULL)) { \
+ PSI_FILE_CALL(start_file_close_wait)( \
+ locker, src_file, src_line); \
+ } \
+} while (0)
+
+# define register_pfs_file_close_end(locker, result) \
+do { \
+ if (UNIV_LIKELY(locker != NULL)) { \
+ PSI_FILE_CALL(end_file_close_wait)( \
+ locker, result); \
+ } \
+} while (0)
+
# define register_pfs_file_io_begin(state, locker, file, count, op, \
src_file, src_line) \
do { \
@@ -306,6 +327,12 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_rename(key, oldpath, newpath) \
pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
+
+# define os_file_delete(key, name) \
+ pfs_os_file_delete_func(key, name, __FILE__, __LINE__)
+
+# define os_file_delete_if_exists(key, name) \
+ pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__)
#else /* UNIV_PFS_IO */
/* If UNIV_PFS_IO is not defined, these I/O APIs point
@@ -341,6 +368,11 @@ to original un-instrumented file I/O APIs */
# define os_file_rename(key, oldpath, newpath) \
os_file_rename_func(oldpath, newpath)
+# define os_file_delete(key, name) os_file_delete_func(name)
+
+# define os_file_delete_if_exists(key, name) \
+ os_file_delete_if_exists_func(name)
+
#endif /* UNIV_PFS_IO */
/* File types for directory entry data type */
@@ -527,8 +559,8 @@ Deletes a file. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
bool
-os_file_delete(
-/*===========*/
+os_file_delete_func(
+/*================*/
const char* name); /*!< in: file path as a null-terminated
string */
@@ -537,8 +569,8 @@ Deletes a file if it exists. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
bool
-os_file_delete_if_exists(
-/*=====================*/
+os_file_delete_if_exists_func(
+/*==========================*/
const char* name); /*!< in: file path as a null-terminated
string */
/***********************************************************************//**
@@ -767,6 +799,38 @@ pfs_os_file_rename_func(
const char* newpath,/*!< in: new file path */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete()
+@return true on success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: file path as a null-terminated
+ string */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
+directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete_if_exists()
+@return true on success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_if_exists_func(
+/*==============================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: file path as a null-terminated
+ string */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
#endif /* UNIV_PFS_IO */
#ifdef UNIV_HOTBACKUP
@@ -896,8 +960,8 @@ os_file_status(
The function os_file_dirname returns a directory component of a
null-terminated pathname string. In the usual case, dirname returns
the string up to, but not including, the final '/', and basename
-is the component following the final '/'. Trailing '/' charac­
-ters are not counted as part of the pathname.
+is the component following the final '/'. Trailing '/' characters
+are not counted as part of the pathname.
If path does not contain a slash, dirname returns the string ".".
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index bdd7eb5f8f4..defd8204ba3 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -386,4 +386,64 @@ pfs_os_file_rename_func(
return(result);
}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete()
+@return true on success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: file path as a null-terminated
+ string */
+ const char* src_file, /*!< in: file name where func invoked */
+ ulint src_line) /*!< in: line where the func invoked */
+{
+ bool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
+ name, src_file, src_line);
+
+ result = os_file_delete_func(name);
+
+ register_pfs_file_close_end(locker, 0);
+
+ return(result);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
+directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete_if_exists()
+@return true on success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_if_exists_func(
+/*==============================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: file path as a null-terminated
+ string */
+ const char* src_file, /*!< in: file name where func invoked */
+ ulint src_line) /*!< in: line where the func invoked */
+{
+ bool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
+ name, src_file, src_line);
+
+ result = os_file_delete_if_exists_func(name);
+
+ register_pfs_file_close_end(locker, 0);
+
+ return(result);
+}
#endif /* UNIV_PFS_IO */
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index 038a05edbd0..b1ad49b4915 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -162,6 +162,12 @@ Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -181,6 +187,12 @@ Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -205,27 +217,38 @@ page_cur_insert_rec_low(
dict_index_t* index, /*!< in: record descriptor */
const rec_t* rec, /*!< in: pointer to a physical record */
ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+ __attribute__((nonnull(1,2,3,4), warn_unused_result));
/***********************************************************//**
Inserts a record next to page cursor on a compressed and uncompressed
page. Returns pointer to inserted record if succeed, i.e.,
enough space available, NULL otherwise.
The cursor stays at the same position.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INTERN
rec_t*
page_cur_insert_rec_zip(
/*====================*/
- rec_t** current_rec,/*!< in/out: pointer to current record after
- which the new record is inserted */
- buf_block_t* block, /*!< in: buffer block of *current_rec */
+ page_cur_t* cursor, /*!< in/out: page cursor */
dict_index_t* index, /*!< in: record descriptor */
const rec_t* rec, /*!< in: pointer to a physical record */
ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+ __attribute__((nonnull(1,2,3,4), warn_unused_result));
/*************************************************************//**
Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied. */
+including that record. Infimum and supremum records are not copied.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
UNIV_INTERN
void
page_copy_rec_list_end_to_created_page(
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index 90a5a690487..028d33b17aa 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -237,6 +237,12 @@ Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -267,8 +273,8 @@ page_cur_tuple_insert(
rec, index, *offsets, ULINT_UNDEFINED, heap);
if (buf_block_get_page_zip(cursor->block)) {
- rec = page_cur_insert_rec_zip(&cursor->rec, cursor->block,
- index, rec, *offsets, mtr);
+ rec = page_cur_insert_rec_zip(
+ cursor, index, rec, *offsets, mtr);
} else {
rec = page_cur_insert_rec_low(cursor->rec,
index, rec, *offsets, mtr);
@@ -284,6 +290,12 @@ Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -296,8 +308,8 @@ page_cur_rec_insert(
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
if (buf_block_get_page_zip(cursor->block)) {
- return(page_cur_insert_rec_zip(&cursor->rec, cursor->block,
- index, rec, offsets, mtr));
+ return(page_cur_insert_rec_zip(
+ cursor, index, rec, offsets, mtr));
} else {
return(page_cur_insert_rec_low(cursor->rec,
index, rec, offsets, mtr));
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index 773ec4c2177..fb21aaec778 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -518,14 +518,32 @@ page_rec_get_heap_no(
const rec_t* rec); /*!< in: the physical record */
/************************************************************//**
Determine whether the page is a B-tree leaf.
-@return TRUE if the page is a B-tree leaf */
+@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
UNIV_INLINE
-ibool
+bool
page_is_leaf(
/*=========*/
const page_t* page) /*!< in: page */
__attribute__((nonnull, pure));
/************************************************************//**
+Determine whether the page is empty.
+@return true if the page is empty (PAGE_N_RECS = 0) */
+UNIV_INLINE
+bool
+page_is_empty(
+/*==========*/
+ const page_t* page) /*!< in: page */
+ __attribute__((nonnull, pure));
+/************************************************************//**
+Determine whether the page contains garbage.
+@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
+UNIV_INLINE
+bool
+page_has_garbage(
+/*=============*/
+ const page_t* page) /*!< in: page */
+ __attribute__((nonnull, pure));
+/************************************************************//**
Gets the pointer to the next record on the page.
@return pointer to next record */
UNIV_INLINE
@@ -566,10 +584,10 @@ UNIV_INLINE
void
page_rec_set_next(
/*==============*/
- rec_t* rec, /*!< in: pointer to record,
- must not be page supremum */
- rec_t* next); /*!< in: pointer to next record,
- must not be page infimum */
+ rec_t* rec, /*!< in: pointer to record,
+ must not be page supremum */
+ const rec_t* next); /*!< in: pointer to next record,
+ must not be page infimum */
/************************************************************//**
Gets the pointer to the previous record.
@return pointer to previous record */
@@ -777,11 +795,27 @@ page_create_zip(
page is created */
dict_index_t* index, /*!< in: the index of the page */
ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-
+ trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
+/**********************************************************//**
+Empty a previously created B-tree index page. */
+UNIV_INTERN
+void
+page_create_empty(
+/*==============*/
+ buf_block_t* block, /*!< in/out: B-tree block */
+ dict_index_t* index, /*!< in: the index of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull(1,2)));
/*************************************************************//**
Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page. */
+touch the lock table and max trx id on page or compress the page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
UNIV_INTERN
void
page_copy_rec_list_end_no_locks(
@@ -795,6 +829,12 @@ page_copy_rec_list_end_no_locks(
Copies records from page to new_page, from the given record onward,
including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to the original successor of the infimum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
UNIV_INTERN
@@ -811,6 +851,12 @@ page_copy_rec_list_end(
Copies records from page to new_page, up to the given record, NOT
including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to the original predecessor of the supremum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
UNIV_INTERN
@@ -855,6 +901,12 @@ page_delete_rec_list_start(
/*************************************************************//**
Moves record list end to another page. Moved records include
split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return TRUE on success; FALSE on compression failure (new_block will
be decompressed) */
UNIV_INTERN
@@ -870,6 +922,12 @@ page_move_rec_list_end(
/*************************************************************//**
Moves record list start to another page. Moved records do not include
split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return TRUE on success; FALSE on compression failure */
UNIV_INTERN
ibool
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index c2e20d81a29..1410f21b670 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -266,9 +266,9 @@ page_rec_get_heap_no(
/************************************************************//**
Determine whether the page is a B-tree leaf.
-@return TRUE if the page is a B-tree leaf */
+@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
UNIV_INLINE
-ibool
+bool
page_is_leaf(
/*=========*/
const page_t* page) /*!< in: page */
@@ -277,6 +277,30 @@ page_is_leaf(
}
/************************************************************//**
+Determine whether the page is empty.
+@return true if the page is empty (PAGE_N_RECS = 0) */
+UNIV_INLINE
+bool
+page_is_empty(
+/*==========*/
+ const page_t* page) /*!< in: page */
+{
+ return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_N_RECS)));
+}
+
+/************************************************************//**
+Determine whether the page contains garbage.
+@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
+UNIV_INLINE
+bool
+page_has_garbage(
+/*=============*/
+ const page_t* page) /*!< in: page */
+{
+ return(!!*(const uint16*) (page + (PAGE_HEADER + PAGE_GARBAGE)));
+}
+
+/************************************************************//**
Gets the offset of the first record on the page.
@return offset of the first record in record list, relative from page */
UNIV_INLINE
@@ -805,9 +829,9 @@ UNIV_INLINE
void
page_rec_set_next(
/*==============*/
- rec_t* rec, /*!< in: pointer to record,
+ rec_t* rec, /*!< in: pointer to record,
must not be page supremum */
- rec_t* next) /*!< in: pointer to next record,
+ const rec_t* next) /*!< in: pointer to next record,
must not be page infimum */
{
ulint offs;
diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
index 533b0d3cf98..95143a4bb44 100644
--- a/storage/innobase/include/page0types.h
+++ b/storage/innobase/include/page0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,9 +42,7 @@ typedef byte page_t;
struct page_cur_t;
/** Compressed index page */
-typedef byte page_zip_t;
-/** Compressed page descriptor */
-struct page_zip_des_t;
+typedef byte page_zip_t;
/* The following definitions would better belong to page0zip.h,
but we cannot include page0zip.h from rem0rec.ic, because
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index 12781bd61b8..9d3b78ed2fc 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -41,14 +41,14 @@ Created June 2005 by Marko Makela
#include "mem0mem.h"
/* Compression level to be used by zlib. Settable by user. */
-extern ulint page_compression_level;
+extern uint page_zip_level;
/* Default compression level. */
#define DEFAULT_COMPRESSION_LEVEL 6
/* Whether or not to log compressed page images to avoid possible
compression algorithm changes in zlib. */
-extern bool page_log_compressed_pages;
+extern my_bool page_zip_log_pages;
/**********************************************************************//**
Determine the size of a compressed page in bytes.
@@ -125,7 +125,7 @@ page_zip_compress(
m_start, m_end, m_nonempty */
const page_t* page, /*!< in: uncompressed page */
dict_index_t* index, /*!< in: index of the B-tree node */
- ulint level, /*!< in: commpression level */
+ ulint level, /*!< in: compression level */
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
__attribute__((nonnull(1,2,3)));
@@ -495,7 +495,7 @@ page_zip_parse_compress_no_data(
byte* end_ptr, /*!< in: buffer end */
page_t* page, /*!< in: uncompressed page */
page_zip_des_t* page_zip, /*!< out: compressed page */
- dict_index_t* index) /*!< in: index */
+ dict_index_t* index) /*!< in: index */
__attribute__((nonnull(1,2)));
/**********************************************************************//**
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index 0062e1cb39f..6c7d8cd32c7 100644
--- a/storage/innobase/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -176,7 +176,7 @@ page_zip_rec_needs_ext(
ut_ad(ut_is_2pow(zip_size));
ut_ad(comp || !zip_size);
-#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE
+#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE
if (rec_size >= REC_MAX_DATA_SIZE) {
return(TRUE);
}
@@ -190,8 +190,8 @@ page_zip_rec_needs_ext(
one record on an empty leaf page. Subtract 1 byte for
the encoded heap number. Check also the available space
on the uncompressed page. */
- return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2)
- >= (page_zip_empty_size(n_fields, zip_size) - 1)
+ return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1)
+ >= page_zip_empty_size(n_fields, zip_size)
|| rec_size >= page_get_free_space_of_empty(TRUE) / 2);
}
@@ -231,9 +231,7 @@ ibool
page_zip_get_trailer_len(
/*=====================*/
const page_zip_des_t* page_zip,/*!< in: compressed page */
- ibool is_clust,/*!< in: TRUE if clustered index */
- ulint* entry_size)/*!< out: size of the uncompressed
- portion of a user record */
+ ibool is_clust)/*!< in: TRUE if clustered index */
{
ulint uncompressed_size;
@@ -252,10 +250,6 @@ page_zip_get_trailer_len(
ut_ad(!page_zip->n_blobs);
}
- if (entry_size) {
- *entry_size = uncompressed_size;
- }
-
return((page_dir_get_n_heap(page_zip->data) - 2)
* uncompressed_size
+ page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
@@ -272,11 +266,9 @@ page_zip_max_ins_size(
const page_zip_des_t* page_zip,/*!< in: compressed page */
ibool is_clust)/*!< in: TRUE if clustered index */
{
- ulint uncompressed_size;
ulint trailer_len;
- trailer_len = page_zip_get_trailer_len(page_zip, is_clust,
- &uncompressed_size);
+ trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
/* When a record is created, a pointer may be added to
the dense directory.
@@ -285,7 +277,7 @@ page_zip_max_ins_size(
Also the BLOB pointers will be allocated from there, but
we may as well count them in the length of the record. */
- trailer_len += uncompressed_size;
+ trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
return((lint) page_zip_get_size(page_zip)
- trailer_len - page_zip->m_end
@@ -305,13 +297,11 @@ page_zip_available(
ulint create) /*!< in: nonzero=add the record to
the heap */
{
- ulint uncompressed_size;
ulint trailer_len;
ut_ad(length > REC_N_NEW_EXTRA_BYTES);
- trailer_len = page_zip_get_trailer_len(page_zip, is_clust,
- &uncompressed_size);
+ trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
/* Subtract the fixed extra bytes and add the maximum
space needed for identifying the record (encoded heap_no). */
@@ -325,7 +315,7 @@ page_zip_available(
Also the BLOB pointers will be allocated from there, but
we may as well count them in the length of the record. */
- trailer_len += uncompressed_size;
+ trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
}
return(length + trailer_len + page_zip->m_end
@@ -422,7 +412,7 @@ page_zip_parse_compress_no_data(
byte* end_ptr, /*!< in: buffer end */
page_t* page, /*!< in: uncompressed page */
page_zip_des_t* page_zip, /*!< out: compressed page */
- dict_index_t* index) /*!< in: index */
+ dict_index_t* index) /*!< in: index */
{
ulint level;
if (end_ptr == ptr) {
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index 18a7deb9d26..a539320dd2a 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -259,7 +259,7 @@ rec_get_next_ptr_const(
}
if (comp) {
-#if UNIV_PAGE_SIZE <= 32768
+#if UNIV_PAGE_SIZE_MAX <= 32768
/* Note that for 64 KiB pages, field_value can 'wrap around'
and the debug assertion is not valid */
@@ -302,7 +302,7 @@ rec_get_next_ptr(
rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
{
- return((rec_t*) rec_get_next_ptr_const(rec, comp));
+ return(const_cast<rec_t*>(rec_get_next_ptr_const(rec, comp)));
}
/******************************************************//**
@@ -327,7 +327,7 @@ rec_get_next_offs(
field_value = mach_read_from_2(rec - REC_NEXT);
if (comp) {
-#if UNIV_PAGE_SIZE <= 32768
+#if UNIV_PAGE_SIZE_MAX <= 32768
/* Note that for 64 KiB pages, field_value can 'wrap around'
and the debug assertion is not valid */
@@ -1508,7 +1508,7 @@ rec_get_end(
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
- return((rec_t*) rec + rec_offs_data_size(offsets));
+ return(const_cast<rec_t*>(rec + rec_offs_data_size(offsets)));
}
/**********************************************************//**
@@ -1522,7 +1522,7 @@ rec_get_start(
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
- return((rec_t*) rec - rec_offs_extra_size(offsets));
+ return(const_cast<rec_t*>(rec - rec_offs_extra_size(offsets)));
}
#endif /* UNIV_DEBUG */
@@ -1541,7 +1541,7 @@ rec_copy(
ulint data_len;
ut_ad(rec && buf);
- ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(rec_validate(rec, offsets));
extra_len = rec_offs_extra_size(offsets);
diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h
index 984d907d390..41dac63963d 100644
--- a/storage/innobase/include/row0log.h
+++ b/storage/innobase/include/row0log.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -122,6 +122,7 @@ row_log_table_delete(
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ bool purge, /*!< in: true=purging BLOBs */
trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before
it was deleted */
UNIV_COLD __attribute__((nonnull));
@@ -173,28 +174,24 @@ row_log_table_insert(
or X-latched */
const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
UNIV_COLD __attribute__((nonnull));
-
/******************************************************//**
-Notes that a transaction is being rolled back. */
+Notes that a BLOB is being freed during online ALTER TABLE. */
UNIV_INTERN
void
-row_log_table_rollback(
-/*===================*/
- dict_index_t* index, /*!< in/out: clustered index */
- trx_id_t trx_id) /*!< in: transaction being rolled back */
+row_log_table_blob_free(
+/*====================*/
+ dict_index_t* index, /*!< in/out: clustered index, X-latched */
+ ulint page_no)/*!< in: starting page number of the BLOB */
UNIV_COLD __attribute__((nonnull));
-
/******************************************************//**
-Check if a transaction rollback has been initiated.
-@return true if inserts of this transaction were rolled back */
+Notes that a BLOB is being allocated during online ALTER TABLE. */
UNIV_INTERN
-bool
-row_log_table_is_rollback(
-/*======================*/
- const dict_index_t* index, /*!< in: clustered index */
- trx_id_t trx_id) /*!< in: transaction id */
- __attribute__((nonnull));
-
+void
+row_log_table_blob_alloc(
+/*=====================*/
+ dict_index_t* index, /*!< in/out: clustered index, X-latched */
+ ulint page_no)/*!< in: starting page number of the BLOB */
+ UNIV_COLD __attribute__((nonnull));
/******************************************************//**
Apply the row_log_table log to a table upon completing rebuild.
@return DB_SUCCESS, or error code on failure */
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index f464e46ae5b..390c0ce038b 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -206,14 +206,14 @@ will not be committed.
@return error code or DB_SUCCESS */
UNIV_INTERN
dberr_t
-row_merge_rename_tables(
-/*====================*/
+row_merge_rename_tables_dict(
+/*=========================*/
dict_table_t* old_table, /*!< in/out: old table, renamed to
tmp_name */
dict_table_t* new_table, /*!< in/out: new table, renamed to
old_table->name */
const char* tmp_name, /*!< in: new name for old_table */
- trx_t* trx) /*!< in: transaction handle */
+ trx_t* trx) /*!< in/out: dictionary transaction */
__attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
@@ -265,7 +265,7 @@ row_merge_is_index_usable(
/*********************************************************************//**
Drop a table. The caller must have ensured that the background stats
thread is not processing the table. This can be done by calling
-dict_stats_wait_bg_to_stop_using_tables() after locking the dictionary and
+dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
before calling this function.
@return DB_SUCCESS or error code */
UNIV_INTERN
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index 48d4b94dcae..209894833a0 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -1,6 +1,6 @@
/***********************************************************************
-Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it
@@ -98,7 +98,7 @@ enum monitor_type_t {
/** Counter minimum value is initialized to be max value of
mon_type_t (ib_int64_t) */
-#define MIN_RESERVED ((mon_type_t) (IB_ULONGLONG_MAX >> 1))
+#define MIN_RESERVED ((mon_type_t) (IB_UINT64_MAX >> 1))
#define MAX_RESERVED (~MIN_RESERVED)
/** This enumeration defines internal monitor identifier used internally
@@ -169,6 +169,7 @@ enum monitor_id_t {
MONITOR_FLUSH_BATCH_SCANNED,
MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
+ MONITOR_FLUSH_HP_RESCAN,
MONITOR_FLUSH_BATCH_TOTAL_PAGE,
MONITOR_FLUSH_BATCH_COUNT,
MONITOR_FLUSH_BATCH_PAGES,
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index e136f30f96a..40d502f4459 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -53,15 +53,6 @@ srv_parse_data_file_paths_and_sizes(
/*================================*/
char* str); /*!< in/out: the data file path string */
/*********************************************************************//**
-Reads log group home directories from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
- char* str); /*!< in/out: character string */
-/*********************************************************************//**
Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
and srv_parse_log_group_home_dirs(). */
UNIV_INTERN
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index cd1ecc096fd..50da55d2ea3 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -109,7 +109,7 @@ trx_undo_rec_get_pars(
externally stored fild */
undo_no_t* undo_no, /*!< out: undo log record number */
table_id_t* table_id) /*!< out: table id */
- __attribute__((nonnull, warn_unused_result));
+ __attribute__((nonnull));
/*******************************************************************//**
Builds a row reference from an undo log record.
@return pointer to remaining part of undo record */
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index bb84c1806f2..a75d925d4eb 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -190,7 +190,18 @@ UNIV_INTERN
void
trx_commit(
/*=======*/
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
+/****************************************************************//**
+Commits a transaction and a mini-transaction. */
+UNIV_INTERN
+void
+trx_commit_low(
+/*===========*/
+ trx_t* trx, /*!< in/out: transaction */
+ mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
+ or NULL if trx made no modifications */
+ __attribute__((nonnull(1)));
/****************************************************************//**
Cleans up a transaction at database startup. The cleanup is needed if
the transaction already got to the middle of a commit when the database
@@ -665,7 +676,7 @@ lock_sys->mutex and sometimes by trx->mutex. */
struct trx_t{
ulint magic_n;
- ib_mutex_t mutex; /*!< Mutex protecting the fields
+ ib_mutex_t mutex; /*!< Mutex protecting the fields
state and lock
(except some fields of lock, which
are protected by lock_sys->mutex) */
@@ -823,7 +834,7 @@ struct trx_t{
COMMITTED_IN_MEMORY state.
Protected by trx_sys_t::mutex
when trx->in_rw_trx_list. Initially
- set to IB_ULONGLONG_MAX. */
+ set to TRX_ID_MAX. */
time_t start_time; /*!< time the trx object was created
or the state last time became
@@ -914,7 +925,7 @@ struct trx_t{
trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
oldest first */
/*------------------------------*/
- ib_mutex_t undo_mutex; /*!< mutex protecting the fields in this
+ ib_mutex_t undo_mutex; /*!< mutex protecting the fields in this
section (down to undo_no_arr), EXCEPT
last_sql_stat_start, which can be
accessed only when we know that there
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
index 4f515cb5248..7ca95131328 100644
--- a/storage/innobase/include/trx0types.h
+++ b/storage/innobase/include/trx0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -119,6 +119,9 @@ typedef ib_id_t roll_ptr_t;
/** Undo number */
typedef ib_id_t undo_no_t;
+/** Maximum transaction identifier */
+#define TRX_ID_MAX IB_ID_MAX
+
/** Transaction savepoint */
struct trx_savept_t{
undo_no_t least_undo_no; /*!< least undo number to undo */
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index 4021d71c68a..61b0dabb1e6 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -175,6 +175,7 @@ trx_undo_get_prev_rec(
trx_undo_rec_t* rec, /*!< in: undo record */
ulint page_no,/*!< in: undo log header page number */
ulint offset, /*!< in: undo log header offset on page */
+ bool shared, /*!< in: true=S-latch, false=X-latch */
mtr_t* mtr); /*!< in: mtr */
/***********************************************************************//**
Gets the next record in an undo log.
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 65f289eda35..a4d292b4f0f 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 5
#define INNODB_VERSION_MINOR 6
-#define INNODB_VERSION_BUGFIX 10
+#define INNODB_VERSION_BUGFIX 14
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
@@ -171,6 +171,7 @@ command. Not tested on Windows. */
#define UNIV_COMPILE_TEST_FUNCS
*/
+#undef UNIV_SYNC_DEBUG
#if defined(HAVE_valgrind)&& defined(HAVE_VALGRIND_MEMCHECK_H)
# define UNIV_DEBUG_VALGRIND
#endif /* HAVE_VALGRIND */
@@ -332,7 +333,7 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t;
#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift
/** The universal page size of the database */
-#define UNIV_PAGE_SIZE srv_page_size
+#define UNIV_PAGE_SIZE ((ulint) srv_page_size)
/** log2 of smallest compressed page size (1<<10 == 1024 bytes)
Note: This must never change! */
@@ -433,6 +434,7 @@ macro ULINTPF. */
# define INT64PF "%I64d"
# define UINT64PF "%I64u"
# define UINT64PFx "%016I64u"
+# define DBUG_LSN_PF "%llu"
typedef __int64 ib_int64_t;
typedef unsigned __int64 ib_uint64_t;
typedef unsigned __int32 ib_uint32_t;
@@ -442,6 +444,7 @@ typedef unsigned __int32 ib_uint32_t;
# define INT64PF "%"PRId64
# define UINT64PF "%"PRIu64
# define UINT64PFx "%016"PRIx64
+# define DBUG_LSN_PF UINT64PF
typedef int64_t ib_int64_t;
typedef uint64_t ib_uint64_t;
typedef uint32_t ib_uint32_t;
@@ -486,11 +489,11 @@ typedef unsigned long long int ullint;
#define ULINT_MAX ((ulint)(-2))
/** Maximum value for ib_uint64_t */
-#define IB_ULONGLONG_MAX ((ib_uint64_t) (~0ULL))
-#define IB_UINT64_MAX IB_ULONGLONG_MAX
+#define IB_UINT64_MAX ((ib_uint64_t) (~0ULL))
/** The generic InnoDB system object identifier data type */
-typedef ib_uint64_t ib_id_t;
+typedef ib_uint64_t ib_id_t;
+#define IB_ID_MAX IB_UINT64_MAX
/** The 'undefined' value for a ullint */
#define ULLINT_UNDEFINED ((ullint)(-1))
@@ -633,6 +636,10 @@ typedef void* os_thread_ret_t;
(const void*) (addr), (unsigned) (size), (long) \
(((const char*) _p) - ((const char*) (addr)))); \
} while (0)
+# define UNIV_MEM_TRASH(addr, c, size) do { \
+ ut_d(memset(addr, c, size)); \
+ UNIV_MEM_INVALID(addr, size); \
+ } while (0)
#else
# define UNIV_MEM_VALID(addr, size) do {} while(0)
# define UNIV_MEM_INVALID(addr, size) do {} while(0)
@@ -644,6 +651,7 @@ typedef void* os_thread_ret_t;
# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) do {} while(0)
# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
+# define UNIV_MEM_TRASH(addr, c, size) do {} while(0)
#endif
#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \
UNIV_MEM_ASSERT_W(addr, size); \
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
index 0f2da165da7..6a4afe99597 100644
--- a/storage/innobase/include/ut0dbg.h
+++ b/storage/innobase/include/ut0dbg.h
@@ -61,49 +61,8 @@ ut_dbg_assertion_failed(
ulint line) /*!< in: line number of the assertion */
UNIV_COLD __attribute__((nonnull(2)));
-
-#define UT_DBG_USE_ABORT
-
-
-#ifndef UT_DBG_USE_ABORT
-/** A null pointer that will be dereferenced to trigger a memory trap */
-extern ulint* ut_dbg_null_ptr;
-#endif
-
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/** If this is set to TRUE by ut_dbg_assertion_failed(), all threads
-will stop at the next ut_a() or ut_ad(). */
-extern ibool ut_dbg_stop_threads;
-
-/*************************************************************//**
-Stop a thread after assertion failure. */
-UNIV_INTERN
-void
-ut_dbg_stop_thread(
-/*===============*/
- const char* file,
- ulint line);
-#endif
-
-#ifdef UT_DBG_USE_ABORT
/** Abort the execution. */
-#ifdef _WIN32
-# define UT_DBG_PANIC __debugbreak()
-#else
# define UT_DBG_PANIC abort()
-#endif
-/** Stop threads (null operation) */
-# define UT_DBG_STOP do {} while (0)
-#else /* UT_DBG_USE_ABORT */
-/** Abort the execution. */
-# define UT_DBG_PANIC \
- if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL
-/** Stop threads in ut_a(). */
-# define UT_DBG_STOP do \
- if (UNIV_UNLIKELY(ut_dbg_stop_threads)) { \
- ut_dbg_stop_thread(__FILE__, (ulint) __LINE__); \
- } while (0)
-#endif /* UT_DBG_USE_ABORT */
/** Abort execution if EXPR does not evaluate to nonzero.
@param EXPR assertion expression that should hold */
@@ -113,7 +72,6 @@ ut_dbg_stop_thread(
__FILE__, (ulint) __LINE__); \
UT_DBG_PANIC; \
} \
- UT_DBG_STOP; \
} while (0)
/** Abort execution. */
diff --git a/storage/innobase/include/ut0rbt.h b/storage/innobase/include/ut0rbt.h
index e0593e99bde..59e3fc94598 100644
--- a/storage/innobase/include/ut0rbt.h
+++ b/storage/innobase/include/ut0rbt.h
@@ -136,7 +136,7 @@ rbt_create_arg_cmp(
size_t sizeof_value, /*!< in: size in bytes */
ib_rbt_arg_compare
compare, /*!< in: comparator */
- void* cmp_arg); /*!< in: compare fn arg */
+ void* cmp_arg); /*!< in: compare fn arg */
/**********************************************************************//**
Delete a node from the red black tree, identified by key */
UNIV_INTERN
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 1152152cc77..be51c395aae 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -48,6 +48,7 @@ Created 5/7/1996 Heikki Tuuri
#include "ut0vec.h"
#include "btr0btr.h"
#include "dict0boot.h"
+#include <set>
/* Restricts the length of search we will do in the waits-for
graph of transactions */
@@ -369,7 +370,7 @@ struct lock_deadlock_ctx_t {
struct lock_stack_t {
const lock_t* lock; /*!< Current lock */
const lock_t* wait_lock; /*!< Waiting for lock */
- unsigned heap_no:16; /*!< heap number if rec lock */
+ ulint heap_no; /*!< heap number if rec lock */
};
/** Stack to use during DFS search. Currently only a single stack is required
@@ -1498,6 +1499,7 @@ lock_rec_has_expl(
lock = lock_rec_get_next(heap_no, lock)) {
if (lock->trx == trx
+ && !lock_rec_get_insert_intention(lock)
&& !lock_is_wait_not_by_other(lock->type_mode)
&& lock_mode_stronger_or_eq(
lock_get_mode(lock),
@@ -1508,8 +1510,7 @@ lock_rec_has_expl(
|| heap_no == PAGE_HEAP_NO_SUPREMUM)
&& (!lock_rec_get_gap(lock)
|| (precise_mode & LOCK_GAP)
- || heap_no == PAGE_HEAP_NO_SUPREMUM)
- && (!lock_rec_get_insert_intention(lock))) {
+ || heap_no == PAGE_HEAP_NO_SUPREMUM)) {
return(lock);
}
@@ -1860,6 +1861,7 @@ lock_rec_enqueue_waiting(
trx_id_t victim_trx_id;
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
trx = thr_get_trx(thr);
@@ -2113,6 +2115,8 @@ lock_rec_lock_fast(
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
+ DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL););
+
lock = lock_rec_get_first_on_page(block);
trx = thr_get_trx(thr);
@@ -2190,8 +2194,9 @@ lock_rec_lock_slow(
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
- trx = thr_get_trx(thr);
+ DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK););
+ trx = thr_get_trx(thr);
trx_mutex_enter(trx);
lock = lock_rec_has_expl(mode, block, heap_no, trx);
@@ -3599,16 +3604,14 @@ lock_get_next_lock(
} else {
ut_ad(heap_no == ULINT_UNDEFINED);
ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
- lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
- }
- if (lock == NULL) {
- return(NULL);
+ lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
}
+ } while (lock != NULL
+ && lock->trx->lock.deadlock_mark > ctx->mark_start);
- } while (lock->trx->lock.deadlock_mark > ctx->mark_start);
-
- ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
+ ut_ad(lock == NULL
+ || lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
return(lock);
}
@@ -3643,20 +3646,20 @@ lock_get_first_lock(
lock = lock_rec_get_first_on_page_addr(
lock->un_member.rec_lock.space,
lock->un_member.rec_lock.page_no);
+
+ /* Position on the first lock on the physical record. */
+ if (!lock_rec_get_nth_bit(lock, *heap_no)) {
+ lock = lock_rec_get_next_const(*heap_no, lock);
+ }
+
} else {
*heap_no = ULINT_UNDEFINED;
ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
}
- ut_ad(lock != NULL);
-
- /* Skip sub-trees that have already been searched. */
-
- if (lock->trx->lock.deadlock_mark > ctx->mark_start) {
- return(lock_get_next_lock(ctx, lock, *heap_no));
- }
-
+ ut_a(lock != NULL);
+ ut_a(lock != ctx->wait_lock);
ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
return(lock);
@@ -3735,44 +3738,6 @@ lock_deadlock_select_victim(
}
/********************************************************************//**
-Check whether the current waiting lock in the context has to wait for
-the given lock that is ahead in the queue.
-@return lock instance that could cause potential deadlock. */
-static
-const lock_t*
-lock_deadlock_check(
-/*================*/
- const lock_deadlock_ctx_t* ctx, /*!< in: deadlock context */
- const lock_t* lock) /*!< in: lock to check */
-{
- ut_ad(lock_mutex_own());
-
- /* If it is the joining transaction wait lock or the joining
- transaction was granted its lock due to deadlock detection. */
- if (lock == ctx->start->lock.wait_lock
- || ctx->start->lock.wait_lock == NULL) {
- ; /* Skip */
- } else if (lock == ctx->wait_lock) {
-
- /* We can mark this subtree as searched */
- ut_ad(lock->trx->lock.deadlock_mark <= ctx->mark_start);
- lock->trx->lock.deadlock_mark = ++lock_mark_counter;
-
- /* We are not prepared for an overflow. This 64-bit
- counter should never wrap around. At 10^9 increments
- per second, it would take 10^3 years of uptime. */
-
- ut_ad(lock_mark_counter > 0);
-
- } else if (lock_has_to_wait(ctx->wait_lock, lock)) {
-
- return(lock);
- }
-
- return(NULL);
-}
-
-/********************************************************************//**
Pop the deadlock search state from the stack.
@return stack slot instance that was on top of the stack. */
static
@@ -3781,23 +3746,11 @@ lock_deadlock_pop(
/*==============*/
lock_deadlock_ctx_t* ctx) /*!< in/out: context */
{
- const lock_stack_t* stack;
- const trx_lock_t* trx_lock;
-
ut_ad(lock_mutex_own());
ut_ad(ctx->depth > 0);
- do {
- /* Restore search state. */
-
- stack = &lock_stack[--ctx->depth];
- trx_lock = &stack->lock->trx->lock;
-
- /* Skip sub-trees that have already been searched. */
- } while (ctx->depth > 0 && trx_lock->deadlock_mark > ctx->mark_start);
-
- return(ctx->depth == 0) ? NULL : stack;
+ return(&lock_stack[--ctx->depth]);
}
/********************************************************************//**
@@ -3853,23 +3806,54 @@ lock_deadlock_search(
/* Look at the locks ahead of wait_lock in the lock queue. */
lock = lock_get_first_lock(ctx, &heap_no);
- do {
+
+ for (;;) {
+
/* We should never visit the same sub-tree more than once. */
- ut_ad(lock->trx->lock.deadlock_mark <= ctx->mark_start);
+ ut_ad(lock == NULL
+ || lock->trx->lock.deadlock_mark <= ctx->mark_start);
- ++ctx->cost;
+ while (ctx->depth > 0 && lock == NULL) {
+ const lock_stack_t* stack;
+
+ /* Restore previous search state. */
- if (lock_deadlock_check(ctx, lock) == NULL) {
+ stack = lock_deadlock_pop(ctx);
- /* No conflict found, skip this lock. */
+ lock = stack->lock;
+ heap_no = stack->heap_no;
+ ctx->wait_lock = stack->wait_lock;
+
+ lock = lock_get_next_lock(ctx, lock, heap_no);
+ }
+
+ if (lock == NULL) {
+ break;
+ } else if (lock == ctx->wait_lock) {
+
+ /* We can mark this subtree as searched */
+ ut_ad(lock->trx->lock.deadlock_mark <= ctx->mark_start);
+
+ lock->trx->lock.deadlock_mark = ++lock_mark_counter;
+
+ /* We are not prepared for an overflow. This 64-bit
+ counter should never wrap around. At 10^9 increments
+ per second, it would take 10^3 years of uptime. */
+
+ ut_ad(lock_mark_counter > 0);
+
+ lock = NULL;
+
+ } else if (!lock_has_to_wait(ctx->wait_lock, lock)) {
+
+ /* No conflict, next lock */
+ lock = lock_get_next_lock(ctx, lock, heap_no);
} else if (lock->trx == ctx->start) {
/* Found a cycle. */
- if (!srv_read_only_mode) {
- lock_deadlock_notify(ctx, lock);
- }
+ lock_deadlock_notify(ctx, lock);
return(lock_deadlock_select_victim(ctx)->id);
@@ -3887,6 +3871,8 @@ lock_deadlock_search(
/* Another trx ahead has requested a lock in an
incompatible mode, and is itself waiting for a lock. */
+ ++ctx->cost;
+
/* Save current search state. */
if (!lock_deadlock_push(ctx, lock, heap_no)) {
@@ -3901,31 +3887,17 @@ lock_deadlock_search(
ctx->wait_lock = lock->trx->lock.wait_lock;
lock = lock_get_first_lock(ctx, &heap_no);
- if (lock != NULL) {
- continue;
+ if (lock->trx->lock.deadlock_mark > ctx->mark_start) {
+ lock = lock_get_next_lock(ctx, lock, heap_no);
}
- }
- if (lock != NULL) {
+ } else {
lock = lock_get_next_lock(ctx, lock, heap_no);
}
+ }
- if (lock == NULL && ctx->depth > 0) {
- const lock_stack_t* stack;
-
- /* Restore previous search state. */
-
- stack = lock_deadlock_pop(ctx);
-
- if (stack != NULL) {
- lock = stack->lock;
- heap_no = stack->heap_no;
- ctx->wait_lock = stack->wait_lock;
- }
- }
-
- } while (lock != NULL || ctx->depth > 0);
-
+ ut_a(lock == NULL && ctx->depth == 0);
+
/* No deadlock found. */
return(0);
}
@@ -4278,6 +4250,7 @@ lock_table_enqueue_waiting(
trx_id_t victim_trx_id;
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
trx = thr_get_trx(thr);
ut_ad(trx_mutex_own(trx));
@@ -4461,6 +4434,35 @@ lock_table(
}
/*********************************************************************//**
+Creates a table IX lock object for a resurrected transaction. */
+UNIV_INTERN
+void
+lock_table_ix_resurrect(
+/*====================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(trx->is_recovered);
+
+ if (lock_table_has(trx, table, LOCK_IX)) {
+ return;
+ }
+
+ lock_mutex_enter();
+
+ /* We have to check if the new lock is compatible with any locks
+ other transactions have in the table lock queue. */
+
+ ut_ad(!lock_table_other_has_incompatible(
+ trx, LOCK_WAIT, table, LOCK_IX));
+
+ trx_mutex_enter(trx);
+ lock_table_create(table, LOCK_IX, trx);
+ lock_mutex_exit();
+ trx_mutex_exit(trx);
+}
+
+/*********************************************************************//**
Checks if a waiting table lock request still has to wait in a queue.
@return TRUE if still has to wait */
static
@@ -4853,15 +4855,21 @@ lock_remove_recovered_trx_record_locks(
ut_a(!lock_get_wait(lock));
- /* Recovered transactions don't have any
- table level locks. */
-
- ut_a(lock_get_type_low(lock) == LOCK_REC);
-
next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
- if (lock->index->table == table) {
- lock_rec_discard(lock);
+ switch (lock_get_type_low(lock)) {
+ default:
+ ut_error;
+ case LOCK_TABLE:
+ if (lock->un_member.tab_lock.table == table) {
+ lock_trx_table_locks_remove(lock);
+ lock_table_remove_low(lock);
+ }
+ break;
+ case LOCK_REC:
+ if (lock->index->table == table) {
+ lock_rec_discard(lock);
+ }
}
}
@@ -5820,8 +5828,11 @@ bool
lock_validate()
/*===========*/
{
- lock_mutex_enter();
+ typedef std::pair<ulint, ulint> page_addr_t;
+ typedef std::set<page_addr_t> page_addr_set;
+ page_addr_set pages;
+ lock_mutex_enter();
mutex_enter(&trx_sys->mutex);
ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
@@ -5840,20 +5851,19 @@ lock_validate()
ulint space = lock->un_member.rec_lock.space;
ulint page_no = lock->un_member.rec_lock.page_no;
- lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
-
- lock_rec_block_validate(space, page_no);
-
- lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
+ pages.insert(std::make_pair(space, page_no));
}
}
mutex_exit(&trx_sys->mutex);
-
lock_mutex_exit();
+ for (page_addr_set::const_iterator it = pages.begin();
+ it != pages.end();
+ ++it) {
+ lock_rec_block_validate((*it).first, (*it).second);
+ }
+
return(true);
}
#endif /* UNIV_DEBUG */
@@ -7053,4 +7063,26 @@ lock_trx_has_sys_table_locks(
return(strongest_lock);
}
+
+/*******************************************************************//**
+Check if the transaction holds an exclusive lock on a record.
+@return whether the locks are held */
+UNIV_INTERN
+bool
+lock_trx_has_rec_x_lock(
+/*====================*/
+ const trx_t* trx, /*!< in: transaction to check */
+ const dict_table_t* table, /*!< in: table to check */
+ const buf_block_t* block, /*!< in: buffer block of the record */
+ ulint heap_no)/*!< in: record heap number */
+{
+ ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);
+
+ lock_mutex_enter();
+ ut_a(lock_table_has(trx, table, LOCK_IX));
+ ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, trx));
+ lock_mutex_exit();
+ return(true);
+}
#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc
index fc355d8bb6d..a1c35e20ead 100644
--- a/storage/innobase/lock/lock0wait.cc
+++ b/storage/innobase/lock/lock0wait.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -266,6 +266,16 @@ lock_wait_suspend_thread(
lock_wait_mutex_exit();
trx_mutex_exit(trx);
+ ulint lock_type = ULINT_UNDEFINED;
+
+ lock_mutex_enter();
+
+ if (const lock_t* wait_lock = trx->lock.wait_lock) {
+ lock_type = lock_get_type_low(wait_lock);
+ }
+
+ lock_mutex_exit();
+
had_dict_lock = trx->dict_operation_lock_mode;
switch (had_dict_lock) {
@@ -301,8 +311,18 @@ lock_wait_suspend_thread(
srv_conc_force_exit_innodb(trx);
}
+ /* Unknown is also treated like a record lock */
+ if (lock_type == ULINT_UNDEFINED || lock_type == LOCK_REC) {
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK);
+ } else {
+ ut_ad(lock_type == LOCK_TABLE);
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_TABLE_LOCK);
+ }
+
os_event_wait(slot->event);
+ thd_wait_end(trx->mysql_thd);
+
/* After resuming, reacquire the data dictionary latch if
necessary. */
@@ -333,7 +353,8 @@ lock_wait_suspend_thread(
finish_time = (ib_int64_t) sec * 1000000 + ms;
}
- diff_time = (ulint) (finish_time - start_time);
+ diff_time = (finish_time > start_time) ?
+ (ulint) (finish_time - start_time) : 0;
srv_stats.n_lock_wait_current_count.dec();
srv_stats.n_lock_wait_time.add(diff_time);
@@ -346,6 +367,10 @@ lock_wait_suspend_thread(
lock_sys->n_lock_max_wait_time = diff_time;
}
+
+ /* Record the lock wait time for this thread */
+ thd_set_lock_wait_time(trx->mysql_thd, diff_time);
+
}
if (lock_wait_timeout < 100000000
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index b6909f4771a..a90d49becdf 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -852,7 +852,7 @@ log_init(void)
recv_sys->scanned_lsn = log_sys->lsn;
recv_sys->scanned_checkpoint_no = 0;
recv_sys->recovered_lsn = log_sys->lsn;
- recv_sys->limit_lsn = IB_ULONGLONG_MAX;
+ recv_sys->limit_lsn = LSN_MAX;
#endif
}
@@ -1161,7 +1161,7 @@ log_group_file_header_flush(
srv_stats.os_log_pending_writes.inc();
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
(ulint) (dest_offset / UNIV_PAGE_SIZE),
(ulint) (dest_offset % UNIV_PAGE_SIZE),
OS_FILE_LOG_BLOCK_SIZE,
@@ -1290,7 +1290,7 @@ loop:
ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
(ulint) (next_offset / UNIV_PAGE_SIZE),
(ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
group);
@@ -1323,7 +1323,7 @@ log_write_up_to(
/*============*/
lsn_t lsn, /*!< in: log sequence number up to which
the log should be written,
- IB_ULONGLONG_MAX if not specified */
+ LSN_MAX if not specified */
ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
or LOG_WAIT_ALL_GROUPS */
ibool flush_to_disk)
@@ -1787,7 +1787,7 @@ log_group_checkpoint(
#ifdef UNIV_LOG_ARCHIVE
if (log_sys->archiving_state == LOG_ARCH_OFF) {
- archived_lsn = IB_ULONGLONG_MAX;
+ archived_lsn = LSN_MAX;
} else {
archived_lsn = log_sys->archived_lsn;
@@ -1799,7 +1799,7 @@ log_group_checkpoint(
mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
#else /* UNIV_LOG_ARCHIVE */
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
+ mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
#endif /* UNIV_LOG_ARCHIVE */
for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
@@ -1855,7 +1855,7 @@ log_group_checkpoint(
added with 1, as we want to distinguish between a normal log
file write and a checkpoint field write */
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, 0,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->space_id, 0,
write_offset / UNIV_PAGE_SIZE,
write_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
@@ -1906,7 +1906,7 @@ log_reset_first_header_and_checkpoint(
mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
+ mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
@@ -1937,7 +1937,7 @@ log_group_read_checkpoint_info(
MONITOR_INC(MONITOR_LOG_IO);
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, 0,
+ fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
}
@@ -2070,7 +2070,7 @@ void
log_make_checkpoint_at(
/*===================*/
lsn_t lsn, /*!< in: make a checkpoint at this or a
- later lsn, if IB_ULONGLONG_MAX, makes
+ later lsn, if LSN_MAX, makes
a checkpoint at the latest lsn */
ibool write_always) /*!< in: the function normally checks if
the new checkpoint would have a
@@ -2196,7 +2196,7 @@ log_group_read_log_seg(
{
ulint len;
lsn_t source_offset;
- ibool sync;
+ bool sync;
ut_ad(mutex_own(&(log_sys->mutex)));
@@ -2294,7 +2294,7 @@ log_group_archive_file_header_write(
MONITOR_INC(MONITOR_LOG_IO);
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
dest_offset / UNIV_PAGE_SIZE,
dest_offset % UNIV_PAGE_SIZE,
2 * OS_FILE_LOG_BLOCK_SIZE,
@@ -2329,7 +2329,7 @@ log_group_archive_completed_header_write(
MONITOR_INC(MONITOR_LOG_IO);
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
dest_offset / UNIV_PAGE_SIZE,
dest_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
@@ -2458,7 +2458,7 @@ loop:
MONITOR_INC(MONITOR_LOG_IO);
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id,
(ulint) (next_offset / UNIV_PAGE_SIZE),
(ulint) (next_offset % UNIV_PAGE_SIZE),
ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 8cefa9e4b70..e7a643b516a 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -1689,17 +1689,13 @@ recv_recover_page_func(
start_lsn = recv->start_lsn;
}
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Applying log rec"
- " type %lu len %lu"
- " to space %lu page no %lu\n",
- (ulong) recv->type, (ulong) recv->len,
- (ulong) recv_addr->space,
- (ulong) recv_addr->page_no);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_log",
+ ("apply " DBUG_LSN_PF ": %u len %u "
+ "page %u:%u", recv->start_lsn,
+ (unsigned) recv->type,
+ (unsigned) recv->len,
+ (unsigned) recv_addr->space,
+ (unsigned) recv_addr->page_no));
recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len,
@@ -1734,19 +1730,6 @@ recv_recover_page_func(
}
#endif /* UNIV_ZIP_DEBUG */
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_max_page_lsn < page_lsn) {
- recv_max_page_lsn = page_lsn;
- }
-
- recv_addr->state = RECV_PROCESSED;
-
- ut_a(recv_sys->n_addrs);
- recv_sys->n_addrs--;
-
- mutex_exit(&(recv_sys->mutex));
-
#ifndef UNIV_HOTBACKUP
if (modification_to_page) {
ut_a(block);
@@ -1763,6 +1746,20 @@ recv_recover_page_func(
mtr.modifications = FALSE;
mtr_commit(&mtr);
+
+ mutex_enter(&(recv_sys->mutex));
+
+ if (recv_max_page_lsn < page_lsn) {
+ recv_max_page_lsn = page_lsn;
+ }
+
+ recv_addr->state = RECV_PROCESSED;
+
+ ut_a(recv_sys->n_addrs);
+ recv_sys->n_addrs--;
+
+ mutex_exit(&(recv_sys->mutex));
+
}
#ifndef UNIV_HOTBACKUP
@@ -2060,7 +2057,7 @@ recv_apply_log_recs_for_backup(void)
fil0fil.cc routines */
if (zip_size) {
- error = fil_io(OS_FILE_READ, TRUE,
+ error = fil_io(OS_FILE_READ, true,
recv_addr->space, zip_size,
recv_addr->page_no, 0, zip_size,
block->page.zip.data, NULL);
@@ -2069,7 +2066,7 @@ recv_apply_log_recs_for_backup(void)
exit(1);
}
} else {
- error = fil_io(OS_FILE_READ, TRUE,
+ error = fil_io(OS_FILE_READ, true,
recv_addr->space, 0,
recv_addr->page_no, 0,
UNIV_PAGE_SIZE,
@@ -2098,13 +2095,13 @@ recv_apply_log_recs_for_backup(void)
mach_read_from_8(block->frame + FIL_PAGE_LSN));
if (zip_size) {
- error = fil_io(OS_FILE_WRITE, TRUE,
+ error = fil_io(OS_FILE_WRITE, true,
recv_addr->space, zip_size,
recv_addr->page_no, 0,
zip_size,
block->page.zip.data, NULL);
} else {
- error = fil_io(OS_FILE_WRITE, TRUE,
+ error = fil_io(OS_FILE_WRITE, true,
recv_addr->space, 0,
recv_addr->page_no, 0,
UNIV_PAGE_SIZE,
@@ -2387,15 +2384,11 @@ loop:
recv_sys->recovered_offset += len;
recv_sys->recovered_lsn = new_recovered_lsn;
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Parsed a single log rec"
- " type %lu len %lu space %lu page no %lu\n",
- (ulong) type, (ulong) len, (ulong) space,
- (ulong) page_no);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_log",
+ ("scan " DBUG_LSN_PF ": log rec %u len %u "
+ "page %u:%u", old_lsn,
+ (unsigned) type, (unsigned) len,
+ (unsigned) space, (unsigned) page_no));
if (type == MLOG_DUMMY_RECORD) {
/* Do nothing */
@@ -2482,16 +2475,12 @@ loop:
}
#endif /* UNIV_LOG_DEBUG */
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Parsed a multi log rec"
- " type %lu len %lu"
- " space %lu page no %lu\n",
- (ulong) type, (ulong) len,
- (ulong) space, (ulong) page_no);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_log",
+ ("scan " DBUG_LSN_PF ": multi-log rec %u "
+ "len %u page %u:%u",
+ recv_sys->recovered_lsn,
+ (unsigned) type, (unsigned) len,
+ (unsigned) space, (unsigned) page_no));
total_len += len;
n_recs++;
@@ -2980,6 +2969,11 @@ recv_init_crash_recovery(void)
"from the doublewrite buffer...");
buf_dblwr_init_or_restore_pages(TRUE);
+
+ /* Spawn the background thread to flush dirty pages
+ from the buffer pools. */
+ recv_writer_thread_handle = os_thread_create(
+ recv_writer_thread, 0, 0);
}
}
@@ -3072,7 +3066,7 @@ recv_recovery_from_checkpoint_start_func(
/* Read the first log file header to print a note if this is
a recovery from a restored InnoDB Hot Backup */
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
+ fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0,
0, 0, LOG_FILE_HDR_SIZE,
log_hdr_buf, max_cp_group);
@@ -3102,7 +3096,7 @@ recv_recovery_from_checkpoint_start_func(
memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
' ', 4);
/* Write to the log file to wipe over the label */
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, true,
max_cp_group->space_id, 0,
0, 0, OS_FILE_LOG_BLOCK_SIZE,
log_hdr_buf, max_cp_group);
@@ -3266,19 +3260,9 @@ recv_recovery_from_checkpoint_start_func(
}
}
- if (!srv_read_only_mode) {
- if (recv_needed_recovery) {
- /* Spawn the background thread to
- flush dirty pages from the buffer
- pools. */
- recv_writer_thread_handle =
- os_thread_create(
- recv_writer_thread, 0, 0);
- } else {
- /* Init the doublewrite buffer memory
- structure */
- buf_dblwr_init_or_restore_pages(FALSE);
- }
+ if (!recv_needed_recovery && !srv_read_only_mode) {
+ /* Init the doublewrite buffer memory structure */
+ buf_dblwr_init_or_restore_pages(FALSE);
}
}
@@ -3347,7 +3331,7 @@ recv_recovery_from_checkpoint_start_func(
log_sys->next_checkpoint_no = checkpoint_no + 1;
#ifdef UNIV_LOG_ARCHIVE
- if (archived_lsn == IB_ULONGLONG_MAX) {
+ if (archived_lsn == LSN_MAX) {
log_sys->archiving_state = LOG_ARCH_OFF;
}
@@ -3387,12 +3371,7 @@ recv_recovery_from_checkpoint_finish(void)
recv_apply_hashed_log_recs(TRUE);
}
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Log records applied to the database\n");
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_log", ("apply completed"));
if (recv_needed_recovery) {
trx_sys_print_mysql_master_log_pos();
@@ -3753,7 +3732,7 @@ ask_again:
#endif
/* Read the archive file header */
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
+ fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, 0,
LOG_FILE_HDR_SIZE, buf, NULL);
/* Check if the archive file header is consistent */
@@ -3825,7 +3804,7 @@ ask_again:
}
#endif /* UNIV_DEBUG */
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
+ fil_io(OS_FILE_READ | OS_FILE_LOG, true,
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
@@ -3951,8 +3930,8 @@ recv_recovery_from_archive_start(
err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
limit_lsn,
- IB_ULONGLONG_MAX,
- IB_ULONGLONG_MAX);
+ LSN_MAX,
+ LSN_MAX);
if (err != DB_SUCCESS) {
return(err);
@@ -3961,7 +3940,7 @@ recv_recovery_from_archive_start(
mutex_enter(&(log_sys->mutex));
}
- if (limit_lsn != IB_ULONGLONG_MAX) {
+ if (limit_lsn != LSN_MAX) {
recv_apply_hashed_log_recs(FALSE);
diff --git a/storage/innobase/mem/mem0mem.cc b/storage/innobase/mem/mem0mem.cc
index 33060f22c6a..e0e6220f4d8 100644
--- a/storage/innobase/mem/mem0mem.cc
+++ b/storage/innobase/mem/mem0mem.cc
@@ -354,7 +354,11 @@ mem_heap_create_block(
block = (mem_block_t*) buf_block->frame;
}
- ut_ad(block);
+ if(!block) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ " InnoDB: Unable to allocate memory of size %lu.\n",
+ len);
+ }
block->buf_block = buf_block;
block->free_block = NULL;
#else /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 10b4686b720..cb7fd244172 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -58,78 +58,81 @@ mtr_block_dirtied(
/*****************************************************************//**
Releases the item in the slot given. */
-static
+static __attribute__((nonnull))
void
-mtr_memo_slot_release(
-/*==================*/
- mtr_t* mtr, /*!< in: mtr */
+mtr_memo_slot_release_func(
+/*=======================*/
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+#endif /* UNIV_DEBUG */
mtr_memo_slot_t* slot) /*!< in: memo slot */
{
- void* object;
- ulint type;
-
- ut_ad(mtr);
- ut_ad(slot);
+ void* object = slot->object;
+ slot->object = NULL;
/* slot release is a local operation for the current mtr.
We must not be holding the flush_order mutex while
doing this. */
ut_ad(!log_flush_order_mutex_own());
-#ifndef UNIV_DEBUG
- UT_NOT_USED(mtr);
-#endif /* UNIV_DEBUG */
-
- object = slot->object;
- type = slot->type;
- if (UNIV_LIKELY(object != NULL)) {
- if (type <= MTR_MEMO_BUF_FIX) {
- buf_page_release((buf_block_t*) object, type);
- } else if (type == MTR_MEMO_S_LOCK) {
- rw_lock_s_unlock((rw_lock_t*) object);
+ switch (slot->type) {
+ case MTR_MEMO_PAGE_S_FIX:
+ case MTR_MEMO_PAGE_X_FIX:
+ case MTR_MEMO_BUF_FIX:
+ buf_page_release((buf_block_t*) object, slot->type);
+ break;
+ case MTR_MEMO_S_LOCK:
+ rw_lock_s_unlock((rw_lock_t*) object);
+ break;
+ case MTR_MEMO_X_LOCK:
+ rw_lock_x_unlock((rw_lock_t*) object);
+ break;
#ifdef UNIV_DEBUG
- } else if (type != MTR_MEMO_X_LOCK) {
- ut_ad(type == MTR_MEMO_MODIFY);
- ut_ad(mtr_memo_contains(mtr, object,
- MTR_MEMO_PAGE_X_FIX));
+ default:
+ ut_ad(slot->type == MTR_MEMO_MODIFY);
+ ut_ad(mtr_memo_contains(mtr, object, MTR_MEMO_PAGE_X_FIX));
#endif /* UNIV_DEBUG */
- } else {
- rw_lock_x_unlock((rw_lock_t*) object);
- }
}
-
- slot->object = NULL;
}
+#ifdef UNIV_DEBUG
+# define mtr_memo_slot_release(mtr, slot) mtr_memo_slot_release_func(mtr, slot)
+#else /* UNIV_DEBUG */
+# define mtr_memo_slot_release(mtr, slot) mtr_memo_slot_release_func(slot)
+#endif /* UNIV_DEBUG */
+
/**********************************************************//**
Releases the mlocks and other objects stored in an mtr memo.
They are released in the order opposite to which they were pushed
to the memo. */
-static
+static __attribute__((nonnull))
void
mtr_memo_pop_all(
/*=============*/
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
commit */
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
-
- while (offset > 0) {
- offset -= sizeof(mtr_memo_slot_t);
-
- slot = static_cast<mtr_memo_slot_t*>(
- dyn_array_get_element(memo, offset));
- mtr_memo_slot_release(mtr, slot);
+ for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
+ block;
+ block = dyn_array_get_prev_block(&mtr->memo, block)) {
+ const mtr_memo_slot_t* start
+ = reinterpret_cast<mtr_memo_slot_t*>(
+ dyn_block_get_data(block));
+ mtr_memo_slot_t* slot
+ = reinterpret_cast<mtr_memo_slot_t*>(
+ dyn_block_get_data(block)
+ + dyn_block_get_used(block));
+
+ ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
+
+ while (slot-- != start) {
+ if (slot->object != NULL) {
+ mtr_memo_slot_release(mtr, slot);
+ }
+ }
}
}
@@ -339,46 +342,44 @@ mtr_commit(
#ifndef UNIV_HOTBACKUP
/***************************************************//**
-Releases an object in the memo stack. */
+Releases an object in the memo stack.
+@return true if released */
UNIV_INTERN
-void
+bool
mtr_memo_release(
/*=============*/
- mtr_t* mtr, /*!< in: mtr */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
void* object, /*!< in: object */
ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
-
- while (offset > 0) {
- offset -= sizeof(mtr_memo_slot_t);
-
- slot = static_cast<mtr_memo_slot_t*>(
- dyn_array_get_element(memo, offset));
-
- if (object == slot->object && type == slot->type) {
-
- /* We cannot release a page that has been written
- to in the middle of a mini-transaction. */
-
- ut_ad(!(mtr->modifications
- && slot->type == MTR_MEMO_PAGE_X_FIX));
-
- mtr_memo_slot_release(mtr, slot);
-
- break;
+ /* We cannot release a page that has been written to in the
+ middle of a mini-transaction. */
+ ut_ad(!mtr->modifications || type != MTR_MEMO_PAGE_X_FIX);
+
+ for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
+ block;
+ block = dyn_array_get_prev_block(&mtr->memo, block)) {
+ const mtr_memo_slot_t* start
+ = reinterpret_cast<mtr_memo_slot_t*>(
+ dyn_block_get_data(block));
+ mtr_memo_slot_t* slot
+ = reinterpret_cast<mtr_memo_slot_t*>(
+ dyn_block_get_data(block)
+ + dyn_block_get_used(block));
+
+ ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
+
+ while (slot-- != start) {
+ if (object == slot->object && type == slot->type) {
+ mtr_memo_slot_release(mtr, slot);
+ return(true);
+ }
}
}
+
+ return(false);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index d1b2b12bf59..d9c6e43ee14 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -1,6 +1,6 @@
/***********************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
@@ -640,26 +640,13 @@ os_file_handle_error_cond_exit(
to the log. */
if (should_exit || !on_error_silent) {
- if (name) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: File name %s\n", name);
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: File operation call: "
- "'%s' returned OS error " ULINTPF ".\n",
- operation, err);
+ ib_logf(IB_LOG_LEVEL_ERROR, "File %s: '%s' returned OS "
+ "error " ULINTPF ".%s", name ? name : "(unknown)",
+ operation, err, should_exit
+ ? " Cannot continue operation" : "");
}
if (should_exit) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Cannot continue "
- "operation.\n");
-
- fflush(stderr);
-
- ut_ad(0); /* Report call stack, etc only in debug code. */
exit(1);
}
}
@@ -1131,6 +1118,7 @@ os_file_create_simple_func(
os_file_t file;
ibool retry;
+ *success = FALSE;
#ifdef __WIN__
DWORD access;
DWORD create_flag;
@@ -1325,6 +1313,7 @@ os_file_create_simple_no_error_handling_func(
{
os_file_t file;
+ *success = FALSE;
#ifdef __WIN__
DWORD access;
DWORD create_flag;
@@ -1800,8 +1789,8 @@ Deletes a file if it exists. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
bool
-os_file_delete_if_exists(
-/*=====================*/
+os_file_delete_if_exists_func(
+/*==========================*/
const char* name) /*!< in: file path as a null-terminated
string */
{
@@ -1862,8 +1851,8 @@ Deletes a file. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
bool
-os_file_delete(
-/*===========*/
+os_file_delete_func(
+/*================*/
const char* name) /*!< in: file path as a null-terminated
string */
{
@@ -3358,8 +3347,8 @@ os_file_make_data_dir_path(
The function os_file_dirname returns a directory component of a
null-terminated pathname string. In the usual case, dirname returns
the string up to, but not including, the final '/', and basename
-is the component following the final '/'. Trailing '/' charac­
-ters are not counted as part of the pathname.
+is the component following the final '/'. Trailing '/' characters
+are not counted as part of the pathname.
If path does not contain a slash, dirname returns the string ".".
@@ -4085,7 +4074,7 @@ os_aio_get_segment_no_from_slot(
seg_len = os_aio_read_array->n_slots
/ os_aio_read_array->n_segments;
- segment = 2 + slot->pos / seg_len;
+ segment = (srv_read_only_mode ? 0 : 2) + slot->pos / seg_len;
} else {
ut_ad(!srv_read_only_mode);
ut_a(array == os_aio_write_array);
@@ -5323,7 +5312,7 @@ consecutive_loop:
if (slot->reserved
&& slot != aio_slot
- && slot->offset == slot->offset + aio_slot->len
+ && slot->offset == aio_slot->offset + aio_slot->len
&& slot->type == aio_slot->type
&& slot->file == aio_slot->file) {
diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc
index f416d38cc35..efce1f10cae 100644
--- a/storage/innobase/page/page0cur.cc
+++ b/storage/innobase/page/page0cur.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -30,6 +30,7 @@ Created 10/4/1994 Heikki Tuuri
#endif
#include "page0zip.h"
+#include "btr0btr.h"
#include "mtr0log.h"
#include "log0recv.h"
#include "ut0ut.h"
@@ -773,7 +774,7 @@ page_cur_parse_insert_rec(
byte* buf;
byte* ptr2 = ptr;
ulint info_and_status_bits = 0; /* remove warning */
- page_cur_t cursor;
+ page_cur_t cursor;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
@@ -1160,84 +1161,22 @@ use_heap:
}
/***********************************************************//**
-Compresses or reorganizes a page after an optimistic insert.
-@return rec if succeed, NULL otherwise */
-static
-rec_t*
-page_cur_insert_rec_zip_reorg(
-/*==========================*/
- rec_t** current_rec,/*!< in/out: pointer to current record after
- which the new record is inserted */
- buf_block_t* block, /*!< in: buffer block */
- dict_index_t* index, /*!< in: record descriptor */
- rec_t* rec, /*!< in: inserted record */
- ulint rec_size,/*!< in: size of the inserted record */
- page_t* page, /*!< in: uncompressed page */
- page_zip_des_t* page_zip,/*!< in: compressed page */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
-{
- ulint pos;
-
- /* Make a local copy as the values can change dynamically. */
- bool log_compressed = page_log_compressed_pages;
- ulint level = page_compression_level;
-
- /* Recompress or reorganize and recompress the page. */
- if (page_zip_compress(page_zip, page, index, level,
- log_compressed ? mtr : NULL)) {
- if (!log_compressed) {
- page_cur_insert_rec_write_log(
- rec, rec_size, *current_rec, index, mtr);
- page_zip_compress_write_log_no_data(
- level, page, index, mtr);
- }
-
- return(rec);
- }
-
- /* Before trying to reorganize the page,
- store the number of preceding records on the page. */
- pos = page_rec_get_n_recs_before(rec);
- ut_ad(pos > 0);
-
- if (page_zip_reorganize(block, index, mtr)) {
- /* The page was reorganized: Find rec by seeking to pos,
- and update *current_rec. */
- if (pos > 1) {
- rec = page_rec_get_nth(page, pos - 1);
- } else {
- rec = page + PAGE_NEW_INFIMUM;
- }
-
- *current_rec = rec;
- rec = page + rec_get_next_offs(rec, TRUE);
-
- return(rec);
- }
-
- /* Out of space: restore the page */
- btr_blob_dbg_remove(page, index, "insert_zip_fail");
- if (!page_zip_decompress(page_zip, page, FALSE)) {
- ut_error; /* Memory corrupted? */
- }
- ut_ad(page_validate(page, index));
- btr_blob_dbg_add(page, index, "insert_zip_fail");
- return(NULL);
-}
-
-/***********************************************************//**
Inserts a record next to page cursor on a compressed and uncompressed
page. Returns pointer to inserted record if succeed, i.e.,
enough space available, NULL otherwise.
The cursor stays at the same position.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INTERN
rec_t*
page_cur_insert_rec_zip(
/*====================*/
- rec_t** current_rec,/*!< in/out: pointer to current record after
- which the new record is inserted */
- buf_block_t* block, /*!< in: buffer block of *current_rec */
+ page_cur_t* cursor, /*!< in/out: page cursor */
dict_index_t* index, /*!< in: record descriptor */
const rec_t* rec, /*!< in: pointer to a physical record */
ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
@@ -1255,19 +1194,19 @@ page_cur_insert_rec_zip(
record */
page_zip_des_t* page_zip;
- page_zip = buf_block_get_page_zip(block);
+ page_zip = page_cur_get_page_zip(cursor);
ut_ad(page_zip);
ut_ad(rec_offs_validate(rec, index, offsets));
- page = page_align(*current_rec);
+ page = page_cur_get_page(cursor);
ut_ad(dict_table_is_comp(index->table));
ut_ad(page_is_comp(page));
ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
== index->id || mtr->inside_ibuf || recv_recovery_is_on());
- ut_ad(!page_rec_is_supremum(*current_rec));
+ ut_ad(!page_cur_is_after_last(cursor));
#ifdef UNIV_ZIP_DEBUG
ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
@@ -1292,14 +1231,74 @@ page_cur_insert_rec_zip(
}
#endif /* UNIV_DEBUG_VALGRIND */
+ const bool reorg_before_insert = page_has_garbage(page)
+ && rec_size > page_get_max_insert_size(page, 1)
+ && rec_size <= page_get_max_insert_size_after_reorganize(
+ page, 1);
+
/* 2. Try to find suitable space from page memory management */
if (!page_zip_available(page_zip, dict_index_is_clust(index),
- rec_size, 1)) {
+ rec_size, 1)
+ || reorg_before_insert) {
+ /* The values can change dynamically. */
+ bool log_compressed = page_zip_log_pages;
+ ulint level = page_zip_level;
+#ifdef UNIV_DEBUG
+ rec_t* cursor_rec = page_cur_get_rec(cursor);
+#endif /* UNIV_DEBUG */
+
+ /* If we are not writing compressed page images, we
+ must reorganize the page before attempting the
+ insert. */
+ if (recv_recovery_is_on()) {
+ /* Insert into the uncompressed page only.
+ The page reorganization or creation that we
+ would attempt outside crash recovery would
+ have been covered by a previous redo log record. */
+ } else if (page_is_empty(page)) {
+ ut_ad(page_cur_is_before_first(cursor));
+
+ /* This is an empty page. Recreate it to
+ get rid of the modification log. */
+ page_create_zip(page_cur_get_block(cursor), index,
+ page_header_get_field(page, PAGE_LEVEL),
+ 0, mtr);
+ ut_ad(!page_header_get_ptr(page, PAGE_FREE));
+
+ if (page_zip_available(
+ page_zip, dict_index_is_clust(index),
+ rec_size, 1)) {
+ goto use_heap;
+ }
+
+ /* The cursor should remain on the page infimum. */
+ return(NULL);
+ } else if (!page_zip->m_nonempty && !page_has_garbage(page)) {
+ /* The page has been freshly compressed, so
+ reorganizing it will not help. */
+ } else if (log_compressed && !reorg_before_insert) {
+ /* Insert into uncompressed page only, and
+ try page_zip_reorganize() afterwards. */
+ } else if (btr_page_reorganize_low(
+ recv_recovery_is_on(), level,
+ cursor, index, mtr)) {
+ ut_ad(!page_header_get_ptr(page, PAGE_FREE));
+
+ if (page_zip_available(
+ page_zip, dict_index_is_clust(index),
+ rec_size, 1)) {
+ /* After reorganizing, there is space
+ available. */
+ goto use_heap;
+ }
+ } else {
+ ut_ad(cursor->rec == cursor_rec);
+ return(NULL);
+ }
/* Try compressing the whole page afterwards. */
- insert_rec = page_cur_insert_rec_low(*current_rec,
- index, rec, offsets,
- NULL);
+ insert_rec = page_cur_insert_rec_low(
+ cursor->rec, index, rec, offsets, NULL);
/* If recovery is on, this implies that the compression
of the page was successful during runtime. Had that not
@@ -1318,16 +1317,82 @@ page_cur_insert_rec_zip(
we call page_zip_validate only after processing
all changes to a page under a single mtr during
recovery. */
- if (insert_rec != NULL && !recv_recovery_is_on()) {
- insert_rec = page_cur_insert_rec_zip_reorg(
- current_rec, block, index, insert_rec,
- rec_size, page, page_zip, mtr);
-#ifdef UNIV_DEBUG
- if (insert_rec) {
- rec_offs_make_valid(
- insert_rec, index, offsets);
+ if (insert_rec == NULL) {
+ /* Out of space.
+ This should never occur during crash recovery,
+ because the MLOG_COMP_REC_INSERT should only
+ be logged after a successful operation. */
+ ut_ad(!recv_recovery_is_on());
+ } else if (recv_recovery_is_on()) {
+ /* This should be followed by
+ MLOG_ZIP_PAGE_COMPRESS_NO_DATA,
+ which should succeed. */
+ rec_offs_make_valid(insert_rec, index, offsets);
+ } else {
+ ulint pos = page_rec_get_n_recs_before(insert_rec);
+ ut_ad(pos > 0);
+
+ if (!log_compressed) {
+ if (page_zip_compress(
+ page_zip, page, index,
+ level, NULL)) {
+ page_cur_insert_rec_write_log(
+ insert_rec, rec_size,
+ cursor->rec, index, mtr);
+ page_zip_compress_write_log_no_data(
+ level, page, index, mtr);
+
+ rec_offs_make_valid(
+ insert_rec, index, offsets);
+ return(insert_rec);
+ }
+
+ ut_ad(cursor->rec
+ == (pos > 1
+ ? page_rec_get_nth(
+ page, pos - 1)
+ : page + PAGE_NEW_INFIMUM));
+ } else {
+ /* We are writing entire page images
+ to the log. Reduce the redo log volume
+ by reorganizing the page at the same time. */
+ if (page_zip_reorganize(
+ cursor->block, index, mtr)) {
+ /* The page was reorganized:
+ Seek to pos. */
+ if (pos > 1) {
+ cursor->rec = page_rec_get_nth(
+ page, pos - 1);
+ } else {
+ cursor->rec = page
+ + PAGE_NEW_INFIMUM;
+ }
+
+ insert_rec = page + rec_get_next_offs(
+ cursor->rec, TRUE);
+ rec_offs_make_valid(
+ insert_rec, index, offsets);
+ return(insert_rec);
+ }
+
+ /* Theoretically, we could try one
+ last resort of btr_page_reorganize_low()
+ followed by page_zip_available(), but
+ that would be very unlikely to
+ succeed. (If the full reorganized page
+ failed to compress, why would it
+ succeed to compress the page, plus log
+ the insert of this record?) */
}
-#endif /* UNIV_DEBUG */
+
+ /* Out of space: restore the page */
+ btr_blob_dbg_remove(page, index, "insert_zip_fail");
+ if (!page_zip_decompress(page_zip, page, FALSE)) {
+ ut_error; /* Memory corrupted? */
+ }
+ ut_ad(page_validate(page, index));
+ btr_blob_dbg_add(page, index, "insert_zip_fail");
+ insert_rec = NULL;
}
return(insert_rec);
@@ -1344,7 +1409,7 @@ page_cur_insert_rec_zip(
rec_offs_init(foffsets_);
foffsets = rec_get_offsets(free_rec, index, foffsets,
- ULINT_UNDEFINED, &heap);
+ ULINT_UNDEFINED, &heap);
if (rec_offs_size(foffsets) < rec_size) {
too_small:
if (UNIV_LIKELY_NULL(heap)) {
@@ -1452,18 +1517,19 @@ use_heap:
rec_offs_make_valid(insert_rec, index, offsets);
/* 4. Insert the record in the linked list of records */
- ut_ad(*current_rec != insert_rec);
+ ut_ad(cursor->rec != insert_rec);
{
/* next record after current before the insertion */
- rec_t* next_rec = page_rec_get_next(*current_rec);
- ut_ad(rec_get_status(*current_rec)
+ const rec_t* next_rec = page_rec_get_next_low(
+ cursor->rec, TRUE);
+ ut_ad(rec_get_status(cursor->rec)
<= REC_STATUS_INFIMUM);
ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
page_rec_set_next(insert_rec, next_rec);
- page_rec_set_next(*current_rec, insert_rec);
+ page_rec_set_next(cursor->rec, insert_rec);
}
page_header_set_field(page, page_zip, PAGE_N_RECS,
@@ -1477,7 +1543,7 @@ use_heap:
UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
rec_offs_size(offsets));
- page_zip_dir_insert(page_zip, *current_rec, free_rec, insert_rec);
+ page_zip_dir_insert(page_zip, cursor->rec, free_rec, insert_rec);
/* 6. Update the last insertion info in page header */
@@ -1491,7 +1557,7 @@ use_heap:
PAGE_NO_DIRECTION);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
- } else if ((last_insert == *current_rec)
+ } else if ((last_insert == cursor->rec)
&& (page_header_get_field(page, PAGE_DIRECTION)
!= PAGE_LEFT)) {
@@ -1544,7 +1610,7 @@ use_heap:
/* 9. Write log record of the insert */
if (UNIV_LIKELY(mtr != NULL)) {
page_cur_insert_rec_write_log(insert_rec, rec_size,
- *current_rec, index, mtr);
+ cursor->rec, index, mtr);
}
return(insert_rec);
@@ -1638,7 +1704,12 @@ page_parse_copy_rec_list_to_created_page(
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied. */
+including that record. Infimum and supremum records are not copied.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
UNIV_INTERN
void
page_copy_rec_list_end_to_created_page(
@@ -1940,6 +2011,24 @@ page_cur_delete_rec(
/* The record must not be the supremum or infimum record. */
ut_ad(page_rec_is_user_rec(current_rec));
+ if (page_get_n_recs(page) == 1 && !recv_recovery_is_on()) {
+ /* Empty the page, unless we are applying the redo log
+ during crash recovery. During normal operation, the
+ page_create_empty() gets logged as one of MLOG_PAGE_CREATE,
+ MLOG_COMP_PAGE_CREATE, MLOG_ZIP_PAGE_COMPRESS. */
+ ut_ad(page_is_leaf(page));
+ /* Usually, this should be the root page,
+ and the whole index tree should become empty.
+ However, this could also be a call in
+ btr_cur_pessimistic_update() to delete the only
+ record in the page and to insert another one. */
+ page_cur_move_to_next(cursor);
+ ut_ad(page_cur_is_after_last(cursor));
+ page_create_empty(page_cur_get_block(cursor),
+ const_cast<dict_index_t*>(index), mtr);
+ return;
+ }
+
/* Save to local variables some data associated with current_rec */
cur_slot_no = page_dir_find_owner_slot(current_rec);
ut_ad(cur_slot_no > 0);
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
index 6b7b8424856..2faf804279c 100644
--- a/storage/innobase/page/page0page.cc
+++ b/storage/innobase/page/page0page.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -500,7 +500,8 @@ page_create_zip(
page is created */
dict_index_t* index, /*!< in: the index of the page */
ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
page_t* page;
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
@@ -511,10 +512,11 @@ page_create_zip(
ut_ad(dict_table_is_comp(index->table));
page = page_create_low(block, TRUE);
- mach_write_to_2(page + PAGE_HEADER + PAGE_LEVEL, level);
+ mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + page, level);
+ mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + page, max_trx_id);
if (!page_zip_compress(page_zip, page, index,
- page_compression_level, mtr)) {
+ page_zip_level, mtr)) {
/* The compression of a newly created page
should always succeed. */
ut_error;
@@ -523,9 +525,49 @@ page_create_zip(
return(page);
}
+/**********************************************************//**
+Empty a previously created B-tree index page by recreating it in place.
+If the block has a compressed page descriptor, the page is recreated with
+page_create_zip(), passing the PAGE_LEVEL read from the existing page;
+otherwise it is recreated with page_create(). For a leaf page of an index
+for which dict_index_is_sec_or_ibuf() holds, the PAGE_MAX_TRX_ID found on
+the page is read first and carried over to the recreated page.
+NOTE(review): the uncompressed branch does not pass PAGE_LEVEL to
+page_create(); confirm how the level is handled for non-leaf pages. */
+UNIV_INTERN
+void
+page_create_empty(
+/*==============*/
+ buf_block_t* block, /*!< in/out: B-tree block */
+ dict_index_t* index, /*!< in: the index of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ trx_id_t max_trx_id = 0; /* 0 = no PAGE_MAX_TRX_ID to carry over */
+ const page_t* page = buf_block_get_frame(block);
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
+
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+
+ if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+ max_trx_id = page_get_max_trx_id(page); /* read before the page is recreated */
+ ut_ad(max_trx_id); /* debug builds: a nonzero value is expected here */
+ }
+
+ if (page_zip) {
+ page_create_zip(block, index,
+ page_header_get_field(page, PAGE_LEVEL),
+ max_trx_id, mtr);
+ } else {
+ page_create(block, mtr, page_is_comp(page)); /* same page_is_comp() value as before */
+
+ if (max_trx_id) {
+ page_update_max_trx_id(
+ block, page_zip, max_trx_id, mtr); /* page_zip is NULL in this branch */
+ }
+ }
+}
+
/*************************************************************//**
Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page. */
+touch the lock table and max trx id on page or compress the page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
UNIV_INTERN
void
page_copy_rec_list_end_no_locks(
@@ -600,6 +642,12 @@ page_copy_rec_list_end_no_locks(
Copies records from page to new_page, from a given record onward,
including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to the original successor of the infimum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
UNIV_INTERN
@@ -660,11 +708,8 @@ page_copy_rec_list_end(
if (new_page_zip) {
mtr_set_log_mode(mtr, log_mode);
- if (!page_zip_compress(new_page_zip,
- new_page,
- index,
- page_compression_level,
- mtr)) {
+ if (!page_zip_compress(new_page_zip, new_page,
+ index, page_zip_level, mtr)) {
/* Before trying to reorganize the page,
store the number of preceding records on the page. */
ulint ret_pos
@@ -713,6 +758,12 @@ page_copy_rec_list_end(
Copies records from page to new_page, up to the given record,
NOT including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to the original predecessor of the supremum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
UNIV_INTERN
@@ -788,7 +839,7 @@ page_copy_rec_list_start(
goto zip_reorganize;);
if (!page_zip_compress(new_page_zip, new_page, index,
- page_compression_level, mtr)) {
+ page_zip_level, mtr)) {
ulint ret_pos;
#ifndef DBUG_OFF
@@ -953,13 +1004,38 @@ page_delete_rec_list_end(
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- if (page_rec_is_infimum(rec)) {
- rec = page_rec_get_next(rec);
- }
-
if (page_rec_is_supremum(rec)) {
+ ut_ad(n_recs == 0 || n_recs == ULINT_UNDEFINED);
+ /* Nothing to do, there are no records bigger than the
+ page supremum. */
+ return;
+ }
+ if (recv_recovery_is_on()) {
+ /* If we are replaying a redo log record, we must
+ replay it exactly. Since MySQL 5.6.11, we should be
+ generating a redo log record for page creation if
+ the page would become empty. Thus, this branch should
+ only be executed when applying redo log that was
+ generated by an older version of MySQL. */
+ } else if (page_rec_is_infimum(rec)
+ || n_recs == page_get_n_recs(page)) {
+delete_all:
+ /* We are deleting all records. */
+ page_create_empty(block, index, mtr);
return;
+ } else if (page_is_comp(page)) {
+ if (page_rec_get_next_low(page + PAGE_NEW_INFIMUM, 1) == rec) {
+ /* We are deleting everything from the first
+ user record onwards. */
+ goto delete_all;
+ }
+ } else {
+ if (page_rec_get_next_low(page + PAGE_OLD_INFIMUM, 0) == rec) {
+ /* We are deleting everything from the first
+ user record onwards. */
+ goto delete_all;
+ }
}
/* Reset the last insert info in the page header and increment
@@ -1138,7 +1214,12 @@ page_delete_rec_list_start(
#endif /* UNIV_ZIP_DEBUG */
if (page_rec_is_infimum(rec)) {
+ return;
+ }
+ if (page_rec_is_supremum(rec)) {
+ /* We are deleting all records. */
+ page_create_empty(block, index, mtr);
return;
}
@@ -1176,6 +1257,12 @@ page_delete_rec_list_start(
/*************************************************************//**
Moves record list end to another page. Moved records include
split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return TRUE on success; FALSE on compression failure (new_block will
be decompressed) */
UNIV_INTERN
@@ -1231,6 +1318,12 @@ page_move_rec_list_end(
/*************************************************************//**
Moves record list start to another page. Moved records do not include
split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return TRUE on success; FALSE on compression failure */
UNIV_INTERN
ibool
@@ -2327,7 +2420,7 @@ page_validate(
}
if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)
- && page_get_n_recs(page) > 0) {
+ && !page_is_empty(page)) {
trx_id_t max_trx_id = page_get_max_trx_id(page);
trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id();
diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc
index dee37580002..52dcbf64183 100644
--- a/storage/innobase/page/page0zip.cc
+++ b/storage/innobase/page/page0zip.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -69,11 +69,11 @@ UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
#endif /* !UNIV_HOTBACKUP */
/* Compression level to be used by zlib. Settable by user. */
-UNIV_INTERN ulint page_compression_level = 6;
+UNIV_INTERN uint page_zip_level = DEFAULT_COMPRESSION_LEVEL;
/* Whether or not to log compressed page images to avoid possible
compression algorithm changes in zlib. */
-UNIV_INTERN bool page_log_compressed_pages = true;
+UNIV_INTERN my_bool page_zip_log_pages = true;
/* Please refer to ../include/page0zip.ic for a description of the
compressed page format. */
@@ -655,8 +655,8 @@ page_zip_dir_encode(
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
#endif
-#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
-# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
+#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE_MAX - 1
+# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE_MAX - 1"
#endif
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
offs |= PAGE_ZIP_DIR_SLOT_OWNED;
@@ -864,11 +864,12 @@ page_zip_compress_node_ptrs(
c_stream->next_in = (byte*) rec;
c_stream->avail_in = rec_offs_data_size(offsets)
- REC_NODE_PTR_SIZE;
- ut_ad(c_stream->avail_in);
- err = deflate(c_stream, Z_NO_FLUSH);
- if (UNIV_UNLIKELY(err != Z_OK)) {
- break;
+ if (c_stream->avail_in) {
+ err = deflate(c_stream, Z_NO_FLUSH);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+ break;
+ }
}
ut_ad(!c_stream->avail_in);
@@ -1199,7 +1200,7 @@ page_zip_compress(
m_start, m_end, m_nonempty */
const page_t* page, /*!< in: uncompressed page */
dict_index_t* index, /*!< in: index of the B-tree node */
- ulint level, /*!< in: commpression level */
+ ulint level, /*!< in: compression level */
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
{
z_stream c_stream;
@@ -1246,7 +1247,7 @@ page_zip_compress(
ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
supremum_extra_data, sizeof supremum_extra_data));
- if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
+ if (page_is_empty(page)) {
ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
== PAGE_NEW_SUPREMUM);
}
@@ -1263,7 +1264,7 @@ page_zip_compress(
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
fprintf(stderr, "compress %p %p %lu %lu %lu\n",
(void*) page_zip, (void*) page,
- page_is_leaf(page),
+ (ibool) page_is_leaf(page),
n_fields, n_dense);
}
if (UNIV_UNLIKELY(page_zip_compress_log)) {
@@ -2173,6 +2174,32 @@ page_zip_apply_log(
}
/**********************************************************************//**
+Set the heap_no in a record, and skip the fixed-size record header
+that is not included in the d_stream.
+The stream output position must be exactly at rec - REC_N_NEW_EXTRA_BYTES;
+if it is not, nothing is modified. On success, d_stream->next_out is moved
+to rec, heap_status is written as a 2-byte value at rec - REC_NEW_HEAP_NO,
+and heap_status is then increased by 1 << REC_HEAP_NO_SHIFT.
+@return TRUE on success, FALSE if d_stream does not end at rec */
+static
+ibool
+page_zip_decompress_heap_no(
+/*========================*/
+ z_stream* d_stream, /*!< in/out: compressed page stream */
+ rec_t* rec, /*!< in/out: record */
+ ulint& heap_status) /*!< in/out: heap_no and status bits */
+{
+ if (d_stream->next_out != rec - REC_N_NEW_EXTRA_BYTES) {
+ /* n_dense has grown since the page was last compressed. */
+ return(FALSE);
+ }
+
+ /* Skip the REC_N_NEW_EXTRA_BYTES. */
+ d_stream->next_out = rec;
+
+ /* Set heap_no and the status bits. */
+ mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
+ heap_status += 1 << REC_HEAP_NO_SHIFT; /* updated through the reference for the caller */
+ return(TRUE);
+}
+
+/**********************************************************************//**
Decompress the records of a node pointer page.
@return TRUE on success, FALSE on failure */
static
@@ -2208,8 +2235,8 @@ page_zip_decompress_node_ptrs(
- PAGE_ZIP_START - PAGE_DIR);
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
case Z_STREAM_END:
- /* Apparently, n_dense has grown
- since the time the page was last compressed. */
+ page_zip_decompress_heap_no(
+ d_stream, rec, heap_status);
goto zlib_done;
case Z_OK:
case Z_BUF_ERROR:
@@ -2224,12 +2251,10 @@ page_zip_decompress_node_ptrs(
goto zlib_error;
}
- ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
- /* Prepare to decompress the data bytes. */
- d_stream->next_out = rec;
- /* Set heap_no and the status bits. */
- mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
- heap_status += 1 << REC_HEAP_NO_SHIFT;
+ if (!page_zip_decompress_heap_no(
+ d_stream, rec, heap_status)) {
+ ut_ad(0);
+ }
/* Read the offsets. The status bits are needed here. */
offsets = rec_get_offsets(rec, index, offsets,
@@ -2331,13 +2356,12 @@ zlib_done:
if (UNIV_UNLIKELY
(page_zip_get_trailer_len(page_zip,
- dict_index_is_clust(index), NULL)
+ dict_index_is_clust(index))
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
page_zip_fail(("page_zip_decompress_node_ptrs:"
" %lu + %lu >= %lu, %lu\n",
(ulong) page_zip_get_trailer_len(
- page_zip, dict_index_is_clust(index),
- NULL),
+ page_zip, dict_index_is_clust(index)),
(ulong) page_zip->m_end,
(ulong) page_zip_get_size(page_zip),
(ulong) dict_index_is_clust(index)));
@@ -2398,8 +2422,8 @@ page_zip_decompress_sec(
if (UNIV_LIKELY(d_stream->avail_out)) {
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
case Z_STREAM_END:
- /* Apparently, n_dense has grown
- since the time the page was last compressed. */
+ page_zip_decompress_heap_no(
+ d_stream, rec, heap_status);
goto zlib_done;
case Z_OK:
case Z_BUF_ERROR:
@@ -2415,15 +2439,10 @@ page_zip_decompress_sec(
}
}
- ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
-
- /* Skip the REC_N_NEW_EXTRA_BYTES. */
-
- d_stream->next_out = rec;
-
- /* Set heap_no and the status bits. */
- mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
- heap_status += 1 << REC_HEAP_NO_SHIFT;
+ if (!page_zip_decompress_heap_no(
+ d_stream, rec, heap_status)) {
+ ut_ad(0);
+ }
}
/* Decompress the data of the last record and any trailing garbage,
@@ -2488,12 +2507,12 @@ zlib_done:
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
}
- if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
+ if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE)
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
(ulong) page_zip_get_trailer_len(
- page_zip, FALSE, NULL),
+ page_zip, FALSE),
(ulong) page_zip->m_end,
(ulong) page_zip_get_size(page_zip)));
return(FALSE);
@@ -2657,8 +2676,8 @@ page_zip_decompress_clust(
err = inflate(d_stream, Z_SYNC_FLUSH);
switch (err) {
case Z_STREAM_END:
- /* Apparently, n_dense has grown
- since the time the page was last compressed. */
+ page_zip_decompress_heap_no(
+ d_stream, rec, heap_status);
goto zlib_done;
case Z_OK:
case Z_BUF_ERROR:
@@ -2673,12 +2692,10 @@ page_zip_decompress_clust(
goto zlib_error;
}
- ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
- /* Prepare to decompress the data bytes. */
- d_stream->next_out = rec;
- /* Set heap_no and the status bits. */
- mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
- heap_status += 1 << REC_HEAP_NO_SHIFT;
+ if (!page_zip_decompress_heap_no(
+ d_stream, rec, heap_status)) {
+ ut_ad(0);
+ }
/* Read the offsets. The status bits are needed here. */
offsets = rec_get_offsets(rec, index, offsets,
@@ -2819,12 +2836,12 @@ zlib_done:
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
}
- if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
+ if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE)
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
(ulong) page_zip_get_trailer_len(
- page_zip, TRUE, NULL),
+ page_zip, TRUE),
(ulong) page_zip->m_end,
(ulong) page_zip_get_size(page_zip)));
return(FALSE);
@@ -3001,7 +3018,7 @@ zlib_error:
/* Copy the infimum and supremum records. */
memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
infimum_extra, sizeof infimum_extra);
- if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
+ if (page_is_empty(page)) {
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
PAGE_NEW_SUPREMUM);
} else {
@@ -4630,8 +4647,7 @@ page_zip_reorganize(
/* Restore logging. */
mtr_set_log_mode(mtr, log_mode);
- if (!page_zip_compress(page_zip, page, index,
- page_compression_level, mtr)) {
+ if (!page_zip_compress(page_zip, page, index, page_zip_level, mtr)) {
#ifndef UNIV_HOTBACKUP
buf_block_free(temp_block);
@@ -4715,8 +4731,7 @@ page_zip_copy_recs(
memcpy(page_zip, src_zip, sizeof *page_zip);
page_zip->data = data;
}
- ut_ad(page_zip_get_trailer_len(page_zip,
- dict_index_is_clust(index), NULL)
+ ut_ad(page_zip_get_trailer_len(page_zip, dict_index_is_clust(index))
+ page_zip->m_end < page_zip_get_size(page_zip));
if (!page_is_leaf(src)
diff --git a/storage/innobase/pars/pars0pars.cc b/storage/innobase/pars/pars0pars.cc
index f82610e62d0..e0bc00fad0d 100644
--- a/storage/innobase/pars/pars0pars.cc
+++ b/storage/innobase/pars/pars0pars.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
+this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St,
+Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc
index 14dc9ee5e7f..faf4102437b 100644
--- a/storage/innobase/read/read0read.cc
+++ b/storage/innobase/read/read0read.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -303,7 +303,7 @@ struct CreateView {
trx_sys->max_trx_id can still be active, if it is
in the middle of its commit! Note that when a
transaction starts, we initialize trx->no to
- IB_ULONGLONG_MAX. */
+ TRX_ID_MAX. */
/* trx->no is protected by trx_sys->mutex, which
we are holding. It is assigned by trx_commit()
diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc
index 3a5d2f579c3..43072159b9e 100644
--- a/storage/innobase/rem/rem0rec.cc
+++ b/storage/innobase/rem/rem0rec.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1839,6 +1839,13 @@ rec_print_comp(
if (len <= 30) {
ut_print_buf(file, data, len);
+ } else if (rec_offs_nth_extern(offsets, i)) {
+ ut_print_buf(file, data, 30);
+ fprintf(file, " (total %lu bytes, external)",
+ (ulong) len);
+ ut_print_buf(file, data + len
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ BTR_EXTERN_FIELD_REF_SIZE);
} else {
ut_print_buf(file, data, 30);
diff --git a/storage/innobase/row/row0ext.cc b/storage/innobase/row/row0ext.cc
index f084fa09c5a..32b78391d6a 100644
--- a/storage/innobase/row/row0ext.cc
+++ b/storage/innobase/row/row0ext.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -58,14 +58,28 @@ row_ext_cache_fill(
/* The BLOB pointer is not set: we cannot fetch it */
ext->len[i] = 0;
} else {
- /* Fetch at most ext->max_len of the column.
- The column should be non-empty. However,
- trx_rollback_or_clean_all_recovered() may try to
- access a half-deleted BLOB if the server previously
- crashed during the execution of
- btr_free_externally_stored_field(). */
- ext->len[i] = btr_copy_externally_stored_field_prefix(
- buf, ext->max_len, zip_size, field, f_len);
+ if (ext->max_len == REC_VERSION_56_MAX_INDEX_COL_LEN
+ && f_len > BTR_EXTERN_FIELD_REF_SIZE) {
+ /* In this case, the field is in B format or beyond
+ (refer to the definition of row_ext_t.max_len),
+ and the field already contains the prefix; otherwise
+ f_len would be BTR_EXTERN_FIELD_REF_SIZE.
+ So there is no need to re-read the prefix externally;
+ just copy the local prefix to buf. Please note that
+ if ext->len[i] is zero, it means an error,
+ as above. */
+ memcpy(buf, field, f_len - BTR_EXTERN_FIELD_REF_SIZE);
+ ext->len[i] = f_len - BTR_EXTERN_FIELD_REF_SIZE;
+ } else {
+ /* Fetch at most ext->max_len of the column.
+ The column should be non-empty. However,
+ trx_rollback_or_clean_all_recovered() may try to
+ access a half-deleted BLOB if the server previously
+ crashed during the execution of
+ btr_free_externally_stored_field(). */
+ ext->len[i] = btr_copy_externally_stored_field_prefix(
+ buf, ext->max_len, zip_size, field, f_len);
+ }
}
}
diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc
index 9a6af50e09d..275fedbfb5d 100644
--- a/storage/innobase/row/row0ftsort.cc
+++ b/storage/innobase/row/row0ftsort.cc
@@ -96,7 +96,7 @@ row_merge_create_fts_sort_index(
field->prefix_len = 0;
field->col = static_cast<dict_col_t*>(
mem_heap_alloc(new_index->heap, sizeof(dict_col_t)));
- field->col->len = fts_max_token_size;
+ field->col->len = FTS_MAX_WORD_LEN;
if (strcmp(charset->name, "latin1_swedish_ci") == 0) {
field->col->mtype = DATA_VARCHAR;
@@ -450,7 +450,7 @@ row_merge_fts_doc_tokenize(
field->type.prtype = word_dtype->prtype | DATA_NOT_NULL;
/* Variable length field, set to max size. */
- field->type.len = fts_max_token_size;
+ field->type.len = FTS_MAX_WORD_LEN;
field->type.mbminmaxlen = word_dtype->mbminmaxlen;
cur_len += len;
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index f5eb31191a5..b753574158a 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1944,7 +1944,7 @@ PageConverter::update_index_page(
page_set_max_trx_id(block, m_page_zip_ptr, m_trx->id, 0);
- if (page_get_n_recs(block->frame) == 0) {
+ if (page_is_empty(block->frame)) {
/* Only a root page can be empty. */
if (!is_root_page(block->frame)) {
@@ -2269,7 +2269,7 @@ row_import_cleanup(
DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ log_make_checkpoint_at(LSN_MAX, TRUE);
return(err);
}
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index c1c27152831..49fb374e2aa 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1705,16 +1705,22 @@ do_possible_lock_wait:
/* We had temporarily released dict_operation_lock in
above lock sleep wait, now we have the lock again, and
we will need to re-check whether the foreign key has been
- dropped */
- for (const dict_foreign_t* check_foreign = UT_LIST_GET_FIRST(
- table->referenced_list);
- check_foreign;
- check_foreign = UT_LIST_GET_NEXT(
- referenced_list, check_foreign)) {
- if (check_foreign == foreign) {
- verified = true;
- break;
+ dropped. We only need to verify this in the referenced
+ table case (check_ref == 0), since the MDL lock will prevent
+ concurrent DDL and DML on the same table */
+ if (!check_ref) {
+ for (const dict_foreign_t* check_foreign
+ = UT_LIST_GET_FIRST( table->referenced_list);
+ check_foreign;
+ check_foreign = UT_LIST_GET_NEXT(
+ referenced_list, check_foreign)) {
+ if (check_foreign == foreign) {
+ verified = true;
+ break;
+ }
}
+ } else {
+ verified = true;
}
if (!verified) {
@@ -1938,6 +1944,7 @@ row_ins_scan_sec_index_for_duplicate(
do {
const rec_t* rec = btr_pcur_get_rec(&pcur);
const buf_block_t* block = btr_pcur_get_block(&pcur);
+ ulint lock_type;
if (page_rec_is_infimum(rec)) {
@@ -1947,6 +1954,16 @@ row_ins_scan_sec_index_for_duplicate(
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &offsets_heap);
+ /* If the transaction isolation level is no stronger than
+ READ COMMITTED, then avoid gap locks. */
+ if (!page_rec_is_supremum(rec)
+ && thr_get_trx(thr)->isolation_level
+ <= TRX_ISO_READ_COMMITTED) {
+ lock_type = LOCK_REC_NOT_GAP;
+ } else {
+ lock_type = LOCK_ORDINARY;
+ }
+
if (flags & BTR_NO_LOCKING_FLAG) {
/* Set no locks when applying log
in online table rebuild. */
@@ -1958,13 +1975,11 @@ row_ins_scan_sec_index_for_duplicate(
INSERT ON DUPLICATE KEY UPDATE). */
err = row_ins_set_exclusive_rec_lock(
- LOCK_ORDINARY, block,
- rec, index, offsets, thr);
+ lock_type, block, rec, index, offsets, thr);
} else {
err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, block,
- rec, index, offsets, thr);
+ lock_type, block, rec, index, offsets, thr);
}
switch (err) {
@@ -1990,6 +2005,19 @@ row_ins_scan_sec_index_for_duplicate(
thr_get_trx(thr)->error_info = index;
+ /* If the duplicate is on hidden FTS_DOC_ID,
+ state so in the error log */
+ if (DICT_TF2_FLAG_IS_SET(
+ index->table,
+ DICT_TF2_FTS_HAS_DOC_ID)
+ && strcmp(index->name,
+ FTS_DOC_ID_INDEX_NAME) == 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Duplicate FTS_DOC_ID value"
+ " on table %s",
+ index->table->name);
+ }
+
goto end_scan;
}
} else {
@@ -2484,7 +2512,7 @@ err_exit:
DBUG_EXECUTE_IF(
"row_ins_extern_checkpoint",
log_make_checkpoint_at(
- IB_ULONGLONG_MAX, TRUE););
+ LSN_MAX, TRUE););
err = row_ins_index_entry_big_rec(
entry, big_rec, offsets, &offsets_heap, index,
thr_get_trx(thr)->mysql_thd,
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 01270300924..170358147b1 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,7 +38,7 @@ Created 2011-05-26 Marko Makela
#include "que0que.h"
#include "handler0alter.h"
-#include<set>
+#include<map>
/** Table row modification operations during online table rebuild.
Delete-marked records are not copied to the rebuilt table. */
@@ -72,18 +72,86 @@ static bool row_log_apply_print;
/** Size of the modification log entry header, in bytes */
#define ROW_LOG_HEADER_SIZE 2/*op, extra_size*/
-/** Log block for modifications during online index creation */
+/** Log block for modifications during online ALTER TABLE */
struct row_log_buf_t {
byte* block; /*!< file block buffer */
mrec_buf_t buf; /*!< buffer for accessing a record
that spans two blocks */
ulint blocks; /*!< current position in blocks */
ulint bytes; /*!< current position within buf */
+ ulonglong total; /*!< logical position, in bytes from
+ the start of the row_log_table log;
+ 0 for row_log_online_op() and
+ row_log_apply(). */
};
-/** Set of transactions that rolled back inserts of BLOBs during
-online table rebuild */
-typedef std::set<trx_id_t> trx_id_set;
+/** Tracks BLOB allocation during online ALTER TABLE */
+class row_log_table_blob_t {
+public:
+ /** Constructor (declaring a BLOB freed)
+ @param offset_arg row_log_t::tail::total */
+#ifdef UNIV_DEBUG
+ row_log_table_blob_t(ulonglong offset_arg) :
+ old_offset (0), free_offset (offset_arg),
+ offset (BLOB_FREED) {}
+#else /* UNIV_DEBUG */
+ row_log_table_blob_t() :
+ offset (BLOB_FREED) {}
+#endif /* UNIV_DEBUG */
+
+ /** Declare a BLOB freed again.
+ @param offset_arg row_log_t::tail::total */
+#ifdef UNIV_DEBUG
+ void blob_free(ulonglong offset_arg)
+#else /* UNIV_DEBUG */
+ void blob_free()
+#endif /* UNIV_DEBUG */
+ {
+ ut_ad(offset < offset_arg);
+ ut_ad(offset != BLOB_FREED);
+ ut_d(old_offset = offset);
+ ut_d(free_offset = offset_arg);
+ offset = BLOB_FREED;
+ }
+ /** Declare a freed BLOB reused.
+ @param offset_arg row_log_t::tail::total */
+ void blob_alloc(ulonglong offset_arg) {
+ ut_ad(free_offset <= offset_arg);
+ ut_d(old_offset = offset);
+ offset = offset_arg;
+ }
+ /** Determine if a BLOB was freed at a given log position
+ @param offset_arg row_log_t::head::total after the log record
+ @return true if freed */
+ bool is_freed(ulonglong offset_arg) const {
+ /* This is supposed to be the offset at the end of the
+ current log record. */
+ ut_ad(offset_arg > 0);
+ /* We should never get anywhere close to the magic value. */
+ ut_ad(offset_arg < BLOB_FREED);
+ return(offset_arg < offset);
+ }
+private:
+ /** Magic value for a freed BLOB */
+ static const ulonglong BLOB_FREED = ~0ULL;
+#ifdef UNIV_DEBUG
+ /** Old offset, in case a page was freed, reused, freed, ... */
+ ulonglong old_offset;
+ /** Offset of last blob_free() */
+ ulonglong free_offset;
+#endif /* UNIV_DEBUG */
+ /** Byte offset to the log file */
+ ulonglong offset;
+};
+
+/** @brief Map of off-page column page numbers to BLOB tracking entries.
+
+If there is no mapping for a page number, it is safe to access.
+If a page number maps to a freed entry (row_log_table_blob_t::BLOB_FREED),
+it is an off-page column that has been freed. Otherwise the entry holds a
+byte offset into the index->online_log, indicating that the page is safe
+to access when applying log records starting from that offset. */
+typedef std::map<ulint, row_log_table_blob_t> page_no_map;
/** @brief Buffer for logging modifications during online index creation
@@ -99,11 +167,12 @@ directly. When also head.bytes == tail.bytes, both counts will be
reset to 0 and the file will be truncated. */
struct row_log_t {
int fd; /*!< file descriptor */
- ib_mutex_t mutex; /*!< mutex protecting trx_log, error,
+ ib_mutex_t mutex; /*!< mutex protecting error,
max_trx and tail */
- trx_id_set* trx_rb; /*!< set of transactions that rolled back
- inserts of BLOBs during online table rebuild;
- protected by mutex */
+ page_no_map* blobs; /*!< map of page numbers of off-page columns
+ that have been freed during table-rebuilding
+ ALTER TABLE (row_log_table_*); protected by
+ index->lock X-latch only */
dict_table_t* table; /*!< table that is being rebuilt,
or NULL when this is a secondary
index that is being created online */
@@ -347,6 +416,7 @@ write_failed:
ut_ad(b == log->tail.block + log->tail.bytes);
}
+ log->tail.total += size;
UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
mutex_exit(&log->mutex);
}
@@ -371,6 +441,7 @@ row_log_table_delete(
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ bool purge, /*!< in: true=purging BLOBs */
trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before
it was deleted */
{
@@ -460,6 +531,7 @@ row_log_table_delete(
ut_ad(dfield_get_type(dfield)->prtype
== (DATA_NOT_NULL | DATA_TRX_ID));
ut_ad(dfield_get_len(dfield) == DATA_TRX_ID_LEN);
+ dfield_dup(dfield, heap);
trx_write_trx_id(static_cast<byte*>(dfield->data), trx_id);
}
@@ -473,27 +545,25 @@ row_log_table_delete(
mrec_size = 4 + old_pk_size;
- /* If the row is marked as rollback, we will need to
- log the enough prefix of the BLOB unless both the
- old and new table are in COMPACT or REDUNDANT format */
- if ((dict_table_get_format(index->table) >= UNIV_FORMAT_B
- || dict_table_get_format(new_table) >= UNIV_FORMAT_B)
- && row_log_table_is_rollback(index, trx_id)) {
- if (rec_offs_any_extern(offsets)) {
- /* Build a cache of those off-page column
- prefixes that are referenced by secondary
- indexes. It can be that none of the off-page
- columns are needed. */
- row_build(ROW_COPY_DATA, index, rec,
- offsets, NULL, NULL, NULL, &ext, heap);
- if (ext) {
- /* Log the row_ext_t, ext->ext and ext->buf */
- ext_size = ext->n_ext * ext->max_len
- + sizeof(*ext)
- + ext->n_ext * sizeof(ulint)
- + (ext->n_ext - 1) * sizeof ext->len;
- mrec_size += ext_size;
- }
+ /* Log enough prefix of the BLOB unless both the
+ old and new table are in COMPACT or REDUNDANT format,
+ which store the prefix in the clustered index record. */
+ if (purge && rec_offs_any_extern(offsets)
+ && (dict_table_get_format(index->table) >= UNIV_FORMAT_B
+ || dict_table_get_format(new_table) >= UNIV_FORMAT_B)) {
+
+ /* Build a cache of those off-page column prefixes
+ that are referenced by secondary indexes. It can be
+ that none of the off-page columns are needed. */
+ row_build(ROW_COPY_DATA, index, rec,
+ offsets, NULL, NULL, NULL, &ext, heap);
+ if (ext) {
+ /* Log the row_ext_t, ext->ext and ext->buf */
+ ext_size = ext->n_ext * ext->max_len
+ + sizeof(*ext)
+ + ext->n_ext * sizeof(ulint)
+ + (ext->n_ext - 1) * sizeof ext->len;
+ mrec_size += ext_size;
}
}
@@ -548,7 +618,7 @@ row_log_table_delete(
/******************************************************//**
Logs an insert or update to a table that is being rebuilt. */
-static __attribute__((nonnull(1,2,3)))
+static
void
row_log_table_low_redundant(
/*========================*/
@@ -557,7 +627,6 @@ row_log_table_low_redundant(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
bool insert, /*!< in: true if insert,
false if update */
const dtuple_t* old_pk, /*!< in: old PRIMARY KEY value
@@ -578,6 +647,9 @@ row_log_table_low_redundant(
ut_ad(!page_is_comp(page_align(rec)));
ut_ad(dict_index_get_n_fields(index) == rec_get_n_fields_old(rec));
+ ut_ad(dict_tf_is_valid(index->table->flags));
+ ut_ad(!dict_table_is_comp(index->table)); /* redundant row format */
+ ut_ad(dict_index_is_clust(new_index));
heap = mem_heap_create(DTUPLE_EST_ALLOC(index->n_fields));
tuple = dtuple_create(heap, index->n_fields);
@@ -712,7 +784,7 @@ row_log_table_low(
if (!rec_offs_comp(offsets)) {
row_log_table_low_redundant(
- rec, index, offsets, insert, old_pk, new_index);
+ rec, index, insert, old_pk, new_index);
return;
}
@@ -723,8 +795,8 @@ row_log_table_low(
extra_size = rec_offs_extra_size(offsets) - omit_size;
- mrec_size = rec_offs_size(offsets) - omit_size
- + ROW_LOG_HEADER_SIZE + (extra_size >= 0x80);
+ mrec_size = ROW_LOG_HEADER_SIZE
+ + (extra_size >= 0x80) + rec_offs_size(offsets) - omit_size;
if (insert || index->online_log->same_pk) {
ut_ad(!old_pk);
@@ -793,6 +865,93 @@ row_log_table_update(
row_log_table_low(rec, index, offsets, false, old_pk);
}
+/** Gets the old table column of a PRIMARY KEY column.
+@param table old table (before ALTER TABLE)
+@param col_map mapping of old column numbers to new ones
+@param col_no column position in the new table
+@return old table column, or NULL if this is an added column */
+static
+const dict_col_t*
+row_log_table_get_pk_old_col(
+/*=========================*/
+ const dict_table_t* table,
+ const ulint* col_map,
+ ulint col_no)
+{
+ for (ulint i = 0; i < table->n_cols; i++) {
+ if (col_no == col_map[i]) {
+ return(dict_table_get_nth_col(table, i));
+ }
+ }
+
+ return(NULL);
+}
+
+/** Maps an old table column of a PRIMARY KEY column.
+@param col old table column (before ALTER TABLE)
+@param ifield clustered index field in the new table (after ALTER TABLE)
+@param dfield clustered index tuple field in the new table
+@param heap memory heap for allocating dfield contents
+@param rec clustered index leaf page record in the old table
+@param offsets rec_get_offsets(rec)
+@param i rec field corresponding to col
+@param zip_size compressed page size of the old table, or 0 for uncompressed
+@param max_len maximum length of dfield
+@retval DB_INVALID_NULL if a NULL value is encountered
+@retval DB_TOO_BIG_INDEX_COL if the maximum prefix length is exceeded */
+static
+dberr_t
+row_log_table_get_pk_col(
+/*=====================*/
+ const dict_col_t* col,
+ const dict_field_t* ifield,
+ dfield_t* dfield,
+ mem_heap_t* heap,
+ const rec_t* rec,
+ const ulint* offsets,
+ ulint i,
+ ulint zip_size,
+ ulint max_len)
+{
+ const byte* field;
+ ulint len;
+
+ ut_ad(ut_is_2pow(zip_size));
+
+ field = rec_get_nth_field(rec, offsets, i, &len);
+
+ if (len == UNIV_SQL_NULL) {
+ return(DB_INVALID_NULL);
+ }
+
+ if (rec_offs_nth_extern(offsets, i)) {
+ ulint field_len = ifield->prefix_len;
+ byte* blob_field;
+
+ if (!field_len) {
+ field_len = ifield->fixed_len;
+ if (!field_len) {
+ field_len = max_len + 1;
+ }
+ }
+
+ blob_field = static_cast<byte*>(
+ mem_heap_alloc(heap, field_len));
+
+ len = btr_copy_externally_stored_field_prefix(
+ blob_field, field_len, zip_size, field, len);
+ if (len >= max_len + 1) {
+ return(DB_TOO_BIG_INDEX_COL);
+ }
+
+ dfield_set_data(dfield, blob_field, len);
+ } else {
+ dfield_set_data(dfield, mem_heap_dup(heap, field, len), len);
+ }
+
+ return(DB_SUCCESS);
+}
+
/******************************************************//**
Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
of a table that is being rebuilt.
@@ -865,95 +1024,69 @@ row_log_table_get_pk(
dict_index_copy_types(tuple, new_index, tuple->n_fields);
dtuple_set_n_fields_cmp(tuple, new_n_uniq);
+ const ulint max_len = DICT_MAX_FIELD_LEN_BY_FORMAT(new_table);
+ const ulint zip_size = dict_table_zip_size(index->table);
+
for (ulint new_i = 0; new_i < new_n_uniq; new_i++) {
- dict_field_t* ifield;
- dfield_t* dfield;
- const dict_col_t* new_col;
- const dict_col_t* col;
- ulint col_no;
- ulint i;
- ulint len;
- const byte* field;
+ dict_field_t* ifield;
+ dfield_t* dfield;
+ ulint prtype;
+ ulint mbminmaxlen;
ifield = dict_index_get_nth_field(new_index, new_i);
dfield = dtuple_get_nth_field(tuple, new_i);
- new_col = dict_field_get_col(ifield);
- col_no = new_col->ind;
-
- for (ulint old_i = 0; old_i < index->table->n_cols;
- old_i++) {
- if (col_no == log->col_map[old_i]) {
- col_no = old_i;
- goto copy_col;
- }
- }
-
- /* No matching column was found in the old
- table, so this must be an added column.
- Copy the default value. */
- ut_ad(log->add_cols);
- dfield_copy(dfield,
- dtuple_get_nth_field(
- log->add_cols, col_no));
- continue;
-
-copy_col:
- col = dict_table_get_nth_col(index->table, col_no);
- i = dict_col_get_clust_pos(col, index);
+ const ulint col_no
+ = dict_field_get_col(ifield)->ind;
- if (i == ULINT_UNDEFINED) {
- ut_ad(0);
- log->error = DB_CORRUPTION;
- tuple = NULL;
- goto func_exit;
- }
+ if (const dict_col_t* col
+ = row_log_table_get_pk_old_col(
+ index->table, log->col_map, col_no)) {
+ ulint i = dict_col_get_clust_pos(col, index);
- field = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len == UNIV_SQL_NULL) {
- log->error = DB_INVALID_NULL;
- tuple = NULL;
- goto func_exit;
- }
-
- if (rec_offs_nth_extern(offsets, i)) {
- ulint field_len = ifield->prefix_len;
- byte* blob_field;
- const ulint max_len =
- DICT_MAX_FIELD_LEN_BY_FORMAT(
- new_table);
-
- if (!field_len) {
- field_len = ifield->fixed_len;
- if (!field_len) {
- field_len = max_len + 1;
- }
+ if (i == ULINT_UNDEFINED) {
+ ut_ad(0);
+ log->error = DB_CORRUPTION;
+ goto err_exit;
}
- blob_field = static_cast<byte*>(
- mem_heap_alloc(*heap, field_len));
+ log->error = row_log_table_get_pk_col(
+ col, ifield, dfield, *heap,
+ rec, offsets, i, zip_size, max_len);
- len = btr_copy_externally_stored_field_prefix(
- blob_field, field_len,
- dict_table_zip_size(index->table),
- field, len);
- if (len == max_len + 1) {
- log->error = DB_TOO_BIG_INDEX_COL;
+ if (log->error != DB_SUCCESS) {
+err_exit:
tuple = NULL;
goto func_exit;
}
- dfield_set_data(dfield, blob_field, len);
+ mbminmaxlen = col->mbminmaxlen;
+ prtype = col->prtype;
} else {
- if (ifield->prefix_len
- && ifield->prefix_len < len) {
- len = ifield->prefix_len;
- }
+ /* No matching column was found in the old
+ table, so this must be an added column.
+ Copy the default value. */
+ ut_ad(log->add_cols);
+
+ dfield_copy(dfield, dtuple_get_nth_field(
+ log->add_cols, col_no));
+ mbminmaxlen = dfield->type.mbminmaxlen;
+ prtype = dfield->type.prtype;
+ }
+
+ ut_ad(!dfield_is_ext(dfield));
+ ut_ad(!dfield_is_null(dfield));
- dfield_set_data(
- dfield,
- mem_heap_dup(*heap, field, len), len);
+ if (ifield->prefix_len) {
+ ulint len = dtype_get_at_most_n_mbchars(
+ prtype, mbminmaxlen,
+ ifield->prefix_len,
+ dfield_get_len(dfield),
+ static_cast<const char*>(
+ dfield_get_data(dfield)));
+
+ ut_ad(len <= dfield_get_len(dfield));
+ dfield_set_len(dfield, len);
}
}
@@ -988,66 +1121,80 @@ row_log_table_insert(
}
/******************************************************//**
-Notes that a transaction is being rolled back. */
+Notes that a BLOB is being freed during online ALTER TABLE. */
UNIV_INTERN
void
-row_log_table_rollback(
-/*===================*/
- dict_index_t* index, /*!< in/out: clustered index */
- trx_id_t trx_id) /*!< in: transaction being rolled back */
+row_log_table_blob_free(
+/*====================*/
+ dict_index_t* index, /*!< in/out: clustered index, X-latched */
+ ulint page_no)/*!< in: starting page number of the BLOB */
{
ut_ad(dict_index_is_clust(index));
-#ifdef UNIV_DEBUG
- ibool corrupt = FALSE;
- ut_ad(trx_rw_is_active(trx_id, &corrupt));
- ut_ad(!corrupt);
-#endif /* UNIV_DEBUG */
+ ut_ad(dict_index_is_online_ddl(index));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(page_no != FIL_NULL);
- /* Protect transitions of index->online_status and access to
- index->online_log. */
- rw_lock_s_lock(&index->lock);
+ if (index->online_log->error != DB_SUCCESS) {
+ return;
+ }
- if (dict_index_is_online_ddl(index)) {
- ut_ad(index->online_log);
- ut_ad(index->online_log->table);
- mutex_enter(&index->online_log->mutex);
- trx_id_set* trxs = index->online_log->trx_rb;
+ page_no_map* blobs = index->online_log->blobs;
- if (!trxs) {
- index->online_log->trx_rb = trxs = new trx_id_set();
- }
+ if (!blobs) {
+ index->online_log->blobs = blobs = new page_no_map();
+ }
- trxs->insert(trx_id);
+#ifdef UNIV_DEBUG
+ const ulonglong log_pos = index->online_log->tail.total;
+#else
+# define log_pos /* empty */
+#endif /* UNIV_DEBUG */
- mutex_exit(&index->online_log->mutex);
- }
+ const page_no_map::value_type v(page_no,
+ row_log_table_blob_t(log_pos));
- rw_lock_s_unlock(&index->lock);
+ std::pair<page_no_map::iterator,bool> p = blobs->insert(v);
+
+ if (!p.second) {
+ /* Update the existing mapping. */
+ ut_ad(p.first->first == page_no);
+ p.first->second.blob_free(log_pos);
+ }
+#undef log_pos
}
/******************************************************//**
-Check if a transaction rollback has been initiated.
-@return true if inserts of this transaction were rolled back */
+Notes that a BLOB is being allocated during online ALTER TABLE. */
UNIV_INTERN
-bool
-row_log_table_is_rollback(
-/*======================*/
- const dict_index_t* index, /*!< in: clustered index */
- trx_id_t trx_id) /*!< in: transaction id */
+void
+row_log_table_blob_alloc(
+/*=====================*/
+ dict_index_t* index, /*!< in/out: clustered index, X-latched */
+ ulint page_no)/*!< in: starting page number of the BLOB */
{
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_online_ddl(index));
- ut_ad(index->online_log);
-
- if (const trx_id_set* trxs = index->online_log->trx_rb) {
- mutex_enter(&index->online_log->mutex);
- bool is_rollback = trxs->find(trx_id) != trxs->end();
- mutex_exit(&index->online_log->mutex);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(page_no != FIL_NULL);
- return(is_rollback);
+ if (index->online_log->error != DB_SUCCESS) {
+ return;
}
- return(false);
+ /* Only track allocations if the same page has been freed
+ earlier. Double allocation without a free is not allowed. */
+ if (page_no_map* blobs = index->online_log->blobs) {
+ page_no_map::iterator p = blobs->find(page_no);
+
+ if (p != blobs->end()) {
+ ut_ad(p->first == page_no);
+ p->second.blob_alloc(index->online_log->tail.total);
+ }
+ }
}
/******************************************************//**
@@ -1069,17 +1216,6 @@ row_log_table_apply_convert_mrec(
{
dtuple_t* row;
-#ifdef UNIV_SYNC_DEBUG
- /* This prevents BLOBs from being freed, in case an insert
- transaction rollback starts after row_log_table_is_rollback(). */
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (row_log_table_is_rollback(index, trx_id)) {
- row = NULL;
- goto func_exit;
- }
-
/* This is based on row_build(). */
if (log->add_cols) {
row = dtuple_copy(log->add_cols, heap);
@@ -1121,15 +1257,43 @@ row_log_table_apply_convert_mrec(
dfield_t* dfield
= dtuple_get_nth_field(row, col_no);
ulint len;
- const void* data;
+ const byte* data= NULL;
if (rec_offs_nth_extern(offsets, i)) {
ut_ad(rec_offs_any_extern(offsets));
- data = btr_rec_copy_externally_stored_field(
- mrec, offsets,
- dict_table_zip_size(index->table),
- i, &len, heap);
- ut_a(data);
+ rw_lock_x_lock(dict_index_get_lock(index));
+
+ if (const page_no_map* blobs = log->blobs) {
+ data = rec_get_nth_field(
+ mrec, offsets, i, &len);
+ ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ ulint page_no = mach_read_from_4(
+ data + len - (BTR_EXTERN_FIELD_REF_SIZE
+ - BTR_EXTERN_PAGE_NO));
+ page_no_map::const_iterator p = blobs->find(
+ page_no);
+ if (p != blobs->end()
+ && p->second.is_freed(log->head.total)) {
+ /* This BLOB has been freed.
+ We must not access the row. */
+ row = NULL;
+ }
+ }
+
+ if (row) {
+ data = btr_rec_copy_externally_stored_field(
+ mrec, offsets,
+ dict_table_zip_size(index->table),
+ i, &len, heap);
+ ut_a(data);
+ }
+
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ if (!row) {
+ goto func_exit;
+ }
} else {
data = rec_get_nth_field(mrec, offsets, i, &len);
}
@@ -1685,7 +1849,7 @@ delete_insert:
| BTR_KEEP_POS_FLAG,
btr_pcur_get_btr_cur(&pcur),
&cur_offsets, &offsets_heap, heap, &big_rec,
- update, 0, NULL, 0, &mtr);
+ update, 0, thr, 0, &mtr);
if (big_rec) {
if (error == DB_SUCCESS) {
@@ -1783,7 +1947,7 @@ row_log_table_apply_op(
ulint* offsets) /*!< in/out: work area
for parsing mrec */
{
- const row_log_t*log = dup->index->online_log;
+ row_log_t* log = dup->index->online_log;
dict_index_t* new_index = dict_table_get_first_index(log->table);
ulint extra_size;
const mrec_t* next_mrec;
@@ -1793,6 +1957,7 @@ row_log_table_apply_op(
ut_ad(dict_index_is_clust(dup->index));
ut_ad(dup->index->table != log->table);
+ ut_ad(log->head.total <= log->tail.total);
*error = DB_SUCCESS;
@@ -1801,6 +1966,8 @@ row_log_table_apply_op(
return(NULL);
}
+ const mrec_t* const mrec_start = mrec;
+
switch (*mrec++) {
default:
ut_ad(0);
@@ -1830,6 +1997,8 @@ row_log_table_apply_op(
if (next_mrec > mrec_end) {
return(NULL);
} else {
+ log->head.total += next_mrec - mrec_start;
+
ulint len;
const byte* db_trx_id
= rec_get_nth_field(
@@ -1863,6 +2032,8 @@ row_log_table_apply_op(
return(NULL);
}
+ log->head.total += next_mrec - mrec_start;
+
/* If there are external fields, retrieve those logged
prefix info and reconstruct the row_ext_t */
if (ext_size) {
@@ -2019,6 +2190,7 @@ row_log_table_apply_op(
}
ut_ad(next_mrec <= mrec_end);
+ log->head.total += next_mrec - mrec_start;
dtuple_set_n_fields_cmp(old_pk, new_index->n_uniq);
{
@@ -2036,6 +2208,7 @@ row_log_table_apply_op(
break;
}
+ ut_ad(log->head.total <= log->tail.total);
mem_heap_empty(offsets_heap);
mem_heap_empty(heap);
return(next_mrec);
@@ -2423,6 +2596,10 @@ row_log_table_apply(
};
error = row_log_table_apply_ops(thr, &dup);
+
+ ut_ad(error != DB_SUCCESS
+ || clust_index->online_log->head.total
+ == clust_index->online_log->tail.total);
}
rw_lock_x_unlock(dict_index_get_lock(clust_index));
@@ -2451,6 +2628,7 @@ row_log_allocate(
byte* buf;
row_log_t* log;
ulint size;
+ DBUG_ENTER("row_log_allocate");
ut_ad(!dict_index_is_online_ddl(index));
ut_ad(dict_index_is_clust(index) == !!table);
@@ -2464,7 +2642,7 @@ row_log_allocate(
size = 2 * srv_sort_buf_size + sizeof *log;
buf = (byte*) os_mem_alloc_large(&size);
if (!buf) {
- return(false);
+ DBUG_RETURN(false);
}
log = (row_log_t*) &buf[2 * srv_sort_buf_size];
@@ -2472,11 +2650,11 @@ row_log_allocate(
log->fd = row_merge_file_create_low();
if (log->fd < 0) {
os_mem_free_large(buf, size);
- return(false);
+ DBUG_RETURN(false);
}
mutex_create(index_online_log_key, &log->mutex,
SYNC_INDEX_ONLINE_LOG);
- log->trx_rb = NULL;
+ log->blobs = NULL;
log->table = table;
log->same_pk = same_pk;
log->add_cols = add_cols;
@@ -2486,7 +2664,9 @@ row_log_allocate(
log->head.block = buf;
log->tail.block = buf + srv_sort_buf_size;
log->tail.blocks = log->tail.bytes = 0;
+ log->tail.total = 0;
log->head.blocks = log->head.bytes = 0;
+ log->head.total = 0;
dict_index_set_online_status(index, ONLINE_INDEX_CREATION);
index->online_log = log;
@@ -2495,7 +2675,7 @@ row_log_allocate(
atomic operations in both cases. */
MONITOR_ATOMIC_INC(MONITOR_ONLINE_CREATE_INDEX);
- return(true);
+ DBUG_RETURN(true);
}
/******************************************************//**
@@ -2508,7 +2688,7 @@ row_log_free(
{
MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX);
- delete log->trx_rb;
+ delete log->blobs;
row_merge_file_destroy_low(log->fd);
mutex_free(&log->mutex);
os_mem_free_large(log->head.block, log->size);
@@ -3183,6 +3363,7 @@ row_log_apply(
dberr_t error;
row_log_t* log;
row_merge_dup_t dup = { index, table, NULL, 0 };
+ DBUG_ENTER("row_log_apply");
ut_ad(dict_index_is_online_ddl(index));
ut_ad(!dict_index_is_clust(index));
@@ -3225,5 +3406,5 @@ row_log_apply(
row_log_free(log);
- return(error);
+ DBUG_RETURN(error);
}
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index a509e2c5ca8..a0c0fd2c8c3 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -260,14 +260,15 @@ row_merge_buf_add(
ulint bucket = 0;
doc_id_t write_doc_id;
ulint n_row_added = 0;
+ DBUG_ENTER("row_merge_buf_add");
if (buf->n_tuples >= buf->max_tuples) {
- return(0);
+ DBUG_RETURN(0);
}
DBUG_EXECUTE_IF(
"ib_row_merge_buf_add_two",
- if (buf->n_tuples >= 2) return(0););
+ if (buf->n_tuples >= 2) DBUG_RETURN(0););
UNIV_PREFETCH_R(row->fields);
@@ -325,18 +326,12 @@ row_merge_buf_add(
fts_doc_item_t* doc_item;
byte* value;
- if (dfield_is_null(field)) {
- n_row_added = 1;
- continue;
- }
-
- doc_item = static_cast<fts_doc_item_t*>(
- mem_heap_alloc(
- buf->heap,
- sizeof(fts_doc_item_t)));
-
/* fetch Doc ID if it already exists
- in the row, and not supplied by the caller */
+ in the row, and not supplied by the
+ caller. Even if the value column is
+ NULL, we still need to get the Doc
+ ID so as to maintain the correct max
+ Doc ID */
if (*doc_id == 0) {
const dfield_t* doc_field;
doc_field = dtuple_get_nth_field(
@@ -347,14 +342,23 @@ row_merge_buf_add(
dfield_get_data(doc_field)));
if (*doc_id == 0) {
- fprintf(stderr, "InnoDB FTS: "
- "User supplied Doc ID "
- "is zero. Record "
- "Skipped\n");
- return(0);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "FTS Doc ID is zero. "
+ "Record Skipped");
+ DBUG_RETURN(0);
}
}
+ if (dfield_is_null(field)) {
+ n_row_added = 1;
+ continue;
+ }
+
+ doc_item = static_cast<fts_doc_item_t*>(
+ mem_heap_alloc(
+ buf->heap,
+ sizeof(*doc_item)));
+
value = static_cast<byte*>(
ut_malloc(field->len));
memcpy(value, field->data, field->len);
@@ -458,7 +462,7 @@ row_merge_buf_add(
/* If this is FTS index, we already populated the sort buffer, return
here */
if (index->type & DICT_FTS) {
- return(n_row_added);
+ DBUG_RETURN(n_row_added);
}
#ifdef UNIV_DEBUG
@@ -484,7 +488,7 @@ row_merge_buf_add(
/* Reserve one byte for the end marker of row_merge_block_t. */
if (buf->total_size + data_size >= srv_sort_buf_size - 1) {
- return(0);
+ DBUG_RETURN(0);
}
buf->total_size += data_size;
@@ -499,7 +503,7 @@ row_merge_buf_add(
dfield_dup(field++, buf->heap);
} while (--n_fields);
- return(n_row_added);
+ DBUG_RETURN(n_row_added);
}
/*************************************************************//**
@@ -1180,6 +1184,7 @@ row_merge_read_clustered_index(
os_event_t fts_parallel_sort_event = NULL;
ibool fts_pll_sort = FALSE;
ib_int64_t sig_count = 0;
+ DBUG_ENTER("row_merge_read_clustered_index");
ut_ad((old_table == new_table) == !col_map);
ut_ad(!add_cols || col_map);
@@ -1396,13 +1401,26 @@ end_of_index:
offsets = rec_get_offsets(rec, clust_index, NULL,
ULINT_UNDEFINED, &row_heap);
- if (online && new_table != old_table) {
- /* When rebuilding the table online, perform a
- REPEATABLE READ, so that row_log_table_apply()
- will not see a newer state of the table when
- applying the log. This is mainly to prevent
- false duplicate key errors, because the log
- will identify records by the PRIMARY KEY. */
+ if (online) {
+ /* Perform a REPEATABLE READ.
+
+ When rebuilding the table online,
+ row_log_table_apply() must not see a newer
+ state of the table when applying the log.
+ This is mainly to prevent false duplicate key
+ errors, because the log will identify records
+ by the PRIMARY KEY, and also to prevent unsafe
+ BLOB access.
+
+ When creating a secondary index online, this
+ table scan must not see records that have only
+ been inserted to the clustered index, but have
+ not been written to the online_log of
+ index[]. If we performed READ UNCOMMITTED, it
+ could happen that the ADD INDEX reaches
+ ONLINE_INDEX_COMPLETE state after the DML
+ thread has updated the clustered index but
+ before it has accessed the secondary index. */
ut_ad(trx->read_view);
if (!read_view_sees_trx_id(
@@ -1445,38 +1463,13 @@ end_of_index:
would make it tricky to detect duplicate
keys. */
continue;
- } else if (UNIV_LIKELY_NULL(rec_offs_any_null_extern(
- rec, offsets))) {
- /* This is essentially a READ UNCOMMITTED to
- fetch the most recent version of the record. */
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- trx_id_t trx_id;
- ulint trx_id_offset;
-
- /* It is possible that the record was
- just inserted and the off-page columns
- have not yet been written. We will
- ignore the record if this is the case,
- because it should be covered by the
- index->info.online log in that case. */
-
- trx_id_offset = clust_index->trx_id_offset;
- if (!trx_id_offset) {
- trx_id_offset = row_get_trx_id_offset(
- clust_index, offsets);
- }
-
- trx_id = trx_read_trx_id(rec + trx_id_offset);
- ut_a(trx_rw_is_active(trx_id, NULL));
- ut_a(trx_undo_trx_id_is_insert(rec + trx_id_offset));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
- /* When !online, we are holding an X-lock on
- old_table, preventing any inserts. */
- ut_ad(online);
- continue;
}
+ /* When !online, we are holding a lock on old_table, preventing
+ any inserts that could have written a record 'stub' before
+ writing out off-page columns. */
+ ut_ad(!rec_offs_any_null_extern(rec, offsets));
+
/* Build a row based on the clustered index. */
row = row_build(ROW_COPY_POINTERS, clust_index,
@@ -1692,10 +1685,16 @@ all_done:
DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Scan Table\n");
#endif
if (fts_pll_sort) {
+ bool all_exit = false;
+ ulint trial_count = 0;
+ const ulint max_trial_count = 10000;
+
+ /* Tell all children that parent has done scanning */
for (ulint i = 0; i < fts_sort_pll_degree; i++) {
psort_info[i].state = FTS_PARENT_COMPLETE;
}
wait_again:
+ /* Now wait for all children to report back that they have completed */
os_event_wait_time_low(fts_parallel_sort_event,
1000000, sig_count);
@@ -1707,6 +1706,31 @@ wait_again:
goto wait_again;
}
}
+
+ /* Now all children should complete, wait a bit until
+ they all finish setting the event, before we free everything.
+ This has a 10 second timeout */
+ do {
+ all_exit = true;
+
+ for (ulint j = 0; j < fts_sort_pll_degree; j++) {
+ if (psort_info[j].child_status
+ != FTS_CHILD_EXITING) {
+ all_exit = false;
+ os_thread_sleep(1000);
+ break;
+ }
+ }
+ trial_count++;
+ } while (!all_exit && trial_count < max_trial_count);
+
+ if (!all_exit) {
+ ut_ad(0);
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Not all child sort threads exited"
+ " when creating FTS index '%s'",
+ fts_sort_idx->name);
+ }
}
#ifdef FTS_INTERNAL_DIAG_PRINT
@@ -1731,7 +1755,7 @@ wait_again:
trx->op_info = "";
- return(err);
+ DBUG_RETURN(err);
}
/** Write a record via buffer 2 and read the next record to buffer N.
@@ -2092,13 +2116,14 @@ row_merge_sort(
ulint num_runs;
ulint* run_offset;
dberr_t error = DB_SUCCESS;
+ DBUG_ENTER("row_merge_sort");
/* Record the number of merge runs we need to perform */
num_runs = file->offset;
/* If num_runs are less than 1, nothing to merge */
if (num_runs <= 1) {
- return(error);
+ DBUG_RETURN(error);
}
/* "run_offset" records each run's first offset number */
@@ -2126,24 +2151,7 @@ row_merge_sort(
mem_free(run_offset);
- return(error);
-}
-
-/*************************************************************//**
-Set blob fields empty */
-static __attribute__((nonnull))
-void
-row_merge_set_blob_empty(
-/*=====================*/
- dtuple_t* tuple) /*!< in/out: data tuple */
-{
- for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) {
- dfield_t* field = dtuple_get_nth_field(tuple, i);
-
- if (dfield_is_ext(field)) {
- dfield_set_data(field, NULL, 0);
- }
- }
+ DBUG_RETURN(error);
}
/*************************************************************//**
@@ -2211,6 +2219,7 @@ row_merge_insert_index_tuples(
ulint foffs = 0;
ulint* offsets;
mrec_buf_t* buf;
+ DBUG_ENTER("row_merge_insert_index_tuples");
ut_ad(!srv_read_only_mode);
ut_ad(!(index->type & DICT_FTS));
@@ -2272,52 +2281,31 @@ row_merge_insert_index_tuples(
if (!n_ext) {
/* There are no externally stored columns. */
- } else if (!dict_index_is_online_ddl(old_index)) {
+ } else {
ut_ad(dict_index_is_clust(index));
- /* Modifications to the table are
- blocked while we are not rebuilding it
- or creating indexes. Off-page columns
- can be fetched safely. */
+ /* Off-page columns can be fetched safely
+ when concurrent modifications to the table
+ are disabled. (Purge can process delete-marked
+ records, but row_merge_read_clustered_index()
+ would have skipped them.)
+
+ When concurrent modifications are enabled,
+ row_merge_read_clustered_index() will
+ only see rows from transactions that were
+ committed before the ALTER TABLE started
+ (REPEATABLE READ).
+
+ Any modifications after the
+ row_merge_read_clustered_index() scan
+ will go through row_log_table_apply().
+ Any modifications to off-page columns
+ will be tracked by
+ row_log_table_blob_alloc() and
+ row_log_table_blob_free(). */
row_merge_copy_blobs(
mrec, offsets,
dict_table_zip_size(old_table),
dtuple, tuple_heap);
- } else {
- ut_ad(dict_index_is_clust(index));
-
- ulint offset = index->trx_id_offset;
-
- if (!offset) {
- offset = row_get_trx_id_offset(
- index, offsets);
- }
-
- /* Copy the off-page columns while
- holding old_index->lock, so
- that they cannot be freed by
- a rollback of a fresh insert. */
- rw_lock_s_lock(&old_index->lock);
-
- if (row_log_table_is_rollback(
- old_index,
- trx_read_trx_id(mrec + offset))) {
- /* The row and BLOB could
- already be freed. They
- will be deleted by
- row_undo_ins_remove_clust_rec
- when rolling back a fresh
- insert. So, no need to retrieve
- the off-page column. */
- row_merge_set_blob_empty(
- dtuple);
- } else {
- row_merge_copy_blobs(
- mrec, offsets,
- dict_table_zip_size(old_table),
- dtuple, tuple_heap);
- }
-
- rw_lock_s_unlock(&old_index->lock);
}
ut_ad(dtuple_validate(dtuple));
@@ -2415,7 +2403,7 @@ err_exit:
mem_heap_free(ins_heap);
mem_heap_free(heap);
- return(error);
+ DBUG_RETURN(error);
}
/*********************************************************************//**
@@ -2903,7 +2891,7 @@ row_merge_file_create_low(void)
if (fd < 0) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Cannot create temporary merge file");
- return -1;
+ return (-1);
}
return(fd);
}
@@ -3114,48 +3102,34 @@ will not be committed.
@return error code or DB_SUCCESS */
UNIV_INTERN
dberr_t
-row_merge_rename_tables(
-/*====================*/
+row_merge_rename_tables_dict(
+/*=========================*/
dict_table_t* old_table, /*!< in/out: old table, renamed to
tmp_name */
dict_table_t* new_table, /*!< in/out: new table, renamed to
old_table->name */
const char* tmp_name, /*!< in: new name for old_table */
- trx_t* trx) /*!< in: transaction handle */
+ trx_t* trx) /*!< in/out: dictionary transaction */
{
dberr_t err = DB_ERROR;
pars_info_t* info;
- char old_name[MAX_FULL_NAME_LEN + 1];
ut_ad(!srv_read_only_mode);
ut_ad(old_table != new_table);
ut_ad(mutex_own(&dict_sys->mutex));
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE);
-
- /* store the old/current name to an automatic variable */
- if (strlen(old_table->name) + 1 <= sizeof(old_name)) {
- memcpy(old_name, old_table->name, strlen(old_table->name) + 1);
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Too long table name: '%s', max length is %d",
- old_table->name, MAX_FULL_NAME_LEN);
- ut_error;
- }
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE
+ || trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
trx->op_info = "renaming tables";
- DBUG_EXECUTE_IF(
- "ib_rebuild_cannot_rename",
- err = DB_ERROR; goto err_exit;);
-
/* We use the private SQL parser of Innobase to generate the query
graphs needed in updating the dictionary data in system tables. */
info = pars_info_create();
pars_info_add_str_literal(info, "new_name", new_table->name);
- pars_info_add_str_literal(info, "old_name", old_name);
+ pars_info_add_str_literal(info, "old_name", old_table->name);
pars_info_add_str_literal(info, "tmp_name", tmp_name);
err = que_eval_sql(info,
@@ -3200,11 +3174,12 @@ row_merge_rename_tables(
table is in a non-system tablespace where space > 0. */
if (err == DB_SUCCESS && new_table->space != TRX_SYS_SPACE) {
/* Make pathname to update SYS_DATAFILES. */
- char* old_path = row_make_new_pathname(new_table, old_name);
+ char* old_path = row_make_new_pathname(
+ new_table, old_table->name);
info = pars_info_create();
- pars_info_add_str_literal(info, "old_name", old_name);
+ pars_info_add_str_literal(info, "old_name", old_table->name);
pars_info_add_str_literal(info, "old_path", old_path);
pars_info_add_int4_literal(info, "new_space",
(lint) new_table->space);
@@ -3223,75 +3198,9 @@ row_merge_rename_tables(
mem_free(old_path);
}
- if (err != DB_SUCCESS) {
- goto err_exit;
- }
-
- /* Generate the redo logs for file operations */
- fil_mtr_rename_log(old_table->space, old_name,
- new_table->space, new_table->name, tmp_name);
-
- /* What if the redo logs are flushed to disk here? This is
- tested with following crash point */
- DBUG_EXECUTE_IF("bug14669848_precommit", log_buffer_flush_to_disk();
- DBUG_SUICIDE(););
-
- /* File operations cannot be rolled back. So, before proceeding
- with file operations, commit the dictionary changes.*/
- trx_commit_for_mysql(trx);
-
- /* If server crashes here, the dictionary in InnoDB and MySQL
- will differ. The .ibd files and the .frm files must be swapped
- manually by the administrator. No loss of data. */
- DBUG_EXECUTE_IF("bug14669848", DBUG_SUICIDE(););
-
- /* Ensure that the redo logs are flushed to disk. The config
- innodb_flush_log_at_trx_commit must not affect this. */
- log_buffer_flush_to_disk();
-
- /* The following calls will also rename the .ibd data files if
- the tables are stored in a single-table tablespace */
-
- err = dict_table_rename_in_cache(old_table, tmp_name, FALSE);
-
- if (err == DB_SUCCESS) {
-
- ut_ad(dict_table_is_discarded(old_table)
- == dict_table_is_discarded(new_table));
-
- err = dict_table_rename_in_cache(new_table, old_name, FALSE);
-
- if (err != DB_SUCCESS) {
-
- if (dict_table_rename_in_cache(
- old_table, old_name, FALSE)
- != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot undo the rename in cache "
- "from %s to %s", old_name, tmp_name);
- }
-
- goto err_exit;
- }
-
- if (dict_table_is_discarded(new_table)) {
-
- err = row_import_update_discarded_flag(
- trx, new_table->id, true, true);
- }
- }
-
- DBUG_EXECUTE_IF("ib_rebuild_cannot_load_fk",
- err = DB_ERROR; goto err_exit;);
-
- err = dict_load_foreigns(old_name, FALSE, TRUE);
-
- if (err != DB_SUCCESS) {
-err_exit:
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
+ if (err == DB_SUCCESS && dict_table_is_discarded(new_table)) {
+ err = row_import_update_discarded_flag(
+ trx, new_table->id, true, true);
}
trx->op_info = "";
@@ -3417,7 +3326,7 @@ row_merge_is_index_usable(
/*********************************************************************//**
Drop a table. The caller must have ensured that the background stats
thread is not processing the table. This can be done by calling
-dict_stats_wait_bg_to_stop_using_tables() after locking the dictionary and
+dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
before calling this function.
@return DB_SUCCESS or error code */
UNIV_INTERN
@@ -3475,11 +3384,12 @@ row_merge_build_indexes(
ulint i;
ulint j;
dberr_t error;
- int tmpfd;
+ int tmpfd = -1;
dict_index_t* fts_sort_idx = NULL;
fts_psort_t* psort_info = NULL;
fts_psort_t* merge_info = NULL;
ib_int64_t sig_count = 0;
+ DBUG_ENTER("row_merge_build_indexes");
ut_ad(!srv_read_only_mode);
ut_ad((old_table == new_table) == !col_map);
@@ -3493,7 +3403,7 @@ row_merge_build_indexes(
os_mem_alloc_large(&block_size));
if (block == NULL) {
- return(DB_OUT_OF_MEMORY);
+ DBUG_RETURN(DB_OUT_OF_MEMORY);
}
trx_start_if_not_started_xa(trx);
@@ -3501,6 +3411,14 @@ row_merge_build_indexes(
merge_files = static_cast<merge_file_t*>(
mem_alloc(n_indexes * sizeof *merge_files));
+ /* Initialize all the merge file descriptors, so that we
+ don't call row_merge_file_destroy() on an uninitialized
+ merge file descriptor. */
+
+ for (i = 0; i < n_indexes; i++) {
+ merge_files[i].fd = -1;
+ }
+
for (i = 0; i < n_indexes; i++) {
if (row_merge_file_create(&merge_files[i]) < 0) {
error = DB_OUT_OF_MEMORY;
@@ -3565,41 +3483,16 @@ row_merge_build_indexes(
if (indexes[i]->type & DICT_FTS) {
os_event_t fts_parallel_merge_event;
- bool all_exit = false;
- ulint trial_count = 0;
sort_idx = fts_sort_idx;
- /* Now all children should complete, wait
- a bit until they all finish using event */
- while (!all_exit && trial_count < 10000) {
- all_exit = true;
-
- for (j = 0; j < fts_sort_pll_degree;
- j++) {
- if (psort_info[j].child_status
- != FTS_CHILD_EXITING) {
- all_exit = false;
- os_thread_sleep(1000);
- break;
- }
- }
- trial_count++;
- }
-
- if (!all_exit) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Not all child sort threads exited"
- " when creating FTS index '%s'",
- indexes[i]->name);
- }
-
fts_parallel_merge_event
= merge_info[0].psort_common->merge_event;
if (FTS_PLL_MERGE) {
- trial_count = 0;
- all_exit = false;
+ ulint trial_count = 0;
+ bool all_exit = false;
+
os_event_reset(fts_parallel_merge_event);
row_fts_start_parallel_merge(merge_info);
wait_again:
@@ -3763,5 +3656,5 @@ func_exit:
}
}
- return(error);
+ DBUG_RETURN(error);
}
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 808bd0aaeb5..9aceb305493 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2000, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -62,6 +62,7 @@ Created 9/17/2000 Heikki Tuuri
#include "row0import.h"
#include "m_string.h"
#include "my_sys.h"
+#include "ha_prototypes.h"
/** Provide optional 4.x backwards compatibility for 5.0 and above */
UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
@@ -618,8 +619,8 @@ handle_new_error:
case DB_INTERRUPTED:
case DB_DICT_CHANGED:
if (savept) {
- /* Roll back the latest, possibly incomplete
- insertion or update */
+ /* Roll back the latest, possibly incomplete insertion
+ or update */
trx_rollback_to_savepoint(trx, savept);
}
@@ -2521,7 +2522,8 @@ row_table_add_foreign_constraints(
if (err == DB_SUCCESS) {
/* Check that also referencing constraints are ok */
- err = dict_load_foreigns(name, FALSE, TRUE);
+ err = dict_load_foreigns(name, NULL, false, true,
+ DICT_ERR_IGNORE_NONE);
}
if (err != DB_SUCCESS) {
@@ -2801,7 +2803,7 @@ row_discard_tablespace_begin(
name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
if (table) {
- dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx);
+ dict_stats_wait_bg_to_stop_using_table(table, trx);
ut_a(table->space != TRX_SYS_SPACE);
ut_a(table->n_foreign_key_checks_running == 0);
}
@@ -2874,13 +2876,13 @@ row_discard_tablespace_end(
}
DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ log_make_checkpoint_at(LSN_MAX, TRUE);
DBUG_SUICIDE(););
trx_commit_for_mysql(trx);
DBUG_EXECUTE_IF("ib_discard_after_commit_crash",
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ log_make_checkpoint_at(LSN_MAX, TRUE);
DBUG_SUICIDE(););
row_mysql_unlock_data_dictionary(trx);
@@ -3246,7 +3248,7 @@ row_truncate_table_for_mysql(
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx);
+ dict_stats_wait_bg_to_stop_using_table(table, trx);
/* Check if the table is referenced by foreign key constraints from
some other table (not the table itself) */
@@ -3796,8 +3798,8 @@ row_drop_table_for_mysql(
tables since we know temp tables do not use persistent
stats. */
if (!dict_table_is_temporary(table)) {
- dict_stats_wait_bg_to_stop_using_tables(
- table, NULL, trx);
+ dict_stats_wait_bg_to_stop_using_table(
+ table, trx);
}
}
@@ -4167,6 +4169,11 @@ check_next_foreign:
DICT_TF2_FTS flag set. So keep this out of above
dict_table_has_fts_index condition */
if (table->fts) {
+ /* Need to set TABLE_DICT_LOCKED bit, since
+ fts_que_graph_free_check_lock would try to acquire
+ dict mutex lock */
+ table->fts->fts_status |= TABLE_DICT_LOCKED;
+
fts_free(table);
}
@@ -4503,14 +4510,31 @@ loop:
}
- if (row_is_mysql_tmp_table_name(table->name)) {
- /* There could be an orphan temp table left from
- interupted alter table rebuild operation */
- dict_table_close(table, TRUE, FALSE);
- } else {
- ut_a(!table->can_be_evicted || table->ibd_file_missing);
+ if (!row_is_mysql_tmp_table_name(table->name)) {
+ /* There could be orphan temp tables left from
+ interrupted alter table. Leave them, and handle
+ the rest.*/
+ if (table->can_be_evicted) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Orphan table encountered during "
+ "DROP DATABASE. This is possible if "
+ "'%s.frm' was lost.", table->name);
+ }
+
+ if (table->ibd_file_missing) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Missing %s.ibd file for table %s.",
+ table->name, table->name);
+ }
}
+ dict_table_close(table, TRUE, FALSE);
+
+ /* The dict_table_t object must not be accessed before
+ dict_table_open() or after dict_table_close(). But this is OK
+ if we are holding the dict_sys->mutex. */
+ ut_ad(mutex_own(&dict_sys->mutex));
+
/* Wait until MySQL does not have any queries running on
the table */
@@ -4668,6 +4692,7 @@ row_rename_table_for_mysql(
ut_a(old_name != NULL);
ut_a(new_name != NULL);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
if (srv_created_new_raw || srv_force_recovery) {
fputs("InnoDB: A new raw disk partition was initialized or\n"
@@ -4692,7 +4717,6 @@ row_rename_table_for_mysql(
}
trx->op_info = "renaming table";
- trx_start_if_not_started_xa(trx);
old_is_tmp = row_is_mysql_tmp_table_name(old_name);
new_is_tmp = row_is_mysql_tmp_table_name(new_name);
@@ -4945,6 +4969,24 @@ row_rename_table_for_mysql(
}
}
+ if (dict_table_has_fts_index(table)
+ && !dict_tables_have_same_db(old_name, new_name)) {
+ err = fts_rename_aux_tables(table, new_name, trx);
+
+ if (err != DB_SUCCESS && (table->space != 0)) {
+ char* orig_name = table->name;
+
+ /* If rename fails and table has its own tablespace,
+ we need to call fts_rename_aux_tables again to
+ revert the ibd file rename, which is not under the
+ control of trx. Also notice the parent table name
+ in cache is not changed yet. */
+ table->name = const_cast<char*>(new_name);
+ fts_rename_aux_tables(table, old_name, trx);
+ table->name = orig_name;
+ }
+ }
+
end:
if (err != DB_SUCCESS) {
if (err == DB_DUPLICATE_KEY) {
@@ -5003,7 +5045,9 @@ end:
an ALTER, not in a RENAME. */
err = dict_load_foreigns(
- new_name, FALSE, !old_is_tmp || trx->check_foreigns);
+ new_name, NULL,
+ false, !old_is_tmp || trx->check_foreigns,
+ DICT_ERR_IGNORE_NONE);
if (err != DB_SUCCESS) {
ut_print_timestamp(stderr);
@@ -5052,7 +5096,6 @@ end:
}
funct_exit:
-
if (table != NULL) {
dict_table_close(table, dict_locked, FALSE);
}
@@ -5182,6 +5225,7 @@ func_exit:
dtuple_get_nth_field(prev_entry, i))) {
contains_null = TRUE;
+ break;
}
}
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index ee603be453a..1b836c26c25 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -112,28 +112,19 @@ row_purge_reposition_pcur(
return(node->found_clust);
}
-/** Status of row_purge_remove_clust() */
-enum row_purge_status {
- ROW_PURGE_DONE, /*!< The row has been removed. */
- ROW_PURGE_FAIL, /*!< The purge was not successful. */
- ROW_PURGE_SUSPEND/*!< Cannot purge now, due to online rebuild. */
-};
-
/***********************************************************//**
Removes a delete marked clustered index record if possible.
-@retval ROW_PURGE_DONE if the row was not found, or it was successfully removed
-@retval ROW_PURGE_FAIL if the row was modified after the delete marking
-@retval ROW_PURGE_SUSPEND if the row refers to an off-page column and
-an online ALTER TABLE (table rebuild) is in progress. */
+@retval true if the row was not found, or it was successfully removed
+@retval false if the row was modified after the delete marking */
static __attribute__((nonnull, warn_unused_result))
-enum row_purge_status
+bool
row_purge_remove_clust_if_poss_low(
/*===============================*/
purge_node_t* node, /*!< in/out: row purge node */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
dict_index_t* index;
- enum row_purge_status status = ROW_PURGE_DONE;
+ bool success = true;
mtr_t mtr;
rec_t* rec;
mem_heap_t* heap = NULL;
@@ -165,16 +156,9 @@ row_purge_remove_clust_if_poss_low(
goto func_exit;
}
- if (dict_index_get_online_status(index) == ONLINE_INDEX_CREATION
- && rec_offs_any_extern(offsets)) {
- status = ROW_PURGE_SUSPEND;
- goto func_exit;
- }
-
if (mode == BTR_MODIFY_LEAF) {
- status = btr_cur_optimistic_delete(
- btr_pcur_get_btr_cur(&node->pcur), 0, &mtr)
- ? ROW_PURGE_DONE : ROW_PURGE_FAIL;
+ success = btr_cur_optimistic_delete(
+ btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
} else {
dberr_t err;
ut_ad(mode == BTR_MODIFY_TREE);
@@ -186,7 +170,7 @@ row_purge_remove_clust_if_poss_low(
case DB_SUCCESS:
break;
case DB_OUT_OF_FILE_SPACE:
- status = ROW_PURGE_FAIL;
+ success = false;
break;
default:
ut_error;
@@ -200,43 +184,34 @@ func_exit:
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
- return(status);
+ return(success);
}
/***********************************************************//**
Removes a clustered index record if it has not been modified after the delete
marking.
@retval true if the row was not found, or it was successfully removed
-@retval false the purge needs to be suspended, either because of
-running out of file space or because the row refers to an off-page
-column and an online ALTER TABLE (table rebuild) is in progress. */
+@retval false the purge needs to be suspended because of running out
+of file space. */
static __attribute__((nonnull, warn_unused_result))
bool
row_purge_remove_clust_if_poss(
/*===========================*/
purge_node_t* node) /*!< in/out: row purge node */
{
- switch (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
- case ROW_PURGE_DONE:
+ if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
return(true);
- case ROW_PURGE_SUSPEND:
- return(false);
- case ROW_PURGE_FAIL:
- break;
}
for (ulint n_tries = 0;
n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
n_tries++) {
- switch (row_purge_remove_clust_if_poss_low(
- node, BTR_MODIFY_TREE)) {
- case ROW_PURGE_DONE:
+ if (row_purge_remove_clust_if_poss_low(
+ node, BTR_MODIFY_TREE)) {
return(true);
- case ROW_PURGE_SUSPEND:
- return(false);
- case ROW_PURGE_FAIL:
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
}
+
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
}
return(false);
@@ -529,9 +504,8 @@ retry:
/***********************************************************//**
Purges a delete marking of a record.
@retval true if the row was not found, or it was successfully removed
-@retval false the purge needs to be suspended, either because of
-running out of file space or because the row refers to an off-page
-column and an online ALTER TABLE (table rebuild) is in progress. */
+@retval false the purge needs to be suspended because of
+running out of file space */
static __attribute__((nonnull, warn_unused_result))
bool
row_purge_del_mark(
@@ -567,10 +541,9 @@ row_purge_del_mark(
/***********************************************************//**
Purges an update of an existing record. Also purges an update of a delete
-marked record if that record contained an externally stored field.
-@return true if purged, false if skipped */
-static __attribute__((nonnull, warn_unused_result))
-bool
+marked record if that record contained an externally stored field. */
+static
+void
row_purge_upd_exist_or_extern_func(
/*===============================*/
#ifdef UNIV_DEBUG
@@ -585,20 +558,6 @@ row_purge_upd_exist_or_extern_func(
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
- if (dict_index_get_online_status(dict_table_get_first_index(
- node->table))
- == ONLINE_INDEX_CREATION) {
- for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
-
- const upd_field_t* ufield
- = upd_get_nth_field(node->update, i);
-
- if (dfield_is_ext(&ufield->new_val)) {
- return(false);
- }
- }
- }
-
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
|| (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
@@ -675,16 +634,7 @@ skip_secondaries:
index = dict_table_get_first_index(node->table);
mtr_x_lock(dict_index_get_lock(index), &mtr);
-#ifdef UNIV_DEBUG
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_CREATION:
- case ONLINE_INDEX_ABORTED_DROPPED:
- ut_ad(0);
- case ONLINE_INDEX_COMPLETE:
- case ONLINE_INDEX_ABORTED:
- break;
- }
-#endif /* UNIV_DEBUG */
+
/* NOTE: we must also acquire an X-latch to the
root page of the tree. We will need it when we
free pages from the tree. If the tree is of height 1,
@@ -714,8 +664,6 @@ skip_secondaries:
mtr_commit(&mtr);
}
}
-
- return(true);
}
#ifdef UNIV_DEBUG
@@ -771,7 +719,8 @@ row_purge_parse_undo_rec(
rw_lock_s_lock_inline(&dict_operation_lock, 0, __FILE__, __LINE__);
- node->table = dict_table_open_on_id(table_id, FALSE, FALSE);
+ node->table = dict_table_open_on_id(
+ table_id, FALSE, DICT_TABLE_OP_NORMAL);
if (node->table == NULL) {
/* The table has been dropped: no need to do purge */
@@ -866,10 +815,7 @@ row_purge_record_func(
}
/* fall through */
case TRX_UNDO_UPD_EXIST_REC:
- purged = row_purge_upd_exist_or_extern(thr, node, undo_rec);
- if (!purged) {
- break;
- }
+ row_purge_upd_exist_or_extern(thr, node, undo_rec);
MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN);
break;
}
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index 72e0bf43d77..79cced1c533 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -532,10 +532,11 @@ row_quiesce_table_start(
ut_a(table->id > 0);
- ulint count = 0;
-
- while (ibuf_contract_in_background(table->id, TRUE) != 0) {
- if (!(++count % 20)) {
+ for (ulint count = 0;
+ ibuf_contract_in_background(table->id, TRUE) != 0
+ && !trx_is_interrupted(trx);
+ ++count) {
+ if (!(count % 20)) {
ib_logf(IB_LOG_LEVEL_INFO,
"Merging change buffer entries for '%s'",
table_name);
@@ -610,7 +611,7 @@ row_quiesce_table_complete(
srv_get_meta_data_filename(table, cfg_name, sizeof(cfg_name));
- os_file_delete_if_exists(cfg_name);
+ os_file_delete_if_exists(innodb_file_data_key, cfg_name);
ib_logf(IB_LOG_LEVEL_INFO,
"Deleting the meta-data file '%s'", cfg_name);
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index cd98d13082b..690c6e958fe 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -57,6 +57,8 @@ Created 12/19/1997 Heikki Tuuri
#include "read0read.h"
#include "buf0lru.h"
#include "ha_prototypes.h"
+#include "m_string.h" /* for my_sys.h */
+#include "my_sys.h" /* DEBUG_SYNC_C */
#include "my_compare.h" /* enum icp_result */
@@ -2957,9 +2959,7 @@ row_sel_store_mysql_rec(
&& dict_index_is_clust(index)) {
prebuilt->fts_doc_id = fts_get_doc_id_from_rec(
- prebuilt->table,
- rec,
- prebuilt->heap);
+ prebuilt->table, rec, NULL);
}
return(TRUE);
@@ -4154,7 +4154,9 @@ wait_table_again:
}
rec_loop:
+ DEBUG_SYNC_C("row_search_rec_loop");
if (trx_is_interrupted(trx)) {
+ btr_pcur_store_position(pcur, &mtr);
err = DB_INTERRUPTED;
goto normal_return;
}
@@ -5333,7 +5335,7 @@ row_search_max_autoinc(
btr_pcur_open_at_index_side(
false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
- if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
+ if (!page_is_empty(btr_pcur_get_page(&pcur))) {
const rec_t* rec;
rec = row_search_autoinc_get_rec(&pcur, &mtr);
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 27881c1f4c3..7b50d8b62ae 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -79,12 +79,11 @@ row_undo_ins_remove_clust_rec(
mtr_start(&mtr);
- /* This is similar to row_undo_mod_clust(). Even though we
- call row_log_table_rollback() elsewhere, the DDL thread may
- already have copied this row to the sort buffers or to the new
- table. We must log the removal, so that the row will be
- correctly purged. However, we can log the removal out of sync
- with the B-tree modification. */
+ /* This is similar to row_undo_mod_clust(). The DDL thread may
+ already have copied this row from the log to the new table.
+ We must log the removal, so that the row will be correctly
+ purged. However, we can log the removal out of sync with the
+ B-tree modification. */
online = dict_index_is_online_ddl(index);
if (online) {
@@ -111,9 +110,7 @@ row_undo_ins_remove_clust_rec(
const ulint* offsets = rec_get_offsets(
rec, index, NULL, ULINT_UNDEFINED, &heap);
row_log_table_delete(
- rec, index, offsets,
- trx_read_trx_id(row_get_trx_id_offset(index, offsets)
- + rec));
+ rec, index, offsets, true, node->trx->id);
mem_heap_free(heap);
}
@@ -319,7 +316,8 @@ row_undo_ins_parse_undo_rec(
node->rec_type = type;
node->update = NULL;
- node->table = dict_table_open_on_id(table_id, dict_locked, FALSE);
+ node->table = dict_table_open_on_id(
+ table_id, dict_locked, DICT_TABLE_OP_NORMAL);
/* Skip the UNDO if we can't find the table or the .ibd file. */
if (UNIV_UNLIKELY(node->table == NULL)) {
@@ -441,14 +439,6 @@ row_undo_ins(
node->index = dict_table_get_first_index(node->table);
ut_ad(dict_index_is_clust(node->index));
-
- if (dict_index_is_online_ddl(node->index)) {
- /* Note that we are rolling back this transaction, so
- that all inserts and updates with this DB_TRX_ID can
- be skipped. */
- row_log_table_rollback(node->index, node->trx->id);
- }
-
/* Skip the clustered index (the first index) */
node->index = dict_table_get_next_index(node->index);
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 2fd8a11b35a..efcd63a4d29 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -208,6 +208,36 @@ row_undo_mod_remove_clust_low(
return(DB_SUCCESS);
}
+ trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset;
+
+ if (!trx_id_offset) {
+ mem_heap_t* heap = NULL;
+ ulint trx_id_col;
+ const ulint* offsets;
+ ulint len;
+
+ trx_id_col = dict_index_get_sys_col_pos(
+ btr_cur_get_index(btr_cur), DATA_TRX_ID);
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+ offsets = rec_get_offsets(
+ btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur),
+ NULL, trx_id_col + 1, &heap);
+
+ trx_id_offset = rec_get_nth_field_offs(
+ offsets, trx_id_col, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ mem_heap_free(heap);
+ }
+
+ if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset)
+ != node->new_trx_id) {
+ /* The record must have been purged and then replaced
+ with a different one. */
+ return(DB_SUCCESS);
+ }
+
/* We are about to remove an old, delete-marked version of the
record that may have been delete-marked by a different transaction
than the rolling-back one. */
@@ -323,7 +353,7 @@ row_undo_mod_clust(
case TRX_UNDO_UPD_DEL_REC:
row_log_table_delete(
btr_pcur_get_rec(pcur), index, offsets,
- node->trx->id);
+ true, node->trx->id);
break;
default:
ut_ad(0);
@@ -331,6 +361,9 @@ row_undo_mod_clust(
}
}
+ ut_ad(rec_get_trx_id(btr_pcur_get_rec(pcur), index)
+ == node->new_trx_id);
+
btr_pcur_commit_specify_mtr(pcur, &mtr);
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
@@ -1044,7 +1077,8 @@ row_undo_mod_parse_undo_rec(
&dummy_extern, &undo_no, &table_id);
node->rec_type = type;
- node->table = dict_table_open_on_id(table_id, dict_locked, FALSE);
+ node->table = dict_table_open_on_id(
+ table_id, dict_locked, DICT_TABLE_OP_NORMAL);
/* TODO: other fixes associated with DROP TABLE + rollback in the
same table by another user */
@@ -1119,14 +1153,6 @@ row_undo_mod(
node->index = dict_table_get_first_index(node->table);
ut_ad(dict_index_is_clust(node->index));
-
- if (dict_index_is_online_ddl(node->index)) {
- /* Note that we are rolling back this transaction, so
- that all inserts and updates with this DB_TRX_ID can
- be skipped. */
- row_log_table_rollback(node->index, node->trx->id);
- }
-
/* Skip the clustered index (the first index) */
node->index = dict_table_get_next_index(node->index);
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index f97c0c3c82b..ccb905b36f4 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -2437,6 +2437,10 @@ row_upd_clust_step(
}
}
+ ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table,
+ btr_pcur_get_block(pcur),
+ page_rec_get_heap_no(rec)));
+
/* NOTE: the following function calls will also commit mtr */
if (node->is_delete) {
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index 3b3da2f070f..d98315ae9a2 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -325,6 +325,11 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED,
MONITOR_FLUSH_BATCH_SCANNED_PER_CALL},
+ {"buffer_flush_batch_rescan", "buffer",
+ "Number of times rescan of flush list forced",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_HP_RESCAN},
+
/* Cumulative counter for pages flushed in flush batches */
{"buffer_flush_batch_total_pages", "buffer",
"Total pages flushed as part of flush batch",
@@ -387,7 +392,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_FLUSH_SYNC_WAITS},
-
/* Cumulative counter for flush batches for adaptive flushing */
{"buffer_flush_adaptive_total_pages", "buffer",
"Total pages flushed as part of adaptive flushing",
@@ -1616,7 +1620,7 @@ srv_mon_process_existing_counter(
break;
case MONITOR_OVLD_RWLOCK_X_SPIN_WAITS:
- value = rw_lock_stats.rw_x_os_wait_count;
+ value = rw_lock_stats.rw_x_spin_wait_count;
break;
case MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS:
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 4c5753ac40e..a25469f35b7 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -1459,20 +1459,30 @@ srv_export_innodb_status(void)
export_vars.innodb_available_undo_logs = srv_available_undo_logs;
#ifdef UNIV_DEBUG
- if (purge_sys->done.trx_no == 0
- || trx_sys->rw_max_trx_id < purge_sys->done.trx_no - 1) {
+ rw_lock_s_lock(&purge_sys->latch);
+ trx_id_t done_trx_no = purge_sys->done.trx_no;
+ trx_id_t up_limit_id = purge_sys->view
+ ? purge_sys->view->up_limit_id
+ : 0;
+ rw_lock_s_unlock(&purge_sys->latch);
+
+ mutex_enter(&trx_sys->mutex);
+ trx_id_t max_trx_id = trx_sys->rw_max_trx_id;
+ mutex_exit(&trx_sys->mutex);
+
+ if (!done_trx_no || max_trx_id < done_trx_no - 1) {
export_vars.innodb_purge_trx_id_age = 0;
} else {
export_vars.innodb_purge_trx_id_age =
- trx_sys->rw_max_trx_id - purge_sys->done.trx_no + 1;
+ (ulint) (max_trx_id - done_trx_no + 1);
}
- if (!purge_sys->view
- || trx_sys->rw_max_trx_id < purge_sys->view->up_limit_id) {
+ if (!up_limit_id
+ || max_trx_id < up_limit_id) {
export_vars.innodb_purge_view_trx_id_age = 0;
} else {
export_vars.innodb_purge_view_trx_id_age =
- trx_sys->rw_max_trx_id - purge_sys->view->up_limit_id;
+ (ulint) (max_trx_id - up_limit_id);
}
#endif /* UNIV_DEBUG */
@@ -2540,7 +2550,9 @@ srv_do_purge(
}
do {
- if (trx_sys->rseg_history_len > rseg_history_len) {
+ if (trx_sys->rseg_history_len > rseg_history_len
+ || (srv_max_purge_lag > 0
+ && rseg_history_len > srv_max_purge_lag)) {
/* History length is now longer than what it was
when we took the last snapshot. Use more threads. */
@@ -2576,7 +2588,8 @@ srv_do_purge(
if (!(count++ % TRX_SYS_N_RSEGS)) {
/* Force a truncate of the history list. */
- trx_purge(1, srv_purge_batch_size, true);
+ n_pages_purged += trx_purge(
+ 1, srv_purge_batch_size, true);
}
*n_total_purged += n_pages_purged;
@@ -2605,9 +2618,10 @@ srv_purge_coordinator_suspend(
/** Maximum wait time on the purge event, in micro-seconds. */
static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;
+ ib_int64_t sig_count = srv_suspend_thread(slot);
+
do {
ulint ret;
- ib_int64_t sig_count = srv_suspend_thread(slot);
rw_lock_x_lock(&purge_sys->latch);
@@ -2644,6 +2658,8 @@ srv_purge_coordinator_suspend(
srv_sys_mutex_exit();
+ sig_count = srv_suspend_thread(slot);
+
rw_lock_x_lock(&purge_sys->latch);
stop = (purge_sys->state == PURGE_STATE_STOP);
@@ -2677,7 +2693,15 @@ srv_purge_coordinator_suspend(
} while (stop);
- ut_a(!slot->suspended);
+ srv_sys_mutex_enter();
+
+ if (slot->suspended) {
+ slot->suspended = FALSE;
+ ++srv_sys->n_threads_active[slot->type];
+ ut_a(srv_sys->n_threads_active[slot->type] == 1);
+ }
+
+ srv_sys_mutex_exit();
}
/*********************************************************************//**
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index efe9f094c0d..065a6c94074 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -220,9 +220,9 @@ srv_file_check_mode(
ib_logf(IB_LOG_LEVEL_ERROR,
"%s can't be opened in %s mode",
+ name,
srv_read_only_mode
- ? "read-write" : "read",
- name);
+ ? "read" : "read-write");
return(false);
}
@@ -522,7 +522,13 @@ create_log_file(
*file = os_file_create(
innodb_file_log_key, name,
- OS_FILE_CREATE, OS_FILE_NORMAL, OS_LOG_FILE, &ret);
+ OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
+ OS_LOG_FILE, &ret);
+
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
+ return(DB_ERROR);
+ }
ib_logf(IB_LOG_LEVEL_INFO,
"Setting log file %s size to %lu MB",
@@ -533,7 +539,9 @@ create_log_file(
(os_offset_t) srv_log_file_size
<< UNIV_PAGE_SIZE_SHIFT);
if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Error in creating %s", name);
+ ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
+ " %s to size %lu MB", name, (ulong) srv_log_file_size
+ >> (20 - UNIV_PAGE_SIZE_SHIFT));
return(DB_ERROR);
}
@@ -566,6 +574,8 @@ static
dberr_t
create_log_files(
/*=============*/
+ bool create_new_db, /*!< in: TRUE if new database is being
+ created */
char* logfilename, /*!< in/out: buffer for log file name */
size_t dirnamelen, /*!< in: length of the directory path */
lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
@@ -577,23 +587,28 @@ create_log_files(
return(DB_READ_ONLY);
}
- /* Remove any old log files. */
- for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
- sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
+ /* We prevent system tablespace creation with existing files in
+ data directory. So we do not delete log files when creating new system
+ tablespace */
+ if (!create_new_db) {
+ /* Remove any old log files. */
+ for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
- /* Ignore errors about non-existent files or files
- that cannot be removed. The create_log_file() will
- return an error when the file exists. */
+ /* Ignore errors about non-existent files or files
+ that cannot be removed. The create_log_file() will
+ return an error when the file exists. */
#ifdef __WIN__
- DeleteFile((LPCTSTR) logfilename);
+ DeleteFile((LPCTSTR) logfilename);
#else
- unlink(logfilename);
+ unlink(logfilename);
#endif
- /* Crashing after deleting the first
- file should be recoverable. The buffer
- pool was clean, and we can simply create
- all log files from the scratch. */
- RECOVERY_CRASH(6);
+ /* Crashing after deleting the first
+ file should be recoverable. The buffer
+ pool was clean, and we can simply create
+ all log files from the scratch. */
+ RECOVERY_CRASH(6);
+ }
}
ut_ad(!buf_pool_check_no_pending_io());
@@ -866,6 +881,7 @@ open_or_create_data_files(
}
if (ret == FALSE) {
+ const char* check_msg;
/* We open the data file */
if (one_created) {
@@ -961,13 +977,20 @@ size_check:
return(DB_ERROR);
}
skip_size_check:
- fil_read_first_page(
+ check_msg = fil_read_first_page(
files[i], one_opened, &flags, &space,
#ifdef UNIV_LOG_ARCHIVE
min_arch_log_no, max_arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
min_flushed_lsn, max_flushed_lsn);
+ if (check_msg) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "%s in data file %s",
+ check_msg, name);
+ return(DB_ERROR);
+ }
+
/* The first file of the system tablespace must
have space ID = TRX_SYS_SPACE. The FSP_SPACE_ID
field in files greater than ibdata1 are unreliable. */
@@ -1083,19 +1106,25 @@ srv_undo_tablespace_create(
if (srv_read_only_mode && ret) {
ib_logf(IB_LOG_LEVEL_INFO,
"%s opened in read-only mode", name);
- } else if (ret == FALSE
- && os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
+ } else if (ret == FALSE) {
+ if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have
- errno set to 0 here, which causes our function
- to return 100; work around that AIX problem */
- && os_file_get_last_error(false) != 100
+ /* AIX 5.1 after security patch ML7 may have
+ errno set to 0 here, which causes our function
+ to return 100; work around that AIX problem */
+ && os_file_get_last_error(false) != 100
#endif /* UNIV_AIX */
) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Can't create UNDO tablespace %s", name);
-
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can't create UNDO tablespace %s", name);
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creating system tablespace with"
+ " existing undo tablespaces is not"
+ " supported. Please delete all undo"
+ " tablespaces before creating new"
+ " system tablespace.");
+ }
err = DB_ERROR;
} else {
ut_a(!srv_read_only_mode);
@@ -1455,6 +1484,7 @@ innobase_start_or_create_for_mysql(void)
ulint io_limit;
mtr_t mtr;
ib_bh_t* ib_bh;
+ ulint n_recovered_trx;
char logfilename[10000];
char* logfile0 = NULL;
size_t dirnamelen;
@@ -1579,10 +1609,6 @@ innobase_start_or_create_for_mysql(void)
#endif /* UNIV_ZIP_COPY */
- ib_logf(IB_LOG_LEVEL_INFO,
- "CPU %s crc32 instructions",
- ut_crc32_sse2_enabled ? "supports" : "does not support");
-
/* Since InnoDB does not currently clean up all its internal data
structures in MySQL Embedded Server Library server_end(), we
print an error message if someone tries to start up InnoDB a
@@ -1723,6 +1749,10 @@ innobase_start_or_create_for_mysql(void)
srv_boot();
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "%s CPU crc32 instructions",
+ ut_crc32_sse2_enabled ? "Using" : "Not using");
+
if (!srv_read_only_mode) {
mutex_create(srv_monitor_file_mutex_key,
@@ -1876,7 +1906,7 @@ innobase_start_or_create_for_mysql(void)
/* Create i/o-handler threads: */
- for (ulint i = 0; i < srv_n_file_io_threads; ++i) {
+ for (i = 0; i < srv_n_file_io_threads; ++i) {
n[i] = i;
@@ -2004,7 +2034,7 @@ innobase_start_or_create_for_mysql(void)
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
- err = create_log_files(logfilename, dirnamelen,
+ err = create_log_files(create_new_db, logfilename, dirnamelen,
max_flushed_lsn, logfile0);
if (err != DB_SUCCESS) {
@@ -2049,8 +2079,9 @@ innobase_start_or_create_for_mysql(void)
}
err = create_log_files(
- logfilename, dirnamelen,
- max_flushed_lsn, logfile0);
+ create_new_db, logfilename,
+ dirnamelen, max_flushed_lsn,
+ logfile0);
if (err != DB_SUCCESS) {
return(err);
@@ -2201,6 +2232,7 @@ files_checked:
trx_sys_create_sys_pages();
ib_bh = trx_sys_init_at_db_start();
+ n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
/* The purge system needs to create the purge view and
therefore requires that the trx_sys is inited. */
@@ -2252,6 +2284,7 @@ files_checked:
}
ib_bh = trx_sys_init_at_db_start();
+ n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
/* The purge system needs to create the purge view and
therefore requires that the trx_sys is inited. */
@@ -2294,7 +2327,7 @@ files_checked:
been shut down normally: this is the normal startup path */
err = recv_recovery_from_checkpoint_start(
- LOG_CHECKPOINT, IB_ULONGLONG_MAX,
+ LOG_CHECKPOINT, LSN_MAX,
min_flushed_lsn, max_flushed_lsn);
if (err != DB_SUCCESS) {
@@ -2315,6 +2348,7 @@ files_checked:
}
ib_bh = trx_sys_init_at_db_start();
+ n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
/* The purge system needs to create the purge view and
therefore requires that the trx_sys is inited. */
@@ -2342,9 +2376,17 @@ files_checked:
an .ibd file.
We also determine the maximum tablespace id used. */
+ dict_check_t dict_check;
- dict_check_tablespaces_and_store_max_id(
- recv_needed_recovery);
+ if (recv_needed_recovery) {
+ dict_check = DICT_CHECK_ALL_LOADED;
+ } else if (n_recovered_trx) {
+ dict_check = DICT_CHECK_SOME_LOADED;
+ } else {
+ dict_check = DICT_CHECK_NONE_LOADED;
+ }
+
+ dict_check_tablespaces_and_store_max_id(dict_check);
}
if (!srv_force_recovery
@@ -2420,8 +2462,9 @@ files_checked:
srv_log_file_size = srv_log_file_size_requested;
- err = create_log_files(logfilename, dirnamelen,
- max_flushed_lsn, logfile0);
+ err = create_log_files(create_new_db, logfilename,
+ dirnamelen, max_flushed_lsn,
+ logfile0);
if (err != DB_SUCCESS) {
return(err);
@@ -2517,6 +2560,13 @@ files_checked:
srv_undo_logs = ULONG_UNDEFINED;
}
+ /* Flush the changes made to TRX_SYS_PAGE by trx_sys_create_rsegs()*/
+ if (!srv_force_recovery && !srv_read_only_mode) {
+ bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ }
+
if (!srv_read_only_mode) {
/* Create the thread which watches the timeouts
for lock waits */
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
index 749258021f7..00f92239b64 100644
--- a/storage/innobase/sync/sync0arr.cc
+++ b/storage/innobase/sync/sync0arr.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -123,7 +123,7 @@ struct sync_array_t {
ulint n_cells; /*!< number of cells in the
wait array */
sync_cell_t* array; /*!< pointer to wait array */
- ib_mutex_t mutex; /*!< possible database mutex
+ ib_mutex_t mutex; /*!< possible database mutex
protecting this data structure */
os_ib_mutex_t os_mutex; /*!< Possible operating system mutex
protecting the data structure.
@@ -570,10 +570,6 @@ sync_array_deadlock_step(
new_cell = sync_array_find_thread(arr, thread);
if (new_cell == start) {
- /* Stop running of other threads */
-
- ut_dbg_stop_threads = TRUE;
-
/* Deadlock */
fputs("########################################\n"
"DEADLOCK of threads detected!\n", stderr);
@@ -978,11 +974,15 @@ sync_array_print_long_waits(
sync_array_t* arr = sync_wait_array[i];
+ sync_array_enter(arr);
+
if (sync_array_print_long_waits_low(
arr, waiter, sema, &noticed)) {
fatal = TRUE;
}
+
+ sync_array_exit(arr);
}
if (noticed) {
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index f6d8dfc6b40..0747cffb791 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -1320,7 +1320,7 @@ trx_purge_stop(void)
/* We need to wakeup the purge thread in case it is suspended,
so that it can acknowledge the state change. */
- srv_wake_purge_thread_if_not_active();
+ srv_purge_wakeup();
}
purge_sys->state = PURGE_STATE_STOP;
@@ -1399,5 +1399,5 @@ trx_purge_run(void)
rw_lock_x_unlock(&purge_sys->latch);
- srv_wake_purge_thread_if_not_active();
+ srv_purge_wakeup();
}
diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc
index d07e40c506d..1089607c6d1 100644
--- a/storage/innobase/trx/trx0roll.cc
+++ b/storage/innobase/trx/trx0roll.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -97,7 +97,7 @@ trx_rollback_to_savepoint_low(
trx->error_state = DB_SUCCESS;
- if (!trx->read_only) {
+ if (trx->insert_undo || trx->update_undo) {
thr = pars_complete_graph_for_exec(roll_node, trx, heap);
ut_a(thr == que_fork_start_command(
@@ -110,7 +110,7 @@ trx_rollback_to_savepoint_low(
/* Free the memory reserved by the undo graph. */
que_graph_free(static_cast<que_t*>(
- roll_node->undo_thr->common.parent));
+ roll_node->undo_thr->common.parent));
}
if (savept == NULL) {
@@ -628,7 +628,8 @@ trx_rollback_active(
as DISCARDED. If it still exists. */
table = dict_table_open_on_id(
- trx->table_id, dictionary_locked, FALSE);
+ trx->table_id, dictionary_locked,
+ DICT_TABLE_OP_NORMAL);
if (table && !dict_table_is_discarded(table)) {
@@ -689,31 +690,32 @@ trx_rollback_resurrected(
to accidentally clean up a non-recovered transaction here. */
trx_mutex_enter(trx);
+ bool is_recovered = trx->is_recovered;
+ trx_state_t state = trx->state;
+ trx_mutex_exit(trx);
- if (!trx->is_recovered) {
- trx_mutex_exit(trx);
+ if (!is_recovered) {
return(FALSE);
}
- switch (trx->state) {
+ switch (state) {
case TRX_STATE_COMMITTED_IN_MEMORY:
mutex_exit(&trx_sys->mutex);
- trx_mutex_exit(trx);
fprintf(stderr,
"InnoDB: Cleaning up trx with id " TRX_ID_FMT "\n",
trx->id);
trx_cleanup_at_db_startup(trx);
+ trx_free_for_background(trx);
return(TRUE);
case TRX_STATE_ACTIVE:
- trx_mutex_exit(trx);
if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
mutex_exit(&trx_sys->mutex);
trx_rollback_active(trx);
+ trx_free_for_background(trx);
return(TRUE);
}
return(FALSE);
case TRX_STATE_PREPARED:
- trx_mutex_exit(trx);
return(FALSE);
case TRX_STATE_NOT_STARTED:
break;
@@ -1049,7 +1051,8 @@ trx_roll_pop_top_rec(
os_thread_get_curr_id(), trx->id, undo->top_undo_no); */
prev_rec = trx_undo_get_prev_rec(
- undo_page + offset, undo->hdr_page_no, undo->hdr_offset, mtr);
+ undo_page + offset, undo->hdr_page_no, undo->hdr_offset,
+ true, mtr);
if (prev_rec == NULL) {
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 7c2bbc90ad9..ea7ad65ffd9 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -121,7 +121,7 @@ UNIV_INTERN mysql_pfs_key_t trx_sys_mutex_key;
#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
-uint trx_rseg_n_slots_debug = 0;
+UNIV_INTERN uint trx_rseg_n_slots_debug = 0;
#endif
/** This is used to track the maximum file format id known to InnoDB. It's
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 2dfa78b229a..288e06173c0 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,11 +42,17 @@ Created 3/26/1996 Heikki Tuuri
#include "btr0sea.h"
#include "os0proc.h"
#include "trx0xa.h"
+#include "trx0rec.h"
#include "trx0purge.h"
#include "ha_prototypes.h"
#include "srv0mon.h"
#include "ut0vec.h"
+#include<set>
+
+/** Set of table_id */
+typedef std::set<table_id_t> table_id_set;
+
/** Dummy session used currently in MySQL interface */
UNIV_INTERN sess_t* trx_dummy_sess = NULL;
@@ -108,7 +114,7 @@ trx_create(void)
trx->active_commit_ordered = 0;
trx->isolation_level = TRX_ISO_REPEATABLE_READ;
- trx->no = IB_ULONGLONG_MAX;
+ trx->no = TRX_ID_MAX;
trx->support_xa = TRUE;
@@ -306,6 +312,9 @@ trx_free_prepared(
UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
ut_d(trx->in_rw_trx_list = FALSE);
+ /* Undo trx_resurrect_table_locks(). */
+ UT_LIST_INIT(trx->lock.trx_locks);
+
trx_free(trx);
}
@@ -386,6 +395,96 @@ trx_list_rw_insert_ordered(
}
/****************************************************************//**
+Resurrect the table locks for a resurrected transaction.
+Does nothing if the transaction is already in state
+TRX_STATE_COMMITTED_IN_MEMORY or if the undo log is empty. Otherwise
+the undo log is walked backwards, starting from the record at
+undo->top_offset on page undo->top_page_no, and the table id parsed
+from every undo record is collected into a set. After the
+mini-transaction has been committed, each collected table that can be
+opened gets an IX table lock via lock_table_ix_resurrect(). Tables
+flagged ibd_file_missing and temporary tables are not locked; they are
+closed and removed from the dictionary cache instead. */
+static
+void
+trx_resurrect_table_locks(
+/*======================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const trx_undo_t* undo) /*!< in: undo log */
+{
+ mtr_t mtr;
+ page_t* undo_page;
+ trx_undo_rec_t* undo_rec;
+ table_id_set tables; /* ids of the tables seen in the undo records */
+
+ ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
+
+ if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
+ || undo->empty) {
+ return; /* no undo records to scan, or trx already committed */
+ }
+
+ mtr_start(&mtr);
+ /* trx_rseg_mem_create() may have acquired an X-latch on this
+ page, so we cannot acquire an S-latch. */
+ undo_page = trx_undo_page_get(
+ undo->space, undo->zip_size, undo->top_page_no, &mtr);
+ undo_rec = undo_page + undo->top_offset; /* first record to visit */
+
+ do {
+ ulint type;
+ ulint cmpl_info;
+ bool updated_extern;
+ undo_no_t undo_no;
+ table_id_t table_id;
+
+ page_t* undo_rec_page = page_align(undo_rec);
+
+ if (undo_rec_page != undo_page) { /* the walk moved to another page */
+ if (!mtr_memo_release(&mtr,
+ buf_block_align(undo_page),
+ MTR_MEMO_PAGE_X_FIX)) {
+ /* The page of the previous undo_rec
+ should have been latched by
+ trx_undo_page_get() or
+ trx_undo_get_prev_rec(). */
+ ut_ad(0);
+ }
+
+ undo_page = undo_rec_page;
+ }
+
+ trx_undo_rec_get_pars(
+ undo_rec, &type, &cmpl_info,
+ &updated_extern, &undo_no, &table_id); /* only table_id is used below */
+ tables.insert(table_id); /* table_id_set is a std::set: duplicates collapse */
+
+ undo_rec = trx_undo_get_prev_rec(
+ undo_rec, undo->hdr_page_no,
+ undo->hdr_offset, false, &mtr);
+ } while (undo_rec); /* loop ends when there is no previous record */
+
+ mtr_commit(&mtr);
+
+ for (table_id_set::const_iterator i = tables.begin();
+ i != tables.end(); i++) {
+ if (dict_table_t* table = dict_table_open_on_id(
+ *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) { /* ids that cannot be opened are skipped */
+ if (table->ibd_file_missing
+ || dict_table_is_temporary(table)) {
+ mutex_enter(&dict_sys->mutex);
+ dict_table_close(table, TRUE, FALSE);
+ dict_table_remove_from_cache(table);
+ mutex_exit(&dict_sys->mutex);
+ continue; /* no lock is resurrected for this table */
+ }
+
+ lock_table_ix_resurrect(table, trx);
+
+ DBUG_PRINT("ib_trx",
+ ("resurrect" TRX_ID_FMT
+ " table '%s' IX lock from %s undo",
+ trx->id, table->name,
+ undo == trx->insert_undo
+ ? "insert" : "update"));
+
+ dict_table_close(table, FALSE, FALSE); /* drop the reference taken by the open above */
+ }
+ }
+}
+
+/****************************************************************//**
Resurrect the transactions that were doing inserts the time of the
crash, they need to be undone.
@return trx_t instance */
@@ -447,9 +546,9 @@ trx_resurrect_insert(
trx->state = TRX_STATE_ACTIVE;
/* A running transaction always has the number
- field inited to IB_ULONGLONG_MAX */
+ field inited to TRX_ID_MAX */
- trx->no = IB_ULONGLONG_MAX;
+ trx->no = TRX_ID_MAX;
}
if (undo->dict_operation) {
@@ -534,9 +633,9 @@ trx_resurrect_update(
trx->state = TRX_STATE_ACTIVE;
/* A running transaction always has the number field inited to
- IB_ULONGLONG_MAX */
+ TRX_ID_MAX */
- trx->no = IB_ULONGLONG_MAX;
+ trx->no = TRX_ID_MAX;
}
if (undo->dict_operation) {
@@ -590,6 +689,8 @@ trx_lists_init_at_db_start(void)
trx = trx_resurrect_insert(undo, rseg);
trx_list_rw_insert_ordered(trx);
+
+ trx_resurrect_table_locks(trx, undo);
}
/* Ressurrect transactions that were doing updates. */
@@ -616,6 +717,8 @@ trx_lists_init_at_db_start(void)
if (trx_created) {
trx_list_rw_insert_ordered(trx);
}
+
+ trx_resurrect_table_locks(trx, undo);
}
}
}
@@ -722,10 +825,10 @@ trx_start_low(
srv_undo_logs, srv_undo_tablespaces);
}
- /* The initial value for trx->no: IB_ULONGLONG_MAX is used in
+ /* The initial value for trx->no: TRX_ID_MAX is used in
read_view_open_now: */
- trx->no = IB_ULONGLONG_MAX;
+ trx->no = TRX_ID_MAX;
ut_a(ib_vector_is_empty(trx->autoinc_locks));
ut_a(ib_vector_is_empty(trx->lock.table_locks));
@@ -824,22 +927,18 @@ trx_serialisation_number_get(
/****************************************************************//**
Assign the transaction its history serialisation number and write the
-update UNDO log record to the assigned rollback segment.
-@return the LSN of the UNDO log write. */
-static
-lsn_t
+update UNDO log record to the assigned rollback segment. */
+static __attribute__((nonnull))
+void
trx_write_serialisation_history(
/*============================*/
- trx_t* trx) /*!< in: transaction */
+ trx_t* trx, /*!< in/out: transaction */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
-
- mtr_t mtr;
trx_rseg_t* rseg;
rseg = trx->rseg;
- mtr_start(&mtr);
-
/* Change the undo log segment states from TRX_UNDO_ACTIVE
to some other state: these modifications to the file data
structure define the transaction as committed in the file
@@ -867,15 +966,15 @@ trx_write_serialisation_history(
because only a single OS thread is allowed to do the
transaction commit for this transaction. */
- undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
+ undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
- trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
+ trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
} else {
mutex_enter(&rseg->mutex);
}
if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
+ trx_undo_set_state_at_finish(trx->insert_undo, mtr);
}
mutex_exit(&rseg->mutex);
@@ -892,38 +991,15 @@ trx_write_serialisation_history(
trx_sys_update_mysql_binlog_offset(
trx->mysql_log_file_name,
trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO, &mtr);
+ TRX_SYS_MYSQL_LOG_INFO, mtr);
trx->mysql_log_file_name = NULL;
}
-
- /* The following call commits the mini-transaction, making the
- whole transaction committed in the file-based world, at this
- log sequence number. The transaction becomes 'durable' when
- we write the log to disk, but in the logical sense the commit
- in the file-based data structures (undo logs etc.) happens
- here.
-
- NOTE that transaction numbers, which are assigned only to
- transactions with an update undo log, do not necessarily come
- in exactly the same order as commit lsn's, if the transactions
- have different rollback segments. To get exactly the same
- order we should hold the kernel mutex up to this point,
- adding to the contention of the kernel mutex. However, if
- a transaction T2 is able to see modifications made by
- a transaction T1, T2 will always get a bigger transaction
- number and a bigger commit lsn than T1. */
-
- /*--------------*/
- mtr_commit(&mtr);
- /*--------------*/
-
- return(mtr.end_lsn);
}
/********************************************************************
Finalize a transaction containing updates for a FTS table. */
-static
+static __attribute__((nonnull))
void
trx_finalize_for_fts_table(
/*=======================*/
@@ -954,20 +1030,20 @@ trx_finalize_for_fts_table(
}
}
-/********************************************************************
+/******************************************************************//**
Finalize a transaction containing updates to FTS tables. */
-static
+static __attribute__((nonnull))
void
trx_finalize_for_fts(
/*=================*/
- trx_t* trx, /* in: transaction */
- ibool is_commit) /* in: TRUE if the transaction was
- committed, FALSE if it was rolled back. */
+ trx_t* trx, /*!< in/out: transaction */
+ bool is_commit) /*!< in: true if the transaction was
+ committed, false if it was rolled back. */
{
if (is_commit) {
- const ib_rbt_node_t* node;
- ib_rbt_t* tables;
- fts_savepoint_t* savepoint;
+ const ib_rbt_node_t* node;
+ ib_rbt_t* tables;
+ fts_savepoint_t* savepoint;
savepoint = static_cast<fts_savepoint_t*>(
ib_vector_last(trx->fts_trx->savepoints));
@@ -977,7 +1053,7 @@ trx_finalize_for_fts(
for (node = rbt_first(tables);
node;
node = rbt_next(tables, node)) {
- fts_trx_table_t** ftt;
+ fts_trx_table_t** ftt;
ftt = rbt_value(fts_trx_table_t*, node);
@@ -1038,50 +1114,16 @@ trx_flush_log_if_needed(
}
/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
+Commits a transaction in memory. */
+static __attribute__((nonnull))
void
-trx_commit(
-/*=======*/
- trx_t* trx) /*!< in: transaction */
+trx_commit_in_memory(
+/*=================*/
+ trx_t* trx, /*!< in/out: transaction */
+ lsn_t lsn) /*!< in: log sequence number of the mini-transaction
+ commit of trx_write_serialisation_history(), or 0
+ if the transaction did not modify anything */
{
- trx_named_savept_t* savep;
- ib_uint64_t lsn = 0;
- ibool doing_fts_commit = FALSE;
-
- assert_trx_nonlocking_or_in_list(trx);
- ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
-
- /* undo_no is non-zero if we're doing the final commit. */
- if (trx->fts_trx && trx->undo_no != 0) {
- ulint error;
-
- ut_a(!trx_is_autocommit_non_locking(trx));
-
- doing_fts_commit = TRUE;
-
- error = fts_commit(trx);
-
- /* FTS-FIXME: Temparorily tolerate DB_DUPLICATE_KEY
- instead of dying. This is a possible scenario if there
- is a crash between insert to DELETED table committing
- and transaction committing. The fix would be able to
- return error from this function */
- if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
- /* FTS-FIXME: once we can return values from this
- function, we should do so and signal an error
- instead of just dying. */
-
- ut_error;
- }
- }
-
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
- lsn = trx_write_serialisation_history(trx);
- } else {
- lsn = 0;
- }
-
trx->must_flush_log_later = FALSE;
if (trx_is_autocommit_non_locking(trx)) {
@@ -1206,8 +1248,10 @@ trx_commit(
trx->commit_lsn = lsn;
}
+ /* undo_no is non-zero if we're doing the final commit. */
+ bool not_rollback = trx->undo_no != 0;
/* Free all savepoints, starting from the first. */
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+ trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
trx_roll_savepoints_free(trx, savep);
trx->rseg = NULL;
@@ -1227,7 +1271,7 @@ trx_commit(
trx->auto_commit = FALSE;
if (trx->fts_trx) {
- trx_finalize_for_fts(trx, doing_fts_commit);
+ trx_finalize_for_fts(trx, not_rollback);
}
ut_ad(trx->lock.wait_thr == NULL);
@@ -1245,6 +1289,96 @@ trx_commit(
}
/****************************************************************//**
+Commits a transaction and a mini-transaction. */
+UNIV_INTERN
+void
+trx_commit_low(
+/*===========*/
+ trx_t* trx, /*!< in/out: transaction */
+ mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
+ or NULL if trx made no modifications */
+{
+ lsn_t lsn;
+
+ assert_trx_nonlocking_or_in_list(trx);
+ ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+ ut_ad(!mtr || mtr->state == MTR_ACTIVE);
+ ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
+
+ /* undo_no is non-zero if we're doing the final commit. */
+ if (trx->fts_trx && trx->undo_no != 0) {
+ dberr_t error;
+
+ ut_a(!trx_is_autocommit_non_locking(trx));
+
+ error = fts_commit(trx);
+
+ /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
+ instead of dying. This is a possible scenario if there
+ is a crash between insert to DELETED table committing
+ and transaction committing. The fix would be to make it
+ possible to return an error from this function */
+ if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
+ /* FTS-FIXME: once we can return values from this
+ function, we should do so and signal an error
+ instead of just dying. */
+
+ ut_error;
+ }
+ }
+
+ if (mtr) {
+ trx_write_serialisation_history(trx, mtr);
+ /* The following call commits the mini-transaction, making the
+ whole transaction committed in the file-based world, at this
+ log sequence number. The transaction becomes 'durable' when
+ we write the log to disk, but in the logical sense the commit
+ in the file-based data structures (undo logs etc.) happens
+ here.
+
+ NOTE that transaction numbers, which are assigned only to
+ transactions with an update undo log, do not necessarily come
+ in exactly the same order as commit lsn's, if the transactions
+ have different rollback segments. To get exactly the same
+ order we should hold the kernel mutex up to this point,
+ adding to the contention of the kernel mutex. However, if
+ a transaction T2 is able to see modifications made by
+ a transaction T1, T2 will always get a bigger transaction
+ number and a bigger commit lsn than T1. */
+
+ /*--------------*/
+ mtr_commit(mtr);
+ /*--------------*/
+ lsn = mtr->end_lsn;
+ } else {
+ lsn = 0;
+ }
+
+ trx_commit_in_memory(trx, lsn);
+}
+
+/****************************************************************//**
+Commits a transaction. */
+UNIV_INTERN
+void
+trx_commit(
+/*=======*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ mtr_t local_mtr;
+ mtr_t* mtr;
+
+ if (trx->insert_undo || trx->update_undo) {
+ mtr = &local_mtr;
+ mtr_start(mtr);
+ } else {
+ mtr = NULL;
+ }
+
+ trx_commit_low(trx, mtr);
+}
+
+/****************************************************************//**
Cleans up a transaction at database startup. The cleanup is needed if
the transaction already got to the middle of a commit when the database
crashed, and we cannot roll it back. */
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index c4480b11366..290271c6cab 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -159,6 +159,7 @@ trx_undo_get_prev_rec_from_prev_page(
trx_undo_rec_t* rec, /*!< in: undo record */
ulint page_no,/*!< in: undo log header page number */
ulint offset, /*!< in: undo log header offset on page */
+ bool shared, /*!< in: true=S-latch, false=X-latch */
mtr_t* mtr) /*!< in: mtr */
{
ulint space;
@@ -181,8 +182,12 @@ trx_undo_get_prev_rec_from_prev_page(
space = page_get_space_id(undo_page);
zip_size = fil_space_get_zip_size(space);
- prev_page = trx_undo_page_get_s_latched(space, zip_size,
- prev_page_no, mtr);
+ buf_block_t* block = buf_page_get(space, zip_size, prev_page_no,
+ shared ? RW_S_LATCH : RW_X_LATCH,
+ mtr);
+ buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
+
+ prev_page = buf_block_get_frame(block);
return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
}
@@ -197,6 +202,7 @@ trx_undo_get_prev_rec(
trx_undo_rec_t* rec, /*!< in: undo record */
ulint page_no,/*!< in: undo log header page number */
ulint offset, /*!< in: undo log header offset on page */
+ bool shared, /*!< in: true=S-latch, false=X-latch */
mtr_t* mtr) /*!< in: mtr */
{
trx_undo_rec_t* prev_rec;
@@ -212,7 +218,7 @@ trx_undo_get_prev_rec(
previous record */
return(trx_undo_get_prev_rec_from_prev_page(rec, page_no, offset,
- mtr));
+ shared, mtr));
}
/***********************************************************************//**
diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc
index 695035d6ae8..1caf27ebae3 100644
--- a/storage/innobase/ut/ut0crc32.cc
+++ b/storage/innobase/ut/ut0crc32.cc
@@ -92,7 +92,7 @@ static ib_uint32_t ut_crc32_slice8_table[8][256];
static ibool ut_crc32_slice8_table_initialized = FALSE;
/* Flag that tells whether the CPU supports CRC32 or not */
-UNIV_INTERN bool ut_crc32_sse2_enabled = false;
+UNIV_INTERN bool ut_crc32_sse2_enabled = false;
/********************************************************************//**
Initializes the table that is used to generate the CRC32 if the CPU does
diff --git a/storage/innobase/ut/ut0dbg.cc b/storage/innobase/ut/ut0dbg.cc
index 37b709785b4..a1cad144da4 100644
--- a/storage/innobase/ut/ut0dbg.cc
+++ b/storage/innobase/ut/ut0dbg.cc
@@ -35,16 +35,6 @@ Created 1/30/1994 Heikki Tuuri
UNIV_INTERN ulint ut_dbg_zero = 0;
#endif
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/** If this is set to TRUE by ut_dbg_assertion_failed(), all threads
-will stop at the next ut_a() or ut_ad(). */
-UNIV_INTERN ibool ut_dbg_stop_threads = FALSE;
-#endif
-#ifndef UT_DBG_USE_ABORT
-/** A null pointer that will be dereferenced to trigger a memory trap */
-UNIV_INTERN ulint* ut_dbg_null_ptr = NULL;
-#endif
-
/*************************************************************//**
Report a failed assertion. */
UNIV_INTERN
@@ -80,30 +70,8 @@ ut_dbg_assertion_failed(
"InnoDB: corruption in the InnoDB tablespace. Please refer to\n"
"InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
- ut_dbg_stop_threads = TRUE;
-#endif
}
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/*************************************************************//**
-Stop a thread after assertion failure. */
-UNIV_INTERN
-void
-ut_dbg_stop_thread(
-/*===============*/
- const char* file,
- ulint line)
-{
-#ifndef UNIV_HOTBACKUP
- fprintf(stderr, "InnoDB: Thread %lu stopped in file %s line %lu\n",
- os_thread_pf(os_thread_get_curr_id()),
- innobase_basename(file), line);
- os_thread_sleep(1000000000);
-#endif /* !UNIV_HOTBACKUP */
-}
-#endif
-
#ifdef UNIV_COMPILE_TEST_FUNCS
#include <sys/types.h>
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index 3c94d96c3ac..e2176a4603a 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -814,6 +814,10 @@ ut_strerr(
return("Log size exceeded during online index creation");
case DB_DICT_CHANGED:
return("Table dictionary has changed");
+ case DB_IDENTIFIER_TOO_LONG:
+ return("Identifier name is too long");
+ case DB_FTS_EXCEED_RESULT_CACHE_LIMIT:
+ return("FTS query exceeds result cache limit");
/* do not add default: in order to produce a warning if new code
is added to the enum but not added here */
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
index 7389184253f..9b57dd2847d 100644
--- a/storage/maria/maria_def.h
+++ b/storage/maria/maria_def.h
@@ -1319,6 +1319,13 @@ MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const uchar *record);
my_bool _ma_write_abort_default(MARIA_HA *info);
int maria_delete_table_files(const char *name, myf sync_dir);
+/*
+ This cannot be in my_base.h as it clashes with HA_SPATIAL.
+ But it was introduced for the Aria engine, and is only used there.
+ So it can safely stay here, visible only to Aria.
+*/
+#define HA_RTREE_INDEX 16384 /* For RTREE search */
+
C_MODE_START
#define MARIA_FLUSH_DATA 1
#define MARIA_FLUSH_INDEX 2
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 43cfa23a99f..9f717524ce2 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -3763,6 +3763,9 @@ innobase_end(
mysql_mutex_destroy(&pending_checkpoint_mutex);
}
+ my_free(fts_server_stopword_table);
+ fts_server_stopword_table= NULL;
+
DBUG_RETURN(err);
}