summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bjorn Munch <bjorn.munch@oracle.com> 2011-08-29 22:47:08 +0200
committer Bjorn Munch <bjorn.munch@oracle.com> 2011-08-29 22:47:08 +0200
commit 98136ecf1802258dbe3ce2959ebd3a50615e59fd (patch)
tree 1a6068bf140dadd80392e7f44ea1729856ae53e3
parent a6212ddac6522a1cfcf16fb1e3af69939db5709c (diff)
parent f610c5658748ae97a5e2c1e1afbd229f2121a082 (diff)
download mariadb-git-98136ecf1802258dbe3ce2959ebd3a50615e59fd.tar.gz
merge from 5.1 main
-rw-r--r--extra/innochecksum.c8
-rw-r--r--include/decimal.h9
-rw-r--r--mysql-test/r/merge.result29
-rw-r--r--mysql-test/r/type_newdecimal.result11
-rw-r--r--mysql-test/suite/innodb_plugin/r/innodb-index.result9
-rw-r--r--mysql-test/suite/innodb_plugin/t/innodb-index.test13
-rw-r--r--mysql-test/t/merge.test45
-rw-r--r--mysql-test/t/type_newdecimal.test14
-rw-r--r--mysql-test/valgrind.supp34
-rw-r--r--sql/filesort.cc11
-rw-r--r--sql/my_decimal.h6
-rw-r--r--sql/rpl_rli.h8
-rw-r--r--sql/slave.cc11
-rw-r--r--sql/sql_base.cc183
-rw-r--r--storage/innobase/btr/btr0btr.c222
-rw-r--r--storage/innobase/btr/btr0cur.c92
-rw-r--r--storage/innobase/buf/buf0buf.c23
-rw-r--r--storage/innobase/fsp/fsp0fsp.c168
-rw-r--r--storage/innobase/include/btr0btr.h31
-rw-r--r--storage/innobase/include/btr0cur.h14
-rw-r--r--storage/innobase/include/buf0buf.h9
-rw-r--r--storage/innobase/include/fsp0fsp.h8
-rw-r--r--storage/innobase/include/mtr0mtr.h7
-rw-r--r--storage/innobase/include/mtr0mtr.ic4
-rw-r--r--storage/innobase/mtr/mtr0mtr.c10
-rw-r--r--storage/innobase/row/row0ins.c32
-rw-r--r--storage/innobase/row/row0row.c38
-rw-r--r--storage/innobase/row/row0upd.c23
-rw-r--r--storage/innobase/trx/trx0undo.c2
-rw-r--r--storage/innodb_plugin/ChangeLog14
-rw-r--r--storage/innodb_plugin/btr/btr0btr.c220
-rw-r--r--storage/innodb_plugin/btr/btr0cur.c114
-rw-r--r--storage/innodb_plugin/buf/buf0buf.c23
-rw-r--r--storage/innodb_plugin/fsp/fsp0fsp.c234
-rw-r--r--storage/innodb_plugin/include/btr0btr.h34
-rw-r--r--storage/innodb_plugin/include/btr0cur.h38
-rw-r--r--storage/innodb_plugin/include/buf0buf.h9
-rw-r--r--storage/innodb_plugin/include/fsp0fsp.h30
-rw-r--r--storage/innodb_plugin/include/mtr0mtr.h13
-rw-r--r--storage/innodb_plugin/include/mtr0mtr.ic6
-rw-r--r--storage/innodb_plugin/include/trx0undo.h46
-rw-r--r--storage/innodb_plugin/mtr/mtr0mtr.c7
-rw-r--r--storage/innodb_plugin/row/row0ins.c31
-rw-r--r--storage/innodb_plugin/row/row0row.c27
-rw-r--r--storage/innodb_plugin/row/row0upd.c23
-rw-r--r--storage/innodb_plugin/sync/sync0sync.c8
-rw-r--r--storage/innodb_plugin/trx/trx0rec.c68
-rw-r--r--storage/innodb_plugin/trx/trx0undo.c70
-rw-r--r--strings/decimal.c9
-rw-r--r--support-files/mysql.spec.sh18
50 files changed, 1484 insertions, 632 deletions
diff --git a/extra/innochecksum.c b/extra/innochecksum.c
index 7ad900d16d3..b55b510b888 100644
--- a/extra/innochecksum.c
+++ b/extra/innochecksum.c
@@ -25,12 +25,7 @@
Published with a permission.
*/
-/* needed to have access to 64 bit file functions */
-#define _LARGEFILE_SOURCE
-#define _LARGEFILE64_SOURCE
-
-#define _XOPEN_SOURCE 500 /* needed to include getopt.h on some platforms. */
-
+#include <my_global.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -53,7 +48,6 @@
/* another argument to specify page ranges... seek to right spot and go from there */
typedef unsigned long int ulint;
-typedef unsigned char uchar;
/* innodb function in name; modified slightly to not have the ASM version (lots of #ifs that didn't apply) */
ulint mach_read_from_4(uchar *b)
diff --git a/include/decimal.h b/include/decimal.h
index 530ed9e1757..c377bd4a400 100644
--- a/include/decimal.h
+++ b/include/decimal.h
@@ -21,6 +21,15 @@ typedef enum
decimal_round_mode;
typedef int32 decimal_digit_t;
+/**
+ intg is the number of *decimal* digits (NOT number of decimal_digit_t's !)
+ before the point
+ frac is the number of decimal digits after the point
+ len is the length of buf (length of allocated space) in decimal_digit_t's,
+ not in bytes
+ sign false means positive, true means negative
+ buf is an array of decimal_digit_t's
+ */
typedef struct st_decimal_t {
int intg, frac, len;
my_bool sign;
diff --git a/mysql-test/r/merge.result b/mysql-test/r/merge.result
index 3af152672ab..a4f1c79dff4 100644
--- a/mysql-test/r/merge.result
+++ b/mysql-test/r/merge.result
@@ -2341,4 +2341,33 @@ REPAIR TABLE m1;
Table Op Msg_type Msg_text
test.m1 repair note The storage engine for the table doesn't support repair
DROP TABLE m1, t1;
+#
+# BUG#11763712 - 56458: KILLING A FLUSH TABLE FOR A MERGE/CHILD
+# CRASHES SERVER
+#
+CREATE TABLE t1(a INT);
+CREATE TABLE t2(a INT);
+CREATE TABLE t3(a INT, b INT);
+CREATE TABLE m1(a INT) ENGINE=MERGE UNION=(t1, t2);
+# Test reopen merge parent failure
+LOCK TABLES m1 READ;
+# Remove 'm1' table using file operations.
+FLUSH TABLES;
+ERROR 42S02: Table 'test.m1' doesn't exist
+UNLOCK TABLES;
+CREATE TABLE m1(a INT) ENGINE=MERGE UNION=(t1, t2);
+# Test reopen merge child failure
+LOCK TABLES m1 READ;
+# Remove 't1' table using file operations.
+FLUSH TABLES;
+ERROR 42S02: Table 'test.t1' doesn't exist
+UNLOCK TABLES;
+CREATE TABLE t1(a INT);
+# Test reattach merge failure
+LOCK TABLES m1 READ;
+# Replace 't1' with 't3' table using file operations.
+FLUSH TABLES;
+ERROR HY000: Can't reopen table: 'm1'
+UNLOCK TABLES;
+DROP TABLE t1, t2, t3, m1;
End of 5.1 tests
diff --git a/mysql-test/r/type_newdecimal.result b/mysql-test/r/type_newdecimal.result
index c301a7dd629..0c6c1333e9b 100644
--- a/mysql-test/r/type_newdecimal.result
+++ b/mysql-test/r/type_newdecimal.result
@@ -1927,3 +1927,14 @@ f1
0.000000000000000000000000
DROP TABLE IF EXISTS t1;
End of 5.1 tests
+#
+# BUG#12911710 - VALGRIND FAILURE IN
+# ROW-DEBUG:PERFSCHEMA.SOCKET_SUMMARY_BY_INSTANCE_FUNC
+#
+CREATE TABLE t1(d1 DECIMAL(60,0) NOT NULL,
+d2 DECIMAL(60,0) NOT NULL);
+INSERT INTO t1 (d1, d2) VALUES(0.0, 0.0);
+SELECT d1 * d2 FROM t1;
+d1 * d2
+0
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_plugin/r/innodb-index.result b/mysql-test/suite/innodb_plugin/r/innodb-index.result
index b24f282dfc4..5be1460d2b7 100644
--- a/mysql-test/suite/innodb_plugin/r/innodb-index.result
+++ b/mysql-test/suite/innodb_plugin/r/innodb-index.result
@@ -1024,6 +1024,15 @@ INSERT INTO t1 VALUES(9,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r);
UPDATE t1 SET a=1000;
DELETE FROM t1;
DROP TABLE t1;
+CREATE TABLE bug12547647(
+a INT NOT NULL, b BLOB NOT NULL, c TEXT,
+PRIMARY KEY (b(10), a), INDEX (c(10))
+) ENGINE=InnoDB ROW_FORMAT=DYNAMIC;
+INSERT INTO bug12547647 VALUES (5,repeat('khdfo5AlOq',1900),repeat('g',7731));
+COMMIT;
+UPDATE bug12547647 SET c = REPEAT('b',16928);
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
+DROP TABLE bug12547647;
set global innodb_file_per_table=0;
set global innodb_file_format=Antelope;
set global innodb_file_format_check=Antelope;
diff --git a/mysql-test/suite/innodb_plugin/t/innodb-index.test b/mysql-test/suite/innodb_plugin/t/innodb-index.test
index 52f94990b15..b4e2aae09e9 100644
--- a/mysql-test/suite/innodb_plugin/t/innodb-index.test
+++ b/mysql-test/suite/innodb_plugin/t/innodb-index.test
@@ -480,6 +480,19 @@ DELETE FROM t1;
-- sleep 10
DROP TABLE t1;
+# Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE
+CREATE TABLE bug12547647(
+a INT NOT NULL, b BLOB NOT NULL, c TEXT,
+PRIMARY KEY (b(10), a), INDEX (c(10))
+) ENGINE=InnoDB ROW_FORMAT=DYNAMIC;
+
+INSERT INTO bug12547647 VALUES (5,repeat('khdfo5AlOq',1900),repeat('g',7731));
+COMMIT;
+# The following used to cause infinite undo log allocation.
+--error ER_TOO_BIG_ROWSIZE
+UPDATE bug12547647 SET c = REPEAT('b',16928);
+DROP TABLE bug12547647;
+
eval set global innodb_file_per_table=$per_table;
eval set global innodb_file_format=$format;
eval set global innodb_file_format_check=$format;
diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test
index f290803bbd2..a6affbb0540 100644
--- a/mysql-test/t/merge.test
+++ b/mysql-test/t/merge.test
@@ -1783,4 +1783,49 @@ REPAIR TABLE m1;
#
DROP TABLE m1, t1;
+
+--echo #
+--echo # BUG#11763712 - 56458: KILLING A FLUSH TABLE FOR A MERGE/CHILD
+--echo # CRASHES SERVER
+--echo #
+CREATE TABLE t1(a INT);
+CREATE TABLE t2(a INT);
+CREATE TABLE t3(a INT, b INT);
+CREATE TABLE m1(a INT) ENGINE=MERGE UNION=(t1, t2);
+
+--echo # Test reopen merge parent failure
+LOCK TABLES m1 READ;
+--echo # Remove 'm1' table using file operations.
+remove_file $MYSQLD_DATADIR/test/m1.MRG;
+remove_file $MYSQLD_DATADIR/test/m1.frm;
+--error ER_NO_SUCH_TABLE
+FLUSH TABLES;
+UNLOCK TABLES;
+CREATE TABLE m1(a INT) ENGINE=MERGE UNION=(t1, t2);
+
+--echo # Test reopen merge child failure
+LOCK TABLES m1 READ;
+--echo # Remove 't1' table using file operations.
+remove_file $MYSQLD_DATADIR/test/t1.frm;
+remove_file $MYSQLD_DATADIR/test/t1.MYI;
+remove_file $MYSQLD_DATADIR/test/t1.MYD;
+--error ER_NO_SUCH_TABLE
+FLUSH TABLES;
+UNLOCK TABLES;
+CREATE TABLE t1(a INT);
+
+--echo # Test reattach merge failure
+LOCK TABLES m1 READ;
+--echo # Replace 't1' with 't3' table using file operations.
+remove_file $MYSQLD_DATADIR/test/t1.frm;
+remove_file $MYSQLD_DATADIR/test/t1.MYI;
+remove_file $MYSQLD_DATADIR/test/t1.MYD;
+copy_file $MYSQLD_DATADIR/test/t3.frm $MYSQLD_DATADIR/test/t1.frm;
+copy_file $MYSQLD_DATADIR/test/t3.MYI $MYSQLD_DATADIR/test/t1.MYI;
+copy_file $MYSQLD_DATADIR/test/t3.MYD $MYSQLD_DATADIR/test/t1.MYD;
+--error ER_CANT_REOPEN_TABLE
+FLUSH TABLES;
+UNLOCK TABLES;
+DROP TABLE t1, t2, t3, m1;
+
--echo End of 5.1 tests
diff --git a/mysql-test/t/type_newdecimal.test b/mysql-test/t/type_newdecimal.test
index 31a8808da55..567d6c0b6a1 100644
--- a/mysql-test/t/type_newdecimal.test
+++ b/mysql-test/t/type_newdecimal.test
@@ -1526,3 +1526,17 @@ DROP TABLE IF EXISTS t1;
--echo End of 5.1 tests
+
+--echo #
+--echo # BUG#12911710 - VALGRIND FAILURE IN
+--echo # ROW-DEBUG:PERFSCHEMA.SOCKET_SUMMARY_BY_INSTANCE_FUNC
+--echo #
+
+CREATE TABLE t1(d1 DECIMAL(60,0) NOT NULL,
+ d2 DECIMAL(60,0) NOT NULL);
+
+INSERT INTO t1 (d1, d2) VALUES(0.0, 0.0);
+SELECT d1 * d2 FROM t1;
+
+DROP TABLE t1;
+
diff --git a/mysql-test/valgrind.supp b/mysql-test/valgrind.supp
index 8720cd511b9..3751a339a1a 100644
--- a/mysql-test/valgrind.supp
+++ b/mysql-test/valgrind.supp
@@ -791,3 +791,37 @@
fun:fil_delete_tablespace
fun:row_drop_table_for_mysql
}
+
+{
+ Bug#12856915 VALGRIND FAILURE IN FILESORT/CREATE_SORT_INDEX / one
+ Memcheck:Param
+ write(buf)
+ obj:*/libpthread*.so
+ fun:my_write
+ fun:my_b_flush_io_cache
+ fun:_my_b_write
+ fun:_ZL10write_keysP13st_sort_paramPPhjP11st_io_cacheS4_
+ fun:_ZL13find_all_keysP13st_sort_paramP10SQL_SELECTPPhP11st_io_cacheS6_S6_
+ fun:_Z8filesortP3THDP8st_tableP13st_sort_fieldjP10SQL_SELECTybPy
+}
+
+{
+ Bug#12856915 VALGRIND FAILURE IN FILESORT/CREATE_SORT_INDEX / two
+ Memcheck:Param
+ write(buf)
+ obj:*/libpthread*.so
+ fun:my_write
+ fun:my_b_flush_io_cache
+ fun:_Z15merge_many_buffP13st_sort_paramPhP10st_buffpekPjP11st_io_cache
+ fun:_Z8filesortP3THDP8st_tableP13st_sort_fieldjP10SQL_SELECTybPy
+}
+
+{
+ Bug#12856915 VALGRIND FAILURE IN FILESORT/CREATE_SORT_INDEX / three
+ Memcheck:Param
+ write(buf)
+ obj:*/libpthread*.so
+ fun:my_write
+ fun:my_b_flush_io_cache
+ fun:_Z8filesortP3THDP8st_tableP13st_sort_fieldjP10SQL_SELECTybPy
+}
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 99e5156427a..0ff354b334c 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -959,21 +959,10 @@ static void make_sortkey(register SORTPARAM *param,
if (addonf->null_bit && field->is_null())
{
nulls[addonf->null_offset]|= addonf->null_bit;
-#ifdef HAVE_purify
- bzero(to, addonf->length);
-#endif
}
else
{
-#ifdef HAVE_purify
- uchar *end= field->pack(to, field->ptr);
- uint length= (uint) ((to + addonf->length) - end);
- DBUG_ASSERT((int) length >= 0);
- if (length)
- bzero(end, length);
-#else
(void) field->pack(to, field->ptr);
-#endif
}
to+= addonf->length;
}
diff --git a/sql/my_decimal.h b/sql/my_decimal.h
index c7a99e10233..21f485560da 100644
--- a/sql/my_decimal.h
+++ b/sql/my_decimal.h
@@ -101,12 +101,8 @@ public:
{
len= DECIMAL_BUFF_LENGTH;
buf= buffer;
-#if !defined (HAVE_purify) && !defined(DBUG_OFF)
- /* Set buffer to 'random' value to find wrong buffer usage */
- for (uint i= 0; i < DECIMAL_BUFF_LENGTH; i++)
- buffer[i]= i;
-#endif
}
+
my_decimal()
{
init();
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
index 811ad8eb864..ae8639c69ac 100644
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -221,7 +221,13 @@ public:
#endif
/* if not set, the value of other members of the structure are undefined */
- bool inited;
+ /*
+ inited changes its value within LOCK_active_mi-guarded critical
+ sections at times of start_slave_threads() (0->1) and end_slave() (1->0).
+ Readers may read it without acquiring the mutex, provided they allow for
+ the potential concurrency issue.
+ */
+ volatile bool inited;
volatile bool abort_slave;
volatile uint slave_running;
diff --git a/sql/slave.cc b/sql/slave.cc
index 6c375238ce4..02d8cc2c199 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -598,11 +598,15 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock,
DBUG_PRINT("sleep",("Waiting for slave thread to start"));
const char* old_msg = thd->enter_cond(start_cond,cond_lock,
"Waiting for slave thread to start");
- pthread_cond_wait(start_cond,cond_lock);
+ pthread_cond_wait(start_cond, cond_lock);
thd->exit_cond(old_msg);
pthread_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
if (thd->killed)
+ {
+ if (start_lock)
+ pthread_mutex_unlock(start_lock);
DBUG_RETURN(thd->killed_errno());
+ }
}
}
if (start_lock)
@@ -2531,6 +2535,7 @@ pthread_handler_t handle_slave_io(void *arg)
thd= new THD; // note that contructor of THD uses DBUG_ !
THD_CHECK_SENTRY(thd);
+ DBUG_ASSERT(mi->io_thd == 0);
mi->io_thd = thd;
pthread_detach_this_thread();
@@ -4489,9 +4494,6 @@ int rotate_relay_log(Master_info* mi)
Relay_log_info* rli= &mi->rli;
int error= 0;
- /* We don't lock rli->run_lock. This would lead to deadlocks. */
- pthread_mutex_lock(&mi->run_lock);
-
/*
We need to test inited because otherwise, new_file() will attempt to lock
LOCK_log, which may not be inited (if we're not a slave).
@@ -4521,7 +4523,6 @@ int rotate_relay_log(Master_info* mi)
*/
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
end:
- pthread_mutex_unlock(&mi->run_lock);
DBUG_RETURN(error);
}
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index ab1ba156905..ace78947054 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -96,6 +96,13 @@ static TABLE_SHARE *oldest_unused_share, end_of_unused_share;
static pthread_mutex_t LOCK_table_share;
static bool table_def_inited= 0;
+/**
+ Dummy TABLE instance which is used in reopen_tables() and reattach_merge()
+ functions to mark MERGE tables and their children with which there is some
+ kind of problem and which therefore we need to close.
+*/
+static TABLE bad_merge_marker;
+
static int open_unireg_entry(THD *thd, TABLE *entry, TABLE_LIST *table_list,
const char *alias,
char *cache_key, uint cache_key_length,
@@ -3215,46 +3222,65 @@ void close_data_files_and_morph_locks(THD *thd, const char *db,
/**
+ @brief Mark merge parent and children with bad_merge_marker
+
+ @param[in,out] parent the TABLE object of the parent
+*/
+
+static void mark_merge_parent_and_children_as_bad(TABLE *parent)
+{
+ TABLE_LIST *child_l;
+ DBUG_ENTER("mark_merge_parent_and_children_as_bad");
+ parent->parent= &bad_merge_marker;
+ for (child_l= parent->child_l; ; child_l= child_l->next_global)
+ {
+ child_l->table->parent= &bad_merge_marker;
+ child_l->table= NULL;
+ if (&child_l->next_global == parent->child_last_l)
+ break;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
Reattach MERGE children after reopen.
@param[in] thd thread context
- @param[in,out] err_tables_p pointer to pointer of tables in error
+
+ @note If reattach failed for a certain MERGE table, the table (and all
+ its children) are marked with bad_merge_marker.
@return status
- @retval FALSE OK, err_tables_p unchanged
- @retval TRUE Error, err_tables_p contains table(s)
+ @retval FALSE OK
+ @retval TRUE Error
*/
-static bool reattach_merge(THD *thd, TABLE **err_tables_p)
+static bool reattach_merge(THD *thd)
{
TABLE *table;
- TABLE *next;
- TABLE **prv_p= &thd->open_tables;
bool error= FALSE;
DBUG_ENTER("reattach_merge");
- for (table= thd->open_tables; table; table= next)
+ for (table= thd->open_tables; table; table= table->next)
{
- next= table->next;
- DBUG_PRINT("tcache", ("check table: '%s'.'%s' 0x%lx next: 0x%lx",
+ DBUG_PRINT("tcache", ("check table: '%s'.'%s' 0x%lx",
table->s->db.str, table->s->table_name.str,
- (long) table, (long) next));
- /* Reattach children for MERGE tables with "closed data files" only. */
- if (table->child_l && !table->children_attached)
+ (long) table));
+ /*
+ Reattach children only for MERGE tables that had children or parent
+ with "closed data files" and were reopened. For extra safety skip MERGE
+ tables which we failed to reopen (should not happen with current code).
+ */
+ if (table->child_l && table->parent != &bad_merge_marker &&
+ !table->children_attached)
{
DBUG_PRINT("tcache", ("MERGE parent, attach children"));
- if(table->file->extra(HA_EXTRA_ATTACH_CHILDREN))
+ if (table->file->extra(HA_EXTRA_ATTACH_CHILDREN))
{
my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias);
error= TRUE;
- /* Remove table from open_tables. */
- *prv_p= next;
- if (next)
- prv_p= &next->next;
- /* Stack table on error list. */
- table->next= *err_tables_p;
- *err_tables_p= table;
- continue;
+ mark_merge_parent_and_children_as_bad(table);
}
else
{
@@ -3264,7 +3290,6 @@ static bool reattach_merge(THD *thd, TABLE **err_tables_p)
table->s->table_name.str, (long) table));
}
}
- prv_p= &table->next;
}
DBUG_RETURN(error);
}
@@ -3294,7 +3319,6 @@ bool reopen_tables(THD *thd, bool get_locks, bool mark_share_as_old)
{
TABLE *table,*next,**prev;
TABLE **tables,**tables_ptr; // For locks
- TABLE *err_tables= NULL;
bool error=0, not_used;
bool merge_table_found= FALSE;
const uint flags= MYSQL_LOCK_NOTIFY_IF_NEED_REOPEN |
@@ -3328,29 +3352,69 @@ bool reopen_tables(THD *thd, bool get_locks, bool mark_share_as_old)
for (table=thd->open_tables; table ; table=next)
{
uint db_stat=table->db_stat;
+ TABLE *parent= table->child_l ? table : table->parent;
next=table->next;
DBUG_PRINT("tcache", ("open table: '%s'.'%s' 0x%lx "
"parent: 0x%lx db_stat: %u",
table->s->db.str, table->s->table_name.str,
(long) table, (long) table->parent, db_stat));
- if (table->child_l && !db_stat)
+ /*
+ If we need to reopen child or parent table in a MERGE table, then
+ children in this MERGE table have to be already detached at this
+ point.
+ */
+ DBUG_ASSERT(db_stat || !parent || !parent->children_attached);
+ /*
+ Thanks to the above assumption the below condition will guarantee that
+ merge_table_found is TRUE when we need to reopen child or parent table.
+ Note that it works even in situation when it is only a child and not a
+ parent that needs reopen (this can happen when get_locks == FALSE).
+ */
+ if (table->child_l && !table->children_attached)
merge_table_found= TRUE;
- if (!tables || (!db_stat && reopen_table(table)))
+
+ if (!tables)
{
- my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias);
/*
- If we could not allocate 'tables', we may close open tables
- here. If a MERGE table is affected, detach the children first.
- It is not necessary to clear the child or parent table reference
- of this table because the TABLE is freed. But we need to clear
- the child or parent references of the other belonging tables so
- that they cannot be moved into the unused_tables chain with
- these pointers set.
+ If we could not allocate 'tables' we close ALL open tables here.
+ Before closing MERGE child or parent we need to detach children
+ and/or clear references in/to them.
*/
- if (table->child_l || table->parent)
+ if (parent)
detach_merge_children(table, TRUE);
- VOID(hash_delete(&open_cache,(uchar*) table));
- error=1;
+ }
+ else if (table->parent == &bad_merge_marker)
+ {
+ /*
+ This is either a child or a parent of a MERGE table for which
+ we already decided that we are unable to reopen it. Close it.
+
+ Reset parent reference, it may be used while freeing the table.
+ */
+ table->parent= NULL;
+ }
+ else if (!db_stat && reopen_table(table))
+ {
+ /*
+ If we fail to reopen a child or a parent in a MERGE table and the
+ MERGE table is affected for the first time, mark all relevant tables
+ invalid. Otherwise handle it as usual.
+
+ All in all we must end up with:
+ - child tables are detached from parent. This was done earlier,
+ but child<->parent references were kept valid for reopen.
+ - parent is not in the to-be-locked tables
+ - all child tables and parent are not in the THD::open_tables.
+ - all child tables and parent are not in the open_cache.
+
+ Please note that below we do additional pass through THD::open_tables
+ list to achieve the last three points.
+ */
+ if (parent)
+ {
+ mark_merge_parent_and_children_as_bad(parent);
+ table->parent= NULL;
+ }
}
else
{
@@ -3366,21 +3430,56 @@ bool reopen_tables(THD *thd, bool get_locks, bool mark_share_as_old)
table->s->version=0;
table->open_placeholder= 0;
}
+ continue;
}
+ my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias);
+ VOID(hash_delete(&open_cache, (uchar *) table));
+ error= 1;
}
*prev=0;
/*
When all tables are open again, we can re-attach MERGE children to
- their parents. All TABLE objects are still present.
+ their parents.
+
+ If there was an error while reopening a child or a parent of a MERGE
+ table, or while reattaching child tables to their parents, some tables
+ may have been kept open but marked for close with bad_merge_marker.
+ Close these tables now.
*/
- DBUG_PRINT("tcache", ("re-attaching MERGE tables: %d", merge_table_found));
- if (!error && merge_table_found && reattach_merge(thd, &err_tables))
+ if (tables && merge_table_found && (error|= reattach_merge(thd)))
{
- while (err_tables)
+ prev= &thd->open_tables;
+ for (table= thd->open_tables; table; table= next)
{
- VOID(hash_delete(&open_cache, (uchar*) err_tables));
- err_tables= err_tables->next;
+ next= table->next;
+ if (table->parent == &bad_merge_marker)
+ {
+ /* Remove merge parent from to-be-locked tables array. */
+ if (get_locks && table->child_l)
+ {
+ TABLE **t;
+ for (t= tables; t < tables_ptr; t++)
+ {
+ if (*t == table)
+ {
+ tables_ptr--;
+ memmove(t, t + 1, (tables_ptr - t) * sizeof(TABLE *));
+ break;
+ }
+ }
+ }
+ /* Reset parent reference, it may be used while freeing the table. */
+ table->parent= NULL;
+ /* Free table. */
+ VOID(hash_delete(&open_cache, (uchar *) table));
+ }
+ else
+ {
+ *prev= table;
+ prev= &table->next;
+ }
}
+ *prev= 0;
}
DBUG_PRINT("tcache", ("open tables to lock: %u",
(uint) (tables_ptr - tables)));
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
index 790582815a3..ad99913cf3b 100644
--- a/storage/innobase/btr/btr0btr.c
+++ b/storage/innobase/btr/btr0btr.c
@@ -300,29 +300,30 @@ btr_page_alloc_for_ibuf(
/******************************************************************
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! */
-
-page_t*
-btr_page_alloc(
-/*===========*/
- /* out: new allocated page, x-latched;
- NULL if out of space */
+static
+ulint
+btr_page_alloc_low(
+/*===============*/
+ /* out: allocated page number,
+ FIL_NULL if out of space */
dict_index_t* index, /* in: index */
ulint hint_page_no, /* in: hint of a good page */
byte file_direction, /* in: direction where a possible
page split is made */
ulint level, /* in: level where the page is placed
in the tree */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr, /* in/out: mini-transaction
+ for the allocation */
+ mtr_t* init_mtr) /* in/out: mini-transaction
+ in which the page should be
+ initialized (may be the same
+ as mtr), or NULL if it should
+ not be initialized (the page
+ at hint was previously freed
+ in mtr) */
{
fseg_header_t* seg_header;
page_t* root;
- page_t* new_page;
- ulint new_page_no;
-
- if (index->type & DICT_IBUF) {
-
- return(btr_page_alloc_for_ibuf(index, mtr));
- }
root = btr_root_get(index, mtr);
@@ -336,19 +337,61 @@ btr_page_alloc(
reservation for free extents, and thus we know that a page can
be allocated: */
- new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
- file_direction, TRUE, mtr);
+ return(fseg_alloc_free_page_general(seg_header, hint_page_no,
+ file_direction, TRUE,
+ mtr, init_mtr));
+}
+
+/**************************************************************//**
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents! */
+
+page_t*
+btr_page_alloc(
+/*===========*/
+ /* out: new allocated page, x-latched;
+ NULL if out of space */
+ dict_index_t* index, /* in: index */
+ ulint hint_page_no, /* in: hint of a good page */
+ byte file_direction, /* in: direction where a possible
+ page split is made */
+ ulint level, /* in: level where the page is placed
+ in the tree */
+ mtr_t* mtr, /* in/out: mini-transaction
+ for the allocation */
+ mtr_t* init_mtr) /* in/out: mini-transaction
+ for x-latching and initializing
+ the page */
+{
+ page_t* new_page;
+ ulint new_page_no;
+
+ if (index->type & DICT_IBUF) {
+
+ return(btr_page_alloc_for_ibuf(index, mtr));
+ }
+
+ new_page_no = btr_page_alloc_low(
+ index, hint_page_no, file_direction, level, mtr, init_mtr);
+
if (new_page_no == FIL_NULL) {
return(NULL);
}
new_page = buf_page_get(dict_index_get_space(index), new_page_no,
- RW_X_LATCH, mtr);
+ RW_X_LATCH, init_mtr);
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */
+ if (mtr->freed_clust_leaf) {
+ mtr_memo_release(mtr, new_page, MTR_MEMO_FREE_CLUST_LEAF);
+ ut_ad(!mtr_memo_contains(mtr, buf_block_align(new_page),
+ MTR_MEMO_FREE_CLUST_LEAF));
+ }
+
+ ut_ad(btr_freed_leaves_validate(mtr));
return(new_page);
}
@@ -464,6 +507,16 @@ btr_page_free_low(
page_no = buf_frame_get_page_no(page);
fseg_free_page(seg_header, space, page_no, mtr);
+
+ /* The page was marked free in the allocation bitmap, but it
+ should remain buffer-fixed until mtr_commit(mtr) or until it
+ is explicitly freed from the mini-transaction. */
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ /* TODO: Discard any operations on the page from the redo log
+ and remove the block from the flush list and the buffer pool.
+ This would free up buffer pool earlier and reduce writes to
+ both the tablespace and the redo log. */
}
/******************************************************************
@@ -479,13 +532,144 @@ btr_page_free(
{
ulint level;
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
level = btr_page_get_level(page, mtr);
btr_page_free_low(index, page, level, mtr);
+
+ /* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+
+ if (level == 0 && (index->type & DICT_CLUSTERED)) {
+ /* We may have to call btr_mark_freed_leaves() to
+ temporarily mark the block nonfree for invoking
+ btr_store_big_rec_extern_fields() after an
+ update. Remember that the block was freed. */
+ mtr->freed_clust_leaf = TRUE;
+ mtr_memo_push(mtr, buf_block_align(page),
+ MTR_MEMO_FREE_CLUST_LEAF);
+ }
+
+ ut_ad(btr_freed_leaves_validate(mtr));
}
+/**************************************************************//**
+Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
+For invoking btr_store_big_rec_extern_fields() after an update,
+we must temporarily mark freed clustered index pages allocated, so
+that off-page columns will not be allocated from them. Between the
+btr_store_big_rec_extern_fields() and mtr_commit() we have to
+mark the pages free again, so that no pages will be leaked. */
+
+void
+btr_mark_freed_leaves(
+/*==================*/
+ dict_index_t* index, /* in/out: clustered index */
+ mtr_t* mtr, /* in/out: mini-transaction */
+ ibool nonfree)/* in: TRUE=mark nonfree, FALSE=mark freed */
+{
+ /* This is loosely based on mtr_memo_release(). */
+
+ ulint offset;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
+ if (!mtr->freed_clust_leaf) {
+ return;
+ }
+
+ offset = dyn_array_get_data_size(&mtr->memo);
+
+ while (offset > 0) {
+ mtr_memo_slot_t* slot;
+ buf_block_t* block;
+
+ offset -= sizeof *slot;
+
+ slot = dyn_array_get_element(&mtr->memo, offset);
+
+ if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
+ continue;
+ }
+
+ /* Because btr_page_alloc() does invoke
+ mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
+ blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
+ memo must still be clustered index leaf tree pages. */
+ block = slot->object;
+ ut_a(buf_block_get_space(block)
+ == dict_index_get_space(index));
+ ut_a(fil_page_get_type(buf_block_get_frame(block))
+ == FIL_PAGE_INDEX);
+ ut_a(btr_page_get_level(buf_block_get_frame(block), mtr) == 0);
+
+ if (nonfree) {
+ /* Allocate the same page again. */
+ ulint page_no;
+ page_no = btr_page_alloc_low(
+ index, buf_block_get_page_no(block),
+ FSP_NO_DIR, 0, mtr, NULL);
+ ut_a(page_no == buf_block_get_page_no(block));
+ } else {
+ /* Assert that the page is allocated and free it. */
+ btr_page_free_low(index, buf_block_get_frame(block),
+ 0, mtr);
+ }
+ }
+
+ ut_ad(btr_freed_leaves_validate(mtr));
+}
+
+#ifdef UNIV_DEBUG
+/**************************************************************//**
+Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
+See btr_mark_freed_leaves(). */
+
+ibool
+btr_freed_leaves_validate(
+/*======================*/
+ /* out: TRUE if valid */
+ mtr_t* mtr) /* in: mini-transaction */
+{
+ ulint offset;
+
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
+ offset = dyn_array_get_data_size(&mtr->memo);
+
+ while (offset > 0) {
+ mtr_memo_slot_t* slot;
+ buf_block_t* block;
+
+ offset -= sizeof *slot;
+
+ slot = dyn_array_get_element(&mtr->memo, offset);
+
+ if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
+ continue;
+ }
+
+ ut_a(mtr->freed_clust_leaf);
+ /* Because btr_page_alloc() does invoke
+ mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
+ blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
+ memo must still be clustered index leaf tree pages. */
+ block = slot->object;
+ ut_a(fil_page_get_type(buf_block_get_frame(block))
+ == FIL_PAGE_INDEX);
+ ut_a(btr_page_get_level(buf_block_get_frame(block), mtr) == 0);
+ }
+
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
/******************************************************************
Sets the child node file address in a node pointer. */
UNIV_INLINE
@@ -1015,7 +1199,7 @@ btr_root_raise_and_insert(
a node pointer to the new page, and then splitting the new page. */
new_page = btr_page_alloc(index, 0, FSP_NO_DIR,
- btr_page_get_level(root, mtr), mtr);
+ btr_page_get_level(root, mtr), mtr, mtr);
btr_page_create(new_page, index, mtr);
@@ -1636,7 +1820,7 @@ func_start:
/* 2. Allocate a new page to the index */
new_page = btr_page_alloc(cursor->index, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr);
+ btr_page_get_level(page, mtr), mtr, mtr);
btr_page_create(new_page, cursor->index, mtr);
/* 3. Calculate the first record on the upper half-page, and the
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index 9ce09929f9a..a1dda8edf69 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -2051,43 +2051,6 @@ return_after_reservations:
return(err);
}
-/*****************************************************************
-Commits and restarts a mini-transaction so that it will retain an
-x-lock on index->lock and the cursor page. */
-
-void
-btr_cur_mtr_commit_and_start(
-/*=========================*/
- btr_cur_t* cursor, /* in: cursor */
- mtr_t* mtr) /* in/out: mini-transaction */
-{
- buf_block_t* block;
-
- block = buf_block_align(btr_cur_get_rec(cursor));
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* Keep the locks across the mtr_commit(mtr). */
- rw_lock_x_lock(dict_index_get_lock(cursor->index));
- rw_lock_x_lock(&block->lock);
- mutex_enter(&block->mutex);
-#ifdef UNIV_SYNC_DEBUG
- buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
-#else
- buf_block_buf_fix_inc(block);
-#endif
- mutex_exit(&block->mutex);
- /* Write out the redo log. */
- mtr_commit(mtr);
- mtr_start(mtr);
- /* Reassociate the locks with the mini-transaction.
- They will be released on mtr_commit(mtr). */
- mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK);
- mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
-}
-
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
/********************************************************************
@@ -3494,6 +3457,11 @@ btr_store_big_rec_extern_fields(
this function returns */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
+ mtr_t* alloc_mtr, /* in/out: in an insert, NULL;
+ in an update, local_mtr for
+ allocating BLOB pages and
+ updating BLOB pointers; alloc_mtr
+ must not have freed any leaf pages */
mtr_t* local_mtr __attribute__((unused))) /* in: mtr
containing the latch to rec and to the
tree */
@@ -3514,6 +3482,8 @@ btr_store_big_rec_extern_fields(
ulint i;
mtr_t mtr;
+ ut_ad(local_mtr);
+ ut_ad(!alloc_mtr || alloc_mtr == local_mtr);
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
@@ -3523,6 +3493,25 @@ btr_store_big_rec_extern_fields(
space_id = buf_frame_get_space_id(rec);
+ if (alloc_mtr) {
+ /* Because alloc_mtr will be committed after
+ mtr, it is possible that the tablespace has been
+ extended when the B-tree record was updated or
+ inserted, or it will be extended while allocating
+ pages for big_rec.
+
+ TODO: In mtr (not alloc_mtr), write a redo log record
+ about extending the tablespace to its current size,
+ and remember the current size. Whenever the tablespace
+ grows as pages are allocated, write further redo log
+ records to mtr. (Currently tablespace extension is not
+ covered by the redo log. If it were, the record would
+ only be written to alloc_mtr, which is committed after
+ mtr.) */
+ } else {
+ alloc_mtr = &mtr;
+ }
+
/* We have to create a file segment to the tablespace
for each field and put the pointer to the field in rec */
@@ -3549,7 +3538,7 @@ btr_store_big_rec_extern_fields(
}
page = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, &mtr);
+ FSP_NO_DIR, 0, alloc_mtr, &mtr);
if (page == NULL) {
mtr_commit(&mtr);
@@ -3603,37 +3592,42 @@ btr_store_big_rec_extern_fields(
extern_len -= store_len;
+ if (alloc_mtr == &mtr) {
#ifdef UNIV_SYNC_DEBUG
- rec_page =
+ rec_page =
#endif /* UNIV_SYNC_DEBUG */
- buf_page_get(space_id,
- buf_frame_get_page_no(data),
- RW_X_LATCH, &mtr);
+ buf_page_get(
+ space_id,
+ buf_frame_get_page_no(data),
+ RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
+ buf_page_dbg_add_level(
+ rec_page, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
+ }
+
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES, alloc_mtr);
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
big_rec_vec->fields[i].len
- extern_len,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES, alloc_mtr);
if (prev_page_no == FIL_NULL) {
mlog_write_ulint(data + local_len
+ BTR_EXTERN_SPACE_ID,
space_id,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES, alloc_mtr);
mlog_write_ulint(data + local_len
+ BTR_EXTERN_PAGE_NO,
page_no,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES, alloc_mtr);
mlog_write_ulint(data + local_len
+ BTR_EXTERN_OFFSET,
FIL_PAGE_DATA,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES, alloc_mtr);
/* Set the bit denoting that this field
in rec is stored externally */
@@ -3641,7 +3635,7 @@ btr_store_big_rec_extern_fields(
rec_set_nth_field_extern_bit(
rec, index,
big_rec_vec->fields[i].field_no,
- TRUE, &mtr);
+ TRUE, alloc_mtr);
}
prev_page_no = page_no;
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
index 08e033e7a63..78b39812cff 100644
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -1009,29 +1009,6 @@ buf_page_peek_block(
}
/************************************************************************
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
-{
- buf_block_t* block;
-
- mutex_enter_fast(&(buf_pool->mutex));
-
- block = buf_page_hash_get(space, offset);
-
- if (block) {
- block->check_index_page_at_flush = FALSE;
- }
-
- mutex_exit(&(buf_pool->mutex));
-}
-
-/************************************************************************
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
pool if it is found there. */
diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
index d228e683957..d5be8fca38f 100644
--- a/storage/innobase/fsp/fsp0fsp.c
+++ b/storage/innobase/fsp/fsp0fsp.c
@@ -293,15 +293,19 @@ fseg_alloc_free_page_low(
/* out: the allocated page number, FIL_NULL
if no page could be allocated */
ulint space, /* in: space */
- fseg_inode_t* seg_inode, /* in: segment inode */
+ fseg_inode_t* seg_inode, /* in/out: segment inode */
ulint hint, /* in: hint of which page would be desirable */
byte direction, /* in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr); /* in: mtr handle */
-
+ mtr_t* mtr, /* in/out: mini-transaction */
+ mtr_t* init_mtr);/* in/out: mini-transaction in which the
+ page should be initialized
+ (may be the same as mtr), or NULL if it
+ should not be initialized (the page at hint
+ was previously freed in mtr) */
/**************************************************************************
Reads the file space size stored in the header page. */
@@ -1371,6 +1375,43 @@ fsp_alloc_free_extent(
return(descr);
}
+/**********************************************************************//**
+Allocates the page at the given slot of an XDES_FREE_FRAG extent. */
+static __attribute__((nonnull))
+void
+fsp_alloc_from_free_frag(
+/*=====================*/
+ fsp_header_t* header, /* in/out: tablespace header */
+ xdes_t* descr, /* in/out: extent descriptor */
+ ulint bit, /* in: slot to allocate in the extent */
+ mtr_t* mtr) /* in/out: mini-transaction */
+{
+ ulint frag_n_used;
+
+ ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
+ ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr));
+ xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
+
+ /* Update the FRAG_N_USED field */
+ frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
+ mtr);
+ frag_n_used++;
+ mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
+ mtr);
+ if (xdes_is_full(descr, mtr)) {
+ /* The fragment is full: move it to another list */
+ flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
+ mtr);
+ xdes_set_state(descr, XDES_FULL_FRAG, mtr);
+
+ flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
+ mtr);
+ mlog_write_ulint(header + FSP_FRAG_N_USED,
+ frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
+ mtr);
+ }
+}
+
/**************************************************************************
Allocates a single free page from a space. The page is marked as used. */
static
@@ -1381,19 +1422,22 @@ fsp_alloc_free_page(
be allocated */
ulint space, /* in: space id */
ulint hint, /* in: hint of which page would be desirable */
- mtr_t* mtr) /* in: mtr handle */
+ mtr_t* mtr, /* in/out: mini-transaction */
+ mtr_t* init_mtr)/* in/out: mini-transaction in which the
+ page should be initialized
+ (may be the same as mtr) */
{
fsp_header_t* header;
fil_addr_t first;
xdes_t* descr;
page_t* page;
ulint free;
- ulint frag_n_used;
ulint page_no;
ulint space_size;
ibool success;
ut_ad(mtr);
+ ut_ad(init_mtr);
header = fsp_get_space_header(space, mtr);
@@ -1441,6 +1485,7 @@ fsp_alloc_free_page(
if (free == ULINT_UNDEFINED) {
ut_print_buf(stderr, ((byte*)descr) - 500, 1000);
+ putc('\n', stderr);
ut_error;
}
@@ -1472,40 +1517,21 @@ fsp_alloc_free_page(
}
}
- xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
-
- /* Update the FRAG_N_USED field */
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- frag_n_used++;
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
- mtr);
- if (xdes_is_full(descr, mtr)) {
- /* The fragment is full: move it to another list */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FULL_FRAG, mtr);
-
- flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
- mtr);
- }
+ fsp_alloc_from_free_frag(header, descr, free, mtr);
/* Initialize the allocated page to the buffer pool, so that it can
be obtained immediately with buf_page_get without need for a disk
read. */
- buf_page_create(space, page_no, mtr);
+ buf_page_create(space, page_no, init_mtr);
- page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+ page = buf_page_get(space, page_no, RW_X_LATCH, init_mtr);
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
/* Prior contents of the page should be ignored */
- fsp_init_file_page(page, mtr);
+ fsp_init_file_page(page, init_mtr);
return(page_no);
}
@@ -1724,7 +1750,7 @@ fsp_alloc_seg_inode_page(
space = buf_frame_get_space_id(space_header);
- page_no = fsp_alloc_free_page(space, 0, mtr);
+ page_no = fsp_alloc_free_page(space, 0, mtr, mtr);
if (page_no == FIL_NULL) {
@@ -2094,7 +2120,8 @@ fseg_create_general(
}
if (page == 0) {
- page = fseg_alloc_free_page_low(space, inode, 0, FSP_UP, mtr);
+ page = fseg_alloc_free_page_low(space,
+ inode, 0, FSP_UP, mtr, mtr);
if (page == FIL_NULL) {
@@ -2331,14 +2358,19 @@ fseg_alloc_free_page_low(
/* out: the allocated page number, FIL_NULL
if no page could be allocated */
ulint space, /* in: space */
- fseg_inode_t* seg_inode, /* in: segment inode */
+ fseg_inode_t* seg_inode, /* in/out: segment inode */
ulint hint, /* in: hint of which page would be desirable */
byte direction, /* in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr) /* in: mtr handle */
+ mtr_t* mtr, /* in/out: mini-transaction */
+ mtr_t* init_mtr)/* in/out: mini-transaction in which the
+ page should be initialized
+ (may be the same as mtr), or NULL if it
+ should not be initialized (the page at hint
+ was previously freed in mtr) */
{
fsp_header_t* space_header;
ulint space_size;
@@ -2350,7 +2382,6 @@ fseg_alloc_free_page_low(
if could not be allocated */
xdes_t* ret_descr; /* the extent of the allocated page */
page_t* page;
- ibool frag_page_allocated = FALSE;
ibool success;
ulint n;
@@ -2371,6 +2402,8 @@ fseg_alloc_free_page_low(
if (descr == NULL) {
/* Hint outside space or too high above free limit: reset
hint */
+ ut_a(init_mtr);
+ /* The file space header page is always allocated. */
hint = 0;
descr = xdes_get_descriptor(space, hint, mtr);
}
@@ -2382,15 +2415,20 @@ fseg_alloc_free_page_low(
mtr), seg_id))
&& (xdes_get_bit(descr, XDES_FREE_BIT,
hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
-
+take_hinted_page:
/* 1. We can take the hinted page
=================================*/
ret_descr = descr;
ret_page = hint;
+ /* Skip the check for extending the tablespace. If the
+ page hint were not within the size of the tablespace,
+ we would have got (descr == NULL) above and reset the hint. */
+ goto got_hinted_page;
/*-----------------------------------------------------------*/
- } else if ((xdes_get_state(descr, mtr) == XDES_FREE)
- && ((reserved - used) < reserved / FSEG_FILLFACTOR)
- && (used >= FSEG_FRAG_LIMIT)) {
+ } else if (xdes_get_state(descr, mtr) == XDES_FREE
+ && (!init_mtr
+ || ((reserved - used < reserved / FSEG_FILLFACTOR)
+ && used >= FSEG_FRAG_LIMIT))) {
/* 2. We allocate the free extent from space and can take
=========================================================
@@ -2408,8 +2446,20 @@ fseg_alloc_free_page_low(
/* Try to fill the segment free list */
fseg_fill_free_list(seg_inode, space,
hint + FSP_EXTENT_SIZE, mtr);
- ret_page = hint;
+ goto take_hinted_page;
/*-----------------------------------------------------------*/
+ } else if (!init_mtr) {
+ ut_a(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
+ fsp_alloc_from_free_frag(space_header, descr,
+ hint % FSP_EXTENT_SIZE, mtr);
+ ret_page = hint;
+ ret_descr = NULL;
+
+ /* Put the page in the fragment page array of the segment */
+ n = fseg_find_free_frag_page_slot(seg_inode, mtr);
+ ut_a(n != FIL_NULL);
+ fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr);
+ goto got_hinted_page;
} else if ((direction != FSP_NO_DIR)
&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
&& (used >= FSEG_FRAG_LIMIT)
@@ -2467,11 +2517,9 @@ fseg_alloc_free_page_low(
} else if (used < FSEG_FRAG_LIMIT) {
/* 6. We allocate an individual page from the space
===================================================*/
- ret_page = fsp_alloc_free_page(space, hint, mtr);
+ ret_page = fsp_alloc_free_page(space, hint, mtr, init_mtr);
ret_descr = NULL;
- frag_page_allocated = TRUE;
-
if (ret_page != FIL_NULL) {
/* Put the page in the fragment page array of the
segment */
@@ -2481,6 +2529,10 @@ fseg_alloc_free_page_low(
fseg_set_nth_frag_page_no(seg_inode, n, ret_page,
mtr);
}
+
+ /* fsp_alloc_free_page() invoked fsp_init_file_page()
+ already. */
+ return(ret_page);
/*-----------------------------------------------------------*/
} else {
/* 7. We allocate a new extent and take its first page
@@ -2527,22 +2579,31 @@ fseg_alloc_free_page_low(
}
}
- if (!frag_page_allocated) {
+got_hinted_page:
+ {
/* Initialize the allocated page to buffer pool, so that it
can be obtained immediately with buf_page_get without need
for a disk read */
+ mtr_t* block_mtr = init_mtr ? init_mtr : mtr;
- page = buf_page_create(space, ret_page, mtr);
+ page = buf_page_create(space, ret_page, block_mtr);
- ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, mtr));
+ ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH,
+ block_mtr));
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
- /* The prior contents of the page should be ignored */
- fsp_init_file_page(page, mtr);
+ if (init_mtr) {
+ /* The prior contents of the page should be ignored */
+ fsp_init_file_page(page, init_mtr);
+ }
+ }
+ /* ret_descr == NULL if the block was allocated from free_frag
+ (XDES_FREE_FRAG) */
+ if (ret_descr != NULL) {
/* At this point we know the extent and the page offset.
The extent is still in the appropriate list (FSEG_NOT_FULL
or FSEG_FREE), and the page is not yet marked as used. */
@@ -2554,8 +2615,6 @@ fseg_alloc_free_page_low(
fseg_mark_page_used(seg_inode, space, ret_page, mtr);
}
- buf_reset_check_index_page_at_flush(space, ret_page);
-
return(ret_page);
}
@@ -2569,7 +2628,7 @@ fseg_alloc_free_page_general(
/*=========================*/
/* out: allocated page offset, FIL_NULL if no
page could be allocated */
- fseg_header_t* seg_header,/* in: segment header */
+ fseg_header_t* seg_header,/* in/out: segment header */
ulint hint, /* in: hint of which page would be desirable */
byte direction,/* in: if the new page is needed because
of an index page split, and records are
@@ -2581,7 +2640,11 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr) /* in: mtr handle */
+ mtr_t* mtr, /* in/out: mini-transaction handle */
+ mtr_t* init_mtr)/* in/out: mtr or another mini-transaction
+ in which the page should be initialized,
+ or NULL if this is a "fake allocation" of
+ a page that was previously freed in mtr */
{
fseg_inode_t* inode;
ulint space;
@@ -2619,7 +2682,8 @@ fseg_alloc_free_page_general(
}
page_no = fseg_alloc_free_page_low(buf_frame_get_space_id(inode),
- inode, hint, direction, mtr);
+ inode, hint, direction,
+ mtr, init_mtr);
if (!has_done_reservation) {
fil_space_release_free_extents(space, n_reserved);
}
@@ -2647,7 +2711,7 @@ fseg_alloc_free_page(
mtr_t* mtr) /* in: mtr handle */
{
return(fseg_alloc_free_page_general(seg_header, hint, direction,
- FALSE, mtr));
+ FALSE, mtr, mtr));
}
/**************************************************************************
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 269fa355558..3988019589d 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -379,7 +379,11 @@ btr_page_alloc(
page split is made */
ulint level, /* in: level where the page is placed
in the tree */
- mtr_t* mtr); /* in: mtr */
+ mtr_t* mtr, /* in/out: mini-transaction
+ for the allocation */
+ mtr_t* init_mtr); /* in/out: mini-transaction
+ for x-latching and initializing
+ the page */
/******************************************************************
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
@@ -402,6 +406,31 @@ btr_page_free_low(
page_t* page, /* in: page to be freed, x-latched */
ulint level, /* in: page level */
mtr_t* mtr); /* in: mtr */
+/**************************************************************//**
+Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
+For invoking btr_store_big_rec_extern_fields() after an update,
+we must temporarily mark freed clustered index pages allocated, so
+that off-page columns will not be allocated from them. Between the
+btr_store_big_rec_extern_fields() and mtr_commit() we have to
+mark the pages free again, so that no pages will be leaked. */
+
+void
+btr_mark_freed_leaves(
+/*==================*/
+ dict_index_t* index, /* in/out: clustered index */
+ mtr_t* mtr, /* in/out: mini-transaction */
+ ibool nonfree);/* in: TRUE=mark nonfree, FALSE=mark freed */
+#ifdef UNIV_DEBUG
+/**************************************************************//**
+Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
+See btr_mark_freed_leaves(). */
+
+ibool
+btr_freed_leaves_validate(
+/*======================*/
+ /* out: TRUE if valid */
+ mtr_t* mtr); /* in: mini-transaction */
+#endif /* UNIV_DEBUG */
#ifdef UNIV_BTR_PRINT
/*****************************************************************
Prints size info of a B-tree. */
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index c068d8d3318..c2bf84ef9cb 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -252,15 +252,6 @@ btr_cur_pessimistic_update(
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Commits and restarts a mini-transaction so that it will retain an
-x-lock on index->lock and the cursor page. */
-
-void
-btr_cur_mtr_commit_and_start(
-/*=========================*/
- btr_cur_t* cursor, /* in: cursor */
- mtr_t* mtr); /* in/out: mini-transaction */
/***************************************************************
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -471,6 +462,11 @@ btr_store_big_rec_extern_fields(
this function returns */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
+ mtr_t* alloc_mtr, /* in/out: in an insert, NULL;
+ in an update, local_mtr for
+ allocating BLOB pages and
+ updating BLOB pointers; alloc_mtr
+ must not have freed any leaf pages */
mtr_t* local_mtr); /* in: mtr containing the latch to
rec and to the tree */
/***********************************************************************
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 7479ce9cbf0..87b2f6172de 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -294,15 +294,6 @@ buf_page_peek_block(
ulint space, /* in: space id */
ulint offset);/* in: page number */
/************************************************************************
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 17bfbeec2c1..4c58d6075e6 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -167,7 +167,7 @@ fseg_alloc_free_page_general(
/*=========================*/
/* out: allocated page offset, FIL_NULL if no
page could be allocated */
- fseg_header_t* seg_header,/* in: segment header */
+ fseg_header_t* seg_header,/* in/out: segment header */
ulint hint, /* in: hint of which page would be desirable */
byte direction,/* in: if the new page is needed because
of an index page split, and records are
@@ -179,7 +179,11 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr); /* in: mtr handle */
+ mtr_t* mtr, /* in/out: mini-transaction */
+ mtr_t* init_mtr);/* in/out: mtr or another mini-transaction
+ in which the page should be initialized,
+ or NULL if this is a "fake allocation" of
+ a page that was previously freed in mtr */
/**************************************************************************
Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index a6e2976830b..a0a51dbbd17 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -36,6 +36,8 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
#define MTR_MEMO_MODIFY 54
#define MTR_MEMO_S_LOCK 55
#define MTR_MEMO_X_LOCK 56
+/* The mini-transaction freed a clustered index leaf page. */
+#define MTR_MEMO_FREE_CLUST_LEAF 57
/* Log item types: we have made them to be of the type 'byte'
for the compiler to warn if val and type parameters are switched
@@ -325,9 +327,12 @@ struct mtr_struct{
ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
dyn_array_t memo; /* memo stack for locks etc. */
dyn_array_t log; /* mini-transaction log */
- ibool modifications;
+ unsigned modifications:1;
/* TRUE if the mtr made modifications to
buffer pool pages */
+ unsigned freed_clust_leaf:1;
+ /* TRUE if MTR_MEMO_FREE_CLUST_LEAF
+ was logged in the mini-transaction */
ulint n_log_recs;
/* count of how many page initial log records
have been written to the mtr log */
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
index 81eec3bfc92..6b4cacf0766 100644
--- a/storage/innobase/include/mtr0mtr.ic
+++ b/storage/innobase/include/mtr0mtr.ic
@@ -26,6 +26,7 @@ mtr_start(
mtr->log_mode = MTR_LOG_ALL;
mtr->modifications = FALSE;
+ mtr->freed_clust_leaf = FALSE;
mtr->n_log_recs = 0;
#ifdef UNIV_DEBUG
@@ -50,7 +51,8 @@ mtr_memo_push(
ut_ad(object);
ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
- ut_ad(type <= MTR_MEMO_X_LOCK);
+ ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF);
+ ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf);
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
diff --git a/storage/innobase/mtr/mtr0mtr.c b/storage/innobase/mtr/mtr0mtr.c
index 365fa15878a..a11e20ca661 100644
--- a/storage/innobase/mtr/mtr0mtr.c
+++ b/storage/innobase/mtr/mtr0mtr.c
@@ -53,17 +53,13 @@ mtr_memo_slot_release(
buf_page_release((buf_block_t*)object, type, mtr);
} else if (type == MTR_MEMO_S_LOCK) {
rw_lock_s_unlock((rw_lock_t*)object);
-#ifdef UNIV_DEBUG
- } else if (type == MTR_MEMO_X_LOCK) {
- rw_lock_x_unlock((rw_lock_t*)object);
- } else {
- ut_ad(type == MTR_MEMO_MODIFY);
+ } else if (type != MTR_MEMO_X_LOCK) {
+ ut_ad(type == MTR_MEMO_MODIFY
+ || type == MTR_MEMO_FREE_CLUST_LEAF);
ut_ad(mtr_memo_contains(mtr, object,
MTR_MEMO_PAGE_X_FIX));
-#else
} else {
rw_lock_x_unlock((rw_lock_t*)object);
-#endif
}
}
diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
index 7ff443a11ad..6366beb6b47 100644
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
@@ -2089,15 +2089,20 @@ row_ins_index_entry_low(
if (big_rec) {
ut_a(err == DB_SUCCESS);
/* Write out the externally stored
- columns while still x-latching
- index->lock and block->lock. We have
- to mtr_commit(mtr) first, so that the
- redo log will be written in the
- correct order. Otherwise, we would run
- into trouble on crash recovery if mtr
- freed B-tree pages on which some of
- the big_rec fields will be written. */
- btr_cur_mtr_commit_and_start(&cursor, &mtr);
+ columns, but allocate the pages and
+ write the pointers using the
+ mini-transaction of the record update.
+ If any pages were freed in the update,
+ temporarily mark them allocated so
+ that off-page columns will not
+ overwrite them. We must do this,
+ because we will write the redo log for
+ the BLOB writes before writing the
+ redo log for the record update. Thus,
+ redo log application at crash recovery
+ will see BLOBs being written to free pages. */
+
+ btr_mark_freed_leaves(index, &mtr, TRUE);
rec = btr_cur_get_rec(&cursor);
offsets = rec_get_offsets(rec, index, offsets,
@@ -2105,7 +2110,8 @@ row_ins_index_entry_low(
&heap);
err = btr_store_big_rec_extern_fields(
- index, rec, offsets, big_rec, &mtr);
+ index, rec, offsets, big_rec,
+ &mtr, &mtr);
/* If writing big_rec fails (for
example, because of DB_OUT_OF_FILE_SPACE),
the record will be corrupted. Even if
@@ -2118,6 +2124,9 @@ row_ins_index_entry_low(
undo log, and thus the record cannot
be rolled back. */
ut_a(err == DB_SUCCESS);
+ /* Mark the pages free again
+ in order to avoid a leak. */
+ btr_mark_freed_leaves(index, &mtr, FALSE);
goto stored_big_rec;
}
} else {
@@ -2165,7 +2174,8 @@ function_exit:
ULINT_UNDEFINED, &heap);
err = btr_store_big_rec_extern_fields(index, rec,
- offsets, big_rec, &mtr);
+ offsets, big_rec,
+ NULL, &mtr);
stored_big_rec:
if (modify) {
dtuple_big_rec_free(big_rec);
diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c
index 171039e34ac..ccb3c1f7781 100644
--- a/storage/innobase/row/row0row.c
+++ b/storage/innobase/row/row0row.c
@@ -212,23 +212,27 @@ row_build(
}
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- /* This condition can occur during crash recovery before
- trx_rollback_or_clean_all_without_sess() has completed
- execution.
-
- This condition is possible if the server crashed
- during an insert or update before
- btr_store_big_rec_extern_fields() did mtr_commit() all
- BLOB pointers to the clustered index record.
-
- If the record contains a null BLOB pointer, look up the
- transaction that holds the implicit lock on this record, and
- assert that it is active. (In this version of InnoDB, we
- cannot assert that it was recovered, because there is no
- trx->is_recovered field.) */
-
- ut_a(!rec_offs_any_null_extern(rec, offsets)
- || trx_assert_active(row_get_rec_trx_id(rec, index, offsets)));
+ if (rec_offs_any_null_extern(rec, offsets)) {
+ /* This condition can occur during crash recovery
+ before trx_rollback_or_clean_all_without_sess() has
+ completed execution.
+
+ This condition is possible if the server crashed
+ during an insert or update before
+ btr_store_big_rec_extern_fields() did mtr_commit() all
+ BLOB pointers to the clustered index record.
+
+ If the record contains a null BLOB pointer, look up the
+ transaction that holds the implicit lock on this record, and
+ assert that it is active. (In this version of InnoDB, we
+ cannot assert that it was recovered, because there is no
+ trx->is_recovered field.) */
+
+ ut_a(trx_assert_active(
+ row_get_rec_trx_id(rec, index, offsets)));
+ ut_a(trx_undo_roll_ptr_is_insert(
+ row_get_rec_roll_ptr(rec, index, offsets)));
+ }
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (type != ROW_COPY_POINTERS) {
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index 694b00ea265..58739edfd98 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -1591,21 +1591,22 @@ row_upd_clust_rec(
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
ut_a(err == DB_SUCCESS);
- /* Write out the externally stored columns while still
- x-latching index->lock and block->lock. We have to
- mtr_commit(mtr) first, so that the redo log will be
- written in the correct order. Otherwise, we would run
- into trouble on crash recovery if mtr freed B-tree
- pages on which some of the big_rec fields will be
- written. */
- btr_cur_mtr_commit_and_start(btr_cur, mtr);
-
+ /* Write out the externally stored columns, but
+ allocate the pages and write the pointers using the
+ mini-transaction of the record update. If any pages
+ were freed in the update, temporarily mark them
+ allocated so that off-page columns will not overwrite
+ them. We must do this, because we write the redo log
+ for the BLOB writes before writing the redo log for
+ the record update. */
+
+ btr_mark_freed_leaves(index, mtr, TRUE);
rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields(
index, rec,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
- big_rec, mtr);
+ big_rec, mtr, mtr);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@@ -1618,6 +1619,8 @@ row_upd_clust_rec(
to the undo log, and thus the record cannot be rolled
back. */
ut_a(err == DB_SUCCESS);
+ /* Mark the pages free again in order to avoid a leak. */
+ btr_mark_freed_leaves(index, mtr, FALSE);
}
mtr_commit(mtr);
diff --git a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c
index 329565943c8..ce09862f317 100644
--- a/storage/innobase/trx/trx0undo.c
+++ b/storage/innobase/trx/trx0undo.c
@@ -864,7 +864,7 @@ trx_undo_add_page(
page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
+ TRX_UNDO_FSEG_HEADER,
undo->top_page_no + 1, FSP_UP,
- TRUE, mtr);
+ TRUE, mtr, mtr);
fil_space_release_free_extents(undo->space, n_reserved);
diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog
index 0b90b5729d5..96b6a47085a 100644
--- a/storage/innodb_plugin/ChangeLog
+++ b/storage/innodb_plugin/ChangeLog
@@ -1,3 +1,17 @@
+2011-08-29 The InnoDB Team
+
+ * btr/btr0btr.c, btr/btr0cur.c, fsp/fsp0fsp.c,
+ include/btr0btr.h, include/btr0cur.h, include/fsp0fsp.h,
+ include/mtr0mtr.h, include/mtr0mtr.ic, mtr/mtr0mtr.c,
+ row/row0ins.c, row/row0row.c, row/row0upd.c, trx/trx0undo.c:
+ Fix Bug#12704861 Corruption after a crash during BLOB update
+ and other regressions from the fix of Bug#12612184
+
+2011-08-23 The InnoDB Team
+
+ * include/trx0undo.h, trx/trx0rec.c, trx/trx0undo.c:
+ Fix Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE
+
2011-08-15 The InnoDB Team
* btr/btr0btr.c, btr/btr0cur.c, btr/btr0pcur.c, btr/btr0sea.c,
diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c
index 5e6724bbd54..71e1599d19e 100644
--- a/storage/innodb_plugin/btr/btr0btr.c
+++ b/storage/innodb_plugin/btr/btr0btr.c
@@ -906,28 +906,29 @@ btr_page_alloc_for_ibuf(
/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
-@return new allocated block, x-latched; NULL if out of space */
-UNIV_INTERN
-buf_block_t*
-btr_page_alloc(
-/*===========*/
+@return allocated page number, FIL_NULL if out of space */
+static __attribute__((nonnull(1,5), warn_unused_result))
+ulint
+btr_page_alloc_low(
+/*===============*/
dict_index_t* index, /*!< in: index */
ulint hint_page_no, /*!< in: hint of a good page */
byte file_direction, /*!< in: direction where a possible
page split is made */
ulint level, /*!< in: level where the page is placed
in the tree */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr, /*!< in/out: mini-transaction
+ for the allocation */
+ mtr_t* init_mtr) /*!< in/out: mini-transaction
+ in which the page should be
+ initialized (may be the same
+ as mtr), or NULL if it should
+ not be initialized (the page
+ at hint was previously freed
+ in mtr) */
{
fseg_header_t* seg_header;
page_t* root;
- buf_block_t* new_block;
- ulint new_page_no;
-
- if (dict_index_is_ibuf(index)) {
-
- return(btr_page_alloc_for_ibuf(index, mtr));
- }
root = btr_root_get(index, mtr);
@@ -941,8 +942,42 @@ btr_page_alloc(
reservation for free extents, and thus we know that a page can
be allocated: */
- new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
- file_direction, TRUE, mtr);
+ return(fseg_alloc_free_page_general(
+ seg_header, hint_page_no, file_direction,
+ TRUE, mtr, init_mtr));
+}
+
+/**************************************************************//**
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents!
+@return new allocated block, x-latched; NULL if out of space */
+UNIV_INTERN
+buf_block_t*
+btr_page_alloc(
+/*===========*/
+ dict_index_t* index, /*!< in: index */
+ ulint hint_page_no, /*!< in: hint of a good page */
+ byte file_direction, /*!< in: direction where a possible
+ page split is made */
+ ulint level, /*!< in: level where the page is placed
+ in the tree */
+ mtr_t* mtr, /*!< in/out: mini-transaction
+ for the allocation */
+ mtr_t* init_mtr) /*!< in/out: mini-transaction
+ for x-latching and initializing
+ the page */
+{
+ buf_block_t* new_block;
+ ulint new_page_no;
+
+ if (dict_index_is_ibuf(index)) {
+
+ return(btr_page_alloc_for_ibuf(index, mtr));
+ }
+
+ new_page_no = btr_page_alloc_low(
+ index, hint_page_no, file_direction, level, mtr, init_mtr);
+
if (new_page_no == FIL_NULL) {
return(NULL);
@@ -950,9 +985,16 @@ btr_page_alloc(
new_block = buf_page_get(dict_index_get_space(index),
dict_table_zip_size(index->table),
- new_page_no, RW_X_LATCH, mtr);
+ new_page_no, RW_X_LATCH, init_mtr);
buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
+ if (mtr->freed_clust_leaf) {
+ mtr_memo_release(mtr, new_block, MTR_MEMO_FREE_CLUST_LEAF);
+ ut_ad(!mtr_memo_contains(mtr, new_block,
+ MTR_MEMO_FREE_CLUST_LEAF));
+ }
+
+ ut_ad(btr_freed_leaves_validate(mtr));
return(new_block);
}
@@ -1065,6 +1107,15 @@ btr_page_free_low(
fseg_free_page(seg_header,
buf_block_get_space(block),
buf_block_get_page_no(block), mtr);
+
+ /* The page was marked free in the allocation bitmap, but it
+ should remain buffer-fixed until mtr_commit(mtr) or until it
+ is explicitly freed from the mini-transaction. */
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ /* TODO: Discard any operations on the page from the redo log
+ and remove the block from the flush list and the buffer pool.
+ This would free up buffer pool earlier and reduce writes to
+ both the tablespace and the redo log. */
}
/**************************************************************//**
@@ -1078,14 +1129,141 @@ btr_page_free(
buf_block_t* block, /*!< in: block to be freed, x-latched */
mtr_t* mtr) /*!< in: mtr */
{
- ulint level;
-
- level = btr_page_get_level(buf_block_get_frame(block), mtr);
+ const page_t* page = buf_block_get_frame(block);
+ ulint level = btr_page_get_level(page, mtr);
+ ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX);
btr_page_free_low(index, block, level, mtr);
+
+ /* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+ if (level == 0 && dict_index_is_clust(index)) {
+ /* We may have to call btr_mark_freed_leaves() to
+ temporarily mark the block nonfree for invoking
+ btr_store_big_rec_extern_fields_func() after an
+ update. Remember that the block was freed. */
+ mtr->freed_clust_leaf = TRUE;
+ mtr_memo_push(mtr, block, MTR_MEMO_FREE_CLUST_LEAF);
+ }
+
+ ut_ad(btr_freed_leaves_validate(mtr));
}
/**************************************************************//**
+Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
+For invoking btr_store_big_rec_extern_fields() after an update,
+we must temporarily mark freed clustered index pages allocated, so
+that off-page columns will not be allocated from them. Between the
+call to btr_store_big_rec_extern_fields() and mtr_commit() we have to
+mark the pages free again, so that no pages will be leaked. */
+UNIV_INTERN
+void
+btr_mark_freed_leaves(
+/*==================*/
+ dict_index_t* index, /*!< in/out: clustered index */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */
+{
+ /* This is loosely based on mtr_memo_release(). */
+
+ ulint offset;
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
+ if (!mtr->freed_clust_leaf) {
+ return;
+ }
+
+ offset = dyn_array_get_data_size(&mtr->memo);
+
+ while (offset > 0) {
+ mtr_memo_slot_t* slot;
+ buf_block_t* block;
+
+ offset -= sizeof *slot;
+
+ slot = dyn_array_get_element(&mtr->memo, offset);
+
+ if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
+ continue;
+ }
+
+ /* Because btr_page_alloc() does invoke
+ mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
+ blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
+ memo must still be clustered index leaf tree pages. */
+ block = slot->object;
+ ut_a(buf_block_get_space(block)
+ == dict_index_get_space(index));
+ ut_a(fil_page_get_type(buf_block_get_frame(block))
+ == FIL_PAGE_INDEX);
+ ut_a(page_is_leaf(buf_block_get_frame(block)));
+
+ if (nonfree) {
+ /* Allocate the same page again. */
+ ulint page_no;
+ page_no = btr_page_alloc_low(
+ index, buf_block_get_page_no(block),
+ FSP_NO_DIR, 0, mtr, NULL);
+ ut_a(page_no == buf_block_get_page_no(block));
+ } else {
+ /* Assert that the page is allocated and free it. */
+ btr_page_free_low(index, block, 0, mtr);
+ }
+ }
+
+ ut_ad(btr_freed_leaves_validate(mtr));
+}
+
+#ifdef UNIV_DEBUG
+/**************************************************************//**
+Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
+@see btr_mark_freed_leaves()
+@return TRUE */
+UNIV_INTERN
+ibool
+btr_freed_leaves_validate(
+/*======================*/
+ mtr_t* mtr) /*!< in: mini-transaction */
+{
+ ulint offset;
+
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
+ offset = dyn_array_get_data_size(&mtr->memo);
+
+ while (offset > 0) {
+ const mtr_memo_slot_t* slot;
+ const buf_block_t* block;
+
+ offset -= sizeof *slot;
+
+ slot = dyn_array_get_element(&mtr->memo, offset);
+
+ if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
+ continue;
+ }
+
+ ut_a(mtr->freed_clust_leaf);
+ /* Because btr_page_alloc() does invoke
+ mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
+ blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
+ memo must still be clustered index leaf tree pages. */
+ block = slot->object;
+ ut_a(fil_page_get_type(buf_block_get_frame(block))
+ == FIL_PAGE_INDEX);
+ ut_a(page_is_leaf(buf_block_get_frame(block)));
+ }
+
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/**************************************************************//**
Sets the child node file address in a node pointer. */
UNIV_INLINE
void
@@ -1806,7 +1984,7 @@ btr_root_raise_and_insert(
level = btr_page_get_level(root, mtr);
- new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
+ new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr);
new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block);
ut_a(!new_page_zip == !root_page_zip);
@@ -2542,7 +2720,7 @@ func_start:
/* 2. Allocate a new page to the index */
new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr);
+ btr_page_get_level(page, mtr), mtr, mtr);
new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block);
btr_page_create(new_block, new_page_zip, cursor->index,
diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c
index 5cefa51bcd5..f1c2c2ddd5e 100644
--- a/storage/innodb_plugin/btr/btr0cur.c
+++ b/storage/innodb_plugin/btr/btr0cur.c
@@ -2414,39 +2414,6 @@ return_after_reservations:
return(err);
}
-/**************************************************************//**
-Commits and restarts a mini-transaction so that it will retain an
-x-lock on index->lock and the cursor page. */
-UNIV_INTERN
-void
-btr_cur_mtr_commit_and_start(
-/*=========================*/
- btr_cur_t* cursor, /*!< in: cursor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- buf_block_t* block;
-
- block = btr_cur_get_block(cursor);
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* Keep the locks across the mtr_commit(mtr). */
- rw_lock_x_lock(dict_index_get_lock(cursor->index));
- rw_lock_x_lock(&block->lock);
- mutex_enter(&block->mutex);
- buf_block_buf_fix_inc(block, __FILE__, __LINE__);
- mutex_exit(&block->mutex);
- /* Write out the redo log. */
- mtr_commit(mtr);
- mtr_start(mtr);
- /* Reassociate the locks with the mini-transaction.
- They will be released on mtr_commit(mtr). */
- mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK);
- mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
-}
-
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
/****************************************************************//**
@@ -3901,6 +3868,9 @@ btr_store_big_rec_extern_fields_func(
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
+ const big_rec_t*big_rec_vec, /*!< in: vector containing fields
+ to be stored externally */
+
#ifdef UNIV_DEBUG
mtr_t* local_mtr, /*!< in: mtr containing the
latch to rec and to the tree */
@@ -3909,9 +3879,11 @@ btr_store_big_rec_extern_fields_func(
ibool update_in_place,/*!< in: TRUE if the record is updated
in place (not delete+insert) */
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- const big_rec_t*big_rec_vec) /*!< in: vector containing fields
- to be stored externally */
-
+ mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL;
+ in an update, local_mtr for
+ allocating BLOB pages and
+ updating BLOB pointers; alloc_mtr
+ must not have freed any leaf pages */
{
ulint rec_page_no;
byte* field_ref;
@@ -3930,6 +3902,9 @@ btr_store_big_rec_extern_fields_func(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_any_extern(offsets));
+ ut_ad(local_mtr);
+ ut_ad(!alloc_mtr || alloc_mtr == local_mtr);
+ ut_ad(!update_in_place || alloc_mtr);
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
@@ -3945,6 +3920,25 @@ btr_store_big_rec_extern_fields_func(
rec_page_no = buf_block_get_page_no(rec_block);
ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
+ if (alloc_mtr) {
+ /* Because alloc_mtr will be committed after
+ mtr, it is possible that the tablespace has been
+ extended when the B-tree record was updated or
+ inserted, or it will be extended while allocating
+ pages for big_rec.
+
+ TODO: In mtr (not alloc_mtr), write a redo log record
+ about extending the tablespace to its current size,
+ and remember the current size. Whenever the tablespace
+ grows as pages are allocated, write further redo log
+ records to mtr. (Currently tablespace extension is not
+ covered by the redo log. If it were, the record would
+ only be written to alloc_mtr, which is committed after
+ mtr.) */
+ } else {
+ alloc_mtr = &mtr;
+ }
+
if (UNIV_LIKELY_NULL(page_zip)) {
int err;
@@ -4021,7 +4015,7 @@ btr_store_big_rec_extern_fields_func(
}
block = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, &mtr);
+ FSP_NO_DIR, 0, alloc_mtr, &mtr);
if (UNIV_UNLIKELY(block == NULL)) {
mtr_commit(&mtr);
@@ -4148,11 +4142,15 @@ btr_store_big_rec_extern_fields_func(
goto next_zip_page;
}
- rec_block = buf_page_get(space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(rec_block,
- SYNC_NO_ORDER_CHECK);
+ if (alloc_mtr == &mtr) {
+ rec_block = buf_page_get(
+ space_id, zip_size,
+ rec_page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(
+ rec_block,
+ SYNC_NO_ORDER_CHECK);
+ }
if (err == Z_STREAM_END) {
mach_write_to_4(field_ref
@@ -4186,7 +4184,8 @@ btr_store_big_rec_extern_fields_func(
page_zip_write_blob_ptr(
page_zip, rec, index, offsets,
- big_rec_vec->fields[i].field_no, &mtr);
+ big_rec_vec->fields[i].field_no,
+ alloc_mtr);
next_zip_page:
prev_page_no = page_no;
@@ -4231,19 +4230,23 @@ next_zip_page:
extern_len -= store_len;
- rec_block = buf_page_get(space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(rec_block,
- SYNC_NO_ORDER_CHECK);
+ if (alloc_mtr == &mtr) {
+ rec_block = buf_page_get(
+ space_id, zip_size,
+ rec_page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(
+ rec_block,
+ SYNC_NO_ORDER_CHECK);
+ }
mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES, alloc_mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_LEN + 4,
big_rec_vec->fields[i].len
- extern_len,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES, alloc_mtr);
if (prev_page_no == FIL_NULL) {
btr_blob_dbg_add_blob(
@@ -4253,18 +4256,19 @@ next_zip_page:
mlog_write_ulint(field_ref
+ BTR_EXTERN_SPACE_ID,
- space_id,
- MLOG_4BYTES, &mtr);
+ space_id, MLOG_4BYTES,
+ alloc_mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_PAGE_NO,
- page_no,
- MLOG_4BYTES, &mtr);
+ page_no, MLOG_4BYTES,
+ alloc_mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_OFFSET,
FIL_PAGE_DATA,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES,
+ alloc_mtr);
}
prev_page_no = page_no;
diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
index cd1461d22b7..47300627acc 100644
--- a/storage/innodb_plugin/buf/buf0buf.c
+++ b/storage/innodb_plugin/buf/buf0buf.c
@@ -1175,29 +1175,6 @@ buf_page_set_accessed_make_young(
}
/********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
-{
- buf_block_t* block;
-
- buf_pool_mutex_enter();
-
- block = (buf_block_t*) buf_page_hash_get(space, offset);
-
- if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
- block->check_index_page_at_flush = FALSE;
- }
-
- buf_pool_mutex_exit();
-}
-
-/********************************************************************//**
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
pool if it is found there.
diff --git a/storage/innodb_plugin/fsp/fsp0fsp.c b/storage/innodb_plugin/fsp/fsp0fsp.c
index d091a14c474..19846b63d5b 100644
--- a/storage/innodb_plugin/fsp/fsp0fsp.c
+++ b/storage/innodb_plugin/fsp/fsp0fsp.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -312,8 +312,9 @@ fsp_fill_free_list(
descriptor page and ibuf bitmap page;
then we do not allocate more extents */
ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in: space header */
- mtr_t* mtr); /*!< in: mtr */
+ fsp_header_t* header, /*!< in/out: space header */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
@@ -326,14 +327,20 @@ fseg_alloc_free_page_low(
ulint space, /*!< in: space */
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
- fseg_inode_t* seg_inode, /*!< in: segment inode */
+ fseg_inode_t* seg_inode, /*!< in/out: segment inode */
ulint hint, /*!< in: hint of which page would be desirable */
byte direction, /*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr); /*!< in: mtr handle */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ mtr_t* init_mtr)/*!< in/out: mini-transaction in which the
+ page should be initialized
+ (may be the same as mtr), or NULL if it
+ should not be initialized (the page at hint
+ was previously freed in mtr) */
+ __attribute__((warn_unused_result, nonnull(3,6)));
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
@@ -701,17 +708,18 @@ list, if not free limit == space size. This adding is necessary to make the
descriptor defined, as they are uninitialized above the free limit.
@return pointer to the extent descriptor, NULL if the page does not
exist in the space or if the offset exceeds the free limit */
-UNIV_INLINE
+UNIV_INLINE __attribute__((nonnull, warn_unused_result))
xdes_t*
xdes_get_descriptor_with_space_hdr(
/*===============================*/
- fsp_header_t* sp_header,/*!< in/out: space header, x-latched */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page offset;
- if equal to the free limit,
- we try to add new extents to
- the space free list */
- mtr_t* mtr) /*!< in: mtr handle */
+ fsp_header_t* sp_header, /*!< in/out: space header, x-latched
+ in mtr */
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: page offset; if equal
+ to the free limit, we try to
+ add new extents to the space
+ free list */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint limit;
ulint size;
@@ -719,11 +727,9 @@ xdes_get_descriptor_with_space_hdr(
ulint descr_page_no;
page_t* descr_page;
- ut_ad(mtr);
ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
/* Read free limit and space size */
limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
@@ -773,7 +779,7 @@ is necessary to make the descriptor defined, as they are uninitialized
above the free limit.
@return pointer to the extent descriptor, NULL if the page does not
exist in the space or if the offset exceeds the free limit */
-static
+static __attribute__((nonnull, warn_unused_result))
xdes_t*
xdes_get_descriptor(
/*================*/
@@ -782,7 +788,7 @@ xdes_get_descriptor(
or 0 for uncompressed pages */
ulint offset, /*!< in: page offset; if equal to the free limit,
we try to add new extents to the space free list */
- mtr_t* mtr) /*!< in: mtr handle */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
buf_block_t* block;
fsp_header_t* sp_header;
@@ -1160,14 +1166,14 @@ fsp_header_get_tablespace_size(void)
Tries to extend a single-table tablespace so that a page would fit in the
data file.
@return TRUE if success */
-static
+static __attribute__((nonnull, warn_unused_result))
ibool
fsp_try_extend_data_file_with_pages(
/*================================*/
ulint space, /*!< in: space */
ulint page_no, /*!< in: page number */
- fsp_header_t* header, /*!< in: space header */
- mtr_t* mtr) /*!< in: mtr */
+ fsp_header_t* header, /*!< in/out: space header */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ibool success;
ulint actual_size;
@@ -1192,7 +1198,7 @@ fsp_try_extend_data_file_with_pages(
/***********************************************************************//**
Tries to extend the last data file of a tablespace if it is auto-extending.
@return FALSE if not auto-extending */
-static
+static __attribute__((nonnull))
ibool
fsp_try_extend_data_file(
/*=====================*/
@@ -1202,8 +1208,8 @@ fsp_try_extend_data_file(
the actual file size rounded down to
megabyte */
ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in: space header */
- mtr_t* mtr) /*!< in: mtr */
+ fsp_header_t* header, /*!< in/out: space header */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint size;
ulint zip_size;
@@ -1339,7 +1345,7 @@ fsp_fill_free_list(
then we do not allocate more extents */
ulint space, /*!< in: space */
fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint limit;
ulint size;
@@ -1538,9 +1544,46 @@ fsp_alloc_free_extent(
}
/**********************************************************************//**
+Marks one free page slot of an XDES_FREE_FRAG extent as allocated. */
+static __attribute__((nonnull))
+void
+fsp_alloc_from_free_frag(
+/*=====================*/
+ fsp_header_t* header, /*!< in/out: tablespace header */
+ xdes_t* descr, /*!< in/out: extent descriptor */
+ ulint bit, /*!< in: slot to allocate in the extent */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ulint frag_n_used;
+
+ ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
+ ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr));
+ xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
+
+ /* Update the FRAG_N_USED field */
+ frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
+ mtr);
+ frag_n_used++;
+ mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
+ mtr);
+ if (xdes_is_full(descr, mtr)) {
+ /* The fragment is full: move it to another list */
+ flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
+ mtr);
+ xdes_set_state(descr, XDES_FULL_FRAG, mtr);
+
+ flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
+ mtr);
+ mlog_write_ulint(header + FSP_FRAG_N_USED,
+ frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
+ mtr);
+ }
+}
+
+/**********************************************************************//**
Allocates a single free page from a space. The page is marked as used.
@return the page offset, FIL_NULL if no page could be allocated */
-static
+static __attribute__((nonnull, warn_unused_result))
ulint
fsp_alloc_free_page(
/*================*/
@@ -1548,19 +1591,22 @@ fsp_alloc_free_page(
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint hint, /*!< in: hint of which page would be desirable */
- mtr_t* mtr) /*!< in: mtr handle */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ mtr_t* init_mtr)/*!< in/out: mini-transaction in which the
+ page should be initialized
+ (may be the same as mtr) */
{
fsp_header_t* header;
fil_addr_t first;
xdes_t* descr;
buf_block_t* block;
ulint free;
- ulint frag_n_used;
ulint page_no;
ulint space_size;
ibool success;
ut_ad(mtr);
+ ut_ad(init_mtr);
header = fsp_get_space_header(space, zip_size, mtr);
@@ -1642,38 +1688,19 @@ fsp_alloc_free_page(
}
}
- xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
-
- /* Update the FRAG_N_USED field */
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- frag_n_used++;
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
- mtr);
- if (xdes_is_full(descr, mtr)) {
- /* The fragment is full: move it to another list */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FULL_FRAG, mtr);
-
- flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
- mtr);
- }
+ fsp_alloc_from_free_frag(header, descr, free, mtr);
/* Initialize the allocated page to the buffer pool, so that it can
be obtained immediately with buf_page_get without need for a disk
read. */
- buf_page_create(space, page_no, zip_size, mtr);
+ buf_page_create(space, page_no, zip_size, init_mtr);
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+ block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, init_mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
/* Prior contents of the page should be ignored */
- fsp_init_file_page(block, mtr);
+ fsp_init_file_page(block, init_mtr);
return(page_no);
}
@@ -1909,7 +1936,7 @@ fsp_alloc_seg_inode_page(
zip_size = dict_table_flags_to_zip_size(
mach_read_from_4(FSP_SPACE_FLAGS + space_header));
- page_no = fsp_alloc_free_page(space, zip_size, 0, mtr);
+ page_no = fsp_alloc_free_page(space, zip_size, 0, mtr, mtr);
if (page_no == FIL_NULL) {
@@ -2323,7 +2350,7 @@ fseg_create_general(
if (page == 0) {
page = fseg_alloc_free_page_low(space, zip_size,
- inode, 0, FSP_UP, mtr);
+ inode, 0, FSP_UP, mtr, mtr);
if (page == FIL_NULL) {
@@ -2572,14 +2599,19 @@ fseg_alloc_free_page_low(
ulint space, /*!< in: space */
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
- fseg_inode_t* seg_inode, /*!< in: segment inode */
+ fseg_inode_t* seg_inode, /*!< in/out: segment inode */
ulint hint, /*!< in: hint of which page would be desirable */
byte direction, /*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr) /*!< in: mtr handle */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ mtr_t* init_mtr)/*!< in/out: mini-transaction in which the
+ page should be initialized
+ (may be the same as mtr), or NULL if it
+ should not be initialized (the page at hint
+ was previously freed in mtr) */
{
fsp_header_t* space_header;
ulint space_size;
@@ -2590,7 +2622,6 @@ fseg_alloc_free_page_low(
ulint ret_page; /*!< the allocated page offset, FIL_NULL
if could not be allocated */
xdes_t* ret_descr; /*!< the extent of the allocated page */
- ibool frag_page_allocated = FALSE;
ibool success;
ulint n;
@@ -2612,6 +2643,8 @@ fseg_alloc_free_page_low(
if (descr == NULL) {
/* Hint outside space or too high above free limit: reset
hint */
+ ut_a(init_mtr);
+ /* The file space header page is always allocated. */
hint = 0;
descr = xdes_get_descriptor(space, zip_size, hint, mtr);
}
@@ -2623,15 +2656,20 @@ fseg_alloc_free_page_low(
mtr), seg_id))
&& (xdes_get_bit(descr, XDES_FREE_BIT,
hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
-
+take_hinted_page:
/* 1. We can take the hinted page
=================================*/
ret_descr = descr;
ret_page = hint;
+ /* Skip the check for extending the tablespace. If the
+ page hint were not within the size of the tablespace,
+ we would have got (descr == NULL) above and reset the hint. */
+ goto got_hinted_page;
/*-----------------------------------------------------------*/
- } else if ((xdes_get_state(descr, mtr) == XDES_FREE)
- && ((reserved - used) < reserved / FSEG_FILLFACTOR)
- && (used >= FSEG_FRAG_LIMIT)) {
+ } else if (xdes_get_state(descr, mtr) == XDES_FREE
+ && (!init_mtr
+ || ((reserved - used < reserved / FSEG_FILLFACTOR)
+ && used >= FSEG_FRAG_LIMIT))) {
/* 2. We allocate the free extent from space and can take
=========================================================
@@ -2649,8 +2687,20 @@ fseg_alloc_free_page_low(
/* Try to fill the segment free list */
fseg_fill_free_list(seg_inode, space, zip_size,
hint + FSP_EXTENT_SIZE, mtr);
- ret_page = hint;
+ goto take_hinted_page;
/*-----------------------------------------------------------*/
+ } else if (!init_mtr) {
+ ut_a(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
+ fsp_alloc_from_free_frag(space_header, descr,
+ hint % FSP_EXTENT_SIZE, mtr);
+ ret_page = hint;
+ ret_descr = NULL;
+
+ /* Put the page in the fragment page array of the segment */
+ n = fseg_find_free_frag_page_slot(seg_inode, mtr);
+ ut_a(n != FIL_NULL);
+ fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr);
+ goto got_hinted_page;
} else if ((direction != FSP_NO_DIR)
&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
&& (used >= FSEG_FRAG_LIMIT)
@@ -2710,11 +2760,10 @@ fseg_alloc_free_page_low(
} else if (used < FSEG_FRAG_LIMIT) {
/* 6. We allocate an individual page from the space
===================================================*/
- ret_page = fsp_alloc_free_page(space, zip_size, hint, mtr);
+ ret_page = fsp_alloc_free_page(space, zip_size, hint,
+ mtr, init_mtr);
ret_descr = NULL;
- frag_page_allocated = TRUE;
-
if (ret_page != FIL_NULL) {
/* Put the page in the fragment page array of the
segment */
@@ -2724,6 +2773,10 @@ fseg_alloc_free_page_low(
fseg_set_nth_frag_page_no(seg_inode, n, ret_page,
mtr);
}
+
+ /* fsp_alloc_free_page() invoked fsp_init_file_page()
+ already. */
+ return(ret_page);
/*-----------------------------------------------------------*/
} else {
/* 7. We allocate a new extent and take its first page
@@ -2771,26 +2824,34 @@ fseg_alloc_free_page_low(
}
}
- if (!frag_page_allocated) {
+got_hinted_page:
+ {
/* Initialize the allocated page to buffer pool, so that it
can be obtained immediately with buf_page_get without need
for a disk read */
buf_block_t* block;
ulint zip_size = dict_table_flags_to_zip_size(
mach_read_from_4(FSP_SPACE_FLAGS + space_header));
+ mtr_t* block_mtr = init_mtr ? init_mtr : mtr;
- block = buf_page_create(space, ret_page, zip_size, mtr);
+ block = buf_page_create(space, ret_page, zip_size, block_mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size,
ret_page, RW_X_LATCH,
- mtr))) {
+ block_mtr))) {
ut_error;
}
- /* The prior contents of the page should be ignored */
- fsp_init_file_page(block, mtr);
+ if (init_mtr) {
+ /* The prior contents of the page should be ignored */
+ fsp_init_file_page(block, init_mtr);
+ }
+ }
+ /* ret_descr == NULL if the block was allocated from free_frag
+ (XDES_FREE_FRAG) */
+ if (ret_descr != NULL) {
/* At this point we know the extent and the page offset.
The extent is still in the appropriate list (FSEG_NOT_FULL
or FSEG_FREE), and the page is not yet marked as used. */
@@ -2803,8 +2864,6 @@ fseg_alloc_free_page_low(
fseg_mark_page_used(seg_inode, space, zip_size, ret_page, mtr);
}
- buf_reset_check_index_page_at_flush(space, ret_page);
-
return(ret_page);
}
@@ -2817,7 +2876,7 @@ UNIV_INTERN
ulint
fseg_alloc_free_page_general(
/*=========================*/
- fseg_header_t* seg_header,/*!< in: segment header */
+ fseg_header_t* seg_header,/*!< in/out: segment header */
ulint hint, /*!< in: hint of which page would be desirable */
byte direction,/*!< in: if the new page is needed because
of an index page split, and records are
@@ -2829,7 +2888,11 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr) /*!< in: mtr handle */
+ mtr_t* mtr, /*!< in/out: mini-transaction handle */
+ mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
+ in which the page should be initialized,
+ or NULL if this is a "fake allocation" of
+ a page that was previously freed in mtr */
{
fseg_inode_t* inode;
ulint space;
@@ -2871,7 +2934,8 @@ fseg_alloc_free_page_general(
}
page_no = fseg_alloc_free_page_low(space, zip_size,
- inode, hint, direction, mtr);
+ inode, hint, direction,
+ mtr, init_mtr);
if (!has_done_reservation) {
fil_space_release_free_extents(space, n_reserved);
}
@@ -2880,28 +2944,6 @@ fseg_alloc_free_page_general(
}
/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@return allocated page offset, FIL_NULL if no page could be allocated */
-UNIV_INTERN
-ulint
-fseg_alloc_free_page(
-/*=================*/
- fseg_header_t* seg_header,/*!< in: segment header */
- ulint hint, /*!< in: hint of which page would be desirable */
- byte direction,/*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- return(fseg_alloc_free_page_general(seg_header, hint, direction,
- FALSE, mtr));
-}
-
-/**********************************************************************//**
Checks that we have at least 2 frag pages free in the first extent of a
single-table tablespace, and they are also physically initialized to the data
file. That is we have already extended the data file so that those pages are
diff --git a/storage/innodb_plugin/include/btr0btr.h b/storage/innodb_plugin/include/btr0btr.h
index c0a038dd21d..476ad29adac 100644
--- a/storage/innodb_plugin/include/btr0btr.h
+++ b/storage/innodb_plugin/include/btr0btr.h
@@ -557,7 +557,12 @@ btr_page_alloc(
page split is made */
ulint level, /*!< in: level where the page is placed
in the tree */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr, /*!< in/out: mini-transaction
+ for the allocation */
+ mtr_t* init_mtr) /*!< in/out: mini-transaction
+ for x-latching and initializing
+ the page */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
@@ -580,6 +585,33 @@ btr_page_free_low(
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level */
mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
+Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
+For invoking btr_store_big_rec_extern_fields() after an update,
+we must temporarily mark freed clustered index pages allocated, so
+that off-page columns will not be allocated from them. Between the
+btr_store_big_rec_extern_fields() and mtr_commit() we have to
+mark the pages free again, so that no pages will be leaked. */
+UNIV_INTERN
+void
+btr_mark_freed_leaves(
+/*==================*/
+ dict_index_t* index, /*!< in/out: clustered index */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */
+ __attribute__((nonnull));
+#ifdef UNIV_DEBUG
+/**************************************************************//**
+Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
+@see btr_mark_freed_leaves()
+@return TRUE */
+UNIV_INTERN
+ibool
+btr_freed_leaves_validate(
+/*======================*/
+ mtr_t* mtr) /*!< in: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
#ifdef UNIV_BTR_PRINT
/*************************************************************//**
Prints size info of a B-tree. */
diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h
index 6094a2a6c7a..1d97c5b9452 100644
--- a/storage/innodb_plugin/include/btr0cur.h
+++ b/storage/innodb_plugin/include/btr0cur.h
@@ -326,16 +326,6 @@ btr_cur_pessimistic_update(
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr; must be committed before
latching any further pages */
-/*****************************************************************
-Commits and restarts a mini-transaction so that it will retain an
-x-lock on index->lock and the cursor page. */
-UNIV_INTERN
-void
-btr_cur_mtr_commit_and_start(
-/*=========================*/
- btr_cur_t* cursor, /*!< in: cursor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- __attribute__((nonnull));
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -540,6 +530,8 @@ btr_store_big_rec_extern_fields_func(
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
+ const big_rec_t*big_rec_vec, /*!< in: vector containing fields
+ to be stored externally */
#ifdef UNIV_DEBUG
mtr_t* local_mtr, /*!< in: mtr containing the
latch to rec and to the tree */
@@ -548,9 +540,12 @@ btr_store_big_rec_extern_fields_func(
ibool update_in_place,/*! in: TRUE if the record is updated
in place (not delete+insert) */
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- const big_rec_t*big_rec_vec) /*!< in: vector containing fields
- to be stored externally */
- __attribute__((nonnull));
+ mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL;
+ in an update, local_mtr for
+ allocating BLOB pages and
+ updating BLOB pointers; alloc_mtr
+ must not have freed any leaf pages */
+ __attribute__((nonnull(1,2,3,4,5), warn_unused_result));
/** Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand.
@@ -559,21 +554,22 @@ file segment of the index tree.
@param index in: clustered index; MUST be X-latched by mtr
@param b in/out: block containing rec; MUST be X-latched by mtr
@param rec in/out: clustered index record
-@param offsets in: rec_get_offsets(rec, index);
+@param offs in: rec_get_offsets(rec, index);
the "external storage" flags in offsets will not be adjusted
+@param big in: vector containing fields to be stored externally
@param mtr in: mini-transaction that holds x-latch on index and b
@param upd in: TRUE if the record is updated in place (not delete+insert)
-@param big in: vector containing fields to be stored externally
+@param rmtr in/out: in updates, the mini-transaction that holds rec
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
#ifdef UNIV_DEBUG
-# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
- btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big)
+# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \
+ btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,mtr,upd,rmtr)
#elif defined UNIV_BLOB_LIGHT_DEBUG
-# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
- btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big)
+# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \
+ btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,upd,rmtr)
#else
-# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
- btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big)
+# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \
+ btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,rmtr)
#endif
/*******************************************************************//**
diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
index 9856bfce409..557bc17d311 100644
--- a/storage/innodb_plugin/include/buf0buf.h
+++ b/storage/innodb_plugin/include/buf0buf.h
@@ -372,15 +372,6 @@ buf_page_peek(
/*==========*/
ulint space, /*!< in: space id */
ulint offset);/*!< in: page number */
-/********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
/********************************************************************//**
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
diff --git a/storage/innodb_plugin/include/fsp0fsp.h b/storage/innodb_plugin/include/fsp0fsp.h
index 7abd3914eda..2221380c9a2 100644
--- a/storage/innodb_plugin/include/fsp0fsp.h
+++ b/storage/innodb_plugin/include/fsp0fsp.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -176,19 +176,18 @@ fseg_n_reserved_pages(
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize
file space fragmentation.
-@return the allocated page offset FIL_NULL if no page could be allocated */
-UNIV_INTERN
-ulint
-fseg_alloc_free_page(
-/*=================*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint hint, /*!< in: hint of which page would be desirable */
- byte direction, /*!< in: if the new page is needed because
+@param[in/out] seg_header segment header
+@param[in] hint hint of which page would be desirable
+@param[in] direction if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr); /*!< in: mtr handle */
+ FSP_UP, FSP_NO_DIR
+@param[in/out] mtr mini-transaction
+@return the allocated page offset, FIL_NULL if no page could be allocated */
+#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \
+ fseg_alloc_free_page_general(seg_header, hint, direction, \
+ FALSE, mtr, mtr)
/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
@@ -198,7 +197,7 @@ UNIV_INTERN
ulint
fseg_alloc_free_page_general(
/*=========================*/
- fseg_header_t* seg_header,/*!< in: segment header */
+ fseg_header_t* seg_header,/*!< in/out: segment header */
ulint hint, /*!< in: hint of which page would be desirable */
byte direction,/*!< in: if the new page is needed because
of an index page split, and records are
@@ -210,7 +209,12 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr); /*!< in: mtr handle */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
+ in which the page should be initialized,
+ or NULL if this is a "fake allocation" of
+ a page that was previously freed in mtr */
+ __attribute__((warn_unused_result, nonnull(1,5)));
/**********************************************************************//**
Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
diff --git a/storage/innodb_plugin/include/mtr0mtr.h b/storage/innodb_plugin/include/mtr0mtr.h
index bc3f1951be9..2a561131c09 100644
--- a/storage/innodb_plugin/include/mtr0mtr.h
+++ b/storage/innodb_plugin/include/mtr0mtr.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -53,6 +53,8 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
#define MTR_MEMO_MODIFY 54
#define MTR_MEMO_S_LOCK 55
#define MTR_MEMO_X_LOCK 56
+/** The mini-transaction freed a clustered index leaf page. */
+#define MTR_MEMO_FREE_CLUST_LEAF 57
/** @name Log item types
The log items are declared 'byte' so that the compiler can warn if val
@@ -387,9 +389,12 @@ struct mtr_struct{
#endif
dyn_array_t memo; /*!< memo stack for locks etc. */
dyn_array_t log; /*!< mini-transaction log */
- ibool modifications;
- /* TRUE if the mtr made modifications to
- buffer pool pages */
+ unsigned modifications:1;
+ /*!< TRUE if the mini-transaction
+ modified buffer pool pages */
+ unsigned freed_clust_leaf:1;
+ /*!< TRUE if MTR_MEMO_FREE_CLUST_LEAF
+ was logged in the mini-transaction */
ulint n_log_recs;
/* count of how many page initial log records
have been written to the mtr log */
diff --git a/storage/innodb_plugin/include/mtr0mtr.ic b/storage/innodb_plugin/include/mtr0mtr.ic
index 18f8e87b3cf..9c0ddff9132 100644
--- a/storage/innodb_plugin/include/mtr0mtr.ic
+++ b/storage/innodb_plugin/include/mtr0mtr.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -44,6 +44,7 @@ mtr_start(
mtr->log_mode = MTR_LOG_ALL;
mtr->modifications = FALSE;
+ mtr->freed_clust_leaf = FALSE;
mtr->n_log_recs = 0;
ut_d(mtr->state = MTR_ACTIVE);
@@ -67,7 +68,8 @@ mtr_memo_push(
ut_ad(object);
ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
- ut_ad(type <= MTR_MEMO_X_LOCK);
+ ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF);
+ ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf);
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
diff --git a/storage/innodb_plugin/include/trx0undo.h b/storage/innodb_plugin/include/trx0undo.h
index 4f15cd85833..c95f99d6417 100644
--- a/storage/innodb_plugin/include/trx0undo.h
+++ b/storage/innodb_plugin/include/trx0undo.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -204,17 +204,51 @@ trx_undo_add_page(
mtr_t* mtr); /*!< in: mtr which does not have a latch to any
undo log page; the caller must have reserved
the rollback segment mutex */
+/********************************************************************//**
+Frees the last undo log page.
+The caller must hold the rollback segment mutex. */
+UNIV_INTERN
+void
+trx_undo_free_last_page_func(
+/*==========================*/
+#ifdef UNIV_DEBUG
+ const trx_t* trx, /*!< in: transaction */
+#endif /* UNIV_DEBUG */
+ trx_undo_t* undo, /*!< in/out: undo log memory copy */
+ mtr_t* mtr) /*!< in/out: mini-transaction which does not
+ have a latch to any undo log page or which
+ has allocated the undo log page */
+ __attribute__((nonnull));
+#ifdef UNIV_DEBUG
+# define trx_undo_free_last_page(trx,undo,mtr) \
+ trx_undo_free_last_page_func(trx,undo,mtr)
+#else /* UNIV_DEBUG */
+# define trx_undo_free_last_page(trx,undo,mtr) \
+ trx_undo_free_last_page_func(undo,mtr)
+#endif /* UNIV_DEBUG */
+
/***********************************************************************//**
Truncates an undo log from the end. This function is used during a rollback
to free space from an undo log. */
UNIV_INTERN
void
-trx_undo_truncate_end(
-/*==================*/
- trx_t* trx, /*!< in: transaction whose undo log it is */
- trx_undo_t* undo, /*!< in: undo log */
- undo_no_t limit); /*!< in: all undo records with undo number
+trx_undo_truncate_end_func(
+/*=======================*/
+#ifdef UNIV_DEBUG
+ const trx_t* trx, /*!< in: transaction whose undo log it is */
+#endif /* UNIV_DEBUG */
+ trx_undo_t* undo, /*!< in/out: undo log */
+ undo_no_t limit) /*!< in: all undo records with undo number
>= this value should be truncated */
+ __attribute__((nonnull));
+#ifdef UNIV_DEBUG
+# define trx_undo_truncate_end(trx,undo,limit) \
+ trx_undo_truncate_end_func(trx,undo,limit)
+#else /* UNIV_DEBUG */
+# define trx_undo_truncate_end(trx,undo,limit) \
+ trx_undo_truncate_end_func(undo,limit)
+#endif /* UNIV_DEBUG */
+
/***********************************************************************//**
Truncates an undo log from the start. This function is used during a purge
operation. */
diff --git a/storage/innodb_plugin/mtr/mtr0mtr.c b/storage/innodb_plugin/mtr/mtr0mtr.c
index 417e97732bb..6dd5b6eb8c3 100644
--- a/storage/innodb_plugin/mtr/mtr0mtr.c
+++ b/storage/innodb_plugin/mtr/mtr0mtr.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -58,12 +58,11 @@ mtr_memo_slot_release(
buf_page_release((buf_block_t*)object, type, mtr);
} else if (type == MTR_MEMO_S_LOCK) {
rw_lock_s_unlock((rw_lock_t*)object);
-#ifdef UNIV_DEBUG
} else if (type != MTR_MEMO_X_LOCK) {
- ut_ad(type == MTR_MEMO_MODIFY);
+ ut_ad(type == MTR_MEMO_MODIFY
+ || type == MTR_MEMO_FREE_CLUST_LEAF);
ut_ad(mtr_memo_contains(mtr, object,
MTR_MEMO_PAGE_X_FIX));
-#endif /* UNIV_DEBUG */
} else {
rw_lock_x_unlock((rw_lock_t*)object);
}
diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c
index ea43cbfb5f1..0f158cdc706 100644
--- a/storage/innodb_plugin/row/row0ins.c
+++ b/storage/innodb_plugin/row/row0ins.c
@@ -2094,15 +2094,20 @@ row_ins_index_entry_low(
if (big_rec) {
ut_a(err == DB_SUCCESS);
/* Write out the externally stored
- columns while still x-latching
- index->lock and block->lock. We have
- to mtr_commit(mtr) first, so that the
- redo log will be written in the
- correct order. Otherwise, we would run
- into trouble on crash recovery if mtr
- freed B-tree pages on which some of
- the big_rec fields will be written. */
- btr_cur_mtr_commit_and_start(&cursor, &mtr);
+ columns, but allocate the pages and
+ write the pointers using the
+ mini-transaction of the record update.
+ If any pages were freed in the update,
+ temporarily mark them allocated so
+ that off-page columns will not
+ overwrite them. We must do this,
+ because we will write the redo log for
+ the BLOB writes before writing the
+ redo log for the record update. Thus,
+ redo log application at crash recovery
+ will see BLOBs being written to free pages. */
+
+ btr_mark_freed_leaves(index, &mtr, TRUE);
rec = btr_cur_get_rec(&cursor);
offsets = rec_get_offsets(
@@ -2111,7 +2116,8 @@ row_ins_index_entry_low(
err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(&cursor),
- rec, offsets, &mtr, FALSE, big_rec);
+ rec, offsets, big_rec, &mtr,
+ FALSE, &mtr);
/* If writing big_rec fails (for
example, because of DB_OUT_OF_FILE_SPACE),
the record will be corrupted. Even if
@@ -2124,6 +2130,9 @@ row_ins_index_entry_low(
undo log, and thus the record cannot
be rolled back. */
ut_a(err == DB_SUCCESS);
+ /* Free the pages again
+ in order to avoid a leak. */
+ btr_mark_freed_leaves(index, &mtr, FALSE);
goto stored_big_rec;
}
} else {
@@ -2165,7 +2174,7 @@ function_exit:
err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(&cursor),
- rec, offsets, &mtr, FALSE, big_rec);
+ rec, offsets, big_rec, &mtr, FALSE, NULL);
stored_big_rec:
if (modify) {
diff --git a/storage/innodb_plugin/row/row0row.c b/storage/innodb_plugin/row/row0row.c
index 9cdbbe76e04..e476ffae84e 100644
--- a/storage/innodb_plugin/row/row0row.c
+++ b/storage/innodb_plugin/row/row0row.c
@@ -243,19 +243,20 @@ row_build(
}
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- /* This condition can occur during crash recovery before
- trx_rollback_active() has completed execution.
-
- This condition is possible if the server crashed
- during an insert or update before
- btr_store_big_rec_extern_fields() did mtr_commit() all
- BLOB pointers to the clustered index record.
-
- If the record contains a null BLOB pointer, look up the
- transaction that holds the implicit lock on this record, and
- assert that it was recovered (and will soon be rolled back). */
- ut_a(!rec_offs_any_null_extern(rec, offsets)
- || trx_assert_recovered(row_get_rec_trx_id(rec, index, offsets)));
+ if (rec_offs_any_null_extern(rec, offsets)) {
+ /* This condition can occur during crash recovery
+ before trx_rollback_active() has completed execution.
+
+ This condition is possible if the server crashed
+ during an insert or update-by-delete-and-insert before
+ btr_store_big_rec_extern_fields() did mtr_commit() all
+ BLOB pointers to the freshly inserted clustered index
+ record. */
+ ut_a(trx_assert_recovered(
+ row_get_rec_trx_id(rec, index, offsets)));
+ ut_a(trx_undo_roll_ptr_is_insert(
+ row_get_rec_roll_ptr(rec, index, offsets)));
+ }
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (type != ROW_COPY_POINTERS) {
diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c
index b5952ff0a78..05856687015 100644
--- a/storage/innodb_plugin/row/row0upd.c
+++ b/storage/innodb_plugin/row/row0upd.c
@@ -1978,21 +1978,22 @@ row_upd_clust_rec(
rec_offs_init(offsets_);
ut_a(err == DB_SUCCESS);
- /* Write out the externally stored columns while still
- x-latching index->lock and block->lock. We have to
- mtr_commit(mtr) first, so that the redo log will be
- written in the correct order. Otherwise, we would run
- into trouble on crash recovery if mtr freed B-tree
- pages on which some of the big_rec fields will be
- written. */
- btr_cur_mtr_commit_and_start(btr_cur, mtr);
-
+ /* Write out the externally stored columns, but
+ allocate the pages and write the pointers using the
+ mini-transaction of the record update. If any pages
+ were freed in the update, temporarily mark them
+ allocated so that off-page columns will not overwrite
+ them. We must do this, because we write the redo log
+ for the BLOB writes before writing the redo log for
+ the record update. */
+
+ btr_mark_freed_leaves(index, mtr, TRUE);
rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(btr_cur), rec,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
- mtr, TRUE, big_rec);
+ big_rec, mtr, TRUE, mtr);
/* If writing big_rec fails (for example, because of
DB_OUT_OF_FILE_SPACE), the record will be corrupted.
Even if we did not update any externally stored
@@ -2002,6 +2003,8 @@ row_upd_clust_rec(
to the undo log, and thus the record cannot be rolled
back. */
ut_a(err == DB_SUCCESS);
+ /* Free the pages again in order to avoid a leak. */
+ btr_mark_freed_leaves(index, mtr, FALSE);
}
mtr_commit(mtr);
diff --git a/storage/innodb_plugin/sync/sync0sync.c b/storage/innodb_plugin/sync/sync0sync.c
index 1b97e1f11f3..64aadffdfad 100644
--- a/storage/innodb_plugin/sync/sync0sync.c
+++ b/storage/innodb_plugin/sync/sync0sync.c
@@ -1248,7 +1248,13 @@ sync_thread_add_level(
TRUE));
break;
case SYNC_IBUF_TREE_NODE_NEW:
- ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+ /* ibuf_add_free_page() allocates new pages for the
+ change buffer while only holding the tablespace
+ x-latch. These pre-allocated new pages may only be
+ taken into use while holding ibuf_mutex, in
+ btr_page_alloc_for_ibuf(). */
+ ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
+ || sync_thread_levels_contain(array, SYNC_FSP));
break;
case SYNC_IBUF_INDEX_TREE:
if (sync_thread_levels_contain(array, SYNC_FSP)) {
diff --git a/storage/innodb_plugin/trx/trx0rec.c b/storage/innodb_plugin/trx/trx0rec.c
index 9f2fd59d82b..a729a39d0cc 100644
--- a/storage/innodb_plugin/trx/trx0rec.c
+++ b/storage/innodb_plugin/trx/trx0rec.c
@@ -1097,22 +1097,29 @@ trx_undo_rec_get_partial_row(
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
-Erases the unused undo log page end. */
-static
-void
+Erases the unused undo log page end.
+@return TRUE if the page contained something, FALSE if it was empty */
+static __attribute__((nonnull, warn_unused_result))
+ibool
trx_undo_erase_page_end(
/*====================*/
- page_t* undo_page, /*!< in: undo page whose end to erase */
- mtr_t* mtr) /*!< in: mtr */
+ page_t* undo_page, /*!< in/out: undo page whose end to erase */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint first_free;
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_FREE);
+ if (first_free == TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE) {
+ /* This was an empty page to begin with.
+ Do nothing here; the caller should free the page. */
+ return(FALSE);
+ }
memset(undo_page + first_free, 0xff,
(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
+ return(TRUE);
}
/***********************************************************//**
@@ -1134,7 +1141,11 @@ trx_undo_parse_erase_page_end(
return(ptr);
}
- trx_undo_erase_page_end(page, mtr);
+ if (!trx_undo_erase_page_end(page, mtr)) {
+ /* The function trx_undo_erase_page_end() should not
+ have done anything to an empty page. */
+ ut_ad(0);
+ }
return(ptr);
}
@@ -1180,6 +1191,9 @@ trx_undo_report_row_operation(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
+#ifdef UNIV_DEBUG
+ int loop_count = 0;
+#endif /* UNIV_DEBUG */
rec_offs_init(offsets_);
ut_a(dict_index_is_clust(index));
@@ -1242,7 +1256,7 @@ trx_undo_report_row_operation(
mtr_start(&mtr);
- for (;;) {
+ do {
buf_block_t* undo_block;
page_t* undo_page;
ulint offset;
@@ -1271,7 +1285,19 @@ trx_undo_report_row_operation(
version the replicate page constructed using the log
records stays identical to the original page */
- trx_undo_erase_page_end(undo_page, &mtr);
+ if (!trx_undo_erase_page_end(undo_page, &mtr)) {
+ /* The record did not fit on an empty
+ undo page. Discard the freshly allocated
+ page and return an error. */
+
+ mutex_enter(&rseg->mutex);
+ trx_undo_free_last_page(trx, undo, &mtr);
+ mutex_exit(&rseg->mutex);
+
+ err = DB_TOO_BIG_RECORD;
+ goto err_exit;
+ }
+
mtr_commit(&mtr);
} else {
/* Success */
@@ -1291,16 +1317,15 @@ trx_undo_report_row_operation(
*roll_ptr = trx_undo_build_roll_ptr(
op_type == TRX_UNDO_INSERT_OP,
rseg->id, page_no, offset);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(DB_SUCCESS);
+ err = DB_SUCCESS;
+ goto func_exit;
}
ut_ad(page_no == undo->last_page_no);
/* We have to extend the undo log by one page */
+ ut_ad(++loop_count < 2);
mtr_start(&mtr);
/* When we add a page to an undo log, this is analogous to
@@ -1312,18 +1337,19 @@ trx_undo_report_row_operation(
page_no = trx_undo_add_page(trx, undo, &mtr);
mutex_exit(&(rseg->mutex));
+ } while (UNIV_LIKELY(page_no != FIL_NULL));
- if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
- /* Did not succeed: out of space */
+ /* Did not succeed: out of space */
+ err = DB_OUT_OF_FILE_SPACE;
- mutex_exit(&(trx->undo_mutex));
- mtr_commit(&mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(DB_OUT_OF_FILE_SPACE);
- }
+err_exit:
+ mutex_exit(&trx->undo_mutex);
+ mtr_commit(&mtr);
+func_exit:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
}
+ return(err);
}
/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
diff --git a/storage/innodb_plugin/trx/trx0undo.c b/storage/innodb_plugin/trx/trx0undo.c
index 7f03b68fb55..c36f55fbd9c 100644
--- a/storage/innodb_plugin/trx/trx0undo.c
+++ b/storage/innodb_plugin/trx/trx0undo.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -912,7 +912,7 @@ trx_undo_add_page(
page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
+ TRX_UNDO_FSEG_HEADER,
undo->top_page_no + 1, FSP_UP,
- TRUE, mtr);
+ TRUE, mtr, mtr);
fil_space_release_free_extents(undo->space, n_reserved);
@@ -998,29 +998,28 @@ trx_undo_free_page(
}
/********************************************************************//**
-Frees an undo log page when there is also the memory object for the undo
-log. */
-static
+Frees the last undo log page.
+The caller must hold the rollback segment mutex. */
+UNIV_INTERN
void
-trx_undo_free_page_in_rollback(
-/*===========================*/
- trx_t* trx __attribute__((unused)), /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- ulint page_no,/*!< in: page number to free: must not be the
- header page */
- mtr_t* mtr) /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
+trx_undo_free_last_page_func(
+/*==========================*/
+#ifdef UNIV_DEBUG
+ const trx_t* trx, /*!< in: transaction */
+#endif /* UNIV_DEBUG */
+ trx_undo_t* undo, /*!< in/out: undo log memory copy */
+ mtr_t* mtr) /*!< in/out: mini-transaction which does not
+ have a latch to any undo log page or which
+ has allocated the undo log page */
{
- ulint last_page_no;
-
- ut_ad(undo->hdr_page_no != page_no);
- ut_ad(mutex_own(&(trx->undo_mutex)));
+ ut_ad(mutex_own(&trx->undo_mutex));
+ ut_ad(undo->hdr_page_no != undo->last_page_no);
+ ut_ad(undo->size > 0);
- last_page_no = trx_undo_free_page(undo->rseg, FALSE, undo->space,
- undo->hdr_page_no, page_no, mtr);
+ undo->last_page_no = trx_undo_free_page(
+ undo->rseg, FALSE, undo->space,
+ undo->hdr_page_no, undo->last_page_no, mtr);
- undo->last_page_no = last_page_no;
undo->size--;
}
@@ -1056,9 +1055,11 @@ Truncates an undo log from the end. This function is used during a rollback
to free space from an undo log. */
UNIV_INTERN
void
-trx_undo_truncate_end(
-/*==================*/
- trx_t* trx, /*!< in: transaction whose undo log it is */
+trx_undo_truncate_end_func(
+/*=======================*/
+#ifdef UNIV_DEBUG
+ const trx_t* trx, /*!< in: transaction whose undo log it is */
+#endif /* UNIV_DEBUG */
trx_undo_t* undo, /*!< in: undo log */
undo_no_t limit) /*!< in: all undo records with undo number
>= this value should be truncated */
@@ -1084,18 +1085,7 @@ trx_undo_truncate_end(
rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no,
undo->hdr_offset);
- for (;;) {
- if (rec == NULL) {
- if (last_page_no == undo->hdr_page_no) {
-
- goto function_exit;
- }
-
- trx_undo_free_page_in_rollback(
- trx, undo, last_page_no, &mtr);
- break;
- }
-
+ while (rec) {
if (ut_dulint_cmp(trx_undo_rec_get_undo_no(rec), limit)
>= 0) {
/* Truncate at least this record off, maybe
@@ -1110,6 +1100,14 @@ trx_undo_truncate_end(
undo->hdr_offset);
}
+ if (last_page_no == undo->hdr_page_no) {
+
+ goto function_exit;
+ }
+
+ ut_ad(last_page_no == undo->last_page_no);
+ trx_undo_free_last_page(trx, undo, &mtr);
+
mtr_commit(&mtr);
}
diff --git a/strings/decimal.c b/strings/decimal.c
index 43957c7dc19..6c89657004c 100644
--- a/strings/decimal.c
+++ b/strings/decimal.c
@@ -1423,11 +1423,18 @@ int bin2decimal(const uchar *from, decimal_t *to, int precision, int scale)
buf++;
}
my_afree(d_copy);
+
+ /*
+ No digits? We have read the number zero, of unspecified precision.
+ Make it a proper zero, with non-zero precision.
+ */
+ if (to->intg == 0 && to->frac == 0)
+ decimal_make_zero(to);
return error;
err:
my_afree(d_copy);
- decimal_make_zero(((decimal_t*) to));
+ decimal_make_zero(to);
return(E_DEC_BAD_NUM);
}
diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh
index 006dea45e64..28f2b0f773a 100644
--- a/support-files/mysql.spec.sh
+++ b/support-files/mysql.spec.sh
@@ -382,7 +382,7 @@ sh -c "PATH=\"${MYSQL_BUILD_PATH:-$PATH}\" \
--enable-local-infile \
--with-fast-mutexes \
--with-mysqld-user=%{mysqld_user} \
- --with-unix-socket-path=/var/lib/mysql/mysql.sock \
+ --with-unix-socket-path=%{mysqldatadir}/mysql.sock \
--with-pic \
--prefix=/ \
%if %{CLUSTER_BUILD}
@@ -858,6 +858,13 @@ chown -R %{mysqld_user}:%{mysqld_group} $mysql_datadir
# ----------------------------------------------------------------------
chmod -R og-rw $mysql_datadir/mysql
+# ----------------------------------------------------------------------
+# Deal with SELinux, if it is installed / used
+# ----------------------------------------------------------------------
+if [ -x /sbin/restorecon ] ; then
+ /sbin/restorecon -R %{mysqldatadir}
+fi
+
# Was the server running before the upgrade? If so, restart the new one.
if [ "$SERVER_TO_START" = "true" ] ; then
# Restart in the same way that mysqld will be started normally.
@@ -1165,6 +1172,15 @@ fi
# merging BK trees)
##############################################################################
%changelog
+* Fri Aug 19 2011 Joerg Bruehe <joerg.bruehe@oracle.com>
+
+- Fix bug#37165 "((Generic rpm)) fail to install on Fedora 9 x86_64"
+ On Fedora, certain accesses to "/var/lib/mysql/HOSTNAME.err" were blocked
+ by SELinux policy; this made the server start fail with the message
+ Manager of pid-file quit without updating file
+ Calling "/sbin/restorecon -R /var/lib/mysql" fixes this.
+- Replace occurrences of that path name by the spec file variable %{mysqldatadir}.
+
* Thu Jul 07 2011 Joerg Bruehe <joerg.bruehe@oracle.com>
- Fix bug#45415: "rpm upgrade recreates test database"