summaryrefslogtreecommitdiff
path: root/storage/xtradb/include
diff options
context:
space:
mode:
authorSergei Golubchik <sergii@pisem.net>2013-12-22 17:06:50 +0100
committerSergei Golubchik <sergii@pisem.net>2013-12-22 17:06:50 +0100
commitffa8c4cfcc41d4f160e3bdfca5cfd4b01a7d6e63 (patch)
tree728585c36f22a5db3cea796430883d0ebc5c05eb /storage/xtradb/include
parente27c34f9e4ca15c797fcd3191ee5679c2f237a09 (diff)
parent52c26f7a1f675185d2ef1a28aca7f9bcc67c6414 (diff)
downloadmariadb-git-ffa8c4cfcc41d4f160e3bdfca5cfd4b01a7d6e63.tar.gz
Percona-Server-5.6.14-rel62.0 merge
support ha_innodb.so as a dynamic plugin. * remove obsolete *,innodb_plugin.rdiff files * s/--plugin-load=/--plugin-load-add=/ * MYSQL_PLUGIN_IMPORT glob_hostname[] * use my_error instead of push_warning_printf(ER_DEFAULT) * don't use tdc_size and tc_size in a module update test cases (XtraDB is 5.6.14, InnoDB is 5.6.10) * copy new tests over * disable some tests for (old) InnoDB * delete XtraDB tests that no longer apply small compatibility changes: * s/HTON_EXTENDED_KEYS/HTON_SUPPORTS_EXTENDED_KEYS/ * revert unnecessary InnoDB changes to make it a bit closer to the upstream fix XtraDB to compile on Windows (both as a static and a dynamic plugin) disable XtraDB on Windows (deadlocks) and where no atomic ops are available (e.g. CentOS 5) storage/innobase/handler/ha_innodb.cc: revert few unnecessary changes to make it a bit closer to the original InnoDB storage/innobase/include/univ.i: correct the version to match what it was merged from
Diffstat (limited to 'storage/xtradb/include')
-rw-r--r--storage/xtradb/include/api0api.h1284
-rw-r--r--storage/xtradb/include/api0misc.h78
-rw-r--r--storage/xtradb/include/btr0btr.h270
-rw-r--r--storage/xtradb/include/btr0btr.ic33
-rw-r--r--storage/xtradb/include/btr0cur.h250
-rw-r--r--storage/xtradb/include/btr0cur.ic21
-rw-r--r--storage/xtradb/include/btr0pcur.h43
-rw-r--r--storage/xtradb/include/btr0pcur.ic50
-rw-r--r--storage/xtradb/include/btr0sea.h61
-rw-r--r--storage/xtradb/include/btr0sea.ic59
-rw-r--r--storage/xtradb/include/btr0types.h18
-rw-r--r--storage/xtradb/include/buf0buddy.h20
-rw-r--r--storage/xtradb/include/buf0buddy.ic59
-rw-r--r--storage/xtradb/include/buf0buf.h663
-rw-r--r--storage/xtradb/include/buf0buf.ic445
-rw-r--r--storage/xtradb/include/buf0checksum.h88
-rw-r--r--storage/xtradb/include/buf0dblwr.h153
-rw-r--r--storage/xtradb/include/buf0dump.h72
-rw-r--r--storage/xtradb/include/buf0flu.h222
-rw-r--r--storage/xtradb/include/buf0flu.ic46
-rw-r--r--storage/xtradb/include/buf0lru.h134
-rw-r--r--storage/xtradb/include/buf0lru.ic6
-rw-r--r--storage/xtradb/include/buf0rea.h57
-rw-r--r--storage/xtradb/include/buf0types.h115
-rw-r--r--storage/xtradb/include/data0data.h155
-rw-r--r--storage/xtradb/include/data0data.ic54
-rw-r--r--storage/xtradb/include/data0type.h46
-rw-r--r--storage/xtradb/include/data0type.ic119
-rw-r--r--storage/xtradb/include/data0types.h10
-rw-r--r--storage/xtradb/include/db0err.h88
-rw-r--r--storage/xtradb/include/dict0boot.h263
-rw-r--r--storage/xtradb/include/dict0boot.ic27
-rw-r--r--storage/xtradb/include/dict0crea.h95
-rw-r--r--storage/xtradb/include/dict0crea.ic79
-rw-r--r--storage/xtradb/include/dict0dict.h988
-rw-r--r--storage/xtradb/include/dict0dict.ic684
-rw-r--r--storage/xtradb/include/dict0load.h155
-rw-r--r--storage/xtradb/include/dict0load.ic6
-rw-r--r--storage/xtradb/include/dict0mem.h537
-rw-r--r--storage/xtradb/include/dict0mem.ic24
-rw-r--r--storage/xtradb/include/dict0priv.h63
-rw-r--r--storage/xtradb/include/dict0priv.ic125
-rw-r--r--storage/xtradb/include/dict0stats.h202
-rw-r--r--storage/xtradb/include/dict0stats.ic236
-rw-r--r--storage/xtradb/include/dict0stats_bg.h127
-rw-r--r--storage/xtradb/include/dict0stats_bg.ic45
-rw-r--r--storage/xtradb/include/dict0types.h49
-rw-r--r--storage/xtradb/include/dyn0dyn.h7
-rw-r--r--storage/xtradb/include/dyn0dyn.ic8
-rw-r--r--storage/xtradb/include/eval0eval.h6
-rw-r--r--storage/xtradb/include/eval0eval.ic30
-rw-r--r--storage/xtradb/include/eval0proc.h6
-rw-r--r--storage/xtradb/include/eval0proc.ic10
-rw-r--r--storage/xtradb/include/fil0fil.h466
-rw-r--r--storage/xtradb/include/fsp0fsp.h346
-rw-r--r--storage/xtradb/include/fsp0fsp.ic261
-rw-r--r--storage/xtradb/include/fsp0types.h14
-rw-r--r--storage/xtradb/include/fts0ast.h281
-rw-r--r--storage/xtradb/include/fts0blex.h349
-rw-r--r--storage/xtradb/include/fts0fts.h1042
-rw-r--r--storage/xtradb/include/fts0opt.h37
-rw-r--r--storage/xtradb/include/fts0pars.h72
-rw-r--r--storage/xtradb/include/fts0priv.h650
-rw-r--r--storage/xtradb/include/fts0priv.ic92
-rw-r--r--storage/xtradb/include/fts0tlex.h349
-rw-r--r--storage/xtradb/include/fts0types.h473
-rw-r--r--storage/xtradb/include/fts0types.ic388
-rw-r--r--storage/xtradb/include/fts0vlc.ic142
-rw-r--r--storage/xtradb/include/fut0fut.h6
-rw-r--r--storage/xtradb/include/fut0fut.ic6
-rw-r--r--storage/xtradb/include/fut0lst.h6
-rw-r--r--storage/xtradb/include/fut0lst.ic6
-rw-r--r--storage/xtradb/include/ha0ha.h83
-rw-r--r--storage/xtradb/include/ha0ha.ic66
-rw-r--r--storage/xtradb/include/ha0storage.h8
-rw-r--r--storage/xtradb/include/ha0storage.ic12
-rw-r--r--storage/xtradb/include/ha_prototypes.h323
-rw-r--r--storage/xtradb/include/handler0alter.h90
-rw-r--r--storage/xtradb/include/hash0hash.h223
-rw-r--r--storage/xtradb/include/hash0hash.ic74
-rw-r--r--storage/xtradb/include/ibuf0ibuf.h63
-rw-r--r--storage/xtradb/include/ibuf0ibuf.ic64
-rw-r--r--storage/xtradb/include/ibuf0types.h8
-rw-r--r--storage/xtradb/include/lock0iter.h10
-rw-r--r--storage/xtradb/include/lock0lock.h330
-rw-r--r--storage/xtradb/include/lock0lock.ic35
-rw-r--r--storage/xtradb/include/lock0priv.h31
-rw-r--r--storage/xtradb/include/lock0priv.ic28
-rw-r--r--storage/xtradb/include/lock0types.h14
-rw-r--r--storage/xtradb/include/log0log.h264
-rw-r--r--storage/xtradb/include/log0log.ic81
-rw-r--r--storage/xtradb/include/log0online.h23
-rw-r--r--storage/xtradb/include/log0recv.h136
-rw-r--r--storage/xtradb/include/log0recv.ic8
-rw-r--r--storage/xtradb/include/mach0data.h48
-rw-r--r--storage/xtradb/include/mach0data.ic161
-rw-r--r--storage/xtradb/include/mem0dbg.h10
-rw-r--r--storage/xtradb/include/mem0dbg.ic6
-rw-r--r--storage/xtradb/include/mem0mem.h40
-rw-r--r--storage/xtradb/include/mem0mem.ic48
-rw-r--r--storage/xtradb/include/mem0pool.h15
-rw-r--r--storage/xtradb/include/mem0pool.ic6
-rw-r--r--storage/xtradb/include/mtr0log.h29
-rw-r--r--storage/xtradb/include/mtr0log.ic15
-rw-r--r--storage/xtradb/include/mtr0mtr.h47
-rw-r--r--storage/xtradb/include/mtr0mtr.ic27
-rw-r--r--storage/xtradb/include/mtr0types.h8
-rw-r--r--storage/xtradb/include/os0file.h425
-rw-r--r--storage/xtradb/include/os0file.ic120
-rw-r--r--storage/xtradb/include/os0proc.h6
-rw-r--r--storage/xtradb/include/os0proc.ic6
-rw-r--r--storage/xtradb/include/os0sync.h333
-rw-r--r--storage/xtradb/include/os0sync.ic198
-rw-r--r--storage/xtradb/include/os0thread.h75
-rw-r--r--storage/xtradb/include/os0thread.ic6
-rw-r--r--storage/xtradb/include/page0cur.h71
-rw-r--r--storage/xtradb/include/page0cur.ic52
-rw-r--r--storage/xtradb/include/page0page.h102
-rw-r--r--storage/xtradb/include/page0page.ic123
-rw-r--r--storage/xtradb/include/page0types.h68
-rw-r--r--storage/xtradb/include/page0zip.h83
-rw-r--r--storage/xtradb/include/page0zip.ic103
-rw-r--r--storage/xtradb/include/pars0grm.h73
-rw-r--r--storage/xtradb/include/pars0opt.h6
-rw-r--r--storage/xtradb/include/pars0opt.ic6
-rw-r--r--storage/xtradb/include/pars0pars.h164
-rw-r--r--storage/xtradb/include/pars0pars.ic6
-rw-r--r--storage/xtradb/include/pars0sym.h30
-rw-r--r--storage/xtradb/include/pars0sym.ic6
-rw-r--r--storage/xtradb/include/pars0types.h42
-rw-r--r--storage/xtradb/include/que0que.h149
-rw-r--r--storage/xtradb/include/que0que.ic50
-rw-r--r--storage/xtradb/include/que0types.h15
-rw-r--r--storage/xtradb/include/read0i_s.h54
-rw-r--r--storage/xtradb/include/read0read.h67
-rw-r--r--storage/xtradb/include/read0read.ic118
-rw-r--r--storage/xtradb/include/read0types.h10
-rw-r--r--storage/xtradb/include/rem0cmp.h121
-rw-r--r--storage/xtradb/include/rem0cmp.ic101
-rw-r--r--storage/xtradb/include/rem0rec.h284
-rw-r--r--storage/xtradb/include/rem0rec.ic134
-rw-r--r--storage/xtradb/include/rem0types.h28
-rw-r--r--storage/xtradb/include/row0ext.h8
-rw-r--r--storage/xtradb/include/row0ext.ic8
-rw-r--r--storage/xtradb/include/row0ftsort.h275
-rw-r--r--storage/xtradb/include/row0import.h91
-rw-r--r--storage/xtradb/include/row0import.ic25
-rw-r--r--storage/xtradb/include/row0ins.h133
-rw-r--r--storage/xtradb/include/row0ins.ic6
-rw-r--r--storage/xtradb/include/row0log.h238
-rw-r--r--storage/xtradb/include/row0log.ic84
-rw-r--r--storage/xtradb/include/row0merge.h367
-rw-r--r--storage/xtradb/include/row0mysql.h254
-rw-r--r--storage/xtradb/include/row0mysql.ic6
-rw-r--r--storage/xtradb/include/row0purge.h50
-rw-r--r--storage/xtradb/include/row0purge.ic6
-rw-r--r--storage/xtradb/include/row0quiesce.h74
-rw-r--r--storage/xtradb/include/row0quiesce.ic26
-rw-r--r--storage/xtradb/include/row0row.h101
-rw-r--r--storage/xtradb/include/row0row.ic31
-rw-r--r--storage/xtradb/include/row0sel.h28
-rw-r--r--storage/xtradb/include/row0sel.ic8
-rw-r--r--storage/xtradb/include/row0types.h42
-rw-r--r--storage/xtradb/include/row0uins.h12
-rw-r--r--storage/xtradb/include/row0uins.ic6
-rw-r--r--storage/xtradb/include/row0umod.h12
-rw-r--r--storage/xtradb/include/row0umod.ic6
-rw-r--r--storage/xtradb/include/row0undo.h8
-rw-r--r--storage/xtradb/include/row0undo.ic6
-rw-r--r--storage/xtradb/include/row0upd.h68
-rw-r--r--storage/xtradb/include/row0upd.ic31
-rw-r--r--storage/xtradb/include/row0vers.h36
-rw-r--r--storage/xtradb/include/row0vers.ic6
-rw-r--r--storage/xtradb/include/srv0conc.h111
-rw-r--r--storage/xtradb/include/srv0mon.h892
-rw-r--r--storage/xtradb/include/srv0mon.ic113
-rw-r--r--storage/xtradb/include/srv0srv.h834
-rw-r--r--storage/xtradb/include/srv0srv.ic6
-rw-r--r--storage/xtradb/include/srv0start.h72
-rw-r--r--storage/xtradb/include/sync0arr.h69
-rw-r--r--storage/xtradb/include/sync0arr.ic7
-rw-r--r--storage/xtradb/include/sync0rw.h411
-rw-r--r--storage/xtradb/include/sync0rw.ic744
-rw-r--r--storage/xtradb/include/sync0sync.h403
-rw-r--r--storage/xtradb/include/sync0sync.ic388
-rw-r--r--storage/xtradb/include/sync0types.h24
-rw-r--r--storage/xtradb/include/trx0i_s.h73
-rw-r--r--storage/xtradb/include/trx0purge.h160
-rw-r--r--storage/xtradb/include/trx0purge.ic25
-rw-r--r--storage/xtradb/include/trx0rec.h66
-rw-r--r--storage/xtradb/include/trx0rec.ic4
-rw-r--r--storage/xtradb/include/trx0roll.h140
-rw-r--r--storage/xtradb/include/trx0roll.ic6
-rw-r--r--storage/xtradb/include/trx0rseg.h59
-rw-r--r--storage/xtradb/include/trx0rseg.ic25
-rw-r--r--storage/xtradb/include/trx0sys.h382
-rw-r--r--storage/xtradb/include/trx0sys.ic343
-rw-r--r--storage/xtradb/include/trx0trx.h952
-rw-r--r--storage/xtradb/include/trx0trx.ic112
-rw-r--r--storage/xtradb/include/trx0types.h73
-rw-r--r--storage/xtradb/include/trx0undo.h33
-rw-r--r--storage/xtradb/include/trx0undo.ic21
-rw-r--r--storage/xtradb/include/trx0xa.h6
-rw-r--r--storage/xtradb/include/univ.i325
-rw-r--r--storage/xtradb/include/usr0sess.h11
-rw-r--r--storage/xtradb/include/usr0sess.ic6
-rw-r--r--storage/xtradb/include/usr0types.h8
-rw-r--r--storage/xtradb/include/ut0bh.h8
-rw-r--r--storage/xtradb/include/ut0bh.ic8
-rw-r--r--storage/xtradb/include/ut0byte.h8
-rw-r--r--storage/xtradb/include/ut0byte.ic14
-rw-r--r--storage/xtradb/include/ut0counter.h203
-rw-r--r--storage/xtradb/include/ut0crc32.h51
-rw-r--r--storage/xtradb/include/ut0dbg.h18
-rw-r--r--storage/xtradb/include/ut0list.h26
-rw-r--r--storage/xtradb/include/ut0list.ic18
-rw-r--r--storage/xtradb/include/ut0lst.h409
-rw-r--r--storage/xtradb/include/ut0mem.h18
-rw-r--r--storage/xtradb/include/ut0mem.ic8
-rw-r--r--storage/xtradb/include/ut0rbt.h50
-rw-r--r--storage/xtradb/include/ut0rnd.h58
-rw-r--r--storage/xtradb/include/ut0rnd.ic90
-rw-r--r--storage/xtradb/include/ut0sort.h6
-rw-r--r--storage/xtradb/include/ut0ut.h147
-rw-r--r--storage/xtradb/include/ut0ut.ic19
-rw-r--r--storage/xtradb/include/ut0vec.h289
-rw-r--r--storage/xtradb/include/ut0vec.ic350
-rw-r--r--storage/xtradb/include/ut0wqueue.h32
228 files changed, 23438 insertions, 6494 deletions
diff --git a/storage/xtradb/include/api0api.h b/storage/xtradb/include/api0api.h
new file mode 100644
index 00000000000..1d6aaab60bc
--- /dev/null
+++ b/storage/xtradb/include/api0api.h
@@ -0,0 +1,1284 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/api0api.h
+InnoDB Native API
+
+2008-08-01 Created by Sunny Bains.
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+*******************************************************/
+
+#ifndef api0api_h
+#define api0api_h
+
+#include "db0err.h"
+#include <stdio.h>
+
+#ifdef _MSC_VER
+#define strncasecmp _strnicmp
+#define strcasecmp _stricmp
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
+#define UNIV_NO_IGNORE __attribute__ ((warn_unused_result))
+#else
+#define UNIV_NO_IGNORE
+#endif /* __GNUC__ && __GNUC__ > 2 && !__INTEL_COMPILER */
+
+/* See comment about ib_bool_t as to why the two macros are unsigned long. */
+/** The boolean value of "true" used internally within InnoDB */
+#define IB_TRUE 0x1UL
+/** The boolean value of "false" used internally within InnoDB */
+#define IB_FALSE 0x0UL
+
+/* Basic types used by the InnoDB API. */
+/** All InnoDB error codes are represented by ib_err_t */
+typedef enum dberr_t ib_err_t;
+/** Representation of a byte within InnoDB */
+typedef unsigned char ib_byte_t;
+/** Representation of an unsigned long int within InnoDB */
+typedef unsigned long int ib_ulint_t;
+
+/* We assume C99 support except when using VisualStudio. */
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#endif /* _MSC_VER */
+
+/* Integer types used by the API. Microsoft VS defines its own types
+and we use the Microsoft types when building with Visual Studio. */
+#if defined(_MSC_VER)
+/** A signed 8 bit integral type. */
+typedef __int8 ib_i8_t;
+#else
+/** A signed 8 bit integral type. */
+typedef int8_t ib_i8_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 8 bit integral type. */
+typedef unsigned __int8 ib_u8_t;
+#else
+/** An unsigned 8 bit integral type. */
+typedef uint8_t ib_u8_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 16 bit integral type. */
+typedef __int16 ib_i16_t;
+#else
+/** A signed 16 bit integral type. */
+typedef int16_t ib_i16_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 16 bit integral type. */
+typedef unsigned __int16 ib_u16_t;
+#else
+/** An unsigned 16 bit integral type. */
+typedef uint16_t ib_u16_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 32 bit integral type. */
+typedef __int32 ib_i32_t;
+#else
+/** A signed 32 bit integral type. */
+typedef int32_t ib_i32_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 32 bit integral type. */
+typedef unsigned __int32 ib_u32_t;
+#else
+/** An unsigned 32 bit integral type. */
+typedef uint32_t ib_u32_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 64 bit integral type. */
+typedef __int64 ib_i64_t;
+#else
+/** A signed 64 bit integral type. */
+typedef int64_t ib_i64_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 64 bit integral type. */
+typedef unsigned __int64 ib_u64_t;
+#else
+/** An unsigned 64 bit integral type. */
+typedef uint64_t ib_u64_t;
+#endif
+
+typedef void* ib_opaque_t;
+typedef ib_opaque_t ib_charset_t;
+typedef ib_ulint_t ib_bool_t;
+typedef ib_u64_t ib_id_u64_t;
+
+/** @enum ib_cfg_type_t Possible types for a configuration variable. */
+typedef enum {
+ IB_CFG_IBOOL, /*!< The configuration parameter is
+ of type ibool */
+
+ /* XXX Can we avoid having different types for ulint and ulong?
+ - On Win64 "unsigned long" is 32 bits
+ - ulong is always defined as "unsigned long"
+ - On Win64 ulint is defined as 64 bit integer
+ => On Win64 ulint != ulong.
+ If we typecast all ulong and ulint variables to the smaller type
+ ulong, then we will cut the range of the ulint variables.
+ This is not a problem for most ulint variables because their max
+ allowed values do not exceed 2^32-1 (e.g. log_groups is ulint
+ but its max allowed value is 10). BUT buffer_pool_size and
+ log_file_size allow up to 2^64-1. */
+
+ IB_CFG_ULINT, /*!< The configuration parameter is
+ of type ulint */
+
+ IB_CFG_ULONG, /*!< The configuration parameter is
+ of type ulong */
+
+ IB_CFG_TEXT, /*!< The configuration parameter is
+ of type char* */
+
+ IB_CFG_CB /*!< The configuration parameter is
+ a callback parameter */
+} ib_cfg_type_t;
+
+/** @enum ib_col_type_t column types that are supported. Note that the
+value 7 is not assigned to any column type. */
+typedef enum {
+ IB_VARCHAR = 1, /*!< Character varying length. The
+ column is not padded. */
+
+ IB_CHAR = 2, /*!< Fixed length character string. The
+ column is padded to the right. */
+
+ IB_BINARY = 3, /*!< Fixed length binary, similar to
+ IB_CHAR but the column is not padded
+ to the right. */
+
+ IB_VARBINARY = 4, /*!< Variable length binary */
+
+ IB_BLOB = 5, /*!< Binary large object, or
+ a TEXT type */
+
+ IB_INT = 6, /*!< Integer: can be any size
+ from 1 - 8 bytes. If the size is
+ 1, 2, 4 and 8 bytes then you can use
+ the typed read and write functions. For
+ other sizes you will need to use the
+ ib_col_get_value() function and do the
+ conversion yourself. */
+
+ IB_SYS = 8, /*!< System column, this column can
+ be one of DATA_TRX_ID, DATA_ROLL_PTR
+ or DATA_ROW_ID. */
+
+ IB_FLOAT = 9, /*!< C (float) floating point value. */
+
+ IB_DOUBLE = 10, /*!< C (double) floating point value. */
+
+ IB_DECIMAL = 11, /*!< Decimal stored as an ASCII
+ string */
+
+ IB_VARCHAR_ANYCHARSET = 12, /*!< Any charset, varying length */
+
+ IB_CHAR_ANYCHARSET = 13 /*!< Any charset, fixed length */
+
+} ib_col_type_t;
+
+/** @enum ib_tbl_fmt_t InnoDB table format types */
+typedef enum {
+ IB_TBL_REDUNDANT, /*!< Redundant row format, the column
+ type and length is stored in the row.*/
+
+ IB_TBL_COMPACT, /*!< Compact row format, the column
+ type is not stored in the row. The
+ length is stored in the row but the
+ storage format uses a compact format
+ to store the length of the column data
+ and record data storage format also
+ uses less storage. */
+
+ IB_TBL_DYNAMIC, /*!< Compact row format. BLOB prefixes
+ are not stored in the clustered index */
+
+ IB_TBL_COMPRESSED /*!< Similar to dynamic format but
+ with pages compressed */
+} ib_tbl_fmt_t;
+
+/** @enum ib_col_attr_t InnoDB column attributes */
+typedef enum {
+ IB_COL_NONE = 0, /*!< No special attributes. */
+
+ IB_COL_NOT_NULL = 1, /*!< Column data can't be NULL. */
+
+ IB_COL_UNSIGNED = 2, /*!< Column is IB_INT and unsigned. */
+
+ IB_COL_NOT_USED = 4, /*!< Future use, reserved. */
+
+ IB_COL_CUSTOM1 = 8, /*!< Custom precision type, this is
+ a bit that is ignored by InnoDB and so
+ can be set and queried by users. */
+
+ IB_COL_CUSTOM2 = 16, /*!< Custom precision type, this is
+ a bit that is ignored by InnoDB and so
+ can be set and queried by users. */
+
+ IB_COL_CUSTOM3 = 32 /*!< Custom precision type, this is
+ a bit that is ignored by InnoDB and so
+ can be set and queried by users. */
+} ib_col_attr_t;
+
+/* Note: must match lock0types.h */
+/** @enum ib_lck_mode_t InnoDB lock modes. */
+typedef enum {
+ IB_LOCK_IS = 0, /*!< Intention shared, an intention
+ lock should be used to lock tables */
+
+ IB_LOCK_IX, /*!< Intention exclusive, an intention
+ lock should be used to lock tables */
+
+ IB_LOCK_S, /*!< Shared locks should be used to
+ lock rows */
+
+ IB_LOCK_X, /*!< Exclusive locks should be used to
+ lock rows*/
+
+ IB_LOCK_TABLE_X, /*!< exclusive table lock */
+
+ IB_LOCK_NONE, /*!< This is used internally to note
+ consistent read */
+
+ IB_LOCK_NUM = IB_LOCK_NONE /*!< number of lock modes */
+} ib_lck_mode_t;
+
+/** @enum ib_index_type_t InnoDB index types. */
+typedef enum {
+ IB_CLUSTERED = 1, /*!< clustered index */
+ IB_UNIQUE = 2 /*!< unique index */
+} ib_index_type_t;
+
+/** @enum ib_srch_mode_t InnoDB cursor search modes for ib_cursor_moveto().
+Note: Values must match those found in page0cur.h */
+typedef enum {
+ IB_CUR_G = 1, /*!< If search key is not found then
+ position the cursor on the row that
+ is greater than the search key */
+
+ IB_CUR_GE = 2, /*!< If the search key not found then
+ position the cursor on the row that
+ is greater than or equal to the search
+ key */
+
+ IB_CUR_L = 3, /*!< If search key is not found then
+ position the cursor on the row that
+ is less than the search key */
+
+ IB_CUR_LE = 4 /*!< If search key is not found then
+ position the cursor on the row that
+ is less than or equal to the search
+ key */
+} ib_srch_mode_t;
+
+/** @enum ib_match_mode_t Various match modes used by ib_cursor_moveto() */
+typedef enum {
+ IB_CLOSEST_MATCH, /*!< Closest match possible */
+
+ IB_EXACT_MATCH, /*!< Search using a complete key
+ value */
+
+ IB_EXACT_PREFIX /*!< Search using a key prefix which
+ must match to rows: the prefix may
+ contain an incomplete field (the
+ last field in prefix may be just
+ a prefix of a fixed length column) */
+} ib_match_mode_t;
+
+/** @struct ib_col_meta_t InnoDB column meta data. */
+typedef struct {
+ ib_col_type_t type; /*!< Type of the column */
+
+ ib_col_attr_t attr; /*!< Column attributes */
+
+ ib_u32_t type_len; /*!< Length of type */
+
+ ib_u16_t client_type; /*!< 16 bits of data relevant only to
+ the client. InnoDB doesn't care */
+
+ ib_charset_t* charset; /*!< Column charset */
+} ib_col_meta_t;
+
+/* Note: Must be in sync with trx0trx.h */
+/** @enum ib_trx_state_t The transaction state can be queried using the
+ib_trx_state() function. The InnoDB deadlock monitor can roll back a
+transaction and users should be prepared for this, especially where there
+is high contention. The way to determine the state of the transaction is to
+query its state and check. */
+typedef enum {
+ IB_TRX_NOT_STARTED, /*!< The transaction has not been
+ started yet. */
+
+ IB_TRX_ACTIVE, /*!< The transaction is currently
+ active and needs to be either
+ committed or rolled back. */
+
+ IB_TRX_COMMITTED_IN_MEMORY, /*!< Not committed to disk yet */
+
+ IB_TRX_PREPARED /*!< Support for 2PC/XA */
+} ib_trx_state_t;
+
+/* Note: Must be in sync with trx0trx.h */
+/** @enum ib_trx_level_t Transaction isolation levels */
+typedef enum {
+ IB_TRX_READ_UNCOMMITTED = 0, /*!< Dirty read: non-locking SELECTs are
+ performed so that we do not look at a
+ possible earlier version of a record;
+ thus they are not 'consistent' reads
+ under this isolation level; otherwise
+ like level 2 */
+
+ IB_TRX_READ_COMMITTED = 1, /*!< Somewhat Oracle-like isolation,
+ except that in range UPDATE and DELETE
+ we must block phantom rows with
+ next-key locks; SELECT ... FOR UPDATE
+ and ... LOCK IN SHARE MODE only lock
+ the index records, NOT the gaps before
+ them, and thus allow free inserting;
+ each consistent read reads its own
+ snapshot */
+
+ IB_TRX_REPEATABLE_READ = 2, /*!< All consistent reads in the same
+ trx read the same snapshot; full
+ next-key locking used in locking reads
+ to block insertions into gaps */
+
+ IB_TRX_SERIALIZABLE = 3 /*!< All plain SELECTs are converted to
+ LOCK IN SHARE MODE reads */
+} ib_trx_level_t;
+
+/** Generic InnoDB callback prototype. */
+typedef void (*ib_cb_t)(void);
+
+#define IB_CFG_BINLOG_ENABLED 0x1
+#define IB_CFG_MDL_ENABLED 0x2
+#define IB_CFG_DISABLE_ROWLOCK 0x4
+
+/** The first argument to the InnoDB message logging function. By default
+it's set to stderr. You should treat ib_msg_stream_t as a void*, since
+it will probably change in the future. */
+typedef FILE* ib_msg_stream_t;
+
+/** All log messages are written to this function. It should have the same
+behavior as fprintf(3). */
+typedef int (*ib_msg_log_t)(ib_msg_stream_t, const char*, ...);
+
+/* Note: This is to make it easy for API users to have type
+checking for arguments to our functions. Making it ib_opaque_t
+by itself will result in pointer decay resulting in subverting
+of the compiler's type checking. */
+
+/** InnoDB tuple handle. This handle can refer to either a cluster index
+tuple or a secondary index tuple. There are two types of tuples for each
+type of index, making a total of four types of tuple handles. There
+is a tuple for reading the entire row contents and another for searching
+on the index key. */
+typedef struct ib_tuple_t* ib_tpl_t;
+
+/** InnoDB transaction handle, all database operations need to be covered
+by transactions. This handle represents a transaction. The handle can be
+created with ib_trx_begin(), you commit your changes with ib_trx_commit()
+and undo your changes using ib_trx_rollback(). If the InnoDB deadlock
+monitor rolls back the transaction then you need to free the transaction
+using the function ib_trx_release(). You can query the state of an InnoDB
+transaction by calling ib_trx_state(). */
+typedef struct trx_t* ib_trx_t;
+
+/** InnoDB cursor handle */
+typedef struct ib_cursor_t* ib_crsr_t;
+
+/*************************************************************//**
+This function is used to compare two data fields for which the data type
+is such that we must use the client code to compare them.
+
+@param col_meta column meta data
+@param p1 first key
+@param p1_len first key length
+@param p2 second key
+@param p2_len second key length
+@return 1, 0, -1, if p1 is greater, equal, less than p2, respectively */
+
+typedef int (*ib_client_cmp_t)(
+ const ib_col_meta_t* col_meta,
+ const ib_byte_t* p1,
+ ib_ulint_t p1_len,
+ const ib_byte_t* p2,
+ ib_ulint_t p2_len);
+
+/* This should be the same as univ.i */
+/** Represents SQL_NULL length */
+#define IB_SQL_NULL 0xFFFFFFFF
+/** The number of system columns in a row. */
+#define IB_N_SYS_COLS 3
+
+/** The maximum length of a text column. */
+#define MAX_TEXT_LEN 4096
+
+/* MySQL uses 3 byte UTF-8 encoding. */
+/** The maximum length of a column name in a table schema. */
+#define IB_MAX_COL_NAME_LEN (64 * 3)
+
+/** The maximum length of a table name (plus database name). The expansion
+is fully parenthesized so that it is safe inside larger expressions. */
+#define IB_MAX_TABLE_NAME_LEN ((64 * 3) * 2)
+
+/*****************************************************************//**
+Start a transaction that's been rolled back. This special function
+exists for the case when InnoDB's deadlock detector has rolled back
+a transaction. While the transaction has been rolled back the handle
+is still valid and can be reused by calling this function. If you
+don't want to reuse the transaction handle then you can free the handle
+by calling ib_trx_release().
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_start(
+/*=========*/
+ ib_trx_t ib_trx, /*!< in: transaction to restart */
+ ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
+ void* thd); /*!< in: THD */
+
+/*****************************************************************//**
+Begin a transaction. This will allocate a new transaction handle and
+put the transaction in the active state.
+@return innobase txn handle */
+
+ib_trx_t
+ib_trx_begin(
+/*=========*/
+ ib_trx_level_t ib_trx_level); /*!< in: trx isolation level */
+
+/*****************************************************************//**
+Query the transaction's state. This function can be used to check for
+the state of the transaction in case it has been rolled back by the
+InnoDB deadlock detector. Note that when a transaction is selected as
+a victim for rollback, InnoDB will always return an appropriate error
+code indicating this. @see DB_DEADLOCK, @see DB_LOCK_TABLE_FULL and
+@see DB_LOCK_WAIT_TIMEOUT
+@return transaction state */
+
+ib_trx_state_t
+ib_trx_state(
+/*=========*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Release the resources of the transaction. If the transaction was
+selected as a victim by InnoDB and rolled back then use this function
+to free the transaction handle.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_release(
+/*===========*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Commit a transaction. This function will release the schema latches too.
+It will also free the transaction handle.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_commit(
+/*==========*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Rollback a transaction. This function will release the schema latches too.
+It will also free the transaction handle.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_rollback(
+/*============*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Open an InnoDB table and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_table_using_id(
+/*==========================*/
+ ib_id_u64_t table_id, /*!< in: table id of table to open */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
+
+/*****************************************************************//**
+Open an InnoDB index and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_index_using_id(
+/*==========================*/
+ ib_id_u64_t index_id, /*!< in: index id of index to open */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr); /*!< out: InnoDB cursor */
+
+/*****************************************************************//**
+Open an InnoDB secondary index cursor and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_index_using_name(
+/*============================*/
+ ib_crsr_t ib_open_crsr, /*!< in: open/active cursor */
+ const char* index_name, /*!< in: secondary index name */
+ ib_crsr_t* ib_crsr, /*!< out,own: InnoDB index cursor */
+ int* idx_type, /*!< out: index is cluster index */
+ ib_id_u64_t* idx_id); /*!< out: index id */
+
+/*****************************************************************//**
+Open an InnoDB table by name and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_table(
+/*=================*/
+ const char* name, /*!< in: table name */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
+
+/*****************************************************************//**
+Reset the cursor.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_reset(
+/*============*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+
+/*****************************************************************//**
+Set a cursor trx to NULL. */
+
+void
+ib_cursor_clear_trx(
+/*================*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Close an InnoDB table and free the cursor.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_close(
+/*============*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Close the table, decrement n_ref_count count.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_close_table(
+/*==================*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Update the cursor with a new transaction and also reset the cursor.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_new_trx(
+/*==============*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_trx_t ib_trx); /*!< in: transaction */
+
+/*****************************************************************//**
+Commit the transaction in a cursor
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_commit_trx(
+/*=================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_trx_t ib_trx); /*!< in: transaction */
+
+/********************************************************************//**
+Open a table using the table name, if found then increment table ref count.
+@return table instance if found */
+
+void*
+ib_open_table_by_name(
+/*==================*/
+ const char* name); /*!< in: table name to lookup */
+
+/*****************************************************************//**
+Insert a row to a table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_insert_row(
+/*=================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor instance */
+ const ib_tpl_t ib_tpl); /*!< in: tuple to insert */
+
+/*****************************************************************//**
+Update a row in a table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_update_row(
+/*=================*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ const ib_tpl_t ib_old_tpl, /*!< in: Old tuple in table */
+ const ib_tpl_t ib_new_tpl); /*!< in: New tuple to update */
+
+/*****************************************************************//**
+Delete a row in a table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_delete_row(
+/*=================*/
+ ib_crsr_t ib_crsr); /*!< in: cursor instance */
+
+/*****************************************************************//**
+Read current row.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_read_row(
+/*===============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_tpl_t ib_tpl); /*!< out: read cols into this tuple */
+
+/*****************************************************************//**
+Move cursor to the first record in the table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_first(
+/*============*/
+ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Move cursor to the last record in the table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_last(
+/*===========*/
+ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Move cursor to the next record in the table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_next(
+/*===========*/
+ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Search for key.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_moveto(
+/*=============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_tpl_t ib_tpl, /*!< in: Key to search for */
+ ib_srch_mode_t ib_srch_mode); /*!< in: search mode */
+
+/*****************************************************************//**
+Set the match mode for ib_cursor_moveto(). */
+
+void
+ib_cursor_set_match_mode(
+/*=====================*/
+ ib_crsr_t ib_crsr, /*!< in: Cursor instance */
+ ib_match_mode_t match_mode); /*!< in: ib_cursor_moveto match mode */
+
+/*****************************************************************//**
+Set a column of the tuple. Make a copy using the tuple's heap.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_col_set_value(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t col_no, /*!< in: column index in tuple */
+ const void* src, /*!< in: data value */
+ ib_ulint_t len, /*!< in: data value len */
+ ib_bool_t need_cpy); /*!< in: if need memcpy */
+
+
+/*****************************************************************//**
+Get the size of the data available in a column of the tuple.
+@return bytes avail or IB_SQL_NULL */
+
+ib_ulint_t
+ib_col_get_len(
+/*===========*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i); /*!< in: column index in tuple */
+
+/*****************************************************************//**
+Copy a column value from the tuple.
+@return bytes copied or IB_SQL_NULL */
+
+ib_ulint_t
+ib_col_copy_value(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i, /*!< in: column index in tuple */
+ void* dst, /*!< out: copied data value */
+ ib_ulint_t len); /*!< in: max data value len to copy */
+
+/*************************************************************//**
+Read a signed int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i8_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u8_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i16_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u16_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i32_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u32_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i64_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u64_t* ival); /*!< out: integer value */
+
+/*****************************************************************//**
+Get a column value pointer from the tuple.
+@return NULL or pointer to buffer */
+
+const void*
+ib_col_get_value(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i); /*!< in: column number */
+
+/*****************************************************************//**
+Get a column type, length and attributes from the tuple.
+@return len of column data */
+
+ib_ulint_t
+ib_col_get_meta(
+/*============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_col_meta_t* ib_col_meta); /*!< out: column meta data */
+
+/*****************************************************************//**
+"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
+@return new tuple, or NULL */
+
+ib_tpl_t
+ib_tuple_clear(
+/*============*/
+ ib_tpl_t ib_tpl); /*!< in: InnoDB tuple */
+
+/*****************************************************************//**
+Create a new cluster key search tuple and copy the contents of the
+secondary index key tuple columns that refer to the cluster index record
+to the cluster key. It does a deep copy of the column data.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_tuple_get_cluster_key(
+/*=====================*/
+ ib_crsr_t ib_crsr, /*!< in: secondary index cursor */
+ ib_tpl_t* ib_dst_tpl, /*!< out,own: destination tuple */
+ const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
+
+/*****************************************************************//**
+Copy the contents of source tuple to destination tuple. The tuples
+must be of the same type and belong to the same table/index.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_tuple_copy(
+/*==========*/
+ ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */
+ const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return tuple for current index */
+
+ib_tpl_t
+ib_sec_search_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return tuple for current index */
+
+ib_tpl_t
+ib_sec_read_tuple_create(
+/*=====================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for table key operations.
+@return tuple for current table */
+
+ib_tpl_t
+ib_clust_search_tuple_create(
+/*=========================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple for table row operations.
+@return tuple for current table */
+
+ib_tpl_t
+ib_clust_read_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Return the number of user columns in the tuple definition.
+@return number of user columns */
+
+ib_ulint_t
+ib_tuple_get_n_user_cols(
+/*=====================*/
+ const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
+
+/*****************************************************************//**
+Return the number of columns in the tuple definition.
+@return number of columns */
+
+ib_ulint_t
+ib_tuple_get_n_cols(
+/*================*/
+ const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
+
+/*****************************************************************//**
+Destroy an InnoDB tuple. */
+
+void
+ib_tuple_delete(
+/*============*/
+ ib_tpl_t ib_tpl); /*!< in,own: Tuple instance to delete */
+
+/*****************************************************************//**
+Truncate a table. The cursor handle will be closed and set to NULL
+on success.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_truncate(
+/*===============*/
+ ib_crsr_t* ib_crsr, /*!< in/out: cursor for table
+ to truncate */
+ ib_id_u64_t* table_id); /*!< out: new table id */
+
+/*****************************************************************//**
+Get a table id.
+@return DB_SUCCESS if found */
+
+ib_err_t
+ib_table_get_id(
+/*============*/
+ const char* table_name, /*!< in: table to find */
+ ib_id_u64_t* table_id); /*!< out: table id if found */
+
+/*****************************************************************//**
+Get an index id.
+@return DB_SUCCESS if found */
+
+ib_err_t
+ib_index_get_id(
+/*============*/
+ const char* table_name, /*!< in: find index for this table */
+ const char* index_name, /*!< in: index to find */
+ ib_id_u64_t* index_id); /*!< out: index id if found */
+
+/*****************************************************************//**
+Check if cursor is positioned.
+@return IB_TRUE if positioned */
+
+ib_bool_t
+ib_cursor_is_positioned(
+/*====================*/
+ const ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Checks if the data dictionary is latched in exclusive mode by a
+user transaction.
+@return TRUE if exclusive latch */
+
+ib_bool_t
+ib_schema_lock_is_exclusive(
+/*========================*/
+ const ib_trx_t ib_trx); /*!< in: transaction */
+
+/*****************************************************************//**
+Lock an InnoDB cursor/table.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_lock(
+/*===========*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Lock an InnoDB table using the table id.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_table_lock(
+/*===========*/
+ ib_trx_t ib_trx, /*!< in/out: transaction */
+ ib_id_u64_t table_id, /*!< in: table id */
+ ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Set the lock mode of the cursor.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_set_lock_mode(
+/*====================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Set need to access clustered index record flag. */
+
+void
+ib_cursor_set_cluster_access(
+/*=========================*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i8_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i16(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i16_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i32(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i32_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i64_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u8_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u16(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u16_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u32(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u32_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u64_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Inform the cursor that it's the start of an SQL statement. */
+
+void
+ib_cursor_stmt_begin(
+/*=================*/
+ ib_crsr_t ib_crsr); /*!< in: cursor */
+
+/*****************************************************************//**
+Write a double value to a column.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_double(
+/*==================*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ int col_no, /*!< in: column number */
+ double val); /*!< in: value to write */
+
+/*************************************************************//**
+Read a double column value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_double(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t col_no, /*!< in: column number */
+ double* dval); /*!< out: double value */
+
+/*****************************************************************//**
+Write a float value to a column.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_float(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ float val); /*!< in: value to write */
+
+/*************************************************************//**
+Read a float value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_float(
+/*================*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t col_no, /*!< in: column number */
+ float* fval); /*!< out: float value */
+
+/*****************************************************************//**
+Get the name of a column, given a cursor and a column index.
+@return name of the column */
+
+const char*
+ib_col_get_name(
+/*============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i); /*!< in: column index in tuple */
+
+/*****************************************************************//**
+Get an index field name from the cursor.
+@return name of the field */
+
+const char*
+ib_get_idx_field_name(
+/*==================*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i); /*!< in: column index in tuple */
+
+/*****************************************************************//**
+Truncate a table.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_table_truncate(
+/*==============*/
+ const char* table_name, /*!< in: table name */
+ ib_id_u64_t* table_id); /*!< out: new table id */
+
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return DB_SUCCESS or error number */
+
+ib_err_t
+ib_close_thd(
+/*=========*/
+ void* thd); /*!< in: handle to the MySQL
+ thread of the user whose resources
+ should be free'd */
+
+/*****************************************************************//**
+Get generic configure status
+@return configure status*/
+
+int
+ib_cfg_get_cfg();
+/*============*/
+
+/*****************************************************************//**
+Check whether the table name conforms to our requirements. Currently
+we only do a simple check for the presence of a '/'.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_table_name_check(
+/*================*/
+ const char* name); /*!< in: table name to check */
+
+/*****************************************************************//**
+Return isolation configuration set by "innodb_api_trx_level"
+@return trx isolation level*/
+
+ib_trx_state_t
+ib_cfg_trx_level();
+/*==============*/
+
+/*****************************************************************//**
+Return configure value for background commit interval (in seconds)
+@return background commit interval (in seconds) */
+
+ib_ulint_t
+ib_cfg_bk_commit_interval();
+/*=======================*/
+
+/*****************************************************************//**
+Get a trx start time.
+@return trx start_time */
+
+ib_u64_t
+ib_trx_get_start_time(
+/*==================*/
+ ib_trx_t ib_trx); /*!< in: transaction */
+
+#endif /* api0api_h */
diff --git a/storage/xtradb/include/api0misc.h b/storage/xtradb/include/api0misc.h
new file mode 100644
index 00000000000..fcd748390d1
--- /dev/null
+++ b/storage/xtradb/include/api0misc.h
@@ -0,0 +1,78 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/api0misc.h
+InnoDB Native API
+
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+2008 Created by Sunny Bains
+*******************************************************/
+
+#ifndef api0misc_h
+#define api0misc_h
+
+#include "univ.i"
+#include "os0file.h"
+#include "que0que.h"
+#include "trx0trx.h"
+
+/** Whether binlog is enabled for applications using InnoDB APIs */
+extern my_bool ib_binlog_enabled;
+
+/** Whether MySQL MDL is enabled for applications using InnoDB APIs */
+extern my_bool ib_mdl_enabled;
+
+/** Whether InnoDB row lock is disabled for applications using InnoDB APIs */
+extern my_bool ib_disable_row_lock;
+
+/** configure value for transaction isolation level */
+extern ulong ib_trx_level_setting;
+
+/** configure value for background commit interval (in seconds) */
+extern ulong ib_bk_commit_interval;
+
+/********************************************************************
+Handles user errors and lock waits detected by the database engine.
+@return TRUE if it was a lock wait and we should continue running
+the query thread */
+UNIV_INTERN
+ibool
+ib_handle_errors(
+/*=============*/
+ dberr_t* new_err, /*!< out: possible new error
+ encountered in lock wait, or if
+ no new error, the value of
+ trx->error_state at the entry of this
+ function */
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t* thr, /*!< in: query thread */
+ trx_savept_t* savept); /*!< in: savepoint or NULL */
+
+/*************************************************************************
+Sets a lock on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+ib_trx_lock_table_with_retry(
+/*=========================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in: table to lock */
+ enum lock_mode mode); /*!< in: lock mode */
+
+#endif /* api0misc_h */
diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h
index fb06a774b82..a3f7cee2733 100644
--- a/storage/xtradb/include/btr0btr.h
+++ b/storage/xtradb/include/btr0btr.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -95,6 +96,17 @@ insert/delete buffer when the record is not in the buffer pool. */
buffer when the record is not in the buffer pool. */
#define BTR_DELETE 8192
+/** In the case of BTR_SEARCH_LEAF or BTR_MODIFY_LEAF, the caller is
+already holding an S latch on the index tree */
+#define BTR_ALREADY_S_LATCHED 16384
+
+#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
+ ((latch_mode) & ~(BTR_INSERT \
+ | BTR_DELETE_MARK \
+ | BTR_DELETE \
+ | BTR_ESTIMATE \
+ | BTR_IGNORE_SEC_UNIQUE \
+ | BTR_ALREADY_S_LATCHED))
#endif /* UNIV_HOTBACKUP */
/**************************************************************//**
@@ -121,7 +133,7 @@ btr_corruption_report(
#ifdef UNIV_BLOB_DEBUG
# include "ut0rbt.h"
/** An index->blobs entry for keeping track of off-page column references */
-struct btr_blob_dbg_struct
+struct btr_blob_dbg_t
{
unsigned blob_page_no:32; /*!< first BLOB page number */
unsigned ref_page_no:32; /*!< referring page number */
@@ -210,8 +222,32 @@ UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
+ const dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+ const dict_index_t* index) /*!< in: index tree */
+ __attribute__((nonnull, warn_unused_result));
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
@@ -264,17 +300,6 @@ btr_block_get_func(
@return the uncompressed page frame */
# define btr_page_get(space,zip_size,page_no,mode,idx,mtr) \
buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,idx,mtr))
-/**************************************************************//**
-Sets the index id field of a page. */
-UNIV_INLINE
-void
-btr_page_set_index_id(
-/*==================*/
- page_t* page, /*!< in: page to be created */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- index_id_t id, /*!< in: index id */
- mtr_t* mtr); /*!< in: mtr */
#endif /* !UNIV_HOTBACKUP */
/**************************************************************//**
Gets the index id field of a page.
@@ -283,7 +308,8 @@ UNIV_INLINE
index_id_t
btr_page_get_index_id(
/*==================*/
- const page_t* page); /*!< in: index page */
+ const page_t* page) /*!< in: index page */
+ __attribute__((nonnull, pure, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/********************************************************//**
Gets the node level field in an index page.
@@ -292,16 +318,9 @@ UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
- const page_t* page); /*!< in: index page */
-/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ const page_t* page) /*!< in: index page */
+ __attribute__((nonnull, pure, warn_unused_result));
+#define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
/********************************************************//**
Gets the next index page number.
@return next page number */
@@ -310,18 +329,8 @@ ulint
btr_page_get_next(
/*==============*/
const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Sets the next index page field. */
-UNIV_INLINE
-void
-btr_page_set_next(
-/*==============*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- ulint next, /*!< in: next page number */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************//**
Gets the previous index page number.
@return prev page number */
@@ -330,18 +339,8 @@ ulint
btr_page_get_prev(
/*==============*/
const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Sets the previous index page field. */
-UNIV_INLINE
-void
-btr_page_set_prev(
-/*==============*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- ulint prev, /*!< in: previous page number */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Gets pointer to the previous user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor.
@@ -351,8 +350,9 @@ rec_t*
btr_get_prev_user_rec(
/*==================*/
rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
+ mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
needed, also to the previous page */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Gets pointer to the next user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor.
@@ -362,8 +362,9 @@ rec_t*
btr_get_next_user_rec(
/*==================*/
rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
+ mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
needed, also to the next page */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Releases the latch on a leaf page and bufferunfixes it. */
UNIV_INLINE
@@ -373,7 +374,8 @@ btr_leaf_page_release(
buf_block_t* block, /*!< in: buffer block */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
/**************************************************************//**
Gets the child node file address in a node pointer.
NOTE: the offsets array must contain all offsets for the record since
@@ -386,19 +388,8 @@ ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
const rec_t* rec, /*!< in: node pointer record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/**************************************************************//**
-Creates a new index page (not the root, and also not
-used in page reorganization). @see btr_page_empty(). */
-UNIV_INTERN
-void
-btr_page_create(
-/*============*/
- buf_block_t* block, /*!< in/out: page to be created */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr); /*!< in: mtr */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/************************************************************//**
Creates the root node for a new index tree.
@return page number of the created root, FIL_NULL if did not succeed */
@@ -412,7 +403,8 @@ btr_create(
or 0 for uncompressed pages */
index_id_t index_id,/*!< in: index id */
dict_index_t* index, /*!< in: index */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+ __attribute__((nonnull));
/************************************************************//**
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
@@ -434,7 +426,8 @@ btr_free_root(
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint root_page_no, /*!< in: root page number */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
@@ -446,38 +439,63 @@ UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Reorganizes an index page.
-IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
-page of a non-clustered index, the caller must update the insert
-buffer free bits in the same mini-transaction in such a way that the
-modification will be redo-logged.
-@return TRUE on success, FALSE on failure */
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@retval true if the operation was successful
+@retval false if it is a compressed page, and recompression failed */
+UNIV_INTERN
+bool
+btr_page_reorganize_low(
+/*====================*/
+ bool recovery,/*!< in: true if called in recovery:
+ locks should not be updated, i.e.,
+ there cannot exist locks on the
+ page, and a hash index should not be
+ dropped: it cannot exist */
+ ulint z_level,/*!< in: compression level to be used
+ if dealing with compressed page */
+ page_cur_t* cursor, /*!< in/out: page cursor */
+ dict_index_t* index, /*!< in: the index tree of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Reorganizes an index page.
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@retval true if the operation was successful
+@retval false if it is a compressed page, and recompression failed */
UNIV_INTERN
-ibool
+bool
btr_page_reorganize(
/*================*/
- buf_block_t* block, /*!< in: page to be reorganized */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Empties an index page. @see btr_page_create(). */
-UNIV_INTERN
-void
-btr_page_empty(
-/*===========*/
- buf_block_t* block, /*!< in: page to be emptied */
- page_zip_des_t* page_zip,/*!< out: compressed page, or NULL */
- dict_index_t* index, /*!< in: index of the page */
- ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr); /*!< in: mtr */
+ page_cur_t* cursor, /*!< in/out: page cursor */
+ dict_index_t* index, /*!< in: the index tree of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to left.
@@ -487,9 +505,10 @@ ibool
btr_page_get_split_rec_to_left(
/*===========================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec);/*!< out: if split recommended,
+ rec_t** split_rec)/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to right.
@@ -499,9 +518,10 @@ ibool
btr_page_get_split_rec_to_right(
/*============================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec);/*!< out: if split recommended,
+ rec_t** split_rec)/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
@@ -515,12 +535,17 @@ UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
@@ -528,29 +553,17 @@ UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-# define btr_insert_on_non_leaf_level(i,l,t,m) \
- btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+# define btr_insert_on_non_leaf_level(f,i,l,t,m) \
+ btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m)
#endif /* !UNIV_HOTBACKUP */
-/**************************************************************//**
-Attaches the halves of an index page on the appropriate level in an
-index tree. */
-UNIV_INTERN
-void
-btr_attach_half_pages(
-/*==================*/
- dict_index_t* index, /*!< in: the index tree */
- buf_block_t* block, /*!< in/out: page to be split */
- rec_t* split_rec, /*!< in: first record on upper
- half page */
- buf_block_t* new_block, /*!< in/out: the new half page */
- ulint direction, /*!< in: FSP_UP or FSP_DOWN */
- mtr_t* mtr); /*!< in: mtr */
/****************************************************************//**
Sets a record as the predefined minimum record. */
UNIV_INTERN
@@ -558,7 +571,8 @@ void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /*!< in/out: record */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Deletes on the upper level the node pointer to a page. */
@@ -568,7 +582,8 @@ btr_node_ptr_delete(
/*================*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#ifdef UNIV_DEBUG
/************************************************************//**
Checks that the node pointer to a page is appropriate.
@@ -579,7 +594,8 @@ btr_check_node_ptr(
/*===============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: index page */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/*************************************************************//**
Tries to merge the page first to the left immediate brother if such a
@@ -613,7 +629,8 @@ btr_discard_page(
/*=============*/
btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
the root page */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Parses the redo log record for setting an index record as the predefined
@@ -627,7 +644,8 @@ btr_parse_set_min_rec_mark(
byte* end_ptr,/*!< in: buffer end */
ulint comp, /*!< in: nonzero=compact page format */
page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+ __attribute__((nonnull(1,2), warn_unused_result));
/***********************************************************//**
Parses a redo log record of reorganizing a page.
@return end of log record or NULL */
@@ -638,8 +656,10 @@ btr_parse_page_reorganize(
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
dict_index_t* index, /*!< in: record descriptor */
+ bool compressed,/*!< in: true if compressed page */
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+ __attribute__((nonnull(1,2,3), warn_unused_result));
#ifndef UNIV_HOTBACKUP
/**************************************************************//**
Gets the number of pages in a B-tree.
@@ -685,7 +705,8 @@ btr_page_free(
/*==========*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
/**************************************************************//**
Frees a file page used in an index tree. Can be used also to BLOB
external storage pages, because the page level 0 can be given as an
@@ -697,7 +718,8 @@ btr_page_free_low(
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#ifdef UNIV_BTR_PRINT
/*************************************************************//**
Prints size info of a B-tree. */
@@ -705,7 +727,8 @@ UNIV_INTERN
void
btr_print_size(
/*===========*/
- dict_index_t* index); /*!< in: index tree */
+ dict_index_t* index) /*!< in: index tree */
+ __attribute__((nonnull));
/**************************************************************//**
Prints directories and other info of all nodes in the index. */
UNIV_INTERN
@@ -713,8 +736,9 @@ void
btr_print_index(
/*============*/
dict_index_t* index, /*!< in: index */
- ulint width); /*!< in: print this many entries from start
+ ulint width) /*!< in: print this many entries from start
and end */
+ __attribute__((nonnull));
#endif /* UNIV_BTR_PRINT */
/************************************************************//**
Checks the size and number of fields in a record based on the definition of
@@ -726,18 +750,20 @@ btr_index_rec_validate(
/*===================*/
const rec_t* rec, /*!< in: index record */
const dict_index_t* index, /*!< in: index */
- ibool dump_on_error); /*!< in: TRUE if the function
+ ibool dump_on_error) /*!< in: TRUE if the function
should print hex dump of record
and page on error */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Checks the consistency of an index tree.
@return TRUE if ok */
UNIV_INTERN
-ibool
+bool
btr_validate_index(
/*===============*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx); /*!< in: transaction or NULL */
+ dict_index_t* index, /*!< in: index */
+ const trx_t* trx) /*!< in: transaction or 0 */
+ __attribute__((nonnull(1), warn_unused_result));
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2
diff --git a/storage/xtradb/include/btr0btr.ic b/storage/xtradb/include/btr0btr.ic
index 21eaa9bd026..9cc611ee450 100644
--- a/storage/xtradb/include/btr0btr.ic
+++ b/storage/xtradb/include/btr0btr.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -83,7 +83,7 @@ btr_page_set_index_id(
index_id_t id, /*!< in: index id */
mtr_t* mtr) /*!< in: mtr */
{
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), id);
page_zip_write_header(page_zip,
page + (PAGE_HEADER + PAGE_INDEX_ID),
@@ -128,22 +128,6 @@ btr_page_get_level_low(
}
/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr __attribute__((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
-
- return(btr_page_get_level_low(page));
-}
-
-/********************************************************//**
Sets the node level field in an index page. */
UNIV_INLINE
void
@@ -158,7 +142,7 @@ btr_page_set_level(
ut_ad(page && mtr);
ut_ad(level <= BTR_MAX_NODE_LEVEL);
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
mach_write_to_2(page + (PAGE_HEADER + PAGE_LEVEL), level);
page_zip_write_header(page_zip,
page + (PAGE_HEADER + PAGE_LEVEL),
@@ -201,7 +185,7 @@ btr_page_set_next(
{
ut_ad(page && mtr);
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
mach_write_to_4(page + FIL_PAGE_NEXT, next);
page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr);
} else {
@@ -238,7 +222,7 @@ btr_page_set_prev(
{
ut_ad(page && mtr);
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
mach_write_to_4(page + FIL_PAGE_PREV, prev);
page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr);
} else {
@@ -274,12 +258,13 @@ btr_node_ptr_get_child_page_no(
page_no = mach_read_from_4(field);
- if (UNIV_UNLIKELY(page_no == 0)) {
+ if (page_no == 0) {
fprintf(stderr,
"InnoDB: a nonsensical page number 0"
" in a node ptr record at offset %lu\n",
(ulong) page_offset(rec));
buf_page_print(page_align(rec), 0, 0);
+ ut_ad(0);
}
return(page_no);
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h
index 97929d44159..cf7c1a24139 100644
--- a/storage/xtradb/include/btr0cur.h
+++ b/storage/xtradb/include/btr0cur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -31,14 +31,26 @@ Created 10/16/1994 Heikki Tuuri
#include "page0cur.h"
#include "btr0types.h"
-/* Mode flags for btr_cur operations; these can be ORed */
-#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
-#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
-#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
- update vector or inserted entry */
-#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update()
- must keep cursor position when
- moving columns to big_rec */
+/** Mode flags for btr_cur operations; these can be ORed */
+enum {
+ /** do no undo logging */
+ BTR_NO_UNDO_LOG_FLAG = 1,
+ /** do no record lock checking */
+ BTR_NO_LOCKING_FLAG = 2,
+ /** sys fields will be found in the update vector or inserted
+ entry */
+ BTR_KEEP_SYS_FLAG = 4,
+ /** btr_cur_pessimistic_update() must keep cursor position
+ when moving columns to big_rec */
+ BTR_KEEP_POS_FLAG = 8,
+ /** the caller is creating the index or wants to bypass the
+ index->info.online creation log */
+ BTR_CREATE_FLAG = 16,
+ /** the caller of btr_cur_optimistic_update() or
+ btr_cur_update_in_place() will take care of
+ updating IBUF_BITMAP_FREE */
+ BTR_KEEP_IBUF_BITMAP = 32
+};
#ifndef UNIV_HOTBACKUP
#include "que0types.h"
@@ -57,9 +69,6 @@ page_cur_t*
btr_cur_get_page_cur(
/*=================*/
const btr_cur_t* cursor);/*!< in: tree cursor */
-#else /* UNIV_DEBUG */
-# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
-#endif /* UNIV_DEBUG */
/*********************************************************//**
Returns the buffer block on which the tree cursor is positioned.
@return pointer to buffer block */
@@ -67,7 +76,7 @@ UNIV_INLINE
buf_block_t*
btr_cur_get_block(
/*==============*/
- btr_cur_t* cursor);/*!< in: tree cursor */
+ const btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the record pointer of a tree cursor.
@return pointer to record */
@@ -75,7 +84,12 @@ UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
- btr_cur_t* cursor);/*!< in: tree cursor */
+ const btr_cur_t* cursor);/*!< in: tree cursor */
+#else /* UNIV_DEBUG */
+# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
+# define btr_cur_get_block(cursor) ((cursor)->page_cur.block)
+# define btr_cur_get_rec(cursor) ((cursor)->page_cur.rec)
+#endif /* UNIV_DEBUG */
/*********************************************************//**
Returns the compressed page on which the tree cursor is positioned.
@return pointer to compressed page, or NULL if the page is not compressed */
@@ -101,12 +115,9 @@ btr_cur_get_page(
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the index of a cursor.
+@param cursor b-tree cursor
@return index */
-UNIV_INLINE
-dict_index_t*
-btr_cur_get_index(
-/*==============*/
- btr_cur_t* cursor);/*!< in: B-tree cursor */
+#define btr_cur_get_index(cursor) ((cursor)->index)
/*********************************************************//**
Positions a tree cursor at a given record. */
UNIV_INLINE
@@ -165,16 +176,19 @@ UNIV_INTERN
void
btr_cur_open_at_index_side_func(
/*============================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in: cursor */
+ btr_cur_t* cursor, /*!< in/out: cursor */
+ ulint level, /*!< in: level to search for
+ (0=leaf) */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-#define btr_cur_open_at_index_side(f,i,l,c,m) \
- btr_cur_open_at_index_side_func(f,i,l,c,__FILE__,__LINE__,m)
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
+#define btr_cur_open_at_index_side(f,i,l,c,lv,m) \
+ btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
@@ -197,7 +211,7 @@ one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_insert(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -205,6 +219,8 @@ btr_cur_optimistic_insert(
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -213,11 +229,13 @@ btr_cur_optimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr); /*!< in: mtr; if this function returns
- DB_SUCCESS on a leaf page of a secondary
- index in a compressed tablespace, the
- mtr must be committed before latching
+ mtr_t* mtr) /*!< in/out: mini-transaction;
+ if this function returns DB_SUCCESS on
+ a leaf page of a secondary index in a
+ compressed tablespace, the caller must
+ mtr_commit(mtr) before latching
any further pages */
+ __attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
@@ -225,7 +243,7 @@ made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_insert(
/*=======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -236,6 +254,9 @@ btr_cur_pessimistic_insert(
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -244,64 +265,105 @@ btr_cur_pessimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
See if there is enough place in the page modification log to log
an update-in-place.
-@return TRUE if enough place */
+
+@retval false if out of space; IBUF_BITMAP_FREE will be reset
+outside mtr if the page was recompressed
+@retval true if there is enough space
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
+a secondary index leaf page. This has to be done either within the
+same mini-transaction, or by invoking ibuf_reset_free_bits() before
+mtr_commit(mtr). */
UNIV_INTERN
-ibool
-btr_cur_update_alloc_zip(
-/*=====================*/
+bool
+btr_cur_update_alloc_zip_func(
+/*==========================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
- buf_block_t* block, /*!< in/out: buffer page */
- dict_index_t* index, /*!< in: the index corresponding to the block */
+ page_cur_t* cursor, /*!< in/out: B-tree page cursor */
+ dict_index_t* index, /*!< in: the index corresponding to cursor */
+#ifdef UNIV_DEBUG
+ ulint* offsets,/*!< in/out: offsets of the cursor record */
+#endif /* UNIV_DEBUG */
ulint length, /*!< in: size needed */
- ibool create, /*!< in: TRUE=delete-and-insert,
- FALSE=update-in-place */
- mtr_t* mtr, /*!< in: mini-transaction */
+ bool create, /*!< in: true=delete-and-insert,
+ false=update-in-place */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
trx_t* trx) /*!< in: NULL or transaction */
- __attribute__((nonnull (1, 2, 3, 6), warn_unused_result));
+#ifdef UNIV_DEBUG
+ __attribute__((nonnull (1, 2, 3, 4, 7), warn_unused_result));
+#else
+ __attribute__((nonnull (1, 2, 3, 6), warn_unused_result));
+#endif
+
+#ifdef UNIV_DEBUG
+# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr,trx) \
+ btr_cur_update_alloc_zip_func(page_zip,cursor,index,offsets,len,cr,mtr,trx)
+#else /* UNIV_DEBUG */
+# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr,trx) \
+ btr_cur_update_alloc_zip_func(page_zip,cursor,index,len,cr,mtr,trx)
+#endif /* UNIV_DEBUG */
/*************************************************************//**
Updates a record when the update causes no size changes in its fields.
-@return DB_SUCCESS or error number */
+@return locking or undo log related error code, or
+@retval DB_SUCCESS on success
+@retval DB_ZIP_OVERFLOW if there is not enough space left
+on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_update_in_place(
/*====================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
- latching any further pages */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in/out: mini-transaction; if this
+ is a secondary index, the caller must
+ mtr_commit(mtr) before latching any
+ further pages */
+ __attribute__((warn_unused_result, nonnull));
/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended.
-@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
-DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
-there is not enough space left on the compressed page */
+@return error code, including
+@retval DB_SUCCESS on success
+@retval DB_OVERFLOW if the updated record does not fit
+@retval DB_UNDERFLOW if the page would become too empty
+@retval DB_ZIP_OVERFLOW if there is not enough space left
+on the compressed page */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_update(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
- latching any further pages */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in/out: mini-transaction; if this
+ is a secondary index, the caller must
+ mtr_commit(mtr) before latching any
+ further pages */
+ __attribute__((warn_unused_result, nonnull));
/*************************************************************//**
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
@@ -309,7 +371,7 @@ update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
@@ -317,7 +379,13 @@ btr_cur_pessimistic_update(
btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
cursor may become invalid if *big_rec == NULL
|| !(flags & BTR_KEEP_POS_FLAG) */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
+ mem_heap_t* entry_heap,
+ /*!< in/out: memory heap for allocating
+ big_rec and the index tuple */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
const upd_t* update, /*!< in: update vector; this is allowed also
@@ -326,8 +394,10 @@ btr_cur_pessimistic_update(
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
- latching any further pages */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in/out: mini-transaction; must be committed
+ before latching any further pages */
+ __attribute__((warn_unused_result, nonnull));
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -335,30 +405,29 @@ of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_clust_rec(
/*===========================*/
- ulint flags, /*!< in: undo logging and locking flags */
buf_block_t* block, /*!< in/out: buffer block of the record */
rec_t* rec, /*!< in/out: record */
dict_index_t* index, /*!< in: clustered index of the record */
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull));
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_sec_rec(
/*=========================*/
ulint flags, /*!< in: locking flag */
btr_cur_t* cursor, /*!< in: cursor */
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
@@ -384,16 +453,27 @@ but no latch on the whole tree.
@return TRUE if success, i.e., the page did not become too empty */
UNIV_INTERN
ibool
-btr_cur_optimistic_delete(
-/*======================*/
+btr_cur_optimistic_delete_func(
+/*===========================*/
btr_cur_t* cursor, /*!< in: cursor on the record to delete;
cursor stays valid: if deletion succeeds,
on function exit it points to the successor
of the deleted record */
- mtr_t* mtr); /*!< in: mtr; if this function returns
+# ifdef UNIV_DEBUG
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
+# endif /* UNIV_DEBUG */
+ mtr_t* mtr) /*!< in: mtr; if this function returns
TRUE on a leaf page of a secondary
index, the mtr must be committed
before latching any further pages */
+ __attribute__((nonnull, warn_unused_result));
+# ifdef UNIV_DEBUG
+# define btr_cur_optimistic_delete(cursor, flags, mtr) \
+ btr_cur_optimistic_delete_func(cursor, flags, mtr)
+# else /* UNIV_DEBUG */
+# define btr_cur_optimistic_delete(cursor, flags, mtr) \
+ btr_cur_optimistic_delete_func(cursor, mtr)
+# endif /* UNIV_DEBUG */
/*************************************************************//**
Removes the record on which the tree cursor is positioned. Tries
to compress the page if its fillfactor drops below a threshold
@@ -406,7 +486,7 @@ UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
@@ -419,8 +499,10 @@ btr_cur_pessimistic_delete(
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of updating a record in-place.
@@ -474,8 +556,10 @@ btr_estimate_n_rows_in_range(
ulint mode2); /*!< in: search mode for range end */
/*******************************************************************//**
Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals.
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
+0..n_uniq-1) and the number of pages that were sampled is saved in
+index->stat_n_sample_sizes[].
If innodb_stats_method is nulls_ignored, we also record the number of
non-null values for each prefix and store the estimates in
array index->stat_n_non_null_key_vals. */
@@ -529,7 +613,7 @@ The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
-enum db_err
+dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
dict_index_t* index, /*!< in: index of rec; the index tree
@@ -596,6 +680,23 @@ btr_copy_externally_stored_field_prefix(
a lock or a page latch */
ulint local_len);/*!< in: length of data, in bytes */
/*******************************************************************//**
+Copies an externally stored field of a record to mem heap. The
+clustered index record must be protected by a lock or a page latch.
+@return the whole field copied to heap */
+UNIV_INTERN
+byte*
+btr_copy_externally_stored_field(
+/*=============================*/
+ ulint* len, /*!< out: length of the whole field */
+ const byte* data, /*!< in: 'internally' stored part of the
+ field containing also the reference to
+ the external part; must be protected by
+ a lock or a page latch */
+ ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
+ zero for uncompressed BLOBs */
+ ulint local_len,/*!< in: length of data */
+ mem_heap_t* heap); /*!< in: mem heap */
+/*******************************************************************//**
Copies an externally stored field of a record to mem heap.
@return the field copied to heap, or NULL if the field is incomplete */
UNIV_INTERN
@@ -646,8 +747,7 @@ limit, merging it to a neighbor is tried */
/** A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */
-typedef struct btr_path_struct btr_path_t;
-struct btr_path_struct{
+struct btr_path_t{
ulint nth_rec; /*!< index of the record
where the page cursor stopped on
this level (index in alphabetical
@@ -684,7 +784,7 @@ enum btr_cur_method {
/** The tree cursor: the definition appears here only for the compiler
to know struct size! */
-struct btr_cur_struct {
+struct btr_cur_t {
dict_index_t* index; /*!< index where positioned */
page_cur_t page_cur; /*!< page cursor */
purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
@@ -721,7 +821,7 @@ struct btr_cur_struct {
for comparison to the adjacent user
record if that record is on a
different leaf page! (See the note in
- row_ins_duplicate_key.) */
+ row_ins_duplicate_error_in_clust.) */
ulint up_bytes; /*!< number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
diff --git a/storage/xtradb/include/btr0cur.ic b/storage/xtradb/include/btr0cur.ic
index 5fc4651ca13..080866c7465 100644
--- a/storage/xtradb/include/btr0cur.ic
+++ b/storage/xtradb/include/btr0cur.ic
@@ -48,7 +48,7 @@ btr_cur_get_page_cur(
{
return(&((btr_cur_t*) cursor)->page_cur);
}
-#endif /* UNIV_DEBUG */
+
/*********************************************************//**
Returns the buffer block on which the tree cursor is positioned.
@return pointer to buffer block */
@@ -56,7 +56,7 @@ UNIV_INLINE
buf_block_t*
btr_cur_get_block(
/*==============*/
- btr_cur_t* cursor) /*!< in: tree cursor */
+ const btr_cur_t* cursor) /*!< in: tree cursor */
{
return(page_cur_get_block(btr_cur_get_page_cur(cursor)));
}
@@ -68,10 +68,11 @@ UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
- btr_cur_t* cursor) /*!< in: tree cursor */
+ const btr_cur_t* cursor) /*!< in: tree cursor */
{
- return(page_cur_get_rec(&(cursor->page_cur)));
+ return(page_cur_get_rec(btr_cur_get_page_cur(cursor)));
}
+#endif /* UNIV_DEBUG */
/*********************************************************//**
Returns the compressed page on which the tree cursor is positioned.
@@ -109,18 +110,6 @@ btr_cur_get_page(
}
/*********************************************************//**
-Returns the index of a cursor.
-@return index */
-UNIV_INLINE
-dict_index_t*
-btr_cur_get_index(
-/*==============*/
- btr_cur_t* cursor) /*!< in: B-tree cursor */
-{
- return(cursor->index);
-}
-
-/*********************************************************//**
Positions a tree cursor at a given record. */
UNIV_INLINE
void
diff --git a/storage/xtradb/include/btr0pcur.h b/storage/xtradb/include/btr0pcur.h
index 4312f73ca4a..973fae382ab 100644
--- a/storage/xtradb/include/btr0pcur.h
+++ b/storage/xtradb/include/btr0pcur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -92,9 +92,10 @@ Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
-btr_pcur_open_func(
-/*===============*/
+btr_pcur_open_low(
+/*==============*/
dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level in the btree */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
@@ -108,7 +109,7 @@ btr_pcur_open_func(
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open(i,t,md,l,c,m) \
- btr_pcur_open_func(i,t,md,l,c,__FILE__,__LINE__,m)
+ btr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
Opens a persistent cursor to an index tree without initializing the
cursor. */
@@ -145,13 +146,16 @@ UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in: cursor */
- ibool do_init, /*!< in: TRUE if should be initialized */
- mtr_t* mtr); /*!< in: mtr */
+ btr_pcur_t* pcur, /*!< in/out: cursor */
+ bool init_pcur, /*!< in: whether to initialize pcur */
+ ulint level, /*!< in: level to search for
+ (0=leaf) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/**************************************************************//**
Gets the up_match value for a pcur after a search.
@return number of matched fields at the cursor or to the right if
@@ -208,8 +212,17 @@ btr_pcur_open_at_rnd_pos_func(
#define btr_pcur_open_at_rnd_pos(i,l,c,m) \
btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
-Frees the possible old_rec_buf buffer of a persistent cursor and sets the
-latch mode of the persistent cursor to BTR_NO_LATCHES. */
+Frees the possible memory heap of a persistent cursor and sets the latch
+mode of the persistent cursor to BTR_NO_LATCHES.
+WARNING: this function does not release the latch on the page where the
+cursor is currently positioned. The latch is acquired by the
+"move to next/previous" family of functions. Since recursive shared locks
+are not allowed, you must take care (if using the cursor in S-mode) to
+manually release the latch by either calling
+btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
+or by committing the mini-transaction right after btr_pcur_close().
+A subsequent attempt to crawl the same page in the same mtr would cause
+an assertion failure. */
UNIV_INLINE
void
btr_pcur_close(
@@ -451,14 +464,14 @@ btr_pcur_move_to_prev_on_page(
/* The persistent B-tree cursor structure. This is used mainly for SQL
selects, updates, and deletes. */
-struct btr_pcur_struct{
+struct btr_pcur_t{
btr_cur_t btr_cur; /*!< a B-tree cursor */
ulint latch_mode; /*!< see TODO note below!
BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
BTR_MODIFY_TREE, or BTR_NO_LATCHES,
depending on the latching state of
the page and tree where the cursor is
- positioned; the last value means that
+ positioned; BTR_NO_LATCHES means that
the cursor is not currently positioned:
we say then that the cursor is
detached; it can be restored to
diff --git a/storage/xtradb/include/btr0pcur.ic b/storage/xtradb/include/btr0pcur.ic
index 696dfc728dc..79afd7c322e 100644
--- a/storage/xtradb/include/btr0pcur.ic
+++ b/storage/xtradb/include/btr0pcur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -406,9 +406,10 @@ Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
-btr_pcur_open_func(
-/*===============*/
+btr_pcur_open_low(
+/*==============*/
dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level in the btree */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
@@ -428,14 +429,14 @@ btr_pcur_open_func(
btr_pcur_init(cursor);
- cursor->latch_mode = latch_mode;
+ cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
cursor->search_mode = mode;
/* Search with the tree cursor */
btr_cursor = btr_pcur_get_btr_cur(cursor);
- btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
+ btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode,
btr_cursor, 0, file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
@@ -495,28 +496,26 @@ UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in: cursor */
- ibool do_init, /*!< in: TRUE if should be initialized */
- mtr_t* mtr) /*!< in: mtr */
+ btr_pcur_t* pcur, /*!< in/out: cursor */
+ bool init_pcur, /*!< in: whether to initialize pcur */
+ ulint level, /*!< in: level to search for
+ (0=leaf) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- pcur->latch_mode = latch_mode;
+ pcur->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- if (from_left) {
- pcur->search_mode = PAGE_CUR_G;
- } else {
- pcur->search_mode = PAGE_CUR_L;
- }
+ pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L;
- if (do_init) {
+ if (init_pcur) {
btr_pcur_init(pcur);
}
btr_cur_open_at_index_side(from_left, index, latch_mode,
- btr_pcur_get_btr_cur(pcur), mtr);
+ btr_pcur_get_btr_cur(pcur), level, mtr);
pcur->pos_state = BTR_PCUR_IS_POSITIONED;
pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
@@ -555,7 +554,16 @@ btr_pcur_open_at_rnd_pos_func(
/**************************************************************//**
Frees the possible memory heap of a persistent cursor and sets the latch
-mode of the persistent cursor to BTR_NO_LATCHES. */
+mode of the persistent cursor to BTR_NO_LATCHES.
+WARNING: this function does not release the latch on the page where the
+cursor is currently positioned. The latch is acquired by the
+"move to next/previous" family of functions. Since recursive shared locks
+are not allowed, you must take care (if using the cursor in S-mode) to
+manually release the latch by either calling
+btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
+or by committing the mini-transaction right after btr_pcur_close().
+A subsequent attempt to crawl the same page in the same mtr would cause
+an assertion failure. */
UNIV_INLINE
void
btr_pcur_close(
diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h
index 6fa7a2d87bf..9f9c2b04191 100644
--- a/storage/xtradb/include/btr0sea.h
+++ b/storage/xtradb/include/btr0sea.h
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -68,7 +68,8 @@ UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
- dict_index_t* index); /*!< in: index */
+ dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull));
/*****************************************************************//**
Creates and initializes a search info struct.
@return own: search info struct */
@@ -141,13 +142,6 @@ btr_search_drop_page_hash_index(
s- or x-latched, or an index page
for which we know that
block->buf_fix_count == 0 */
-/************************************************************************
-Drops a page hash index based on index */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_index_on_index(
-/*=====================================*/
- dict_index_t* index); /* in: record descriptor */
/********************************************************************//**
Drops a possible page hash index when a page is evicted from the buffer pool
or freed in a file segment. */
@@ -201,20 +195,24 @@ btr_search_validate(void);
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
/********************************************************************//**
-New functions to control split btr_search_index */
+Returns the adaptive hash index table for a given index key.
+@return the adaptive hash index table for a given index key */
UNIV_INLINE
hash_table_t*
btr_search_get_hash_table(
/*======================*/
const dict_index_t* index) /*!< in: index */
- __attribute__((nonnull,pure,warn_unused_result));
+ __attribute__((pure,warn_unused_result));
+/********************************************************************//**
+Returns the adaptive hash index latch for a given index key.
+@return the adaptive hash index latch for a given index key */
UNIV_INLINE
-rw_lock_t*
+prio_rw_lock_t*
btr_search_get_latch(
/*=================*/
const dict_index_t* index) /*!< in: index */
- __attribute__((nonnull,pure,warn_unused_result));
+ __attribute__((pure,warn_unused_result));
/*********************************************************************//**
Returns the AHI partition number corresponding to a given index ID. */
@@ -234,29 +232,45 @@ btr_search_index_init(
dict_index_t* index) /*!< in: index */
__attribute__((nonnull));
+/********************************************************************//**
+Latches all adaptive hash index latches in exclusive mode. */
UNIV_INLINE
void
btr_search_x_lock_all(void);
/*========================*/
+/********************************************************************//**
+Releases all adaptive hash index latches held in exclusive mode. */
UNIV_INLINE
void
btr_search_x_unlock_all(void);
/*==========================*/
#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
+Checks if the thread has locked all the adaptive hash index latches in the
+specified mode.
+
+@return true if all latches are locked by the current thread, false
+otherwise. */
+UNIV_INLINE
+bool
+btr_search_own_all(
+/*===============*/
+ ulint lock_type)
+ __attribute__((warn_unused_result));
/********************************************************************//**
Checks if the thread owns any adaptive hash latches in either S or X mode.
-@return TRUE if the thread owns at least one latch in any mode. */
+@return true if the thread owns at least one latch in any mode. */
UNIV_INLINE
-ibool
+bool
btr_search_own_any(void)
/*=====================*/
__attribute__((warn_unused_result));
#endif
/** The search info struct in an index */
-struct btr_search_struct{
+struct btr_search_t{
ulint ref_count; /*!< Number of blocks in this index tree
that have search index built
i.e. block->index points to this index.
@@ -305,19 +319,16 @@ struct btr_search_struct{
#endif /* UNIV_SEARCH_PERF_STAT */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */
-/** value of btr_search_struct::magic_n, used in assertions */
+/** value of btr_search_t::magic_n, used in assertions */
# define BTR_SEARCH_MAGIC_N 1112765
#endif /* UNIV_DEBUG */
};
/** The hash index system */
-typedef struct btr_search_sys_struct btr_search_sys_t;
-
-/** The hash index system */
-struct btr_search_sys_struct{
- hash_table_t** hash_tables; /*!< the array of adaptive hash index,
- tables mapping dtuple_fold values
- to rec_t pointers on index pages */
+struct btr_search_sys_t{
+ hash_table_t** hash_tables; /*!< the array of adaptive hash index
+ tables, mapping dtuple_fold values to
+ rec_t pointers on index pages */
};
/** The adaptive hash index */
diff --git a/storage/xtradb/include/btr0sea.ic b/storage/xtradb/include/btr0sea.ic
index 3f0dfdaa511..3cbcff75f31 100644
--- a/storage/xtradb/include/btr0sea.ic
+++ b/storage/xtradb/include/btr0sea.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -45,8 +45,6 @@ btr_search_get_info(
/*================*/
dict_index_t* index) /*!< in: index */
{
- ut_ad(index);
-
return(index->search_info);
}
@@ -62,8 +60,8 @@ btr_search_info_update(
btr_search_t* info;
#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX));
+ ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED));
+ ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
info = btr_search_get_info(index);
@@ -83,8 +81,9 @@ btr_search_info_update(
btr_search_info_update_slow(info, cursor);
}
-/*********************************************************************//**
-New functions to control split btr_search_index */
+/********************************************************************//**
+Returns the adaptive hash index table for a given index key.
+@return the adaptive hash index table for a given index key */
UNIV_INLINE
hash_table_t*
btr_search_get_hash_table(
@@ -97,8 +96,11 @@ btr_search_get_hash_table(
return(index->search_table);
}
+/********************************************************************//**
+Returns the adaptive hash index latch for a given index key.
+@return the adaptive hash index latch for a given index key */
UNIV_INLINE
-rw_lock_t*
+prio_rw_lock_t*
btr_search_get_latch(
/*=================*/
const dict_index_t* index) /*!< in: index */
@@ -138,6 +140,8 @@ btr_search_index_init(
btr_search_sys->hash_tables[btr_search_get_key(index->id)];
}
+/********************************************************************//**
+Latches all adaptive hash index latches in exclusive mode. */
UNIV_INLINE
void
btr_search_x_lock_all(void)
@@ -150,6 +154,8 @@ btr_search_x_lock_all(void)
}
}
+/********************************************************************//**
+Releases all adaptive hash index latches held in exclusive mode. */
UNIV_INLINE
void
btr_search_x_unlock_all(void)
@@ -163,11 +169,34 @@ btr_search_x_unlock_all(void)
}
#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
+Checks if the thread has locked all the adaptive hash index latches in the
+specified mode.
+
+@return true if all latches are locked by the current thread, false
+otherwise. */
+UNIV_INLINE
+bool
+btr_search_own_all(
+/*===============*/
+ ulint lock_type)
+{
+ ulint i;
+
+ for (i = 0; i < btr_search_index_num; i++) {
+ if (!rw_lock_own(&btr_search_latch_arr[i], lock_type)) {
+ return(false);
+ }
+ }
+
+ return(true);
+}
+
/********************************************************************//**
Checks if the thread owns any adaptive hash latches in either S or X mode.
-@return TRUE if the thread owns at least one latch in any mode. */
+@return true if the thread owns at least one latch in any mode. */
UNIV_INLINE
-ibool
+bool
btr_search_own_any(void)
/*====================*/
{
@@ -176,10 +205,10 @@ btr_search_own_any(void)
for (i = 0; i < btr_search_index_num; i++) {
if (rw_lock_own(&btr_search_latch_arr[i], RW_LOCK_SHARED) ||
rw_lock_own(&btr_search_latch_arr[i], RW_LOCK_EX)) {
- return(TRUE);
+ return(true);
}
}
- return(FALSE);
+ return(false);
}
-#endif
+#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/xtradb/include/btr0types.h b/storage/xtradb/include/btr0types.h
index a7cd64df276..cd0392e7951 100644
--- a/storage/xtradb/include/btr0types.h
+++ b/storage/xtradb/include/btr0types.h
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -33,11 +33,11 @@ Created 2/17/1996 Heikki Tuuri
#include "sync0rw.h"
/** Persistent cursor */
-typedef struct btr_pcur_struct btr_pcur_t;
+struct btr_pcur_t;
/** B-tree cursor */
-typedef struct btr_cur_struct btr_cur_t;
+struct btr_cur_t;
/** B-tree search information for the adaptive hash index */
-typedef struct btr_search_struct btr_search_t;
+struct btr_search_t;
#ifndef UNIV_HOTBACKUP
@@ -55,23 +55,21 @@ but do NOT protect:
Bear in mind (3) and (4) when using the hash indexes.
*/
-extern rw_lock_t* btr_search_latch_arr;
+extern prio_rw_lock_t* btr_search_latch_arr;
#endif /* UNIV_HOTBACKUP */
-/** The latch protecting the adaptive search system */
-//#define btr_search_latch (*btr_search_latch_temp)
-
/** Flag: has the search system been enabled?
Protected by btr_search_latch. */
extern char btr_search_enabled;
+/** Number of adaptive hash index partitions */
extern ulint btr_search_index_num;
#ifdef UNIV_BLOB_DEBUG
# include "buf0types.h"
/** An index->blobs entry for keeping track of off-page column references */
-typedef struct btr_blob_dbg_struct btr_blob_dbg_t;
+struct btr_blob_dbg_t;
/** Insert to index->blobs a reference to an off-page column.
@param index the index tree
diff --git a/storage/xtradb/include/buf0buddy.h b/storage/xtradb/include/buf0buddy.h
index 7060316dad9..a86fc87e3d3 100644
--- a/storage/xtradb/include/buf0buddy.h
+++ b/storage/xtradb/include/buf0buddy.h
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -36,8 +36,8 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
-buf_pool->mutex and must not hold buf_pool->zip_mutex or any
-block->mutex. The buf_pool->mutex may be released and reacquired.
+buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any
+block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
@return allocated block, never NULL */
UNIV_INLINE
@@ -47,14 +47,13 @@ buf_buddy_alloc(
buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
the page resides */
ulint size, /*!< in: compressed page size
- (between PAGE_ZIP_MIN_SIZE and
+ (between UNIV_ZIP_SIZE_MIN and
UNIV_PAGE_SIZE) */
- ibool* lru, /*!< in: pointer to a variable
+ ibool* lru) /*!< in: pointer to a variable
that will be assigned TRUE if
storage was allocated from the
- LRU list and buf_pool->mutex was
- temporarily released */
- ibool have_page_hash_mutex)
+ LRU list and buf_pool->LRU_list_mutex
+ was temporarily released */
__attribute__((malloc, nonnull));
/**********************************************************************//**
@@ -67,9 +66,8 @@ buf_buddy_free(
the block resides */
void* buf, /*!< in: block to be freed, must not
be pointed to by the buffer pool */
- ulint size, /*!< in: block size,
+ ulint size) /*!< in: block size,
up to UNIV_PAGE_SIZE */
- ibool have_page_hash_mutex)
__attribute__((nonnull));
#ifndef UNIV_NONINL
diff --git a/storage/xtradb/include/buf0buddy.ic b/storage/xtradb/include/buf0buddy.ic
index d7053881caa..020442016d0 100644
--- a/storage/xtradb/include/buf0buddy.ic
+++ b/storage/xtradb/include/buf0buddy.ic
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -35,8 +35,8 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
-buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
-The buf_pool_mutex may be released and reacquired.
+buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any
+block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired.
@return allocated block, never NULL */
UNIV_INTERN
void*
@@ -45,12 +45,11 @@ buf_buddy_alloc_low(
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
ulint i, /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
- ibool* lru, /*!< in: pointer to a variable that
+ ibool* lru) /*!< in: pointer to a variable that
will be assigned TRUE if storage was
allocated from the LRU list and
- buf_pool->mutex was temporarily
- released */
- ibool have_page_hash_mutex)
+ buf_pool->LRU_list_mutex was
+ temporarily released */
__attribute__((malloc, nonnull));
/**********************************************************************//**
@@ -62,9 +61,8 @@ buf_buddy_free_low(
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
- ulint i, /*!< in: index of buf_pool->zip_free[],
+ ulint i) /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
- ibool have_page_hash_mutex)
__attribute__((nonnull));
/**********************************************************************//**
@@ -79,7 +77,7 @@ buf_buddy_get_slot(
ulint i;
ulint s;
- ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+ ut_ad(size >= UNIV_ZIP_SIZE_MIN);
for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) {
}
@@ -90,8 +88,8 @@ buf_buddy_get_slot(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
-buf_pool->mutex and must not hold buf_pool->zip_mutex or any
-block->mutex. The buf_pool->mutex may be released and reacquired.
+buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any
+block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
@return allocated block, never NULL */
UNIV_INLINE
@@ -101,22 +99,21 @@ buf_buddy_alloc(
buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
the page resides */
ulint size, /*!< in: compressed page size
- (between PAGE_ZIP_MIN_SIZE and
+ (between UNIV_ZIP_SIZE_MIN and
UNIV_PAGE_SIZE) */
- ibool* lru, /*!< in: pointer to a variable
+ ibool* lru) /*!< in: pointer to a variable
that will be assigned TRUE if
storage was allocated from the
- LRU list and buf_pool->mutex was
- temporarily released */
- ibool have_page_hash_mutex)
+ LRU list and buf_pool->LRU_list_mutex
+ was temporarily released */
{
- //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
ut_ad(ut_is_2pow(size));
- ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+ ut_ad(size >= UNIV_ZIP_SIZE_MIN);
ut_ad(size <= UNIV_PAGE_SIZE);
return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size),
- lru, have_page_hash_mutex));
+ lru));
}
/**********************************************************************//**
@@ -129,28 +126,14 @@ buf_buddy_free(
the block resides */
void* buf, /*!< in: block to be freed, must not
be pointed to by the buffer pool */
- ulint size, /*!< in: block size,
+ ulint size) /*!< in: block size,
up to UNIV_PAGE_SIZE */
- ibool have_page_hash_mutex)
{
- //ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(ut_is_2pow(size));
- ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+ ut_ad(size >= UNIV_ZIP_SIZE_MIN);
ut_ad(size <= UNIV_PAGE_SIZE);
- if (!have_page_hash_mutex) {
- mutex_enter(&buf_pool->LRU_list_mutex);
- rw_lock_x_lock(&buf_pool->page_hash_latch);
- }
-
- mutex_enter(&buf_pool->zip_free_mutex);
- buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
- mutex_exit(&buf_pool->zip_free_mutex);
-
- if (!have_page_hash_mutex) {
- mutex_exit(&buf_pool->LRU_list_mutex);
- rw_lock_x_unlock(&buf_pool->page_hash_latch);
- }
+ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
}
#ifdef UNIV_MATERIALIZE
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index 701e820a23f..ba2f413429c 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -36,6 +36,7 @@ Created 11/5/1995 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "ut0rbt.h"
#include "os0proc.h"
+#include "log0log.h"
/** @name Modes for buf_page_get_gen */
/* @{ */
@@ -68,14 +69,18 @@ Created 11/5/1995 Heikki Tuuri
position of the block. */
/* @} */
-#define MAX_BUFFER_POOLS_BITS 6 /*!< Number of bits to representing
+#define MAX_BUFFER_POOLS_BITS 6 /*!< Number of bits used to represent
a buffer pool ID */
-#define MAX_BUFFER_POOLS (1 << MAX_BUFFER_POOLS_BITS)
+
+#define MAX_BUFFER_POOLS (1 << MAX_BUFFER_POOLS_BITS)
/*!< The maximum number of buffer
pools that can be defined */
-#define BUF_POOL_WATCH_SIZE 1 /*!< Maximum number of concurrent
+#define BUF_POOL_WATCH_SIZE (srv_n_purge_threads + 1)
+ /*!< Maximum number of concurrent
buffer pool watches */
+#define MAX_PAGE_HASH_LOCKS 1024 /*!< The maximum number of
+ page_hash locks */
extern buf_pool_t* buf_pool_ptr; /*!< The buffer pools
of the database */
@@ -84,8 +89,6 @@ extern ibool buf_debug_prints;/*!< If this is set TRUE, the program
prints info whenever read or flush
occurs */
#endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_write_requests; /*!< variable to count write request
- issued */
extern ulint srv_buf_pool_instances;
extern ulint srv_buf_pool_curr_size;
#else /* !UNIV_HOTBACKUP */
@@ -97,13 +100,11 @@ extern buf_block_t* back_block2; /*!< second block, for page reorganize */
#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
/** @brief States of a control block
-@see buf_page_struct
+@see buf_page_t
The enumeration values must be 0..7. */
enum buf_page_state {
- BUF_BLOCK_ZIP_FREE = 0, /*!< contains a free
- compressed page */
- BUF_BLOCK_POOL_WATCH = 0, /*!< a sentinel for the buffer pool
+ BUF_BLOCK_POOL_WATCH, /*!< a sentinel for the buffer pool
watch, element of buf_pool->watch[] */
BUF_BLOCK_ZIP_PAGE, /*!< contains a clean
compressed page */
@@ -127,7 +128,7 @@ enum buf_page_state {
/** This structure defines information we will fetch from each buffer pool. It
will be used to print table IO stats */
-struct buf_pool_info_struct{
+struct buf_pool_info_t{
/* General buffer pool info */
ulint pool_unique_id; /*!< Buffer Pool ID */
ulint pool_size; /*!< Buffer Pool size in pages */
@@ -141,10 +142,12 @@ struct buf_pool_info_struct{
ulint n_pend_reads; /*!< buf_pool->n_pend_reads, pages
pending read */
ulint n_pending_flush_lru; /*!< Pages pending flush in LRU */
+ ulint n_pending_flush_single_page;/*!< Pages pending to be
+ flushed as part of single page
+ flushes issued by various user
+ threads */
ulint n_pending_flush_list; /*!< Pages pending flush in FLUSH
LIST */
- ulint n_pending_flush_single_page;/*!< Pages pending flush in
- BUF_FLUSH_SINGLE_PAGE list */
ulint n_pages_made_young; /*!< number of pages made young */
ulint n_pages_not_made_young; /*!< number of pages not made young */
ulint n_pages_read; /*!< buf_pool->n_pages_read */
@@ -197,51 +200,20 @@ struct buf_pool_info_struct{
interval */
};
-typedef struct buf_pool_info_struct buf_pool_info_t;
-
/** The occupied bytes of lists in all buffer pools */
-struct buf_pools_list_size_struct {
+struct buf_pools_list_size_t {
ulint LRU_bytes; /*!< LRU size in bytes */
ulint unzip_LRU_bytes; /*!< unzip_LRU size in bytes */
ulint flush_list_bytes; /*!< flush_list size in bytes */
};
-typedef struct buf_pools_list_size_struct buf_pools_list_size_t;
-
#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Acquire mutex on all buffer pool instances */
-UNIV_INLINE
-void
-buf_pool_mutex_enter_all(void);
-/*===========================*/
-
-/********************************************************************//**
-Release mutex on all buffer pool instances */
-UNIV_INLINE
-void
-buf_pool_mutex_exit_all(void);
-/*==========================*/
-
-/********************************************************************//**
-*/
-UNIV_INLINE
-void
-buf_pool_page_hash_x_lock_all(void);
-/*================================*/
-
-/********************************************************************//**
-*/
-UNIV_INLINE
-void
-buf_pool_page_hash_x_unlock_all(void);
-/*==================================*/
/********************************************************************//**
Creates the buffer pool.
-@return own: buf_pool object, NULL if not enough memory or error */
+@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
-ulint
+dberr_t
buf_pool_init(
/*=========*/
ulint size, /*!< in: Size of the total pool in bytes */
@@ -295,9 +267,10 @@ Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
@return oldest modification in pool, zero if none */
UNIV_INTERN
-ib_uint64_t
+lsn_t
buf_pool_get_oldest_modification(void);
/*==================================*/
+
/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
of failure we assert in this function. */
@@ -369,8 +342,7 @@ buf_page_optimistic_get(
/*====================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: guessed block */
- ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
- ..._GUESS_ON_CLOCK */
+ ib_uint64_t modify_clock,/*!< in: modify clock value */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mini-transaction */
@@ -392,7 +364,7 @@ buf_page_get_known_nowait(
/*******************************************************************//**
Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
-Suitable for using when holding the kernel mutex. */
+Suitable for using when holding the lock_sys_t::mutex. */
UNIV_INTERN
const buf_block_t*
buf_page_try_get_func(
@@ -404,7 +376,7 @@ buf_page_try_get_func(
mtr_t* mtr); /*!< in: mini-transaction */
/** Tries to get a page. If the page is not in the buffer pool it is
-not loaded. Suitable for using when holding the kernel mutex.
+not loaded. Suitable for using when holding the lock_sys_t::mutex.
@param space_id in: tablespace id
@param page_no in: page number
@param mtr in: mini-transaction
@@ -517,15 +489,6 @@ buf_page_peek(
/*==========*/
ulint space, /*!< in: space id */
ulint offset);/*!< in: page number */
-/********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
/********************************************************************//**
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
@@ -598,14 +561,14 @@ Gets the youngest modification log sequence number for a frame.
Returns zero if not file page or no modification occurred yet.
@return newest modification to page */
UNIV_INLINE
-ib_uint64_t
+lsn_t
buf_page_get_newest_modification(
/*=============================*/
const buf_page_t* bpage); /*!< in: block containing the
page frame */
/********************************************************************//**
Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool->mutex and block bufferfix count has to be zero, (2) or own an x-lock
+LRU list mutex and block bufferfix count has to be zero, (2) or own an x-lock
on the block. */
UNIV_INLINE
void
@@ -650,46 +613,18 @@ buf_block_buf_fix_inc_func(
# define buf_block_modify_clock_inc(block) ((void) 0)
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value
-on 32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
- const byte* page); /*!< in: buffer page */
-UNIV_INTERN
-ulint
-buf_calc_page_new_checksum_32(
-/*==========================*/
- const byte* page); /*!< in: buffer page */
-/********************************************************************//**
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
-NOTE: we must first store the new formula checksum to
-FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input!
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
- const byte* page); /*!< in: buffer page */
-/********************************************************************//**
Checks if a page is corrupt.
@return TRUE if corrupted */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
- ibool check_lsn, /*!< in: TRUE if we need to check
+ bool check_lsn, /*!< in: true if we need to check
and complain about the LSN */
const byte* read_buf, /*!< in: a database page */
ulint zip_size) /*!< in: size of compressed page;
0 for uncompressed pages */
- __attribute__((warn_unused_result));
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
@@ -723,6 +658,17 @@ buf_pool_contains_zip(
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
const void* data); /*!< in: pointer to compressed page */
#endif /* UNIV_DEBUG */
+
+/***********************************************************************
+FIXME_FTS: Gets the frame the pointer is pointing to. */
+UNIV_INLINE
+buf_frame_t*
+buf_frame_align(
+/*============*/
+ /* out: pointer to frame */
+ byte* ptr); /* in: pointer to a frame */
+
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Validates the buffer pool data structure.
@@ -757,10 +703,10 @@ buf_page_print(
const byte* read_buf, /*!< in: a database page */
ulint zip_size, /*!< in: compressed page size, or
0 for uncompressed pages */
- ulint flags) /*!< in: 0 or
+ ulint flags); /*!< in: 0 or
BUF_PAGE_PRINT_NO_CRASH or
BUF_PAGE_PRINT_NO_FULL */
- __attribute__((nonnull));
+
/********************************************************************//**
Decompress a block.
@return TRUE if successful */
@@ -781,12 +727,12 @@ buf_get_latched_pages_number(void);
/*==============================*/
#endif /* UNIV_DEBUG */
/*********************************************************************//**
-Returns the number of pending buf pool ios.
-@return number of pending I/O operations */
+Returns the number of pending buf pool read ios.
+@return number of pending read I/O operations */
UNIV_INTERN
ulint
-buf_get_n_pending_ios(void);
-/*=======================*/
+buf_get_n_pending_read_ios(void);
+/*============================*/
/*********************************************************************//**
Prints info of the buffer i/o. */
UNIV_INTERN
@@ -840,8 +786,8 @@ pool.
@return number of pending i/o operations */
UNIV_INTERN
ulint
-buf_pool_check_num_pending_io(void);
-/*===============================*/
+buf_pool_check_no_pending_io(void);
+/*==============================*/
/*********************************************************************//**
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
@@ -928,26 +874,17 @@ buf_page_belongs_to_unzip_LRU(
Gets the mutex of a block.
@return pointer to mutex protecting bpage */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
buf_page_get_mutex(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
__attribute__((pure));
-/*************************************************************************
-Gets the mutex of a block and enter the mutex with consistency. */
-UNIV_INLINE
-mutex_t*
-buf_page_get_mutex_enter(
-/*=========================*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
- __attribute__((pure));
-
/*********************************************************************//**
Get the flush type of a page.
@return flush type */
UNIV_INLINE
-enum buf_flush
+buf_flush_t
buf_page_get_flush_type(
/*====================*/
const buf_page_t* bpage) /*!< in: buffer page */
@@ -959,7 +896,7 @@ void
buf_page_set_flush_type(
/*====================*/
buf_page_t* bpage, /*!< in: buffer page */
- enum buf_flush flush_type); /*!< in: flush type */
+ buf_flush_t flush_type); /*!< in: flush type */
/*********************************************************************//**
Map a block to a file page. */
UNIV_INLINE
@@ -970,7 +907,7 @@ buf_block_set_file_page(
ulint space, /*!< in: tablespace id */
ulint page_no);/*!< in: page number */
/*********************************************************************//**
-Gets the io_fix state of a block. Requires that the block mutex is held.
+Gets the io_fix state of a block.
@return io_fix state */
UNIV_INLINE
enum buf_io_fix
@@ -979,17 +916,7 @@ buf_page_get_io_fix(
const buf_page_t* bpage) /*!< in: pointer to the control block */
__attribute__((pure));
/*********************************************************************//**
-Gets the io_fix state of a block. Does not assert that the block mutex is
-held, to be used in the cases where it is safe not to hold it.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_page_get_io_fix_unlocked(
-/*=========================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Gets the io_fix state of a block. Requires that the block mutex is held.
+Gets the io_fix state of a block.
@return io_fix state */
UNIV_INLINE
enum buf_io_fix
@@ -998,14 +925,15 @@ buf_block_get_io_fix(
const buf_block_t* block) /*!< in: pointer to the control block */
__attribute__((pure));
/*********************************************************************//**
-Gets the io_fix state of a block. Does not assert that the block mutex is
-held, to be used in the cases where it is safe not to hold it.
+Gets the io_fix state of a block. Does not assert that the
+buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
+not to hold it.
@return io_fix state */
UNIV_INLINE
enum buf_io_fix
-buf_block_get_io_fix_unlocked(
-/*==========================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
+buf_page_get_io_fix_unlocked(
+/*=========================*/
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
__attribute__((pure));
/*********************************************************************//**
Sets the io_fix state of a block. */
@@ -1025,7 +953,7 @@ buf_block_set_io_fix(
enum buf_io_fix io_fix);/*!< in: io_fix state */
/*********************************************************************//**
Makes a block sticky. A sticky block implies that even after we release
-the buf_pool->mutex and the block->mutex:
+the buf_pool->LRU_list_mutex and the block->mutex:
* it cannot be removed from the flush_list
* the block descriptor cannot be relocated
* it cannot be removed from the LRU list
@@ -1173,7 +1101,7 @@ buf_block_get_zip_size(
Gets the compressed page descriptor corresponding to an uncompressed page
if applicable. */
#define buf_block_get_page_zip(block) \
- (UNIV_LIKELY_NULL((block)->page.zip.data) ? &(block)->page.zip : NULL)
+ ((block)->page.zip.data ? &(block)->page.zip : NULL)
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Gets the block to whose frame the pointer is pointing to.
@@ -1229,7 +1157,7 @@ UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size, or 0 */
@@ -1241,9 +1169,9 @@ buf_page_init_for_read(
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool.
-@return TRUE if successful */
+@return true if successful */
UNIV_INTERN
-ibool
+bool
buf_page_io_complete(
/*=================*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
@@ -1267,14 +1195,6 @@ buf_pool_index(
/*===========*/
const buf_pool_t* buf_pool) /*!< in: buffer pool */
__attribute__((nonnull, const));
-/********************************************************************//**
-*/
-UNIV_INTERN
-buf_block_t*
-buf_page_from_array(
-/*================*/
- buf_pool_t* buf_pool,
- ulint n_block);
/******************************************************************//**
Returns the buffer pool instance given a page instance
@return buf_pool */
@@ -1316,35 +1236,83 @@ UNIV_INLINE
buf_page_t*
buf_page_hash_get_low(
/*==================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page
- within space */
- ulint fold); /*!< in: buf_page_address_fold(
- space, offset) */
+ buf_pool_t* buf_pool,/*!< buffer pool instance */
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page within space */
+ ulint fold); /*!< in: buf_page_address_fold(space, offset) */
/******************************************************************//**
Returns the control block of a file page, NULL if not found.
-@return block, NULL if not found or not a real control block */
+If the block is found and lock is not NULL then the appropriate
+page_hash lock is acquired in the specified lock mode. Otherwise,
+mode value is ignored. It is up to the caller to release the
+lock. If the block is found and the lock is NULL then the page_hash
+lock is released by this function.
+@return block, NULL if not found */
UNIV_INLINE
buf_page_t*
-buf_page_hash_get(
-/*==============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+buf_page_hash_get_locked(
+/*=====================*/
+ /*!< out: pointer to the bpage,
+ or NULL; if NULL, hash_lock
+ is also NULL. */
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint space, /*!< in: space id */
- ulint offset); /*!< in: offset of the page
- within space */
+ ulint offset, /*!< in: page number */
+ prio_rw_lock_t** lock, /*!< in/out: lock of the page
+ hash acquired if bpage is
+ found. NULL otherwise. If NULL
+ is passed then the hash_lock
+ is released by this function */
+ ulint lock_mode); /*!< in: RW_LOCK_EX or
+ RW_LOCK_SHARED. Ignored if
+ lock == NULL */
/******************************************************************//**
-Returns the control block of a file page, NULL if not found
-or an uncompressed page frame does not exist.
+Returns the control block of a file page, NULL if not found.
+If the block is found and lock is not NULL then the appropriate
+page_hash lock is acquired in the specified lock mode. Otherwise,
+mode value is ignored. It is up to the caller to release the
+lock. If the block is found and the lock is NULL then the page_hash
+lock is released by this function.
@return block, NULL if not found */
UNIV_INLINE
buf_block_t*
-buf_block_hash_get(
-/*===============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+buf_block_hash_get_locked(
+/*=====================*/
+ /*!< out: pointer to the bpage,
+ or NULL; if NULL, hash_lock
+ is also NULL. */
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint space, /*!< in: space id */
- ulint offset); /*!< in: offset of the page
- within space */
+ ulint offset, /*!< in: page number */
+ prio_rw_lock_t** lock, /*!< in/out: lock of the page
+ hash acquired if bpage is
+ found. NULL otherwise. If NULL
+ is passed then the hash_lock
+ is released by this function */
+ ulint lock_mode); /*!< in: RW_LOCK_EX or
+ RW_LOCK_SHARED. Ignored if
+ lock == NULL */
+/* There are four different ways we can try to get a bpage or block
+from the page hash:
+1) Caller already holds the appropriate page hash lock: in that case call
+the buf_page_hash_get_low() function.
+2) Caller wants to hold page hash lock in x-mode
+3) Caller wants to hold page hash lock in s-mode
+4) Caller doesn't want to hold page hash lock */
+#define buf_page_hash_get_s_locked(b, s, o, l) \
+ buf_page_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
+#define buf_page_hash_get_x_locked(b, s, o, l) \
+ buf_page_hash_get_locked(b, s, o, l, RW_LOCK_EX)
+#define buf_page_hash_get(b, s, o) \
+ buf_page_hash_get_locked(b, s, o, NULL, 0)
+
+#define buf_block_hash_get_s_locked(b, s, o, l) \
+ buf_block_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
+#define buf_block_hash_get_x_locked(b, s, o, l) \
+ buf_block_hash_get_locked(b, s, o, l, RW_LOCK_EX)
+#define buf_block_hash_get(b, s, o) \
+ buf_block_hash_get_locked(b, s, o, NULL, 0)
+
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
@return length of the free list */
@@ -1430,44 +1398,82 @@ buf_get_nth_chunk_block(
ulint n, /*!< in: nth chunk in the buffer pool */
ulint* chunk_size); /*!< in: chunk size */
+/********************************************************************//**
+Calculate the checksum of a page from compressed table and update the page. */
+UNIV_INTERN
+void
+buf_flush_update_zip_checksum(
+/*==========================*/
+ buf_frame_t* page, /*!< in/out: Page to update */
+ ulint zip_size, /*!< in: Compressed page size */
+ lsn_t lsn); /*!< in: Lsn to stamp on the page */
+
#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Checks if buf_pool->zip_mutex is owned and is serving for a given page as its
+block mutex.
+@return true if buf_pool->zip_mutex is owned. */
+UNIV_INLINE
+bool
+buf_own_zip_mutex_for_page(
+/*=======================*/
+ const buf_page_t* bpage)
+ __attribute__((nonnull,warn_unused_result));
+#endif /* UNIV_DEBUG */
+
/** The common buffer control block structure
for compressed and uncompressed frames */
/** Number of bits used for buffer page states. */
#define BUF_PAGE_STATE_BITS 3
-struct buf_page_struct{
+struct buf_page_t{
/** @name General fields
None of these bit-fields must be modified without holding
- buf_page_get_mutex() [buf_block_struct::mutex or
+ buf_page_get_mutex() [buf_block_t::mutex or
buf_pool->zip_mutex], since they can be stored in the same
- machine word. Some of these fields are additionally protected
- by buf_pool->mutex. */
+ machine word. */
/* @{ */
- unsigned space:32; /*!< tablespace id; also protected
- by buf_pool->mutex. */
- unsigned offset:32; /*!< page number; also protected
- by buf_pool->mutex. */
+ unsigned space:32; /*!< tablespace id. */
+ unsigned offset:32; /*!< page number. */
unsigned state:BUF_PAGE_STATE_BITS;
- /*!< state of the control block; also
- protected by buf_pool->mutex.
+ /*!< state of the control block.
State transitions from
BUF_BLOCK_READY_FOR_USE to
BUF_BLOCK_MEMORY need not be
protected by buf_page_get_mutex().
- @see enum buf_page_state */
+ @see enum buf_page_state.
+ State changes that are relevant
+ to page_hash are additionally
+ protected by the appropriate
+ page_hash mutex i.e.: if a page
+ is in page_hash or is being
+ added to/removed from page_hash
+ then the corresponding changes
+ must also be protected by
+ page_hash mutex. */
#ifndef UNIV_HOTBACKUP
unsigned flush_type:2; /*!< if this block is currently being
flushed to disk, this tells the
- flush_type.
- @see enum buf_flush */
- unsigned io_fix:2; /*!< type of pending I/O operation;
- also protected by buf_pool->mutex
- @see enum buf_io_fix */
+ flush_type. Writes during flushing
+ are protected by the buf_page_get_mutex()
+ mutex and the corresponding flush state
+ mutex.
+ @see buf_flush_t */
+ unsigned io_fix:2; /*!< type of pending I/O operation.
+ Transitions from BUF_IO_NONE to
+ BUF_IO_WRITE and back are protected by
+ the buf_page_get_mutex() mutex and the
+ corresponding flush state mutex. The
+ flush state mutex protection for io_fix
+ and flush_type is not strictly
+ required, but it ensures consistent
+ buffer pool instance state snapshots in
+ buf_pool_validate_instance(). */
unsigned buf_fix_count:19;/*!< count of how manyfold this block
is currently bufferfixed */
unsigned buf_pool_index:6;/*!< index number of the buffer pool
@@ -1479,7 +1485,7 @@ struct buf_page_struct{
#endif /* !UNIV_HOTBACKUP */
page_zip_des_t zip; /*!< compressed page; zip.data
(but not the data it points to) is
- also protected by buf_pool->mutex;
+ protected by buf_pool->zip_mutex;
state == BUF_BLOCK_ZIP_PAGE and
zip.data == NULL means an active
buf_pool->watch */
@@ -1492,15 +1498,13 @@ struct buf_page_struct{
ibool in_zip_hash; /*!< TRUE if in buf_pool->zip_hash */
#endif /* UNIV_DEBUG */
- /** @name Page flushing fields
- All these are protected by buf_pool->mutex. */
+ /** @name Page flushing fields */
/* @{ */
- /* UT_LIST_NODE_T(buf_page_t) list; */
+ UT_LIST_NODE_T(buf_page_t) list;
/*!< based on state, this is a
list node, protected either by
- buf_pool->mutex or by
- buf_pool->flush_list_mutex,
+ a corresponding list mutex,
in one of the following lists in
buf_pool:
@@ -1508,13 +1512,13 @@ struct buf_page_struct{
- BUF_BLOCK_FILE_PAGE: flush_list
- BUF_BLOCK_ZIP_DIRTY: flush_list
- BUF_BLOCK_ZIP_PAGE: zip_clean
- - BUF_BLOCK_ZIP_FREE: zip_free[]
If bpage is part of flush_list
then the node pointers are
covered by buf_pool->flush_list_mutex.
Otherwise these pointers are
- protected by buf_pool->mutex.
+ protected by a corresponding list
+ mutex.
The contents of the list node
is undefined if !in_flush_list
@@ -1524,10 +1528,6 @@ struct buf_page_struct{
BUF_BLOCK_REMOVE_HASH or
BUF_BLOCK_READY_FOR_USE. */
- /* resplit for optimistic use */
- UT_LIST_NODE_T(buf_page_t) free;
- UT_LIST_NODE_T(buf_page_t) flush_list;
- UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
#ifdef UNIV_DEBUG
ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
when buf_pool->flush_list_mutex is
@@ -1541,17 +1541,17 @@ struct buf_page_struct{
reads can happen while holding
any one of the two mutexes */
ibool in_free_list; /*!< TRUE if in buf_pool->free; when
- buf_pool->mutex is free, the following
- should hold: in_free_list
+ buf_pool->free_list_mutex is free, the
+ following should hold: in_free_list
== (state == BUF_BLOCK_NOT_USED) */
#endif /* UNIV_DEBUG */
- ib_uint64_t newest_modification;
+ lsn_t newest_modification;
/*!< log sequence number of
the youngest modification to
this block, zero if not
modified. Protected by block
mutex */
- ib_uint64_t oldest_modification;
+ lsn_t oldest_modification;
/*!< log sequence number of
the START of the log entry
written of the oldest
@@ -1565,20 +1565,21 @@ struct buf_page_struct{
reads can happen while holding
any one of the two mutexes */
/* @} */
- /** @name LRU replacement algorithm fields
- These fields are protected by buf_pool->mutex only (not
- buf_pool->zip_mutex or buf_block_struct::mutex). */
+ /** @name LRU replacement algorithm fields */
/* @{ */
UT_LIST_NODE_T(buf_page_t) LRU;
/*!< node of the LRU list */
-//#ifdef UNIV_DEBUG
+#ifdef UNIV_DEBUG
ibool in_LRU_list; /*!< TRUE if the page is in
the LRU list; used in
debugging */
-//#endif /* UNIV_DEBUG */
+#endif /* UNIV_DEBUG */
unsigned old:1; /*!< TRUE if the block is in the old
- blocks in buf_pool->LRU_old */
+ blocks in buf_pool->LRU_old. Protected
+ by the LRU list mutex. May be read for
+ heuristics purposes under the block
+ mutex instead. */
unsigned freed_page_clock:31;/*!< the value of
buf_pool->freed_page_clock
when this block was the last
@@ -1595,15 +1596,17 @@ struct buf_page_struct{
ibool is_corrupt;
# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ibool file_page_was_freed;
- /*!< this is set to TRUE when fsp
- frees a page in buffer pool */
+ /*!< this is set to TRUE when
+ fsp frees a page in buffer pool;
+ protected by buf_pool->zip_mutex
+ or buf_block_t::mutex. */
# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
};
/** The buffer control block structure */
-struct buf_block_struct{
+struct buf_block_t{
/** @name General fields */
/* @{ */
@@ -1622,14 +1625,13 @@ struct buf_block_struct{
a block is in the unzip_LRU list
if page.state == BUF_BLOCK_FILE_PAGE
and page.zip.data != NULL */
-//#ifdef UNIV_DEBUG
+#ifdef UNIV_DEBUG
ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
decompressed LRU list;
used in debugging */
-//#endif /* UNIV_DEBUG */
- mutex_t mutex; /*!< mutex protecting this block:
- state (also protected by the buffer
- pool mutex), io_fix, buf_fix_count,
+#endif /* UNIV_DEBUG */
+ ib_mutex_t mutex; /*!< mutex protecting this block:
+ state, io_fix, buf_fix_count,
and accessed; we introduce this new
mutex in InnoDB-5.1 to relieve
contention on the buffer pool mutex */
@@ -1638,8 +1640,8 @@ struct buf_block_struct{
unsigned lock_hash_val:32;/*!< hashed value of the page address
in the record lock hash table;
protected by buf_block_t::lock
- (or buf_block_t::mutex, buf_pool->mutex
- in buf_page_get_gen(),
+ (or buf_block_t::mutex in
+ buf_page_get_gen(),
buf_page_init_for_read()
and buf_page_create()) */
ibool check_index_page_at_flush;
@@ -1662,8 +1664,8 @@ struct buf_block_struct{
positioning: if the modify clock has
not changed, we know that the pointer
is still valid; this field may be
- changed if the thread (1) owns the
- pool mutex and the page is not
+ changed if the thread (1) owns the LRU
+ list mutex and the page is not
bufferfixed, or (2) the thread has an
x-latch on the block */
/* @} */
@@ -1686,11 +1688,11 @@ struct buf_block_struct{
/** @name Hash search fields
These 5 fields may only be modified when we have
an x-latch on btr_search_latch AND
- - we are holding an s-latch or x-latch on buf_block_struct::lock or
- - we know that buf_block_struct::buf_fix_count == 0.
+ - we are holding an s-latch or x-latch on buf_block_t::lock or
+ - we know that buf_block_t::buf_fix_count == 0.
An exception to this is when we init or create a page
- in the buffer pool in buf0buf.c.
+ in the buffer pool in buf0buf.cc.
Another exception is that assigning block->index = NULL
is allowed whenever holding an x-latch on btr_search_latch. */
@@ -1745,25 +1747,36 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
-/** A chunk of buffers. The buffer pool is allocated in chunks. */
-struct buf_chunk_struct{
- ulint mem_size; /*!< allocated size of the chunk */
- ulint size; /*!< size of frames[] and blocks[] */
- void* mem; /*!< pointer to the memory area which
- was allocated for the frames */
- buf_block_t* blocks; /*!< array of buffer control blocks */
+/** Struct that is embedded in the free zip blocks */
+struct buf_buddy_free_t {
+ union {
+ ulint size; /*!< size of the block */
+ byte bytes[FIL_PAGE_DATA];
+ /*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID]
+ == BUF_BUDDY_FREE_STAMP denotes a free
+ block. If the space_id field of buddy
+ block != BUF_BUDDY_FREE_STAMP, the block
+ is not in any zip_free list. If the
+ space_id is BUF_BUDDY_FREE_STAMP then
+ stamp[0] will contain the
+ buddy block size. */
+ } stamp;
+
+ buf_page_t bpage; /*!< Embedded bpage descriptor */
+ UT_LIST_NODE_T(buf_buddy_free_t) list;
+ /*!< Node of zip_free list */
};
/** @brief The buffer pool statistics structure. */
-struct buf_pool_stat_struct{
+struct buf_pool_stat_t{
ulint n_page_gets; /*!< number of page gets performed;
also successful searches through
the adaptive hash index are
- counted as page gets; this field
- is NOT protected by the buffer
- pool mutex */
- ulint n_pages_read; /*!< number read operations */
- ulint n_pages_written;/*!< number write operations */
+ counted as page gets. */
+ ulint n_pages_read; /*!< number of read operations. Accessed
+ atomically. */
+ ulint n_pages_written;/*!< number of write operations. Accessed
+ atomically. */
ulint n_pages_created;/*!< number of pages created
in the pool with no read */
ulint n_ra_pages_read_rnd;/*!< number of pages read in
@@ -1781,10 +1794,11 @@ struct buf_pool_stat_struct{
buf_page_peek_if_too_old() */
ulint LRU_bytes; /*!< LRU size in bytes */
ulint flush_list_bytes;/*!< flush_list size in bytes */
+ ulint buf_lru_flush_page_count;
};
/** Statistics of buddy blocks of a given size. */
-struct buf_buddy_stat_struct {
+struct buf_buddy_stat_t {
/** Number of blocks allocated from the buddy system. */
ulint used;
/** Number of blocks relocated by the buddy system. */
@@ -1798,21 +1812,20 @@ struct buf_buddy_stat_struct {
NOTE! The definition appears here only for other modules of this
directory (buf) to see it. Do not use from outside! */
-struct buf_pool_struct{
+struct buf_pool_t{
/** @name General fields */
/* @{ */
- mutex_t mutex; /*!< Buffer pool mutex of this
- instance */
- mutex_t zip_mutex; /*!< Zip mutex of this buffer
+ ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer
pool instance, protects compressed
only pages (of type buf_page_t, not
buf_block_t */
- mutex_t LRU_list_mutex;
- rw_lock_t page_hash_latch;
- mutex_t free_list_mutex;
- mutex_t zip_free_mutex;
- mutex_t zip_hash_mutex;
+ ib_prio_mutex_t LRU_list_mutex;
+ ib_prio_mutex_t free_list_mutex;
+ ib_mutex_t zip_free_mutex;
+ ib_mutex_t zip_hash_mutex;
+ ib_mutex_t flush_state_mutex; /*!< Flush state protection
+ mutex */
ulint instance_no; /*!< Array index of this buffer
pool instance */
ulint old_pool_size; /*!< Old pool size in bytes */
@@ -1823,30 +1836,34 @@ struct buf_pool_struct{
ulint buddy_n_frames; /*!< Number of frames allocated from
the buffer pool to the buddy system */
#endif
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ulint mutex_exit_forbidden; /*!< Forbid release mutex */
-#endif
ulint n_chunks; /*!< number of buffer pool chunks */
buf_chunk_t* chunks; /*!< buffer pool chunks */
ulint curr_size; /*!< current pool size in pages */
+ ulint read_ahead_area;/*!< size in pages of the area which
+ the read-ahead algorithms read if
+ invoked */
hash_table_t* page_hash; /*!< hash table of buf_page_t or
buf_block_t file pages,
buf_page_in_file() == TRUE,
- indexed by (space_id, offset) */
+ indexed by (space_id, offset).
+ page_hash is protected by an
+ array of rw-locks. */
hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks
whose frames are allocated to the
zip buddy system,
indexed by block->frame */
ulint n_pend_reads; /*!< number of pending read
- operations */
- ulint n_pend_unzip; /*!< number of pending decompressions */
+ operations. Accessed atomically */
+ ulint n_pend_unzip; /*!< number of pending decompressions.
+ Accessed atomically */
time_t last_printout_time;
/*!< when buf_print_io was last time
- called */
+ called. Accesses not protected */
buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
/*!< Statistics of buddy system,
- indexed by block size */
+ indexed by block size. Protected by
+ zip_free_mutex. */
buf_pool_stat_t stat; /*!< current statistics */
buf_pool_stat_t old_stat; /*!< old statistics */
@@ -1856,22 +1873,29 @@ struct buf_pool_struct{
/* @{ */
- mutex_t flush_list_mutex;/*!< mutex protecting the
+ ib_mutex_t flush_list_mutex;/*!< mutex protecting the
flush list access. This mutex
protects flush_list, flush_rbt
and bpage::list pointers when
the bpage is on flush_list. It
also protects writes to
- bpage::oldest_modification */
+ bpage::oldest_modification and
+ flush_list_hp */
+ const buf_page_t* flush_list_hp;/*!< "hazard pointer"
+ used during scan of flush_list
+ while doing flush list batch.
+ Protected by flush_list_mutex */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
/*!< base node of the modified block
list */
ibool init_flush[BUF_FLUSH_N_TYPES];
/*!< this is TRUE when a flush of the
- given type is being initialized */
+ given type is being initialized.
+ Protected by flush_state_mutex. */
ulint n_flush[BUF_FLUSH_N_TYPES];
/*!< this is the number of pending
- writes in the given flush type */
+ writes in the given flush type.
+ Protected by flush_state_mutex. */
os_event_t no_flush[BUF_FLUSH_N_TYPES];
/*!< this is in the set state
when there is no flush batch
@@ -1898,11 +1922,17 @@ struct buf_pool_struct{
billion! A thread is allowed
to read this for heuristic
purposes without holding any
- mutex or latch */
- ulint LRU_flush_ended;/*!< when an LRU flush ends for a page,
- this is incremented by one; this is
- set to zero when a buffer block is
- allocated */
+ mutex or latch. For non-heuristic
+ purposes protected by LRU_list_mutex */
+ ibool try_LRU_scan; /*!< Set to FALSE when an LRU
+ scan for free block fails. This
+ flag is used to avoid repeated
+ scans of LRU list when we know
+ that there is no free block
+ available in the scan depth for
+ eviction. Set to TRUE whenever
+ we flush a batch from the
+ buffer pool. Accessed atomically. */
/* @} */
/** @name LRU replacement algorithm fields */
@@ -1923,14 +1953,15 @@ struct buf_pool_struct{
ulint LRU_old_len; /*!< length of the LRU list from
the block to which LRU_old points
onward, including that block;
- see buf0lru.c for the restrictions
+ see buf0lru.cc for the restrictions
on this value; 0 if LRU_old == NULL;
NOTE: LRU_old_len must be adjusted
whenever LRU_old shrinks or grows! */
UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
/*!< base node of the
- unzip_LRU list */
+ unzip_LRU list. The list is protected
+ by LRU list mutex. */
/* @} */
/** @name Buddy allocator fields
@@ -1942,35 +1973,23 @@ struct buf_pool_struct{
UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
/*!< unmodified compressed pages */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES_MAX];
+ UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX];
/*!< buddy free lists */
- buf_page_t watch[BUF_POOL_WATCH_SIZE];
+ buf_page_t* watch;
/*!< Sentinel records for buffer
- pool watches. Protected by
- buf_pool->mutex. */
-
-//#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
-//# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
-//#endif
-#if BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE
-# error "BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE"
+ pool watches. */
+
+#if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN
+# error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
#endif
/* @} */
};
-/** @name Accessors for buf_pool->mutex.
-Use these instead of accessing buf_pool->mutex directly. */
+/** @name Accessors for buffer pool mutexes
+Use these instead of accessing buffer pool mutexes directly. */
/* @{ */
-/** Test if a buffer pool mutex is owned. */
-#define buf_pool_mutex_own(b) mutex_own(&b->mutex)
-/** Acquire a buffer pool mutex. */
-/* the buf_pool_mutex is changed the latch order */
-#define buf_pool_mutex_enter(b) do { \
- mutex_enter(&b->mutex); \
-} while (0)
-
/** Test if flush list mutex is owned. */
#define buf_flush_list_mutex_own(b) mutex_own(&b->flush_list_mutex)
@@ -1985,31 +2004,47 @@ Use these instead of accessing buf_pool->mutex directly. */
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/** Forbid the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_forbid(b) do { \
- ut_ad(buf_pool_mutex_own(b)); \
- b->mutex_exit_forbidden++; \
-} while (0)
-/** Allow the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_allow(b) do { \
- ut_ad(buf_pool_mutex_own(b)); \
- ut_a(b->mutex_exit_forbidden); \
- b->mutex_exit_forbidden--; \
-} while (0)
-/** Release the buffer pool mutex. */
-# define buf_pool_mutex_exit(b) do { \
- ut_a(!b->mutex_exit_forbidden); \
- mutex_exit(&b->mutex); \
-} while (0)
-#else
-/** Forbid the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_forbid(b) ((void) 0)
-/** Allow the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_allow(b) ((void) 0)
-/** Release the buffer pool mutex. */
-# define buf_pool_mutex_exit(b) mutex_exit(&b->mutex)
-#endif
+/** Get the page_hash lock that covers fold f in buffer pool b. */
+# define buf_page_hash_lock_get(b, f) \
+ hash_get_lock((b)->page_hash, (f))
+
+#ifdef UNIV_SYNC_DEBUG
+/** Test if the page_hash lock of page p in pool b is held in s-mode. */
+# define buf_page_hash_lock_held_s(b, p) \
+ rw_lock_own(buf_page_hash_lock_get((b), \
+ buf_page_address_fold((p)->space, \
+ (p)->offset)), \
+ RW_LOCK_SHARED)
+
+/** Test if the page_hash lock of page p in pool b is held in x-mode. */
+# define buf_page_hash_lock_held_x(b, p) \
+ rw_lock_own(buf_page_hash_lock_get((b), \
+ buf_page_address_fold((p)->space, \
+ (p)->offset)), \
+ RW_LOCK_EX)
+
+/** Test if the page_hash lock of page p is held in x or s-mode. */
+# define buf_page_hash_lock_held_s_or_x(b, p) \
+ (buf_page_hash_lock_held_s((b), (p)) \
+ || buf_page_hash_lock_held_x((b), (p)))
+
+# define buf_block_hash_lock_held_s(b, p) \
+ buf_page_hash_lock_held_s((b), &(p)->page)
+
+# define buf_block_hash_lock_held_x(b, p) \
+ buf_page_hash_lock_held_x((b), &(p)->page)
+
+# define buf_block_hash_lock_held_s_or_x(b, p) \
+ buf_page_hash_lock_held_s_or_x((b), &(p)->page)
+#else /* UNIV_SYNC_DEBUG */
+# define buf_page_hash_lock_held_s(b, p) (TRUE)
+# define buf_page_hash_lock_held_x(b, p) (TRUE)
+# define buf_page_hash_lock_held_s_or_x(b, p) (TRUE)
+# define buf_block_hash_lock_held_s(b, p) (TRUE)
+# define buf_block_hash_lock_held_x(b, p) (TRUE)
+# define buf_block_hash_lock_held_s_or_x(b, p) (TRUE)
+#endif /* UNIV_SYNC_DEBUG */
+
#endif /* !UNIV_HOTBACKUP */
/* @} */
@@ -2057,6 +2092,32 @@ FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
(3) io_fix == 0.
*/
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/** Functor to validate the LRU list. */
+struct CheckInLRUList {
+ void operator()(const buf_page_t* elem) const
+ {
+ ut_a(elem->in_LRU_list);
+ }
+};
+
+/** Functor to validate the free list: asserts elem->in_free_list. */
+struct CheckInFreeList {
+ void operator()(const buf_page_t* elem) const
+ {
+ ut_a(elem->in_free_list);
+ }
+};
+
+struct CheckUnzipLRUAndLRUList {
+ void operator()(const buf_block_t* elem) const
+ {
+ ut_a(elem->page.in_LRU_list);
+ ut_a(elem->in_unzip_LRU_list);
+ }
+};
+#endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */
+
#ifndef UNIV_NONINL
#include "buf0buf.ic"
#endif
diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic
index 18c46b6412e..4ef354b11ab 100644
--- a/storage/xtradb/include/buf0buf.ic
+++ b/storage/xtradb/include/buf0buf.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -35,8 +35,18 @@ Created 11/5/1995 Heikki Tuuri
#include "buf0flu.h"
#include "buf0lru.h"
#include "buf0rea.h"
+
+/** A chunk of buffers. The buffer pool is allocated in chunks. */
+struct buf_chunk_t{
+ ulint mem_size; /*!< allocated size of the chunk */
+ ulint size; /*!< size of frames[] and blocks[] */
+ void* mem; /*!< pointer to the memory area which
+ was allocated for the frames */
+ buf_block_t* blocks; /*!< array of buffer control blocks */
+};
+
+
#include "srv0srv.h"
-#include "buf0types.h"
/*********************************************************************//**
Gets the current size of buffer buf_pool in bytes.
@@ -111,7 +121,7 @@ buf_page_get_freed_page_clock(
/*==========================*/
const buf_page_t* bpage) /*!< in: block */
{
- /* This is sometimes read without holding buf_pool->mutex. */
+ /* This is sometimes read without holding any buffer pool mutex. */
return(bpage->freed_page_clock);
}
@@ -163,7 +173,7 @@ buf_page_peek_if_too_old(
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- if (UNIV_UNLIKELY(buf_pool->freed_page_clock == 0)) {
+ if (buf_pool->freed_page_clock == 0) {
/* If eviction has not started yet, do not update the
statistics or move blocks in the LRU list. This is
either the warm-up phase or an in-memory workload. */
@@ -198,7 +208,7 @@ buf_page_get_state(
#ifdef UNIV_DEBUG
switch (state) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
case BUF_BLOCK_NOT_USED:
@@ -238,7 +248,7 @@ buf_page_set_state(
enum buf_page_state old_state = buf_page_get_state(bpage);
switch (old_state) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
ut_error;
break;
case BUF_BLOCK_ZIP_PAGE:
@@ -293,10 +303,8 @@ buf_page_in_file(
const buf_page_t* bpage) /*!< in: pointer to control block */
{
switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
- /* This is a free page in buf_pool->zip_free[].
- Such pages should only be accessed by the buddy allocator. */
- /* ut_error; */ /* optimistic */
+ case BUF_BLOCK_POOL_WATCH:
+ ut_error;
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
@@ -332,23 +340,16 @@ buf_page_belongs_to_unzip_LRU(
Gets the mutex of a block.
@return pointer to mutex protecting bpage */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
buf_page_get_mutex(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- if (/*equivalent to buf_pool_watch_is_sentinel(buf_pool, bpage)*/
- bpage >= &buf_pool->watch[0]
- && bpage < &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
- /* TODO: this code is the interim. should be confirmed later. */
- return(&buf_pool->zip_mutex);
- }
-
switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
- /* ut_error; */ /* optimistic */
+ case BUF_BLOCK_POOL_WATCH:
+ ut_error;
return(NULL);
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
@@ -358,47 +359,25 @@ buf_page_get_mutex(
}
}
-/*************************************************************************
-Gets the mutex of a block and enter the mutex with consistency. */
-UNIV_INLINE
-mutex_t*
-buf_page_get_mutex_enter(
-/*=========================*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
-{
- mutex_t* block_mutex;
-
- while(1) {
- block_mutex = buf_page_get_mutex(bpage);
- if (!block_mutex)
- return block_mutex;
-
- mutex_enter(block_mutex);
- if (block_mutex == buf_page_get_mutex(bpage))
- return block_mutex;
- mutex_exit(block_mutex);
- }
-}
-
/*********************************************************************//**
Get the flush type of a page.
@return flush type */
UNIV_INLINE
-enum buf_flush
+buf_flush_t
buf_page_get_flush_type(
/*====================*/
const buf_page_t* bpage) /*!< in: buffer page */
{
- enum buf_flush flush_type = (enum buf_flush) bpage->flush_type;
+ buf_flush_t flush_type = (buf_flush_t) bpage->flush_type;
#ifdef UNIV_DEBUG
switch (flush_type) {
case BUF_FLUSH_LRU:
- case BUF_FLUSH_SINGLE_PAGE:
case BUF_FLUSH_LIST:
+ case BUF_FLUSH_SINGLE_PAGE:
return(flush_type);
case BUF_FLUSH_N_TYPES:
- break;
+ ut_error;
}
ut_error;
#endif /* UNIV_DEBUG */
@@ -411,7 +390,7 @@ void
buf_page_set_flush_type(
/*====================*/
buf_page_t* bpage, /*!< in: buffer page */
- enum buf_flush flush_type) /*!< in: flush type */
+ buf_flush_t flush_type) /*!< in: flush type */
{
bpage->flush_type = flush_type;
ut_ad(buf_page_get_flush_type(bpage) == flush_type);
@@ -433,7 +412,7 @@ buf_block_set_file_page(
}
/*********************************************************************//**
-Gets the io_fix state of a block. Requires that the block mutex is held.
+Gets the io_fix state of a block.
@return io_fix state */
UNIV_INLINE
enum buf_io_fix
@@ -446,8 +425,9 @@ buf_page_get_io_fix(
}
/*********************************************************************//**
-Gets the io_fix state of a block. Does not assert that the block mutex is
-held, to be used in the cases where it is safe not to hold it.
+Gets the io_fix state of a block. Does not assert that the
+buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
+not to hold it.
@return io_fix state */
UNIV_INLINE
enum buf_io_fix
@@ -470,7 +450,7 @@ buf_page_get_io_fix_unlocked(
}
/*********************************************************************//**
-Gets the io_fix state of a block. Requires that the block mutex is held.
+Gets the io_fix state of a block.
@return io_fix state */
UNIV_INLINE
enum buf_io_fix
@@ -482,8 +462,9 @@ buf_block_get_io_fix(
}
/*********************************************************************//**
-Gets the io_fix state of a block. Does not assert that the block mutex is
-held, to be used in the cases where it is safe not to hold it.
+Gets the io_fix state of a block. Does not assert that the
+buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
+not to hold it.
@return io_fix state */
UNIV_INLINE
enum buf_io_fix
@@ -494,6 +475,7 @@ buf_block_get_io_fix_unlocked(
return(buf_page_get_io_fix_unlocked(&block->page));
}
+
/*********************************************************************//**
Sets the io_fix state of a block. */
UNIV_INLINE
@@ -503,10 +485,6 @@ buf_page_set_io_fix(
buf_page_t* bpage, /*!< in/out: control block */
enum buf_io_fix io_fix) /*!< in: io_fix state */
{
-#ifdef UNIV_DEBUG
- //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- //ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
bpage->io_fix = io_fix;
@@ -527,7 +505,7 @@ buf_block_set_io_fix(
/*********************************************************************//**
Makes a block sticky. A sticky block implies that even after we release
-the buf_pool->mutex and the block->mutex:
+the buf_pool->LRU_list_mutex and the block->mutex:
* it cannot be removed from the flush_list
* the block descriptor cannot be relocated
* it cannot be removed from the LRU list
@@ -546,6 +524,7 @@ buf_page_set_sticky(
#endif
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+ ut_ad(bpage->in_LRU_list);
bpage->io_fix = BUF_IO_PIN;
}
@@ -558,10 +537,6 @@ buf_page_unset_sticky(
/*==================*/
buf_page_t* bpage) /*!< in/out: control block */
{
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-#endif
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN);
@@ -577,15 +552,11 @@ buf_page_can_relocate(
/*==================*/
const buf_page_t* bpage) /*!< control block being relocated */
{
-#ifdef UNIV_DEBUG
- //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- //ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_in_file(bpage));
- //ut_ad(bpage->in_LRU_list);
+ ut_ad(bpage->in_LRU_list);
- return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
+ return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
&& bpage->buf_fix_count == 0);
}
@@ -599,9 +570,13 @@ buf_page_is_old(
const buf_page_t* bpage) /*!< in: control block */
{
#ifdef UNIV_DEBUG
- //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- //ut_ad(buf_pool_mutex_own(buf_pool));
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
#endif
+ /* Buffer page mutex is not strictly required here for heuristic
+ purposes even if LRU mutex is not being held. Keep the assertion
+ for now since all the callers hold it. */
+ ut_ad(mutex_own(buf_page_get_mutex(bpage))
+ || mutex_own(&buf_pool->LRU_list_mutex));
ut_ad(buf_page_in_file(bpage));
return(bpage->old);
@@ -620,7 +595,6 @@ buf_page_set_old(
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
#endif /* UNIV_DEBUG */
ut_a(buf_page_in_file(bpage));
- //ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
ut_ad(bpage->in_LRU_list);
@@ -666,11 +640,7 @@ buf_page_set_accessed(
/*==================*/
buf_page_t* bpage) /*!< in/out: control block */
{
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-#endif
ut_a(buf_page_in_file(bpage));
if (!bpage->access_time) {
@@ -689,7 +659,7 @@ buf_page_get_block(
/*===============*/
buf_page_t* bpage) /*!< in: control block, or NULL */
{
- if (UNIV_LIKELY(bpage != NULL)) {
+ if (bpage != NULL) {
ut_ad(buf_page_in_file(bpage));
if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
@@ -714,7 +684,7 @@ buf_block_get_frame(
SRV_CORRUPT_TABLE_CHECK(block, return(0););
switch (buf_block_get_state(block)) {
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
case BUF_BLOCK_NOT_USED:
@@ -780,6 +750,23 @@ buf_page_get_page_no(
return(bpage->offset);
}
+/***********************************************************************
+FIXME_FTS Gets the frame the pointer is pointing to. */
+UNIV_INLINE
+buf_frame_t*
+buf_frame_align(
+/*============*/
+ /* out: pointer to frame */
+ byte* ptr) /* in: pointer to a frame */
+{
+ buf_frame_t* frame;
+
+ ut_ad(ptr);
+
+ frame = (buf_frame_t*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+
+ return(frame);
+}
/*********************************************************************//**
Gets the page number of a block.
@@ -805,7 +792,8 @@ buf_page_get_zip_size(
/*==================*/
const buf_page_t* bpage) /*!< in: pointer to the control block */
{
- return(bpage->zip.ssize ? 512 << bpage->zip.ssize : 0);
+ return(bpage->zip.ssize
+ ? (UNIV_ZIP_SIZE_MIN >> 1) << bpage->zip.ssize : 0);
}
/*********************************************************************//**
@@ -817,7 +805,8 @@ buf_block_get_zip_size(
/*===================*/
const buf_block_t* block) /*!< in: pointer to the control block */
{
- return(block->page.zip.ssize ? 512 << block->page.zip.ssize : 0);
+ return(block->page.zip.ssize
+ ? (UNIV_ZIP_SIZE_MIN >> 1) << block->page.zip.ssize : 0);
}
#ifndef UNIV_HOTBACKUP
@@ -913,19 +902,13 @@ buf_block_free(
/*===========*/
buf_block_t* block) /*!< in, own: block to be freed */
{
- //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
-
- //buf_pool_mutex_enter(buf_pool);
-
mutex_enter(&block->mutex);
ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
- buf_LRU_block_free_non_file_page(block, FALSE);
+ buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
-
- //buf_pool_mutex_exit(buf_pool);
}
#endif /* !UNIV_HOTBACKUP */
@@ -966,31 +949,31 @@ Gets the youngest modification log sequence number for a frame.
Returns zero if not file page or no modification occurred yet.
@return newest modification to page */
UNIV_INLINE
-ib_uint64_t
+lsn_t
buf_page_get_newest_modification(
/*=============================*/
const buf_page_t* bpage) /*!< in: block containing the
page frame */
{
- ib_uint64_t lsn;
- mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+ lsn_t lsn;
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
- if (block_mutex && buf_page_in_file(bpage)) {
+ mutex_enter(block_mutex);
+
+ if (buf_page_in_file(bpage)) {
lsn = bpage->newest_modification;
} else {
lsn = 0;
}
- if (block_mutex) {
- mutex_exit(block_mutex);
- }
+ mutex_exit(block_mutex);
return(lsn);
}
/********************************************************************//**
Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
+LRU list mutex and block bufferfix count has to be zero, (2) or own an x-lock
on the block. */
UNIV_INLINE
void
@@ -999,7 +982,7 @@ buf_block_modify_clock_inc(
buf_block_t* block) /*!< in: block */
{
#ifdef UNIV_SYNC_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+ buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*) block);
ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
&& (block->page.buf_fix_count == 0))
@@ -1108,22 +1091,24 @@ UNIV_INLINE
buf_page_t*
buf_page_hash_get_low(
/*==================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page
- within space */
- ulint fold) /*!< in: buf_page_address_fold(
- space, offset) */
+ buf_pool_t* buf_pool,/*!< buffer pool instance */
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page within space */
+ ulint fold) /*!< in: buf_page_address_fold(space, offset) */
{
buf_page_t* bpage;
- ut_ad(buf_pool);
- //ut_ad(buf_pool_mutex_own(buf_pool));
#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
- || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
-#endif
- ut_ad(fold == buf_page_address_fold(space, offset));
+ ulint hash_fold;
+ prio_rw_lock_t* hash_lock;
+
+ hash_fold = buf_page_address_fold(space, offset);
+ ut_ad(hash_fold == fold);
+
+ hash_lock = hash_get_lock(buf_pool->page_hash, fold);
+ ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)
+ || rw_lock_own(hash_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
/* Look for the page in the hash table */
@@ -1148,46 +1133,145 @@ buf_page_hash_get_low(
/******************************************************************//**
Returns the control block of a file page, NULL if not found.
-@return block, NULL if not found or not a real control block */
+If the block is found and lock is not NULL then the appropriate
+page_hash lock is acquired in the specified lock mode. Otherwise,
+mode value is ignored. It is up to the caller to release the
+lock. If the block is found and the lock is NULL then the page_hash
+lock is released by this function.
+@return block, NULL if not found */
UNIV_INLINE
buf_page_t*
-buf_page_hash_get(
-/*==============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+buf_page_hash_get_locked(
+/*=====================*/
+ /*!< out: pointer to the bpage,
+ or NULL; if NULL, hash_lock
+ is also NULL. */
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page
- within space */
-{
- buf_page_t* bpage;
- ulint fold = buf_page_address_fold(space, offset);
+ ulint offset, /*!< in: page number */
+ prio_rw_lock_t** lock, /*!< in/out: lock of the page
+ hash acquired if bpage is
+ found. NULL otherwise. If NULL
+ is passed then the hash_lock
+ is released by this function */
+ ulint lock_mode) /*!< in: RW_LOCK_EX or
+ RW_LOCK_SHARED. Ignored if
+ lock == NULL */
+{
+ buf_page_t* bpage = NULL;
+ ulint fold;
+ prio_rw_lock_t* hash_lock;
+ ulint mode = RW_LOCK_SHARED;
+
+ if (lock != NULL) {
+ *lock = NULL;
+ ut_ad(lock_mode == RW_LOCK_EX
+ || lock_mode == RW_LOCK_SHARED);
+ mode = lock_mode;
+ }
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ fold = buf_page_address_fold(space, offset);
+ hash_lock = hash_get_lock(buf_pool->page_hash, fold);
- if (bpage && buf_pool_watch_is_sentinel(buf_pool, bpage)) {
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
+ && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (mode == RW_LOCK_SHARED) {
+ rw_lock_s_lock(hash_lock);
+ } else {
+ rw_lock_x_lock(hash_lock);
+ }
+
+ bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+
+ if (!bpage || buf_pool_watch_is_sentinel(buf_pool, bpage)) {
bpage = NULL;
+ goto unlock_and_exit;
+ }
+
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(offset == bpage->offset);
+ ut_ad(space == bpage->space);
+
+ if (lock == NULL) {
+ /* The caller wants us to release the page_hash lock */
+ goto unlock_and_exit;
+ } else {
+ /* To be released by the caller */
+ *lock = hash_lock;
+ goto exit;
}
+unlock_and_exit:
+ if (mode == RW_LOCK_SHARED) {
+ rw_lock_s_unlock(hash_lock);
+ } else {
+ rw_lock_x_unlock(hash_lock);
+ }
+exit:
return(bpage);
}
/******************************************************************//**
-Returns the control block of a file page, NULL if not found
-or an uncompressed page frame does not exist.
+Returns the uncompressed control block of a file page, NULL if not
+found or if the page is not a BUF_BLOCK_FILE_PAGE block. If a block
+is returned and lock is not NULL, the page_hash lock stays acquired
+in lock_mode, *lock points to it and the caller must release it.
+In every other case no page_hash lock is held on return and *lock,
+when lock was passed, is NULL. lock_mode is ignored if lock is NULL.
@return block, NULL if not found */
UNIV_INLINE
buf_block_t*
-buf_block_hash_get(
-/*===============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+buf_block_hash_get_locked(
+/*=====================*/
+ /*!< out: pointer to the block,
+ or NULL; if NULL, hash_lock
+ is also NULL. */
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page
- within space */
-{
- buf_block_t* block;
+ ulint offset, /*!< in: page number */
+ prio_rw_lock_t** lock, /*!< in/out: lock of the page
+ hash acquired if block is
+ found. NULL otherwise. If NULL
+ is passed then the hash_lock
+ is released by this function */
+ ulint lock_mode) /*!< in: RW_LOCK_EX or
+ RW_LOCK_SHARED. Ignored if
+ lock == NULL */
+{
+ buf_page_t* bpage = buf_page_hash_get_locked(buf_pool,
+ space,
+ offset,
+ lock,
+ lock_mode);
+ buf_block_t* block = buf_page_get_block(bpage);
+
+ if (block) {
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!lock || rw_lock_own(*lock, lock_mode));
+#endif /* UNIV_SYNC_DEBUG */
+ return(block);
+ } else if (bpage) {
+ /* Not a block, just a bpage: drop any lock we still hold */
+ ut_ad(buf_page_in_file(bpage));
- block = buf_page_get_block(buf_page_hash_get(buf_pool, space, offset));
+ if (lock) {
+ if (lock_mode == RW_LOCK_SHARED) {
+ rw_lock_s_unlock(*lock);
+ } else {
+ rw_lock_x_unlock(*lock);
+ }
+ *lock = NULL;
+ }
+ return(NULL);
+ }
- return(block);
+ ut_ad(!bpage);
+ ut_ad(lock == NULL || *lock == NULL);
+ return(NULL);
}
/********************************************************************//**
@@ -1204,18 +1288,9 @@ buf_page_peek(
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
{
- const buf_page_t* bpage;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
- //buf_pool_mutex_enter(buf_pool);
- rw_lock_s_lock(&buf_pool->page_hash_latch);
-
- bpage = buf_page_hash_get(buf_pool, space, offset);
-
- //buf_pool_mutex_exit(buf_pool);
- rw_lock_s_unlock(&buf_pool->page_hash_latch);
-
- return(bpage != NULL);
+ return(buf_page_hash_get(buf_pool, space, offset) != NULL);
}
/********************************************************************//**
@@ -1248,7 +1323,7 @@ buf_page_release_zip(
bpage->buf_fix_count--;
mutex_exit(&block->mutex);
return;
- case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
@@ -1256,7 +1331,6 @@ buf_page_release_zip(
break;
}
-
ut_error;
}
@@ -1308,73 +1382,6 @@ buf_block_dbg_add_level(
sync_thread_add_level(&block->lock, level, FALSE);
}
#endif /* UNIV_SYNC_DEBUG */
-/********************************************************************//**
-Acquire mutex on all buffer pool instances. */
-UNIV_INLINE
-void
-buf_pool_mutex_enter_all(void)
-/*==========================*/
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
- buf_pool_mutex_enter(buf_pool);
- }
-}
-
-/********************************************************************//**
-Release mutex on all buffer pool instances. */
-UNIV_INLINE
-void
-buf_pool_mutex_exit_all(void)
-/*=========================*/
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
- buf_pool_mutex_exit(buf_pool);
- }
-}
-
-/********************************************************************//**
-*/
-UNIV_INLINE
-void
-buf_pool_page_hash_x_lock_all(void)
-/*===============================*/
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
- rw_lock_x_lock(&buf_pool->page_hash_latch);
- }
-}
-
-/********************************************************************//**
-*/
-UNIV_INLINE
-void
-buf_pool_page_hash_x_unlock_all(void)
-/*=================================*/
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
- rw_lock_x_unlock(&buf_pool->page_hash_latch);
- }
-}
/*********************************************************************//**
Get the nth chunk's buffer block in the specified buffer pool.
@return the nth chunk's buffer block. */
@@ -1392,4 +1399,26 @@ buf_get_nth_chunk_block(
*chunk_size = chunk->size;
return(chunk->blocks);
}
+
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Debug-only helper. Asserts that the page is in state BUF_BLOCK_ZIP_PAGE or
+BUF_BLOCK_ZIP_DIRTY and that its block mutex is buf_pool->zip_mutex, then
+reports the result of mutex_own() on that mutex.
+@return true if buf_pool->zip_mutex is owned. */
+UNIV_INLINE
+bool
+buf_own_zip_mutex_for_page(
+/*=======================*/
+ const buf_page_t* bpage) /*!< in: page descriptor to check */
+{
+ buf_pool_t* pool;
+ bool owned;
+
+ pool = buf_pool_from_bpage(bpage);
+
+ /* The page state must be one of the two ZIP states */
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE
+ || buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
+ /* ... and zip_mutex must be the mutex serving this page */
+ ut_ad(buf_page_get_mutex(bpage) == &pool->zip_mutex);
+
+ owned = mutex_own(&pool->zip_mutex);
+
+ return(owned);
+}
+#endif /* UNIV_DEBUG */
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/buf0checksum.h b/storage/xtradb/include/buf0checksum.h
new file mode 100644
index 00000000000..cd21781dc6e
--- /dev/null
+++ b/storage/xtradb/include/buf0checksum.h
@@ -0,0 +1,88 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0checksum.h
+Buffer pool checksum functions, also linked from /extra/innochecksum.cc
+
+Created Aug 11, 2011 Vasil Dimov
+*******************************************************/
+
+#ifndef buf0checksum_h
+#define buf0checksum_h
+
+#include "univ.i"
+
+#ifndef UNIV_INNOCHECKSUM
+
+#include "buf0types.h"
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/********************************************************************//**
+Calculates a page CRC32 which is stored to the page when it is written
+to a file. Note that we must be careful to calculate the same value on
+32-bit and 64-bit architectures.
+@return checksum */
+UNIV_INTERN
+ib_uint32_t
+buf_calc_page_crc32(
+/*================*/
+ const byte* page); /*!< in: buffer page */
+
+/********************************************************************//**
+Calculates a page checksum which is stored to the page when it is written
+to a file. Note that we must be careful to calculate the same value on
+32-bit and 64-bit architectures.
+@return checksum */
+UNIV_INTERN
+ulint
+buf_calc_page_new_checksum(
+/*=======================*/
+ const byte* page); /*!< in: buffer page */
+
+/********************************************************************//**
+In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
+looked at the first few bytes of the page. This calculates that old
+checksum.
+NOTE: we must first store the new formula checksum to
+FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
+because this takes that field as an input!
+@return checksum */
+UNIV_INTERN
+ulint
+buf_calc_page_old_checksum(
+/*=======================*/
+ const byte* page); /*!< in: buffer page */
+
+#ifndef UNIV_INNOCHECKSUM
+
+/********************************************************************//**
+Return a printable string describing the checksum algorithm.
+@return algorithm name */
+UNIV_INTERN
+const char*
+buf_checksum_algorithm_name(
+/*========================*/
+ srv_checksum_algorithm_t algo); /*!< in: algorithm */
+
+extern ulong srv_checksum_algorithm;
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+#endif /* buf0checksum_h */
diff --git a/storage/xtradb/include/buf0dblwr.h b/storage/xtradb/include/buf0dblwr.h
new file mode 100644
index 00000000000..1b9336f4002
--- /dev/null
+++ b/storage/xtradb/include/buf0dblwr.h
@@ -0,0 +1,153 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0dblwr.h
+Doublewrite buffer module
+
+Created 2011/12/19 Inaam Rana
+*******************************************************/
+
+#ifndef buf0dblwr_h
+#define buf0dblwr_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "log0log.h"
+
+#ifndef UNIV_HOTBACKUP
+
+/** Doublewrite system */
+extern buf_dblwr_t* buf_dblwr;
+/** Set to TRUE when the doublewrite buffer is being created */
+extern ibool buf_dblwr_being_created;
+
+/****************************************************************//**
+Creates the doublewrite buffer to a new InnoDB installation. The header of the
+doublewrite buffer is placed on the trx system header page. */
+UNIV_INTERN
+void
+buf_dblwr_create(void);
+/*==================*/
+/****************************************************************//**
+At a database startup initializes the doublewrite buffer memory structure if
+we already have a doublewrite buffer created in the data files. If we are
+upgrading to an InnoDB version which supports multiple tablespaces, then this
+function performs the necessary update operations. If we are in a crash
+recovery, this function uses a possible doublewrite buffer to restore
+half-written pages in the data files. */
+UNIV_INTERN
+void
+buf_dblwr_init_or_restore_pages(
+/*============================*/
+ ibool restore_corrupt_pages); /*!< in: TRUE=restore pages */
+/****************************************************************//**
+Frees the doublewrite buffer. */
+UNIV_INTERN
+void
+buf_dblwr_free(void);
+/*================*/
+/********************************************************************//**
+Updates the doublewrite buffer when an IO request is completed. */
+UNIV_INTERN
+void
+buf_dblwr_update(
+/*=============*/
+ const buf_page_t* bpage, /*!< in: buffer block descriptor */
+ buf_flush_t flush_type);/*!< in: flush type */
+/****************************************************************//**
+Determines if a page number is located inside the doublewrite buffer.
+@return TRUE if the location is inside the two blocks of the
+doublewrite buffer */
+UNIV_INTERN
+ibool
+buf_dblwr_page_inside(
+/*==================*/
+ ulint page_no); /*!< in: page number */
+/********************************************************************//**
+Posts a buffer page for writing. If the doublewrite memory buffer is
+full, calls buf_dblwr_flush_buffered_writes and waits for free
+space to appear. */
+UNIV_INTERN
+void
+buf_dblwr_add_to_batch(
+/*====================*/
+ buf_page_t* bpage); /*!< in: buffer block to write */
+/********************************************************************//**
+Flushes possible buffered writes from the doublewrite memory buffer to disk,
+and also wakes up the aio thread if simulated aio is used. It is very
+important to call this function after a batch of writes has been posted,
+and also when we may have to wait for a page latch! Otherwise a deadlock
+of threads can occur. */
+UNIV_INTERN
+void
+buf_dblwr_flush_buffered_writes(void);
+/*=================================*/
+/********************************************************************//**
+Writes a page to the doublewrite buffer on disk, sync it, then write
+the page to the datafile and sync the datafile. This function is used
+for single page flushes. If all the buffers allocated for single page
+flushes in the doublewrite buffer are in use we wait here for one to
+become free. We are guaranteed that a slot will become free because any
+thread that is using a slot must also release the slot before leaving
+this function. */
+UNIV_INTERN
+void
+buf_dblwr_write_single_page(
+/*========================*/
+ buf_page_t* bpage, /*!< in: buffer block to write */
+ bool sync); /*!< in: true if sync IO requested */
+
+/** Doublewrite control struct: bookkeeping for the in-memory write buffer,
+the page numbers of the two doublewrite blocks, and the slots reserved for
+batch and single page flushes. */
+struct buf_dblwr_t{
+ ib_mutex_t mutex; /*!< mutex protecting the first_free
+ field and write_buf */
+ ulint block1; /*!< the page number of the first
+ doublewrite block (64 pages) */
+ ulint block2; /*!< page number of the second
+ doublewrite block */
+ ulint first_free;/*!< first free position in write_buf
+ measured in units of UNIV_PAGE_SIZE */
+ ulint b_reserved;/*!< number of slots currently reserved
+ for batch flush. */
+ os_event_t b_event;/*!< event where threads wait for a
+ batch flush to end. */
+ ulint s_reserved;/*!< number of slots currently
+ reserved for single page flushes. */
+ os_event_t s_event;/*!< event where threads wait for a
+ single page flush slot. */
+ bool* in_use; /*!< flags indicating whether a slot is
+ in use. Only used for single page
+ flushes. Presumably one entry per
+ slot -- TODO confirm against the
+ allocation site */
+ bool batch_running;/*!< true while a batch is being
+ written from the doublewrite
+ buffer. */
+ byte* write_buf;/*!< write buffer used in writing to the
+ doublewrite buffer, aligned to an
+ address divisible by UNIV_PAGE_SIZE
+ (which is required by Windows aio) */
+ byte* write_buf_unaligned;/*!< pointer to write_buf,
+ but unaligned */
+ buf_page_t** buf_block_arr;/*!< array to store pointers to
+ the buffer blocks which have been
+ cached to write_buf */
+};
+
+
+#endif /* UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/xtradb/include/buf0dump.h b/storage/xtradb/include/buf0dump.h
new file mode 100644
index 00000000000..c704a8e97e0
--- /dev/null
+++ b/storage/xtradb/include/buf0dump.h
@@ -0,0 +1,72 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0dump.h
+Implements a buffer pool dump/load.
+
+Created April 08, 2011 Vasil Dimov
+*******************************************************/
+
+#ifndef buf0dump_h
+#define buf0dump_h
+
+#include "univ.i"
+
+/*****************************************************************//**
+Wakes up the buffer pool dump/load thread and instructs it to start
+a dump. This function is called by MySQL code via buffer_pool_dump_now()
+and it should return immediately because the whole MySQL is frozen during
+its execution. */
+UNIV_INTERN
+void
+buf_dump_start();
+/*============*/
+
+/*****************************************************************//**
+Wakes up the buffer pool dump/load thread and instructs it to start
+a load. This function is called by MySQL code via buffer_pool_load_now()
+and it should return immediately because the whole MySQL is frozen during
+its execution. */
+UNIV_INTERN
+void
+buf_load_start();
+/*============*/
+
+/*****************************************************************//**
+Aborts a currently running buffer pool load. This function is called by
+MySQL code via buffer_pool_load_abort() and it should return immediately
+because the whole MySQL is frozen during its execution. */
+UNIV_INTERN
+void
+buf_load_abort();
+/*============*/
+
+/*****************************************************************//**
+This is the main thread for buffer pool dump/load. It waits for an
+event and when woken up either performs a dump or load and sleeps
+again.
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(buf_dump_thread)(
+/*============================*/
+ void* arg); /*!< in: a dummy parameter
+ required by os_thread_create */
+
+#endif /* buf0dump_h */
diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h
index 81085ab9552..f4542e7c206 100644
--- a/storage/xtradb/include/buf0flu.h
+++ b/storage/xtradb/include/buf0flu.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -28,13 +28,16 @@ Created 11/5/1995 Heikki Tuuri
#include "univ.i"
#include "ut0byte.h"
+#include "log0log.h"
#ifndef UNIV_HOTBACKUP
#include "mtr0types.h"
#include "buf0types.h"
-#include "log0log.h"
+
+/** Flag indicating if the page_cleaner is in active state. */
+extern ibool buf_page_cleaner_is_active;
/********************************************************************//**
-Remove a block from the flush list of modified blocks. */
+Remove a block from the flush list of modified blocks. */
UNIV_INTERN
void
buf_flush_remove(
@@ -57,23 +60,6 @@ void
buf_flush_write_complete(
/*=====================*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
-/*********************************************************************//**
-Flushes pages from the end of the LRU list if there is too small
-a margin of replaceable pages there. If buffer pool is NULL it
-means flush free margin on all buffer pool instances. */
-UNIV_INTERN
-void
-buf_flush_free_margin(
-/*==================*/
- buf_pool_t* buf_pool,
- ibool wait);
-/*********************************************************************//**
-Flushes pages from the end of all the LRU lists. */
-UNIV_INTERN
-void
-buf_flush_free_margins(
-/*=========================*/
- ibool wait);
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Initializes a page for writing to the tablespace. */
@@ -81,17 +67,17 @@ UNIV_INTERN
void
buf_flush_init_for_writing(
/*=======================*/
- byte* page, /*!< in/out: page */
- void* page_zip_, /*!< in/out: compressed page, or NULL */
- ib_uint64_t newest_lsn); /*!< in: newest modification lsn
- to the page */
+ byte* page, /*!< in/out: page */
+ void* page_zip_, /*!< in/out: compressed page, or NULL */
+ lsn_t newest_lsn); /*!< in: newest modification lsn
+ to the page */
#ifndef UNIV_HOTBACKUP
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/********************************************************************//**
Writes a flushable page asynchronously from the buffer pool to a file.
-NOTE: buf_pool->mutex and block->mutex must be held upon entering this
-function, and they will be released by this function after flushing.
-This is loosely based on buf_flush_batch() and buf_flush_page().
+NOTE: block->mutex must be held upon entering this function, and it will be
+released by this function after flushing. This is loosely based on
+buf_flush_batch() and buf_flush_page().
@return TRUE if the page was flushed and the mutexes released */
UNIV_INTERN
ibool
@@ -102,38 +88,40 @@ buf_flush_page_try(
__attribute__((nonnull, warn_unused_result));
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list.
-NOTE: The calling thread may own latches to pages: to avoid deadlocks,
-this function must be written so that it cannot end up waiting for these
-latches!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
-UNIV_INTERN
-ulint
-buf_flush_LRU(
-/*==========*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint min_n); /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush_list of
+This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return true if a batch was queued successfully for each buffer pool
+instance. false if another batch of same type was already running in
+at least one of the buffer pool instances */
UNIV_INTERN
-ulint
+bool
buf_flush_list(
-/*============*/
+/*===========*/
ulint min_n, /*!< in: wished minimum mumber of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
- ib_uint64_t lsn_limit); /*!< in the case BUF_FLUSH_LIST all
+ lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n), otherwise ignored */
+ ulint* n_processed); /*!< out: the number of pages
+ which were processed is passed
+ back to caller. Ignored if NULL */
+/******************************************************************//**
+This function picks up a single dirty page from the tail of the LRU
+list, flushes it, removes it from page_hash and LRU list and puts
+it on the free list. It is called from user threads when they are
+unable to find a replaceable page at the tail of the LRU list, i.e.
+when the background LRU flushing in the page_cleaner thread is not
+fast enough to keep pace with the workload.
+@return TRUE if success. */
+UNIV_INTERN
+ibool
+buf_flush_single_page_from_LRU(
+/*===========================*/
+ buf_pool_t* buf_pool); /*!< in/out: buffer pool instance */
/******************************************************************//**
Waits until a flush batch of the given type ends */
UNIV_INTERN
@@ -141,7 +129,7 @@ void
buf_flush_wait_batch_end(
/*=====================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- enum buf_flush type); /*!< in: BUF_FLUSH_LRU
+ buf_flush_t type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/******************************************************************//**
Waits until a flush batch of the given type ends. This is called by
@@ -152,7 +140,7 @@ void
buf_flush_wait_batch_end_wait_only(
/*===============================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- enum buf_flush type); /*!< in: BUF_FLUSH_LRU
+ buf_flush_t type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
@@ -171,9 +159,9 @@ void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /*!< in: block which is modified */
- ib_uint64_t start_lsn, /*!< in: start lsn of the first mtr in a
+ lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
- ib_uint64_t end_lsn); /*!< in: end lsn of the last mtr in the
+ lsn_t end_lsn); /*!< in: end lsn of the last mtr in the
set of mtr's */
/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
@@ -185,43 +173,33 @@ buf_flush_ready_for_replace(
/*========================*/
buf_page_t* bpage); /*!< in: buffer control block, must be
buf_page_in_file(bpage) and in the LRU list */
-
-/** @brief Statistics for selecting flush rate based on redo log
-generation speed.
-
-These statistics are generated for heuristics used in estimating the
-rate at which we should flush the dirty blocks to avoid bursty IO
-activity. Note that the rate of flushing not only depends on how many
-dirty pages we have in the buffer pool but it is also a fucntion of
-how much redo the workload is generating and at what rate. */
-
-struct buf_flush_stat_struct
-{
- ib_uint64_t redo; /**< amount of redo generated. */
- ulint n_flushed; /**< number of pages flushed. */
-};
-
-/** Statistics for selecting flush rate of dirty pages. */
-typedef struct buf_flush_stat_struct buf_flush_stat_t;
-/*********************************************************************
-Update the historical stats that we are collecting for flush rate
-heuristics at the end of each interval. */
-UNIV_INTERN
-void
-buf_flush_stat_update(void);
-/*=======================*/
-/*********************************************************************
-Determines the fraction of dirty pages that need to be flushed based
-on the speed at which we generate redo log. Note that if redo log
-is generated at significant rate without a corresponding increase
-in the number of dirty pages (for example, an in-memory workload)
-it can cause IO bursts of flushing. This function implements heuristics
-to avoid this burstiness.
-@return number of dirty pages to be flushed / second */
+/******************************************************************//**
+page_cleaner thread tasked with flushing dirty pages from the buffer
+pools. As of now we'll have only one instance of this thread.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(buf_flush_page_cleaner_thread)(
+/*==========================================*/
+ void* arg); /*!< in: a dummy parameter required by
+ os_thread_create */
+/*********************************************************************//**
+Clears up tail of the LRU lists:
+* Put replaceable pages at the tail of LRU to the free list
+* Flush dirty pages at the tail of LRU to the disk
+The depth to which we scan each buffer pool is controlled by dynamic
+config parameter innodb_LRU_scan_depth.
+@return total pages flushed */
UNIV_INTERN
ulint
-buf_flush_get_desired_flush_rate(void);
-/*==================================*/
+buf_flush_LRU_tail(void);
+/*====================*/
+/*********************************************************************//**
+Wait for any possible LRU flushes that are in progress to end. */
+UNIV_INTERN
+void
+buf_flush_wait_LRU_batch_end(void);
+/*==============================*/
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
@@ -250,16 +228,66 @@ void
buf_flush_free_flush_rbt(void);
/*==========================*/
-/** When buf_flush_free_margin is called, it tries to make this many blocks
-available to replacement in the free list and at the end of the LRU list (to
-make sure that a read-ahead batch can be read efficiently in a single
-sweep). */
-#define BUF_FLUSH_FREE_BLOCK_MARGIN(b) (5 + BUF_READ_AHEAD_AREA(b))
-/** Extra margin to apply above BUF_FLUSH_FREE_BLOCK_MARGIN */
-#define BUF_FLUSH_EXTRA_MARGIN(b) ((BUF_FLUSH_FREE_BLOCK_MARGIN(b) / 4 \
- + 100) / srv_buf_pool_instances)
+/********************************************************************//**
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: in simulated aio we must call
+os_aio_simulated_wake_handler_threads after we have posted a batch of
+writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this
+function, and it will be released by this function. */
+UNIV_INTERN
+void
+buf_flush_page(
+/*===========*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_page_t* bpage, /*!< in: buffer control block */
+ buf_flush_t flush_type, /*!< in: type of flush */
+ bool sync) /*!< in: true if sync IO request */
+ __attribute__((nonnull));
+/********************************************************************//**
+Returns true if the block is modified and ready for flushing.
+@return true if can flush immediately */
+UNIV_INTERN
+bool
+buf_flush_ready_for_flush(
+/*======================*/
+ buf_page_t* bpage, /*!< in: buffer control block, must be
+ buf_page_in_file(bpage) */
+ buf_flush_t flush_type)/*!< in: type of flush */
+ __attribute__((warn_unused_result));
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush
+list in a particular buffer pool.
+@return number of dirty pages present in a single buffer pool */
+UNIV_INTERN
+ulint
+buf_pool_get_dirty_pages_count(
+/*===========================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool */
+ ulint id); /*!< in: space id to check */
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush list.
+@return count of dirty pages present in all the buffer pools */
+UNIV_INTERN
+ulint
+buf_flush_get_dirty_pages_count(
+/*============================*/
+ ulint id); /*!< in: space id to check */
+#endif /* UNIV_DEBUG */
+
#endif /* !UNIV_HOTBACKUP */
+/******************************************************************//**
+Check if a flush list flush is in progress for any buffer pool instance for
+heuristic purposes.
+@return true if flush list flush is in progress */
+UNIV_INLINE
+bool
+buf_flush_flush_list_in_progress(void)
+/*==================================*/
+ __attribute__((warn_unused_result));
+
#ifndef UNIV_NONINL
#include "buf0flu.ic"
#endif
diff --git a/storage/xtradb/include/buf0flu.ic b/storage/xtradb/include/buf0flu.ic
index c8d95d1849c..b1e64def462 100644
--- a/storage/xtradb/include/buf0flu.ic
+++ b/storage/xtradb/include/buf0flu.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,6 +26,7 @@ Created 11/5/1995 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "buf0buf.h"
#include "mtr0mtr.h"
+#include "srv0srv.h"
/********************************************************************//**
Inserts a modified block into the flush list. */
@@ -35,7 +36,7 @@ buf_flush_insert_into_flush_list(
/*=============================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
- ib_uint64_t lsn); /*!< in: oldest modification */
+ lsn_t lsn); /*!< in: oldest modification */
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
@@ -46,7 +47,7 @@ buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
- ib_uint64_t lsn); /*!< in: oldest modification */
+ lsn_t lsn); /*!< in: oldest modification */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
@@ -61,14 +62,13 @@ buf_flush_note_modification(
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
- ut_ad(block);
+ ut_ad(!srv_read_only_mode);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
ut_ad(!mtr->made_dirty || log_flush_order_mutex_own());
@@ -81,6 +81,8 @@ buf_flush_note_modification(
block->page.newest_modification = mtr->end_lsn;
if (!block->page.oldest_modification) {
+ ut_a(mtr->made_dirty);
+ ut_ad(log_flush_order_mutex_own());
buf_flush_insert_into_flush_list(
buf_pool, block, mtr->start_lsn);
} else {
@@ -89,7 +91,7 @@ buf_flush_note_modification(
mutex_exit(&block->mutex);
- ++srv_buf_pool_write_requests;
+ srv_stats.buf_pool_write_requests.inc();
}
/********************************************************************//**
@@ -99,21 +101,20 @@ void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /*!< in: block which is modified */
- ib_uint64_t start_lsn, /*!< in: start lsn of the first mtr in a
+ lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
- ib_uint64_t end_lsn) /*!< in: end lsn of the last mtr in the
+ lsn_t end_lsn) /*!< in: end lsn of the last mtr in the
set of mtr's */
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
- ut_ad(block);
+ ut_ad(!srv_read_only_mode);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
ut_ad(log_flush_order_mutex_own());
@@ -134,3 +135,24 @@ buf_flush_recv_note_modification(
}
#endif /* !UNIV_HOTBACKUP */
+
+/******************************************************************//**
+Check, without taking any mutex here, whether any buffer pool instance has
+init_flush or n_flush set for BUF_FLUSH_LIST; the result is only a hint.
+@return true if flush list flush is in progress */
+UNIV_INLINE
+bool
+buf_flush_flush_list_in_progress(void)
+/*==================================*/
+{
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+
+ const buf_pool_t* buf_pool = buf_pool_from_array(i);
+ if (buf_pool->init_flush[BUF_FLUSH_LIST]
+ || buf_pool->n_flush[BUF_FLUSH_LIST]) {
+
+ return(true);
+ }
+ }
+ return(false);
+}
diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h
index 4b415214fa5..6415540178c 100644
--- a/storage/xtradb/include/buf0lru.h
+++ b/storage/xtradb/include/buf0lru.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,22 +27,13 @@ Created 11/5/1995 Heikki Tuuri
#define buf0lru_h
#include "univ.i"
+#ifndef UNIV_HOTBACKUP
#include "ut0byte.h"
#include "buf0types.h"
-/******************************************************************//**
-Tries to remove LRU flushed blocks from the end of the LRU list and put them
-to the free list. This is beneficial for the efficiency of the insert buffer
-operation, as flushed pages from non-unique non-clustered indexes are here
-taken out of the buffer pool, and their inserts redirected to the insert
-buffer. Otherwise, the flushed blocks could get modified again before read
-operations need new buffer blocks, and the i/o work done in flushing would be
-wasted. */
-UNIV_INTERN
-void
-buf_LRU_try_free_flushed_blocks(
-/*============================*/
- buf_pool_t* buf_pool); /*!< in: buffer pool instance */
+// Forward declaration
+struct trx_t;
+
/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
@@ -60,18 +51,19 @@ These are low-level functions
/** Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
-/** Maximum LRU list search length in buf_flush_LRU_recommendation() */
-#define BUF_LRU_FREE_SEARCH_LEN(b) (5 + 2 * BUF_READ_AHEAD_AREA(b))
-
/******************************************************************//**
-Removes all pages belonging to a given tablespace. */
+Flushes all dirty pages or removes all pages belonging
+to a given tablespace. A PROBLEM: if readahead is being started, what
+guarantees that it will not try to read in pages after this operation
+has completed? */
UNIV_INTERN
void
buf_LRU_flush_or_remove_pages(
/*==========================*/
- ulint id, /*!< in: space id */
- enum buf_remove_t buf_remove);/*!< in: remove or flush
- strategy */
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove, /*!< in: remove or flush strategy */
+ const trx_t* trx); /*!< to check if the operation must
+ be interrupted */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
@@ -87,40 +79,35 @@ buf_LRU_insert_zip_clean(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
-NOTE: This will temporarily release buf_pool_mutex. Furthermore, the
-page frame will no longer be accessible via bpage.
+NOTE: If this function returns true, it will release the LRU list mutex,
+and temporarily release and relock the buf_page_get_mutex() mutex.
+Furthermore, the page frame will no longer be accessible via bpage. If this
+function returns false, the buf_page_get_mutex() might be temporarily released
+and relocked too.
+
+The caller must hold the LRU list and buf_page_get_mutex() mutexes.
-The caller must hold buf_page_get_mutex(bpage) and release this mutex
-after the call. No other buf_page_get_mutex() may be held when
-calling this function.
-@return TRUE if freed, FALSE otherwise. */
+@return true if freed, false otherwise. */
UNIV_INTERN
-ibool
-buf_LRU_free_block(
-/*===============*/
+bool
+buf_LRU_free_page(
+/*==============*/
buf_page_t* bpage, /*!< in: block to be freed */
- ibool zip, /*!< in: TRUE if should remove also the
+ bool zip) /*!< in: true if should remove also the
compressed page of an uncompressed page */
- ibool* have_LRU_mutex)
__attribute__((nonnull));
/******************************************************************//**
Try to free a replaceable block.
@return TRUE if found and freed */
UNIV_INTERN
ibool
-buf_LRU_search_and_free_block(
-/*==========================*/
+buf_LRU_scan_and_free_block(
+/*========================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint n_iterations); /*!< in: how many times this has
- been called repeatedly without
- result: a high value means that
- we should search farther; if
- n_iterations < 10, then we search
- n_iterations / 10 * buf_pool->curr_size
- pages from the end of the LRU list; if
- n_iterations < 5, then we will
- also search n_iterations / 5
- of the unzip_LRU list. */
+ ibool scan_all) /*!< in: scan whole LRU list
+ if TRUE, otherwise scan only
+ 'old' blocks. */
+ __attribute__((nonnull,warn_unused_result));
/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, returns NULL.
@@ -134,6 +121,27 @@ buf_LRU_get_free_only(
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, blocks are moved from the end of the
LRU list to the free list.
+This function is called from a user thread when it needs a clean
+block to read in a page. Note that we only ever get a block from
+the free list. Even when we flush a page or find a page in an LRU scan,
+we put it on the free list to be used.
+* iteration 0:
+ * get a block from free list, success: done
+ * if there is an LRU flush batch in progress:
+ * wait for batch to end: retry free list
+ * if buf_pool->try_LRU_scan is set
+ * scan LRU up to srv_LRU_scan_depth to find a clean block
+ * the above will put the block on free list
+ * success: retry the free list
+ * flush one dirty page from tail of LRU to disk
+ * the above will put the block on free list
+ * success: retry the free list
+* iteration 1:
+ * same as iteration 0 except:
+ * scan whole LRU list
+ * scan LRU list even if buf_pool->try_LRU_scan is not set
+* iteration > 1:
+ * same as iteration 1 but sleep 100ms
@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
UNIV_INTERN
buf_block_t*
@@ -141,15 +149,22 @@ buf_LRU_get_free_block(
/*===================*/
buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */
__attribute__((nonnull,warn_unused_result));
-
+/******************************************************************//**
+Determines if the unzip_LRU list should be used for evicting a victim
+instead of the general LRU list.
+@return TRUE if should use unzip_LRU */
+UNIV_INTERN
+ibool
+buf_LRU_evict_from_unzip_LRU(
+/*=========================*/
+ buf_pool_t* buf_pool); /*!< buffer pool instance */
/******************************************************************//**
Puts a block back to the free list. */
UNIV_INTERN
void
buf_LRU_block_free_non_file_page(
/*=============================*/
- buf_block_t* block, /*!< in: block, must not contain a file page */
- ibool have_page_hash_mutex);
+ buf_block_t* block); /*!< in: block, must not contain a file page */
/******************************************************************//**
Adds a block to the LRU list. Please make sure that the zip_size is
already set into the page zip when invoking the function, so that we
@@ -206,18 +221,6 @@ UNIV_INTERN
void
buf_LRU_stat_update(void);
/*=====================*/
-/********************************************************************//**
-Dump the LRU page list to the specific file. */
-UNIV_INTERN
-ibool
-buf_LRU_file_dump(void);
-/*===================*/
-/********************************************************************//**
-Read the pages based on the specific file.*/
-UNIV_INTERN
-ibool
-buf_LRU_file_restore(void);
-/*======================*/
/******************************************************************//**
Remove one page from LRU list and put it to free list */
@@ -279,21 +282,18 @@ extern uint buf_LRU_old_threshold_ms;
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics we decide
if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
-struct buf_LRU_stat_struct
+struct buf_LRU_stat_t
{
ulint io; /**< Counter of buffer pool I/O operations. */
ulint unzip; /**< Counter of page_zip_decompress operations. */
};
-/** Statistics for selecting the LRU list for eviction. */
-typedef struct buf_LRU_stat_struct buf_LRU_stat_t;
-
/** Current operation counters. Not protected by any mutex.
Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_cur;
/** Running sum of past values of buf_LRU_stat_cur.
-Updated by buf_LRU_stat_update(). Protected by buf_pool->mutex. */
+Updated by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_sum;
/********************************************************************//**
@@ -307,4 +307,6 @@ Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
#include "buf0lru.ic"
#endif
+#endif /* !UNIV_HOTBACKUP */
+
#endif
diff --git a/storage/xtradb/include/buf0lru.ic b/storage/xtradb/include/buf0lru.ic
index d1a89b9fbee..6e0da7a2588 100644
--- a/storage/xtradb/include/buf0lru.ic
+++ b/storage/xtradb/include/buf0lru.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/buf0rea.h b/storage/xtradb/include/buf0rea.h
index 613b89e9f5c..9adeaa7455a 100644
--- a/storage/xtradb/include/buf0rea.h
+++ b/storage/xtradb/include/buf0rea.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,38 +27,20 @@ Created 11/5/1995 Heikki Tuuri
#define buf0rea_h
#include "univ.i"
-#include "trx0types.h"
#include "buf0types.h"
/********************************************************************//**
-Low-level function which reads a page asynchronously from a file to the
-buffer buf_pool if it is not already there, in which case does nothing.
-Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
-flag is cleared and the x-lock released by an i/o-handler thread.
-@return 1 if a read request was queued, 0 if the page already resided
-in buf_pool, or if the page is in the doublewrite buffer blocks in
-which case it is never read into the pool, or if the tablespace does
-not exist or is being dropped
-@return 1 if read request is issued. 0 if it is not */
+High-level function which reads a page asynchronously from a file to the
+buffer buf_pool if it is not already there. Sets the io_fix flag and sets
+an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+released by the i/o-handler thread.
+@return TRUE if page has been read in, FALSE in case of failure */
UNIV_INTERN
-ulint
-buf_read_page_low(
-/*==============*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
- trying to read from a non-existent tablespace, or a
- tablespace which is just now being dropped */
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
- ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
- ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
- at read-ahead functions) */
+ibool
+buf_read_page(
+/*==========*/
ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- ibool unzip, /*!< in: TRUE=request uncompressed page */
- ib_int64_t tablespace_version, /*!< in: if the space memory object has
- this timestamp different from what we are giving here,
- treat the tablespace as dropped; this is a timestamp we
- use to stop dangling page reads from a tablespace
- which we have DISCARDed + IMPORTed back */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
ulint offset, /*!< in: page number */
trx_t* trx);
/********************************************************************//**
@@ -69,12 +51,10 @@ released by the i/o-handler thread.
@return TRUE if page has been read in, FALSE in case of failure */
UNIV_INTERN
ibool
-buf_read_page(
-/*==========*/
+buf_read_page_async(
+/*================*/
ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint offset, /*!< in: page number */
- trx_t* trx);
+ ulint offset);/*!< in: page number */
/********************************************************************//**
Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
@@ -142,7 +122,7 @@ UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
- ibool sync, /*!< in: TRUE if the caller
+ bool sync, /*!< in: true if the caller
wants this function to wait
for the highest address page
to get read in, before this
@@ -184,13 +164,16 @@ buf_read_recv_pages(
/** The size in pages of the area which the read-ahead algorithms read if
invoked */
-#define BUF_READ_AHEAD_AREA(b) 64
+#define BUF_READ_AHEAD_AREA(b) ((b)->read_ahead_area)
/** @name Modes used in read-ahead @{ */
/** read only pages belonging to the insert buffer tree */
#define BUF_READ_IBUF_PAGES_ONLY 131
/** read any page */
#define BUF_READ_ANY_PAGE 132
+/** read any page, but ignore (return an error) if a page does not exist
+instead of crashing like BUF_READ_ANY_PAGE does */
+#define BUF_READ_IGNORE_NONEXISTENT_PAGES 1024
/* @} */
#endif
diff --git a/storage/xtradb/include/buf0types.h b/storage/xtradb/include/buf0types.h
index 9a0af8b648b..e19eb04a2ce 100644
--- a/storage/xtradb/include/buf0types.h
+++ b/storage/xtradb/include/buf0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,33 +26,45 @@ Created 11/17/1995 Heikki Tuuri
#ifndef buf0types_h
#define buf0types_h
-#include "page0types.h"
-
/** Buffer page (uncompressed or compressed) */
-typedef struct buf_page_struct buf_page_t;
+struct buf_page_t;
/** Buffer block for which an uncompressed page exists */
-typedef struct buf_block_struct buf_block_t;
+struct buf_block_t;
/** Buffer pool chunk comprising buf_block_t */
-typedef struct buf_chunk_struct buf_chunk_t;
+struct buf_chunk_t;
/** Buffer pool comprising buf_chunk_t */
-typedef struct buf_pool_struct buf_pool_t;
+struct buf_pool_t;
/** Buffer pool statistics struct */
-typedef struct buf_pool_stat_struct buf_pool_stat_t;
+struct buf_pool_stat_t;
/** Buffer pool buddy statistics struct */
-typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
+struct buf_buddy_stat_t;
+/** Doublewrite memory struct */
+struct buf_dblwr_t;
/** A buffer frame. @see page_t */
typedef byte buf_frame_t;
/** Flags for flush types */
-enum buf_flush {
+enum buf_flush_t {
BUF_FLUSH_LRU = 0, /*!< flush via the LRU list */
- BUF_FLUSH_SINGLE_PAGE, /*!< flush a single page */
BUF_FLUSH_LIST, /*!< flush via the flush list
of dirty blocks */
+ BUF_FLUSH_SINGLE_PAGE, /*!< flush via the LRU list
+ but only a single page */
BUF_FLUSH_N_TYPES /*!< index of last element + 1 */
};
+/** Strategy for flushing or removing a tablespace's pages in the buffer pool;
+the buf_remove argument of buf_LRU_flush_or_remove_pages(). */
+enum buf_remove_t {
+ BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer
+ pool; don't write or sync to disk */
+ BUF_REMOVE_FLUSH_NO_WRITE, /*!< Remove from the flush list only;
+ don't write or sync to disk */
+ BUF_REMOVE_FLUSH_WRITE /*!< Flush dirty pages to disk only;
+ don't remove from the buffer pool */
+};
+
/** Flags for io_fix types */
enum buf_io_fix {
BUF_IO_NONE = 0, /**< no pending I/O */
@@ -63,30 +75,79 @@ enum buf_io_fix {
the flush_list */
};
-/** Algorithm to remove the pages for a tablespace from the buffer pool.
-@See buf_LRU_flush_or_remove_pages(). */
-enum buf_remove_t {
- BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer
- pool, don't write or sync to disk */
- BUF_REMOVE_FLUSH_NO_WRITE /*!< Remove only, from the flush list,
- don't write or sync to disk */
+/** Alternatives for srv_checksum_algorithm, which can be changed by
+setting innodb_checksum_algorithm */
+enum srv_checksum_algorithm_t {
+ SRV_CHECKSUM_ALGORITHM_CRC32, /*!< Write crc32, allow crc32,
+ innodb or none when reading */
+ SRV_CHECKSUM_ALGORITHM_STRICT_CRC32, /*!< Write crc32, allow crc32
+ when reading */
+ SRV_CHECKSUM_ALGORITHM_INNODB, /*!< Write innodb, allow crc32,
+ innodb or none when reading */
+ SRV_CHECKSUM_ALGORITHM_STRICT_INNODB, /*!< Write innodb, allow
+ innodb when reading */
+ SRV_CHECKSUM_ALGORITHM_NONE, /*!< Write none, allow crc32,
+ innodb or none when reading */
+ SRV_CHECKSUM_ALGORITHM_STRICT_NONE /*!< Write none, allow none
+ when reading */
+};
+
+/** Alternatives for srv_cleaner_lsn_age_factor, set through
+innodb_cleaner_lsn_age_factor variable */
+enum srv_cleaner_lsn_age_factor_t {
+ SRV_CLEANER_LSN_AGE_FACTOR_LEGACY, /*!< Original Oracle MySQL 5.6
+ formula */
+ SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT
+ /*!< Percona Server 5.6 formula
+ that returns lower values than
+ legacy option for low
+ checkpoint ages, and higher
+ values for high ages. This has
+ the effect of stabilizing the
+ checkpoint age higher. */
+};
+
+/** Alternatives for srv_foreground_preflush, set through
+innodb_foreground_preflush variable */
+enum srv_foreground_preflush_t {
+ SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH, /*!< Original Oracle MySQL 5.6
+ behavior of performing a sync
+ flush list flush */
+ SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF /*!< Exponential backoff wait
+ for the page cleaner to flush
+ for us */
+};
+
+/** Alternatives for srv_empty_free_list_algorithm, set through
+innodb_empty_free_list_algorithm variable */
+enum srv_empty_free_list_t {
+ SRV_EMPTY_FREE_LIST_LEGACY, /*!< Original Oracle MySQL 5.6
+ algorithm */
+ SRV_EMPTY_FREE_LIST_BACKOFF /*!< Percona Server 5.6 algorithm that
+ loops in a progressive backoff until a
+ free page is produced by the cleaner
+ thread */
};
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */
-#define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT
+/** Zip shift value for the smallest page size */
+#define BUF_BUDDY_LOW_SHIFT UNIV_ZIP_SIZE_SHIFT_MIN
-#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
+/** Smallest buddy page size */
+#define BUF_BUDDY_LOW (1U << BUF_BUDDY_LOW_SHIFT)
+/** Actual number of buddy sizes based on current page size */
#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
-#define BUF_BUDDY_SIZES_MAX (UNIV_PAGE_SIZE_SHIFT_MAX - BUF_BUDDY_LOW_SHIFT)
- /*!< number of buddy sizes */
+
+/** Maximum number of buddy sizes based on the max page size */
+#define BUF_BUDDY_SIZES_MAX (UNIV_PAGE_SIZE_SHIFT_MAX \
+ - BUF_BUDDY_LOW_SHIFT)
/** twice the maximum block size of the buddy system;
the underlying memory is aligned by this amount:
this must be equal to UNIV_PAGE_SIZE */
-#define BUF_BUDDY_HIGH ((ulint)BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
+#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
/* @} */
-#endif
-
+#endif /* buf0types.h */
diff --git a/storage/xtradb/include/data0data.h b/storage/xtradb/include/data0data.h
index c6e864dafc9..a548c7b89b3 100644
--- a/storage/xtradb/include/data0data.h
+++ b/storage/xtradb/include/data0data.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -35,7 +35,7 @@ Created 5/30/1994 Heikki Tuuri
/** Storage for overflow data in a big record, that is, a clustered
index record which needs external storage of data fields */
-typedef struct big_rec_struct big_rec_t;
+struct big_rec_t;
#ifdef UNIV_DEBUG
/*********************************************************************//**
@@ -45,7 +45,8 @@ UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
- const dfield_t* field); /*!< in: SQL data field */
+ const dfield_t* field) /*!< in: SQL data field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets pointer to the data in a field.
@return pointer to data */
@@ -53,7 +54,8 @@ UNIV_INLINE
void*
dfield_get_data(
/*============*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
# define dfield_get_type(field) (&(field)->type)
# define dfield_get_data(field) ((field)->data)
@@ -65,7 +67,8 @@ void
dfield_set_type(
/*============*/
dfield_t* field, /*!< in: SQL data field */
- dtype_t* type); /*!< in: pointer to data type struct */
+ const dtype_t* type) /*!< in: pointer to data type struct */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets length of field data.
@return length of data; UNIV_SQL_NULL if SQL null data */
@@ -73,7 +76,8 @@ UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets length in a field. */
UNIV_INLINE
@@ -81,7 +85,8 @@ void
dfield_set_len(
/*===========*/
dfield_t* field, /*!< in: field */
- ulint len); /*!< in: length or UNIV_SQL_NULL */
+ ulint len) /*!< in: length or UNIV_SQL_NULL */
+ __attribute__((nonnull));
/*********************************************************************//**
Determines if a field is SQL NULL
@return nonzero if SQL null data */
@@ -89,7 +94,8 @@ UNIV_INLINE
ulint
dfield_is_null(
/*===========*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Determines if a field is externally stored
@return nonzero if externally stored */
@@ -97,14 +103,16 @@ UNIV_INLINE
ulint
dfield_is_ext(
/*==========*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets the "external storage" flag */
UNIV_INLINE
void
dfield_set_ext(
/*===========*/
- dfield_t* field); /*!< in/out: field */
+ dfield_t* field) /*!< in/out: field */
+ __attribute__((nonnull));
/*********************************************************************//**
Sets pointer to the data and length in a field. */
UNIV_INLINE
@@ -113,14 +121,16 @@ dfield_set_data(
/*============*/
dfield_t* field, /*!< in: field */
const void* data, /*!< in: data */
- ulint len); /*!< in: length or UNIV_SQL_NULL */
+ ulint len) /*!< in: length or UNIV_SQL_NULL */
+ __attribute__((nonnull(1)));
/*********************************************************************//**
Sets a data field to SQL NULL. */
UNIV_INLINE
void
dfield_set_null(
/*============*/
- dfield_t* field); /*!< in/out: field */
+ dfield_t* field) /*!< in/out: field */
+ __attribute__((nonnull));
/**********************************************************************//**
Writes an SQL null field full of zeros. */
UNIV_INLINE
@@ -128,7 +138,8 @@ void
data_write_sql_null(
/*================*/
byte* data, /*!< in: pointer to a buffer of size len */
- ulint len); /*!< in: SQL null size in bytes */
+ ulint len) /*!< in: SQL null size in bytes */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies the data and len fields. */
UNIV_INLINE
@@ -136,7 +147,8 @@ void
dfield_copy_data(
/*=============*/
dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2);/*!< in: field to copy from */
+ const dfield_t* field2) /*!< in: field to copy from */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies a data field to another. */
UNIV_INLINE
@@ -144,7 +156,8 @@ void
dfield_copy(
/*========*/
dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2);/*!< in: field to copy from */
+ const dfield_t* field2) /*!< in: field to copy from */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies the data pointed to by a data field. */
UNIV_INLINE
@@ -152,7 +165,9 @@ void
dfield_dup(
/*=======*/
dfield_t* field, /*!< in/out: data field */
- mem_heap_t* heap); /*!< in: memory heap where allocated */
+ mem_heap_t* heap) /*!< in: memory heap where allocated */
+ __attribute__((nonnull));
+#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Tests if two data fields are equal.
If len==0, tests the data length and content for equality.
@@ -170,13 +185,15 @@ dfield_datas_are_binary_equal(
/*********************************************************************//**
Tests if dfield data length and content is equal to the given.
@return TRUE if equal */
-UNIV_INTERN
+UNIV_INLINE
ibool
dfield_data_is_binary_equal(
/*========================*/
const dfield_t* field, /*!< in: field */
ulint len, /*!< in: data length or UNIV_SQL_NULL */
- const byte* data); /*!< in: data */
+ const byte* data) /*!< in: data */
+ __attribute__((nonnull, warn_unused_result));
+#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Gets number of fields in a data tuple.
@return number of fields */
@@ -184,7 +201,8 @@ UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/*********************************************************************//**
Gets nth field of a tuple.
@@ -205,7 +223,8 @@ UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets info bits in a data tuple. */
UNIV_INLINE
@@ -213,7 +232,8 @@ void
dtuple_set_info_bits(
/*=================*/
dtuple_t* tuple, /*!< in: tuple */
- ulint info_bits); /*!< in: info bits */
+ ulint info_bits) /*!< in: info bits */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets number of fields used in record comparisons.
@return number of fields used in comparisons in rem0cmp.* */
@@ -221,7 +241,8 @@ UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets number of fields used in record comparisons. */
UNIV_INLINE
@@ -229,8 +250,9 @@ void
dtuple_set_n_fields_cmp(
/*====================*/
dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields_cmp); /*!< in: number of fields used in
+ ulint n_fields_cmp) /*!< in: number of fields used in
comparisons in rem0cmp.* */
+ __attribute__((nonnull));
/* Estimate the number of bytes that are going to be allocated when
creating a new dtuple_t object */
@@ -249,7 +271,8 @@ dtuple_create_from_mem(
/*===================*/
void* buf, /*!< in, out: buffer to use */
ulint buf_size, /*!< in: buffer size */
- ulint n_fields); /*!< in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Creates a data tuple to a memory heap. The default value for number
@@ -262,19 +285,8 @@ dtuple_create(
mem_heap_t* heap, /*!< in: memory heap where the tuple
is created, DTUPLE_EST_ALLOC(n_fields)
bytes will be allocated from this heap */
- ulint n_fields); /*!< in: number of fields */
-
-/**********************************************************//**
-Wrap data fields in a tuple. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return data tuple */
-UNIV_INLINE
-const dtuple_t*
-dtuple_from_fields(
-/*===============*/
- dtuple_t* tuple, /*!< in: storage for data tuple */
- const dfield_t* fields, /*!< in: fields */
- ulint n_fields); /*!< in: number of fields */
+ ulint n_fields)/*!< in: number of fields */
+ __attribute__((nonnull, malloc));
/*********************************************************************//**
Sets number of fields used in a tuple. Normally this is set in
@@ -284,7 +296,8 @@ void
dtuple_set_n_fields(
/*================*/
dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields); /*!< in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies a data tuple to another. This is a shallow copy; if a deep copy
is desired, dfield_dup() will have to be invoked on each field.
@@ -294,8 +307,9 @@ dtuple_t*
dtuple_copy(
/*========*/
const dtuple_t* tuple, /*!< in: tuple to copy from */
- mem_heap_t* heap); /*!< in: memory heap
+ mem_heap_t* heap) /*!< in: memory heap
where the tuple is created */
+ __attribute__((nonnull, malloc));
/**********************************************************//**
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted.
@@ -305,7 +319,8 @@ ulint
dtuple_get_data_size(
/*=================*/
const dtuple_t* tuple, /*!< in: typed data tuple */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ __attribute__((nonnull));
/*********************************************************************//**
Computes the number of externally stored fields in a data tuple.
@return number of fields */
@@ -313,7 +328,8 @@ UNIV_INLINE
ulint
dtuple_get_n_ext(
/*=============*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull));
/************************************************************//**
Compare two data tuples, respecting the collation of character fields.
@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
@@ -323,7 +339,8 @@ int
dtuple_coll_cmp(
/*============*/
const dtuple_t* tuple1, /*!< in: tuple 1 */
- const dtuple_t* tuple2);/*!< in: tuple 2 */
+ const dtuple_t* tuple2) /*!< in: tuple 2 */
+ __attribute__((nonnull, warn_unused_result));
/************************************************************//**
Folds a prefix given as the number of fields of a tuple.
@return the folded value */
@@ -336,7 +353,7 @@ dtuple_fold(
ulint n_bytes,/*!< in: number of bytes to fold in an
incomplete last field */
index_id_t tree_id)/*!< in: index tree id */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
/*******************************************************************//**
Sets types of fields binary in a tuple. */
UNIV_INLINE
@@ -344,7 +361,8 @@ void
dtuple_set_types_binary(
/*====================*/
dtuple_t* tuple, /*!< in: data tuple */
- ulint n); /*!< in: number of fields to set */
+ ulint n) /*!< in: number of fields to set */
+ __attribute__((nonnull));
/**********************************************************************//**
Checks if a dtuple contains an SQL null value.
@return TRUE if some field is SQL null */
@@ -352,7 +370,8 @@ UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
- const dtuple_t* tuple); /*!< in: dtuple */
+ const dtuple_t* tuple) /*!< in: dtuple */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data field is typed. Asserts an error if not.
@return TRUE if ok */
@@ -360,7 +379,8 @@ UNIV_INTERN
ibool
dfield_check_typed(
/*===============*/
- const dfield_t* field); /*!< in: data field */
+ const dfield_t* field) /*!< in: data field */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data tuple is typed. Asserts an error if not.
@return TRUE if ok */
@@ -368,7 +388,8 @@ UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data tuple is typed.
@return TRUE if ok */
@@ -376,7 +397,8 @@ UNIV_INTERN
ibool
dtuple_check_typed_no_assert(
/*=========================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/**********************************************************//**
Validates the consistency of a tuple which must be complete, i.e,
@@ -386,7 +408,8 @@ UNIV_INTERN
ibool
dtuple_validate(
/*============*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/*************************************************************//**
Pretty prints a dfield value according to its data type. */
@@ -394,7 +417,8 @@ UNIV_INTERN
void
dfield_print(
/*=========*/
- const dfield_t* dfield);/*!< in: dfield */
+ const dfield_t* dfield) /*!< in: dfield */
+ __attribute__((nonnull));
/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
@@ -402,7 +426,8 @@ UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
- const dfield_t* dfield); /*!< in: dfield */
+ const dfield_t* dfield) /*!< in: dfield */
+ __attribute__((nonnull));
/**********************************************************//**
The following function prints the contents of a tuple. */
UNIV_INTERN
@@ -410,7 +435,8 @@ void
dtuple_print(
/*=========*/
FILE* f, /*!< in: output stream */
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull));
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
@@ -425,8 +451,9 @@ dtuple_convert_big_rec(
/*===================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in/out: index entry */
- ulint* n_ext); /*!< in/out: number of
+ ulint* n_ext) /*!< in/out: number of
externally stored columns */
+ __attribute__((nonnull, malloc, warn_unused_result));
/**************************************************************//**
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
@@ -437,21 +464,23 @@ dtuple_convert_back_big_rec(
/*========================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in: entry whose data was put to vector */
- big_rec_t* vector);/*!< in, own: big rec vector; it is
+ big_rec_t* vector) /*!< in, own: big rec vector; it is
freed in this function */
+ __attribute__((nonnull));
/**************************************************************//**
Frees the memory in a big rec vector. */
UNIV_INLINE
void
dtuple_big_rec_free(
/*================*/
- big_rec_t* vector); /*!< in, own: big rec vector; it is
+ big_rec_t* vector) /*!< in, own: big rec vector; it is
freed in this function */
+ __attribute__((nonnull));
/*######################################################################*/
/** Structure for an SQL data field */
-struct dfield_struct{
+struct dfield_t{
void* data; /*!< pointer to data */
unsigned ext:1; /*!< TRUE=externally stored, FALSE=local */
unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null */
@@ -459,7 +488,7 @@ struct dfield_struct{
};
/** Structure for an SQL data tuple of fields (logical record) */
-struct dtuple_struct {
+struct dtuple_t {
ulint info_bits; /*!< info bits of an index record:
the default is 0; this field is used
if an index record is built from
@@ -479,15 +508,13 @@ struct dtuple_struct {
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number, used in
debug assertions */
-/** Value of dtuple_struct::magic_n */
+/** Value of dtuple_t::magic_n */
# define DATA_TUPLE_MAGIC_N 65478679
#endif /* UNIV_DEBUG */
};
/** A slot for a field in a big rec vector */
-typedef struct big_rec_field_struct big_rec_field_t;
-/** A slot for a field in a big rec vector */
-struct big_rec_field_struct {
+struct big_rec_field_t {
ulint field_no; /*!< field number in record */
ulint len; /*!< stored data length, in bytes */
const void* data; /*!< stored data */
@@ -495,7 +522,7 @@ struct big_rec_field_struct {
/** Storage format for overflow data in a big record, that is, a
clustered index record which needs external storage of data fields */
-struct big_rec_struct {
+struct big_rec_t {
mem_heap_t* heap; /*!< memory heap from which
allocated */
ulint n_fields; /*!< number of stored fields */
diff --git a/storage/xtradb/include/data0data.ic b/storage/xtradb/include/data0data.ic
index 2059eefaf89..6937d55d211 100644
--- a/storage/xtradb/include/data0data.ic
+++ b/storage/xtradb/include/data0data.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -54,7 +54,7 @@ void
dfield_set_type(
/*============*/
dfield_t* field, /*!< in: SQL data field */
- dtype_t* type) /*!< in: pointer to data type struct */
+ const dtype_t* type) /*!< in: pointer to data type struct */
{
ut_ad(field && type);
@@ -138,7 +138,7 @@ dfield_is_ext(
{
ut_ad(field);
- return(UNIV_UNLIKELY(field->ext));
+ return(field->ext);
}
/*********************************************************************//**
@@ -228,6 +228,7 @@ dfield_dup(
}
}
+#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Tests if two data fields are equal.
If len==0, tests the data length and content for equality.
@@ -258,6 +259,23 @@ dfield_datas_are_binary_equal(
}
/*********************************************************************//**
+Tests if dfield data length and content is equal to the given.
+@return TRUE if equal */
+UNIV_INLINE
+ibool
+dfield_data_is_binary_equal(
+/*========================*/
+	const dfield_t*	field,	/*!< in: field */
+	ulint		len,	/*!< in: data length or UNIV_SQL_NULL */
+	const byte*	data)	/*!< in: data */
+{
+	/* The lengths must agree before the contents are worth looking at. */
+	ibool	equal = (len == dfield_get_len(field));
+
+	/* When both lengths are UNIV_SQL_NULL the contents are not
+	compared; otherwise the bytes must match exactly. */
+	if (equal && len != UNIV_SQL_NULL) {
+		equal = (memcmp(dfield_get_data(field), data, len) == 0);
+	}
+
+	return(equal);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
Gets info bits in a data tuple.
@return info bits */
UNIV_INLINE
@@ -389,6 +407,8 @@ dtuple_create_from_mem(
}
}
#endif
+ UNIV_MEM_ASSERT_W(tuple->fields, n_fields * sizeof *tuple->fields);
+ UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
return(tuple);
}
@@ -416,30 +436,6 @@ dtuple_create(
tuple = dtuple_create_from_mem(buf, buf_size, n_fields);
-#ifdef UNIV_DEBUG
- UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
-#endif
-
- return(tuple);
-}
-
-/**********************************************************//**
-Wrap data fields in a tuple. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return data tuple */
-UNIV_INLINE
-const dtuple_t*
-dtuple_from_fields(
-/*===============*/
- dtuple_t* tuple, /*!< in: storage for data tuple */
- const dfield_t* fields, /*!< in: fields */
- ulint n_fields) /*!< in: number of fields */
-{
- tuple->info_bits = 0;
- tuple->n_fields = tuple->n_fields_cmp = n_fields;
- tuple->fields = (dfield_t*) fields;
- ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N);
-
return(tuple);
}
diff --git a/storage/xtradb/include/data0type.h b/storage/xtradb/include/data0type.h
index 25d68de6646..111664b0b52 100644
--- a/storage/xtradb/include/data0type.h
+++ b/storage/xtradb/include/data0type.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -33,10 +33,20 @@ extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
/* SQL data type struct */
-typedef struct dtype_struct dtype_t;
+struct dtype_t;
+
+/** Comparison types for the SQL LIKE operator */
+enum ib_like_t {
+ IB_LIKE_EXACT, /*!< exact match, e.g., STRING */
+ IB_LIKE_PREFIX, /*!< prefix match, e.g., STRING% */
+ IB_LIKE_SUFFIX, /*!< suffix match, e.g., %STRING */
+ IB_LIKE_SUBSTR, /*!< substring match, e.g., %STRING% */
+ IB_LIKE_REGEXP /*!< regular expression; marked "Future" (presumably not yet implemented -- confirm) */
+};
/*-------------------------------------------*/
/* The 'MAIN TYPE' of a column */
+#define DATA_MISSING 0 /* missing column */
#define DATA_VARCHAR 1 /* character varying of the
latin1_swedish_ci charset-collation; note
that the MySQL format for this, DATA_BINARY,
@@ -139,6 +149,8 @@ be less than 256 */
#define DATA_N_SYS_COLS 3 /* number of system columns defined above */
+#define DATA_FTS_DOC_ID 3 /* Used as FTS DOC ID column */
+
#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
/* Flags ORed to the precise data type */
@@ -182,6 +194,12 @@ because in GCC it returns a long. */
/* Get mbmaxlen from mbminmaxlen. */
#define DATA_MBMAXLEN(mbminmaxlen) ((ulint) ((mbminmaxlen) / DATA_MBMAX))
+/* We now support 15-bit collation numbers (up to 32767) */
+#define MAX_CHAR_COLL_NUM 32767
+
+/* Mask to get the Charset Collation number (0x7fff) */
+#define CHAR_COLL_MASK MAX_CHAR_COLL_NUM
+
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Gets the MySQL type code from a dtype.
@@ -450,6 +468,20 @@ dtype_new_read_for_order_and_null_size(
/*===================================*/
dtype_t* type, /*!< in: type struct */
const byte* buf); /*!< in: buffer for stored type order info */
+
+/*********************************************************************//**
+Writes the SQL name of a type (e.g. BIGINT UNSIGNED), derived from mtype,
+prtype and len, into the caller-supplied name buffer.
+@return the SQL type name (the name buffer that was passed in) */
+UNIV_INLINE
+char*
+dtype_sql_name(
+/*===========*/
+ unsigned mtype, /*!< in: main data type (DATA_INT, DATA_BLOB, ...) */
+ unsigned prtype, /*!< in: precise type; DATA_UNSIGNED and DATA_NOT_NULL are reflected in the name */
+ unsigned len, /*!< in: type length; selects the integer/BLOB variant or is printed as the (N) size */
+ char* name, /*!< out: buffer that receives the SQL name */
+ unsigned name_sz);/*!< in: size of the name buffer */
+
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
@@ -476,15 +508,15 @@ dtype_read_for_order_and_null_size()
dtype_new_read_for_order_and_null_size()
sym_tab_add_null_lit() */
-struct dtype_struct{
- unsigned mtype:8; /*!< main data type */
- unsigned prtype:24; /*!< precise type; MySQL data
+struct dtype_t{
+ unsigned prtype:32; /*!< precise type; MySQL data
type, charset code, flags to
indicate nullability,
signedness, whether this is a
binary string, whether this is
a true VARCHAR where MySQL
uses 2 bytes to store the length */
+ unsigned mtype:8; /*!< main data type */
/* the remaining fields do not affect alphabetical ordering: */
diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic
index 410970ac50e..d489bef89a8 100644
--- a/storage/xtradb/include/data0type.ic
+++ b/storage/xtradb/include/data0type.ic
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -23,6 +23,8 @@ Data types
Created 1/16/1996 Heikki Tuuri
*******************************************************/
+#include <string.h> /* strlen() */
+
#include "mach0data.h"
#ifndef UNIV_HOTBACKUP
# include "ha_prototypes.h"
@@ -36,7 +38,7 @@ dtype_get_charset_coll(
/*===================*/
ulint prtype) /*!< in: precise data type */
{
- return((prtype >> 16) & 0xFFUL);
+ return((prtype >> 16) & CHAR_COLL_MASK);
}
/*********************************************************************//**
@@ -259,8 +261,8 @@ dtype_get_pad_char(
switch (mtype) {
case DATA_FIXBINARY:
case DATA_BINARY:
- if (UNIV_UNLIKELY(dtype_get_charset_coll(prtype)
- == DATA_MYSQL_BINARY_CHARSET_COLL)) {
+ if (dtype_get_charset_coll(prtype)
+ == DATA_MYSQL_BINARY_CHARSET_COLL) {
/* Starting from 5.0.18, do not pad
VARBINARY or BINARY columns. */
return(ULINT_UNDEFINED);
@@ -312,11 +314,11 @@ dtype_new_store_for_order_and_null_size(
buf[0] = (byte)(type->mtype & 0xFFUL);
if (type->prtype & DATA_BINARY_TYPE) {
- buf[0] = buf[0] | 128;
+ buf[0] |= 128;
}
/* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) {
- buf[0] = buf[0] | 64;
+ buf[0] |= 64;
}
*/
@@ -326,7 +328,7 @@ dtype_new_store_for_order_and_null_size(
mach_write_to_2(buf + 2, len & 0xFFFFUL);
- ut_ad(dtype_get_charset_coll(type->prtype) < 256);
+ ut_ad(dtype_get_charset_coll(type->prtype) <= MAX_CHAR_COLL_NUM);
mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
if (type->prtype & DATA_NOT_NULL) {
@@ -353,7 +355,7 @@ dtype_read_for_order_and_null_size(
type->prtype = buf[1];
if (buf[0] & 128) {
- type->prtype = type->prtype | DATA_BINARY_TYPE;
+ type->prtype |= DATA_BINARY_TYPE;
}
type->len = mach_read_from_2(buf + 2);
@@ -393,10 +395,10 @@ dtype_new_read_for_order_and_null_size(
type->len = mach_read_from_2(buf + 2);
- charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
+ charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
if (dtype_is_string_type(type->mtype)) {
- ut_a(charset_coll < 256);
+ ut_a(charset_coll <= MAX_CHAR_COLL_NUM);
if (charset_coll == 0) {
/* This insert buffer record was inserted with MySQL
@@ -412,6 +414,101 @@ dtype_new_read_for_order_and_null_size(
}
dtype_set_mblen(type);
}
+
+/*********************************************************************//**
+Builds the SQL name of a type (e.g. BIGINT UNSIGNED) from mtype, prtype and
+len, and writes it into the caller-supplied name buffer. Combinations of
+mtype and len that are not recognized are reported as "UNKNOWN".
+" UNSIGNED" is appended for DATA_INT, DATA_FLOAT and DATA_DOUBLE when prtype
+has DATA_UNSIGNED set; " NOT NULL" is appended for every type when prtype
+has DATA_NOT_NULL set.
+@return the SQL type name (the name buffer that was passed in) */
+UNIV_INLINE
+char*
+dtype_sql_name(
+/*===========*/
+	unsigned	mtype,	/*!< in: mtype */
+	unsigned	prtype,	/*!< in: prtype */
+	unsigned	len,	/*!< in: len */
+	char*		name,	/*!< out: SQL name */
+	unsigned	name_sz)/*!< in: size of the name buffer */
+{
+
+/* Local helper: append " UNSIGNED" to what is already in name when the
+DATA_UNSIGNED flag is set. It is #undef'd below so that it does not leak
+into the files that include this inline file. */
+#define APPEND_UNSIGNED()					\
+	do {							\
+		if (prtype & DATA_UNSIGNED) {			\
+			ut_snprintf(name + strlen(name),	\
+				    name_sz - strlen(name),	\
+				    " UNSIGNED");		\
+		}						\
+	} while (0)
+
+	/* Fallback for every mtype/len combination not handled below. */
+	ut_snprintf(name, name_sz, "UNKNOWN");
+
+	switch (mtype) {
+	case DATA_INT:
+		/* The integer flavour is selected by the length. */
+		switch (len) {
+		case 1:
+			ut_snprintf(name, name_sz, "TINYINT");
+			break;
+		case 2:
+			ut_snprintf(name, name_sz, "SMALLINT");
+			break;
+		case 3:
+			ut_snprintf(name, name_sz, "MEDIUMINT");
+			break;
+		case 4:
+			ut_snprintf(name, name_sz, "INT");
+			break;
+		case 8:
+			ut_snprintf(name, name_sz, "BIGINT");
+			break;
+		}
+		APPEND_UNSIGNED();
+		break;
+	case DATA_FLOAT:
+		ut_snprintf(name, name_sz, "FLOAT");
+		APPEND_UNSIGNED();
+		break;
+	case DATA_DOUBLE:
+		ut_snprintf(name, name_sz, "DOUBLE");
+		APPEND_UNSIGNED();
+		break;
+	case DATA_FIXBINARY:
+		ut_snprintf(name, name_sz, "BINARY(%u)", len);
+		break;
+	case DATA_CHAR:
+	case DATA_MYSQL:
+		ut_snprintf(name, name_sz, "CHAR(%u)", len);
+		break;
+	case DATA_VARCHAR:
+	case DATA_VARMYSQL:
+		ut_snprintf(name, name_sz, "VARCHAR(%u)", len);
+		break;
+	case DATA_BINARY:
+		ut_snprintf(name, name_sz, "VARBINARY(%u)", len);
+		break;
+	case DATA_BLOB:
+		/* The BLOB flavour is selected by the length. */
+		switch (len) {
+		case 9:
+			ut_snprintf(name, name_sz, "TINYBLOB");
+			break;
+		case 10:
+			ut_snprintf(name, name_sz, "BLOB");
+			break;
+		case 11:
+			ut_snprintf(name, name_sz, "MEDIUMBLOB");
+			break;
+		case 12:
+			ut_snprintf(name, name_sz, "LONGBLOB");
+			break;
+		}
+		break;
+	default:
+		/* Any other main type keeps the "UNKNOWN" fallback. */
+		break;
+	}
+
+#undef APPEND_UNSIGNED
+
+	if (prtype & DATA_NOT_NULL) {
+		ut_snprintf(name + strlen(name),
+			    name_sz - strlen(name),
+			    " NOT NULL");
+	}
+
+	return(name);
+}
+
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
diff --git a/storage/xtradb/include/data0types.h b/storage/xtradb/include/data0types.h
index 245aca599c0..bd2bb577611 100644
--- a/storage/xtradb/include/data0types.h
+++ b/storage/xtradb/include/data0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,10 +27,10 @@ Created 9/21/2000 Heikki Tuuri
#define data0types_h
/* SQL data field struct */
-typedef struct dfield_struct dfield_t;
+struct dfield_t;
/* SQL data tuple struct */
-typedef struct dtuple_struct dtuple_t;
+struct dtuple_t;
#endif
diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h
index 4d0e3051fe6..af651c61b66 100644
--- a/storage/xtradb/include/db0err.h
+++ b/storage/xtradb/include/db0err.h
@@ -27,7 +27,7 @@ Created 5/24/1996 Heikki Tuuri
#define db0err_h
-enum db_err {
+enum dberr_t {
DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new
explicit record lock was created */
DB_SUCCESS = 10,
@@ -42,79 +42,91 @@ enum db_err {
DB_ROLLBACK,
DB_DUPLICATE_KEY,
DB_QUE_THR_SUSPENDED,
- DB_MISSING_HISTORY, /* required history data has been
+ DB_MISSING_HISTORY, /*!< required history data has been
deleted due to lack of space in
rollback segment */
DB_CLUSTER_NOT_FOUND = 30,
DB_TABLE_NOT_FOUND,
- DB_MUST_GET_MORE_FILE_SPACE, /* the database has to be stopped
+ DB_MUST_GET_MORE_FILE_SPACE, /*!< the database has to be stopped
and restarted with more file space */
DB_TABLE_IS_BEING_USED,
- DB_TOO_BIG_RECORD, /* a record in an index would not fit
+ DB_TOO_BIG_RECORD, /*!< a record in an index would not fit
on a compressed page, or it would
become bigger than 1/2 free space in
an uncompressed page frame */
- DB_LOCK_WAIT_TIMEOUT, /* lock wait lasted too long */
- DB_NO_REFERENCED_ROW, /* referenced key value not found
+ DB_LOCK_WAIT_TIMEOUT, /*!< lock wait lasted too long */
+ DB_NO_REFERENCED_ROW, /*!< referenced key value not found
for a foreign key in an insert or
update of a row */
- DB_ROW_IS_REFERENCED, /* cannot delete or update a row
+ DB_ROW_IS_REFERENCED, /*!< cannot delete or update a row
because it contains a key value
which is referenced */
- DB_CANNOT_ADD_CONSTRAINT, /* adding a foreign key constraint
+ DB_CANNOT_ADD_CONSTRAINT, /*!< adding a foreign key constraint
to a table failed */
- DB_CORRUPTION, /* data structure corruption noticed */
- DB_CANNOT_DROP_CONSTRAINT, /* dropping a foreign key constraint
+ DB_CORRUPTION, /*!< data structure corruption noticed */
+ DB_CANNOT_DROP_CONSTRAINT, /*!< dropping a foreign key constraint
from a table failed */
- DB_NO_SAVEPOINT, /* no savepoint exists with the given
+ DB_NO_SAVEPOINT, /*!< no savepoint exists with the given
name */
- DB_TABLESPACE_ALREADY_EXISTS, /* we cannot create a new single-table
+ DB_TABLESPACE_EXISTS, /*!< we cannot create a new single-table
tablespace because a file of the same
name already exists */
- DB_TABLESPACE_DELETED, /* tablespace does not exist or is
+ DB_TABLESPACE_DELETED, /*!< tablespace was deleted or is
being dropped right now */
- DB_LOCK_TABLE_FULL, /* lock structs have exhausted the
+ DB_TABLESPACE_NOT_FOUND, /*!< Attempt to delete a tablespace
+ instance that was not found in the
+ tablespace hash table */
+ DB_LOCK_TABLE_FULL, /*!< lock structs have exhausted the
buffer pool (for big transactions,
InnoDB stores the lock structs in the
buffer pool) */
- DB_FOREIGN_DUPLICATE_KEY, /* foreign key constraints
+ DB_FOREIGN_DUPLICATE_KEY, /*!< foreign key constraints
activated by the operation would
lead to a duplicate key in some
table */
- DB_TOO_MANY_CONCURRENT_TRXS, /* when InnoDB runs out of the
+ DB_TOO_MANY_CONCURRENT_TRXS, /*!< when InnoDB runs out of the
preconfigured undo slots, this can
only happen when there are too many
concurrent transactions */
- DB_UNSUPPORTED, /* when InnoDB sees any artefact or
+ DB_UNSUPPORTED, /*!< when InnoDB sees any artefact or
a feature that it can't recoginize or
work with e.g., FT indexes created by
a later version of the engine. */
- DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY
- was found to be NULL */
+ DB_INVALID_NULL, /*!< a NOT NULL column was found to
+ be NULL during table rebuild */
- DB_STATS_DO_NOT_EXIST, /* an operation that requires the
+ DB_STATS_DO_NOT_EXIST, /*!< an operation that requires the
persistent storage, used for recording
table and index statistics, was
requested but this storage does not
exist itself or the stats for a given
table do not exist */
- DB_FOREIGN_EXCEED_MAX_CASCADE, /* Foreign key constraint related
+ DB_FOREIGN_EXCEED_MAX_CASCADE, /*!< Foreign key constraint related
cascading delete/update exceeds
maximum allowed depth */
- DB_CHILD_NO_INDEX, /* the child (foreign) table does not
- have an index that contains the
+ DB_CHILD_NO_INDEX, /*!< the child (foreign) table does
+ not have an index that contains the
foreign keys as its prefix columns */
- DB_PARENT_NO_INDEX, /* the parent table does not
+ DB_PARENT_NO_INDEX, /*!< the parent table does not
have an index that contains the
foreign keys as its prefix columns */
- DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum
- limit */
- DB_INDEX_CORRUPT, /* we have corrupted index */
- DB_UNDO_RECORD_TOO_BIG, /* the undo log record is too big */
+ DB_TOO_BIG_INDEX_COL, /*!< index column size exceeds
+ maximum limit */
+ DB_INDEX_CORRUPT, /*!< we have corrupted index */
+ DB_UNDO_RECORD_TOO_BIG, /*!< the undo log record is too big */
+ DB_READ_ONLY, /*!< Update operation attempted in
+ a read-only transaction */
+ DB_FTS_INVALID_DOCID, /*!< FTS Doc ID cannot be zero */
DB_TABLE_IN_FK_CHECK, /* table is being used in foreign
key check */
- DB_IDENTIFIER_TOO_LONG, /* Identifier name too long */
+ DB_ONLINE_LOG_TOO_BIG, /*!< Modification log grew too big
+ during online index creation */
+
+ DB_IO_ERROR, /*!< Generic IO error */
+ DB_IDENTIFIER_TOO_LONG, /*!< Identifier name too long */
+ DB_FTS_EXCEED_RESULT_CACHE_LIMIT, /*!< FTS query memory
+ exceeds result cache limit */
/* The following are partial failure codes */
DB_FAIL = 1000,
@@ -124,7 +136,23 @@ enum db_err {
DB_ZIP_OVERFLOW,
DB_RECORD_NOT_FOUND = 1500,
DB_END_OF_INDEX,
- DB_SEARCH_ABORTED_BY_USER= 1533
+ DB_DICT_CHANGED, /*!< Some part of table dictionary has
+ changed. Such as index dropped or
+ foreign key dropped */
+
+ DB_SEARCH_ABORTED_BY_USER= 1533,
+
+ /* The following are API only error codes. */
+ DB_DATA_MISMATCH = 2000, /*!< Column update or read failed
+ because the types mismatch */
+
+ DB_SCHEMA_NOT_LOCKED, /*!< If an API function expects the
+ schema to be locked in exclusive mode
+ and if it's not then that API function
+ will return this error code */
+
+ DB_NOT_FOUND /*!< Generic error code for "Not found"
+ type of errors */
};
#endif
diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h
index 27e87d16750..a994c9d8ff1 100644
--- a/storage/xtradb/include/dict0boot.h
+++ b/storage/xtradb/include/dict0boot.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -58,6 +58,13 @@ dict_hdr_get_new_id(
ulint* space_id); /*!< out: space id
(not assigned if NULL) */
/**********************************************************************//**
+Writes the current value of the row id counter to the dictionary header file
+page. */
+UNIV_INTERN
+void
+dict_hdr_flush_row_id(void);
+/*=======================*/
+/**********************************************************************//**
Returns a new row id.
@return the new id */
UNIV_INLINE
@@ -82,38 +89,32 @@ dict_sys_write_row_id(
row_id_t row_id);/*!< in: row id */
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created. */
+started. This function is also called when the data dictionary is created.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
-dict_boot(void);
+dberr_t
+dict_boot(void)
/*===========*/
-/*****************************************************************//**
-Creates and initializes the data dictionary at the database creation. */
-UNIV_INTERN
-void
-dict_create(void);
-/*=============*/
-
-/*****************************************************************//**
-Verifies the SYS_STATS table by scanning its clustered index. This
-function may only be called at InnoDB startup time.
-
-@return TRUE if SYS_STATS was verified successfully */
-UNIV_INTERN
-ibool
-dict_verify_xtradb_sys_stats(void);
-/*==============================*/
+ __attribute__((warn_unused_result));
/*****************************************************************//**
-Discard the existing dictionary cache SYS_STATS information, create and
-add it there anew. Does not touch the old SYS_STATS tablespace page
-under the assumption that they are corrupted or overwritten for other
-purposes. */
+Creates and initializes the data dictionary at the server bootstrap.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
-dict_recreate_xtradb_sys_stats(void);
-/*================================*/
+dberr_t
+dict_create(void)
+/*=============*/
+ __attribute__((warn_unused_result));
+/*********************************************************************//**
+Check if a table id belongs to system table.
+@return true if the table id belongs to a system table. */
+UNIV_INLINE
+bool
+dict_is_sys_table(
+/*==============*/
+ table_id_t id) /*!< in: table id to check */
+ __attribute__((warn_unused_result));
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
@@ -124,7 +125,6 @@ dict_recreate_xtradb_sys_stats(void);
#define DICT_COLUMNS_ID 2
#define DICT_INDEXES_ID 3
#define DICT_FIELDS_ID 4
-#define DICT_STATS_ID 6
/* The following is a secondary index on SYS_TABLES */
#define DICT_TABLE_IDS_ID 5
@@ -134,7 +134,6 @@ dict_recreate_xtradb_sys_stats(void);
indexes; ibuf tables and indexes are
assigned as the id the number
DICT_IBUF_ID_MIN plus the space id */
-#define DICT_IBUF_ID_MIN 0xFFFFFFFF00000000ULL
/* The offset of the dictionary header on the page */
#define DICT_HDR FSEG_PAGE_DATA
@@ -142,44 +141,200 @@ dict_recreate_xtradb_sys_stats(void);
/*-------------------------------------------------------------*/
/* Dictionary header offsets */
#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
-#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
-#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
-#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id, or 0*/
-#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID */
-#define DICT_HDR_TABLES 32 /* Root of the table index tree */
-#define DICT_HDR_TABLE_IDS 36 /* Root of the table index tree */
-#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */
-#define DICT_HDR_INDEXES 44 /* Root of the index index tree */
-#define DICT_HDR_FIELDS 48 /* Root of the index field
- index tree */
-#define DICT_HDR_STATS 52 /* Root of the stats tree */
+#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
+#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
+#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id,or 0*/
+#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID*/
+#define DICT_HDR_TABLES 32 /* Root of SYS_TABLES clust index */
+#define DICT_HDR_TABLE_IDS 36 /* Root of SYS_TABLE_IDS sec index */
+#define DICT_HDR_COLUMNS 40 /* Root of SYS_COLUMNS clust index */
+#define DICT_HDR_INDEXES 44 /* Root of SYS_INDEXES clust index */
+#define DICT_HDR_FIELDS 48 /* Root of SYS_FIELDS clust index */
#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
segment into which the dictionary
header is created */
-
-#define DICT_HDR_XTRADB_MARK 256 /* Flag to distinguish expansion of XtraDB */
/*-------------------------------------------------------------*/
+/* The columns in SYS_TABLES, numbered by position starting from 0.
+DICT_NUM_COLS__SYS_TABLES is the number of columns listed here. */
+enum dict_col_sys_tables_enum {
+ DICT_COL__SYS_TABLES__NAME = 0,
+ DICT_COL__SYS_TABLES__ID = 1,
+ DICT_COL__SYS_TABLES__N_COLS = 2,
+ DICT_COL__SYS_TABLES__TYPE = 3,
+ DICT_COL__SYS_TABLES__MIX_ID = 4,
+ DICT_COL__SYS_TABLES__MIX_LEN = 5,
+ DICT_COL__SYS_TABLES__CLUSTER_ID = 6,
+ DICT_COL__SYS_TABLES__SPACE = 7,
+ DICT_NUM_COLS__SYS_TABLES = 8
+};
/* The field numbers in the SYS_TABLES clustered index */
-#define DICT_SYS_TABLES_TYPE_FIELD 5
-
+enum dict_fld_sys_tables_enum {
+ /* DB_TRX_ID and DB_ROLL_PTR take positions 1 and 2, so every field
+ after NAME is two higher than the matching DICT_COL__SYS_TABLES__*
+ value. DICT_NUM_FIELDS__SYS_TABLES is the number of fields. */
+ DICT_FLD__SYS_TABLES__NAME = 0,
+ DICT_FLD__SYS_TABLES__DB_TRX_ID = 1,
+ DICT_FLD__SYS_TABLES__DB_ROLL_PTR = 2,
+ DICT_FLD__SYS_TABLES__ID = 3,
+ DICT_FLD__SYS_TABLES__N_COLS = 4,
+ DICT_FLD__SYS_TABLES__TYPE = 5,
+ DICT_FLD__SYS_TABLES__MIX_ID = 6,
+ DICT_FLD__SYS_TABLES__MIX_LEN = 7,
+ DICT_FLD__SYS_TABLES__CLUSTER_ID = 8,
+ DICT_FLD__SYS_TABLES__SPACE = 9,
+ DICT_NUM_FIELDS__SYS_TABLES = 10
+};
+/* The field numbers in the SYS_TABLE_IDS index: ID comes first, then NAME.
+DICT_NUM_FIELDS__SYS_TABLE_IDS is the number of fields listed here. */
+enum dict_fld_sys_table_ids_enum {
+ DICT_FLD__SYS_TABLE_IDS__ID = 0,
+ DICT_FLD__SYS_TABLE_IDS__NAME = 1,
+ DICT_NUM_FIELDS__SYS_TABLE_IDS = 2
+};
+/* The columns in SYS_COLUMNS, numbered by position starting from 0.
+DICT_NUM_COLS__SYS_COLUMNS is the number of columns listed here. */
+enum dict_col_sys_columns_enum {
+ DICT_COL__SYS_COLUMNS__TABLE_ID = 0,
+ DICT_COL__SYS_COLUMNS__POS = 1,
+ DICT_COL__SYS_COLUMNS__NAME = 2,
+ DICT_COL__SYS_COLUMNS__MTYPE = 3,
+ DICT_COL__SYS_COLUMNS__PRTYPE = 4,
+ DICT_COL__SYS_COLUMNS__LEN = 5,
+ DICT_COL__SYS_COLUMNS__PREC = 6,
+ DICT_NUM_COLS__SYS_COLUMNS = 7
+};
+/* The field numbers in the SYS_COLUMNS clustered index. DB_TRX_ID and
+DB_ROLL_PTR take positions 2 and 3, after TABLE_ID and POS, so the fields
+from NAME onwards are two higher than the matching DICT_COL__SYS_COLUMNS__*
+value. DICT_NUM_FIELDS__SYS_COLUMNS is the number of fields. */
+enum dict_fld_sys_columns_enum {
+ DICT_FLD__SYS_COLUMNS__TABLE_ID = 0,
+ DICT_FLD__SYS_COLUMNS__POS = 1,
+ DICT_FLD__SYS_COLUMNS__DB_TRX_ID = 2,
+ DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR = 3,
+ DICT_FLD__SYS_COLUMNS__NAME = 4,
+ DICT_FLD__SYS_COLUMNS__MTYPE = 5,
+ DICT_FLD__SYS_COLUMNS__PRTYPE = 6,
+ DICT_FLD__SYS_COLUMNS__LEN = 7,
+ DICT_FLD__SYS_COLUMNS__PREC = 8,
+ DICT_NUM_FIELDS__SYS_COLUMNS = 9
+};
+/* The columns in SYS_INDEXES, numbered by position starting from 0.
+DICT_NUM_COLS__SYS_INDEXES is the number of columns listed here. */
+enum dict_col_sys_indexes_enum {
+ DICT_COL__SYS_INDEXES__TABLE_ID = 0,
+ DICT_COL__SYS_INDEXES__ID = 1,
+ DICT_COL__SYS_INDEXES__NAME = 2,
+ DICT_COL__SYS_INDEXES__N_FIELDS = 3,
+ DICT_COL__SYS_INDEXES__TYPE = 4,
+ DICT_COL__SYS_INDEXES__SPACE = 5,
+ DICT_COL__SYS_INDEXES__PAGE_NO = 6,
+ DICT_NUM_COLS__SYS_INDEXES = 7
+};
/* The field numbers in the SYS_INDEXES clustered index */
-#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
-#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
-#define DICT_SYS_INDEXES_TYPE_FIELD 6
-#define DICT_SYS_INDEXES_NAME_FIELD 4
+enum dict_fld_sys_indexes_enum {
+ /* DB_TRX_ID and DB_ROLL_PTR take positions 2 and 3, after TABLE_ID
+ and ID, so the fields from NAME onwards are two higher than the
+ matching DICT_COL__SYS_INDEXES__* value.
+ DICT_NUM_FIELDS__SYS_INDEXES is the number of fields. */
+ DICT_FLD__SYS_INDEXES__TABLE_ID = 0,
+ DICT_FLD__SYS_INDEXES__ID = 1,
+ DICT_FLD__SYS_INDEXES__DB_TRX_ID = 2,
+ DICT_FLD__SYS_INDEXES__DB_ROLL_PTR = 3,
+ DICT_FLD__SYS_INDEXES__NAME = 4,
+ DICT_FLD__SYS_INDEXES__N_FIELDS = 5,
+ DICT_FLD__SYS_INDEXES__TYPE = 6,
+ DICT_FLD__SYS_INDEXES__SPACE = 7,
+ DICT_FLD__SYS_INDEXES__PAGE_NO = 8,
+ DICT_NUM_FIELDS__SYS_INDEXES = 9
+};
+/* The columns in SYS_FIELDS, numbered by position starting from 0.
+DICT_NUM_COLS__SYS_FIELDS is the number of columns listed here. */
+enum dict_col_sys_fields_enum {
+ DICT_COL__SYS_FIELDS__INDEX_ID = 0,
+ DICT_COL__SYS_FIELDS__POS = 1,
+ DICT_COL__SYS_FIELDS__COL_NAME = 2,
+ DICT_NUM_COLS__SYS_FIELDS = 3
+};
+/* The field numbers in the SYS_FIELDS clustered index. DB_TRX_ID and
+DB_ROLL_PTR take positions 2 and 3, after INDEX_ID and POS, so COL_NAME is
+two higher than DICT_COL__SYS_FIELDS__COL_NAME.
+DICT_NUM_FIELDS__SYS_FIELDS is the number of fields. */
+enum dict_fld_sys_fields_enum {
+ DICT_FLD__SYS_FIELDS__INDEX_ID = 0,
+ DICT_FLD__SYS_FIELDS__POS = 1,
+ DICT_FLD__SYS_FIELDS__DB_TRX_ID = 2,
+ DICT_FLD__SYS_FIELDS__DB_ROLL_PTR = 3,
+ DICT_FLD__SYS_FIELDS__COL_NAME = 4,
+ DICT_NUM_FIELDS__SYS_FIELDS = 5
+};
+/* The columns in SYS_FOREIGN, numbered by position starting from 0.
+DICT_NUM_COLS__SYS_FOREIGN is the number of columns listed here. */
+enum dict_col_sys_foreign_enum {
+ DICT_COL__SYS_FOREIGN__ID = 0,
+ DICT_COL__SYS_FOREIGN__FOR_NAME = 1,
+ DICT_COL__SYS_FOREIGN__REF_NAME = 2,
+ DICT_COL__SYS_FOREIGN__N_COLS = 3,
+ DICT_NUM_COLS__SYS_FOREIGN = 4
+};
+/* The field numbers in the SYS_FOREIGN clustered index. DB_TRX_ID and
+DB_ROLL_PTR take positions 1 and 2, after ID, so the fields from FOR_NAME
+onwards are two higher than the matching DICT_COL__SYS_FOREIGN__* value.
+DICT_NUM_FIELDS__SYS_FOREIGN is the number of fields. */
+enum dict_fld_sys_foreign_enum {
+ DICT_FLD__SYS_FOREIGN__ID = 0,
+ DICT_FLD__SYS_FOREIGN__DB_TRX_ID = 1,
+ DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR = 2,
+ DICT_FLD__SYS_FOREIGN__FOR_NAME = 3,
+ DICT_FLD__SYS_FOREIGN__REF_NAME = 4,
+ DICT_FLD__SYS_FOREIGN__N_COLS = 5,
+ DICT_NUM_FIELDS__SYS_FOREIGN = 6
+};
+/* The field numbers in the SYS_FOREIGN_FOR_NAME secondary index: NAME comes
+first, then ID. DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME is the number of
+fields listed here. */
+enum dict_fld_sys_foreign_for_name_enum {
+ DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME = 0,
+ DICT_FLD__SYS_FOREIGN_FOR_NAME__ID = 1,
+ DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME = 2
+};
+/* The columns in SYS_FOREIGN_COLS, numbered by position starting from 0.
+DICT_NUM_COLS__SYS_FOREIGN_COLS is the number of columns listed here. */
+enum dict_col_sys_foreign_cols_enum {
+ DICT_COL__SYS_FOREIGN_COLS__ID = 0,
+ DICT_COL__SYS_FOREIGN_COLS__POS = 1,
+ DICT_COL__SYS_FOREIGN_COLS__FOR_COL_NAME = 2,
+ DICT_COL__SYS_FOREIGN_COLS__REF_COL_NAME = 3,
+ DICT_NUM_COLS__SYS_FOREIGN_COLS = 4
+};
+/* The field numbers in the SYS_FOREIGN_COLS clustered index. DB_TRX_ID and
+DB_ROLL_PTR take positions 2 and 3, after ID and POS, so FOR_COL_NAME and
+REF_COL_NAME are two higher than the matching DICT_COL__SYS_FOREIGN_COLS__*
+value. DICT_NUM_FIELDS__SYS_FOREIGN_COLS is the number of fields. */
+enum dict_fld_sys_foreign_cols_enum {
+ DICT_FLD__SYS_FOREIGN_COLS__ID = 0,
+ DICT_FLD__SYS_FOREIGN_COLS__POS = 1,
+ DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID = 2,
+ DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR = 3,
+ DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME = 4,
+ DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME = 5,
+ DICT_NUM_FIELDS__SYS_FOREIGN_COLS = 6
+};
+/* The columns in SYS_TABLESPACES, numbered by position starting from 0.
+DICT_NUM_COLS__SYS_TABLESPACES is the number of columns listed here. */
+enum dict_col_sys_tablespaces_enum {
+ DICT_COL__SYS_TABLESPACES__SPACE = 0,
+ DICT_COL__SYS_TABLESPACES__NAME = 1,
+ DICT_COL__SYS_TABLESPACES__FLAGS = 2,
+ DICT_NUM_COLS__SYS_TABLESPACES = 3
+};
+/* The field numbers in the SYS_TABLESPACES clustered index. DB_TRX_ID and
+DB_ROLL_PTR take positions 1 and 2, after SPACE, so NAME and FLAGS are two
+higher than the matching DICT_COL__SYS_TABLESPACES__* value.
+DICT_NUM_FIELDS__SYS_TABLESPACES is the number of fields. */
+enum dict_fld_sys_tablespaces_enum {
+ DICT_FLD__SYS_TABLESPACES__SPACE = 0,
+ DICT_FLD__SYS_TABLESPACES__DB_TRX_ID = 1,
+ DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR = 2,
+ DICT_FLD__SYS_TABLESPACES__NAME = 3,
+ DICT_FLD__SYS_TABLESPACES__FLAGS = 4,
+ DICT_NUM_FIELDS__SYS_TABLESPACES = 5
+};
+/* The columns in SYS_DATAFILES, numbered by position starting from 0.
+DICT_NUM_COLS__SYS_DATAFILES is the number of columns listed here. */
+enum dict_col_sys_datafiles_enum {
+ DICT_COL__SYS_DATAFILES__SPACE = 0,
+ DICT_COL__SYS_DATAFILES__PATH = 1,
+ DICT_NUM_COLS__SYS_DATAFILES = 2
+};
+/* The field numbers in the SYS_DATAFILES clustered index. DB_TRX_ID and
+DB_ROLL_PTR take positions 1 and 2, after SPACE, so PATH is two higher than
+DICT_COL__SYS_DATAFILES__PATH. DICT_NUM_FIELDS__SYS_DATAFILES is the number
+of fields. */
+enum dict_fld_sys_datafiles_enum {
+ DICT_FLD__SYS_DATAFILES__SPACE = 0,
+ DICT_FLD__SYS_DATAFILES__DB_TRX_ID = 1,
+ DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR = 2,
+ DICT_FLD__SYS_DATAFILES__PATH = 3,
+ DICT_NUM_FIELDS__SYS_DATAFILES = 4
+};
-#define DICT_SYS_STATS_DIFF_VALS_FIELD 4
-#define DICT_SYS_STATS_NON_NULL_VALS_FIELD 5
+/* A number of the columns above occur in multiple tables. These are the
+lengths of those fields. */
+#define DICT_FLD_LEN_SPACE 4
+#define DICT_FLD_LEN_FLAGS 4
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
updated */
#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
-#define DICT_HDR_XTRADB_FLAG 0x5854524144425F31ULL /* "XTRADB_1" */
-
#ifndef UNIV_NONINL
#include "dict0boot.ic"
#endif
diff --git a/storage/xtradb/include/dict0boot.ic b/storage/xtradb/include/dict0boot.ic
index 5fa33837640..2b156a4f672 100644
--- a/storage/xtradb/include/dict0boot.ic
+++ b/storage/xtradb/include/dict0boot.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -24,15 +24,6 @@ Created 4/18/1996 Heikki Tuuri
*******************************************************/
/**********************************************************************//**
-Writes the current value of the row id counter to the dictionary header file
-page. */
-UNIV_INTERN
-void
-dict_hdr_flush_row_id(void);
-/*=======================*/
-
-
-/**********************************************************************//**
Returns a new row id.
@return the new id */
UNIV_INLINE
@@ -90,4 +81,16 @@ dict_sys_write_row_id(
mach_write_to_6(field, row_id);
}
+/*********************************************************************//**
+Tells whether a table id belongs to a system table, which is the case
+when the id is below DICT_HDR_FIRST_ID.
+@return true if the table id belongs to a system table. */
+UNIV_INLINE
+bool
+dict_is_sys_table(
+/*==============*/
+	table_id_t	id)	/*!< in: table id to check */
+{
+	const bool	is_sys = (id < DICT_HDR_FIRST_ID);
+
+	return(is_sys);
+}
+
diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h
index 762ab54a353..6ec1079957b 100644
--- a/storage/xtradb/include/dict0crea.h
+++ b/storage/xtradb/include/dict0crea.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -42,7 +42,9 @@ tab_create_graph_create(
/*====================*/
dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
- mem_heap_t* heap); /*!< in: heap where created */
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit);/*!< in: true if the commit node should be
+ added to the query graph */
/*********************************************************************//**
Creates an index create graph.
@return own: index create node */
@@ -52,15 +54,9 @@ ind_create_graph_create(
/*====================*/
dict_index_t* index, /*!< in: index to create, built as a memory data
structure */
- mem_heap_t* heap); /*!< in: heap where created */
-/*********************************************************************//**
-*/
-UNIV_INTERN
-ind_node_t*
-ind_insert_stats_graph_create(
-/*==========================*/
- dict_index_t* index,
- mem_heap_t* heap);
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit);/*!< in: true if the commit node should be
+ added to the query graph */
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
@return query thread to run next or NULL */
@@ -70,13 +66,6 @@ dict_create_table_step(
/*===================*/
que_thr_t* thr); /*!< in: query thread */
/***********************************************************//**
-*/
-UNIV_INTERN
-que_thr_t*
-dict_insert_stats_step(
-/*===================*/
- que_thr_t* thr);
-/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
graphs.
@return query thread to run next or NULL */
@@ -114,14 +103,28 @@ dict_drop_index_tree(
mtr_t* mtr); /*!< in: mtr having the latch on the record page */
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
-at database creation or database start if they are not found or are
+at server bootstrap or server start if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_create_or_check_foreign_constraint_tables(void);
/*================================================*/
/********************************************************************//**
+Generate a foreign key constraint name when it was not named by the user.
+A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER,
+where the numbers start from 1, and are given locally for this table, that is,
+the number is not global, as it used to be before MySQL 4.0.18. */
+UNIV_INLINE
+dberr_t
+dict_create_add_foreign_id(
+/*=======================*/
+ ulint* id_nr, /*!< in/out: number to use in id generation;
+ incremented if used */
+ const char* name, /*!< in: table name */
+ dict_foreign_t* foreign)/*!< in/out: foreign key */
+ __attribute__((nonnull));
+/********************************************************************//**
Adds foreign key definitions to data dictionary tables in the database. We
look at table->foreign_list, and also generate names to constraints that were
not named by the user. A generated constraint has a name of the format
@@ -130,7 +133,7 @@ given locally for this table, that is, the number is not global, as in the
old format constraints < 4.0.18 it used to be.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_add_foreigns_to_dictionary(
/*===================================*/
ulint start_id,/*!< in: if we are actually doing ALTER TABLE
@@ -142,11 +145,46 @@ dict_create_add_foreigns_to_dictionary(
so far has no constraints for which the name
was generated here */
dict_table_t* table, /*!< in: table */
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull, warn_unused_result));
+/****************************************************************//**
+Creates the tablespaces and datafiles system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_tablespace(void);
+/*=====================================*/
+/********************************************************************//**
+Add a single tablespace definition to the data dictionary tables in the
+database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_tablespace_to_dictionary(
+/*=====================================*/
+ ulint space, /*!< in: tablespace id */
+ const char* name, /*!< in: tablespace name */
+ ulint flags, /*!< in: tablespace flags */
+ const char* path, /*!< in: tablespace path */
+ trx_t* trx, /*!< in: transaction */
+ bool commit); /*!< in: if true then commit the
+ transaction */
+/********************************************************************//**
+Add a foreign key definition to the data dictionary tables.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_foreign_to_dictionary(
+/*==================================*/
+ const char* name, /*!< in: table name */
+ const dict_foreign_t* foreign,/*!< in: foreign key */
+ trx_t* trx) /*!< in/out: dictionary transaction */
+ __attribute__((nonnull, warn_unused_result));
/* Table create node structure */
-
-struct tab_node_struct{
+struct tab_node_t{
que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */
dict_table_t* table; /*!< table to create, built as a memory data
structure with dict_mem_... functions */
@@ -175,7 +213,7 @@ struct tab_node_struct{
/* Index create node struct */
-struct ind_node_struct{
+struct ind_node_t{
que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */
dict_index_t* index; /*!< index to create, built as a memory data
structure with dict_mem_... functions */
@@ -185,7 +223,6 @@ struct ind_node_struct{
ins_node_t* field_def; /* child node which does the inserts of
the field definitions; the row to be inserted
is built by the parent node */
- ins_node_t* stats_def;
commit_node_t* commit_node;
/* child node which performs a commit after
a successful index creation */
@@ -196,7 +233,6 @@ struct ind_node_struct{
dict_table_t* table; /*!< table which owns the index */
dtuple_t* ind_row;/* index definition row built */
ulint field_no;/* next field definition to insert */
- ulint stats_no;
mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
};
@@ -206,7 +242,6 @@ struct ind_node_struct{
#define INDEX_CREATE_INDEX_TREE 3
#define INDEX_COMMIT_WORK 4
#define INDEX_ADD_TO_CACHE 5
-#define INDEX_BUILD_STATS_COLS 6
#ifndef UNIV_NONINL
#include "dict0crea.ic"
diff --git a/storage/xtradb/include/dict0crea.ic b/storage/xtradb/include/dict0crea.ic
index 36f77e5c7d1..2d0d9dcb858 100644
--- a/storage/xtradb/include/dict0crea.ic
+++ b/storage/xtradb/include/dict0crea.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -23,3 +23,76 @@ Database object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
+#include "mem0mem.h"
+
+/*********************************************************************//**
+Checks if a table name contains the string "/#sql" which denotes temporary
+tables in MySQL.
+@return true if temporary table */
+UNIV_INTERN
+bool
+row_is_mysql_tmp_table_name(
+/*========================*/
+ const char* name) __attribute__((warn_unused_result));
+ /*!< in: table name in the form
+ 'database/tablename' */
+
+
+/********************************************************************//**
+Generate a foreign key constraint name when it was not named by the user.
+A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER,
+where the numbers start from 1, and are given locally for this table, that is,
+the number is not global, as it used to be before MySQL 4.0.18.
+Nothing is done when foreign->id is already set.
+@return DB_SUCCESS, or DB_IDENTIFIER_TOO_LONG if the generated name is
+rejected by innobase_check_identifier_length() */
+UNIV_INLINE
+dberr_t
+dict_create_add_foreign_id(
+/*=======================*/
+	ulint*		id_nr,	/*!< in/out: number to use in id generation;
+				incremented if used */
+	const char*	name,	/*!< in: table name; the non-temporary
+				branch requires it to contain a '/' */
+	dict_foreign_t*	foreign)/*!< in/out: foreign key */
+{
+	/* Room for "_ibfk_" (6 bytes), the number and the terminating NUL;
+	no overflow if number < 1e13 */
+	const ulint	suffix_len = 20;
+
+	if (foreign->id != NULL) {
+		/* The constraint already has an id */
+		return(DB_SUCCESS);
+	}
+
+	/* Generate a new constraint id */
+	char*	id;
+
+	if (row_is_mysql_tmp_table_name(name)) {
+
+		id = static_cast<char*>(
+			mem_heap_alloc(foreign->heap,
+				       strlen(name) + suffix_len));
+
+		sprintf(id, "%s_ibfk_%lu", name,
+			(ulong) (*id_nr)++);
+	} else {
+		char	table_name[MAX_TABLE_NAME_LEN + 20] = "";
+		uint	errors = 0;
+
+		/* strncpy() does not NUL-terminate when the source fills
+		the destination: copy one byte less than the buffer size
+		and terminate explicitly */
+		strncpy(table_name, name, sizeof(table_name) - 1);
+		table_name[sizeof(table_name) - 1] = '\0';
+
+		innobase_convert_to_system_charset(
+			strchr(table_name, '/') + 1,
+			strchr(name, '/') + 1,
+			MAX_TABLE_NAME_LEN, &errors);
+
+		if (errors) {
+			/* The conversion reported errors: fall back to
+			the name as it was passed in */
+			strncpy(table_name, name, sizeof(table_name) - 1);
+			table_name[sizeof(table_name) - 1] = '\0';
+		}
+
+		/* Size the buffer from the string that is actually
+		printed; nothing here bounds the converted name by
+		strlen(name) */
+		id = static_cast<char*>(
+			mem_heap_alloc(foreign->heap,
+				       strlen(table_name) + suffix_len));
+
+		sprintf(id, "%s_ibfk_%lu", table_name,
+			(ulong) (*id_nr)++);
+
+		if (innobase_check_identifier_length(
+			    strchr(id, '/') + 1)) {
+			return(DB_IDENTIFIER_TOO_LONG);
+		}
+	}
+
+	foreign->id = id;
+
+	return(DB_SUCCESS);
+}
+
diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h
index 8c6620b94b3..6669f60b95a 100644
--- a/storage/xtradb/include/dict0dict.h
+++ b/storage/xtradb/include/dict0dict.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,6 +28,7 @@ Created 1/8/1996 Heikki Tuuri
#define dict0dict_h
#include "univ.i"
+#include "db0err.h"
#include "dict0types.h"
#include "dict0mem.h"
#include "data0type.h"
@@ -39,6 +41,7 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0rnd.h"
#include "ut0byte.h"
#include "trx0types.h"
+#include "row0types.h"
#ifndef UNIV_HOTBACKUP
# include "sync0sync.h"
@@ -49,7 +52,8 @@ UNIV_INTERN
void
dict_casedn_str(
/*============*/
- char* a); /*!< in/out: string to put in lower case */
+ char* a) /*!< in/out: string to put in lower case */
+ __attribute__((nonnull));
/********************************************************************//**
Get the database name length in a table name.
@return database name length */
@@ -57,34 +61,88 @@ UNIV_INTERN
ulint
dict_get_db_name_len(
/*=================*/
- const char* name); /*!< in: table name in the form
+ const char* name) /*!< in: table name in the form
dbname '/' tablename */
+ __attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Open a table from its database and table name, this is currently used by
+foreign constraint parser to get the referenced table.
+@return complete table name with database and table name, allocated from
+heap memory passed in */
+UNIV_INTERN
+char*
+dict_get_referenced_table(
+/*======================*/
+ const char* name, /*!< in: foreign key table name */
+ const char* database_name, /*!< in: table db name */
+ ulint database_name_len,/*!< in: db name length */
+ const char* table_name, /*!< in: table name */
+ ulint table_name_len, /*!< in: table name length */
+ dict_table_t** table, /*!< out: table object or NULL */
+ mem_heap_t* heap); /*!< in: heap memory */
+/*********************************************************************//**
+Frees a foreign key struct. */
+UNIV_INTERN
+void
+dict_foreign_free(
+/*==============*/
+ dict_foreign_t* foreign); /*!< in, own: foreign key struct */
+/*********************************************************************//**
+Finds the highest [number] for foreign key constraints of the table. Looks
+only at the >= 4.0.18-format id's, which are of the form
+databasename/tablename_ibfk_[number].
+@return highest number, 0 if table has no new format foreign key constraints */
+UNIV_INTERN
+ulint
+dict_table_get_highest_foreign_id(
+/*==============================*/
+ dict_table_t* table); /*!< in: table in the dictionary
+ memory cache */
/********************************************************************//**
Return the end of table name where we have removed dbname and '/'.
@return table name */
-
+UNIV_INTERN
const char*
dict_remove_db_name(
/*================*/
- const char* name); /*!< in: table name in the form
+ const char* name) /*!< in: table name in the form
dbname '/' tablename */
+ __attribute__((nonnull, warn_unused_result));
+
+/** Operation to perform when opening a table; passed as the table_op
+argument of dict_table_open_on_id(). */
+enum dict_table_op_t {
+ /** Expect the tablespace to exist. */
+ DICT_TABLE_OP_NORMAL = 0,
+ /** Drop any orphan indexes after an aborted online index creation */
+ DICT_TABLE_OP_DROP_ORPHAN,
+ /** Silently load the tablespace if it does not exist,
+ and do not load the definitions of incomplete indexes. */
+ DICT_TABLE_OP_LOAD_TABLESPACE
+};
+
/**********************************************************************//**
Returns a table object based on table id.
@return table, NULL if does not exist */
UNIV_INTERN
dict_table_t*
-dict_table_get_on_id(
-/*=================*/
- table_id_t table_id, /*!< in: table id */
- trx_t* trx); /*!< in: transaction handle */
+dict_table_open_on_id(
+/*==================*/
+ table_id_t table_id, /*!< in: table id */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ dict_table_op_t table_op) /*!< in: operation to perform */
+ __attribute__((warn_unused_result));
/********************************************************************//**
-Decrements the count of open MySQL handles to a table. */
+Decrements the count of open handles to a table. */
UNIV_INTERN
void
-dict_table_decrement_handle_count(
-/*==============================*/
+dict_table_close(
+/*=============*/
dict_table_t* table, /*!< in/out: table */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop) /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
+ __attribute__((nonnull));
/**********************************************************************//**
Inits the data dictionary module. */
UNIV_INTERN
@@ -108,7 +166,8 @@ UNIV_INLINE
ulint
dict_col_get_mbminlen(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the maximum number of bytes per character.
@return maximum multi-byte char size, in bytes */
@@ -116,7 +175,8 @@ UNIV_INLINE
ulint
dict_col_get_mbmaxlen(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets the minimum and maximum number of bytes per character. */
UNIV_INLINE
@@ -126,8 +186,9 @@ dict_col_set_mbminmaxlen(
dict_col_t* col, /*!< in/out: column */
ulint mbminlen, /*!< in: minimum multi-byte
character size, in bytes */
- ulint mbmaxlen); /*!< in: minimum multi-byte
+ ulint mbmaxlen) /*!< in: maximum multi-byte
character size, in bytes */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets the column data type. */
UNIV_INLINE
@@ -135,10 +196,11 @@ void
dict_col_copy_type(
/*===============*/
const dict_col_t* col, /*!< in: column */
- dtype_t* type); /*!< out: data type */
+ dtype_t* type) /*!< out: data type */
+ __attribute__((nonnull));
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix
+note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
needs to be stored in the undo log.
@return bytes of column prefix to be stored in the undo log */
UNIV_INLINE
@@ -146,9 +208,9 @@ ulint
dict_max_field_len_store_undo(
/*==========================*/
dict_table_t* table, /*!< in: table */
- const dict_col_t* col); /*!< in: column which index prefix
+ const dict_col_t* col) /*!< in: column which index prefix
is based on */
-
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/*********************************************************************//**
@@ -159,7 +221,8 @@ ibool
dict_col_type_assert_equal(
/*=======================*/
const dict_col_t* col, /*!< in: column */
- const dtype_t* type); /*!< in: data type */
+ const dtype_t* type) /*!< in: data type */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
@@ -169,7 +232,8 @@ UNIV_INLINE
ulint
dict_col_get_min_size(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the maximum size of the column.
@return maximum size */
@@ -177,7 +241,8 @@ UNIV_INLINE
ulint
dict_col_get_max_size(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the size of a fixed size column, 0 if not a fixed size column.
@return fixed size, or 0 */
@@ -186,7 +251,8 @@ ulint
dict_col_get_fixed_size(
/*====================*/
const dict_col_t* col, /*!< in: column */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
For fixed length types it is the fixed length of the type, otherwise 0.
@@ -196,8 +262,8 @@ ulint
dict_col_get_sql_null_size(
/*=======================*/
const dict_col_t* col, /*!< in: column */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the column number.
@return col->ind, table column position (starting from 0) */
@@ -205,7 +271,8 @@ UNIV_INLINE
ulint
dict_col_get_no(
/*============*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the column position in the clustered index. */
UNIV_INLINE
@@ -213,7 +280,8 @@ ulint
dict_col_get_clust_pos(
/*===================*/
const dict_col_t* col, /*!< in: table column */
- const dict_index_t* clust_index); /*!< in: clustered index */
+ const dict_index_t* clust_index) /*!< in: clustered index */
+ __attribute__((nonnull, warn_unused_result));
/****************************************************************//**
If the given column name is reserved for InnoDB system columns, return
TRUE.
@@ -222,14 +290,16 @@ UNIV_INTERN
ibool
dict_col_name_is_reserved(
/*======================*/
- const char* name); /*!< in: column name */
+ const char* name) /*!< in: column name */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Acquire the autoinc lock. */
UNIV_INTERN
void
dict_table_autoinc_lock(
/*====================*/
- dict_table_t* table); /*!< in/out: table */
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
/********************************************************************//**
Unconditionally set the autoinc counter. */
UNIV_INTERN
@@ -237,7 +307,8 @@ void
dict_table_autoinc_initialize(
/*==========================*/
dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value); /*!< in: next value to assign to a row */
+ ib_uint64_t value) /*!< in: next value to assign to a row */
+ __attribute__((nonnull));
/********************************************************************//**
Reads the next autoinc value (== autoinc counter value), 0 if not yet
initialized.
@@ -246,7 +317,8 @@ UNIV_INTERN
ib_uint64_t
dict_table_autoinc_read(
/*====================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Updates the autoinc counter if the value supplied is greater than the
current value. */
@@ -256,14 +328,16 @@ dict_table_autoinc_update_if_greater(
/*=================================*/
dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value); /*!< in: value which was assigned to a row */
+ ib_uint64_t value) /*!< in: value which was assigned to a row */
+ __attribute__((nonnull));
/********************************************************************//**
Release the autoinc lock. */
UNIV_INTERN
void
dict_table_autoinc_unlock(
/*======================*/
- dict_table_t* table); /*!< in/out: table */
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Adds system columns to a table object. */
@@ -272,7 +346,8 @@ void
dict_table_add_system_columns(
/*==========================*/
dict_table_t* table, /*!< in/out: table */
- mem_heap_t* heap); /*!< in: temporary heap */
+ mem_heap_t* heap) /*!< in: temporary heap */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Adds a table object to the dictionary cache. */
@@ -280,27 +355,32 @@ UNIV_INTERN
void
dict_table_add_to_cache(
/*====================*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap); /*!< in: temporary heap */
+ dict_table_t* table, /*!< in: table */
+ ibool can_be_evicted, /*!< in: TRUE if can be evicted*/
+ mem_heap_t* heap) /*!< in: temporary heap */
+ __attribute__((nonnull));
/**********************************************************************//**
Removes a table object from the dictionary cache. */
UNIV_INTERN
void
dict_table_remove_from_cache(
/*=========================*/
- dict_table_t* table); /*!< in, own: table */
+ dict_table_t* table) /*!< in, own: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Renames a table object.
@return TRUE if success */
UNIV_INTERN
-ibool
+dberr_t
dict_table_rename_in_cache(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
const char* new_name, /*!< in: new name */
- ibool rename_also_foreigns);/*!< in: in ALTER TABLE we want
+ ibool rename_also_foreigns)
+ /*!< in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Removes an index from the dictionary cache. */
UNIV_INTERN
@@ -308,7 +388,8 @@ void
dict_index_remove_from_cache(
/*=========================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
+ dict_index_t* index) /*!< in, own: index */
+ __attribute__((nonnull));
/**********************************************************************//**
Change the id of a table object in the dictionary cache. This is used in
DISCARD TABLESPACE. */
@@ -317,7 +398,16 @@ void
dict_table_change_id_in_cache(
/*==========================*/
dict_table_t* table, /*!< in/out: table object already in cache */
- table_id_t new_id);/*!< in: new id to set */
+ table_id_t new_id) /*!< in: new id to set */
+ __attribute__((nonnull));
+/**********************************************************************//**
+Removes a foreign constraint struct from the dictionary cache. */
+UNIV_INTERN
+void
+dict_foreign_remove_from_cache(
+/*===========================*/
+ dict_foreign_t* foreign) /*!< in, own: foreign constraint */
+ __attribute__((nonnull));
/**********************************************************************//**
Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
@@ -325,14 +415,20 @@ At least one of foreign table or referenced table must already be in
the dictionary cache!
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_add_to_cache(
/*======================*/
- dict_foreign_t* foreign, /*!< in, own: foreign key
- constraint */
- ibool check_charsets, /*!< in: TRUE=check charset
- compatibility */
- dict_err_ignore_t ignore_err); /*!< in: error to be ignored */
+ dict_foreign_t* foreign,
+ /*!< in, own: foreign key constraint */
+ const char** col_names,
+ /*!< in: column names, or NULL to use
+ foreign->foreign_table->col_names */
+ bool check_charsets,
+ /*!< in: whether to check charset
+ compatibility */
+ dict_err_ignore_t ignore_err)
+ /*!< in: error to be ignored */
+ __attribute__((nonnull(1), warn_unused_result));
/*********************************************************************//**
Check if the index is referenced by a foreign key, if TRUE return the
matching instance NULL otherwise.
@@ -343,7 +439,8 @@ dict_foreign_t*
dict_table_get_referenced_constraint(
/*=================================*/
dict_table_t* table, /*!< in: InnoDB table */
- dict_index_t* index); /*!< in: InnoDB index */
+ dict_index_t* index) /*!< in: InnoDB index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks if a table is referenced by foreign keys.
@return TRUE if table is referenced by a foreign key */
@@ -351,17 +448,33 @@ UNIV_INTERN
ibool
dict_table_is_referenced_by_foreign_key(
/*====================================*/
- const dict_table_t* table); /*!< in: InnoDB table */
+ const dict_table_t* table) /*!< in: InnoDB table */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
-Replace the index in the foreign key list that matches this index's
-definition with an equivalent index. */
+Replace the index passed in with another equivalent index in the
+foreign key lists of the table.
+@return whether all replacements were found */
UNIV_INTERN
-void
-dict_table_replace_index_in_foreign_list(
-/*=====================================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index, /*!< in: index to be replaced */
- const trx_t* trx); /*!< in: transaction handle */
+bool
+dict_foreign_replace_index(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
+ const dict_index_t* index) /*!< in: index to be replaced */
+ __attribute__((nonnull(1,3), warn_unused_result));
+/**********************************************************************//**
+Determines whether a string starts with the specified keyword.
+@return TRUE if str starts with keyword */
+UNIV_INTERN
+ibool
+dict_str_starts_with_keyword(
+/*=========================*/
+ THD* thd, /*!< in: MySQL thread handle */
+ const char* str, /*!< in: string to scan for keyword */
+ const char* keyword) /*!< in: keyword to look for */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks if a index is defined for a foreign key constraint. Index is a part
of a foreign key constraint if the index is referenced by foreign key
@@ -373,7 +486,8 @@ dict_foreign_t*
dict_table_get_foreign_constraint(
/*==============================*/
dict_table_t* table, /*!< in: InnoDB table */
- dict_index_t* index); /*!< in: InnoDB index */
+ dict_index_t* index) /*!< in: InnoDB index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
@@ -383,7 +497,7 @@ bot participating tables. The indexes are allowed to contain more
fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_foreign_constraints(
/*============================*/
trx_t* trx, /*!< in: transaction */
@@ -399,15 +513,16 @@ dict_create_foreign_constraints(
const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks); /*!< in: if TRUE, fail with error
+ ibool reject_fks) /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
constraint id does not match */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_parse_drop_constraints(
/*================================*/
mem_heap_t* heap, /*!< in: heap from which we can
@@ -416,85 +531,57 @@ dict_foreign_parse_drop_constraints(
dict_table_t* table, /*!< in: table */
ulint* n, /*!< out: number of constraints
to drop */
- const char*** constraints_to_drop); /*!< out: id's of the
+ const char*** constraints_to_drop) /*!< out: id's of the
constraints to drop */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
-Returns a table object and optionally increment its MySQL open handle count.
+Returns a table object and increments its open handle count.
NOTE! This is a high-level function to be used mainly from outside the
-'dict' directory. Inside this directory dict_table_get_low is usually the
-appropriate function.
+'dict' directory. Inside this directory dict_table_get_low
+is usually the appropriate function.
@return table, NULL if does not exist */
UNIV_INTERN
dict_table_t*
-dict_table_get(
-/*===========*/
- const char* table_name,
- /*!< in: table name */
- ibool inc_mysql_count,
- /*!< in: whether to increment the open
- handle count on the table */
- dict_err_ignore_t ignore_err);
- /*!< in: errors to ignore when loading
- the table */
-/**********************************************************************//**
-Returns a index object, based on table and index id, and memoryfixes it.
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_on_id_low(
-/*=====================*/
- dict_table_t* table, /*!< in: table */
- index_id_t index_id); /*!< in: index id */
-/**********************************************************************//**
-Checks if a table is in the dictionary cache.
-@return table, NULL if not found */
-
-UNIV_INLINE
-dict_table_t*
-dict_table_check_if_in_cache_low(
-/*=============================*/
- const char* table_name); /*!< in: table name */
-/**********************************************************************//**
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function.
-@return table, NULL if not found */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_low(
-/*===============*/
+dict_table_open_on_name(
+/*====================*/
const char* table_name, /*!< in: table name */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop, /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
dict_err_ignore_t
- ignore_err); /*!< in: error to be ignored when
- loading a table definition */
-/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_on_id_low(
-/*=====================*/
- table_id_t table_id); /*!< in: table id */
-/**********************************************************************//**
-Find an index that is equivalent to the one passed in and is not marked
-for deletion.
-@return index equivalent to foreign->foreign_index, or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_foreign_find_equiv_index(
-/*==========================*/
- dict_foreign_t* foreign);/*!< in: foreign key */
-/**********************************************************************//**
-Returns an index object by matching on the name and column names and
-if more than one index matches return the index with the max id
+ ignore_err) /*!< in: error to be ignored when
+ loading the table */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Tries to find an index whose first fields are the columns in the array,
+in the same order and is not marked for deletion and is not the same
+as types_idx.
@return matching index, NULL if not found */
UNIV_INTERN
dict_index_t*
-dict_table_get_index_by_max_id(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- const char* name, /*!< in: the index name to find */
- const char** columns,/*!< in: array of column names */
- ulint n_cols);/*!< in: number of columns */
+dict_foreign_find_index(
+/*====================*/
+ const dict_table_t* table, /*!< in: table */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ bool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
+ __attribute__((nonnull(1,3), warn_unused_result));
/**********************************************************************//**
Returns a column's name.
@return column name. NOTE: not guaranteed to stay valid if table is
@@ -504,29 +591,16 @@ const char*
dict_table_get_col_name(
/*====================*/
const dict_table_t* table, /*!< in: table */
- ulint col_nr);/*!< in: column number */
-
+ ulint col_nr) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
-Prints a table definition. */
+Prints table data. */
UNIV_INTERN
void
dict_table_print(
/*=============*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Prints a table data. */
-UNIV_INTERN
-void
-dict_table_print_low(
-/*=================*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Prints a table data when we know the table name. */
-UNIV_INTERN
-void
-dict_table_print_by_name(
-/*=====================*/
- const char* name); /*!< in: table name */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Outputs info on foreign keys of a table. */
UNIV_INTERN
@@ -539,7 +613,8 @@ dict_print_info_on_foreign_keys(
of SHOW TABLE STATUS */
FILE* file, /*!< in: file where to print */
trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
@@ -550,7 +625,8 @@ dict_print_info_on_foreign_key_in_create_format(
FILE* file, /*!< in: file where to print */
trx_t* trx, /*!< in: transaction */
dict_foreign_t* foreign, /*!< in: foreign key constraint */
- ibool add_newline); /*!< in: whether to add a newline */
+ ibool add_newline) /*!< in: whether to add a newline */
+ __attribute__((nonnull(1,3)));
/********************************************************************//**
Displays the names of the index and the table. */
UNIV_INTERN
@@ -558,8 +634,38 @@ void
dict_index_name_print(
/*==================*/
FILE* file, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- const dict_index_t* index); /*!< in: index to print */
+ const trx_t* trx, /*!< in: transaction */
+ const dict_index_t* index) /*!< in: index to print */
+ __attribute__((nonnull(1,3)));
+/*********************************************************************//**
+Checks whether an index qualifies for use with the given columns, i.e.
+whether its first fields are the columns in the array, in the same order
+(and, if types_idx is given, with matching column types).
+@return whether the index qualifies */
+UNIV_INTERN
+bool
+dict_foreign_qualify_index(
+/*====================*/
+ const dict_table_t* table, /*!< in: table */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* index, /*!< in: index to check */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ bool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
+ __attribute__((nonnull(1,3), warn_unused_result));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the first index on the table (the clustered index).
@@ -568,7 +674,17 @@ UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
/*=======================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the last index on the table.
+@return index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_last_index(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the next index on the table.
@return index, NULL if none left */
@@ -576,9 +692,11 @@ UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
/*======================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes)
+# define dict_table_get_last_index(table) UT_LIST_GET_LAST((table)->indexes)
# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
@@ -633,6 +751,17 @@ dict_index_is_sec_or_ibuf(
const dict_index_t* index) /*!< in: index */
__attribute__((nonnull, pure, warn_unused_result));
+/************************************************************************
+Gets all the FTS indexes for the table. NOTE: must not be called for
+tables which do not have an FTS-index. */
+UNIV_INTERN
+ulint
+dict_table_get_all_fts_indexes(
+/*===========================*/
+ /* out: number of indexes collected */
+ dict_table_t* table, /* in: table */
+ ib_vector_t* indexes)/* out: vector for collecting FTS indexes */
+ __attribute__((nonnull));
/********************************************************************//**
Gets the number of user-defined columns in a table in the dictionary
cache.
@@ -662,6 +791,35 @@ dict_table_get_n_cols(
/*==================*/
const dict_table_t* table) /*!< in: table */
__attribute__((nonnull, pure, warn_unused_result));
+/********************************************************************//**
+Gets the approximately estimated number of rows in the table.
+@return estimated number of rows */
+UNIV_INLINE
+ib_uint64_t
+dict_table_get_n_rows(
+/*==================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Increment the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_inc(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
+/********************************************************************//**
+Decrement the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_dec(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth column of a table.
@@ -671,7 +829,8 @@ dict_col_t*
dict_table_get_nth_col(
/*===================*/
const dict_table_t* table, /*!< in: table */
- ulint pos); /*!< in: position of column */
+ ulint pos) /*!< in: position of column */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the given system column of a table.
@return pointer to column object */
@@ -680,7 +839,8 @@ dict_col_t*
dict_table_get_sys_col(
/*===================*/
const dict_table_t* table, /*!< in: table */
- ulint sys); /*!< in: DATA_ROW_ID, ... */
+ ulint sys) /*!< in: DATA_ROW_ID, ... */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
#define dict_table_get_nth_col(table, pos) \
((table)->cols + (pos))
@@ -695,7 +855,8 @@ ulint
dict_table_get_sys_col_no(
/*======================*/
const dict_table_t* table, /*!< in: table */
- ulint sys); /*!< in: DATA_ROW_ID, ... */
+ ulint sys) /*!< in: DATA_ROW_ID, ... */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Returns the minimum data size of an index record.
@@ -704,7 +865,8 @@ UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Check whether the table uses the compact page format.
@@ -713,7 +875,8 @@ UNIV_INLINE
ibool
dict_table_is_comp(
/*===============*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Determine the file format of a table.
@return file format version */
@@ -721,23 +884,53 @@ UNIV_INLINE
ulint
dict_table_get_format(
/*==================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Determine the file format from a dict_table_t::flags.
+@return file format version */
+UNIV_INLINE
+ulint
+dict_tf_get_format(
+/*===============*/
+ ulint flags) /*!< in: dict_table_t::flags */
+ __attribute__((warn_unused_result));
/********************************************************************//**
-Set the file format of a table. */
+Set the various values in a dict_table_t::flags pointer. */
UNIV_INLINE
void
-dict_table_set_format(
-/*==================*/
- dict_table_t* table, /*!< in/out: table */
- ulint format);/*!< in: file format version */
+dict_tf_set(
+/*========*/
+ ulint* flags, /*!< in/out: table flags */
+ rec_format_t format, /*!< in: file format */
+ ulint zip_ssize, /*!< in: zip shift size */
+ bool remote_path) /*!< in: table uses DATA DIRECTORY */
+ __attribute__((nonnull));
+/********************************************************************//**
+Convert the 32-bit integer table flags to the 32-bit integer that is
+written into the tablespace header at the offset FSP_SPACE_FLAGS and is
+also stored in the fil_space_t::flags field. The following chart shows
+the translation of the low order bit. Other bits are the same.
+========================= Low order bit ==========================
+ | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
+dict_table_t::flags | 0 | 1 | 1 | 1
+fil_space_t::flags | 0 | 0 | 1 | 1
+==================================================================
+@return tablespace flags (fil_space_t::flags) */
+UNIV_INLINE
+ulint
+dict_tf_to_fsp_flags(
+/*=================*/
+ ulint flags) /*!< in: dict_table_t::flags */
+ __attribute__((const));
/********************************************************************//**
Extract the compressed page size from table flags.
@return compressed page size, or 0 if not compressed */
UNIV_INLINE
ulint
-dict_table_flags_to_zip_size(
-/*=========================*/
- ulint flags) /*!< in: flags */
+dict_tf_get_zip_size(
+/*=================*/
+ ulint flags) /*!< in: flags */
__attribute__((const));
/********************************************************************//**
Check whether the table uses the compressed compact page format.
@@ -746,7 +939,8 @@ UNIV_INLINE
ulint
dict_table_zip_size(
/*================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Obtain exclusive locks on all index trees of the table. This is to prevent
@@ -756,15 +950,16 @@ UNIV_INLINE
void
dict_table_x_lock_indexes(
/*======================*/
- dict_table_t* table); /*!< in: table */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/*********************************************************************//**
Release the exclusive locks on all index tree. */
UNIV_INLINE
void
dict_table_x_unlock_indexes(
/*========================*/
- dict_table_t* table); /*!< in: table */
-#endif /* !UNIV_HOTBACKUP */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/********************************************************************//**
Checks if a column is in the ordering columns of the clustered index of a
table. Column prefixes are treated like whole columns.
@@ -774,8 +969,17 @@ ibool
dict_table_col_in_clustered_key(
/*============================*/
const dict_table_t* table, /*!< in: table */
- ulint n); /*!< in: column number */
-#ifndef UNIV_HOTBACKUP
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Check if the table has an FTS index.
+@return TRUE if table has an FTS index */
+UNIV_INLINE
+ibool
+dict_table_has_fts_index(
+/*=====================*/
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Copies types of columns contained in table to tuple and sets all
fields of the tuple to the SQL NULL value. This function should
@@ -785,7 +989,20 @@ void
dict_table_copy_types(
/*==================*/
dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
+/********************************************************************
+Wait until all the background threads of the given table have exited, i.e.,
+bg_threads == 0. Note: bg_threads_mutex must be reserved when
+calling this. */
+UNIV_INTERN
+void
+dict_table_wait_for_bg_threads_to_exit(
+/*===================================*/
+ dict_table_t* table, /* in: table */
+ ulint delay) /* in: time in microseconds to wait between
+ checks of bg_threads. */
+ __attribute__((nonnull));
/**********************************************************************//**
Looks for an index with the given id. NOTE that we do not reserve
the dictionary mutex: this function is for emergency purposes like
@@ -795,21 +1012,34 @@ UNIV_INTERN
dict_index_t*
dict_index_find_on_id_low(
/*======================*/
- index_id_t id); /*!< in: index id */
+ index_id_t id) /*!< in: index id */
+ __attribute__((warn_unused_result));
+/**********************************************************************//**
+Make room in the table cache by evicting an unused table. The unused table
+should not be part of an FK relationship and should not currently be used in
+any user transaction. There is no guarantee that it will remove a table.
+@return number of tables evicted. */
+UNIV_INTERN
+ulint
+dict_make_room_in_cache(
+/*====================*/
+ ulint max_tables, /*!< in: max tables allowed in cache */
+ ulint pct_check); /*!< in: max percent to check */
/**********************************************************************//**
Adds an index to the dictionary cache.
@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
UNIV_INTERN
-ulint
+dberr_t
dict_index_add_to_cache(
/*====================*/
dict_table_t* table, /*!< in: table on which the index is */
dict_index_t* index, /*!< in, own: index; NOTE! The index memory
object is freed in this function! */
ulint page_no,/*!< in: root page number of the index */
- ibool strict);/*!< in: TRUE=refuse to create the index
+ ibool strict) /*!< in: TRUE=refuse to create the index
if records could be too big to fit in
an B-tree page */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Removes an index from the dictionary cache. */
UNIV_INTERN
@@ -817,7 +1047,8 @@ void
dict_index_remove_from_cache(
/*=========================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
+ dict_index_t* index) /*!< in, own: index */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Gets the number of fields in the internal representation of an index,
@@ -827,9 +1058,10 @@ UNIV_INLINE
ulint
dict_index_get_n_fields(
/*====================*/
- const dict_index_t* index); /*!< in: an internal
+ const dict_index_t* index) /*!< in: an internal
representation of index (in
the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
@@ -840,8 +1072,9 @@ UNIV_INLINE
ulint
dict_index_get_n_unique(
/*====================*/
- const dict_index_t* index); /*!< in: an internal representation
+ const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of fields in the internal representation of an index
which uniquely determine the position of an index entry in the index, if
@@ -851,8 +1084,9 @@ UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
/*============================*/
- const dict_index_t* index); /*!< in: an internal representation
+ const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of user-defined ordering fields in the index. In the internal
representation we add the row id to the ordering fields to make all indexes
@@ -863,8 +1097,9 @@ UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
/*======================================*/
- const dict_index_t* index); /*!< in: an internal representation
+ const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth field of an index.
@@ -874,7 +1109,8 @@ dict_field_t*
dict_index_get_nth_field(
/*=====================*/
const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of field */
+ ulint pos) /*!< in: position of field */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos))
#endif /* UNIV_DEBUG */
@@ -886,7 +1122,8 @@ const dict_col_t*
dict_index_get_nth_col(
/*===================*/
const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of the field */
+ ulint pos) /*!< in: position of the field */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the column number of the nth field in an index.
@return column number */
@@ -895,17 +1132,19 @@ ulint
dict_index_get_nth_col_no(
/*======================*/
const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of the field */
+ ulint pos) /*!< in: position of the field */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n in an index.
@return position in internal representation of the index;
ULINT_UNDEFINED if not contained */
-UNIV_INTERN
+UNIV_INLINE
ulint
dict_index_get_nth_col_pos(
/*=======================*/
const dict_index_t* index, /*!< in: index */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n in an index.
@return position in internal representation of the index;
@@ -916,8 +1155,9 @@ dict_index_get_nth_col_or_prefix_pos(
/*=================================*/
const dict_index_t* index, /*!< in: index */
ulint n, /*!< in: column number */
- ibool inc_prefix); /*!< in: TRUE=consider
+ ibool inc_prefix) /*!< in: TRUE=consider
column prefixes too */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Returns TRUE if the index contains a column or a prefix of that column.
@return TRUE if contains the column or its prefix */
@@ -926,7 +1166,8 @@ ibool
dict_index_contains_col_or_prefix(
/*==============================*/
const dict_index_t* index, /*!< in: index */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for a matching field in an index. The column has to be the same. The
column in index must be complete, or must contain a prefix longer than the
@@ -940,7 +1181,8 @@ dict_index_get_nth_field_pos(
/*=========================*/
const dict_index_t* index, /*!< in: index from which to search */
const dict_index_t* index2, /*!< in: index */
- ulint n); /*!< in: field number in index2 */
+ ulint n) /*!< in: field number in index2 */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n position in the clustered index.
@return position in internal representation of the clustered index */
@@ -949,7 +1191,8 @@ ulint
dict_table_get_nth_col_pos(
/*=======================*/
const dict_table_t* table, /*!< in: table */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Returns the position of a system column in an index.
@return position, ULINT_UNDEFINED if not contained */
@@ -958,7 +1201,8 @@ ulint
dict_index_get_sys_col_pos(
/*=======================*/
const dict_index_t* index, /*!< in: index */
- ulint type); /*!< in: DATA_ROW_ID, ... */
+ ulint type) /*!< in: DATA_ROW_ID, ... */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Adds a column to index. */
UNIV_INTERN
@@ -968,7 +1212,8 @@ dict_index_add_col(
dict_index_t* index, /*!< in/out: index */
const dict_table_t* table, /*!< in: table */
dict_col_t* col, /*!< in: column */
- ulint prefix_len); /*!< in: column prefix length */
+ ulint prefix_len) /*!< in: column prefix length */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Copies types of fields contained in index to tuple. */
@@ -978,8 +1223,9 @@ dict_index_copy_types(
/*==================*/
dtuple_t* tuple, /*!< in/out: data tuple */
const dict_index_t* index, /*!< in: index */
- ulint n_fields); /*!< in: number of
+ ulint n_fields) /*!< in: number of
field types to copy */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Gets the field column.
@@ -988,7 +1234,8 @@ UNIV_INLINE
const dict_col_t*
dict_field_get_col(
/*===============*/
- const dict_field_t* field); /*!< in: index field */
+ const dict_field_t* field) /*!< in: index field */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
@@ -998,7 +1245,8 @@ UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache_low(
/*===========================*/
- index_id_t index_id); /*!< in: index id */
+ index_id_t index_id) /*!< in: index id */
+ __attribute__((warn_unused_result));
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
@@ -1007,7 +1255,8 @@ UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache(
/*=======================*/
- index_id_t index_id); /*!< in: index id */
+ index_id_t index_id) /*!< in: index id */
+ __attribute__((warn_unused_result));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/**********************************************************************//**
@@ -1019,7 +1268,17 @@ ibool
dict_index_check_search_tuple(
/*==========================*/
const dict_index_t* index, /*!< in: index tree */
- const dtuple_t* tuple); /*!< in: tuple used in a search */
+ const dtuple_t* tuple) /*!< in: tuple used in a search */
+ __attribute__((nonnull, warn_unused_result));
+/** Whether and when to allow temporary index names */
+enum check_name {
+ /** Require all indexes to be complete. */
+ CHECK_ALL_COMPLETE,
+ /** Allow aborted online index creation. */
+ CHECK_ABORTED_OK,
+ /** Allow partial indexes to exist. */
+ CHECK_PARTIAL_OK
+};
/**********************************************************************//**
Check for duplicate index entries in a table [using the index name] */
UNIV_INTERN
@@ -1028,8 +1287,9 @@ dict_table_check_for_dup_indexes(
/*=============================*/
const dict_table_t* table, /*!< in: Check for dup indexes
in this table */
- ibool tmp_ok);/*!< in: TRUE=allow temporary
- index names */
+ enum check_name check) /*!< in: whether and when to allow
+ temporary index names */
+ __attribute__((nonnull));
#endif /* UNIV_DEBUG */
/**********************************************************************//**
Builds a node pointer out of a physical record and a page number.
@@ -1045,8 +1305,9 @@ dict_index_build_node_ptr(
pointer */
mem_heap_t* heap, /*!< in: memory heap where pointer
created */
- ulint level); /*!< in: level of rec in tree:
+ ulint level) /*!< in: level of rec in tree:
0 means leaf level */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Copies an initial segment of a physical record, long enough to specify an
index entry uniquely.
@@ -1061,7 +1322,8 @@ dict_index_copy_rec_order_prefix(
ulint* n_fields,/*!< out: number of fields copied */
byte** buf, /*!< in/out: memory buffer for the
copied prefix, or NULL */
- ulint* buf_size);/*!< in/out: buffer size */
+ ulint* buf_size)/*!< in/out: buffer size */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Builds a typed data tuple out of a physical record.
@return own: data tuple */
@@ -1072,7 +1334,8 @@ dict_index_build_data_tuple(
dict_index_t* index, /*!< in: index */
rec_t* rec, /*!< in: record for which to build data tuple */
ulint n_fields,/*!< in: number of data fields */
- mem_heap_t* heap); /*!< in: memory heap where tuple created */
+ mem_heap_t* heap) /*!< in: memory heap where tuple created */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the space id of the root of the index tree.
@return space id */
@@ -1080,7 +1343,8 @@ UNIV_INLINE
ulint
dict_index_get_space(
/*=================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets the space id of the root of the index tree. */
UNIV_INLINE
@@ -1088,7 +1352,8 @@ void
dict_index_set_space(
/*=================*/
dict_index_t* index, /*!< in/out: index */
- ulint space); /*!< in: space id */
+ ulint space) /*!< in: space id */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets the page number of the root of the index tree.
@return page number */
@@ -1096,15 +1361,17 @@ UNIV_INLINE
ulint
dict_index_get_page(
/*================*/
- const dict_index_t* tree); /*!< in: index */
+ const dict_index_t* tree) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the read-write lock of the index tree.
@return read-write lock */
UNIV_INLINE
-rw_lock_t*
+prio_rw_lock_t*
dict_index_get_lock(
/*================*/
- dict_index_t* index); /*!< in: index */
+ dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Returns free space reserved for future updates of records. This is
relevant only in the case of many consecutive inserts, as updates
@@ -1114,49 +1381,48 @@ UNIV_INLINE
ulint
dict_index_get_space_reserve(void);
/*==============================*/
+
+/* Online index creation @{ */
+/********************************************************************//**
+Gets the status of online index creation.
+@return the status */
+UNIV_INLINE
+enum online_index_status
+dict_index_get_online_status(
+/*=========================*/
+ const dict_index_t* index) /*!< in: secondary index */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Sets the status of online index creation. */
+UNIV_INLINE
+void
+dict_index_set_online_status(
+/*=========================*/
+ dict_index_t* index, /*!< in/out: index */
+ enum online_index_status status) /*!< in: status */
+ __attribute__((nonnull));
+/********************************************************************//**
+Determines if a secondary index is being or has been created online,
+or if the table is being rebuilt online, allowing concurrent modifications
+to the table.
+@retval true if the index is being or has been built online, or
+if this is a clustered index and the table is being or has been rebuilt online
+@retval false if the index has been created or the table has been
+rebuilt completely */
+UNIV_INLINE
+bool
+dict_index_is_online_ddl(
+/*=====================*/
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Calculates the minimum record length in an index. */
UNIV_INTERN
ulint
dict_index_calc_min_rec_len(
/*========================*/
- const dict_index_t* index); /*!< in: index */
-
-/** Calculate new statistics if 1 / 16 of table has been modified
-since the last time a statistics batch was run.
-We calculate statistics at most every 16th round, since we may have
-a counter table which is very small and updated very often.
-@param t table
-@return true if the table has changed too much and stats need to be
-recalculated
-*/
-#define DICT_TABLE_CHANGED_TOO_MUCH(t) \
- ((ib_int64_t) (t)->stat_modified_counter > 16 + (t)->stat_n_rows / 16)
-
-/*********************************************************************//**
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
-UNIV_INTERN
-void
-dict_update_statistics(
-/*===================*/
- dict_table_t* table, /*!< in/out: table */
- ibool only_calc_if_missing_stats,/*!< in: only
- update/recalc the stats if they have
- not been initialized yet, otherwise
- do nothing */
- ibool sync,
- ibool only_calc_if_changed_too_much);/*!< in: only
- update/recalc the stats if the table
- has been changed too much since the
- last stats update/recalc */
-/*********************************************************************//**
-*/
-UNIV_INTERN
-ibool
-dict_is_older_statistics(
-/*=====================*/
- dict_index_t* index);
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Reserves the dictionary system mutex for MySQL. */
UNIV_INTERN
@@ -1178,8 +1444,9 @@ void
dict_table_stats_lock(
/*==================*/
const dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or
+ ulint latch_mode) /*!< in: RW_S_LATCH or
RW_X_LATCH */
+ __attribute__((nonnull));
/**********************************************************************//**
Unlock the latch that has been locked by dict_table_stats_lock() */
UNIV_INTERN
@@ -1187,8 +1454,9 @@ void
dict_table_stats_unlock(
/*====================*/
const dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or
+ ulint latch_mode) /*!< in: RW_S_LATCH or
RW_X_LATCH */
+ __attribute__((nonnull));
/********************************************************************//**
Checks if the database name in two table names is the same.
@return TRUE if same db name */
@@ -1198,8 +1466,9 @@ dict_tables_have_same_db(
/*=====================*/
const char* name1, /*!< in: table name in the form
dbname '/' tablename */
- const char* name2); /*!< in: table name in the form
+ const char* name2) /*!< in: table name in the form
dbname '/' tablename */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Removes an index from the cache */
UNIV_INTERN
@@ -1207,7 +1476,8 @@ void
dict_index_remove_from_cache(
/*=========================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
+ dict_index_t* index) /*!< in, own: index */
+ __attribute__((nonnull));
/**********************************************************************//**
Get index by name
@return index, NULL if does not exist */
@@ -1216,7 +1486,8 @@ dict_index_t*
dict_table_get_index_on_name(
/*=========================*/
dict_table_t* table, /*!< in: table */
- const char* name); /*!< in: name of the index to find */
+ const char* name) /*!< in: name of the index to find */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
In case there is more than one index with the same name return the index
with the min(id).
@@ -1226,17 +1497,53 @@ dict_index_t*
dict_table_get_index_on_name_and_min_id(
/*====================================*/
dict_table_t* table, /*!< in: table */
- const char* name); /*!< in: name of the index to find */
-
+ const char* name) /*!< in: name of the index to find */
+ __attribute__((nonnull, warn_unused_result));
+/***************************************************************
+Check whether a column exists in an FTS index. */
+UNIV_INLINE
+ulint
+dict_table_is_fts_column(
+/*=====================*/
+ /* out: ULINT_UNDEFINED if no match else
+ the offset within the vector */
+ ib_vector_t* indexes,/* in: vector containing only FTS indexes */
+ ulint col_no) /* in: col number to search for */
+ __attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Move a table from the LRU list to the non-LRU list. */
UNIV_INTERN
void
-dict_table_LRU_trim(
-/*================*/
- dict_table_t* self);
+dict_table_move_from_lru_to_non_lru(
+/*================================*/
+ dict_table_t* table) /*!< in: table to move from LRU to non-LRU */
+ __attribute__((nonnull));
+/**********************************************************************//**
+Move a table to the LRU list from the non-LRU list. */
+UNIV_INTERN
+void
+dict_table_move_from_non_lru_to_lru(
+/*================================*/
+ dict_table_t* table) /*!< in: table to move from non-LRU to LRU */
+ __attribute__((nonnull));
+/**********************************************************************//**
+Move to the most recently used segment of the LRU list. */
+UNIV_INTERN
+void
+dict_move_to_mru(
+/*=============*/
+ dict_table_t* table) /*!< in: table to move to MRU */
+ __attribute__((nonnull));
+
+/** Maximum number of columns in a foreign key constraint. Please note that
+MySQL has a much lower limit on the number of columns allowed in a foreign
+key constraint. */
+#define MAX_NUM_FK_COLUMNS 500
+
/* Buffers for storing detailed information about the latest foreign key
and unique key errors */
extern FILE* dict_foreign_err_file;
-extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
+extern ib_mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
/** the dictionary system */
extern dict_sys_t* dict_sys;
@@ -1244,8 +1551,8 @@ extern dict_sys_t* dict_sys;
extern rw_lock_t dict_operation_lock;
/* Dictionary system struct */
-struct dict_sys_struct{
- mutex_t mutex; /*!< mutex protecting the data
+struct dict_sys_t{
+ ib_prio_mutex_t mutex; /*!< mutex protecting the data
dictionary; protects also the
disk-based dictionary system tables;
this mutex serializes CREATE TABLE
@@ -1262,8 +1569,6 @@ struct dict_sys_struct{
on name */
hash_table_t* table_id_hash; /*!< hash table of the tables, based
on id */
- UT_LIST_BASE_NODE_T(dict_table_t)
- table_LRU; /*!< LRU list of tables */
ulint size; /*!< varying space in bytes occupied
by the data dictionary table and
index objects */
@@ -1271,7 +1576,14 @@ struct dict_sys_struct{
dict_table_t* sys_columns; /*!< SYS_COLUMNS table */
dict_table_t* sys_indexes; /*!< SYS_INDEXES table */
dict_table_t* sys_fields; /*!< SYS_FIELDS table */
- dict_table_t* sys_stats; /*!< SYS_STATS table */
+
+ /*=============================*/
+ UT_LIST_BASE_NODE_T(dict_table_t)
+ table_LRU; /*!< List of tables that can be evicted
+ from the cache */
+ UT_LIST_BASE_NODE_T(dict_table_t)
+ table_non_LRU; /*!< List of tables that can't be
+ evicted from the cache */
};
#endif /* !UNIV_HOTBACKUP */
@@ -1287,6 +1599,80 @@ void
dict_ind_init(void);
/*===============*/
+/* Auxiliary structs for checking a table definition @{ */
+
+/* This struct is used to specify the name and type that a column must
+have when checking a table's schema. */
+struct dict_col_meta_t {
+ const char* name; /* column name */
+ ulint mtype; /* required column main type */
+ ulint prtype_mask; /* required column precise type mask;
+ if this is non-zero then all the
+ bits it has set must also be set
+ in the column's prtype */
+ ulint len; /* required column length */
+};
+
+/* This struct is used for checking whether a given table exists and
+whether it has a predefined schema (number of columns and columns names
+and types) */
+struct dict_table_schema_t {
+ const char* table_name; /* the name of the table whose
+ structure we are checking */
+ ulint n_cols; /* the number of columns the
+ table must have */
+ dict_col_meta_t* columns; /* metadata for the columns;
+ this array has n_cols
+ elements */
+ ulint n_foreign; /* number of foreign keys this
+ table has, pointing to other
+ tables (where this table is
+ FK child) */
+ ulint n_referenced; /* number of foreign keys other
+ tables have, pointing to this
+ table (where this table is
+ parent) */
+};
+/* @} */
+
+/*********************************************************************//**
+Checks whether a table exists and whether it has the given structure.
+The table must have the same number of columns with the same names and
+types. The order of the columns does not matter.
+The caller must own the dictionary mutex.
+dict_table_schema_check() @{
+@return DB_SUCCESS if the table exists and contains the necessary columns */
+UNIV_INTERN
+dberr_t
+dict_table_schema_check(
+/*====================*/
+ dict_table_schema_t* req_schema, /*!< in/out: required table
+ schema */
+ char* errstr, /*!< out: human readable error
+ message if != DB_SUCCESS and
+ != DB_TABLE_NOT_FOUND is
+ returned */
+ size_t errstr_sz) /*!< in: errstr size */
+ __attribute__((nonnull, warn_unused_result));
+/* @} */
+
+/*********************************************************************//**
+Converts a database and table name from filesystem encoding
+(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) into two
+strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
+at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
+UNIV_INTERN
+void
+dict_fs2utf8(
+/*=========*/
+ const char* db_and_table, /*!< in: database and table names,
+ e.g. d@i1b/a@q1b@1Kc */
+ char* db_utf8, /*!< out: database name, e.g. dцb */
+ size_t db_utf8_size, /*!< in: db_utf8 size */
+ char* table_utf8, /*!< out: table name, e.g. aюbØc */
+ size_t table_utf8_size)/*!< in: table_utf8 size */
+ __attribute__((nonnull));
+
/**********************************************************************//**
Closes the data dictionary module. */
UNIV_INTERN
@@ -1302,7 +1688,7 @@ ulint
dict_table_is_corrupted(
/*====================*/
const dict_table_t* table) /*!< in: table */
- __attribute__((nonnull, pure, warn_unused_result));
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Check whether the index is corrupted.
@@ -1312,7 +1698,7 @@ ulint
dict_index_is_corrupted(
/*====================*/
const dict_index_t* index) /*!< in: index */
- __attribute__((nonnull, pure, warn_unused_result));
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
@@ -1322,7 +1708,9 @@ UNIV_INTERN
void
dict_set_corrupted(
/*===============*/
- dict_index_t* index) /*!< in/out: index */
+ dict_index_t* index, /*!< in/out: index */
+ trx_t* trx, /*!< in/out: transaction */
+ const char* ctx) /*!< in: context */
UNIV_COLD __attribute__((nonnull));
/**********************************************************************//**
@@ -1334,7 +1722,8 @@ void
dict_set_corrupted_index_cache_only(
/*================================*/
dict_index_t* index, /*!< in/out: index */
- dict_table_t* table); /*!< in/out: table */
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Flags a table with specified space_id corrupted in the table dictionary
@@ -1346,6 +1735,75 @@ dict_set_corrupted_by_space(
/*========================*/
ulint space_id); /*!< in: space ID */
+/********************************************************************//**
+Validate the table flags.
+@return true if valid. */
+UNIV_INLINE
+bool
+dict_tf_is_valid(
+/*=============*/
+ ulint flags) /*!< in: table flags */
+ __attribute__((warn_unused_result));
+
+/********************************************************************//**
+Check if the tablespace for the table has been discarded.
+@return true if the tablespace has been discarded. */
+UNIV_INLINE
+bool
+dict_table_is_discarded(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/********************************************************************//**
+Check if it is a temporary table.
+@return true if temporary table flag is set. */
+UNIV_INLINE
+bool
+dict_table_is_temporary(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+This function should be called whenever a page is successfully
+compressed. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_success(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+ __attribute__((nonnull));
+/*********************************************************************//**
+This function should be called whenever a page compression attempt
+fails. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_failure(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Return the optimal page size, for which page will likely compress.
+@return page size beyond which the page may not compress */
+UNIV_INTERN
+ulint
+dict_index_zip_pad_optimal_page_size(
+/*=================================*/
+ dict_index_t* index) /*!< in: index for which page size
+ is requested */
+ __attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Convert table flag to row format string.
+@return row format name */
+UNIV_INTERN
+const char*
+dict_tf_to_row_format_string(
+/*=========================*/
+ ulint table_flag); /*!< in: row format setting */
+
+#endif /* !UNIV_HOTBACKUP */
/*************************************************************************
set is_corrupt flag by space_id*/
diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic
index 1d2eb34042d..c261d6a3aee 100644
--- a/storage/xtradb/include/dict0dict.ic
+++ b/storage/xtradb/include/dict0dict.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,9 @@ Created 1/8/1996 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "dict0load.h"
#include "rem0types.h"
+#include "fsp0fsp.h"
#include "srv0srv.h"
+#include "sync0rw.h" /* RW_S_LATCH */
/*********************************************************************//**
Gets the minimum number of bytes per character.
@@ -103,7 +105,7 @@ dict_col_type_assert_equal(
ut_ad(col->mtype == type->mtype);
ut_ad(col->prtype == type->prtype);
- ut_ad(col->len == type->len);
+ //ut_ad(col->len == type->len);
# ifndef UNIV_HOTBACKUP
ut_ad(col->mbminmaxlen == type->mbminmaxlen);
# endif /* !UNIV_HOTBACKUP */
@@ -145,7 +147,7 @@ ulint
dict_col_get_fixed_size(
/*====================*/
const dict_col_t* col, /*!< in: column */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len,
col->mbminmaxlen, comp));
@@ -222,6 +224,22 @@ dict_table_get_first_index(
}
/********************************************************************//**
+Gets the last index on the table, i.e. the last element of table->indexes.
+@return index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_last_index(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ return(UT_LIST_GET_LAST((const_cast<dict_table_t*>(table))
+ ->indexes));
+}
+
+/********************************************************************//**
Gets the next index on the table.
@return index, NULL if none left */
UNIV_INLINE
@@ -250,7 +268,7 @@ dict_index_is_clust(
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return(UNIV_UNLIKELY(index->type & DICT_CLUSTERED));
+ return(index->type & DICT_CLUSTERED);
}
/********************************************************************//**
Check whether the index is unique.
@@ -264,7 +282,7 @@ dict_index_is_unique(
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return(UNIV_UNLIKELY(index->type & DICT_UNIQUE));
+ return(index->type & DICT_UNIQUE);
}
/********************************************************************//**
@@ -279,7 +297,22 @@ dict_index_is_ibuf(
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return(UNIV_UNLIKELY(index->type & DICT_IBUF));
+ return(index->type & DICT_IBUF);
+}
+
+/********************************************************************//**
+Check whether the index is a universal index tree (DICT_UNIVERSAL in type).
+@return nonzero for a universal tree, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_univ(
+/*===============*/
+ const dict_index_t* index) /*!< in: index */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ return(index->type & DICT_UNIVERSAL);
}
/********************************************************************//**
@@ -298,7 +331,7 @@ dict_index_is_sec_or_ibuf(
type = index->type;
- return(UNIV_LIKELY(!(type & DICT_CLUSTERED) || (type & DICT_IBUF)));
+ return(!(type & DICT_CLUSTERED) || (type & DICT_IBUF));
}
/********************************************************************//**
@@ -349,6 +382,56 @@ dict_table_get_n_cols(
return(table->n_cols);
}
+/********************************************************************//**
+Gets the approximate, estimated number of rows in the table (stat_n_rows).
+@return estimated number of rows; ut_ad() asserts stat_initialized is set */
+UNIV_INLINE
+ib_uint64_t
+dict_table_get_n_rows(
+/*==================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table->stat_initialized);
+
+ return(table->stat_n_rows);
+}
+
+/********************************************************************//**
+Increment the estimated number of rows in the table by one. Nothing is done
+if statistics are uninitialized or the counter is at its 64-bit maximum.
+This operation is not protected by any latch, so the number is approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_inc(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ if (table->stat_initialized) {
+ ib_uint64_t n_rows = table->stat_n_rows;
+ if (n_rows < 0xFFFFFFFFFFFFFFFFULL) {
+ table->stat_n_rows = n_rows + 1;
+ }
+ }
+}
+
+/********************************************************************//**
+Decrement the estimated number of rows in the table by one. Nothing is done
+if statistics are uninitialized or the counter is already zero.
+This operation is not protected by any latch, so the number is approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_dec(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ if (table->stat_initialized) {
+ ib_uint64_t n_rows = table->stat_n_rows;
+ if (n_rows > 0) {
+ table->stat_n_rows = n_rows - 1;
+ }
+ }
+}
+
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth column of a table.
@@ -420,11 +503,196 @@ dict_table_is_comp(
{
ut_ad(table);
-#if DICT_TF_COMPACT != TRUE
-#error
+#if DICT_TF_COMPACT != 1
+#error "DICT_TF_COMPACT must be 1"
#endif
- return(UNIV_LIKELY(table->flags & DICT_TF_COMPACT));
+ return(table->flags & DICT_TF_COMPACT);
+}
+
+/********************************************************************//**
+Check if the table has an FTS index, i.e. if its DICT_TF2_FTS flag is set. */
+UNIV_INLINE
+ibool
+dict_table_has_fts_index(
+/*=====================*/
+ /* return: TRUE if table has an FTS index */
+ dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table);
+
+ return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS));
+}
+
+/********************************************************************//**
+Validate the table flags (as stored in dict_table_t::flags).
+@return true if valid. */
+UNIV_INLINE
+bool
+dict_tf_is_valid(
+/*=============*/
+ ulint flags) /*!< in: table flags */
+{
+ ulint compact = DICT_TF_GET_COMPACT(flags);
+ ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
+ ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags);
+ ulint unused = DICT_TF_GET_UNUSED(flags);
+
+ /* Make sure there are no bits that we do not know about. */
+ if (unused != 0) {
+
+ return(false);
+
+ } else if (atomic_blobs) {
+ /* Barracuda row formats COMPRESSED and DYNAMIC build on
+ the page structure introduced for the COMPACT row format
+ by allowing keys in secondary indexes to be made from
+ data stored off-page, so atomic_blobs requires compact. */
+
+ if (!compact) {
+ return(false);
+ }
+
+ } else if (zip_ssize) {
+
+ /* Without atomic_blobs (Antelope) COMPRESSED is unsupported. */
+ return(false);
+ }
+
+ if (zip_ssize) {
+
+ /* COMPRESSED needs compact and atomic_blobs (both already ensured
+ by the chain above) and a shift size within the allowed range. */
+
+ if (!compact
+ || !atomic_blobs
+ || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+
+ return(false);
+ }
+ }
+
+ /* CREATE TABLE ... DATA DIRECTORY is supported for any row format,
+ so the DATA_DIR flag is not checked against the other table flags. */
+
+ return(true);
+}
+
+/********************************************************************//**
+Validate a SYS_TABLES TYPE field (together with N_COLS) and return it.
+@return the input type, unchanged, if it is a valid SYS_TABLES TYPE field;
+ULINT_UNDEFINED if any check fails. */
+UNIV_INLINE
+ulint
+dict_sys_tables_type_validate(
+/*==========================*/
+ ulint type, /*!< in: SYS_TABLES.TYPE */
+ ulint n_cols) /*!< in: SYS_TABLES.N_COLS */
+{
+ ulint low_order_bit = DICT_TF_GET_COMPACT(type);
+ ulint redundant = !(n_cols & DICT_N_COLS_COMPACT);
+ ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type);
+ ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type);
+ ulint unused = DICT_TF_GET_UNUSED(type);
+
+ /* The low order bit of SYS_TABLES.TYPE is always set to 1.
+ If the format is UNIV_FORMAT_B or higher, this field is the same
+ as dict_table_t::flags. Zero is not allowed here. */
+ if (!low_order_bit) {
+ return(ULINT_UNDEFINED);
+ }
+
+ if (redundant) {
+ if (zip_ssize || atomic_blobs) {
+ return(ULINT_UNDEFINED);
+ }
+ }
+
+ /* Make sure there are no bits that we do not know about. */
+ if (unused) {
+ return(ULINT_UNDEFINED);
+ }
+
+ if (atomic_blobs) {
+ /* Barracuda row formats COMPRESSED and DYNAMIC build on
+ the page structure introduced for the COMPACT row format
+ by allowing keys in secondary indexes to be made from
+ data stored off-page in the clustered index.
+
+ The DICT_N_COLS_COMPACT flag should be in N_COLS,
+ but the redundant check above has already verified that. */
+
+ } else if (zip_ssize) {
+ /* Antelope does not support COMPRESSED format. */
+ return(ULINT_UNDEFINED);
+ }
+
+ if (zip_ssize) {
+ /* COMPRESSED row format must have low_order_bit and
+ atomic_blobs bits set and the DICT_N_COLS_COMPACT flag
+ should be in N_COLS, but we already know about the
+ low_order_bit and DICT_N_COLS_COMPACT flags. */
+ if (!atomic_blobs) {
+ return(ULINT_UNDEFINED);
+ }
+
+ /* Validate that the number is within allowed range. */
+ if (zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ return(ULINT_UNDEFINED);
+ }
+ }
+
+ /* There is nothing to validate for the data_dir field.
+ CREATE TABLE ... DATA DIRECTORY is supported for any row
+ format, so the DATA_DIR flag is compatible with any other
+ table flags. However, it is not used with TEMPORARY tables. */
+
+ /* Return the validated SYS_TABLES.TYPE. */
+ return(type);
+}
+
+/********************************************************************//**
+Determine the record (row) format from dict_table_t::flags.
+The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any
+other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set.
+@return record format, one of the REC_FORMAT_* values */
+UNIV_INLINE
+rec_format_t
+dict_tf_get_rec_format(
+/*===================*/
+ ulint flags) /*!< in: dict_table_t::flags */
+{
+ ut_a(dict_tf_is_valid(flags));
+
+ if (!DICT_TF_GET_COMPACT(flags)) {
+ return(REC_FORMAT_REDUNDANT);
+ }
+
+ if (!DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
+ return(REC_FORMAT_COMPACT);
+ }
+
+ if (DICT_TF_GET_ZIP_SSIZE(flags)) {
+ return(REC_FORMAT_COMPRESSED);
+ }
+
+ return(REC_FORMAT_DYNAMIC);
+}
+
+/********************************************************************//**
+Determine the file format from a dict_table_t::flags.
+@return UNIV_FORMAT_B if the ATOMIC_BLOBS flag is set, else UNIV_FORMAT_A */
+UNIV_INLINE
+ulint
+dict_tf_get_format(
+/*===============*/
+ ulint flags) /*!< in: dict_table_t::flags */
+{
+ if (DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
+ return(UNIV_FORMAT_B);
+ }
+
+ return(UNIV_FORMAT_A);
}
/********************************************************************//**
@@ -438,41 +706,166 @@ dict_table_get_format(
{
ut_ad(table);
- return((table->flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT);
+ return(dict_tf_get_format(table->flags));
}
/********************************************************************//**
-Determine the file format of a table. */
+Set the file format and zip size in a dict_table_t::flags. If zip size
+is not needed, it should be 0. */
UNIV_INLINE
void
-dict_table_set_format(
-/*==================*/
- dict_table_t* table, /*!< in/out: table */
- ulint format) /*!< in: file format version */
+dict_tf_set(
+/*========*/
+ ulint* flags, /*!< in/out: table flags */
+ rec_format_t format, /*!< in: file format */
+ ulint zip_ssize, /*!< in: zip shift size */
+ bool use_data_dir) /*!< in: table uses DATA DIRECTORY */
{
- ut_ad(table);
+ switch (format) {
+ case REC_FORMAT_REDUNDANT:
+ *flags = 0;
+ ut_ad(zip_ssize == 0);
+ break;
+ case REC_FORMAT_COMPACT:
+ *flags = DICT_TF_COMPACT;
+ ut_ad(zip_ssize == 0);
+ break;
+ case REC_FORMAT_COMPRESSED:
+ *flags = DICT_TF_COMPACT
+ | (1 << DICT_TF_POS_ATOMIC_BLOBS)
+ | (zip_ssize << DICT_TF_POS_ZIP_SSIZE);
+ break;
+ case REC_FORMAT_DYNAMIC:
+ *flags = DICT_TF_COMPACT
+ | (1 << DICT_TF_POS_ATOMIC_BLOBS);
+ ut_ad(zip_ssize == 0);
+ break;
+ }
- table->flags = (table->flags & ~DICT_TF_FORMAT_MASK)
- | (format << DICT_TF_FORMAT_SHIFT);
+ if (use_data_dir) {
+ *flags |= (1 << DICT_TF_POS_DATA_DIR);
+ }
}
/********************************************************************//**
-Extract the compressed page size from table flags.
+Convert a 32 bit integer table flags to the 32 bit integer that is
+written into the tablespace header at the offset FSP_SPACE_FLAGS and is
+also stored in the fil_space_t::flags field. The following chart shows
+the translation of the low order bit. Other bits are the same.
+========================= Low order bit ==========================
+ | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
+dict_table_t::flags | 0 | 1 | 1 | 1
+fil_space_t::flags | 0 | 0 | 1 | 1
+==================================================================
+@return tablespace flags (fil_space_t::flags) */
+UNIV_INLINE
+ulint
+dict_tf_to_fsp_flags(
+/*=================*/
+ ulint table_flags) /*!< in: dict_table_t::flags */
+{
+ ulint fsp_flags;
+
+ DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure",
+ return(ULINT_UNDEFINED););
+
+ /* Bit zero of the tablespace flags is set only with ATOMIC_BLOBS. */
+ fsp_flags = DICT_TF_HAS_ATOMIC_BLOBS(table_flags) ? 1 : 0;
+
+ /* ZIP_SSIZE and ATOMIC_BLOBS are copied using the table flag masks. */
+ fsp_flags |= table_flags & DICT_TF_MASK_ZIP_SSIZE;
+ fsp_flags |= table_flags & DICT_TF_MASK_ATOMIC_BLOBS;
+
+ /* In addition, tablespace flags also contain the page size. */
+ fsp_flags |= fsp_flags_set_page_size(fsp_flags, UNIV_PAGE_SIZE);
+
+ /* The DATA_DIR flag is in a different position in fsp_flags. */
+ fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags)
+ ? FSP_FLAGS_MASK_DATA_DIR : 0;
+
+ ut_a(fsp_flags_is_valid(fsp_flags));
+
+ return(fsp_flags);
+}
+
+/********************************************************************//**
+Convert a 32 bit integer from SYS_TABLES.TYPE to dict_table_t::flags.
+The following chart shows the translation of the low order bit.
+Other bits are the same.
+========================= Low order bit ==========================
+ | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
+SYS_TABLES.TYPE | 1 | 1 | 1
+dict_table_t::flags | 0 | 1 | 1
+==================================================================
+@return ulint containing dict_table_t::flags */
+UNIV_INLINE
+ulint
+dict_sys_tables_type_to_tf(
+/*=======================*/
+ ulint type, /*!< in: SYS_TABLES.TYPE field */
+ ulint n_cols) /*!< in: SYS_TABLES.N_COLS field */
+{
+ ulint flags;
+ ulint redundant = !(n_cols & DICT_N_COLS_COMPACT);
+
+ /* Bit zero is derived from N_COLS: 0 for REDUNDANT, 1 otherwise. */
+ flags = redundant ? 0 : 1;
+
+ /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are copied unchanged. */
+ flags |= type & (DICT_TF_MASK_ZIP_SSIZE
+ | DICT_TF_MASK_ATOMIC_BLOBS
+ | DICT_TF_MASK_DATA_DIR);
+
+ return(flags);
+}
+
+/********************************************************************//**
+Convert a 32 bit integer table flags to the 32 bit integer that is written
+to a SYS_TABLES.TYPE field. The following chart shows the translation of
+the low order bit. Other bits are the same.
+========================= Low order bit ==========================
+ | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
+dict_table_t::flags | 0 | 1 | 1
+SYS_TABLES.TYPE | 1 | 1 | 1
+==================================================================
+@return ulint containing SYS_TABLES.TYPE */
+UNIV_INLINE
+ulint
+dict_tf_to_sys_tables_type(
+/*=======================*/
+ ulint flags) /*!< in: dict_table_t::flags */
+{
+ ulint type;
+
+ ut_a(dict_tf_is_valid(flags));
+
+ /* Bit zero is always 1 in SYS_TABLES.TYPE, whatever the row format. */
+ type = 1;
+
+ /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are copied unchanged. */
+ type |= flags & (DICT_TF_MASK_ZIP_SSIZE
+ | DICT_TF_MASK_ATOMIC_BLOBS
+ | DICT_TF_MASK_DATA_DIR);
+
+ return(type);
+}
+
+/********************************************************************//**
+Extract the compressed page size from dict_table_t::flags.
+These flags are in memory, so assert that they are valid.
@return compressed page size, or 0 if not compressed */
UNIV_INLINE
ulint
-dict_table_flags_to_zip_size(
-/*=========================*/
+dict_tf_get_zip_size(
+/*=================*/
ulint flags) /*!< in: flags */
{
- ulint zip_size = flags & DICT_TF_ZSSIZE_MASK;
+ ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
+ ulint zip_size = (zip_ssize
+ ? (UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize
+ : 0);
- if (UNIV_UNLIKELY(zip_size)) {
- zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
- << (zip_size >> DICT_TF_ZSSIZE_SHIFT));
-
- ut_ad(zip_size <= UNIV_PAGE_SIZE);
- }
+ ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
return(zip_size);
}
@@ -488,7 +881,7 @@ dict_table_zip_size(
{
ut_ad(table);
- return(dict_table_flags_to_zip_size(table->flags));
+ return(dict_tf_get_zip_size(table->flags));
}
#ifndef UNIV_HOTBACKUP
@@ -535,6 +928,7 @@ dict_table_x_unlock_indexes(
}
}
#endif /* !UNIV_HOTBACKUP */
+
/********************************************************************//**
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system.
@@ -644,7 +1038,7 @@ dict_index_get_sys_col_pos(
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(!(index->type & DICT_UNIVERSAL));
+ ut_ad(!dict_index_is_univ(index));
if (dict_index_is_clust(index)) {
@@ -697,6 +1091,20 @@ dict_index_get_nth_col_no(
return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
}
+/********************************************************************//**
+Looks for column n in an index, by calling
+dict_index_get_nth_col_or_prefix_pos() with FALSE as its last argument.
+@return position in internal representation; ULINT_UNDEFINED if not contained */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+ const dict_index_t* index, /*!< in: index */
+ ulint n) /*!< in: column number */
+{
+ return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE));
+}
+
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Returns the minimum data size of an index record.
@@ -767,7 +1175,7 @@ dict_index_get_page(
Gets the read-write lock of the index tree.
@return read-write lock */
UNIV_INLINE
-rw_lock_t*
+prio_rw_lock_t*
dict_index_get_lock(
/*================*/
dict_index_t* index) /*!< in: index */
@@ -791,119 +1199,133 @@ dict_index_get_space_reserve(void)
return(UNIV_PAGE_SIZE / 16);
}
-/**********************************************************************//**
-Checks if a table is in the dictionary cache.
-@return table, NULL if not found */
+/********************************************************************//**
+Gets the status of online index creation.
+@return the status */
UNIV_INLINE
-dict_table_t*
-dict_table_check_if_in_cache_low(
-/*=============================*/
- const char* table_name) /*!< in: table name */
+enum online_index_status
+dict_index_get_online_status(
+/*=========================*/
+ const dict_index_t* index) /*!< in: secondary index */
{
- dict_table_t* table;
- ulint table_fold;
-
- ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ enum online_index_status status;
- /* Look for the table name in the hash table */
- table_fold = ut_fold_string(table_name);
+ status = (enum online_index_status) index->online_status;
- HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
- dict_table_t*, table, ut_ad(table->cached),
- !strcmp(table->name, table_name));
+ /* Without the index->lock protection, the online
+ status can change from ONLINE_INDEX_CREATION to
+ ONLINE_INDEX_COMPLETE (or ONLINE_INDEX_ABORTED) in
+ row_log_apply() once log application is done. So to make
+ sure the status is ONLINE_INDEX_CREATION or ONLINE_INDEX_COMPLETE
+ you should always do the recheck after acquiring index->lock */
- /* make young in table_LRU */
- if (table) {
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
+#ifdef UNIV_DEBUG
+ switch (status) {
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_CREATION:
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ return(status);
}
-
- return(table);
+ ut_error;
+#endif /* UNIV_DEBUG */
+ return(status);
}
-/**********************************************************************//**
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function.
-@return table, NULL if not found */
+/********************************************************************//**
+Sets the status of online index creation. */
UNIV_INLINE
-dict_table_t*
-dict_table_get_low(
-/*===============*/
- const char* table_name, /*!< in: table name */
- dict_err_ignore_t
- ignore_err) /*!< in: error to be ignored when
- loading a table definition */
+void
+dict_index_set_online_status(
+/*=========================*/
+ dict_index_t* index, /*!< in/out: index */
+ enum online_index_status status) /*!< in: status */
{
- dict_table_t* table;
+ ut_ad(!(index->type & DICT_FTS));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_CREATION:
+ break;
+ case ONLINE_INDEX_ABORTED:
+ ut_ad(status == ONLINE_INDEX_ABORTED_DROPPED);
+ break;
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
- ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ index->online_status = status;
+ ut_ad(dict_index_get_online_status(index) == status);
+}
- table = dict_table_check_if_in_cache_low(table_name);
-
- if (table && table->corrupted
- && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
- fprintf(stderr, "InnoDB: table");
- ut_print_name(stderr, NULL, TRUE, table->name);
- if (srv_load_corrupted) {
- fputs(" is corrupted, but"
- " innodb_force_load_corrupted is set\n", stderr);
- } else {
- fputs(" is corrupted\n", stderr);
- return(NULL);
+/********************************************************************//**
+Determines if a secondary index is being or has been created online,
+or if the table is being rebuilt online, allowing concurrent modifications
+to the table.
+@retval true if the index is being or has been built online, or
+if this is a clustered index and the table is being or has been rebuilt online
+@retval false if the index has been created or the table has been
+rebuilt completely */
+UNIV_INLINE
+bool
+dict_index_is_online_ddl(
+/*=====================*/
+ const dict_index_t* index) /*!< in: index */
+{
+#ifdef UNIV_DEBUG
+ if (dict_index_is_clust(index)) {
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ return(true);
+ case ONLINE_INDEX_COMPLETE:
+ return(false);
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ break;
}
+ ut_ad(0);
+ return(false);
}
+#endif /* UNIV_DEBUG */
- if (table == NULL) {
- table = dict_load_table(table_name, TRUE, ignore_err);
- }
-
- ut_ad(!table || table->cached);
-
- return(table);
+ return(UNIV_UNLIKELY(dict_index_get_online_status(index)
+ != ONLINE_INDEX_COMPLETE));
}
/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
+Check whether a column exists in an FTS index.
+@return ULINT_UNDEFINED if no match else the offset within the vector */
UNIV_INLINE
-dict_table_t*
-dict_table_get_on_id_low(
+ulint
+dict_table_is_fts_column(
/*=====================*/
- table_id_t table_id) /*!< in: table id */
+ ib_vector_t* indexes,/*!< in: vector containing only FTS indexes */
+ ulint col_no) /*!< in: col number to search for */
+
{
- dict_table_t* table;
- ulint fold;
+ ulint i;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ for (i = 0; i < ib_vector_size(indexes); ++i) {
+ dict_index_t* index;
- /* Look for the table name in the hash table */
- fold = ut_fold_ull(table_id);
+ index = (dict_index_t*) ib_vector_getp(indexes, i);
- HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
- dict_table_t*, table, ut_ad(table->cached),
- table->id == table_id);
- if (table == NULL) {
- table = dict_load_table_on_id(table_id);
- }
+ if (dict_index_contains_col_or_prefix(index, col_no)) {
- /* make young in table_LRU */
- if (table) {
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
+ return(i);
+ }
}
- ut_ad(!table || table->cached);
-
- /* TODO: should get the type information from MySQL */
-
- return(table);
+ return(ULINT_UNDEFINED);
}
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix
+note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
needs to be stored in the undo log.
@return bytes of column prefix to be stored in the undo log */
UNIV_INLINE
@@ -914,9 +1336,9 @@ dict_max_field_len_store_undo(
const dict_col_t* col) /*!< in: column which index prefix
is based on */
{
- ulint prefix_len = 0;
+ ulint prefix_len = 0;
- if (dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP)
+ if (dict_table_get_format(table) >= UNIV_FORMAT_B)
{
prefix_len = col->max_prefix
? col->max_prefix
@@ -938,7 +1360,7 @@ dict_table_is_corrupted(
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- return(UNIV_UNLIKELY(table->corrupted));
+ return(table->corrupted);
}
/********************************************************************//**
@@ -953,8 +1375,32 @@ dict_index_is_corrupted(
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return(UNIV_UNLIKELY((index->type & DICT_CORRUPT)
- || (index->table && index->table->corrupted)));
+ return((index->type & DICT_CORRUPT)
+ || (index->table && index->table->corrupted));
+}
+
+/********************************************************************//**
+Check if the tablespace for the table has been discarded (DICT_TF2_DISCARDED).
+@return true if the DICT_TF2_DISCARDED flag is set on the table. */
+UNIV_INLINE
+bool
+dict_table_is_discarded(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+{
+ return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED));
+}
+
+/********************************************************************//**
+Check if it is a temporary table, i.e. if DICT_TF2_TEMPORARY is set on it.
+@return true if temporary table flag is set. */
+UNIV_INLINE
+bool
+dict_table_is_temporary(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+{
+ return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY));
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/dict0load.h b/storage/xtradb/include/dict0load.h
index 5bb015346ac..030190b1a8e 100644
--- a/storage/xtradb/include/dict0load.h
+++ b/storage/xtradb/include/dict0load.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -29,38 +29,46 @@ Created 4/24/1996 Heikki Tuuri
#include "univ.i"
#include "dict0types.h"
+#include "trx0types.h"
#include "ut0byte.h"
#include "mem0mem.h"
#include "btr0types.h"
-/** enum that defines all 6 system table IDs */
-enum dict_system_table_id {
+/** enum that defines all system table IDs. @see SYSTEM_TABLE_NAME[] */
+enum dict_system_id_t {
SYS_TABLES = 0,
SYS_INDEXES,
SYS_COLUMNS,
SYS_FIELDS,
SYS_FOREIGN,
SYS_FOREIGN_COLS,
- SYS_STATS,
+ SYS_TABLESPACES,
+ SYS_DATAFILES,
/* This must be last item. Defines the number of system tables. */
SYS_NUM_SYSTEM_TABLES
};
-typedef enum dict_system_table_id dict_system_id_t;
-
-/** Status bit for dict_process_sys_tables_rec() */
-enum dict_table_info {
+/** Status bit for dict_process_sys_tables_rec_and_mtr_commit() */
+enum dict_table_info_t {
DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t
structure with information from
a SYS_TABLES record */
- DICT_TABLE_LOAD_FROM_CACHE = 1, /*!< Check first whether dict_table_t
+ DICT_TABLE_LOAD_FROM_CACHE = 1 /*!< Check first whether dict_table_t
is in the cache, if so, return it */
- DICT_TABLE_UPDATE_STATS = 2 /*!< whether to update statistics
- when loading SYS_TABLES information. */
};
-typedef enum dict_table_info dict_table_info_t;
+/** How dict_check_tablespaces_and_store_max_id() should check tablespaces */
+enum dict_check_t {
+ /** No user tablespaces have been opened
+ (no crash recovery, no transactions recovered). */
+ DICT_CHECK_NONE_LOADED = 0,
+ /** Some user tablespaces may have been opened
+ (no crash recovery; recovered table locks for transactions). */
+ DICT_CHECK_SOME_LOADED,
+ /** All user tablespaces have been opened (crash recovery). */
+ DICT_CHECK_ALL_LOADED
+};
/********************************************************************//**
In a crash recovery we already have all the tablespace objects created.
@@ -74,7 +82,7 @@ UNIV_INTERN
void
dict_check_tablespaces_and_store_max_id(
/*====================================*/
- ibool in_crash_recovery); /*!< in: are we doing a crash recovery */
+ dict_check_t dict_check); /*!< in: how to check */
/********************************************************************//**
Finds the first table name in the given database.
@return own: table name, NULL if does not exist; the caller must free
@@ -156,12 +164,28 @@ dict_load_field_low(
byte* last_index_id, /*!< in: last index id */
mem_heap_t* heap, /*!< in/out: memory heap
for temporary storage */
- const rec_t* rec, /*!< in: SYS_FIELDS record */
- char* addition_err_str,/*!< out: additional error message
- that requires information to be
- filled, or NULL */
- ulint err_str_len); /*!< in: length of addition_err_str
- in bytes */
+ const rec_t* rec); /*!< in: SYS_FIELDS record */
+/********************************************************************//**
+Using the table->heap, copy the null-terminated filepath into
+table->data_dir_path and put a null byte before the extension.
+This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
+Make this data directory path only if it has not yet been saved. */
+UNIV_INTERN
+void
+dict_save_data_dir_path(
+/*====================*/
+ dict_table_t* table, /*!< in/out: table */
+ char* filepath); /*!< in: null-terminated filepath of tablespace */
+/*****************************************************************//**
+Make sure the data_file_name is saved in dict_table_t if needed. Try to
+read it from the file dictionary first, then from SYS_DATAFILES. */
+UNIV_INTERN
+void
+dict_get_and_save_data_dir_path(
+/*============================*/
+ dict_table_t* table, /*!< in/out: table */
+ bool dict_mutex_own); /*!< in: true if the caller already
+ owns dict_sys->mutex */
/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
@@ -187,7 +211,9 @@ UNIV_INTERN
dict_table_t*
dict_load_table_on_id(
/*==================*/
- table_id_t table_id); /*!< in: table id */
+ table_id_t table_id, /*!< in: table id */
+ dict_err_ignore_t ignore_err); /*!< in: errors to ignore
+ when loading the table */
/********************************************************************//**
This function is called when the database is booted.
Loads system table index definitions except for the clustered index which
@@ -205,16 +231,19 @@ cache already contains all constraints where the other relevant table is
already in the dictionary cache.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_load_foreigns(
/*===============*/
const char* table_name, /*!< in: table name */
- ibool check_recursive,/*!< in: Whether to check
+ const char** col_names, /*!< in: column names, or NULL
+ to use table->col_names */
+ bool check_recursive,/*!< in: Whether to check
recursive load of tables
chained by FK */
- ibool check_charsets, /*!< in: TRUE=check charsets
- compatibility */
- dict_err_ignore_t ignore_err); /*!< in: error to be ignored */
+ bool check_charsets, /*!< in: whether to check
+ charset compatibility */
+ dict_err_ignore_t ignore_err) /*!< in: error to be ignored */
+ __attribute__((nonnull(1), warn_unused_result));
/********************************************************************//**
Prints to the standard output information on all tables found in the data
dictionary system table. */
@@ -251,15 +280,17 @@ both monitor table output and information schema innodb_sys_tables output.
@return error message, or NULL on success */
UNIV_INTERN
const char*
-dict_process_sys_tables_rec(
-/*========================*/
+dict_process_sys_tables_rec_and_mtr_commit(
+/*=======================================*/
mem_heap_t* heap, /*!< in: temporary memory heap */
const rec_t* rec, /*!< in: SYS_TABLES record */
dict_table_t** table, /*!< out: dict_table_t to fill */
- dict_table_info_t status); /*!< in: status bit controls
+ dict_table_info_t status, /*!< in: status bit controls
options such as whether we shall
look for dict_table_t from cache
first */
+ mtr_t* mtr); /*!< in/out: mini-transaction,
+ will be committed */
/********************************************************************//**
This function parses a SYS_INDEXES record and populate a dict_index_t
structure with the information from the record. For detail information
@@ -331,19 +362,65 @@ dict_process_sys_foreign_col_rec(
in referenced table */
ulint* pos); /*!< out: column position */
/********************************************************************//**
-This function parses a SYS_STATS record and extract necessary
-information from the record and return to caller.
+This function parses a SYS_TABLESPACES record, extracts necessary
+information from the record and returns to caller.
@return error message, or NULL on success */
UNIV_INTERN
const char*
-dict_process_sys_stats_rec(
-/*=============================*/
+dict_process_sys_tablespaces(
+/*=========================*/
mem_heap_t* heap, /*!< in/out: heap memory */
- const rec_t* rec, /*!< in: current SYS_STATS rec */
- index_id_t* index_id, /*!< out: INDEX_ID */
- ulint* key_cols, /*!< out: KEY_COLS */
- ib_uint64_t* diff_vals, /*!< out: DIFF_VALS */
- ib_uint64_t* non_null_vals); /*!< out: NON_NULL_VALS */
+ const rec_t* rec, /*!< in: current SYS_TABLESPACES rec */
+ ulint* space, /*!< out: space id */
+ const char** name, /*!< out: tablespace name */
+ ulint* flags); /*!< out: tablespace flags */
+/********************************************************************//**
+This function parses a SYS_DATAFILES record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_datafiles(
+/*=======================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_DATAFILES rec */
+ ulint* space, /*!< out: space id */
+ const char** path); /*!< out: datafile path */
+/********************************************************************//**
+Get the filepath for a spaceid from SYS_DATAFILES. This function provides
+a temporary heap which is used for the table lookup, but not for the path.
+The caller must free the memory for the path returned. This function can
+return NULL if the space ID is not found in SYS_DATAFILES, then the caller
+will assume that the ibd file is in the normal datadir.
+@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for
+the given space ID. NULL if space ID is zero or not found. */
+UNIV_INTERN
+char*
+dict_get_first_path(
+/*================*/
+ ulint space, /*!< in: space id */
+ const char* name); /*!< in: tablespace name */
+/********************************************************************//**
+Update the record for space_id in SYS_TABLESPACES to this filepath.
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_update_filepath(
+/*=================*/
+ ulint space_id, /*!< in: space id */
+ const char* filepath); /*!< in: filepath */
+/********************************************************************//**
+Insert records into SYS_TABLESPACES and SYS_DATAFILES.
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_insert_tablespace_and_filepath(
+/*================================*/
+ ulint space, /*!< in: space id */
+ const char* name, /*!< in: tablespace name */
+ const char* filepath, /*!< in: filepath */
+ ulint fsp_flags); /*!< in: tablespace flags */
+
#ifndef UNIV_NONINL
#include "dict0load.ic"
#endif
diff --git a/storage/xtradb/include/dict0load.ic b/storage/xtradb/include/dict0load.ic
index da224db7927..2c0f1ff38a5 100644
--- a/storage/xtradb/include/dict0load.ic
+++ b/storage/xtradb/include/dict0load.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h
index 717c7532dc9..bde0ce16094 100644
--- a/storage/xtradb/include/dict0mem.h
+++ b/storage/xtradb/include/dict0mem.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,9 +28,13 @@ Created 1/8/1996 Heikki Tuuri
#define dict0mem_h
#include "univ.i"
+
+#ifndef UNIV_INNOCHECKSUM
+
#include "dict0types.h"
#include "data0type.h"
#include "mem0mem.h"
+#include "row0types.h"
#include "rem0types.h"
#include "btr0types.h"
#ifndef UNIV_HOTBACKUP
@@ -43,6 +48,10 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0byte.h"
#include "hash0hash.h"
#include "trx0types.h"
+#include "fts0fts.h"
+
+/* Forward declaration. */
+struct ib_rbt_t;
/** Type flags of an index: OR'ing of the flags is allowed to define a
combination of types */
@@ -54,73 +63,170 @@ combination of types */
#define DICT_IBUF 8 /*!< insert buffer tree */
#define DICT_CORRUPT 16 /*!< bit to store the corrupted flag
in SYS_INDEXES.TYPE */
+#define DICT_FTS 32 /* FTS index; can't be combined with the
+ other flags */
-#define DICT_IT_BITS 5 /*!< number of bits used for
+#define DICT_IT_BITS 6 /*!< number of bits used for
SYS_INDEXES.TYPE */
/* @} */
+#if 0 /* not implemented, retained for history */
/** Types for a table object */
#define DICT_TABLE_ORDINARY 1 /*!< ordinary table */
-#if 0 /* not implemented */
#define DICT_TABLE_CLUSTER_MEMBER 2
#define DICT_TABLE_CLUSTER 3 /* this means that the table is
really a cluster definition */
#endif
-/** Table flags. All unused bits must be 0. */
-/* @{ */
-#define DICT_TF_COMPACT 1 /* Compact page format.
- This must be set for
- new file formats
- (later than
- DICT_TF_FORMAT_51). */
+/* Table and tablespace flags are generally not used for the Antelope file
+format except for the low order bit, which is used differently depending on
+where the flags are stored.
-/** Compressed page size (0=uncompressed, up to 15 compressed sizes) */
-/* @{ */
-#define DICT_TF_ZSSIZE_SHIFT 1
-#define DICT_TF_ZSSIZE_MASK (15 << DICT_TF_ZSSIZE_SHIFT)
-#define DICT_TF_ZSSIZE_MAX (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 1)
-/* @} */
+==================== Low order flags bit =========================
+ | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
+SYS_TABLES.TYPE | 1 | 1 | 1
+dict_table_t::flags | 0 | 1 | 1
+FSP_SPACE_FLAGS | 0 | 0 | 1
+fil_space_t::flags | 0 | 0 | 1
-/** File format */
-/* @{ */
-#define DICT_TF_FORMAT_SHIFT 5 /* file format */
-#define DICT_TF_FORMAT_MASK \
-((~(~0 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT))) << DICT_TF_FORMAT_SHIFT)
-#define DICT_TF_FORMAT_51 0 /*!< InnoDB/MySQL up to 5.1 */
-#define DICT_TF_FORMAT_ZIP 1 /*!< InnoDB plugin for 5.1:
- compressed tables,
- new BLOB treatment */
-/** Maximum supported file format */
-#define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP
-
-/** Minimum supported file format */
-#define DICT_TF_FORMAT_MIN DICT_TF_FORMAT_51
+Before the 5.1 plugin, SYS_TABLES.TYPE was always DICT_TABLE_ORDINARY (1)
+and the tablespace flags field was always 0. In the 5.1 plugin, these fields
+were repurposed to identify compressed and dynamic row formats.
-/* @} */
-#define DICT_TF_BITS 6 /*!< number of flag bits */
-#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
-# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
-#endif
+The following types and constants describe the flags found in dict_table_t
+and SYS_TABLES.TYPE. Similar flags found in fil_space_t and FSP_SPACE_FLAGS
+are described in fsp0fsp.h. */
+
+/* @{ */
+/** dict_table_t::flags bit 0 is equal to 0 if the row format = Redundant */
+#define DICT_TF_REDUNDANT 0 /*!< Redundant row format. */
+/** dict_table_t::flags bit 0 is equal to 1 if the row format = Compact */
+#define DICT_TF_COMPACT 1 /*!< Compact row format. */
+
+/** This bitmask is used in SYS_TABLES.N_COLS to set and test whether
+the Compact page format is used, i.e. ROW_FORMAT != REDUNDANT */
+#define DICT_N_COLS_COMPACT 0x80000000UL
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/** Width of the COMPACT flag */
+#define DICT_TF_WIDTH_COMPACT 1
+/** Width of the ZIP_SSIZE flag */
+#define DICT_TF_WIDTH_ZIP_SSIZE 4
+/** Width of the ATOMIC_BLOBS flag. The Antelope file formats broke up
+BLOB and TEXT fields, storing the first 768 bytes in the clustered index.
+Barracuda row formats store the whole blob or text field off-page atomically.
+Secondary indexes are created from this external data using row_ext_t
+to cache the BLOB prefixes. */
+#define DICT_TF_WIDTH_ATOMIC_BLOBS 1
+/** If a table is created with the MYSQL option DATA DIRECTORY and
+innodb-file-per-table, an older engine will not be able to find that table.
+This flag prevents older engines from attempting to open the table and
+allows InnoDB to update_create_info() accordingly. */
+#define DICT_TF_WIDTH_DATA_DIR 1
+
+/** Width of all the currently known table flags */
+#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
+ + DICT_TF_WIDTH_ZIP_SSIZE \
+ + DICT_TF_WIDTH_ATOMIC_BLOBS \
+ + DICT_TF_WIDTH_DATA_DIR)
+
+/** A mask of all the known/used bits in table flags */
+#define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS))
+
+/** Zero relative shift position of the COMPACT field */
+#define DICT_TF_POS_COMPACT 0
+/** Zero relative shift position of the ZIP_SSIZE field */
+#define DICT_TF_POS_ZIP_SSIZE (DICT_TF_POS_COMPACT \
+ + DICT_TF_WIDTH_COMPACT)
+/** Zero relative shift position of the ATOMIC_BLOBS field */
+#define DICT_TF_POS_ATOMIC_BLOBS (DICT_TF_POS_ZIP_SSIZE \
+ + DICT_TF_WIDTH_ZIP_SSIZE)
+/** Zero relative shift position of the DATA_DIR field */
+#define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \
+ + DICT_TF_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the start of the UNUSED bits */
+#define DICT_TF_POS_UNUSED (DICT_TF_POS_DATA_DIR \
+ + DICT_TF_WIDTH_DATA_DIR)
+
+/** Bit mask of the COMPACT field */
+#define DICT_TF_MASK_COMPACT \
+ ((~(~0 << DICT_TF_WIDTH_COMPACT)) \
+ << DICT_TF_POS_COMPACT)
+/** Bit mask of the ZIP_SSIZE field */
+#define DICT_TF_MASK_ZIP_SSIZE \
+ ((~(~0 << DICT_TF_WIDTH_ZIP_SSIZE)) \
+ << DICT_TF_POS_ZIP_SSIZE)
+/** Bit mask of the ATOMIC_BLOBS field */
+#define DICT_TF_MASK_ATOMIC_BLOBS \
+ ((~(~0 << DICT_TF_WIDTH_ATOMIC_BLOBS)) \
+ << DICT_TF_POS_ATOMIC_BLOBS)
+/** Bit mask of the DATA_DIR field */
+#define DICT_TF_MASK_DATA_DIR \
+ ((~(~0 << DICT_TF_WIDTH_DATA_DIR)) \
+ << DICT_TF_POS_DATA_DIR)
+
+/** Return the value of the COMPACT field */
+#define DICT_TF_GET_COMPACT(flags) \
+ ((flags & DICT_TF_MASK_COMPACT) \
+ >> DICT_TF_POS_COMPACT)
+/** Return the value of the ZIP_SSIZE field */
+#define DICT_TF_GET_ZIP_SSIZE(flags) \
+ ((flags & DICT_TF_MASK_ZIP_SSIZE) \
+ >> DICT_TF_POS_ZIP_SSIZE)
+/** Return the value of the ATOMIC_BLOBS field */
+#define DICT_TF_HAS_ATOMIC_BLOBS(flags) \
+ ((flags & DICT_TF_MASK_ATOMIC_BLOBS) \
+ >> DICT_TF_POS_ATOMIC_BLOBS)
+/** Return the value of the DATA_DIR field */
+#define DICT_TF_HAS_DATA_DIR(flags) \
+ ((flags & DICT_TF_MASK_DATA_DIR) \
+ >> DICT_TF_POS_DATA_DIR)
+/** Return the contents of the UNUSED bits */
+#define DICT_TF_GET_UNUSED(flags) \
+ (flags >> DICT_TF_POS_UNUSED)
/* @} */
-/** @brief Additional table flags.
+#ifndef UNIV_INNOCHECKSUM
+
+/** @brief Table Flags set number 2.
These flags will be stored in SYS_TABLES.MIX_LEN. All unused flags
will be written as 0. The column may contain garbage for tables
created with old versions of InnoDB that only implemented
-ROW_FORMAT=REDUNDANT. */
+ROW_FORMAT=REDUNDANT. InnoDB engines do not check these flags
+for unknown bits in order to protect backward compatibility. */
/* @{ */
-#define DICT_TF2_SHIFT DICT_TF_BITS
- /*!< Shift value for
- table->flags. */
-#define DICT_TF2_TEMPORARY 1 /*!< TRUE for tables from
- CREATE TEMPORARY TABLE. */
-#define DICT_TF2_BITS (DICT_TF2_SHIFT + 1)
- /*!< Total number of bits
- in table->flags. */
+/** Total number of bits in table->flags2. */
+#define DICT_TF2_BITS 6
+#define DICT_TF2_BIT_MASK ~(~0 << DICT_TF2_BITS)
+
+/** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */
+#define DICT_TF2_TEMPORARY 1
+/** The table has an internally defined DOC ID column */
+#define DICT_TF2_FTS_HAS_DOC_ID 2
+/** The table has an FTS index */
+#define DICT_TF2_FTS 4
+/** Need to add Doc ID column for FTS index build.
+This is a transient bit for index build */
+#define DICT_TF2_FTS_ADD_DOC_ID 8
+/** This bit is used during table creation to indicate that it will
+use its own tablespace instead of the system tablespace. */
+#define DICT_TF2_USE_TABLESPACE 16
+
+/** Set when we discard/detach the tablespace */
+#define DICT_TF2_DISCARDED 32
/* @} */
+#define DICT_TF2_FLAG_SET(table, flag) \
+ (table->flags2 |= (flag))
+
+#define DICT_TF2_FLAG_IS_SET(table, flag) \
+ (table->flags2 & (flag))
+
+#define DICT_TF2_FLAG_UNSET(table, flag) \
+ (table->flags2 &= ~(flag))
+
/** Tables could be chained together with Foreign key constraint. When
first load the parent table, we would load all of its descedents.
This could result in rescursive calls and out of stack error eventually.
@@ -146,11 +252,10 @@ dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
ulint space, /*!< in: space where the clustered index
- of the table is placed; this parameter
- is ignored if the table is made
- a member of a cluster */
+ of the table is placed */
ulint n_cols, /*!< in: number of columns */
- ulint flags); /*!< in: table flags */
+ ulint flags, /*!< in: table flags */
+ ulint flags2); /*!< in: table flags2 */
/****************************************************************//**
Free a table memory object. */
UNIV_INTERN
@@ -169,7 +274,19 @@ dict_mem_table_add_col(
const char* name, /*!< in: column name, or NULL */
ulint mtype, /*!< in: main datatype */
ulint prtype, /*!< in: precise type */
- ulint len); /*!< in: precision */
+ ulint len) /*!< in: precision */
+ __attribute__((nonnull(1)));
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache. */
+UNIV_INTERN
+void
+dict_mem_table_col_rename(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ unsigned nth_col,/*!< in: column index */
+ const char* from, /*!< in: old column name */
+ const char* to) /*!< in: new column name */
+ __attribute__((nonnull));
/**********************************************************************//**
This function populates a dict_col_t memory structure with
supplied information. */
@@ -267,20 +384,31 @@ dict_mem_referenced_table_name_lookup_set(
dict_foreign_t* foreign, /*!< in/out: foreign struct */
ibool do_alloc); /*!< in: is an alloc needed */
+/*******************************************************************//**
+Create a temporary tablename.
+@return temporary tablename suitable for InnoDB use */
+UNIV_INTERN __attribute__((nonnull, warn_unused_result))
+char*
+dict_mem_create_temporary_tablename(
+/*================================*/
+ mem_heap_t* heap, /*!< in: memory heap */
+ const char* dbtab, /*!< in: database/table name */
+ table_id_t id); /*!< in: InnoDB table id */
+
/** Data structure for a column in a table */
-struct dict_col_struct{
+struct dict_col_t{
/*----------------------*/
/** The following are copied from dtype_t,
so that all bit-fields can be packed tightly. */
/* @{ */
- unsigned mtype:8; /*!< main data type */
- unsigned prtype:24; /*!< precise type; MySQL data
+ unsigned prtype:32; /*!< precise type; MySQL data
type, charset code, flags to
indicate nullability,
signedness, whether this is a
binary string, whether this is
a true VARCHAR where MySQL
uses 2 bytes to store the length */
+ unsigned mtype:8; /*!< main data type */
/* the remaining fields do not affect alphabetical ordering: */
@@ -327,17 +455,16 @@ files would be at risk! */
/** Find out maximum indexed column length by its table format.
For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum
-field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For new
-barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN
-(3072) bytes */
+field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For
+Barracuda row formats COMPRESSED and DYNAMIC, the length could
+be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \
- ((dict_table_get_format(table) < DICT_TF_FORMAT_ZIP) \
+ ((dict_table_get_format(table) < UNIV_FORMAT_B) \
? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
: REC_VERSION_56_MAX_INDEX_COL_LEN)
#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \
- ((((flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT)\
- < DICT_TF_FORMAT_ZIP) \
+ ((DICT_TF_HAS_ATOMIC_BLOBS(flags) < UNIV_FORMAT_B) \
? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
: REC_VERSION_56_MAX_INDEX_COL_LEN)
@@ -345,7 +472,7 @@ barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN
#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
/** Data structure for a field in an index */
-struct dict_field_struct{
+struct dict_field_t{
dict_col_t* col; /*!< pointer to the table column */
const char* name; /*!< name of the column */
unsigned prefix_len:12; /*!< 0 or the length of the column
@@ -361,11 +488,63 @@ struct dict_field_struct{
DICT_ANTELOPE_MAX_INDEX_COL_LEN */
};
+/**********************************************************************//**
+PADDING HEURISTIC BASED ON LINEAR INCREASE OF PADDING TO AVOID
+COMPRESSION FAILURES
+(Note: this is relevant only for compressed indexes)
+GOAL: Avoid compression failures by maintaining information about the
+compressibility of data. If data is not very compressible then leave
+some extra space 'padding' in the uncompressed page making it more
+likely that compression of less than fully packed uncompressed page will
+succeed.
+
+This padding heuristic works by increasing the pad linearly until the
+desired failure rate is reached. A "round" is a fixed number of
+compression operations.
+After each round, the compression failure rate for that round is
+computed. If the failure rate is too high, then padding is incremented
+by a fixed value, otherwise it's left intact.
+If the compression failure is lower than the desired rate for a fixed
+number of consecutive rounds, then the padding is decreased by a fixed
+value. This is done to prevent overshooting the padding value,
+and to accommodate the possible change in data compressibility. */
+
+/** Number of zip ops in one round. */
+#define ZIP_PAD_ROUND_LEN (128)
+
+/** Number of successful rounds after which the padding is decreased */
+#define ZIP_PAD_SUCCESSFUL_ROUND_LIMIT (5)
+
+/** Amount by which padding is increased. */
+#define ZIP_PAD_INCR (128)
+
+/** Percentage of compression failures that are allowed in a single
+round */
+extern ulong zip_failure_threshold_pct;
+
+/** Maximum percentage of a page that can be allowed as a pad to avoid
+compression failures */
+extern ulong zip_pad_max;
+
+/** Data structure to hold information about how much space in
+an uncompressed page should be left as padding to avoid compression
+failures. This estimate is based on a self-adapting heuristic. */
+struct zip_pad_info_t {
+ os_fast_mutex_t mutex; /*!< mutex protecting the info */
+ ulint pad; /*!< number of bytes used as pad */
+ ulint success;/*!< successful compression ops during
+ current round */
+ ulint failure;/*!< failed compression ops during
+ current round */
+ ulint n_rounds;/*!< number of currently successful
+ rounds */
+};
+
/** Data structure for an index. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_index_create(). */
-struct dict_index_struct{
+struct dict_index_t{
index_id_t id; /*!< id of the index */
- rw_lock_t* search_latch; /*!< latch protecting the AHI partition
+ prio_rw_lock_t* search_latch; /*!< latch protecting the AHI partition
corresponding to this index */
hash_table_t* search_table; /*!< hash table protected by
search_latch */
@@ -403,30 +582,47 @@ struct dict_index_struct{
unsigned cached:1;/*!< TRUE if the index object is in the
dictionary cache */
unsigned to_be_dropped:1;
- /*!< TRUE if this index is marked to be
- dropped in ha_innobase::prepare_drop_index(),
- otherwise FALSE. Protected by
- dict_sys->mutex, dict_operation_lock and
- index->lock.*/
+ /*!< TRUE if the index is to be dropped;
+ protected by dict_operation_lock */
+ unsigned online_status:2;
+ /*!< enum online_index_status.
+ Transitions from ONLINE_INDEX_COMPLETE (to
+ ONLINE_INDEX_CREATION) are protected
+ by dict_operation_lock and
+ dict_sys->mutex. Other changes are
+ protected by index->lock. */
dict_field_t* fields; /*!< array of field descriptions */
#ifndef UNIV_HOTBACKUP
UT_LIST_NODE_T(dict_index_t)
indexes;/*!< list of indexes of the table */
- btr_search_t* search_info; /*!< info used in optimistic searches */
+ btr_search_t* search_info;
+ /*!< info used in optimistic searches */
+ row_log_t* online_log;
+ /*!< the log of modifications
+ during online index creation;
+ valid when online_status is
+ ONLINE_INDEX_CREATION */
/*----------------------*/
/** Statistics for query optimization */
/* @{ */
- ib_int64_t* stat_n_diff_key_vals;
+ ib_uint64_t* stat_n_diff_key_vals;
/*!< approximate number of different
key values for this index, for each
- n-column prefix where n <=
- dict_get_n_unique(index); we
+ n-column prefix where 1 <= n <=
+ dict_get_n_unique(index) (the array is
+ indexed from 0 to n_uniq-1); we
periodically calculate new
estimates */
- ib_int64_t* stat_n_non_null_key_vals;
+ ib_uint64_t* stat_n_sample_sizes;
+ /*!< number of pages that were sampled
+ to calculate each of stat_n_diff_key_vals[],
+ e.g. stat_n_sample_sizes[3] pages were sampled
+ to get the number stat_n_diff_key_vals[3]. */
+ ib_uint64_t* stat_n_non_null_key_vals;
/* approximate number of non-null key values
for this index, for each column where
- n < dict_get_n_unique(index); This
+ 1 <= n <= dict_get_n_unique(index) (the array
+ is indexed from 0 to n_uniq-1); This
is used when innodb_stats_method is
"nulls_ignored". */
ulint stat_index_size;
@@ -436,30 +632,52 @@ struct dict_index_struct{
/*!< approximate number of leaf pages in the
index tree */
/* @} */
- rw_lock_t lock; /*!< read-write lock protecting the
+ prio_rw_lock_t lock; /*!< read-write lock protecting the
upper levels of the index tree */
trx_id_t trx_id; /*!< id of the transaction that created this
index, or 0 if the index existed
when InnoDB was started up */
+ zip_pad_info_t zip_pad;/*!< Information about state of
+ compression failures and successes */
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_BLOB_DEBUG
- mutex_t blobs_mutex;
+ ib_mutex_t blobs_mutex;
/*!< mutex protecting blobs */
- void* blobs; /*!< map of (page_no,heap_no,field_no)
+ ib_rbt_t* blobs; /*!< map of (page_no,heap_no,field_no)
to first_blob_page_no; protected by
blobs_mutex; @see btr_blob_dbg_t */
#endif /* UNIV_BLOB_DEBUG */
#ifdef UNIV_DEBUG
ulint magic_n;/*!< magic number */
-/** Value of dict_index_struct::magic_n */
+/** Value of dict_index_t::magic_n */
# define DICT_INDEX_MAGIC_N 76789786
#endif
};
+/** The status of online index creation */
+enum online_index_status {
+ /** the index is complete and ready for access */
+ ONLINE_INDEX_COMPLETE = 0,
+ /** the index is being created, online
+ (allowing concurrent modifications) */
+ ONLINE_INDEX_CREATION,
+ /** secondary index creation was aborted and the index
+ should be dropped as soon as index->table->n_ref_count reaches 0,
+ or online table rebuild was aborted and the clustered index
+ of the original table should soon be restored to
+ ONLINE_INDEX_COMPLETE */
+ ONLINE_INDEX_ABORTED,
+ /** the online index creation was aborted, the index was
+ dropped from the data dictionary and the tablespace, and it
+ should be dropped from the data dictionary cache as soon as
+ index->table->n_ref_count reaches 0. */
+ ONLINE_INDEX_ABORTED_DROPPED
+};
+
/** Data structure for a foreign key constraint; an example:
FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be
initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
-struct dict_foreign_struct{
+struct dict_foreign_t{
mem_heap_t* heap; /*!< this object is allocated from
this memory heap */
char* id; /*!< id of the constraint as a
@@ -510,10 +728,9 @@ a foreign key constraint is enforced, therefore RESTRICT just means no flag */
#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */
/* @} */
-
/** Data structure for a database table. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_table_create(). */
-struct dict_table_struct{
+struct dict_table_t{
table_id_t id; /*!< id of the table */
mem_heap_t* heap; /*!< memory heap */
char* name; /*!< table name */
@@ -523,26 +740,39 @@ struct dict_table_struct{
innodb_file_per_table is defined in my.cnf;
in Unix this is usually /tmp/..., in Windows
temp\... */
+ char* data_dir_path; /*!< NULL or the directory path
+ specified by DATA DIRECTORY */
unsigned space:32;
/*!< space where the clustered index of the
table is placed */
- unsigned flags:DICT_TF2_BITS;/*!< DICT_TF_COMPACT, ... */
+ unsigned flags:DICT_TF_BITS; /*!< DICT_TF_... */
+ unsigned flags2:DICT_TF2_BITS; /*!< DICT_TF2_... */
unsigned ibd_file_missing:1;
/*!< TRUE if this is in a single-table
tablespace and the .ibd file is missing; then
we must return in ha_innodb.cc an error if the
user tries to query such an orphaned table */
- unsigned tablespace_discarded:1;
- /*!< this flag is set TRUE when the user
- calls DISCARD TABLESPACE on this
- table, and reset to FALSE in IMPORT
- TABLESPACE */
unsigned cached:1;/*!< TRUE if the table object has been added
to the dictionary cache */
+ unsigned to_be_dropped:1;
+ /*!< TRUE if the table is to be dropped, but
+ not yet actually dropped (it could be in the
+ background drop list); it is turned on at the beginning
+ of row_drop_table_for_mysql() and turned off
+ just before we start to update system tables
+ for the drop. It is protected by
+ dict_operation_lock */
unsigned n_def:10;/*!< number of columns defined so far */
unsigned n_cols:10;/*!< number of columns */
+ unsigned can_be_evicted:1;
+ /*!< TRUE if it's not an InnoDB system table
+ or a table that has no FK relationships */
unsigned corrupted:1;
/*!< TRUE if table is corrupted */
+ unsigned drop_aborted:1;
+ /*!< TRUE if some indexes should be dropped
+ after ONLINE_INDEX_ABORTED
+ or ONLINE_INDEX_ABORTED_DROPPED */
dict_col_t* cols; /*!< array of column descriptions */
const char* col_names;
/*!< Column names packed in a character string
@@ -564,12 +794,6 @@ struct dict_table_struct{
which refer to this table */
UT_LIST_NODE_T(dict_table_t)
table_LRU; /*!< node of the LRU list of tables */
- ulint n_mysql_handles_opened;
- /*!< count of how many handles MySQL has opened
- to this table; dropping of the table is
- NOT allowed until this count gets to zero;
- MySQL does NOT itself check the number of
- open handles at drop */
unsigned fk_max_recusive_level:8;
/*!< maximum recursive level we support when
loading tables chained together with FK
@@ -582,6 +806,12 @@ struct dict_table_struct{
on the table: we cannot drop the table while
there are foreign key checks running on
it! */
+ trx_id_t def_trx_id;
+ /*!< transaction id that last touched
+ the table definition, either when
+ loading the definition or CREATE
+ TABLE, or ALTER TABLE (prepare,
+ commit, and rollback phases) */
trx_id_t query_cache_inv_trx_id;
/*!< transactions whose trx id is
smaller than this number are not
@@ -590,8 +820,6 @@ struct dict_table_struct{
with undo logs commits, it sets this
to the value of the trx id counter for
the tables it had an IX lock on */
- UT_LIST_BASE_NODE_T(lock_t)
- locks; /*!< list of locks on the table */
#ifdef UNIV_DEBUG
/*----------------------*/
ibool does_not_fit_in_memory;
@@ -611,18 +839,60 @@ struct dict_table_struct{
/*!< flag: TRUE if the maximum length of
a single row exceeds BIG_ROW_SIZE;
initialized in dict_table_add_to_cache() */
- /** Statistics for query optimization.
- The following stat_* members are usually
- protected by dict_table_stats_lock(). In
- some exceptional cases (performance critical
- code paths) we access or modify stat_n_rows
- and stat_modified_counter without any
- protection. */
+ /** Statistics for query optimization */
/* @{ */
unsigned stat_initialized:1; /*!< TRUE if statistics have
been calculated the first time
after database startup or table creation */
- ib_int64_t stat_n_rows;
+ ib_time_t stats_last_recalc;
+ /*!< Timestamp of last recalc of the stats */
+ ib_uint32_t stat_persistent;
+ /*!< The two bits below are set in the
+ ::stat_persistent member and have the following
+ meaning:
+ 1. _ON=0, _OFF=0, no explicit persistent stats
+ setting for this table, the value of the global
+ srv_stats_persistent is used to determine
+ whether the table has persistent stats enabled
+ or not
+ 2. _ON=0, _OFF=1, persistent stats are
+ explicitly disabled for this table, regardless
+ of the value of the global srv_stats_persistent
+ 3. _ON=1, _OFF=0, persistent stats are
+ explicitly enabled for this table, regardless
+ of the value of the global srv_stats_persistent
+ 4. _ON=1, _OFF=1, not allowed, we assert if
+ this ever happens. */
+#define DICT_STATS_PERSISTENT_ON (1 << 1)
+#define DICT_STATS_PERSISTENT_OFF (1 << 2)
+ ib_uint32_t stats_auto_recalc;
+ /*!< The two bits below are set in the
+ ::stats_auto_recalc member and have
+ the following meaning:
+ 1. _ON=0, _OFF=0, no explicit auto recalc
+ setting for this table, the value of the global
+ srv_stats_persistent_auto_recalc is used to
+ determine whether the table has auto recalc
+ enabled or not
+ 2. _ON=0, _OFF=1, auto recalc is explicitly
+ disabled for this table, regardless of the
+ value of the global
+ srv_stats_persistent_auto_recalc
+ 3. _ON=1, _OFF=0, auto recalc is explicitly
+ enabled for this table, regardless of the
+ value of the global
+ srv_stats_persistent_auto_recalc
+ 4. _ON=1, _OFF=1, not allowed, we assert if
+ this ever happens. */
+#define DICT_STATS_AUTO_RECALC_ON (1 << 1)
+#define DICT_STATS_AUTO_RECALC_OFF (1 << 2)
+ ulint stats_sample_pages;
+ /*!< the number of pages to sample for this
+ table during persistent stats estimation;
+ if this is 0, then the value of the global
+ srv_stats_persistent_sample_pages will be
+ used instead. */
+ ib_uint64_t stat_n_rows;
/*!< approximate number of rows in the table;
we periodically calculate new estimates */
ulint stat_clustered_index_size;
@@ -630,19 +900,36 @@ struct dict_table_struct{
database pages */
ulint stat_sum_of_other_index_sizes;
/*!< other indexes in database pages */
- ulint stat_modified_counter;
+ ib_uint64_t stat_modified_counter;
/*!< when a row is inserted, updated,
or deleted,
we add 1 to this number; we calculate new
estimates for the stat_... values for the
- table and the indexes at an interval of 2 GB
- or when about 1 / 16 of table has been
- modified; also when the estimate operation is
+ table and the indexes when about 1 / 16 of
+ table has been modified;
+ also when the estimate operation is
called for MySQL SHOW TABLE STATUS; the
counter is reset to zero at statistics
calculation; this counter is not protected by
any latch, because this is only used for
heuristics */
+#define BG_STAT_NONE 0
+#define BG_STAT_IN_PROGRESS (1 << 0)
+ /*!< BG_STAT_IN_PROGRESS is set in
+ stats_bg_flag when the background
+ stats code is working on this table. The DROP
+ TABLE code waits for this to be cleared
+ before proceeding. */
+#define BG_STAT_SHOULD_QUIT (1 << 1)
+ /*!< BG_STAT_SHOULD_QUIT is set in
+ stats_bg_flag when DROP TABLE starts
+ waiting on BG_STAT_IN_PROGRESS to be cleared,
+ the background stats thread will detect this
+ and will eventually quit sooner */
+ byte stats_bg_flag;
+ /*!< see BG_STAT_* above.
+ Writes are covered by dict_sys->mutex.
+ Dirty reads are possible. */
/* @} */
/*----------------------*/
/**!< The following fields are used by the
@@ -652,8 +939,8 @@ struct dict_table_struct{
whether a transaction has locked the AUTOINC
lock we keep a pointer to the transaction
here in the autoinc_trx variable. This is to
- avoid acquiring the kernel mutex and scanning
- the vector in trx_t.
+ avoid acquiring the lock_sys_t::mutex and
+ scanning the vector in trx_t.
When an AUTOINC lock has to wait, the
corresponding lock instance is created on
@@ -668,7 +955,7 @@ struct dict_table_struct{
space from the lock heap of the trx:
otherwise the lock heap would grow rapidly
if we do a large insert from a select */
- mutex_t autoinc_mutex;
+ ib_mutex_t autoinc_mutex;
/*!< mutex protecting the autoincrement
counter */
ib_uint64_t autoinc;/*!< autoinc counter value to give to the
@@ -677,22 +964,46 @@ struct dict_table_struct{
/*!< This counter is used to track the number
of granted and pending autoinc locks on this
table. This value is set after acquiring the
- kernel mutex but we peek the contents to
+ lock_sys_t::mutex but we peek the contents to
determine whether other transactions have
acquired the AUTOINC lock or not. Of course
only one transaction can be granted the
lock but there can be multiple waiters. */
- const trx_t* autoinc_trx;
+ const trx_t* autoinc_trx;
/*!< The transaction that currently holds the
- the AUTOINC lock on this table. */
+ AUTOINC lock on this table.
+ Protected by lock_sys->mutex. */
+ fts_t* fts; /* FTS specific state variables */
/* @} */
/*----------------------*/
+
+ ib_quiesce_t quiesce;/*!< Quiescing states, protected by the
+ dict_index_t::lock. ie. we can only change
+ the state if we acquire all the latches
+ (dict_index_t::lock) in X mode of this table's
+ indexes. */
+
+ /*----------------------*/
+ ulint n_rec_locks;
+ /*!< Count of the number of record locks on
+ this table. We use this to determine whether
+ we can evict the table from the dictionary
+ cache. It is protected by lock_sys->mutex. */
+ ulint n_ref_count;
+ /*!< count of how many handles are opened
+ to this table; dropping of the table is
+ NOT allowed until this count gets to zero;
+ MySQL does NOT itself check the number of
+ open handles at drop */
+ UT_LIST_BASE_NODE_T(lock_t)
+ locks; /*!< list of locks on the table; protected
+ by lock_sys->mutex */
ibool is_corrupt;
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
ulint magic_n;/*!< magic number */
-/** Value of dict_table_struct::magic_n */
+/** Value of dict_table_t::magic_n */
# define DICT_TABLE_MAGIC_N 76333786
#endif /* UNIV_DEBUG */
};
@@ -701,4 +1012,6 @@ struct dict_table_struct{
#include "dict0mem.ic"
#endif
+#endif /* !UNIV_INNOCHECKSUM */
+
#endif
diff --git a/storage/xtradb/include/dict0mem.ic b/storage/xtradb/include/dict0mem.ic
index 41dacb1c643..38d51f61789 100644
--- a/storage/xtradb/include/dict0mem.ic
+++ b/storage/xtradb/include/dict0mem.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -57,16 +57,18 @@ dict_mem_fill_index_struct(
index->fields = NULL;
}
- index->type = type;
+ /* Assign a ulint to a 4-bit-mapped field.
+ Only the low-order 4 bits are assigned. */
+ index->type = type;
#ifndef UNIV_HOTBACKUP
- index->space = (unsigned int) space;
- index->page = FIL_NULL;
+ index->space = (unsigned int) space;
+ index->page = FIL_NULL;
#endif /* !UNIV_HOTBACKUP */
- index->table_name = table_name;
- index->n_fields = (unsigned int) n_fields;
- /* The '1 +' above prevents allocation
- of an empty mem block */
+ index->table_name = table_name;
+ index->n_fields = (unsigned int) n_fields;
+ /* The '1 +' above prevents allocation
+ of an empty mem block */
#ifdef UNIV_DEBUG
- index->magic_n = DICT_INDEX_MAGIC_N;
+ index->magic_n = DICT_INDEX_MAGIC_N;
#endif /* UNIV_DEBUG */
}
diff --git a/storage/xtradb/include/dict0priv.h b/storage/xtradb/include/dict0priv.h
new file mode 100644
index 00000000000..9a3c8e22992
--- /dev/null
+++ b/storage/xtradb/include/dict0priv.h
@@ -0,0 +1,63 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0priv.h
+Data dictionary private functions
+
+Created Fri 2 Jul 2010 13:30:38 EST - Sunny Bains
+*******************************************************/
+
+#ifndef dict0priv_h
+#define dict0priv_h
+
+/**********************************************************************//**
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function. Note: Not to be called from outside dict0*c functions.
+@return table, NULL if not found */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+ const char* table_name); /*!< in: table name */
+
+/**********************************************************************//**
+Checks if a table is in the dictionary cache.
+@return table, NULL if not found */
+UNIV_INLINE
+dict_table_t*
+dict_table_check_if_in_cache_low(
+/*=============================*/
+ const char* table_name); /*!< in: table name */
+
+/**********************************************************************//**
+Returns a table object based on table id.
+@return table, NULL if does not exist */
+UNIV_INLINE
+dict_table_t*
+dict_table_open_on_id_low(
+/*=====================*/
+ table_id_t table_id, /*!< in: table id */
+ dict_err_ignore_t ignore_err); /*!< in: errors to ignore
+ when loading the table */
+
+#ifndef UNIV_NONINL
+#include "dict0priv.ic"
+#endif
+
+#endif /* dict0priv.h */
diff --git a/storage/xtradb/include/dict0priv.ic b/storage/xtradb/include/dict0priv.ic
new file mode 100644
index 00000000000..30ba8fb60aa
--- /dev/null
+++ b/storage/xtradb/include/dict0priv.ic
@@ -0,0 +1,125 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0priv.ic
+Data dictionary system private include file
+
+Created Wed 13 Oct 2010 16:10:14 EST Sunny Bains
+***********************************************************************/
+
+#include "dict0dict.h"
+#include "dict0load.h"
+#include "dict0priv.h"
+#ifndef UNIV_HOTBACKUP
+
+/**********************************************************************//**
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function; dict_sys->mutex ownership is asserted with ut_ad().
+@return table; NULL if not found, or if corrupted and !srv_load_corrupted */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+ const char* table_name) /*!< in: table name */
+{
+ dict_table_t* table;
+
+ ut_ad(table_name);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ table = dict_table_check_if_in_cache_low(table_name);
+
+ if (table && table->corrupted) {
+ fprintf(stderr, "InnoDB: table");
+ ut_print_name(stderr, NULL, TRUE, table->name);
+ if (srv_load_corrupted) {
+ fputs(" is corrupted, but"
+ " innodb_force_load_corrupted is set\n", stderr);
+ } else {
+ fputs(" is corrupted\n", stderr);
+ return(NULL);
+ }
+ }
+
+ if (table == NULL) {
+ table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE);
+ }
+
+ ut_ad(!table || table->cached);
+
+ return(table);
+}
+
+/**********************************************************************//**
+Returns a table object based on table id, loading it on a hash lookup miss.
+@return table, NULL if does not exist */
+UNIV_INLINE
+dict_table_t*
+dict_table_open_on_id_low(
+/*======================*/
+ table_id_t table_id, /*!< in: table id */
+ dict_err_ignore_t ignore_err) /*!< in: errors to ignore
+ when loading the table */
+{
+ dict_table_t* table;
+ ulint fold;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ /* Look for the table id in the table_id_hash hash table */
+ fold = ut_fold_ull(table_id);
+
+ HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
+ dict_table_t*, table, ut_ad(table->cached),
+ table->id == table_id);
+ if (table == NULL) {
+ table = dict_load_table_on_id(table_id, ignore_err);
+ }
+
+ ut_ad(!table || table->cached);
+
+ /* TODO: should get the type information from MySQL */
+
+ return(table);
+}
+
+/**********************************************************************//**
+Checks if a table is in the dictionary cache (by-name hash lookup only).
+@return table, NULL if not found */
+UNIV_INLINE
+dict_table_t*
+dict_table_check_if_in_cache_low(
+/*=============================*/
+ const char* table_name) /*!< in: table name */
+{
+ dict_table_t* table;
+ ulint table_fold;
+
+ ut_ad(table_name);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ /* Look for the table name in the dict_sys->table_hash hash table */
+ table_fold = ut_fold_string(table_name);
+
+ HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
+ dict_table_t*, table, ut_ad(table->cached),
+ !strcmp(table->name, table_name));
+ return(table);
+}
+#endif /*! UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/dict0stats.h b/storage/xtradb/include/dict0stats.h
new file mode 100644
index 00000000000..186f90e3694
--- /dev/null
+++ b/storage/xtradb/include/dict0stats.h
@@ -0,0 +1,202 @@
+/*****************************************************************************
+
+Copyright (c) 2009, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats.h
+Code used for calculating and manipulating table statistics.
+
+Created Jan 06, 2010 Vasil Dimov
+*******************************************************/
+
+#ifndef dict0stats_h
+#define dict0stats_h
+
+#include "univ.i"
+
+#include "db0err.h"
+#include "dict0types.h"
+#include "trx0types.h"
+
+enum dict_stats_upd_option_t {
+ DICT_STATS_RECALC_PERSISTENT,/* (re) calculate the
+ statistics using a precise and slow
+ algorithm and save them to the persistent
+ storage, if the persistent storage is
+ not present then emit a warning and
+ fall back to transient stats */
+ DICT_STATS_RECALC_TRANSIENT,/* (re) calculate the statistics
+ using an imprecise quick algorithm
+ without saving the results
+ persistently */
+ DICT_STATS_EMPTY_TABLE, /* Write all zeros (or 1 where it makes sense)
+ into a table and its indexes' statistics
+ members. The resulting stats correspond to an
+ empty table. If the table is using persistent
+ statistics, then they are saved on disk. */
+ DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* fetch the stats
+ from the persistent storage if the in-memory
+ structures have not been initialized yet,
+ otherwise do nothing */
+};
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced. */
+UNIV_INTERN
+void
+dict_stats_update_transient(
+/*========================*/
+ dict_table_t* table); /*!< in/out: table */
+
+/*********************************************************************//**
+Set the persistent statistics flag for a given table. This is set only
+in the in-memory table object and is not saved on disk. It will be read
+from the .frm file upon first open from MySQL after a server restart. */
+UNIV_INLINE
+void
+dict_stats_set_persistent(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool ps_on, /*!< in: persistent stats explicitly enabled */
+ ibool ps_off) /*!< in: persistent stats explicitly disabled */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Check whether persistent statistics is enabled for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_is_persistent_enabled(
+/*=============================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Set the auto recalc flag for a given table (only honored for a persistent
+stats enabled table). The flag is set only in the in-memory table object
+and is not saved in InnoDB files. It will be read from the .frm file upon
+first open from MySQL after a server restart. */
+UNIV_INLINE
+void
+dict_stats_auto_recalc_set(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool auto_recalc_on, /*!< in: explicitly enabled */
+ ibool auto_recalc_off); /*!< in: explicitly disabled */
+
+/*********************************************************************//**
+Check whether auto recalc is enabled for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_auto_recalc_is_enabled(
+/*==============================*/
+ const dict_table_t* table); /*!< in: table */
+
+/*********************************************************************//**
+Initialize table's stats for the first time when opening a table. */
+UNIV_INLINE
+void
+dict_stats_init(
+/*============*/
+ dict_table_t* table); /*!< in/out: table */
+
+/*********************************************************************//**
+Deinitialize table's stats after the last close of the table. This is
+used to detect "FLUSH TABLE" and refresh the stats upon next open. */
+UNIV_INLINE
+void
+dict_stats_deinit(
+/*==============*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization.
+@return DB_* error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_stats_update(
+/*==============*/
+ dict_table_t* table, /*!< in/out: table */
+ dict_stats_upd_option_t stats_upd_option);
+ /*!< in: whether to (re) calc
+ the stats or to fetch them from
+ the persistent storage */
+
+/*********************************************************************//**
+Removes the information for a particular index's stats from the persistent
+storage if it exists and if there is data stored for this index.
+This function creates its own trx and commits it.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_drop_index(
+/*==================*/
+ const char* tname, /*!< in: table name */
+ const char* iname, /*!< in: index name */
+ char* errstr, /*!< out: error message if != DB_SUCCESS
+ is returned */
+ ulint errstr_sz);/*!< in: size of the errstr buffer */
+
+/*********************************************************************//**
+Removes the statistics for a table and all of its indexes from the
+persistent storage if it exists and if there is data stored for the table.
+This function creates its own transaction and commits it.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_drop_table(
+/*==================*/
+ const char* table_name, /*!< in: table name */
+ char* errstr, /*!< out: error message
+ if != DB_SUCCESS is returned */
+ ulint errstr_sz); /*!< in: size of errstr buffer */
+
+/*********************************************************************//**
+Fetches or calculates new estimates for index statistics. */
+UNIV_INTERN
+void
+dict_stats_update_for_index(
+/*========================*/
+ dict_index_t* index) /*!< in/out: index */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Renames a table in InnoDB persistent stats storage.
+This function creates its own transaction and commits it.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_rename_table(
+/*====================*/
+ const char* old_name, /*!< in: old table name */
+ const char* new_name, /*!< in: new table name */
+ char* errstr, /*!< out: error string if != DB_SUCCESS
+ is returned */
+ size_t errstr_sz); /*!< in: errstr size */
+
+#ifndef UNIV_NONINL
+#include "dict0stats.ic"
+#endif
+
+#endif /* dict0stats_h */
diff --git a/storage/xtradb/include/dict0stats.ic b/storage/xtradb/include/dict0stats.ic
new file mode 100644
index 00000000000..8fb31678af9
--- /dev/null
+++ b/storage/xtradb/include/dict0stats.ic
@@ -0,0 +1,236 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats.ic
+Code used for calculating and manipulating table statistics.
+
+Created Jan 23, 2012 Vasil Dimov
+*******************************************************/
+
+#include "univ.i"
+#include "dict0dict.h" /* dict_table_stats_lock() */
+#include "dict0types.h" /* dict_table_t */
+#include "srv0srv.h" /* srv_stats_persistent, srv_stats_auto_recalc */
+
+/*********************************************************************//**
+Set the persistent statistics flag for a given table. This is set only
+in the in-memory table object and is not saved on disk. It will be read
+from the .frm file upon first open from MySQL after a server restart. */
+UNIV_INLINE
+void
+dict_stats_set_persistent(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool ps_on, /*!< in: persistent stats explicitly enabled */
+ ibool ps_off) /*!< in: persistent stats explicitly disabled */
+{
+ /* Not allowed to have both flags set, but a CREATE or ALTER
+ statement that contains "STATS_PERSISTENT=0 STATS_PERSISTENT=1" would
+ end up having both set. In this case we clear the OFF flag (ON wins). */
+ if (ps_on && ps_off) {
+ ps_off = FALSE;
+ }
+
+ ib_uint32_t stat_persistent = 0;
+
+ if (ps_on) {
+ stat_persistent |= DICT_STATS_PERSISTENT_ON;
+ }
+
+ if (ps_off) {
+ stat_persistent |= DICT_STATS_PERSISTENT_OFF;
+ }
+
+ /* no latch is taken here; we rely on this assignment to be atomic */
+ table->stat_persistent = stat_persistent;
+}
+
+/*********************************************************************//**
+Check whether persistent statistics is enabled for a given table.
+@return TRUE/FALSE per the table's explicit flag, else srv_stats_persistent */
+UNIV_INLINE
+ibool
+dict_stats_is_persistent_enabled(
+/*=============================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ /* Because of the nature of this check (non-locking) it is possible
+ that a table becomes:
+ * PS-disabled immediately after this function has returned TRUE or
+ * PS-enabled immediately after this function has returned FALSE.
+ This means that it is possible that we do:
+ + dict_stats_update(DICT_STATS_RECALC_PERSISTENT) on a table that has
+ just been PS-disabled or
+ + dict_stats_update(DICT_STATS_RECALC_TRANSIENT) on a table that has
+ just been PS-enabled.
+ This is acceptable. Avoiding this would mean that we would have to
+ protect the ::stat_persistent with dict_table_stats_lock() like the
+ other ::stat_ members, which would be too big a performance penalty,
+ especially when this function is called from
+ row_update_statistics_if_needed(). */
+
+ /* we rely on this read to be atomic */
+ ib_uint32_t stat_persistent = table->stat_persistent;
+
+ if (stat_persistent & DICT_STATS_PERSISTENT_ON) {
+ ut_ad(!(stat_persistent & DICT_STATS_PERSISTENT_OFF));
+ return(TRUE);
+ } else if (stat_persistent & DICT_STATS_PERSISTENT_OFF) {
+ return(FALSE);
+ } else {
+ return(srv_stats_persistent);
+ }
+}
+
+/*********************************************************************//**
+Set the auto recalc flag for a given table (only honored for a persistent
+stats enabled table). At most one of auto_recalc_on/auto_recalc_off may be
+set. The flag lives only in the in-memory table object, not in InnoDB files;
+it is read from the .frm file upon first open after a server restart. */
+UNIV_INLINE
+void
+dict_stats_auto_recalc_set(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool auto_recalc_on, /*!< in: explicitly enabled */
+ ibool auto_recalc_off) /*!< in: explicitly disabled */
+{
+ ut_ad(!auto_recalc_on || !auto_recalc_off);
+
+ ib_uint32_t stats_auto_recalc = 0;
+
+ if (auto_recalc_on) {
+ stats_auto_recalc |= DICT_STATS_AUTO_RECALC_ON;
+ }
+
+ if (auto_recalc_off) {
+ stats_auto_recalc |= DICT_STATS_AUTO_RECALC_OFF;
+ }
+
+ /* no latch is taken here; we rely on this assignment to be atomic */
+ table->stats_auto_recalc = stats_auto_recalc;
+}
+
+/*********************************************************************//**
+Check whether auto recalc is enabled for a given table.
+@return TRUE/FALSE per the table's explicit flag, else srv_stats_auto_recalc */
+UNIV_INLINE
+ibool
+dict_stats_auto_recalc_is_enabled(
+/*==============================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ /* no latch is taken here; we rely on this read to be atomic */
+ ib_uint32_t stats_auto_recalc = table->stats_auto_recalc;
+
+ if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_ON) {
+ ut_ad(!(stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF));
+ return(TRUE);
+ } else if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF) {
+ return(FALSE);
+ } else {
+ return(srv_stats_auto_recalc);
+ }
+}
+
+/*********************************************************************//**
+Initialize table's stats on first open; does nothing if already initialized. */
+UNIV_INLINE
+void
+dict_stats_init(
+/*============*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ ut_ad(!mutex_own(&dict_sys->mutex));
+
+ if (table->stat_initialized) {
+ return;
+ }
+
+ dict_stats_upd_option_t opt;
+
+ if (dict_stats_is_persistent_enabled(table)) {
+ opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
+ } else {
+ opt = DICT_STATS_RECALC_TRANSIENT;
+ }
+
+ dict_stats_update(table, opt);
+}
+
+/*********************************************************************//**
+Deinitialize table's stats after the last close of the table (n_ref_count
+must be 0). Used to detect "FLUSH TABLE" and refresh the stats on next open. */
+UNIV_INLINE
+void
+dict_stats_deinit(
+/*==============*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ ut_a(table->n_ref_count == 0);
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ if (!table->stat_initialized) {
+ dict_table_stats_unlock(table, RW_X_LATCH);
+ return;
+ }
+
+ table->stat_initialized = FALSE;
+
+#ifdef UNIV_DEBUG_VALGRIND
+ UNIV_MEM_INVALID(&table->stat_n_rows,
+ sizeof(table->stat_n_rows));
+ UNIV_MEM_INVALID(&table->stat_clustered_index_size,
+ sizeof(table->stat_clustered_index_size));
+ UNIV_MEM_INVALID(&table->stat_sum_of_other_index_sizes,
+ sizeof(table->stat_sum_of_other_index_sizes));
+ UNIV_MEM_INVALID(&table->stat_modified_counter,
+ sizeof(table->stat_modified_counter));
+
+ dict_index_t* index;
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ ulint n_uniq = dict_index_get_n_unique(index);
+
+ UNIV_MEM_INVALID(
+ index->stat_n_diff_key_vals,
+ n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
+ UNIV_MEM_INVALID(
+ index->stat_n_sample_sizes,
+ n_uniq * sizeof(index->stat_n_sample_sizes[0]));
+ UNIV_MEM_INVALID(
+ index->stat_n_non_null_key_vals,
+ n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
+ UNIV_MEM_INVALID(
+ &index->stat_index_size,
+ sizeof(index->stat_index_size));
+ UNIV_MEM_INVALID(
+ &index->stat_n_leaf_pages,
+ sizeof(index->stat_n_leaf_pages));
+ }
+#endif /* UNIV_DEBUG_VALGRIND */
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+}
diff --git a/storage/xtradb/include/dict0stats_bg.h b/storage/xtradb/include/dict0stats_bg.h
new file mode 100644
index 00000000000..e866ab419fe
--- /dev/null
+++ b/storage/xtradb/include/dict0stats_bg.h
@@ -0,0 +1,127 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats_bg.h
+Code used for background table and index stats gathering.
+
+Created Apr 26, 2012 Vasil Dimov
+*******************************************************/
+
+#ifndef dict0stats_bg_h
+#define dict0stats_bg_h
+
+#include "univ.i"
+
+#include "dict0types.h" /* dict_table_t, table_id_t */
+#include "os0sync.h" /* os_event_t */
+#include "os0thread.h" /* DECLARE_THREAD */
+
+/** Event to wake up the stats thread */
+extern os_event_t dict_stats_event;
+
+/*****************************************************************//**
+Add a table to the recalc pool, which is processed by the
+background stats gathering thread. Only the table id is added to the
+list, so the table can be closed after being enqueued and it will be
+opened when needed. If the table does not exist later (has been DROPped),
+then it will be removed from the pool and skipped. */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_add(
+/*=======================*/
+ const dict_table_t* table); /*!< in: table to add */
+
+/*****************************************************************//**
+Delete a given table from the auto recalc pool.
+dict_stats_recalc_pool_del() */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_del(
+/*=======================*/
+ const dict_table_t* table); /*!< in: table to remove */
+
+/** Yield the data dictionary latch when waiting
+for the background thread to stop accessing a table: unlock, sleep, relock.
+@param trx transaction holding the data dictionary locks */
+#define DICT_STATS_BG_YIELD(trx) do { \
+ row_mysql_unlock_data_dictionary(trx); \
+ os_thread_sleep(250000); \
+ row_mysql_lock_data_dictionary(trx); \
+} while (0)
+
+/*****************************************************************//**
+Request the background collection of statistics to stop for a table.
+@retval true when no background process is active
+@retval false when it is not safe to modify the table definition */
+UNIV_INLINE
+bool
+dict_stats_stop_bg(
+/*===============*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((warn_unused_result));
+
+/*****************************************************************//**
+Wait until background stats thread has stopped using the specified table.
+The caller must have locked the data dictionary using
+row_mysql_lock_data_dictionary() and this function may unlock it temporarily
+and restore the lock before it exits.
+The background stats thread is guaranteed not to start using the specified
+table after this function returns and before the caller unlocks the data
+dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
+under dict_sys->mutex. */
+UNIV_INTERN
+void
+dict_stats_wait_bg_to_stop_using_table(
+/*===================================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx); /*!< in/out: transaction to use for
+ unlocking/locking the data dict */
+/*****************************************************************//**
+Initialize global variables needed for the operation of dict_stats_thread().
+Must be called before dict_stats_thread() is started. */
+UNIV_INTERN
+void
+dict_stats_thread_init();
+/*====================*/
+
+/*****************************************************************//**
+Free resources allocated by dict_stats_thread_init(), must be called
+after dict_stats_thread() has exited. */
+UNIV_INTERN
+void
+dict_stats_thread_deinit();
+/*======================*/
+
+/*****************************************************************//**
+This is the thread for background stats gathering. It pops tables from
+the auto recalc list and processes them, eventually recalculating their
+statistics.
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(dict_stats_thread)(
+/*==============================*/
+ void* arg); /*!< in: a dummy parameter
+ required by os_thread_create */
+
+# ifndef UNIV_NONINL
+# include "dict0stats_bg.ic"
+# endif
+
+#endif /* dict0stats_bg_h */
diff --git a/storage/xtradb/include/dict0stats_bg.ic b/storage/xtradb/include/dict0stats_bg.ic
new file mode 100644
index 00000000000..87e3225de58
--- /dev/null
+++ b/storage/xtradb/include/dict0stats_bg.ic
@@ -0,0 +1,45 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats_bg.ic
+Code used for background table and index stats gathering.
+
+Created Feb 8, 2013 Marko Makela
+*******************************************************/
+
+/*****************************************************************//**
+Request the background collection of statistics to stop for a table.
+@retval true when no background process is active (flags left unchanged)
+@retval false when a stop was requested; not yet safe to modify the table */
+UNIV_INLINE
+bool
+dict_stats_stop_bg(
+/*===============*/
+ dict_table_t* table) /*!< in/out: table; its stats_bg_flag may be updated */
+{
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex)); /* caller must hold dict_sys->mutex */
+
+ if (!(table->stats_bg_flag & BG_STAT_IN_PROGRESS)) {
+ return(true); /* BG_STAT_IN_PROGRESS not set: nothing to stop */
+ }
+
+ table->stats_bg_flag |= BG_STAT_SHOULD_QUIT; /* record the stop request */
+ return(false);
+}
diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h
index 330e6a25114..6acb6a2dcbe 100644
--- a/storage/xtradb/include/dict0types.h
+++ b/storage/xtradb/include/dict0types.h
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,20 +26,24 @@ Created 1/8/1996 Heikki Tuuri
#ifndef dict0types_h
#define dict0types_h
-typedef struct dict_sys_struct dict_sys_t;
-typedef struct dict_col_struct dict_col_t;
-typedef struct dict_field_struct dict_field_t;
-typedef struct dict_index_struct dict_index_t;
-typedef struct dict_table_struct dict_table_t;
-typedef struct dict_foreign_struct dict_foreign_t;
+struct dict_sys_t;
+struct dict_col_t;
+struct dict_field_t;
+struct dict_index_t;
+struct dict_table_t;
+struct dict_foreign_t;
-typedef struct ind_node_struct ind_node_t;
-typedef struct tab_node_struct tab_node_t;
+struct ind_node_t;
+struct tab_node_t;
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
+/* The IDs of the ibuf table and its indexes are assigned as the number
+DICT_IBUF_ID_MIN plus the space id */
+#define DICT_IBUF_ID_MIN 0xFFFFFFFF00000000ULL
+
typedef ib_id_t table_id_t;
typedef ib_id_t index_id_t;
@@ -48,17 +52,32 @@ the table and index will be marked as "corrupted", and caller will
be responsible to deal with corrupted table or index.
Note: please define the IGNORE_ERR_* as bits, so their value can
be or-ed together */
-enum dict_err_ignore {
- DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */
- DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
+enum dict_err_ignore_t {
+ DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */
+ DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
page is FIL_NULL or incorrect value */
DICT_ERR_IGNORE_CORRUPT = 2, /*!< skip corrupted indexes */
DICT_ERR_IGNORE_FK_NOKEY = 4, /*!< ignore error if any foreign
key is missing */
- DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
+ DICT_ERR_IGNORE_RECOVER_LOCK = 8,
+ /*!< Used when recovering table locks
+ for resurrected transactions.
+ Silently load a missing
+ tablespace, and do not load
+ incomplete index definitions. */
+ DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
+};
+
+/** Quiescing states for flushing tables to disk. */
+enum ib_quiesce_t {
+ QUIESCE_NONE,
+ QUIESCE_START, /*!< Initialise, prepare to start */
+ QUIESCE_COMPLETE /*!< All done */
};
-typedef enum dict_err_ignore dict_err_ignore_t;
+/** Prefix for tmp tables, adopted from sql/table.h */
+#define tmp_file_prefix "#sql"
+#define tmp_file_prefix_length 4
#define TEMP_TABLE_PREFIX "#sql"
#define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX
diff --git a/storage/xtradb/include/dyn0dyn.h b/storage/xtradb/include/dyn0dyn.h
index 62ed862e82c..7f23302d1ff 100644
--- a/storage/xtradb/include/dyn0dyn.h
+++ b/storage/xtradb/include/dyn0dyn.h
@@ -31,10 +31,9 @@ Created 2/5/1996 Heikki Tuuri
#include "mem0mem.h"
/** A block in a dynamically allocated array */
-typedef struct dyn_block_struct dyn_block_t;
+struct dyn_block_t;
/** Dynamically allocated array */
-typedef dyn_block_t dyn_array_t;
-
+typedef dyn_block_t dyn_array_t;
/** This is the initial 'payload' size of a dynamic array;
this must be > MLOG_BUF_MARGIN + 30! */
@@ -171,7 +170,7 @@ dyn_push_string(
/** @brief A block in a dynamically allocated array.
NOTE! Do not access the fields of the struct directly: the definition
appears here only for the compiler to know its size! */
-struct dyn_block_struct{
+struct dyn_block_t{
mem_heap_t* heap; /*!< in the first block this is != NULL
if dynamic allocation has been needed */
ulint used; /*!< number of data bytes used in this block;
diff --git a/storage/xtradb/include/dyn0dyn.ic b/storage/xtradb/include/dyn0dyn.ic
index 177877ed1fd..0296554e2ee 100644
--- a/storage/xtradb/include/dyn0dyn.ic
+++ b/storage/xtradb/include/dyn0dyn.ic
@@ -23,9 +23,9 @@ The dynamically allocated array
Created 2/5/1996 Heikki Tuuri
*******************************************************/
-/** Value of dyn_block_struct::magic_n */
+/** Value of dyn_block_t::magic_n */
#define DYN_BLOCK_MAGIC_N 375767
-/** Flag for dyn_block_struct::used that indicates a full block */
+/** Flag for dyn_block_t::used that indicates a full block */
#define DYN_BLOCK_FULL_FLAG 0x1000000UL
/************************************************************//**
@@ -63,7 +63,7 @@ dyn_block_get_data(
{
ut_ad(block);
- return((byte*) block->data);
+ return(const_cast<byte*>(block->data));
}
/*********************************************************************//**
@@ -245,7 +245,7 @@ dyn_array_get_element(
ut_ad(block);
ut_ad(dyn_block_get_used(block) >= pos);
- return((byte*) block->data + pos);
+ return(const_cast<byte*>(block->data) + pos);
}
/************************************************************//**
diff --git a/storage/xtradb/include/eval0eval.h b/storage/xtradb/include/eval0eval.h
index c12df320b88..e3b1e6c16b6 100644
--- a/storage/xtradb/include/eval0eval.h
+++ b/storage/xtradb/include/eval0eval.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/eval0eval.ic b/storage/xtradb/include/eval0eval.ic
index d0ca4c9bea5..e4b1dd08017 100644
--- a/storage/xtradb/include/eval0eval.ic
+++ b/storage/xtradb/include/eval0eval.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -70,7 +70,7 @@ eval_node_ensure_val_buf(
dfield = que_node_get_val(node);
dfield_set_len(dfield, size);
- data = dfield_get_data(dfield);
+ data = static_cast<byte*>(dfield_get_data(dfield));
if (!data || que_node_get_val_buf_size(node) < size) {
@@ -110,12 +110,12 @@ eval_exp(
{
if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) {
- eval_sym((sym_node_t*)exp_node);
+ eval_sym((sym_node_t*) exp_node);
return;
}
- eval_func(exp_node);
+ eval_func(static_cast<func_node_t*>(exp_node));
}
/*****************************************************************//**
@@ -132,7 +132,7 @@ eval_node_set_int_val(
dfield = que_node_get_val(node);
- data = dfield_get_data(dfield);
+ data = static_cast<byte*>(dfield_get_data(dfield));
if (data == NULL) {
data = eval_node_alloc_val_buf(node, 4);
@@ -140,7 +140,7 @@ eval_node_set_int_val(
ut_ad(dfield_get_len(dfield) == 4);
- mach_write_to_4(data, (ulint)val);
+ mach_write_to_4(data, (ulint) val);
}
/*****************************************************************//**
@@ -152,13 +152,15 @@ eval_node_get_int_val(
/*==================*/
que_node_t* node) /*!< in: expression node */
{
+ const byte* ptr;
dfield_t* dfield;
dfield = que_node_get_val(node);
+ ptr = static_cast<byte*>(dfield_get_data(dfield));
ut_ad(dfield_get_len(dfield) == 4);
- return((int)mach_read_from_4(dfield_get_data(dfield)));
+ return((int) mach_read_from_4(ptr));
}
/*****************************************************************//**
@@ -175,7 +177,7 @@ eval_node_get_ibool_val(
dfield = que_node_get_val(node);
- data = dfield_get_data(dfield);
+ data = static_cast<byte*>(dfield_get_data(dfield));
ut_ad(data != NULL);
@@ -196,7 +198,7 @@ eval_node_set_ibool_val(
dfield = que_node_get_val(func_node);
- data = dfield_get_data(dfield);
+ data = static_cast<byte*>(dfield_get_data(dfield));
if (data == NULL) {
/* Allocate 1 byte to hold the value */
@@ -246,6 +248,8 @@ eval_node_copy_val(
dfield2 = que_node_get_val(node2);
- eval_node_copy_and_alloc_val(node1, dfield_get_data(dfield2),
- dfield_get_len(dfield2));
+ eval_node_copy_and_alloc_val(
+ node1,
+ static_cast<byte*>(dfield_get_data(dfield2)),
+ dfield_get_len(dfield2));
}
diff --git a/storage/xtradb/include/eval0proc.h b/storage/xtradb/include/eval0proc.h
index 450fd5a27c3..7755fb10343 100644
--- a/storage/xtradb/include/eval0proc.h
+++ b/storage/xtradb/include/eval0proc.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/eval0proc.ic b/storage/xtradb/include/eval0proc.ic
index 6949af1557b..81418bae2c9 100644
--- a/storage/xtradb/include/eval0proc.ic
+++ b/storage/xtradb/include/eval0proc.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -40,7 +40,7 @@ proc_step(
ut_ad(thr);
- node = thr->run_node;
+ node = static_cast<proc_node_t*>(thr->run_node);
ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
if (thr->prev_node == que_node_get_parent(node)) {
@@ -75,7 +75,7 @@ proc_eval_step(
ut_ad(thr);
- node = thr->run_node;
+ node = static_cast<func_node_t*>(thr->run_node);
ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
/* Evaluate the procedure */
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
index a7d8d87035b..472c57fcbfc 100644
--- a/storage/xtradb/include/fil0fil.h
+++ b/storage/xtradb/include/fil0fil.h
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,15 +27,27 @@ Created 10/25/1995 Heikki Tuuri
#define fil0fil_h
#include "univ.i"
+
+#ifndef UNIV_INNOCHECKSUM
+
#include "dict0types.h"
#include "ut0byte.h"
#include "os0file.h"
#ifndef UNIV_HOTBACKUP
#include "sync0rw.h"
#include "ibuf0types.h"
+#include "log0log.h"
#endif /* !UNIV_HOTBACKUP */
#include "trx0types.h"
+#include <list>
+
+// Forward declaration
+struct trx_t;
+struct fil_space_t;
+
+typedef std::list<const char*> space_name_list_t;
+
/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and ibbackup it is not the default
directory, and we must set the base file path explicitly */
@@ -58,12 +70,8 @@ typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
-/** A struct for storing a space address FIL_ADDR, when it is used
-in C program data structures. */
-
-typedef struct fil_addr_struct fil_addr_t;
/** File space address */
-struct fil_addr_struct{
+struct fil_addr_t{
ulint page; /*!< page number within a space */
ulint boffset; /*!< byte offset within the page */
};
@@ -71,6 +79,8 @@ struct fil_addr_struct{
/** The null file address */
extern fil_addr_t fil_addr_null;
+#endif /* !UNIV_INNOCHECKSUM */
+
/** The byte offsets on a file page for various variables @{ */
#define FIL_PAGE_SPACE_OR_CHKSUM 0 /*!< in < MySQL-4.0.14 space id the
page belongs to (== 0) but in later
@@ -119,7 +129,6 @@ extern fil_addr_t fil_addr_null;
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
contains the space id of the page */
#define FIL_PAGE_DATA 38 /*!< start of the data on the page */
-#define FIL_PAGE_DATA_ALIGN_32 40
/* @} */
/** File page trailer @{ */
#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used
@@ -148,6 +157,8 @@ extern fil_addr_t fil_addr_null;
/*!< Last page type */
/* @} */
+#ifndef UNIV_INNOCHECKSUM
+
/** Space types @{ */
#define FIL_TABLESPACE 501 /*!< tablespace */
#define FIL_LOG 502 /*!< redo log */
@@ -161,6 +172,8 @@ extern ulint fil_n_pending_log_flushes;
/** Number of pending tablespace flushes */
extern ulint fil_n_pending_tablespace_flushes;
+/** Number of files currently open */
+extern ulint fil_n_file_opened;
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
@@ -176,7 +189,7 @@ fil_space_get_version(
Returns the latch of a file space.
@return latch protecting storage allocation */
UNIV_INTERN
-rw_lock_t*
+prio_rw_lock_t*
fil_space_get_latch(
/*================*/
ulint id, /*!< in: space id */
@@ -192,17 +205,19 @@ fil_space_get_type(
ulint id); /*!< in: space id */
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed. */
+Appends a new file to the chain of files of a space. File must be closed.
+@return pointer to the file name, or NULL on error */
UNIV_INTERN
-void
+char*
fil_node_create(
/*============*/
const char* name, /*!< in: file name (file must be closed) */
ulint size, /*!< in: file size in database blocks, rounded
downwards to an integer */
ulint id, /*!< in: space id where to append */
- ibool is_raw);/*!< in: TRUE if a raw device or
+ ibool is_raw) /*!< in: TRUE if a raw device or
a raw disk partition */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_LOG_ARCHIVE
/****************************************************************//**
Drops files from the start of a file space, so that its size is cut by
@@ -215,10 +230,18 @@ fil_space_truncate_start(
ulint trunc_len); /*!< in: truncate by this much; it is an error
if this does not equal to the combined size of
some initial files in the space */
+/****************************************************************//**
+Checks whether the file space contains a node with the given name. */
+UNIV_INTERN
+ibool
+fil_space_contains_node(
+/*====================*/
+ ulint id, /*!< in: space id */
+ char* node_name); /*!< in: node name */
#endif /* UNIV_LOG_ARCHIVE */
/*******************************************************************//**
-Creates a space memory object and puts it to the 'fil system' hash table. If
-there is an error, prints an error message to the .err log.
+Creates a space memory object and puts it to the 'fil system' hash table.
+If there is an error, prints an error message to the .err log.
@return TRUE if success */
UNIV_INTERN
ibool
@@ -240,6 +263,16 @@ fil_assign_new_space_id(
/*====================*/
ulint* space_id); /*!< in/out: space id */
/*******************************************************************//**
+Returns the path from the first fil_node_t found for the space ID sent.
+The caller is responsible for freeing the memory allocated here for the
+value returned.
+@return a copy of fil_node_t::path, NULL if space is zero or not found. */
+UNIV_INTERN
+char*
+fil_space_get_first_path(
+/*=====================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
@return space size, 0 if space not found */
@@ -308,6 +341,14 @@ void
fil_close_all_files(void);
/*=====================*/
/*******************************************************************//**
+Closes the redo log files. There must not be any pending i/o's or not
+flushed modifications in the files. */
+UNIV_INTERN
+void
+fil_close_log_files(
+/*================*/
+ bool free); /*!< in: whether to free the memory object */
+/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
UNIV_INTERN
@@ -321,12 +362,11 @@ Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_write_flushed_lsn_to_data_files(
/*================================*/
- ib_uint64_t lsn, /*!< in: lsn to write */
- ulint arch_log_no); /*!< in: latest archived log
- file number */
+ lsn_t lsn, /*!< in: lsn to write */
+ ulint arch_log_no); /*!< in: latest archived log file number */
/*******************************************************************//**
Reads the flushed lsn, arch no, and tablespace flag fields from a data
file at database startup.
@@ -341,15 +381,10 @@ fil_read_first_page(
parameters below already
contain sensible data */
ulint* flags, /*!< out: tablespace flags */
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no, /*!< out: min of archived
- log numbers in data files */
- ulint* max_arch_log_no, /*!< out: max of archived
- log numbers in data files */
-#endif /* UNIV_LOG_ARCHIVE */
- ib_uint64_t* min_flushed_lsn, /*!< out: min of flushed
+ ulint* space_id, /*!< out: tablespace ID */
+ lsn_t* min_flushed_lsn, /*!< out: min of flushed
lsn values in data files */
- ib_uint64_t* max_flushed_lsn) /*!< out: max of flushed
+ lsn_t* max_flushed_lsn) /*!< out: max of flushed
lsn values in data files */
__attribute__((warn_unused_result));
/*******************************************************************//**
@@ -401,27 +436,44 @@ Deletes a single-table tablespace. The tablespace must be cached in the
memory cache.
@return TRUE if success */
UNIV_INTERN
-ibool
+dberr_t
fil_delete_tablespace(
/*==================*/
- ulint id, /*!< in: space id */
- ibool evict_all); /*!< in: TRUE if we want all pages
- evicted from LRU. */
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove); /*!< in: specify the action to take
+ on the tables pages in the buffer
+ pool */
+/*******************************************************************//**
+Closes a single-table tablespace. The tablespace must be cached in the
+memory cache. Free all pages used by the tablespace.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_close_tablespace(
+/*=================*/
+ trx_t* trx, /*!< in/out: Transaction covering the close */
+ ulint id); /*!< in: space id */
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
-1) we do not drop the table from the data dictionary;
-2) we remove all insert buffer entries for the tablespace immediately; in DROP
-TABLE they are only removed gradually in the background;
-3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had.
-@return TRUE if success */
+
+ 1. We do not drop the table from the data dictionary;
+
+ 2. We remove all insert buffer entries for the tablespace immediately;
+ in DROP TABLE they are only removed gradually in the background;
+
+ 3. When the user does IMPORT TABLESPACE, the tablespace will have the
+ same id as it originally had.
+
+ 4. Free all the pages in use by the tablespace if rename=TRUE.
+@return DB_SUCCESS or error */
UNIV_INTERN
-ibool
+dberr_t
fil_discard_tablespace(
/*===================*/
- ulint id); /*!< in: space id */
+ ulint id) /*!< in: space id */
+ __attribute__((warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Renames a single-table tablespace. The tablespace must be cached in the
@@ -431,16 +483,70 @@ UNIV_INTERN
ibool
fil_rename_tablespace(
/*==================*/
- const char* old_name, /*!< in: old table name in the standard
- databasename/tablename format of
- InnoDB, or NULL if we do the rename
- based on the space id only */
+ const char* old_name_in, /*!< in: old table name in the
+ standard databasename/tablename
+ format of InnoDB, or NULL if we
+ do the rename based on the space
+ id only */
ulint id, /*!< in: space id */
- const char* new_name); /*!< in: new table name in the standard
- databasename/tablename format
- of InnoDB */
+ const char* new_name, /*!< in: new table name in the
+ standard databasename/tablename
+ format of InnoDB */
+ const char* new_path); /*!< in: new full datafile path
+ if the tablespace is remotely
+ located, or NULL if it is located
+ in the normal data directory. */
/*******************************************************************//**
+Allocates a file name for a single-table tablespace. The string must be freed
+by caller with mem_free().
+@return own: file name */
+UNIV_INTERN
+char*
+fil_make_ibd_name(
+/*==============*/
+ const char* name, /*!< in: table name or a dir path */
+ bool is_full_path); /*!< in: TRUE if it is a dir path */
+/*******************************************************************//**
+Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
+The string must be freed by caller with mem_free().
+@return own: file name */
+UNIV_INTERN
+char*
+fil_make_isl_name(
+/*==============*/
+ const char* name); /*!< in: table name */
+/*******************************************************************//**
+Creates a new InnoDB Symbolic Link (ISL) file. It is always created
+under the 'datadir' of MySQL. The datadir is the directory of a
+running mysqld program. We can refer to it by simply using the path '.'.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_create_link_file(
+/*=================*/
+ const char* tablename, /*!< in: tablename */
+ const char* filepath); /*!< in: pathname of tablespace */
+/*******************************************************************//**
+Deletes an InnoDB Symbolic Link (ISL) file. */
+UNIV_INTERN
+void
+fil_delete_link_file(
+/*==================*/
+ const char* tablename); /*!< in: name of table */
+/*******************************************************************//**
+Reads an InnoDB Symbolic Link (ISL) file.
+It is always created under the 'datadir' of MySQL. The name is of the
+form {databasename}/{tablename}, and the isl file is expected to be in a
+'{databasename}' directory called '{tablename}.isl'. The caller must free
+the memory of the null-terminated path returned if it is not null.
+@return own: filepath found in link file, NULL if not found. */
+UNIV_INTERN
+char*
+fil_read_link_file(
+/*===============*/
+ const char* name); /*!< in: tablespace name */
+/*******************************************************************//**
Creates a new single-table tablespace to a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
@@ -448,20 +554,20 @@ path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
dir of the mysqld server.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fil_create_new_single_table_tablespace(
/*===================================*/
ulint space_id, /*!< in: space id */
const char* tablename, /*!< in: the table name in the usual
databasename/tablename format
- of InnoDB, or a dir path to a temp
- table */
- ibool is_temp, /*!< in: TRUE if a table created with
- CREATE TEMPORARY TABLE */
+ of InnoDB */
+ const char* dir_path, /*!< in: NULL or a dir path */
ulint flags, /*!< in: tablespace flags */
- ulint size); /*!< in: the initial size of the
+ ulint flags2, /*!< in: table flags2 */
+ ulint size) /*!< in: the initial size of the
tablespace file in pages,
must be >= FIL_IBD_FILE_INITIAL_SIZE */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Tries to open a single-table tablespace and optionally checks the space id is
@@ -472,44 +578,31 @@ NOTE that we assume this operation is used either at the database startup
or under the protection of the dictionary mutex, so that two users cannot
race here. This operation does not leave the file associated with the
tablespace open, but closes it after we have looked at the space id in it.
-@return TRUE if success */
+
+If the validate boolean is set, we read the first page of the file and
+check that the space id in the file is what we expect. We assume that
+this function runs much faster if no check is made, since accessing the
+file inode probably is much faster (the OS caches them) than accessing
+the first page of the file. This boolean may be initially FALSE, but if
+a remote tablespace is found it will be changed to true.
+
+If the fix_dict boolean is set, then it is safe to use an internal SQL
+statement to update the dictionary tables if they are incorrect.
+
+@return DB_SUCCESS or error code */
UNIV_INTERN
-ibool
+dberr_t
fil_open_single_table_tablespace(
/*=============================*/
- ibool check_space_id, /*!< in: should we check that the space
- id in the file is right; we assume
- that this function runs much faster
- if no check is made, since accessing
- the file inode probably is much
- faster (the OS caches them) than
- accessing the first page of the file */
+ bool validate, /*!< in: Do we validate tablespace? */
+ bool fix_dict, /*!< in: Can we fix the dictionary? */
ulint id, /*!< in: space id */
ulint flags, /*!< in: tablespace flags */
- const char* name, /*!< in: table name in the
+ const char* tablename, /*!< in: table name in the
databasename/tablename format */
- trx_t* trx); /*!< in: transaction. This is only used
- for IMPORT TABLESPACE, must be NULL
- otherwise */
-/********************************************************************//**
-It is possible, though very improbable, that the lsn's in the tablespace to be
-imported have risen above the current system lsn, if a lengthy purge, ibuf
-merge, or rollback was performed on a backup taken with ibbackup. If that is
-the case, reset page lsn's in the file. We assume that mysqld was shut down
-after it performed these cleanup operations on the .ibd file, so that it at
-the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
-first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_reset_too_high_lsns(
-/*====================*/
- const char* name, /*!< in: table name in the
- databasename/tablename format */
- ib_uint64_t current_lsn); /*!< in: reset lsn's if the lsn stamped
- to FIL_PAGE_FILE_FLUSH_LSN in the
- first page is too high */
+ const char* filepath) /*!< in: tablespace filepath */
+ __attribute__((nonnull(5), warn_unused_result));
+
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
At the server startup, if we need crash recovery, scans the database
@@ -520,13 +613,13 @@ in the doublewrite buffer, also to know where to apply log records where the
space id is != 0.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_load_single_table_tablespaces(void);
/*===================================*/
/*******************************************************************//**
Returns TRUE if a single-table tablespace does not exist in the memory cache,
or is being deleted there.
-@return TRUE if does not exist or is being\ deleted */
+@return TRUE if does not exist or is being deleted */
UNIV_INTERN
ibool
fil_tablespace_deleted_or_being_deleted_in_mem(
@@ -555,21 +648,22 @@ fil_space_for_table_exists_in_mem(
/*==============================*/
ulint id, /*!< in: space id */
const char* name, /*!< in: table name in the standard
- 'databasename/tablename' format or
- the dir path to a temp table */
- ibool is_temp, /*!< in: TRUE if created with CREATE
- TEMPORARY TABLE */
+ 'databasename/tablename' format */
ibool mark_space, /*!< in: in crash recovery, at database
startup we mark all spaces which have
an associated table in the InnoDB
data dictionary, so that
we can print a warning about orphaned
tablespaces */
- ibool print_error_if_does_not_exist);
+ ibool print_error_if_does_not_exist,
/*!< in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
+ bool adjust_space, /*!< in: whether to adjust space id
+ when a tablespace mismatch is found */
+ mem_heap_t* heap, /*!< in: heap memory */
+ table_id_t table_id); /*!< in: table id */
#else /* !UNIV_HOTBACKUP */
/********************************************************************//**
Extends all tablespaces to the size stored in the space header. During the
@@ -631,7 +725,7 @@ i/o on a tablespace which does not exist */
_fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL)
UNIV_INTERN
-ulint
+dberr_t
_fil_io(
/*===*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
@@ -643,7 +737,7 @@ _fil_io(
because i/os are not actually handled until
all have been posted: use with great
caution! */
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ bool sync, /*!< in: true if synchronous aio is desired */
ulint space_id, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes;
0 for uncompressed pages */
@@ -659,19 +753,12 @@ _fil_io(
appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
- trx_t* trx);
-/********************************************************************//**
-Confirm whether the parameters are valid or not */
-UNIV_INTERN
-ibool
-fil_is_exist(
-/*==============*/
- ulint space_id, /*!< in: space id */
- ulint block_offset); /*!< in: offset in number of blocks */
+ trx_t* trx)
+ __attribute__((nonnull(8)));
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
-into segments (see os0file.c for more info). The thread specifies which
+into segments (see os0file.cc for more info). The thread specifies which
segment it wants to wait for. */
UNIV_INTERN
void
@@ -686,9 +773,8 @@ UNIV_INTERN
void
fil_flush(
/*======*/
- ulint space_id, /*!< in: file space id (this can be a group of
+ ulint space_id); /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
- ibool metadata);
/**********************************************************************//**
Flushes to disk writes in file spaces of the given type possibly cached by
the OS. */
@@ -755,6 +841,159 @@ fil_tablespace_is_being_deleted(
/*============================*/
ulint id); /*!< in: space id */
+/********************************************************************//**
+Delete the tablespace file and any related files like .cfg.
+This should not be called for temporary tables. */
+UNIV_INTERN
+void
+fil_delete_file(
+/*============*/
+ const char* path); /*!< in: filepath of the ibd tablespace */
+
+/** Callback functor. */
+struct PageCallback {
+
+ /**
+ Default constructor */
+ PageCallback()
+ :
+ m_zip_size(),
+ m_page_size(),
+ m_filepath() UNIV_NOTHROW {}
+
+ virtual ~PageCallback() UNIV_NOTHROW {}
+
+ /**
+ Called for page 0 in the tablespace file at the start.
+ @param file_size - size of the file in bytes
+ @param block - contents of the first page in the tablespace file
+ @retval DB_SUCCESS or error code.*/
+ virtual dberr_t init(
+ os_offset_t file_size,
+ const buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ Called for every page in the tablespace. If the page was not
+ updated then its state must be set to BUF_PAGE_NOT_USED. For
+ compressed tables the page descriptor memory will be at offset:
+ block->frame + UNIV_PAGE_SIZE;
+ @param offset - physical offset within the file
+ @param block - block read from file, note it is not from the buffer pool
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator()(
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ Set the name of the physical file and the file handle that is used
+ to open it for the file that is being iterated over.
+ @param filename - the physical name of the tablespace file.
+ @param file - OS file handle */
+ void set_file(const char* filename, os_file_t file) UNIV_NOTHROW
+ {
+ m_file = file;
+ m_filepath = filename;
+ }
+
+ /**
+ @return the space id of the tablespace */
+ virtual ulint get_space_id() const UNIV_NOTHROW = 0;
+
+ /** The compressed page size
+ @return the compressed page size */
+ ulint get_zip_size() const
+ {
+ return(m_zip_size);
+ }
+
+ /**
+ Set the tablespace compressed table size.
+ @return DB_SUCCESS if it is valid or DB_CORRUPTION if not */
+ dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW;
+
+ /** The tablespace page size
+ @return the tablespace page size */
+ ulint get_page_size() const
+ {
+ return(m_page_size);
+ }
+
+ /** Compressed table page size */
+ ulint m_zip_size;
+
+ /** The tablespace page size. */
+ ulint m_page_size;
+
+ /** File handle to the tablespace */
+ os_file_t m_file;
+
+ /** Physical file path. */
+ const char* m_filepath;
+
+protected:
+ // Disable copying
+ PageCallback(const PageCallback&);
+ PageCallback& operator=(const PageCallback&);
+};
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ PageCallback& callback)
+ __attribute__((nonnull, warn_unused_result));
+
+/*******************************************************************//**
+Checks if a single-table tablespace for a given table name exists in the
+tablespace memory cache.
+@return space id, ULINT_UNDEFINED if not found */
+UNIV_INTERN
+ulint
+fil_get_space_id_for_table(
+/*=======================*/
+ const char* name); /*!< in: table name in the standard
+ 'databasename/tablename' format */
+
+/**
+Iterate over all the spaces in the space list and fetch the
+tablespace names. It will return a copy of the name that must be
+freed by the caller using: delete[].
+@return DB_SUCCESS if all OK. */
+UNIV_INTERN
+dberr_t
+fil_get_space_names(
+/*================*/
+ space_name_list_t& space_name_list)
+ /*!< in/out: Vector for collecting the names. */
+ __attribute__((warn_unused_result));
+
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+ ulint old_space_id, /*!< in: tablespace id of the old
+ table. */
+ const char* old_name, /*!< in: old table name */
+ ulint new_space_id, /*!< in: tablespace id of the new
+ table */
+ const char* new_name, /*!< in: new table name */
+ const char* tmp_name, /*!< in: temp table name used while
+ swapping */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
+
+#endif /* !UNIV_INNOCHECKSUM */
+
/*************************************************************************
Return local hash table informations. */
@@ -779,21 +1018,4 @@ fil_space_set_corrupt(
/*==================*/
ulint space_id);
-/****************************************************************//**
-Generate redo logs for swapping two .ibd files */
-UNIV_INTERN
-void
-fil_mtr_rename_log(
-/*===============*/
- ulint old_space_id, /*!< in: tablespace id of the old
- table. */
- const char* old_name, /*!< in: old table name */
- ulint new_space_id, /*!< in: tablespace id of the new
- table */
- const char* new_name, /*!< in: new table name */
- const char* tmp_name); /*!< in: temp table name used while
- swapping */
-
-typedef struct fil_space_struct fil_space_t;
-
-#endif
+#endif /* fil0fil_h */
diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h
index f07e3decc66..a587ccc9f20 100644
--- a/storage/xtradb/include/fsp0fsp.h
+++ b/storage/xtradb/include/fsp0fsp.h
@@ -28,26 +28,108 @@ Created 12/18/1995 Heikki Tuuri
#include "univ.i"
+#ifndef UNIV_INNOCHECKSUM
+
#include "mtr0mtr.h"
#include "fut0lst.h"
#include "ut0byte.h"
#include "page0types.h"
#include "fsp0types.h"
+#endif /* !UNIV_INNOCHECKSUM */
+
/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
+/** Width of the POST_ANTELOPE flag */
+#define FSP_FLAGS_WIDTH_POST_ANTELOPE 1
+/** Number of flag bits used to indicate the tablespace zip page size */
+#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4
+/** Width of the ATOMIC_BLOBS flag. The ability to break up a long
+column into an in-record prefix and an externally stored part is available
+to the two Barracuda row formats COMPRESSED and DYNAMIC. */
+#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1
/** Number of flag bits used to indicate the tablespace page size */
#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4
+/** Width of the DATA_DIR flag. This flag indicates that the tablespace
+is found in a remote location, not the default data directory. */
+#define FSP_FLAGS_WIDTH_DATA_DIR 1
+/** Width of all the currently known tablespace flags */
+#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \
+ + FSP_FLAGS_WIDTH_ZIP_SSIZE \
+ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \
+ + FSP_FLAGS_WIDTH_PAGE_SSIZE \
+ + FSP_FLAGS_WIDTH_DATA_DIR)
+
+/** A mask of all the known/used bits in tablespace flags */
+#define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH))
+
+/** Zero relative shift position of the POST_ANTELOPE field */
+#define FSP_FLAGS_POS_POST_ANTELOPE 0
+/** Zero relative shift position of the ZIP_SSIZE field */
+#define FSP_FLAGS_POS_ZIP_SSIZE (FSP_FLAGS_POS_POST_ANTELOPE \
+ + FSP_FLAGS_WIDTH_POST_ANTELOPE)
+/** Zero relative shift position of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \
+ + FSP_FLAGS_WIDTH_ZIP_SSIZE)
/** Zero relative shift position of the PAGE_SSIZE field */
-#define FSP_FLAGS_POS_PAGE_SSIZE 6
+#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \
+ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the DATA_DIR field */
+#define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \
+ + FSP_FLAGS_WIDTH_PAGE_SSIZE)
+/** Zero relative shift position of the start of the UNUSED bits */
+#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_DATA_DIR \
+ + FSP_FLAGS_WIDTH_DATA_DIR)
+
+/** Bit mask of the POST_ANTELOPE field */
+#define FSP_FLAGS_MASK_POST_ANTELOPE \
+ ((~(~0 << FSP_FLAGS_WIDTH_POST_ANTELOPE)) \
+ << FSP_FLAGS_POS_POST_ANTELOPE)
+/** Bit mask of the ZIP_SSIZE field */
+#define FSP_FLAGS_MASK_ZIP_SSIZE \
+ ((~(~0 << FSP_FLAGS_WIDTH_ZIP_SSIZE)) \
+ << FSP_FLAGS_POS_ZIP_SSIZE)
+/** Bit mask of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_MASK_ATOMIC_BLOBS \
+ ((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_BLOBS)) \
+ << FSP_FLAGS_POS_ATOMIC_BLOBS)
/** Bit mask of the PAGE_SSIZE field */
#define FSP_FLAGS_MASK_PAGE_SSIZE \
((~(~0 << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \
<< FSP_FLAGS_POS_PAGE_SSIZE)
+/** Bit mask of the DATA_DIR field */
+#define FSP_FLAGS_MASK_DATA_DIR \
+ ((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \
+ << FSP_FLAGS_POS_DATA_DIR)
+
+/** Return the value of the POST_ANTELOPE field */
+#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \
+ ((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \
+ >> FSP_FLAGS_POS_POST_ANTELOPE)
+/** Return the value of the ZIP_SSIZE field */
+#define FSP_FLAGS_GET_ZIP_SSIZE(flags) \
+ ((flags & FSP_FLAGS_MASK_ZIP_SSIZE) \
+ >> FSP_FLAGS_POS_ZIP_SSIZE)
+/** Return the value of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags) \
+ ((flags & FSP_FLAGS_MASK_ATOMIC_BLOBS) \
+ >> FSP_FLAGS_POS_ATOMIC_BLOBS)
/** Return the value of the PAGE_SSIZE field */
#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \
((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \
>> FSP_FLAGS_POS_PAGE_SSIZE)
+/** Return the value of the DATA_DIR field */
+#define FSP_FLAGS_HAS_DATA_DIR(flags) \
+ ((flags & FSP_FLAGS_MASK_DATA_DIR) \
+ >> FSP_FLAGS_POS_DATA_DIR)
+/** Return the contents of the UNUSED bits */
+#define FSP_FLAGS_GET_UNUSED(flags) \
+ (flags >> FSP_FLAGS_POS_UNUSED)
+
+/** Set a PAGE_SSIZE into the correct bits in a given
+tablespace flags. */
+#define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \
+ (flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE))
/* @} */
@@ -116,6 +198,142 @@ descriptor page, but used only in the first. */
FSP_FREE_LIMIT at a time */
/* @} */
+#ifndef UNIV_INNOCHECKSUM
+
+/* @defgroup File Segment Inode Constants (moved from fsp0fsp.c) @{ */
+
+/* FILE SEGMENT INODE
+ ==================
+
+Segment inode which is created for each segment in a tablespace. NOTE: in
+purge we assume that a segment having only one currently used page can be
+freed in a few steps, so that the freeing cannot fill the file buffer with
+bufferfixed file pages. */
+
+typedef byte fseg_inode_t;
+
+#define FSEG_INODE_PAGE_NODE FSEG_PAGE_DATA
+ /* the list node for linking
+ segment inode pages */
+
+#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE)
+/*-------------------------------------*/
+#define FSEG_ID 0 /* 8 bytes of segment id: if this is 0,
+ it means that the header is unused */
+#define FSEG_NOT_FULL_N_USED 8
+ /* number of used segment pages in
+ the FSEG_NOT_FULL list */
+#define FSEG_FREE 12
+ /* list of free extents of this
+ segment */
+#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE)
+ /* list of partially free extents */
+#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE)
+ /* list of full extents */
+#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE)
+ /* magic number used in debugging */
+#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE)
+ /* array of individual pages
+ belonging to this segment in fsp
+ fragment extent lists */
+#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2)
+ /* number of slots in the array for
+ the fragment pages */
+#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its
+ page number within space, FIL_NULL
+ means that the slot is not in use */
+/*-------------------------------------*/
+#define FSEG_INODE_SIZE \
+ (16 + 3 * FLST_BASE_NODE_SIZE \
+ + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
+
+#define FSP_SEG_INODES_PER_PAGE(zip_size) \
+ (((zip_size ? zip_size : UNIV_PAGE_SIZE) \
+ - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
+ /* Number of segment inodes which fit on a
+ single page */
+
+#define FSEG_MAGIC_N_VALUE 97937874
+
+#define FSEG_FILLFACTOR 8 /* If this value is x, then if
+ the number of unused but reserved
+ pages in a segment is less than
+ reserved pages * 1/x, and there are
+ at least FSEG_FRAG_LIMIT used pages,
+ then we allow a new empty extent to
+ be added to the segment in
+ fseg_alloc_free_page. Otherwise, we
+ use unused pages of the segment. */
+
+#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS
+ /* If the segment has >= this many
+ used pages, it may be expanded by
+ allocating extents to the segment;
+ until that only individual fragment
+ pages are allocated from the space */
+
+#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment
+ is at least this many extents, we
+ allow extents to be put to the free
+ list of the extent: at most
+ FSEG_FREE_LIST_MAX_LEN many */
+#define FSEG_FREE_LIST_MAX_LEN 4
+/* @} */
+
+/* @defgroup Extent Descriptor Constants (moved from fsp0fsp.c) @{ */
+
+/* EXTENT DESCRIPTOR
+ =================
+
+File extent descriptor data structure: contains bits to tell which pages in
+the extent are free and which contain old tuple version to clean. */
+
+/*-------------------------------------*/
+#define XDES_ID 0 /* The identifier of the segment
+ to which this extent belongs */
+#define XDES_FLST_NODE 8 /* The list node data structure
+ for the descriptors */
+#define XDES_STATE (FLST_NODE_SIZE + 8)
+ /* contains state information
+ of the extent */
+#define XDES_BITMAP (FLST_NODE_SIZE + 12)
+ /* Descriptor bitmap of the pages
+ in the extent */
+/*-------------------------------------*/
+
+#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */
+#define XDES_FREE_BIT 0 /* Index of the bit which tells if
+ the page is free */
+#define XDES_CLEAN_BIT 1 /* NOTE: currently not used!
+ Index of the bit which tells if
+ there are old versions of tuples
+ on the page */
+/* States of a descriptor */
+#define XDES_FREE 1 /* extent is in free list of space */
+#define XDES_FREE_FRAG 2 /* extent is in free fragment list of
+ space */
+#define XDES_FULL_FRAG 3 /* extent is in full fragment list of
+ space */
+#define XDES_FSEG 4 /* extent belongs to a segment */
+
+/** File extent data structure size in bytes. */
+#define XDES_SIZE \
+ (XDES_BITMAP \
+ + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
+
+/** File extent data structure size in bytes for MAX page size. */
+#define XDES_SIZE_MAX \
+ (XDES_BITMAP \
+ + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MAX * XDES_BITS_PER_PAGE))
+
+/** File extent data structure size in bytes for MIN page size. */
+#define XDES_SIZE_MIN \
+ (XDES_BITMAP \
+ + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MIN * XDES_BITS_PER_PAGE))
+
+/** Offset of the descriptor array on a descriptor page */
+#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
+
/* @} */
/**********************************************************************//**
@@ -125,16 +343,6 @@ void
fsp_init(void);
/*==========*/
/**********************************************************************//**
-Gets the current free limit of the system tablespace. The free limit
-means the place of the first page which has never been put to the
-free list for allocation. The space above that address is initialized
-to zero. Sets also the global variable log_fsp_current_free_limit.
-@return free limit in megabytes */
-UNIV_INTERN
-ulint
-fsp_header_get_free_limit(void);
-/*===========================*/
-/**********************************************************************//**
Gets the size of the system tablespace from the tablespace header. If
we do not have an auto-extending data file, this should be equal to
the size of the data files. If there is an auto-extending data file,
@@ -177,9 +385,9 @@ fsp_header_get_zip_size(
/*====================*/
const page_t* page); /*!< in: first page of a tablespace */
/**********************************************************************//**
-Writes the space id and compressed page size to a tablespace header.
-This function is used past the buffer pool when we in fil0fil.c create
-a new single-table tablespace. */
+Writes the space id and flags to a tablespace header. The flags contain
+row type, physical/compressed page size, and logical/uncompressed page
+size of the tablespace. */
UNIV_INTERN
void
fsp_header_init_fields(
@@ -197,16 +405,16 @@ fsp_header_init(
/*============*/
ulint space, /*!< in: space id */
ulint size, /*!< in: current size in blocks */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Increases the space size field of a space. */
UNIV_INTERN
void
fsp_header_inc_size(
/*================*/
- ulint space, /*!< in: space id */
- ulint size_inc,/*!< in: size increment in pages */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ ulint space, /*!< in: space id */
+ ulint size_inc, /*!< in: size increment in pages */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
@@ -222,7 +430,7 @@ fseg_create(
will belong to the created segment */
ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
@@ -244,7 +452,7 @@ fseg_create_general(
the inode and the other for the segment) then there is
no need to do the check for this individual
operation */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
currently used.
@@ -255,7 +463,7 @@ fseg_n_reserved_pages(
/*==================*/
fseg_header_t* header, /*!< in: segment header */
ulint* used, /*!< out: number of pages used (<= reserved) */
- mtr_t* mtr); /*!< in: mtr handle */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize
@@ -339,7 +547,7 @@ fsp_reserve_free_extents(
ulint space, /*!< in: space id */
ulint n_ext, /*!< in: number of extents to reserve */
ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr); /*!< in: mini-transaction */
/**********************************************************************//**
This function should be used to get information on how much we still
will be able to insert new data to the database without running out the
@@ -360,7 +568,18 @@ fseg_free_page(
fseg_header_t* seg_header, /*!< in: segment header */
ulint space, /*!< in: space id */
ulint page, /*!< in: page offset */
- mtr_t* mtr); /*!< in: mtr handle */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
+/**********************************************************************//**
+Checks if a single page of a segment is free.
+@return true if free */
+UNIV_INTERN
+bool
+fseg_page_is_free(
+/*==============*/
+ fseg_header_t* seg_header, /*!< in: segment header */
+ ulint space, /*!< in: space id */
+ ulint page) /*!< in: page offset */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Frees part of a segment. This function can be used to free a segment
by repeatedly calling this function in different mini-transactions.
@@ -375,7 +594,7 @@ fseg_free_step(
resides on the first page of the frag list
of the segment, this pointer becomes obsolete
after the last freeing step */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Frees part of a segment. Differs from fseg_free_step because this function
leaves the header page unfreed.
@@ -386,7 +605,7 @@ fseg_free_step_not_header(
/*======================*/
fseg_header_t* header, /*!< in: segment header which must reside on
the first fragment page of the segment */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
/***********************************************************************//**
Checks if a page address is an extent descriptor page address.
@return TRUE if a descriptor page */
@@ -431,7 +650,7 @@ ibool
fseg_validate(
/*==========*/
fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
#endif /* UNIV_DEBUG */
#ifdef UNIV_BTR_PRINT
/*******************************************************************//**
@@ -441,20 +660,85 @@ void
fseg_print(
/*=======*/
fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
#endif /* UNIV_BTR_PRINT */
/********************************************************************//**
+Validate the tablespace flags, which are stored in the
+tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for
+ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
+COMPRESSED and DYNAMIC, use a file format > Antelope so they should
+have a file format number plus the DICT_TF_COMPACT bit set.
+@return true if check ok */
+UNIV_INLINE
+bool
+fsp_flags_is_valid(
+/*===============*/
+ ulint flags) /*!< in: tablespace flags */
+ __attribute__((warn_unused_result, const));
+/********************************************************************//**
+Determine if the tablespace is compressed from the tablespace flags.
+@return TRUE if compressed, FALSE if not compressed */
+UNIV_INLINE
+ibool
+fsp_flags_is_compressed(
+/*====================*/
+ ulint flags); /*!< in: tablespace flags */
+
+/********************************************************************//**
+Calculates the descriptor index within a descriptor page.
+@return descriptor index */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset); /*!< in: page offset */
+
+/**********************************************************************//**
+Gets a descriptor bit of a page.
+@return TRUE if free */
+UNIV_INLINE
+ibool
+xdes_get_bit(
+/*=========*/
+ const xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset);/*!< in: page offset within extent:
+ 0 ... FSP_EXTENT_SIZE - 1 */
+
+/********************************************************************//**
+Calculates the page where the descriptor of a page resides.
+@return descriptor page offset */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset); /*!< in: page offset */
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/********************************************************************//**
+Extract the zip size from tablespace flags. A tablespace has only one
+physical page size whether that page is compressed or not.
+@return compressed page size of the file-per-table tablespace in bytes,
+or zero if the table is not compressed. */
+UNIV_INLINE
+ulint
+fsp_flags_get_zip_size(
+/*====================*/
+ ulint flags); /*!< in: tablespace flags */
+/********************************************************************//**
Extract the page size from tablespace flags.
-This feature, storing the page_ssize into the tablespace flags, is added
-to InnoDB 5.6.4. This is here only to protect against a crash if a newer
-database is opened with this code branch.
@return page size of the tablespace in bytes */
UNIV_INLINE
ulint
fsp_flags_get_page_size(
/*====================*/
- ulint flags); /*!< in: tablespace flags */
+ ulint flags); /*!< in: tablespace flags */
#ifndef UNIV_NONINL
#include "fsp0fsp.ic"
diff --git a/storage/xtradb/include/fsp0fsp.ic b/storage/xtradb/include/fsp0fsp.ic
index c92111a9d89..0d81e817cc9 100644
--- a/storage/xtradb/include/fsp0fsp.ic
+++ b/storage/xtradb/include/fsp0fsp.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,6 +23,8 @@ File space management
Created 12/18/1995 Heikki Tuuri
*******************************************************/
+#ifndef UNIV_INNOCHECKSUM
+
/***********************************************************************//**
Checks if a page address is an extent descriptor page address.
@return TRUE if a descriptor page */
@@ -37,17 +39,120 @@ fsp_descr_page(
ut_ad(ut_is_2pow(zip_size));
if (!zip_size) {
- return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1))
- == FSP_XDES_OFFSET));
+ return((page_no & (UNIV_PAGE_SIZE - 1)) == FSP_XDES_OFFSET);
}
- return(UNIV_UNLIKELY((page_no & (zip_size - 1)) == FSP_XDES_OFFSET));
+ return((page_no & (zip_size - 1)) == FSP_XDES_OFFSET);
}
+
+/********************************************************************//**
+Validate the tablespace flags, which are stored in the
+tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for
+ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
+COMPRESSED and DYNAMIC, use a file format > Antelope so they should
+have a file format number plus the DICT_TF_COMPACT bit set.
+@return true if check ok */
+UNIV_INLINE
+bool
+fsp_flags_is_valid(
+/*===============*/
+ ulint flags) /*!< in: tablespace flags */
+{
+ ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(flags);
+ ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
+ ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
+ ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
+ ulint unused = FSP_FLAGS_GET_UNUSED(flags);
+
+ DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false););
+
+ /* fsp_flags is zero unless atomic_blobs is set. */
+ /* Make sure there are no bits that we do not know about. */
+ if (unused != 0 || flags == 1) {
+ return(false);
+ } else if (post_antelope) {
+ /* The Antelope row formats REDUNDANT and COMPACT did
+ not use tablespace flags, so this flag and the entire
+ 4-byte field is zero for Antelope row formats. */
+
+ if (!atomic_blobs) {
+ return(false);
+ }
+ }
+
+ if (!atomic_blobs) {
+ /* Barracuda row formats COMPRESSED and DYNAMIC build on
+ the page structure introduced for the COMPACT row format
+ by allowing long fields to be broken into prefix and
+ externally stored parts. */
+
+ if (post_antelope || zip_ssize != 0) {
+ return(false);
+ }
+
+ } else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ return(false);
+ } else if (page_ssize > UNIV_PAGE_SSIZE_MAX) {
+
+ /* The page size field can be used for any row type, or it may
+ be zero for an original 16k page size.
+ Validate the page shift size is within allowed range. */
+
+ return(false);
+
+ } else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) {
+ return(false);
+ }
+
+#if UNIV_FORMAT_MAX != UNIV_FORMAT_B
+# error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations."
+#endif
+
+ /* The DATA_DIR field can be used for any row type so there is
+ nothing here to validate. */
+
+ return(true);
+}
+
+/********************************************************************//**
+Determine if the tablespace is compressed from the tablespace flags.
+@return TRUE if compressed, FALSE if not compressed */
+UNIV_INLINE
+ibool
+fsp_flags_is_compressed(
+/*====================*/
+ ulint flags) /*!< in: tablespace flags */
+{
+ return(FSP_FLAGS_GET_ZIP_SSIZE(flags) != 0);
+}
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/********************************************************************//**
+Extract the zip size from tablespace flags.
+@return compressed page size of the file-per-table tablespace in bytes,
+or zero if the table is not compressed. */
+UNIV_INLINE
+ulint
+fsp_flags_get_zip_size(
+/*===================*/
+ ulint flags) /*!< in: tablespace flags */
+{
+ ulint zip_size = 0;
+ ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
+
+ /* Convert from a 'log2 minus 9' to a page size in bytes. */
+ if (ssize) {
+ zip_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
+
+ ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+ }
+
+ return(zip_size);
+}
+
/********************************************************************//**
Extract the page size from tablespace flags.
-This feature, storing the page_ssize into the tablespace flags, is added
-to InnoDB 5.6.4. This is here only to protect against a crash if a newer
-database is opened with this code branch.
@return page size of the tablespace in bytes */
UNIV_INLINE
ulint
@@ -60,14 +165,150 @@ fsp_flags_get_page_size(
/* Convert from a 'log2 minus 9' to a page size in bytes. */
if (UNIV_UNLIKELY(ssize)) {
- page_size = (512 << ssize);
+ page_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
- ut_ad(page_size <= UNIV_PAGE_SIZE);
+ ut_ad(page_size <= UNIV_PAGE_SIZE_MAX);
} else {
/* If the page size was not stored, then it is the
original 16k. */
- page_size = UNIV_PAGE_SIZE;
+ page_size = UNIV_PAGE_SIZE_ORIG;
}
return(page_size);
}
+
+#ifndef UNIV_INNOCHECKSUM
+
+/********************************************************************//**
+Add the page size to the tablespace flags.
+A page size equal to UNIV_PAGE_SIZE_ORIG is not stored: the flags are
+returned unchanged (their PAGE_SSIZE field is asserted to be zero).
+For any other size, the shifts from UNIV_PAGE_SIZE_SHIFT_MAX down to
+UNIV_PAGE_SIZE_SHIFT_MIN are searched for the bit set in page_size, the
+size is asserted to have no other bit set (i.e. to be a power of two),
+and the code (shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1) is written into the
+PAGE_SSIZE field. The resulting flags are asserted to pass
+fsp_flags_is_valid().
+@return tablespace flags after page size is added */
+UNIV_INLINE
+ulint
+fsp_flags_set_page_size(
+/*====================*/
+ ulint flags, /*!< in: tablespace flags */
+ ulint page_size) /*!< in: page size in bytes */
+{
+ ulint ssize = 0;
+ ulint shift;
+
+ /* Page size must lie within
+ [UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX], both bounds inclusive. */
+ ut_ad(page_size >= UNIV_PAGE_SIZE_MIN);
+ ut_ad(page_size <= UNIV_PAGE_SIZE_MAX);
+
+ if (page_size == UNIV_PAGE_SIZE_ORIG) {
+ ut_ad(0 == FSP_FLAGS_GET_PAGE_SSIZE(flags));
+ return(flags);
+ }
+
+ for (shift = UNIV_PAGE_SIZE_SHIFT_MAX;
+ shift >= UNIV_PAGE_SIZE_SHIFT_MIN;
+ shift--) {
+ ulint mask = (1 << shift);
+ if (page_size & mask) {
+ ut_ad(!(page_size & ~mask));
+ ssize = shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1;
+ break;
+ }
+ }
+
+ ut_ad(ssize);
+ ut_ad(ssize <= UNIV_PAGE_SSIZE_MAX);
+
+ flags = FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize);
+
+ ut_ad(fsp_flags_is_valid(flags));
+
+ return(flags);
+}
+
+/********************************************************************//**
+Calculates the descriptor index within a descriptor page.
+The page offset is first reduced with ut_2pow_remainder() against
+UNIV_PAGE_SIZE for uncompressed pages (zip_size == 0) or against zip_size
+otherwise (presumably the remainder modulo that power of two; confirm in
+ut0ut.h), and the result is divided by FSP_EXTENT_SIZE.
+zip_size is asserted to satisfy ut_is_2pow().
+@return descriptor index */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset) /*!< in: page offset */
+{
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (zip_size == 0) {
+ return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
+ / FSP_EXTENT_SIZE);
+ } else {
+ return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
+ }
+}
+
+/**********************************************************************//**
+Gets a descriptor bit of a page.
+The position of the bit within the descriptor bitmap is
+bit + XDES_BITS_PER_PAGE * offset. The byte holding that position is read
+from descr + XDES_BITMAP, and the single bit is then extracted from the
+byte with ut_bit_get_nth().
+@return value of the requested bit; for XDES_FREE_BIT, TRUE if free */
+UNIV_INLINE
+ibool
+xdes_get_bit(
+/*=========*/
+ const xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset) /*!< in: page offset within extent:
+ 0 ... FSP_EXTENT_SIZE - 1 */
+{
+ ut_ad(offset < FSP_EXTENT_SIZE);
+ ut_ad(bit == XDES_FREE_BIT || bit == XDES_CLEAN_BIT);
+
+ ulint index = bit + XDES_BITS_PER_PAGE * offset;
+
+ ulint bit_index = index % 8;
+ ulint byte_index = index / 8;
+
+ return(ut_bit_get_nth(
+ mach_read_ulint(descr + XDES_BITMAP + byte_index,
+ MLOG_1BYTE),
+ bit_index));
+}
+
+/********************************************************************//**
+Calculates the page where the descriptor of a page resides.
+The page offset is rounded with ut_2pow_round() against UNIV_PAGE_SIZE
+for uncompressed pages (zip_size == 0) or against zip_size otherwise
+(presumably rounding down to a multiple of that power of two; confirm in
+ut0ut.h).
+The preprocessor checks and the assertions all verify the same
+inequality for different page sizes: the page size must be greater than
+XDES_ARR_OFFSET + (page size / extent size) * XDES_SIZE.
+@return descriptor page offset */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset) /*!< in: page offset */
+{
+#ifndef DOXYGEN /* Doxygen gets confused by these compile-time checks */
+# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \
+ + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX) \
+ * XDES_SIZE_MAX
+# error
+# endif
+# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET \
+ + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN) \
+ * XDES_SIZE_MIN
+# error
+# endif
+#endif /* !DOXYGEN */
+
+ ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
+ + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
+ * XDES_SIZE);
+ ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
+ + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
+ * XDES_SIZE);
+
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (zip_size == 0) {
+ return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
+ } else {
+ ut_ad(zip_size > XDES_ARR_OFFSET
+ + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
+ return(ut_2pow_round(offset, zip_size));
+ }
+}
+
+#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/xtradb/include/fsp0types.h b/storage/xtradb/include/fsp0types.h
index 6e46d647657..94fd908ab0c 100644
--- a/storage/xtradb/include/fsp0types.h
+++ b/storage/xtradb/include/fsp0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -42,7 +42,13 @@ fseg_alloc_free_page) */
/* @} */
/** File space extent size (one megabyte) in pages */
-#define FSP_EXTENT_SIZE (1ULL << (20 - UNIV_PAGE_SIZE_SHIFT))
+#define FSP_EXTENT_SIZE (1048576U / UNIV_PAGE_SIZE)
+
+/** File space extent size (one megabyte) in pages for MAX page size */
+#define FSP_EXTENT_SIZE_MAX (1048576 / UNIV_PAGE_SIZE_MAX)
+
+/** File space extent size (one megabyte) in pages for MIN page size */
+#define FSP_EXTENT_SIZE_MIN (1048576 / UNIV_PAGE_SIZE_MIN)
/** On a page of any file segment, data may be put starting from this
offset */
diff --git a/storage/xtradb/include/fts0ast.h b/storage/xtradb/include/fts0ast.h
new file mode 100644
index 00000000000..c0aac6d8e4c
--- /dev/null
+++ b/storage/xtradb/include/fts0ast.h
@@ -0,0 +1,281 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0ast.h
+The FTS query parser (AST) abstract syntax tree routines
+
+Created 2007/03/16/03 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_FST0AST_H
+#define INNOBASE_FST0AST_H
+
+#include "mem0mem.h"
+#include "ha_prototypes.h"
+
+/* The type of AST Node */
+enum fts_ast_type_t {
+ FTS_AST_OPER, /*!< Operator */
+ FTS_AST_NUMB, /*!< Number */
+ FTS_AST_TERM, /*!< Term (or word) */
+ FTS_AST_TEXT, /*!< Text string */
+ FTS_AST_LIST, /*!< Expression list */
+ FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */
+};
+
+/* The FTS query operators that we support */
+enum fts_ast_oper_t {
+ FTS_NONE, /*!< No operator */
+
+ FTS_IGNORE, /*!< Ignore rows that contain
+ this word */
+
+ FTS_EXIST, /*!< Include rows that contain
+ this word */
+
+ FTS_NEGATE, /*!< Include rows that contain
+ this word but rank them
+ lower */
+
+ FTS_INCR_RATING, /*!< Increase the rank for this
+ word */
+
+ FTS_DECR_RATING, /*!< Decrease the rank for this
+ word */
+
+ FTS_DISTANCE, /*!< Proximity distance */
+ FTS_IGNORE_SKIP, /*!< Transient node operator
+ signifies that this is a
+ FTS_IGNORE node, and ignored in
+ the first pass of
+ fts_ast_visit() */
+ FTS_EXIST_SKIP /*!< Transient node operator
+ signifies that this is a
+ FTS_EXIST node, and ignored in
+ the first pass of
+ fts_ast_visit() */
+};
+
+/* Data types used by the FTS parser */
+struct fts_lexer_t;
+struct fts_ast_node_t;
+struct fts_ast_state_t;
+
+typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
+
+/********************************************************************
+Parse the string using the lexer setup within state.*/
+int
+fts_parse(
+/*======*/
+ /* out: 0 on OK, 1 on error */
+ fts_ast_state_t* state); /*!< in: ast state instance.*/
+
+/********************************************************************
+Create an AST operator node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_oper(
+/*=====================*/
+ void* arg, /*!< in: ast state */
+ fts_ast_oper_t oper); /*!< in: ast operator */
+/********************************************************************
+Create an AST term node, makes a copy of ptr */
+extern
+fts_ast_node_t*
+fts_ast_create_node_term(
+/*=====================*/
+ void* arg, /*!< in: ast state */
+ const char* ptr); /*!< in: term string */
+/********************************************************************
+Create an AST text node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_text(
+/*=====================*/
+ void* arg, /*!< in: ast state */
+ const char* ptr); /*!< in: text string */
+/********************************************************************
+Create an AST expr list node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_list(
+/*=====================*/
+ void* arg, /*!< in: ast state */
+ fts_ast_node_t* expr); /*!< in: ast expr */
+/********************************************************************
+Create a sub-expression list node. This function takes ownership of
+expr and is responsible for deleting it. */
+extern
+fts_ast_node_t*
+fts_ast_create_node_subexp_list(
+/*============================*/
+ /* out: new node */
+ void* arg, /*!< in: ast state instance */
+ fts_ast_node_t* expr); /*!< in: ast expr instance */
+/********************************************************************
+Set the wildcard attribute of a term.*/
+extern
+void
+fts_ast_term_set_wildcard(
+/*======================*/
+ fts_ast_node_t* node); /*!< in: term to change */
+/********************************************************************
+Set the proximity attribute of a text node. */
+
+void
+fts_ast_term_set_distance(
+/*======================*/
+ fts_ast_node_t* node, /*!< in/out: text node */
+ ulint distance); /*!< in: the text proximity
+ distance */
+/********************************************************************//**
+Free a fts_ast_node_t instance.
+@return next node to free */
+UNIV_INTERN
+fts_ast_node_t*
+fts_ast_free_node(
+/*==============*/
+ fts_ast_node_t* node); /*!< in: node to free */
+/********************************************************************
+Add a sub-expression to an AST*/
+extern
+fts_ast_node_t*
+fts_ast_add_node(
+/*=============*/
+ fts_ast_node_t* list, /*!< in: list node instance */
+ fts_ast_node_t* node); /*!< in: (sub) expr to add */
+/********************************************************************
+Print the AST node recursively.*/
+extern
+void
+fts_ast_node_print(
+/*===============*/
+ fts_ast_node_t* node); /*!< in: ast node to print */
+/********************************************************************
+For tracking node allocations, in case there is an error during parsing.*/
+extern
+void
+fts_ast_state_add_node(
+/*===================*/
+ fts_ast_state_t*state, /*!< in: ast state instance */
+ fts_ast_node_t* node); /*!< in: node to add to state */
+/********************************************************************
+Free node and expr allocations.*/
+extern
+void
+fts_ast_state_free(
+/*===============*/
+ fts_ast_state_t*state); /*!< in: state instance
+ to free */
+/******************************************************************//**
+Traverse the AST - in-order traversal.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
+fts_ast_visit(
+/*==========*/
+ fts_ast_oper_t oper, /*!< in: FTS operator */
+ fts_ast_node_t* node, /*!< in: instance to traverse */
+ fts_ast_callback visitor, /*!< in: callback */
+ void* arg, /*!< in: callback arg */
+ bool* has_ignore) /*!< out: whether an operator was
+ encountered whose processing
+ was skipped; currently only
+ the FTS_IGNORE operator is
+ skipped */
+ __attribute__((nonnull, warn_unused_result));
+/*****************************************************************//**
+Process (nested) sub-expression, create a new result set to store the
+sub-expression result by processing nodes under current sub-expression
+list. Merge the sub-expression result with that of parent expression list.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
+fts_ast_visit_sub_exp(
+/*==================*/
+ fts_ast_node_t* node, /*!< in: instance to traverse*/
+ fts_ast_callback visitor, /*!< in: callback */
+ void* arg) /*!< in: callback arg */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************
+Create a lex instance.*/
+UNIV_INTERN
+fts_lexer_t*
+fts_lexer_create(
+/*=============*/
+ ibool boolean_mode, /*!< in: query type */
+ const byte* query, /*!< in: query string */
+ ulint query_len) /*!< in: query string len */
+ __attribute__((nonnull, malloc, warn_unused_result));
+/********************************************************************
+Free an fts_lexer_t instance.*/
+UNIV_INTERN
+void
+fts_lexer_free(
+/*===========*/
+ fts_lexer_t* fts_lexer) /*!< in: lexer instance to
+ free */
+ __attribute__((nonnull));
+
+/* Query term type */
+struct fts_ast_term_t {
+ byte* ptr; /*!< Pointer to term string.*/
+ ibool wildcard; /*!< TRUE if wild card set.*/
+};
+
+/* Query text type */
+struct fts_ast_text_t {
+ byte* ptr; /*!< Pointer to term string.*/
+ ulint distance; /*!< > 0 if proximity distance
+ set */
+};
+
+/* The list of nodes in an expr list */
+struct fts_ast_list_t {
+ fts_ast_node_t* head; /*!< Children list head */
+ fts_ast_node_t* tail; /*!< Children list tail */
+};
+
+/* FTS AST node to store the term, text, operator and sub-expressions.*/
+struct fts_ast_node_t {
+ fts_ast_type_t type; /*!< The type of node */
+ fts_ast_text_t text; /*!< Text node */
+ fts_ast_term_t term; /*!< Term node */
+ fts_ast_oper_t oper; /*!< Operator value */
+ fts_ast_list_t list; /*!< Expression list */
+ fts_ast_node_t* next; /*!< Link for expr list */
+ fts_ast_node_t* next_alloc; /*!< For tracking allocations */
+ bool visited; /*!< whether this node is
+ already processed */
+};
+
+/* To track state during parsing */
+struct fts_ast_state_t {
+ mem_heap_t* heap; /*!< Heap to use for alloc */
+ fts_ast_node_t* root; /*!< If all goes OK, then this
+ will point to the root.*/
+
+ fts_ast_list_t list; /*!< List of nodes allocated */
+
+ fts_lexer_t* lexer; /*!< Lexer callback + arg */
+ CHARSET_INFO* charset; /*!< charset used for
+ tokenization */
+};
+
+#endif /* INNOBASE_FST0AST_H */
diff --git a/storage/xtradb/include/fts0blex.h b/storage/xtradb/include/fts0blex.h
new file mode 100644
index 00000000000..d0e4cae0678
--- /dev/null
+++ b/storage/xtradb/include/fts0blex.h
@@ -0,0 +1,349 @@
+#ifndef fts0bHEADER_H
+#define fts0bHEADER_H 1
+#define fts0bIN_HEADER 1
+
+#line 6 "../include/fts0blex.h"
+
+#line 8 "../include/fts0blex.h"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 35
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else /* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif /* defined (__STDC__) */
+#endif /* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* An opaque pointer. */
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+
+/* For convenience, these vars (plus the bison vars far below)
+ are macros in the reentrant scanner. */
+#define yyin yyg->yyin_r
+#define yyout yyg->yyout_r
+#define yyextra yyg->yyextra_r
+#define yyleng yyg->yyleng_r
+#define yytext yyg->yytext_r
+#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
+#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
+#define yy_flex_debug yyg->yy_flex_debug_r
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ yy_size_t yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ int yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ int yy_bs_lineno; /**< The line count. */
+ int yy_bs_column; /**< The column count. */
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+
+ };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+void fts0brestart (FILE *input_file ,yyscan_t yyscanner );
+void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0b_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
+void fts0b_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void fts0b_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+void fts0bpop_buffer_state (yyscan_t yyscanner );
+
+YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
+
+void *fts0balloc (yy_size_t ,yyscan_t yyscanner );
+void *fts0brealloc (void *,yy_size_t ,yyscan_t yyscanner );
+void fts0bfree (void * ,yyscan_t yyscanner );
+
+/* Begin user sect3 */
+
+#define fts0bwrap(n) 1
+#define YY_SKIP_YYWRAP
+
+#define yytext_ptr yytext_r
+
+#ifdef YY_HEADER_EXPORT_START_CONDITIONS
+#define INITIAL 0
+
+#endif
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+int fts0blex_init (yyscan_t* scanner);
+
+int fts0blex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */
+
+int fts0blex_destroy (yyscan_t yyscanner );
+
+int fts0bget_debug (yyscan_t yyscanner );
+
+void fts0bset_debug (int debug_flag ,yyscan_t yyscanner );
+
+YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner );
+
+void fts0bset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
+
+FILE *fts0bget_in (yyscan_t yyscanner );
+
+void fts0bset_in (FILE * in_str ,yyscan_t yyscanner );
+
+FILE *fts0bget_out (yyscan_t yyscanner );
+
+void fts0bset_out (FILE * out_str ,yyscan_t yyscanner );
+
+int fts0bget_leng (yyscan_t yyscanner );
+
+char *fts0bget_text (yyscan_t yyscanner );
+
+int fts0bget_lineno (yyscan_t yyscanner );
+
+void fts0bset_lineno (int line_number ,yyscan_t yyscanner );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int fts0bwrap (yyscan_t yyscanner );
+#else
+extern int fts0bwrap (yyscan_t yyscanner );
+#endif
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
+#endif
+
+#ifndef YY_NO_INPUT
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int fts0blex (yyscan_t yyscanner);
+
+#define YY_DECL int fts0blex (yyscan_t yyscanner)
+#endif /* !YY_DECL */
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+#undef YY_NEW_FILE
+#undef YY_FLUSH_BUFFER
+#undef yy_set_bol
+#undef yy_new_buffer
+#undef yy_set_interactive
+#undef YY_DO_BEFORE_ACTION
+
+#ifdef YY_DECL_IS_OURS
+#undef YY_DECL_IS_OURS
+#undef YY_DECL
+#endif
+
+#line 73 "fts0blex.l"
+
+
+#line 348 "../include/fts0blex.h"
+#undef fts0bIN_HEADER
+#endif /* fts0bHEADER_H */
diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h
new file mode 100644
index 00000000000..f94112ef4d4
--- /dev/null
+++ b/storage/xtradb/include/fts0fts.h
@@ -0,0 +1,1042 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0fts.h
+Full text search header file
+
+Created 2011/09/02 Sunny Bains
+***********************************************************************/
+
+#ifndef fts0fts_h
+#define fts0fts_h
+
+#include "univ.i"
+
+#include "data0type.h"
+#include "data0types.h"
+#include "dict0types.h"
+#include "hash0hash.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "row0types.h"
+#include "trx0types.h"
+#include "ut0vec.h"
+#include "ut0rbt.h"
+#include "ut0wqueue.h"
+#include "que0types.h"
+#include "ft_global.h"
+
+/** "NULL" value of a document id. */
+#define FTS_NULL_DOC_ID 0
+
+/** FTS hidden column that is used to map to and from the row */
+#define FTS_DOC_ID_COL_NAME "FTS_DOC_ID"
+
+/** The name of the index created by FTS */
+#define FTS_DOC_ID_INDEX_NAME "FTS_DOC_ID_INDEX"
+
+#define FTS_DOC_ID_INDEX_NAME_LEN 16
+
+/** Doc ID is a 8 byte value */
+#define FTS_DOC_ID_LEN 8
+
+/** The number of fields to sort when we build FT index with
+FIC. Three fields are sorted: (word, doc_id, position) */
+#define FTS_NUM_FIELDS_SORT 3
+
+/** Maximum number of rows in a table, smaller than which, we will
+optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */
+#define MAX_DOC_ID_OPT_VAL 1073741824
+
+/** Document id type. */
+typedef ib_uint64_t doc_id_t;
+
+/** doc_id_t printf format */
+#define FTS_DOC_ID_FORMAT IB_ID_FMT
+
+/** Convert document id to the InnoDB (BIG ENDIAN) storage format. */
+#define fts_write_doc_id(d, s) mach_write_to_8(d, s)
+
+/** Read a document id to internal format. */
+#define fts_read_doc_id(s) mach_read_from_8(s)
+
+/** Bind the doc id to a variable */
+#define fts_bind_doc_id(i, n, v) pars_info_bind_int8_literal(i, n, v)
+
+/** Defines for FTS query mode, they have the same values as
+those defined in mysql file ft_global.h */
+#define FTS_NL 0
+#define FTS_BOOL 1
+#define FTS_SORTED 2
+#define FTS_EXPAND 4
+#define FTS_PROXIMITY 8
+#define FTS_PHRASE 16
+#define FTS_OPT_RANKING 32
+
+#define FTS_INDEX_TABLE_IND_NAME "FTS_INDEX_TABLE_IND"
+
+/** Threshold where our optimize thread automatically kicks in */
+#define FTS_OPTIMIZE_THRESHOLD 10000000
+
+#define FTS_DOC_ID_MAX_STEP 10000
+/** Variable specifying the FTS parallel sort degree */
+extern ulong fts_sort_pll_degree;
+
+/** Variable specifying the number of words to optimize for each optimize
+table call */
+extern ulong fts_num_word_optimize;
+
+/** Variable specifying whether we do additional FTS diagnostic printout
+in the log */
+extern char fts_enable_diag_print;
+
+/** FTS rank type, which will be between 0 .. 1 inclusive */
+typedef float fts_rank_t;
+
+/** Type of a row during a transaction. FTS_NOTHING means the row can be
+forgotten from the FTS system's POV, FTS_INVALID is an internal value used
+to mark invalid states.
+
+NOTE: Do not change the order or value of these, fts_trx_row_get_new_state
+depends on them being exactly as they are. */
+enum fts_row_state {
+ FTS_INSERT = 0,
+ FTS_MODIFY,
+ FTS_DELETE,
+ FTS_NOTHING,
+ FTS_INVALID
+};
+
+/** The FTS table types. */
+enum fts_table_type_t {
+ FTS_INDEX_TABLE, /*!< FTS auxiliary table that is
+ specific to a particular FTS index
+ on a table */
+
+ FTS_COMMON_TABLE /*!< FTS auxiliary table that is common
+ for all FTS index on a table */
+};
+
+struct fts_doc_t;
+struct fts_cache_t;
+struct fts_token_t;
+struct fts_doc_ids_t;
+struct fts_index_cache_t;
+
+
+/** Initialize the "fts_table" for internal query into FTS auxiliary
+tables.
+Sets the suffix, type, table_id, parent and table members of *fts_table
+from the given suffix, type and parent table.
+Every macro argument is parenthesized in the expansion so that an
+expression argument (for example a dereference or a conditional
+expression) binds as a unit. fts_table and m_table are expanded more
+than once, so pass expressions without side effects.
+NOTE(review): the expansion still ends in a semicolon after "while (0)",
+which defeats the do/while(0) idiom (the macro cannot be the unbraced
+body of an if that has an else). It is retained because the callers are
+not visible from this header; confirm that every caller supplies its own
+semicolon, then drop it. */
+#define FTS_INIT_FTS_TABLE(fts_table, m_suffix, m_type, m_table)\
+do { \
+ (fts_table)->suffix = (m_suffix); \
+ (fts_table)->type = (m_type); \
+ (fts_table)->table_id = (m_table)->id; \
+ (fts_table)->parent = (m_table)->name; \
+ (fts_table)->table = (m_table); \
+} while (0);
+
+/** Initialize the "fts_table" for internal query into the FTS auxiliary
+tables of one index.
+Sets the suffix, type, table_id, parent, table and index_id members of
+*fts_table from the given suffix, type and index (the table is taken
+from m_index->table).
+Every macro argument is parenthesized in the expansion so that an
+expression argument binds as a unit. fts_table and m_index are expanded
+more than once, so pass expressions without side effects.
+NOTE(review): the expansion still ends in a semicolon after "while (0)",
+which defeats the do/while(0) idiom (the macro cannot be the unbraced
+body of an if that has an else). It is retained because the callers are
+not visible from this header; confirm that every caller supplies its own
+semicolon, then drop it. */
+#define FTS_INIT_INDEX_TABLE(fts_table, m_suffix, m_type, m_index)\
+do { \
+ (fts_table)->suffix = (m_suffix); \
+ (fts_table)->type = (m_type); \
+ (fts_table)->table_id = (m_index)->table->id; \
+ (fts_table)->parent = (m_index)->table->name; \
+ (fts_table)->table = (m_index)->table; \
+ (fts_table)->index_id = (m_index)->id; \
+} while (0);
+
+/** Information about changes in a single transaction affecting
+the FTS system. */
+struct fts_trx_t {
+ trx_t* trx; /*!< InnoDB transaction */
+
+ ib_vector_t* savepoints; /*!< Active savepoints, must have at
+ least one element, the implied
+ savepoint */
+ ib_vector_t* last_stmt; /*!< last_stmt */
+
+ mem_heap_t* heap; /*!< heap */
+};
+
+/** Information required for transaction savepoint handling. */
+struct fts_savepoint_t {
+ char* name; /*!< First entry is always NULL, the
+ default instance. Otherwise the name
+ of the savepoint */
+
+ ib_rbt_t* tables; /*!< Modified FTS tables */
+};
+
+/** Information about changed rows in a transaction for a single table. */
+struct fts_trx_table_t {
+ dict_table_t* table; /*!< table */
+
+ fts_trx_t* fts_trx; /*!< link to parent */
+
+ ib_rbt_t* rows; /*!< rows changed; indexed by doc-id,
+ cells are fts_trx_row_t* */
+
+ fts_doc_ids_t* added_doc_ids; /*!< list of added doc ids (NULL until
+ the first addition) */
+
+ /*!< for adding doc ids */
+ que_t* docs_added_graph;
+};
+
+/** Information about one changed row in a transaction. */
+struct fts_trx_row_t {
+ doc_id_t doc_id; /*!< Id of the ins/upd/del document */
+
+ fts_row_state state; /*!< state of the row */
+
+ ib_vector_t* fts_indexes; /*!< The indexes that are affected */
+};
+
+/** List of document ids that were added during a transaction. This
+list is passed on to a background 'Add' thread and OPTIMIZE, so it
+needs its own memory heap. */
+struct fts_doc_ids_t {
+ ib_vector_t* doc_ids; /*!< document ids (each element is
+ of type doc_id_t). */
+
+ ib_alloc_t* self_heap; /*!< Allocator used to create an
+ instance of this type and the
+ doc_ids vector */
+};
+
+// FIXME: Get rid of this if possible.
+/** Since MySQL's character set support for Unicode is woefully inadequate
+(it supports basic operations like isalpha etc. only for 8-bit characters),
+we have to implement our own. We use UTF-16 without surrogate processing
+as our in-memory format. This typedef is a single such character. */
+typedef unsigned short ib_uc_t;
+
+/** A UTF-16 or UTF-8 string. */
+struct fts_string_t {
+ byte* f_str; /*!< string, not necessarily terminated in
+ any way */
+ ulint f_len; /*!< Length of the string in bytes */
+ ulint f_n_char; /*!< Number of characters */
+};
+
+/** Query ranked doc ids. */
+struct fts_ranking_t {
+ doc_id_t doc_id; /*!< Document id */
+
+ fts_rank_t rank; /*!< Rank is between 0 .. 1 */
+
+ byte* words; /*!< this contains the words
+ that were queried
+ and found in this document */
+ ulint words_len; /*!< words len */
+};
+
+/** Query result. */
+struct fts_result_t {
+ ib_rbt_node_t* current; /*!< Current element */
+
+ ib_rbt_t* rankings_by_id; /*!< RB tree of type fts_ranking_t
+ indexed by doc id */
+ ib_rbt_t* rankings_by_rank;/*!< RB tree of type fts_ranking_t
+ indexed by rank */
+};
+
+/** This is used to generate the FTS auxiliary table name, we need the
+table id and the index id to generate the column specific FTS auxiliary
+table name. */
+struct fts_table_t {
+ const char* parent; /*!< Parent table name, this is
+ required only for the database
+ name */
+
+ fts_table_type_t
+ type; /*!< The auxiliary table type */
+
+ table_id_t table_id; /*!< The table id */
+
+ index_id_t index_id; /*!< The index id */
+
+ const char* suffix; /*!< The suffix of the fts auxiliary
+ table name, can be NULL, not used
+ everywhere (yet) */
+ const dict_table_t*
+ table; /*!< Parent table */
+ CHARSET_INFO* charset; /*!< charset info if it is for FTS
+ index auxiliary table */
+};
+
+/** FTS status flags. Each enumerator is a distinct power of two.
+NOTE(review): the BG_THREAD_STOP description below talks about the ADDED
+table rather than about stopping a thread -- confirm it is still accurate. */
+enum fts_status {
+ BG_THREAD_STOP = 1, /*!< TRUE if the FTS background thread
+ has finished reading the ADDED table,
+ meaning more items can be added to
+ the table. */
+
+ BG_THREAD_READY = 2, /*!< TRUE if the FTS background thread
+ is ready */
+
+ ADD_THREAD_STARTED = 4, /*!< TRUE if the FTS add thread
+ has started */
+
+ ADDED_TABLE_SYNCED = 8, /*!< TRUE if the ADDED table record is
+ sync-ed after crash recovery */
+
+ TABLE_DICT_LOCKED = 16 /*!< Set if the table has
+ dict_sys->mutex */
+};
+
+typedef enum fts_status fts_status_t;
+
+/** The state of the FTS sub system. */
+struct fts_t {
+ /** Mutex protecting bg_threads* and fts_add_wq
+ (presumably the add_wq member below -- TODO confirm). */
+ ib_mutex_t bg_threads_mutex;
+
+ ulint bg_threads; /*!< number of background threads
+ accessing this table */
+
+ /* NOTE(review): "TRUE if background threads running should
+ stop themselves" -- this description has no member of its own
+ and looks like a leftover; confirm whether it is now expressed
+ through fts_status. */
+ ulint fts_status; /*!< Status bits regarding the fts
+ running state */
+
+ ib_wqueue_t* add_wq; /*!< Work queue for scheduling jobs
+ for the FTS 'Add' thread, or NULL
+ if the thread has not yet been
+ created. Each work item is a
+ fts_trx_doc_ids_t*. */
+
+ fts_cache_t* cache; /*!< FTS memory buffer for this table,
+ or NULL if the table has no FTS
+ index. */
+
+ ulint doc_col; /*!< FTS doc id hidden column number
+ in the CLUSTERED index. */
+
+ ib_vector_t* indexes; /*!< Vector of FTS indexes, this is
+ mainly for caching purposes. */
+ mem_heap_t* fts_heap; /*!< heap for fts_t allocation */
+};
+
+struct fts_stopword_t;
+
+/** status bits for fts_stopword_t status field. */
+#define STOPWORD_NOT_INIT 0x1
+#define STOPWORD_OFF 0x2
+#define STOPWORD_FROM_DEFAULT 0x4
+#define STOPWORD_USER_TABLE 0x8
+
+extern const char* fts_default_stopword[];
+
+/** Variable specifying the maximum FTS cache size for each table */
+extern ulong fts_max_cache_size;
+
+/** Variable specifying the total memory allocated for FTS cache */
+extern ulong fts_max_total_cache_size;
+
+/** Variable specifying the FTS result cache limit for each query */
+extern ulong fts_result_cache_limit;
+
+/** Variable specifying the maximum FTS token size */
+extern ulong fts_max_token_size;
+
+/** Variable specifying the minimum FTS token size */
+extern ulong fts_min_token_size;
+
+/** Whether the total memory used for FTS cache is exhausted, and we will
+need a sync to free some memory */
+extern bool fts_need_sync;
+
+/** Maximum possible Fulltext word length */
+#define FTS_MAX_WORD_LEN HA_FT_MAXBYTELEN
+
+/** Maximum possible Fulltext word length (in characters) */
+#define FTS_MAX_WORD_LEN_IN_CHAR HA_FT_MAXCHARLEN
+
+/** Variable specifying the table that has Fulltext index to display its
+content through information schema table */
+extern char* fts_internal_tbl_name;
+
+/** Free a query graph with que_graph_free() while holding dict_sys->mutex.
+The macro acquires and releases the mutex itself.
+NOTE(review): presumably it must not be used when the caller already holds
+dict_sys->mutex -- confirm. */
+#define fts_que_graph_free(graph) \
+do { \
+ mutex_enter(&dict_sys->mutex); \
+ que_graph_free(graph); \
+ mutex_exit(&dict_sys->mutex); \
+} while (0)
+
+/******************************************************************//**
+Create a FTS cache. */
+UNIV_INTERN
+fts_cache_t*
+fts_cache_create(
+/*=============*/
+ dict_table_t* table); /*!< table owns the FTS cache */
+
+/******************************************************************//**
+Create a FTS index cache.
+@return Index Cache */
+UNIV_INTERN
+fts_index_cache_t*
+fts_cache_index_cache_create(
+/*=========================*/
+ dict_table_t* table, /*!< in: table with FTS index */
+ dict_index_t* index); /*!< in: FTS index */
+
+/******************************************************************//**
+Get the next available document id. This function creates a new
+transaction to generate the document id.
+@return DB_SUCCESS if OK */
+UNIV_INTERN
+dberr_t
+fts_get_next_doc_id(
+/*================*/
+ const dict_table_t* table, /*!< in: table */
+ doc_id_t* doc_id) /*!< out: new document id */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Update the next and last Doc ID in the CONFIG table to be the input
+"doc_id" value (+ 1). We would do so after each FTS index build or
+table truncate */
+UNIV_INTERN
+void
+fts_update_next_doc_id(
+/*===================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const dict_table_t* table, /*!< in: table */
+ const char* table_name, /*!< in: table name, or NULL */
+ doc_id_t doc_id) /*!< in: DOC ID to set */
+ __attribute__((nonnull(2)));
+
+/******************************************************************//**
+Create a new document id.
+@return DB_SUCCESS if all went well else error */
+UNIV_INTERN
+dberr_t
+fts_create_doc_id(
+/*==============*/
+ dict_table_t* table, /*!< in: row is of this
+ table. */
+ dtuple_t* row, /*!< in/out: add doc id
+ value to this row. This is the
+ current row that is being
+ inserted. */
+ mem_heap_t* heap) /*!< in: heap */
+ __attribute__((nonnull));
+/******************************************************************//**
+Create a new fts_doc_ids_t.
+@return new fts_doc_ids_t. */
+UNIV_INTERN
+fts_doc_ids_t*
+fts_doc_ids_create(void);
+/*=====================*/
+
+/******************************************************************//**
+Free a fts_doc_ids_t. */
+UNIV_INTERN
+void
+fts_doc_ids_free(
+/*=============*/
+ fts_doc_ids_t* doc_ids); /*!< in: doc_ids to free */
+
+/******************************************************************//**
+Notify the FTS system about an operation on an FTS-indexed table. */
+UNIV_INTERN
+void
+fts_trx_add_op(
+/*===========*/
+ trx_t* trx, /*!< in: InnoDB transaction */
+ dict_table_t* table, /*!< in: table */
+ doc_id_t doc_id, /*!< in: doc id */
+ fts_row_state state, /*!< in: state of the row */
+ ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
+ (NULL=all) */
+ __attribute__((nonnull(1,2)));
+
+/******************************************************************//**
+Free an FTS trx. */
+UNIV_INTERN
+void
+fts_trx_free(
+/*=========*/
+ fts_trx_t* fts_trx); /*!< in, own: FTS trx */
+
+/******************************************************************//**
+Creates the common ancillary tables needed for supporting an FTS index
+on the given table. row_mysql_lock_data_dictionary must have been
+called before this.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_create_common_tables(
+/*=====================*/
+ trx_t* trx, /*!< in: transaction handle */
+ const dict_table_t*
+ table, /*!< in: table with one FTS
+ index */
+ const char* name, /*!< in: table name */
+ bool skip_doc_id_index) /*!< in: Skip index on doc id */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Wrapper function of fts_create_index_tables_low(), create auxiliary
+tables for an FTS index
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_create_index_tables(
+/*====================*/
+ trx_t* trx, /*!< in: transaction handle */
+ const dict_index_t* index) /*!< in: the FTS index
+ instance */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Creates the column specific ancillary tables needed for supporting an
+FTS index on the given table. row_mysql_lock_data_dictionary must have
+been called before this.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_create_index_tables_low(
+/*========================*/
+ trx_t* trx, /*!< in: transaction handle */
+ const dict_index_t*
+ index, /*!< in: the FTS index
+ instance */
+ const char* table_name, /*!< in: the table name */
+ table_id_t table_id) /*!< in: the table id */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Add the FTS document id hidden column. */
+UNIV_INTERN
+void
+fts_add_doc_id_column(
+/*==================*/
+ dict_table_t* table, /*!< in/out: Table with FTS index */
+ mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
+ __attribute__((nonnull(1)));
+
+/*********************************************************************//**
+Drops the ancillary tables needed for supporting an FTS index on the
+given table. row_mysql_lock_data_dictionary must have been called before
+this.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_drop_tables(
+/*============*/
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table) /*!< in: table has the FTS
+ index */
+ __attribute__((nonnull));
+/******************************************************************//**
+The given transaction is about to be committed; do whatever is necessary
+from the FTS system's POV.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_commit(
+/*=======*/
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull, warn_unused_result));
+
+/*******************************************************************//**
+FTS Query entry point.
+@return DB_SUCCESS if successful otherwise error code */
+UNIV_INTERN
+dberr_t
+fts_query(
+/*======*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS index to search */
+ uint flags, /*!< in: FTS search mode */
+ const byte* query, /*!< in: FTS query */
+ ulint query_len, /*!< in: FTS query string len
+ in bytes */
+ fts_result_t** result) /*!< out: query result, to be
+ freed by the caller.*/
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************************//**
+Retrieve the FTS Relevance Ranking result for doc with doc_id
+@return the relevance ranking value. */
+UNIV_INTERN
+float
+fts_retrieve_ranking(
+/*=================*/
+ fts_result_t* result, /*!< in: FTS result structure */
+ doc_id_t doc_id); /*!< in: doc id of the document
+ of interest */
+
+/******************************************************************//**
+FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */
+UNIV_INTERN
+void
+fts_query_sort_result_on_rank(
+/*==========================*/
+ fts_result_t* result); /*!< out: result instance
+ to sort.*/
+
+/******************************************************************//**
+FTS Query free result, returned by fts_query(). */
+UNIV_INTERN
+void
+fts_query_free_result(
+/*==================*/
+ fts_result_t* result); /*!< in: result instance
+ to free.*/
+
+/******************************************************************//**
+Extract the doc id from the FTS hidden column. */
+UNIV_INTERN
+doc_id_t
+fts_get_doc_id_from_row(
+/*====================*/
+ dict_table_t* table, /*!< in: table */
+ dtuple_t* row); /*!< in: row whose FTS doc id we
+ want to extract.*/
+
+/******************************************************************//**
+Extract the doc id from the FTS hidden column. */
+UNIV_INTERN
+doc_id_t
+fts_get_doc_id_from_rec(
+/*====================*/
+ dict_table_t* table, /*!< in: table */
+ const rec_t* rec, /*!< in: rec */
+ mem_heap_t* heap); /*!< in: heap */
+
+/******************************************************************//**
+Update the query graph with a new document id.
+@return Doc ID used */
+UNIV_INTERN
+doc_id_t
+fts_update_doc_id(
+/*==============*/
+ dict_table_t* table, /*!< in: table */
+ upd_field_t* ufield, /*!< out: update node */
+ doc_id_t* next_doc_id); /*!< out: buffer for writing */
+
+/******************************************************************//**
+FTS initialize. */
+UNIV_INTERN
+void
+fts_startup(void);
+/*==============*/
+
+/******************************************************************//**
+Signal FTS threads to initiate shutdown. */
+UNIV_INTERN
+void
+fts_start_shutdown(
+/*===============*/
+ dict_table_t* table, /*!< in: table with FTS
+ indexes */
+ fts_t* fts); /*!< in: fts instance to
+ shutdown */
+
+/******************************************************************//**
+Wait for FTS threads to shutdown. */
+UNIV_INTERN
+void
+fts_shutdown(
+/*=========*/
+ dict_table_t* table, /*!< in: table with FTS
+ indexes */
+ fts_t* fts); /*!< in: fts instance to
+ shutdown */
+
+/******************************************************************//**
+Create an instance of fts_t.
+@return instance of fts_t */
+UNIV_INTERN
+fts_t*
+fts_create(
+/*=======*/
+ dict_table_t* table); /*!< out: table with FTS
+ indexes */
+
+/**********************************************************************//**
+Free the FTS resources. */
+UNIV_INTERN
+void
+fts_free(
+/*=====*/
+ dict_table_t* table); /*!< in/out: table with
+ FTS indexes */
+
+/*********************************************************************//**
+Run OPTIMIZE on the given table.
+@return DB_SUCCESS if all OK */
+UNIV_INTERN
+dberr_t
+fts_optimize_table(
+/*===============*/
+ dict_table_t* table) /*!< in: table to optimize */
+ __attribute__((nonnull));
+
+/**********************************************************************//**
+Startup the optimize thread and create the work queue. */
+UNIV_INTERN
+void
+fts_optimize_init(void);
+/*====================*/
+
+/**********************************************************************//**
+Check whether the work queue is initialized.
+@return TRUE if optimize queue is initialized. */
+UNIV_INTERN
+ibool
+fts_optimize_is_init(void);
+/*======================*/
+
+/****************************************************************//**
+Drops index ancillary tables for a FTS index
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_drop_index_tables(
+/*==================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index) /*!< in: Index to drop */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************************//**
+Remove the table from the OPTIMIZER's list. We do wait for
+acknowledgement from the consumer of the message. */
+UNIV_INTERN
+void
+fts_optimize_remove_table(
+/*======================*/
+ dict_table_t* table); /*!< in: table to remove */
+
+/**********************************************************************//**
+Signal the optimize thread to prepare for shutdown. */
+UNIV_INTERN
+void
+fts_optimize_start_shutdown(void);
+/*==============================*/
+
+/**********************************************************************//**
+Inform optimize to clean up. */
+UNIV_INTERN
+void
+fts_optimize_end(void);
+/*===================*/
+
+/**********************************************************************//**
+Take a FTS savepoint. */
+UNIV_INTERN
+void
+fts_savepoint_take(
+/*===============*/
+ trx_t* trx, /*!< in: transaction */
+ const char* name) /*!< in: savepoint name */
+ __attribute__((nonnull));
+/**********************************************************************//**
+Refresh last statement savepoint. */
+UNIV_INTERN
+void
+fts_savepoint_laststmt_refresh(
+/*===========================*/
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
+/**********************************************************************//**
+Release the savepoint data identified by name. */
+UNIV_INTERN
+void
+fts_savepoint_release(
+/*==================*/
+ trx_t* trx, /*!< in: transaction */
+ const char* name); /*!< in: savepoint name */
+
+/**********************************************************************//**
+Free the FTS cache. */
+UNIV_INTERN
+void
+fts_cache_destroy(
+/*==============*/
+ fts_cache_t* cache); /*!< in: cache*/
+
+/*********************************************************************//**
+Clear cache. If the shutdown flag is TRUE then the cache can contain
+data that needs to be freed. For regular clear as part of normal
+working we assume the caller has freed all resources. */
+UNIV_INTERN
+void
+fts_cache_clear(
+/*============*/
+ fts_cache_t* cache, /*!< in: cache */
+ ibool free_words); /*!< in: TRUE if free
+ in memory word cache. */
+
+/*********************************************************************//**
+Initialize things in cache. */
+UNIV_INTERN
+void
+fts_cache_init(
+/*===========*/
+ fts_cache_t* cache); /*!< in: cache */
+
+/*********************************************************************//**
+Rollback to and including savepoint identified by name. */
+UNIV_INTERN
+void
+fts_savepoint_rollback(
+/*===================*/
+ trx_t* trx, /*!< in: transaction */
+ const char* name); /*!< in: savepoint name */
+
+/*********************************************************************//**
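+Roll back the last statement's FTS savepoint. NOTE(review): wording inferred
+from the function name; the previous text was copied from
+fts_savepoint_rollback() although this function takes no name -- confirm. */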
+UNIV_INTERN
+void
+fts_savepoint_rollback_last_stmt(
+/*=============================*/
+ trx_t* trx); /*!< in: transaction */
+
+/***********************************************************************//**
+Drop all orphaned FTS auxiliary tables, those that don't have a parent
+table or FTS index defined on them. */
+UNIV_INTERN
+void
+fts_drop_orphaned_tables(void);
+/*==========================*/
+
+/******************************************************************//**
+Since we do a horizontal split on the index table, we need to drop
+all the split tables.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_drop_index_split_tables(
+/*========================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index) /*!< in: fts instance */
+ __attribute__((nonnull, warn_unused_result));
+
+/****************************************************************//**
+Run SYNC on the table, i.e., write out data from the cache to the
+FTS auxiliary INDEX table and clear the cache at the end. */
+UNIV_INTERN
+void
+fts_sync_table(
+/*===========*/
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
+
+/****************************************************************//**
+Free the query graph but check whether dict_sys->mutex is already
+held */
+UNIV_INTERN
+void
+fts_que_graph_free_check_lock(
+/*==========================*/
+ fts_table_t* fts_table, /*!< in: FTS table */
+ const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
+ que_t* graph); /*!< in: query graph */
+
+/****************************************************************//**
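+Get the charset of an FTS index. NOTE(review): inferred from the function
+name and its CHARSET_INFO* return type; the previous text ("Create an FTS
+index cache.") did not match this declaration -- confirm. */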
+UNIV_INTERN
+CHARSET_INFO*
+fts_index_get_charset(
+/*==================*/
+ dict_index_t* index); /*!< in: FTS index */
+
+/*********************************************************************//**
+Get the initial Doc ID by consulting the CONFIG table
+@return initial Doc ID */
+UNIV_INTERN
+doc_id_t
+fts_init_doc_id(
+/*============*/
+ const dict_table_t* table); /*!< in: table */
+
+/******************************************************************//**
+Compare two character strings according to their charset. */
+extern
+int
+innobase_fts_text_cmp(
+/*==================*/
+ const void* cs, /*!< in: Character set */
+ const void* p1, /*!< in: key */
+ const void* p2); /*!< in: node */
+
+/******************************************************************//**
+Makes all characters in a string lower case. */
+extern
+size_t
+innobase_fts_casedn_str(
+/*====================*/
+ CHARSET_INFO* cs, /*!< in: Character set */
+ char* src, /*!< in: string to put in
+ lower case */
+ size_t src_len, /*!< in: input string length */
+ char* dst, /*!< out: buffer for result
+ string */
+ size_t dst_len); /*!< in: size of the dst buffer */
+
+
+/******************************************************************//**
+Compare two character strings according to their charset; judging by the
+function name this is the prefix-matching variant (TODO confirm). */
+extern
+int
+innobase_fts_text_cmp_prefix(
+/*=========================*/
+ const void* cs, /*!< in: Character set */
+ const void* p1, /*!< in: key */
+ const void* p2); /*!< in: node */
+
+/*************************************************************//**
+Get the next token from the given string and store it in *token. */
+extern
+ulint
+innobase_mysql_fts_get_token(
+/*=========================*/
+ CHARSET_INFO* charset, /*!< in: Character set */
+ const byte* start, /*!< in: start of text */
+ const byte* end, /*!< in: one character past
+ end of text */
+ fts_string_t* token, /*!< out: token's text */
+ ulint* offset); /*!< out: offset to token,
+ measured as characters from
+ 'start' */
+
+/*********************************************************************//**
+Fetch COUNT(*) from specified table.
+@return the number of rows in the table */
+UNIV_INTERN
+ulint
+fts_get_rows_count(
+/*===============*/
+ fts_table_t* fts_table); /*!< in: fts table to read */
+
+/*************************************************************//**
+Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
+@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
+UNIV_INTERN
+doc_id_t
+fts_get_max_doc_id(
+/*===============*/
+ dict_table_t* table); /*!< in: user table */
+
+/******************************************************************//**
+Check whether user supplied stopword table exists and is of
+the right format.
+@return the stopword column charset if qualifies */
+UNIV_INTERN
+CHARSET_INFO*
+fts_valid_stopword_table(
+/*=====================*/
+ const char* stopword_table_name); /*!< in: Stopword table
+ name */
+/****************************************************************//**
+This function loads specified stopword into FTS cache
+@return TRUE if success */
+UNIV_INTERN
+ibool
+fts_load_stopword(
+/*==============*/
+ const dict_table_t*
+ table, /*!< in: Table with FTS */
+ trx_t* trx, /*!< in: Transaction */
+ const char* global_stopword_table, /*!< in: Global stopword table
+ name */
+ const char* session_stopword_table, /*!< in: Session stopword table
+ name */
+ ibool stopword_is_on, /*!< in: Whether stopword
+ option is turned on/off */
+ ibool reload); /*!< in: Whether it is during
+ reload of FTS table */
+
+/****************************************************************//**
+Create the vector of fts_get_doc_t instances.
+@return vector of fts_get_doc_t instances */
+UNIV_INTERN
+ib_vector_t*
+fts_get_docs_create(
+/*================*/
+ fts_cache_t* cache); /*!< in: fts cache */
+
+/****************************************************************//**
+Read the rows from the FTS index
+@return DB_SUCCESS if OK */
+UNIV_INTERN
+dberr_t
+fts_table_fetch_doc_ids(
+/*====================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table, /*!< in: aux table */
+ fts_doc_ids_t* doc_ids); /*!< in: For collecting
+ doc ids */
+/****************************************************************//**
+This function brings the FTS index in sync when the FTS index is first
+used. There may be documents that were not yet synced to the auxiliary
+tables before the last abnormal server shutdown; such documents need to
+be brought into the FTS cache before any further operations.
+@return TRUE if all OK */
+UNIV_INTERN
+ibool
+fts_init_index(
+/*===========*/
+ dict_table_t* table, /*!< in: Table with FTS */
+ ibool has_cache_lock); /*!< in: Whether we already
+ have cache lock */
+/*******************************************************************//**
+Add a newly created index to the FTS cache */
+UNIV_INTERN
+void
+fts_add_index(
+/*==========*/
+ dict_index_t* index, /*!< FTS index to be added */
+ dict_table_t* table); /*!< table */
+
+/*******************************************************************//**
+Drop auxiliary tables related to an FTS index
+@return DB_SUCCESS or error number */
+UNIV_INTERN
+dberr_t
+fts_drop_index(
+/*===========*/
+ dict_table_t* table, /*!< in: Table where indexes are dropped */
+ dict_index_t* index, /*!< in: Index to be dropped */
+ trx_t* trx) /*!< in: Transaction for the drop */
+ __attribute__((nonnull));
+
+/****************************************************************//**
+Rename auxiliary tables for all fts index for a table
+@return DB_SUCCESS or error code */
+
+dberr_t
+fts_rename_aux_tables(
+/*==================*/
+ dict_table_t* table, /*!< in: user Table */
+ const char* new_name, /*!< in: new table name */
+ trx_t* trx); /*!< in: transaction */
+
+/*******************************************************************//**
+Check indexes in the fts->indexes is also present in index cache and
+table->indexes list
+@return TRUE if all indexes match */
+UNIV_INTERN
+ibool
+fts_check_cached_index(
+/*===================*/
+ dict_table_t* table); /*!< in: Table where indexes are dropped */
+#endif /*!< fts0fts.h */
+
diff --git a/storage/xtradb/include/fts0opt.h b/storage/xtradb/include/fts0opt.h
new file mode 100644
index 00000000000..92eaf8270d2
--- /dev/null
+++ b/storage/xtradb/include/fts0opt.h
@@ -0,0 +1,37 @@
+/*****************************************************************************
+
+Copyright (c) 2001, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0opt.h
+Full Text Search optimize thread
+
+Created 2011-02-15 Jimmy Yang
+***********************************************************************/
+#ifndef INNODB_FTS0OPT_H
+#define INNODB_FTS0OPT_H
+
+/********************************************************************
+Callback function to fetch the rows in an FTS INDEX record. */
+UNIV_INTERN
+ibool
+fts_optimize_index_fetch_node(
+/*==========================*/
+ /* out: always returns non-NULL */
+ void* row, /* in: sel_node_t* */
+ void* user_arg); /* in: pointer to ib_vector_t */
+#endif
diff --git a/storage/xtradb/include/fts0pars.h b/storage/xtradb/include/fts0pars.h
new file mode 100644
index 00000000000..50f636944e5
--- /dev/null
+++ b/storage/xtradb/include/fts0pars.h
@@ -0,0 +1,72 @@
+/* A Bison parser, made by GNU Bison 2.5. */
+
+/* Bison interface for Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+
+/* Tokens. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
+ know about them. */
+ enum yytokentype {
+ FTS_OPER = 258,
+ FTS_TEXT = 259,
+ FTS_TERM = 260,
+ FTS_NUMB = 261
+ };
+#endif
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+{
+
+/* Line 2068 of yacc.c */
+#line 61 "fts0pars.y"
+
+ int oper;
+ char* token;
+ fts_ast_node_t* node;
+
+
+
+/* Line 2068 of yacc.c */
+#line 64 "fts0pars.hh"
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+
+
diff --git a/storage/xtradb/include/fts0priv.h b/storage/xtradb/include/fts0priv.h
new file mode 100644
index 00000000000..c6aca27f6ec
--- /dev/null
+++ b/storage/xtradb/include/fts0priv.h
@@ -0,0 +1,650 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0priv.h
+Full text search internal header file
+
+Created 2011/09/02 Sunny Bains
+***********************************************************************/
+
+#ifndef INNOBASE_FTS0PRIV_H
+#define INNOBASE_FTS0PRIV_H
+
+#include "dict0dict.h"
+#include "pars0pars.h"
+#include "que0que.h"
+#include "que0types.h"
+#include "fts0types.h"
+
+/* The various states of the FTS sub system pertaining to a table with
+FTS indexes defined on it. */
+enum fts_table_state_enum {
+ /* NOTE: FTS_TABLE_STATE_RUNNING must be 0 since we insert
+ a hard coded '0' at create time
+ to the config table */
+
+ FTS_TABLE_STATE_RUNNING = 0, /*!< Auxiliary tables created OK */
+
+ FTS_TABLE_STATE_OPTIMIZING, /*!< This is a substate of RUNNING */
+
+ FTS_TABLE_STATE_DELETED /*!< All aux tables to be dropped when
+ it's safe to do so */
+};
+
+typedef enum fts_table_state_enum fts_table_state_t;
+
+/** The default time to wait for the background thread (in microseconds). */
+#define FTS_MAX_BACKGROUND_THREAD_WAIT 10000
+
+/** Maximum number of iterations to wait before we complain */
+#define FTS_BACKGROUND_THREAD_WAIT_COUNT 1000
+
+/** The maximum length of a config table parameter name in bytes */
+#define FTS_MAX_CONFIG_NAME_LEN 64
+
+/** The maximum length of the config table's value column in bytes */
+#define FTS_MAX_CONFIG_VALUE_LEN 1024
+
+/** Approx. upper limit of ilist length in bytes. */
+#define FTS_ILIST_MAX_SIZE (64 * 1024)
+
+/** FTS config table name parameters */
+
+/** The number of seconds after which an OPTIMIZE run will stop */
+#define FTS_OPTIMIZE_LIMIT_IN_SECS "optimize_checkpoint_limit"
+
+/** The next doc id */
+#define FTS_SYNCED_DOC_ID "synced_doc_id"
+
+/** The last word that was OPTIMIZED */
+#define FTS_LAST_OPTIMIZED_WORD "last_optimized_word"
+
+/** Total number of documents that have been deleted. The next_doc_id
+minus this count gives us the total number of documents. */
+#define FTS_TOTAL_DELETED_COUNT "deleted_doc_count"
+
+/** Total number of words parsed from all documents */
+#define FTS_TOTAL_WORD_COUNT "total_word_count"
+
+/** Start of optimize of an FTS index */
+#define FTS_OPTIMIZE_START_TIME "optimize_start_time"
+
+/** End of optimize for an FTS index */
+#define FTS_OPTIMIZE_END_TIME "optimize_end_time"
+
+/** User specified stopword table name */
+#define FTS_STOPWORD_TABLE_NAME "stopword_table_name"
+
+/** Whether to use (turn on/off) stopword */
+#define FTS_USE_STOPWORD "use_stopword"
+
+/** State of the FTS system for this table. It can be one of
+ RUNNING, OPTIMIZING, DELETED. */
+#define FTS_TABLE_STATE "table_state"
+
+/** The minimum length of an FTS auxiliary table name's id component
+e.g., For an auxiliary table name
+
+ FTS_<TABLE_ID>_SUFFIX
+
+This constant is for the minimum length required to store the <TABLE_ID>
+component.
+*/
+#define FTS_AUX_MIN_TABLE_ID_LENGTH 48
+
+/** Maximum length of an integer stored in the config table value column. */
+#define FTS_MAX_INT_LEN 32
+
+/******************************************************************//**
+Parse an SQL string. %s is replaced with the table's id.
+@return query graph */
+UNIV_INTERN
+que_t*
+fts_parse_sql(
+/*==========*/
+ fts_table_t* fts_table, /*!< in: FTS aux table */
+ pars_info_t* info, /*!< in: info struct, or NULL */
+ const char* sql) /*!< in: SQL string to evaluate */
+ __attribute__((nonnull(3), malloc, warn_unused_result));
+/******************************************************************//**
+Evaluate a parsed SQL statement
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_eval_sql(
+/*=========*/
+ trx_t* trx, /*!< in: transaction */
+ que_t* graph) /*!< in: Parsed statement */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Construct the name of an ancillary FTS table for the given table.
+@return own: table name, must be freed with mem_free() */
+UNIV_INTERN
+char*
+fts_get_table_name(
+/*===============*/
+ const fts_table_t*
+ fts_table) /*!< in: FTS aux table info */
+ __attribute__((nonnull, malloc, warn_unused_result));
+/******************************************************************//**
+Construct the column specification part of the SQL string for selecting the
+indexed FTS columns for the given table. Adds the necessary bound
+ids to the given 'info' and returns the SQL string. Examples:
+
+One indexed column named "text":
+
+ "$sel0",
+ info/ids: sel0 -> "text"
+
+Two indexed columns named "subject" and "content":
+
+ "$sel0, $sel1",
+ info/ids: sel0 -> "subject", sel1 -> "content",
+@return heap-allocated SQL string listing the bound column ids */
+UNIV_INTERN
+const char*
+fts_get_select_columns_str(
+/*=======================*/
+ dict_index_t* index, /*!< in: FTS index */
+ pars_info_t* info, /*!< in/out: parser info */
+ mem_heap_t* heap) /*!< in: memory heap */
+ __attribute__((nonnull, warn_unused_result));
+
+/** define for fts_doc_fetch_by_doc_id() "option" value, defines whether
+we want to get Doc whose ID is equal to or greater or smaller than supplied
+ID */
+#define FTS_FETCH_DOC_BY_ID_EQUAL 1
+#define FTS_FETCH_DOC_BY_ID_LARGE 2
+#define FTS_FETCH_DOC_BY_ID_SMALL 3
+
+/*************************************************************//**
+Fetch document (= a single row's indexed text) with the given
+document id.
+@return: DB_SUCCESS if fetch is successful, else error */
+UNIV_INTERN
+dberr_t
+fts_doc_fetch_by_doc_id(
+/*====================*/
+ fts_get_doc_t* get_doc, /*!< in: state */
+ doc_id_t doc_id, /*!< in: id of document to fetch */
+ dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
+ or NULL */
+ ulint option, /*!< in: search option, if it is
+ greater than doc_id or equal */
+ fts_sql_callback
+ callback, /*!< in: callback to read
+ records */
+ void* arg) /*!< in: callback arg */
+ __attribute__((nonnull(6)));
+
+/*******************************************************************//**
+Callback function for fetch that stores the text of an FTS document,
+converting each column to UTF-16.
+@return always FALSE */
+UNIV_INTERN
+ibool
+fts_query_expansion_fetch_doc(
+/*==========================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: fts_doc_t* */
+ __attribute__((nonnull));
+/********************************************************************
+Write out a single word's data as new entry/entries in the INDEX table.
+@return DB_SUCCESS if all OK. */
+UNIV_INTERN
+dberr_t
+fts_write_node(
+/*===========*/
+ trx_t* trx, /*!< in: transaction */
+ que_t** graph, /*!< in: query graph */
+ fts_table_t* fts_table, /*!< in: the FTS aux index */
+ fts_string_t* word, /*!< in: word in UTF-8 */
+ fts_node_t* node) /*!< in: node columns */
+ __attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Tokenize a document. */
+UNIV_INTERN
+void
+fts_tokenize_document(
+/*==================*/
+ fts_doc_t* doc, /*!< in/out: document to
+ tokenize */
+ fts_doc_t* result) /*!< out: if provided, save
+ result tokens here */
+ __attribute__((nonnull(1)));
+
+/*******************************************************************//**
+Continue to tokenize a document. */
+UNIV_INTERN
+void
+fts_tokenize_document_next(
+/*=======================*/
+ fts_doc_t* doc, /*!< in/out: document to
+ tokenize */
+ ulint add_pos, /*!< in: add this position to all
+ tokens from this tokenization */
+ fts_doc_t* result) /*!< out: if provided, save
+ result tokens here */
+ __attribute__((nonnull(1)));
+/******************************************************************//**
+Initialize a document. */
+UNIV_INTERN
+void
+fts_doc_init(
+/*=========*/
+ fts_doc_t* doc) /*!< in: doc to initialize */
+ __attribute__((nonnull));
+
+/******************************************************************//**
+Do a binary search for a doc id in the array
+@return +ve index if found -ve index where it should be
+ inserted if not found */
+UNIV_INTERN
+int
+fts_bsearch(
+/*========*/
+ fts_update_t* array, /*!< in: array to search */
+ int lower, /*!< in: lower bound of array*/
+ int upper, /*!< in: upper bound of array*/
+ doc_id_t doc_id) /*!< in: doc id to lookup */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Free document. */
+UNIV_INTERN
+void
+fts_doc_free(
+/*=========*/
+ fts_doc_t* doc) /*!< in: document */
+ __attribute__((nonnull));
+/******************************************************************//**
+Free fts_optimizer_word_t instance.*/
+UNIV_INTERN
+void
+fts_word_free(
+/*==========*/
+ fts_word_t* word) /*!< in: instance to free.*/
+ __attribute__((nonnull));
+/******************************************************************//**
+Read the rows from the FTS index
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_index_fetch_nodes(
+/*==================*/
+ trx_t* trx, /*!< in: transaction */
+ que_t** graph, /*!< in: prepared statement */
+ fts_table_t* fts_table, /*!< in: FTS aux table */
+ const fts_string_t*
+ word, /*!< in: the word to fetch */
+ fts_fetch_t* fetch) /*!< in: fetch callback.*/
+ __attribute__((nonnull));
+/******************************************************************//**
+Create a fts_optimizer_word_t instance.
+@return new instance */
+UNIV_INTERN
+fts_word_t*
+fts_word_init(
+/*==========*/
+ fts_word_t* word, /*!< in: word to initialize */
+ byte* utf8, /*!< in: UTF-8 string */
+ ulint len) /*!< in: length of string in bytes */
+ __attribute__((nonnull));
+/******************************************************************//**
+Compare two fts_trx_table_t instances, we actually compare the
+table id's here.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_trx_table_cmp(
+/*==============*/
+ const void* v1, /*!< in: id1 */
+ const void* v2) /*!< in: id2 */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Compare a table id with a trx_table_t table id.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_trx_table_id_cmp(
+/*=================*/
+ const void* p1, /*!< in: id1 */
+ const void* p2) /*!< in: id2 */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Commit a transaction.
+@return DB_SUCCESS if all OK */
+UNIV_INTERN
+dberr_t
+fts_sql_commit(
+/*===========*/
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
+/******************************************************************//**
+Rollback a transaction.
+@return DB_SUCCESS if all OK */
+UNIV_INTERN
+dberr_t
+fts_sql_rollback(
+/*=============*/
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
+/******************************************************************//**
+Parse an SQL string. %s is replaced with the table's id. Don't acquire
+the dict mutex
+@return query graph */
+UNIV_INTERN
+que_t*
+fts_parse_sql_no_dict_lock(
+/*=======================*/
+ fts_table_t* fts_table, /*!< in: table with FTS index */
+ pars_info_t* info, /*!< in: parser info */
+ const char* sql) /*!< in: SQL string to evaluate */
+ __attribute__((nonnull(3), malloc, warn_unused_result));
+/******************************************************************//**
+Get value from config table. The caller must ensure that enough
+space is allocated for value to hold the column contents
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_get_value(
+/*=================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table, /*!< in: the indexed FTS table */
+ const char* name, /*!< in: get config value for
+ this parameter name */
+ fts_string_t* value) /*!< out: value read from
+ config table */
+ __attribute__((nonnull));
+/******************************************************************//**
+Get value specific to an FTS index from the config table. The caller
+must ensure that enough space is allocated for value to hold the
+column contents.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_get_index_value(
+/*=======================*/
+ trx_t* trx, /*!< transaction */
+ dict_index_t* index, /*!< in: index */
+ const char* param, /*!< in: get config value for
+ this parameter name */
+ fts_string_t* value) /*!< out: value read from
+ config table */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Set the value in the config table for name.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_set_value(
+/*=================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table, /*!< in: the indexed FTS table */
+ const char* name, /*!< in: set config value for
+ this parameter name */
+ const fts_string_t*
+ value) /*!< in: value to update */
+ __attribute__((nonnull));
+/****************************************************************//**
+Set an ulint value in the config table.
+@return DB_SUCCESS if all OK else error code */
+UNIV_INTERN
+dberr_t
+fts_config_set_ulint(
+/*=================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table, /*!< in: the indexed FTS table */
+ const char* name, /*!< in: param name */
+ ulint int_value) /*!< in: value */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Set the value specific to an FTS index in the config table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_set_index_value(
+/*=======================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: index */
+ const char* param, /*!< in: set config value for
+ this parameter name */
+ fts_string_t* value) /*!< in: value to store in the
+ config table */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Increment the value in the config table for column name.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_increment_value(
+/*=======================*/
+ trx_t* trx, /*!< transaction */
+ fts_table_t* fts_table, /*!< in: the indexed FTS table */
+ const char* name, /*!< in: increment config value
+ for this parameter name */
+ ulint delta) /*!< in: increment by this much */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Increment the per index value in the config table for column name.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_increment_index_value(
+/*=============================*/
+ trx_t* trx, /*!< transaction */
+ dict_index_t* index, /*!< in: FTS index */
+ const char* name, /*!< in: increment config value
+ for this parameter name */
+ ulint delta) /*!< in: increment by this much */
+ __attribute__((nonnull));
+/******************************************************************//**
+Get an ulint value from the config table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_get_index_ulint(
+/*=======================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS index */
+ const char* name, /*!< in: param name */
+ ulint* int_value) /*!< out: value */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Set an ulint value in the config table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_set_index_ulint(
+/*=======================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS index */
+ const char* name, /*!< in: param name */
+ ulint int_value) /*!< in: value */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Get an ulint value from the config table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_get_ulint(
+/*=================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table, /*!< in: the indexed FTS table */
+ const char* name, /*!< in: param name */
+ ulint* int_value) /*!< out: value */
+ __attribute__((nonnull));
+/******************************************************************//**
+Search cache for word.
+@return the word node vector if found else NULL */
+UNIV_INTERN
+const ib_vector_t*
+fts_cache_find_word(
+/*================*/
+ const fts_index_cache_t*
+ index_cache, /*!< in: cache to search */
+ const fts_string_t*
+ text) /*!< in: word to search for */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Check cache for deleted doc id.
+@return TRUE if deleted */
+UNIV_INTERN
+ibool
+fts_cache_is_deleted_doc_id(
+/*========================*/
+ const fts_cache_t*
+ cache, /*!< in: cache to search */
+ doc_id_t doc_id) /*!< in: doc id to search for */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Append deleted doc ids to vector and sort the vector. */
+UNIV_INTERN
+void
+fts_cache_append_deleted_doc_ids(
+/*=============================*/
+ const fts_cache_t*
+ cache, /*!< in: cache to use */
+ ib_vector_t* vector); /*!< in: append to this vector */
+/******************************************************************//**
+Wait for the background thread to start. We poll to detect change
+of state, which is acceptable, since the wait should happen only
+once during startup.
+@return true if the thread started else FALSE (i.e timed out) */
+UNIV_INTERN
+ibool
+fts_wait_for_background_thread_to_start(
+/*====================================*/
+ dict_table_t* table, /*!< in: table to which the thread
+ is attached */
+ ulint max_wait); /*!< in: time in microseconds, if set
+ to 0 then it disables timeout
+ checking */
+#ifdef FTS_DOC_STATS_DEBUG
+/******************************************************************//**
+Get the total number of words in the FTS for a particular FTS index.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_get_total_word_count(
+/*=====================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: for this index */
+ ulint* total) /*!< out: total words */
+ __attribute__((nonnull, warn_unused_result));
+#endif
+/******************************************************************//**
+Search the index specific cache for a particular FTS index.
+@return the index specific cache else NULL */
+UNIV_INTERN
+fts_index_cache_t*
+fts_find_index_cache(
+/*================*/
+ const fts_cache_t*
+ cache, /*!< in: cache to search */
+ const dict_index_t*
+ index) /*!< in: index to search for */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Write the table id to the given buffer (including final NUL). Buffer must be
+at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
+@return number of bytes written */
+UNIV_INLINE
+int
+fts_write_object_id(
+/*================*/
+ ib_id_t id, /*!< in: a table/index id */
+ char* str) /*!< out: buffer to write the id to */
+ __attribute__((nonnull));
+/******************************************************************//**
+Read the table id from the string generated by fts_write_object_id().
+@return TRUE if parse successful */
+UNIV_INLINE
+ibool
+fts_read_object_id(
+/*===============*/
+ ib_id_t* id, /*!< out: a table id */
+ const char* str) /*!< in: buffer to read from */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Get the table id.
+@return number of bytes written */
+UNIV_INTERN
+int
+fts_get_table_id(
+/*=============*/
+ const fts_table_t*
+ fts_table, /*!< in: FTS Auxiliary table */
+ char* table_id) /*!< out: table id, must be at least
+ FTS_AUX_MIN_TABLE_ID_LENGTH bytes
+ long */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Add the table to the OPTIMIZER's list. */
+UNIV_INTERN
+void
+fts_optimize_add_table(
+/*===================*/
+ dict_table_t* table) /*!< in: table to add */
+ __attribute__((nonnull));
+/******************************************************************//**
+Optimize a table. */
+UNIV_INTERN
+void
+fts_optimize_do_table(
+/*==================*/
+ dict_table_t* table) /*!< in: table to optimize */
+ __attribute__((nonnull));
+/******************************************************************//**
+Construct the prefix name of an FTS table.
+@return own: table name, must be freed with mem_free() */
+UNIV_INTERN
+char*
+fts_get_table_name_prefix(
+/*======================*/
+ const fts_table_t*
+ fts_table) /*!< in: Auxiliary table type */
+ __attribute__((nonnull, malloc, warn_unused_result));
+/******************************************************************//**
+Add node positions. */
+UNIV_INTERN
+void
+fts_cache_node_add_positions(
+/*=========================*/
+ fts_cache_t* cache, /*!< in: cache */
+ fts_node_t* node, /*!< in: word node */
+ doc_id_t doc_id, /*!< in: doc id */
+ ib_vector_t* positions) /*!< in: fts_token_t::positions */
+ __attribute__((nonnull(2,4)));
+
+/******************************************************************//**
+Create the config table name for retrieving index specific value.
+@return index config parameter name */
+UNIV_INTERN
+char*
+fts_config_create_index_param_name(
+/*===============================*/
+ const char* param, /*!< in: base name of param */
+ const dict_index_t* index) /*!< in: index for config */
+ __attribute__((nonnull, malloc, warn_unused_result));
+
+#ifndef UNIV_NONINL
+#include "fts0priv.ic"
+#endif
+
+#endif /* INNOBASE_FTS0PRIV_H */
diff --git a/storage/xtradb/include/fts0priv.ic b/storage/xtradb/include/fts0priv.ic
new file mode 100644
index 00000000000..268bb7e2227
--- /dev/null
+++ b/storage/xtradb/include/fts0priv.ic
@@ -0,0 +1,92 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0priv.ic
+Full text search internal header file
+
+Created 2011/11/12 Sunny Bains
+***********************************************************************/
+
+/******************************************************************//**
+Write the table id to the given buffer (including final NUL). Buffer must be
+at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
+@return number of characters written, not counting the final NUL */
+UNIV_INLINE
+int
+fts_write_object_id(
+/*================*/
+ ib_id_t id, /*!< in: a table/index id */
+ char* str) /*!< out: buffer to write the id to */
+{
+ // FIXME: Use ut_snprintf(); sprintf() cannot bound the write to str
+ return(sprintf(str, UINT64PFx, id));
+}
+
+/******************************************************************//**
+Read the table id from the string generated by fts_write_object_id().
+@return TRUE if exactly one id was parsed from str */
+UNIV_INLINE
+ibool
+fts_read_object_id(
+/*===============*/
+ ib_id_t* id, /*!< out: an id */
+ const char* str) /*!< in: buffer to read from */
+{
+ return(sscanf(str, UINT64PFx, id) == 1);
+}
+
+/******************************************************************//**
+Compare two fts_trx_table_t instances by the id of the table they refer to.
+@return -1 if id1 < id2, 0 if id1 == id2, 1 if id1 > id2 */
+UNIV_INLINE
+int
+fts_trx_table_cmp(
+/*==============*/
+ const void* p1, /*!< in: pointer to a fts_trx_table_t* */
+ const void* p2) /*!< in: pointer to a fts_trx_table_t* */
+{
+ const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table;
+ const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+
+ return((table1->id > table2->id)
+ ? 1
+ : (table1->id == table2->id)
+ ? 0
+ : -1);
+}
+
+/******************************************************************//**
+Compare a table id with a fts_trx_table_t table id.
+@return -1 if *p1 < table id, 0 if equal, 1 if *p1 > table id */
+UNIV_INLINE
+int
+fts_trx_table_id_cmp(
+/*=================*/
+ const void* p1, /*!< in: pointer to a table id */
+ const void* p2) /*!< in: pointer to a fts_trx_table_t* */
+{
+ const ullint* table_id = (const ullint*) p1;
+ const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+
+ return((*table_id > table2->id)
+ ? 1
+ : (*table_id == table2->id)
+ ? 0
+ : -1);
+}
diff --git a/storage/xtradb/include/fts0tlex.h b/storage/xtradb/include/fts0tlex.h
new file mode 100644
index 00000000000..f91533803e8
--- /dev/null
+++ b/storage/xtradb/include/fts0tlex.h
@@ -0,0 +1,349 @@
+#ifndef fts0tHEADER_H
+#define fts0tHEADER_H 1
+#define fts0tIN_HEADER 1
+
+#line 6 "../include/fts0tlex.h"
+
+#line 8 "../include/fts0tlex.h"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 35
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else /* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif /* defined (__STDC__) */
+#endif /* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* An opaque pointer. */
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+
+/* For convenience, these vars (plus the bison vars far below)
+ are macros in the reentrant scanner. */
+#define yyin yyg->yyin_r
+#define yyout yyg->yyout_r
+#define yyextra yyg->yyextra_r
+#define yyleng yyg->yyleng_r
+#define yytext yyg->yytext_r
+#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
+#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
+#define yy_flex_debug yyg->yy_flex_debug_r
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ yy_size_t yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ int yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ int yy_bs_lineno; /**< The line count. */
+ int yy_bs_column; /**< The column count. */
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+
+ };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+void fts0trestart (FILE *input_file ,yyscan_t yyscanner );
+void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0t_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
+void fts0t_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void fts0t_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+void fts0tpop_buffer_state (yyscan_t yyscanner );
+
+YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
+
+void *fts0talloc (yy_size_t ,yyscan_t yyscanner );
+void *fts0trealloc (void *,yy_size_t ,yyscan_t yyscanner );
+void fts0tfree (void * ,yyscan_t yyscanner );
+
+/* Begin user sect3 */
+
+#define fts0twrap(n) 1
+#define YY_SKIP_YYWRAP
+
+#define yytext_ptr yytext_r
+
+#ifdef YY_HEADER_EXPORT_START_CONDITIONS
+#define INITIAL 0
+
+#endif
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+int fts0tlex_init (yyscan_t* scanner);
+
+int fts0tlex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */
+
+int fts0tlex_destroy (yyscan_t yyscanner );
+
+int fts0tget_debug (yyscan_t yyscanner );
+
+void fts0tset_debug (int debug_flag ,yyscan_t yyscanner );
+
+YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner );
+
+void fts0tset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
+
+FILE *fts0tget_in (yyscan_t yyscanner );
+
+void fts0tset_in (FILE * in_str ,yyscan_t yyscanner );
+
+FILE *fts0tget_out (yyscan_t yyscanner );
+
+void fts0tset_out (FILE * out_str ,yyscan_t yyscanner );
+
+int fts0tget_leng (yyscan_t yyscanner );
+
+char *fts0tget_text (yyscan_t yyscanner );
+
+int fts0tget_lineno (yyscan_t yyscanner );
+
+void fts0tset_lineno (int line_number ,yyscan_t yyscanner );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int fts0twrap (yyscan_t yyscanner );
+#else
+extern int fts0twrap (yyscan_t yyscanner );
+#endif
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
+#endif
+
+#ifndef YY_NO_INPUT
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int fts0tlex (yyscan_t yyscanner);
+
+#define YY_DECL int fts0tlex (yyscan_t yyscanner)
+#endif /* !YY_DECL */
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+#undef YY_NEW_FILE
+#undef YY_FLUSH_BUFFER
+#undef yy_set_bol
+#undef yy_new_buffer
+#undef yy_set_interactive
+#undef YY_DO_BEFORE_ACTION
+
+#ifdef YY_DECL_IS_OURS
+#undef YY_DECL_IS_OURS
+#undef YY_DECL
+#endif
+
+#line 68 "fts0tlex.l"
+
+
+#line 348 "../include/fts0tlex.h"
+#undef fts0tIN_HEADER
+#endif /* fts0tHEADER_H */
diff --git a/storage/xtradb/include/fts0types.h b/storage/xtradb/include/fts0types.h
new file mode 100644
index 00000000000..b714d326487
--- /dev/null
+++ b/storage/xtradb/include/fts0types.h
@@ -0,0 +1,473 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0types.h
+Full text search types file
+
+Created 2007-03-27 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_FTS0TYPES_H
+#define INNOBASE_FTS0TYPES_H
+
+#include "que0types.h"
+#include "ut0byte.h"
+#include "fut0fut.h"
+#include "ut0rbt.h"
+#include "fts0fts.h"
+
+/** Forward declarations of types used within FTS. */
+struct fts_que_t;
+struct fts_node_t;
+struct fts_utf8_str_t;
+
+/** Callback types used within FTS. */
+typedef pars_user_func_cb_t fts_sql_callback;
+typedef void (*fts_filter)(void*, fts_node_t*, void*, ulint len);
+
+/** Per-document statistics (doc id and word count), used during retrieval. */
+struct fts_doc_stats_t {
+ doc_id_t doc_id; /*!< Id of the document described */
+ ulint word_count; /*!< Total number of words in the document */
+};
+
+/** Its main purpose is to store the SQL prepared statements that
+are required to retrieve a document from the database. */
+struct fts_get_doc_t {
+ fts_index_cache_t*
+ index_cache; /*!< The index cache instance */
+
+ /** Parsed SQL statement (documents the member that follows) */
+ que_t* get_document_graph;
+ fts_cache_t* cache; /*!< The parent cache */
+};
+
+/** Per-index cache of words etc. One instance is kept per FTS index,
+since a table can have multiple FTS indexes. */
+struct fts_index_cache_t {
+ dict_index_t* index; /*!< The FTS index instance */
+
+ ib_rbt_t* words; /*!< Cached words; indexed by fts_string_t*,
+ cells are fts_tokenizer_word_t*. */
+
+ ib_vector_t* doc_stats; /*!< Array of the fts_doc_stats_t
+ contained in the memory buffer.
+ Must be kept sorted in ascending order.
+ An rb tree would be the ideal choice,
+ but it imposes a space overhead
+ that we can do without */
+
+ que_t** ins_graph; /*!< Insert query graphs */
+
+ que_t** sel_graph; /*!< Select query graphs */
+ CHARSET_INFO* charset; /*!< Character set information */
+};
+
+/** Tracks, for one affected document, which FTS indexes need to be
+updated when a table has several of them. For INSERT and DELETE we
+update all FTS indexes. */
+struct fts_update_t {
+ doc_id_t doc_id; /*!< The doc id affected */
+
+ ib_vector_t* fts_indexes; /*!< The FTS indexes that need to be
+ updated. A NULL value means all
+ indexes need to be updated. This
+ vector is not allocated on the heap
+ and so must be freed explicitly
+ when we are done with it */
+};
+
+/** Stop word control information. */
+struct fts_stopword_t {
+ ulint status; /*!< Status of the stopword tree */
+ ib_alloc_t* heap; /*!< The memory allocator to use */
+ ib_rbt_t* cached_stopword;/*!< This stores all active stopwords */
+ CHARSET_INFO* charset; /*!< Charset for the stopwords */
+};
+
+/** The SYNC state of the cache. There is one instance of this struct
+associated with each ADD thread. */
+struct fts_sync_t {
+ trx_t* trx; /*!< The transaction used for SYNCing
+ the cache to disk */
+ dict_table_t* table; /*!< Table with FTS index(es) */
+ ulint max_cache_size; /*!< Max size in bytes of the cache */
+ ibool cache_full; /*!< flag, when true it indicates that
+ we need to sync the cache to disk */
+ ulint lower_index; /*!< the start index of the doc id
+ vector from where to start adding
+ documents to the FTS cache */
+ ulint upper_index; /*!< max index of the doc id vector to
+ add to the FTS cache */
+ ibool interrupted; /*!< TRUE if SYNC was interrupted */
+ doc_id_t min_doc_id; /*!< The smallest doc id added to the
+ cache. It should be equal to
+ doc_ids[lower_index] */
+ doc_id_t max_doc_id; /*!< The doc id at which the cache was
+ noted as being full, we use this to
+ set the upper_limit field. NOTE(review): no field has that name; presumably upper_index - confirm */
+ ib_time_t start_time; /*!< SYNC start time */
+};
+
+/** The cache for the FTS system. It is a memory-based inverted index
+that new entries are added to, until it grows over the configured maximum
+size, at which time its contents are written to the INDEX table. */
+struct fts_cache_t {
+ rw_lock_t lock; /*!< lock protecting all access to the
+ memory buffer. FIXME: this needs to
+ be our new upgrade-capable rw-lock */
+
+ rw_lock_t init_lock; /*!< lock used for the cache
+ initialization, it has a different
+ SYNC level from the cache lock above */
+
+ ib_mutex_t optimize_lock; /*!< Lock for OPTIMIZE */
+
+ ib_mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */
+
+ ib_mutex_t doc_id_lock; /*!< Lock covering Doc ID */
+
+ ib_vector_t* deleted_doc_ids;/*!< Array of deleted doc ids, each
+ element is of type fts_update_t */
+
+ ib_vector_t* indexes; /*!< We store the stats and inverted
+ index for the individual FTS indexes
+ in this vector. Each element is
+ an instance of fts_index_cache_t */
+
+ ib_vector_t* get_docs; /*!< information required to read
+ the document from the table. Each
+ element is of type fts_doc_t. NOTE(review): fts_get_doc_t looks like the intended type - confirm */
+
+ ulint total_size; /*!< total size consumed by the ilist
+ field of all nodes. SYNC is run
+ whenever this gets too big */
+ fts_sync_t* sync; /*!< sync structure to sync data to
+ disk */
+ ib_alloc_t* sync_heap; /*!< The heap allocator, for indexes
+ and deleted_doc_ids, i.e. transient
+ objects; they are recreated after
+ a SYNC is completed */
+
+
+ ib_alloc_t* self_heap; /*!< This heap is the heap out of
+ which an instance of the cache itself
+ was created. Objects created using
+ this heap will last for the lifetime
+ of the cache */
+
+ doc_id_t next_doc_id; /*!< Next doc id */
+
+ doc_id_t synced_doc_id; /*!< Doc ID sync-ed to CONFIG table */
+
+ doc_id_t first_doc_id; /*!< first doc id since this table
+ was opened */
+
+ ulint deleted; /*!< Number of doc ids deleted since
+ last optimized. This variable is
+ covered by deleted_lock */
+
+ ulint added; /*!< Number of doc ids added since last
+ optimized. This variable is also covered
+ by deleted_lock */
+
+ fts_stopword_t stopword_info; /*!< Cached stopwords for the FTS */
+ mem_heap_t* cache_heap; /*!< Cache Heap */
+};
+
+/** Columns of the FTS auxiliary INDEX table */
+struct fts_node_t {
+ doc_id_t first_doc_id; /*!< First document id in ilist. */
+
+ doc_id_t last_doc_id; /*!< Last document id in ilist. */
+
+ byte* ilist; /*!< Binary list of documents & word
+ positions the token appears in.
+ TODO: For now, these are simply
+ ut_malloc'd, but if testing shows
+ that they waste memory unacceptably, a
+ special memory allocator will have
+ to be written */
+
+ ulint doc_count; /*!< Number of doc ids in ilist */
+
+ ulint ilist_size; /*!< Used size of ilist, in bytes. */
+
+ ulint ilist_size_alloc;
+ /*!< Allocated size of ilist, in
+ bytes */
+};
+
+/** A tokenizer word: the text of one word plus its node ilists. */
+struct fts_tokenizer_word_t {
+ fts_string_t text; /*!< Token text. */
+
+ ib_vector_t* nodes; /*!< Word node ilists, each element is
+ of type fts_node_t */
+};
+
+/** Word text plus its array of nodes as on disk in FTS index */
+struct fts_word_t {
+ fts_string_t text; /*!< Word value in UTF-8 */
+ ib_vector_t* nodes; /*!< Nodes read from disk */
+
+ ib_alloc_t* heap_alloc; /*!< For handling all allocations */
+};
+
+/** Callback, and its argument, for reading and filtering nodes that are read from FTS index */
+struct fts_fetch_t {
+ void* read_arg; /*!< Arg for the sql_callback */
+
+ fts_sql_callback
+ read_record; /*!< Callback for reading an index
+ record */
+};
+
+/** One split point used for horizontally splitting an FTS auxiliary index */
+struct fts_index_selector_t {
+ ulint value; /*!< Character value at which
+ to split */
+
+ const char* suffix; /*!< FTS aux index suffix */
+};
+
+/** This type represents a single document. */
+struct fts_doc_t {
+ fts_string_t text; /*!< document text */
+
+ ibool found; /*!< TRUE if the document was found
+ successfully in the database */
+
+ ib_rbt_t* tokens; /*!< This is filled when the document
+ is tokenized. Tokens; indexed by
+ fts_string_t*, cells are of type
+ fts_token_t* */
+
+ ib_alloc_t* self_heap; /*!< An instance of this type is
+ allocated from this heap, along
+ with any objects that have the
+ same lifespan, most notably
+ the vector of token positions */
+ CHARSET_INFO* charset; /*!< Document's charset info */
+};
+
+/** A token and its positions within a document. */
+struct fts_token_t {
+ fts_string_t text; /*!< token text */
+
+ ib_vector_t* positions; /*!< an array of the positions the
+ token is found in; each item is
+ actually a ulint. */
+};
+
+/** Table of index split points. It is defined in fts/fts0fts.c */
+extern const fts_index_selector_t fts_index_selector[];
+
+/******************************************************************//**
+Compare two UTF-8 strings, passed as untyped pointers to fts_string_t. */
+UNIV_INLINE
+int
+fts_utf8_string_cmp(
+/*================*/
+ /*!< out:
+ < 0 if p1 < p2,
+ 0 if p1 == p2,
+ > 0 if p1 > p2 */
+ const void* p1, /*!< in: key (const fts_string_t*) */
+ const void* p2); /*!< in: node (const fts_string_t*) */
+
+/******************************************************************//**
+Compare two UTF-8 strings, and return match (0) if
+passed in "key" value equals or is the prefix of the "node" value. */
+UNIV_INLINE
+int
+fts_utf8_string_cmp_prefix(
+/*=======================*/
+ /*!< out:
+ 0 if the key equals or is a prefix of the node,
+ 1 if the node is a proper prefix of the key,
+ else the non-zero result of comparing the common-length parts */
+ const void* p1, /*!< in: key (const fts_string_t*) */
+ const void* p2); /*!< in: node (const fts_string_t*) */
+
+/******************************************************************//**
+Compare the doc_ids of two fts_trx_row_t instances. */
+UNIV_INLINE
+int
+fts_trx_row_doc_id_cmp(
+/*===================*/
+ /*!< out:
+ < 0 if doc_id of p1 < doc_id of p2,
+ 0 if the doc_ids are equal,
+ > 0 if doc_id of p1 > doc_id of p2 */
+ const void* p1, /*!< in: first row (const fts_trx_row_t*) */
+ const void* p2); /*!< in: second row (const fts_trx_row_t*) */
+
+/******************************************************************//**
+Compare the doc_ids of two fts_ranking_t instances. */
+UNIV_INLINE
+int
+fts_ranking_doc_id_cmp(
+/*===================*/
+ /*!< out:
+ < 0 if doc_id of p1 < doc_id of p2,
+ 0 if the doc_ids are equal,
+ > 0 if doc_id of p1 > doc_id of p2 */
+ const void* p1, /*!< in: first ranking (const fts_ranking_t*) */
+ const void* p2); /*!< in: second ranking (const fts_ranking_t*) */
+
+/******************************************************************//**
+Compare the doc_ids of two fts_update_t instances. */
+UNIV_INLINE
+int
+fts_update_doc_id_cmp(
+/*==================*/
+ /*!< out:
+ < 0 if doc_id of p1 < doc_id of p2,
+ 0 if the doc_ids are equal,
+ > 0 if doc_id of p1 > doc_id of p2 */
+ const void* p1, /*!< in: first update (const fts_update_t*) */
+ const void* p2); /*!< in: second update (const fts_update_t*) */
+
+/******************************************************************//**
+Decode and return the integer that was encoded using our VLC scheme.*/
+UNIV_INLINE
+ulint
+fts_decode_vlc(
+/*===========*/
+ /*!< out: value decoded */
+ byte** ptr); /*!< in/out: ptr to decode from, this ptr is
+ incremented by the number of bytes decoded */
+
+/******************************************************************//**
+Duplicate an UTF-8 string. */
+UNIV_INLINE
+void
+fts_utf8_string_dup(
+/*================*/
+ /*!< out: nothing is returned;
+ dst receives a NUL-terminated
+ copy of src whose buffer is
+ allocated from heap */
+ fts_string_t* dst, /*!< out: dup to here */
+ const fts_string_t* src, /*!< in: src string */
+ mem_heap_t* heap); /*!< in: heap to use */
+
+/******************************************************************//**
+Return the length val would have if it were encoded using our VLC scheme. */
+UNIV_INLINE
+ulint
+fts_get_encoded_len(
+/*================*/
+ /*!< out: length of value
+ encoded, in bytes (1 to 5) */
+ ulint val); /*!< in: value to encode */
+
+/******************************************************************//**
+Encode an integer using our VLC scheme and return the length in bytes. */
+UNIV_INLINE
+ulint
+fts_encode_int(
+/*===========*/
+ /*!< out: length of value
+ encoded, in bytes */
+ ulint val, /*!< in: value to encode */
+ byte* buf); /*!< out: buffer, must have
+ enough space (up to 5 bytes are written) */
+
+/******************************************************************//**
+Decode a UTF-8 character.
+
+http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
+
+ Scalar Value               1st Byte  2nd Byte  3rd Byte  4th Byte
+ 00000000 0xxxxxxx          0xxxxxxx
+ 00000yyy yyxxxxxx          110yyyyy  10xxxxxx
+ zzzzyyyy yyxxxxxx          1110zzzz  10yyyyyy  10xxxxxx
+ 000uuuzz zzzzyyyy yyxxxxxx 11110uuu  10zzzzzz  10yyyyyy  10xxxxxx
+
+This function decodes UTF-8 sequences up to 6 bytes (31 bits).
+
+On error *ptr will point to the first byte that was not correctly
+decoded. This will hopefully help in resyncing the input. */
+UNIV_INLINE
+ulint
+fts_utf8_decode(
+/*============*/
+ /*!< out: UTF8_ERROR if *ptr
+ did not point to a valid
+ UTF-8 sequence, or the
+ Unicode code point. */
+ const byte** ptr); /*!< in/out: pointer to
+ UTF-8 string. The
+ pointer is advanced to
+ the start of the next
+ character. */
+
+/******************************************************************//**
+Lowercase an UTF-8 string. */
+UNIV_INLINE
+void
+fts_utf8_tolower(
+/*=============*/
+ fts_string_t* str); /*!< in/out: string; its f_str is passed to innobase_casedn_str() */
+
+/******************************************************************//**
+Get the selected FTS aux INDEX suffix. */
+UNIV_INLINE
+const char*
+fts_get_suffix(
+/*===========*/
+ ulint selected); /*!< in: selected index, used as an index into fts_index_selector[] */
+
+/********************************************************************
+Get the number of index selectors, i.e. the entries of fts_index_selector[] before the first one whose value is 0. */
+UNIV_INLINE
+ulint
+fts_get_n_selectors(void);
+/*=====================*/
+
+/******************************************************************//**
+Select the FTS auxiliary index for the given string.
+@return the index to use for the string */
+UNIV_INLINE
+ulint
+fts_select_index(
+/*=============*/
+ const CHARSET_INFO* cs, /*!< in: Charset */
+ const byte* str, /*!< in: word string */
+ ulint len); /*!< in: string length */
+
+/********************************************************************
+Select the next FTS auxiliary index for the given character.
+@return the next index to use for character */
+UNIV_INLINE
+ulint
+fts_select_next_index(
+/*==================*/
+ const CHARSET_INFO* cs, /*!< in: Charset */
+ const byte* str, /*!< in: string */
+ ulint len); /*!< in: string length */
+
+#ifndef UNIV_NONINL
+#include "fts0types.ic"
+#include "fts0vlc.ic"
+#endif
+
+#endif /* INNOBASE_FTS0TYPES_H */
diff --git a/storage/xtradb/include/fts0types.ic b/storage/xtradb/include/fts0types.ic
new file mode 100644
index 00000000000..f0dfd023a70
--- /dev/null
+++ b/storage/xtradb/include/fts0types.ic
@@ -0,0 +1,388 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0types.ic
+Full text search types.
+
+Created 2007-03-27 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_FTS0TYPES_IC
+#define INNOBASE_FTS0TYPES_IC
+
+#include <ctype.h>
+
+#include "rem0cmp.h"
+#include "ha_prototypes.h"
+
+extern const ulint UTF8_ERROR; /* value fts_utf8_decode() returns on failure */
+
+/* True if b is a valid UTF-8 continuation byte, i.e. has the bit pattern 10xxxxxx. */
+#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80)
+
+/******************************************************************//**
+Duplicate an UTF-8 string: dst receives a NUL-terminated copy of the
+bytes of src, in a buffer allocated from heap. Nothing is returned. */
+UNIV_INLINE
+void
+fts_utf8_string_dup(
+/*================*/
+ fts_string_t* dst, /*!< out: dup to here */
+ const fts_string_t* src, /*!< in: src string */
+ mem_heap_t* heap) /*!< in: heap to use */
+{
+ /* One byte more than the source length, for the terminating NUL. */
+ dst->f_str = static_cast<byte*>(mem_heap_alloc(heap, src->f_len + 1));
+
+ memcpy(dst->f_str, src->f_str, src->f_len);
+ dst->f_str[src->f_len] = 0;
+
+ /* Lengths are copied verbatim: bytes and characters. */
+ dst->f_n_char = src->f_n_char;
+ dst->f_len = src->f_len;
+}
+
+/******************************************************************//**
+Compare two fts_trx_row_t instances by doc_id.
+@return < 0 if doc_id of p1 < doc_id of p2, 0 if equal, > 0 if greater */
+UNIV_INLINE
+int
+fts_trx_row_doc_id_cmp(
+/*===================*/
+ const void* p1, /*!< in: first row (const fts_trx_row_t*) */
+ const void* p2) /*!< in: second row (const fts_trx_row_t*) */
+{
+ const fts_trx_row_t* tr1 = (const fts_trx_row_t*) p1;
+ const fts_trx_row_t* tr2 = (const fts_trx_row_t*) p2;
+ /* Compare, don't subtract: narrowing the doc_id difference to int can flip its sign or turn it into 0. */
+ return((tr1->doc_id > tr2->doc_id) - (tr1->doc_id < tr2->doc_id));
+}
+
+/******************************************************************//**
+Compare two fts_ranking_t instances by doc_id.
+@return < 0 if doc_id of p1 < doc_id of p2, 0 if equal, > 0 if greater */
+UNIV_INLINE
+int
+fts_ranking_doc_id_cmp(
+/*===================*/
+ const void* p1, /*!< in: first ranking (const fts_ranking_t*) */
+ const void* p2) /*!< in: second ranking (const fts_ranking_t*) */
+{
+ const fts_ranking_t* rk1 = (const fts_ranking_t*) p1;
+ const fts_ranking_t* rk2 = (const fts_ranking_t*) p2;
+ /* Compare, don't subtract: narrowing the doc_id difference to int can flip its sign or turn it into 0. */
+ return((rk1->doc_id > rk2->doc_id) - (rk1->doc_id < rk2->doc_id));
+}
+
+/******************************************************************//**
+Compare two fts_update_t instances by doc_id.
+@return < 0 if doc_id of p1 < doc_id of p2, 0 if equal, > 0 if greater */
+UNIV_INLINE
+int
+fts_update_doc_id_cmp(
+/*==================*/
+ const void* p1, /*!< in: first update (const fts_update_t*) */
+ const void* p2) /*!< in: second update (const fts_update_t*) */
+{
+ const fts_update_t* up1 = (const fts_update_t*) p1;
+ const fts_update_t* up2 = (const fts_update_t*) p2;
+ /* Compare, don't subtract: narrowing the doc_id difference to int can flip its sign or turn it into 0. */
+ return((up1->doc_id > up2->doc_id) - (up1->doc_id < up2->doc_id));
+}
+
+
+/******************************************************************//**
+Lowercase an UTF-8 string by handing str->f_str to innobase_casedn_str(). NOTE(review): no length is passed, so f_str presumably must be NUL-terminated - confirm. */
+UNIV_INLINE
+void
+fts_utf8_tolower(
+/*=============*/
+ fts_string_t* str) /*!< in/out: string */
+{
+ innobase_casedn_str((char*) str->f_str);
+}
+
+/******************************************************************//**
+Compare two UTF-8 strings by passing their f_str/f_len to cmp_data_data_slow_varchar().
+@return < 0 if p1 < p2, 0 if p1 == p2, > 0 if p1 > p2 */
+UNIV_INLINE
+int
+fts_utf8_string_cmp(
+/*================*/
+ const void* p1, /*!< in: key (const fts_string_t*) */
+ const void* p2) /*!< in: node (const fts_string_t*) */
+{
+ const fts_string_t* s1 = (const fts_string_t*) p1;
+ const fts_string_t* s2 = (const fts_string_t*) p2;
+
+ return(cmp_data_data_slow_varchar(
+ s1->f_str, s1->f_len, s2->f_str, s2->f_len));
+}
+
+/******************************************************************//**
+Prefix-aware comparison of two UTF-8 strings: the "key" matches (0) when
+it equals the "node" value or is a prefix of it.
+@return 0 on a match, 1 if the node is a proper prefix of the key,
+otherwise the non-zero result of comparing the common-length parts */
+UNIV_INLINE
+int
+fts_utf8_string_cmp_prefix(
+/*=======================*/
+ const void* p1, /*!< in: key */
+ const void* p2) /*!< in: node */
+{
+ const fts_string_t* key = static_cast<const fts_string_t*>(p1);
+ const fts_string_t* node = static_cast<const fts_string_t*>(p2);
+
+ /* Only the bytes that both strings have take part in the
+ comparison. */
+ const ulint common = ut_min(key->f_len, node->f_len);
+ const int diff = cmp_data_data_slow_varchar(
+ key->f_str, common, node->f_str, common);
+
+ if (diff != 0) {
+
+ return(diff);
+ }
+
+ /* The common part is identical: a key that is no longer than the
+ node is a prefix of it (or equal); a longer key sorts after it. */
+ return(key->f_len > node->f_len ? 1 : 0);
+}
+
+/******************************************************************//**
+Decode a UTF-8 character.
+
+http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
+
+ Scalar Value               1st Byte  2nd Byte  3rd Byte  4th Byte
+ 00000000 0xxxxxxx          0xxxxxxx
+ 00000yyy yyxxxxxx          110yyyyy  10xxxxxx
+ zzzzyyyy yyxxxxxx          1110zzzz  10yyyyyy  10xxxxxx
+ 000uuuzz zzzzyyyy yyxxxxxx 11110uuu  10zzzzzz  10yyyyyy  10xxxxxx
+
+This function decodes UTF-8 sequences up to 6 bytes (31 bits). No length is taken: bytes are read until the sequence ends or a byte fails the continuation test.
+
+On return *ptr is past every consumed byte. On error that is: just after a rejected lead byte, at a malformed
+continuation byte, or after the whole sequence when the decoded value is rejected. This will hopefully help in resyncing the input.
+@return UTF8_ERROR if *ptr did not point to a valid
+UTF-8 sequence, or the Unicode code point. */
+UNIV_INLINE
+ulint
+fts_utf8_decode(
+/*============*/
+ const byte** ptr) /*!< in/out: pointer to
+ UTF-8 string. The
+ pointer is advanced to
+ the start of the next
+ character. */
+{
+ const byte* p = *ptr;
+ ulint ch = *p++;
+#ifdef UNIV_DEBUG
+ ulint min_ch;
+#endif /* UNIV_DEBUG */
+
+ if (UNIV_LIKELY(ch < 0x80)) {
+ /* 0xxxxxxx: a single byte, returned as is */
+ } else if (UNIV_UNLIKELY(ch < 0xC0)) {
+ /* A continuation byte cannot start a code. */
+ goto err_exit;
+ } else if (ch < 0xE0) {
+ /* 110yyyyy 10xxxxxx: keep the 5 payload bits, then read 1 continuation byte */
+ ch &= 0x1F;
+ ut_d(min_ch = 0x80);
+ goto get1;
+ } else if (ch < 0xF0) {
+ /* 1110zzzz 10yyyyyy 10xxxxxx: 4 payload bits, 2 continuation bytes */
+ ch &= 0x0F;
+ ut_d(min_ch = 0x800);
+ goto get2;
+ } else if (ch < 0xF8) {
+ /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx: 3 payload bits, 3 continuation bytes */
+ ch &= 0x07;
+ ut_d(min_ch = 0x10000);
+ goto get3;
+ } else if (ch < 0xFC) {
+ /* 111110tt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx: 2 payload bits, 4 continuation bytes */
+ ch &= 0x03;
+ ut_d(min_ch = 0x200000);
+ goto get4;
+ } else if (ch < 0xFE) {
+ /* 1111110s 10tttttt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx (the lead byte is not masked here; see the 32-bit mask further down) */
+ ut_d(min_ch = 0x4000000);
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+get4:
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+get3:
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+get2:
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+get1:
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+
+ /* The following is needed in the 6-byte case when ulint is wider
+ than 32 bits: the unmasked lead-byte 1s end up above bit 31. */
+ ch &= 0xFFFFFFFF;
+
+ /* The code positions U+D800 to U+DFFF (UTF-16 surrogate pairs)
+ and U+FFFE and U+FFFF cannot occur in valid UTF-8. Values below min_ch are rejected in UNIV_DEBUG builds only. */
+
+ if ( (ch >= 0xD800 && ch <= 0xDFFF)
+#ifdef UNIV_DEBUG
+ || ch < min_ch
+#endif /* UNIV_DEBUG */
+ || ch == 0xFFFE || ch == 0xFFFF) {
+
+ ch = UTF8_ERROR;
+ }
+ } else {
+err_exit:
+ ch = UTF8_ERROR;
+ }
+
+ *ptr = p;
+
+ return(ch);
+}
+
+/******************************************************************//**
+Get the first character's code position for FTS index partition (declared here only; no definition in this file) */
+extern
+ulint
+innobase_strnxfrm(
+/*==============*/
+ const CHARSET_INFO* cs, /*!< in: Character set */
+ const uchar* p2, /*!< in: string */
+ const ulint len2); /*!< in: string length */
+
+/******************************************************************//**
+Select the FTS auxiliary index for the given string. The value that
+innobase_strnxfrm() yields for the string is looked up in
+fts_index_selector[], which ends at the first entry whose value is 0.
+@return the index to use for the string */
+UNIV_INLINE
+ulint
+fts_select_index(
+/*=============*/
+ const CHARSET_INFO* cs, /*!< in: Charset */
+ const byte* str, /*!< in: string */
+ ulint len) /*!< in: string length */
+{
+ const ulint code = innobase_strnxfrm(cs, str, len);
+ ulint i;
+
+ for (i = 0; fts_index_selector[i].value != 0; ++i) {
+
+ const ulint split = fts_index_selector[i].value;
+
+ if (split == code) {
+ /* Exactly on a split point. */
+ return(i);
+ }
+
+ if (split > code) {
+ /* Below this split point: use the preceding
+ entry, but never go below entry 0. */
+ return(i == 0 ? 0 : i - 1);
+ }
+ }
+
+ /* Above every split point: use the last entry. */
+ ut_ad(i > 1);
+
+ return(i - 1);
+}
+
+/******************************************************************//**
+Select the next FTS auxiliary index for the given character. The value
+that innobase_strnxfrm() yields for the string is looked up in
+fts_index_selector[], which ends at the first entry whose value is 0.
+@return the next index to use for character */
+UNIV_INLINE
+ulint
+fts_select_next_index(
+/*==================*/
+ const CHARSET_INFO* cs, /*!< in: Charset */
+ const byte* str, /*!< in: string */
+ ulint len) /*!< in: string length */
+{
+ const ulint code = innobase_strnxfrm(cs, str, len);
+ ulint i;
+
+ for (i = 0; fts_index_selector[i].value != 0; ++i) {
+
+ const ulint split = fts_index_selector[i].value;
+
+ if (split == code) {
+ /* Exactly on a split point: the next entry. */
+ return(i + 1);
+ }
+
+ if (split > code) {
+ /* First split point above the value. */
+ return(i);
+ }
+ }
+
+ /* Above every split point: the number of entries. */
+ ut_ad(i > 0);
+
+ return(i);
+}
+
+/******************************************************************//**
+Return the selected FTS aux index suffix, i.e. fts_index_selector[selected].suffix; selected is not range-checked here. */
+UNIV_INLINE
+const char*
+fts_get_suffix(
+/*===========*/
+ ulint selected) /*!< in: selected index */
+{
+ return(fts_index_selector[selected].suffix);
+}
+
+/******************************************************************//**
+Get the number of index selectors, found by walking fts_index_selector[]
+up to the first entry whose value is 0.
+@return The number of selectors */
+UNIV_INLINE
+ulint
+fts_get_n_selectors(void)
+/*=====================*/
+{
+ ulint n_selectors;
+
+ // FIXME: This is a hack
+ for (n_selectors = 0;
+ fts_index_selector[n_selectors].value != 0;
+ ++n_selectors) {
+ /* Counting only. */
+ }
+
+ return(n_selectors);
+}
+
+#endif /* INNOBASE_FTS0TYPES_IC */
diff --git a/storage/xtradb/include/fts0vlc.ic b/storage/xtradb/include/fts0vlc.ic
new file mode 100644
index 00000000000..e79bcf59347
--- /dev/null
+++ b/storage/xtradb/include/fts0vlc.ic
@@ -0,0 +1,142 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0vlc.ic
+Full text variable length integer encoding/decoding.
+
+Created 2007-03-27 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_FTS0VLC_IC
+#define INNOBASE_FTS0VLC_IC
+
+#include "fts0types.h"
+
+/******************************************************************//**
+Return the length val would have if it were encoded using our VLC scheme,
+in which every encoded byte carries 7 bits of the value.
+FIXME: We will need to be able to encode 8-byte values
+@return length of value encoded, in bytes */
+UNIV_INLINE
+ulint
+fts_get_encoded_len(
+/*================*/
+ ulint val) /* in: value to encode */
+{
+ ulint n_bytes = 1;
+
+ /* Add one byte for every further 7-bit group that still holds set
+ bits, stopping at 5 bytes.
+
+ Possibly we should care that on 64-bit machines ulint can
+ contain values that we can't encode in 5 bytes, but
+ fts_encode_int doesn't handle them either so it doesn't much
+ matter. */
+ while (n_bytes < 5 && (val >> (7 * n_bytes)) != 0) {
+ ++n_bytes;
+ }
+
+ return(n_bytes);
+}
+
+/******************************************************************//**
+Encode an integer using our VLC scheme: the value is written as 7-bit
+groups, most significant group first, and the high bit of the final byte
+is set to mark the end.
+@return length of value encoded, in bytes */
+UNIV_INLINE
+ulint
+fts_encode_int(
+/*===========*/
+ ulint val, /* in: value to encode */
+ byte* buf) /* in: buffer, must have enough space */
+{
+ const ulint n_bytes = fts_get_encoded_len(val);
+ ulint shift = 7 * (n_bytes - 1);
+
+ /* Best to keep the limitations of the 32/64 bit versions
+ identical, at least for the time being. Only a 5-byte value
+ can fail this check. */
+ ut_ad(val <= 4294967295u);
+
+ /* The leading group is stored with a plain narrowing cast and
+ no 0x7F mask; the groups after it are masked. */
+ *buf = (byte)(val >> shift);
+
+ while (shift != 0) {
+ shift -= 7;
+ *++buf = (byte)((val >> shift) & 0x7F);
+ }
+
+ /* High-bit on means "last byte in the encoded integer". */
+ *buf |= 0x80;
+
+ return(n_bytes);
+}
+
+/******************************************************************//**
+Decode and return the integer that was encoded using our VLC scheme.
+Bytes are consumed up to and including the first one whose high bit is
+set; each contributes its low 7 bits, most significant group first.
+@return value decoded */
+UNIV_INLINE
+ulint
+fts_decode_vlc(
+/*===========*/
+ byte** ptr) /* in: ptr to decode from, this ptr is
+ incremented by the number of bytes decoded */
+{
+ ulint decoded = 0;
+ byte cur;
+
+ do {
+ cur = *(*ptr)++;
+
+ decoded = (decoded << 7) | (cur & 0x7F);
+
+ /* High-bit on means "last byte in the encoded integer". */
+ } while (!(cur & 0x80));
+
+ return(decoded);
+}
+
+#endif
diff --git a/storage/xtradb/include/fut0fut.h b/storage/xtradb/include/fut0fut.h
index 6a68bfffc72..851cdb44cdf 100644
--- a/storage/xtradb/include/fut0fut.h
+++ b/storage/xtradb/include/fut0fut.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/fut0fut.ic b/storage/xtradb/include/fut0fut.ic
index b881baff13c..15c964df6c7 100644
--- a/storage/xtradb/include/fut0fut.ic
+++ b/storage/xtradb/include/fut0fut.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/fut0lst.h b/storage/xtradb/include/fut0lst.h
index c75efd2aab2..90f9a65d4fa 100644
--- a/storage/xtradb/include/fut0lst.h
+++ b/storage/xtradb/include/fut0lst.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/fut0lst.ic b/storage/xtradb/include/fut0lst.ic
index 74d00dc488e..d18cf21378f 100644
--- a/storage/xtradb/include/fut0lst.ic
+++ b/storage/xtradb/include/fut0lst.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/ha0ha.h b/storage/xtradb/include/ha0ha.h
index afa9152a317..7351b407e8c 100644
--- a/storage/xtradb/include/ha0ha.h
+++ b/storage/xtradb/include/ha0ha.h
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -45,9 +45,10 @@ ha_search_and_get_data(
ulint fold); /*!< in: folded value of the searched data */
/*********************************************************//**
Looks for an element when we know the pointer to the data and updates
-the pointer to data if found. */
+the pointer to data if found.
+@return TRUE if found */
UNIV_INTERN
-void
+ibool
ha_search_and_update_if_found_func(
/*===============================*/
hash_table_t* table, /*!< in/out: hash table */
@@ -92,8 +93,12 @@ ha_create_func(
ulint mutex_level, /*!< in: level of the mutexes in the latching
order: this is used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes); /*!< in: number of mutexes to protect the
+ ulint n_mutexes, /*!< in: number of mutexes to protect the
hash table: must be a power of 2, or 0 */
+ ulint type); /*!< in: type of datastructure for which
+ the memory heap is going to be used e.g.:
+ MEM_HEAP_FOR_BTR_SEARCH or
+ MEM_HEAP_FOR_PAGE_HASH */
#ifdef UNIV_SYNC_DEBUG
/** Creates a hash table.
@return own: created table
@@ -102,7 +107,7 @@ chosen to be a slightly bigger prime number.
@param level in: level of the mutexes in the latching order
@param n_m in: number of mutexes to protect the hash table;
must be a power of 2, or 0 */
-# define ha_create(n_c,n_m,level) ha_create_func(n_c,level,n_m)
+# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type)
#else /* UNIV_SYNC_DEBUG */
/** Creates a hash table.
@return own: created table
@@ -111,10 +116,18 @@ chosen to be a slightly bigger prime number.
@param level in: level of the mutexes in the latching order
@param n_m in: number of mutexes to protect the hash table;
must be a power of 2, or 0 */
-# define ha_create(n_c,n_m,level) ha_create_func(n_c,n_m)
+# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type)
#endif /* UNIV_SYNC_DEBUG */
/*************************************************************//**
+Empties a hash table and frees the memory heaps. */
+UNIV_INTERN
+void
+ha_clear(
+/*=====*/
+ hash_table_t* table); /*!< in, own: hash table */
+
+/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted.
@@ -143,7 +156,10 @@ is inserted.
@param f in: folded value of data
@param b in: buffer block containing the data
@param d in: data, must not be NULL */
-# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,b,d)
+# define ha_insert_for_fold(t,f,b,d) do { \
+ ha_insert_for_fold_func(t,f,b,d); \
+ MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \
+} while(0)
#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/**
Inserts an entry into a hash table. If an entry with the same fold number
@@ -154,7 +170,10 @@ is inserted.
@param f in: folded value of data
@param b ignored: buffer block containing the data
@param d in: data, must not be NULL */
-# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,d)
+# define ha_insert_for_fold(t,f,b,d) do { \
+ ha_insert_for_fold_func(t,f,d); \
+ MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \
+} while (0)
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/*********************************************************//**
@@ -202,10 +221,7 @@ ha_print_info(
#endif /* !UNIV_HOTBACKUP */
/** The hash table external chain node */
-typedef struct ha_node_struct ha_node_t;
-
-/** The hash table external chain node */
-struct ha_node_struct {
+struct ha_node_t {
ha_node_t* next; /*!< next chain node or NULL if none */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* block; /*!< buffer block containing the data, or NULL */
@@ -214,20 +230,33 @@ struct ha_node_struct {
ulint fold; /*!< fold value for the data */
};
-#ifndef UNIV_HOTBACKUP
-/** Assert that the current thread is holding the mutex protecting a
-hash bucket corresponding to a fold value.
-@param table in: hash table
-@param fold in: fold value */
-# define ASSERT_HASH_MUTEX_OWN(table, fold) \
- ut_ad(!(table)->mutexes || mutex_own(hash_get_mutex(table, fold)))
-#else /* !UNIV_HOTBACKUP */
-/** Assert that the current thread is holding the mutex protecting a
-hash bucket corresponding to a fold value.
-@param table in: hash table
-@param fold in: fold value */
-# define ASSERT_HASH_MUTEX_OWN(table, fold) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Assert that the synchronization object in a hash operation involving
+possible change in the hash table is held.
+Note that in case of mutexes we assert that mutex is owned while in case
+of rw-locks we assert that it is held in exclusive mode. */
+UNIV_INLINE
+void
+hash_assert_can_modify(
+/*===================*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold value */
+/********************************************************************//**
+Assert that the synchronization object in a hash search operation is held.
+Note that in case of mutexes we assert that mutex is owned while in case
+of rw-locks we assert that it is held either in x-mode or s-mode. */
+UNIV_INLINE
+void
+hash_assert_can_search(
+/*===================*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold value */
+#else /* UNIV_DEBUG */
+#define hash_assert_can_modify(t, f)
+#define hash_assert_can_search(t, f)
+#endif /* UNIV_DEBUG */
+
#ifndef UNIV_NONINL
#include "ha0ha.ic"
diff --git a/storage/xtradb/include/ha0ha.ic b/storage/xtradb/include/ha0ha.ic
index 4c69fe63f91..9d0e396e200 100644
--- a/storage/xtradb/include/ha0ha.ic
+++ b/storage/xtradb/include/ha0ha.ic
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -106,6 +106,56 @@ ha_chain_get_first(
hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
}
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Assert that the synchronization object in a hash operation involving
+possible change in the hash table is held.
+Note that in case of mutexes we assert that mutex is owned while in case
+of rw-locks we assert that it is held in exclusive mode. */
+UNIV_INLINE
+void
+hash_assert_can_modify(
+/*===================*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: fold value */
+{
+ if (table->type == HASH_TABLE_SYNC_MUTEX) {
+ ut_ad(mutex_own(hash_get_mutex(table, fold)));
+ } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
+# ifdef UNIV_SYNC_DEBUG
+ prio_rw_lock_t* lock = hash_get_lock(table, fold);
+ ut_ad(rw_lock_own(lock, RW_LOCK_EX));
+# endif
+ } else {
+ ut_ad(table->type == HASH_TABLE_SYNC_NONE);
+ }
+}
+
+/********************************************************************//**
+Assert that the synchronization object in a hash search operation is held.
+Note that in case of mutexes we assert that mutex is owned while in case
+of rw-locks we assert that it is held either in x-mode or s-mode. */
+UNIV_INLINE
+void
+hash_assert_can_search(
+/*===================*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: fold value */
+{
+ if (table->type == HASH_TABLE_SYNC_MUTEX) {
+ ut_ad(mutex_own(hash_get_mutex(table, fold)));
+ } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
+# ifdef UNIV_SYNC_DEBUG
+ prio_rw_lock_t* lock = hash_get_lock(table, fold);
+ ut_ad(rw_lock_own(lock, RW_LOCK_EX)
+ || rw_lock_own(lock, RW_LOCK_SHARED));
+# endif
+ } else {
+ ut_ad(table->type == HASH_TABLE_SYNC_NONE);
+ }
+}
+#endif /* UNIV_DEBUG */
+
/*************************************************************//**
Looks for an element in a hash table.
@return pointer to the data of the first hash table node in chain
@@ -119,10 +169,7 @@ ha_search_and_get_data(
{
ha_node_t* node;
- ASSERT_HASH_MUTEX_OWN(table, fold);
-#ifdef UNIV_SYNC_DEBUG
-// ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ hash_assert_can_search(table, fold);
ut_ad(btr_search_enabled);
node = ha_chain_get_first(table, fold);
@@ -152,7 +199,7 @@ ha_search_with_data(
{
ha_node_t* node;
- ASSERT_HASH_MUTEX_OWN(table, fold);
+ hash_assert_can_search(table, fold);
ut_ad(btr_search_enabled);
@@ -184,10 +231,7 @@ ha_search_and_delete_if_found(
{
ha_node_t* node;
- ASSERT_HASH_MUTEX_OWN(table, fold);
-#ifdef UNIV_SYNC_DEBUG
-// ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ hash_assert_can_modify(table, fold);
ut_ad(btr_search_enabled);
node = ha_search_with_data(table, fold, data);
diff --git a/storage/xtradb/include/ha0storage.h b/storage/xtradb/include/ha0storage.h
index 8109646a8e9..0073930b502 100644
--- a/storage/xtradb/include/ha0storage.h
+++ b/storage/xtradb/include/ha0storage.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -39,7 +39,7 @@ constant per ha_storage's lifetime. */
#define HA_STORAGE_DEFAULT_HASH_CELLS 4096
/** Hash storage */
-typedef struct ha_storage_struct ha_storage_t;
+struct ha_storage_t;
/*******************************************************************//**
Creates a hash storage. If any of the parameters is 0, then a default
diff --git a/storage/xtradb/include/ha0storage.ic b/storage/xtradb/include/ha0storage.ic
index 86f2e578090..7150ca045ec 100644
--- a/storage/xtradb/include/ha0storage.ic
+++ b/storage/xtradb/include/ha0storage.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -31,7 +31,7 @@ Created September 24, 2007 Vasil Dimov
#include "mem0mem.h"
/** Hash storage for strings */
-struct ha_storage_struct {
+struct ha_storage_t {
mem_heap_t* heap; /*!< memory heap from which memory is
allocated */
hash_table_t* hash; /*!< hash table used to avoid
@@ -39,9 +39,7 @@ struct ha_storage_struct {
};
/** Objects of this type are stored in ha_storage_t */
-typedef struct ha_storage_node_struct ha_storage_node_t;
-/** Objects of this type are stored in ha_storage_struct */
-struct ha_storage_node_struct {
+struct ha_storage_node_t {
ulint data_len;/*!< length of the data */
const void* data; /*!< pointer to data */
ha_storage_node_t* next; /*!< next node in hash chain */
diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h
index c804e3dc7af..4599547439e 100644
--- a/storage/xtradb/include/ha_prototypes.h
+++ b/storage/xtradb/include/ha_prototypes.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,9 +27,21 @@ Created 5/11/2006 Osku Salerma
#ifndef HA_INNODB_PROTOTYPES_H
#define HA_INNODB_PROTOTYPES_H
+#include "my_dbug.h"
+#include "mysqld_error.h"
+#include "my_compare.h"
+#include "my_sys.h"
+#include "m_string.h"
+#include "debug_sync.h"
+#include "my_base.h"
+
#include "trx0types.h"
#include "m_ctype.h" /* CHARSET_INFO */
+// Forward declarations
+class Field;
+struct fts_string_t;
+
/*********************************************************************//**
Wrapper around MySQL's copy_and_convert function.
@return number of bytes copied to 'to' */
@@ -43,7 +55,8 @@ innobase_convert_string(
CHARSET_INFO* to_cs, /*!< in: character set to convert to */
const void* from, /*!< in: string to convert */
ulint from_length, /*!< in: number of bytes to convert */
- CHARSET_INFO* from_cs, /*!< in: character set to convert from */
+ CHARSET_INFO* from_cs, /*!< in: character set to convert
+ from */
uint* errors); /*!< out: number of errors encountered
during the conversion */
@@ -96,7 +109,7 @@ innobase_convert_name(
ulint buflen, /*!< in: length of buf, in bytes */
const char* id, /*!< in: identifier to convert */
ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
+ THD* thd, /*!< in: MySQL connection thread, or NULL */
ibool table_id);/*!< in: TRUE=id is a table or database name;
FALSE=id is an index name */
@@ -111,7 +124,19 @@ UNIV_INTERN
ibool
thd_is_replication_slave_thread(
/*============================*/
- const void* thd); /*!< in: thread handle (THD*) */
+ THD* thd); /*!< in: thread handle */
+
+/******************************************************************//**
+Gets information on the durability property requested by thread.
+Used when writing either a prepare or commit record to the log
+buffer.
+@return the durability property. */
+UNIV_INTERN
+enum durability_properties
+thd_requested_durability(
+/*=====================*/
+ const THD* thd) /*!< in: thread handle */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Returns true if the transaction this thread is processing has edited
@@ -123,7 +148,7 @@ UNIV_INTERN
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
- void* thd); /*!< in: thread handle (THD*) */
+ THD* thd); /*!< in: thread handle */
/*************************************************************//**
Prints info of a THD object (== user session thread) to the given file. */
@@ -132,7 +157,7 @@ void
innobase_mysql_print_thd(
/*=====================*/
FILE* f, /*!< in: output stream */
- void* thd, /*!< in: pointer to a MySQL THD object */
+ THD* thd, /*!< in: pointer to a MySQL THD object */
uint max_query_len); /*!< in: max query length to print, or 0 to
use the default max length */
@@ -147,6 +172,23 @@ innobase_mysql_log_notify(
ib_uint64_t write_lsn, /*!< in: LSN written to log file */
ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
+/*************************************************************//**
+InnoDB uses this function to compare two data fields for which the data type
+is such that we must use MySQL code to compare them.
+@return 1, 0, -1, if a is greater, equal, less than b, respectively */
+UNIV_INTERN
+int
+innobase_mysql_cmp(
+/*===============*/
+ int mysql_type, /*!< in: MySQL type */
+ uint charset_number, /*!< in: number of the charset */
+ const unsigned char* a, /*!< in: data field */
+ unsigned int a_length, /*!< in: data field length,
+ not UNIV_SQL_NULL */
+ const unsigned char* b, /*!< in: data field */
+ unsigned int b_length) /*!< in: data field length,
+ not UNIV_SQL_NULL */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
@@ -185,6 +227,17 @@ innobase_strcasecmp(
const char* b); /*!< in: second string to compare */
/******************************************************************//**
+Compares NUL-terminated UTF-8 strings case insensitively. The
+second string contains wildcards.
+@return 0 if a match is found, 1 if not */
+UNIV_INTERN
+int
+innobase_wildcasecmp(
+/*=================*/
+ const char* a, /*!< in: string to compare */
+ const char* b); /*!< in: wildcard string to compare */
+
+/******************************************************************//**
Strip dir name from a full path name and return only its file name.
@return file name or "null" if no file name */
UNIV_INTERN
@@ -196,11 +249,11 @@ innobase_basename(
/******************************************************************//**
Returns true if the thread is executing a SELECT statement.
@return true if thd is executing SELECT */
-
+UNIV_INTERN
ibool
thd_is_select(
/*==========*/
- const void* thd); /*!< in: thread handle (THD*) */
+ const THD* thd); /*!< in: thread handle */
/******************************************************************//**
Converts an identifier to a table name. */
@@ -222,8 +275,8 @@ innobase_convert_from_id(
struct charset_info_st* cs, /*!< in: the 'from' character set */
char* to, /*!< out: converted identifier */
const char* from, /*!< in: identifier to convert */
- ulint len); /*!< in: length of 'to', in bytes; should
- be at least 3 * strlen(to) + 1 */
+ ulint len); /*!< in: length of 'to', in bytes;
+ should be at least 3 * strlen(from) + 1 */
/******************************************************************//**
Makes all characters in a NUL-terminated UTF-8 string lower case. */
UNIV_INTERN
@@ -239,7 +292,7 @@ UNIV_INTERN
struct charset_info_st*
innobase_get_charset(
/*=================*/
- void* mysql_thd); /*!< in: MySQL thread handle */
+ THD* thd); /*!< in: MySQL thread handle */
/**********************************************************************//**
Determines the current SQL statement.
@return SQL statement string */
@@ -247,7 +300,7 @@ UNIV_INTERN
const char*
innobase_get_stmt(
/*==============*/
- void* mysql_thd, /*!< in: MySQL thread handle */
+ THD* thd, /*!< in: MySQL thread handle */
size_t* length) /*!< out: length of the SQL statement */
__attribute__((nonnull));
/******************************************************************//**
@@ -270,8 +323,9 @@ innobase_get_at_most_n_mbchars(
/*************************************************************//**
InnoDB index push-down condition check
@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
+UNIV_INTERN
enum icp_result
-handler_index_cond_check(
+innobase_index_cond(
/*================*/
void* file) /*!< in/out: pointer to ha_innobase */
__attribute__((nonnull, warn_unused_result));
@@ -279,21 +333,21 @@ handler_index_cond_check(
Returns true if the thread supports XA,
global value of innodb_supports_xa if thd is NULL.
@return true if thd supports XA */
-
+UNIV_INTERN
ibool
thd_supports_xa(
/*============*/
- void* thd); /*!< in: thread handle (THD*), or NULL to query
+ THD* thd); /*!< in: thread handle, or NULL to query
the global innodb_supports_xa */
/******************************************************************//**
Returns the lock wait timeout for the current connection.
@return the lock wait timeout, in seconds */
-
+UNIV_INTERN
ulong
thd_lock_wait_timeout(
/*==================*/
- void* thd); /*!< in: thread handle (THD*), or NULL to query
+ THD* thd); /*!< in: thread handle, or NULL to query
the global innodb_lock_wait_timeout */
/******************************************************************//**
Add up the time waited for the lock for the current query. */
@@ -301,11 +355,21 @@ UNIV_INTERN
void
thd_set_lock_wait_time(
/*===================*/
- void* thd, /*!< in: thread handle (THD*) */
- ulint value); /*!< in: time waited for the lock */
-/******************************************************************//**
-*/
+ THD* thd, /*!< in/out: thread handle */
+ ulint value); /*!< in: time waited for the lock */
+/**********************************************************************//**
+Get the current setting of the table_cache_size global parameter. We do
+a dirty read because for one there is no synchronization object and
+secondly there is little harm in doing so even if we get a torn read.
+@return current value of the table_cache_size global parameter */
+UNIV_INTERN
+ulint
+innobase_get_table_cache_size(void);
+/*===============================*/
+
+/******************************************************************//**
+ */
ulong
thd_flush_log_at_trx_commit(
/*================================*/
@@ -322,17 +386,210 @@ ulint
innobase_get_lower_case_table_names(void);
/*=====================================*/
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return 0 or error number */
+UNIV_INTERN
+int
+innobase_close_thd(
+/*===============*/
+ THD* thd); /*!< in: MySQL thread handle for
+ which to close the connection */
+/*************************************************************//**
+Get the next token from the given string and store it in *token. */
+UNIV_INTERN
+ulint
+innobase_mysql_fts_get_token(
+/*=========================*/
+ CHARSET_INFO* charset, /*!< in: Character set */
+ const byte* start, /*!< in: start of text */
+ const byte* end, /*!< in: one character past end of
+ text */
+ fts_string_t* token, /*!< out: token's text */
+ ulint* offset); /*!< out: offset to token,
+ measured as characters from
+ 'start' */
-/********************************************************************//**
-Returns the merge-sort block size used for the secondary index creation
-for the current connection.
-@return the merge-sort block size, in bytes */
+/******************************************************************//**
+Compares two character strings case-insensitively according to their charset. */
+UNIV_INTERN
+int
+innobase_fts_text_case_cmp(
+/*=======================*/
+ const void* cs, /*!< in: Character set */
+ const void* p1, /*!< in: key */
+ const void* p2); /*!< in: node */
-ulong
-thd_merge_sort_block_size(
-/*======================*/
- void* thd); /*!< in: thread handle (THD*), or NULL to query
- the global merge_sort_block_size */
+/******************************************************************//**
+Compares two character strings according to their charset. */
+UNIV_INTERN
+int
+innobase_fts_string_cmp(
+/*====================*/
+ const void* cs, /*!< in: Character set */
+ const void* p1, /*!< in: key */
+ const void* p2); /*!< in: node */
+
+/****************************************************************//**
+Get FTS field charset info from the field's prtype
+@return charset info */
+UNIV_INTERN
+CHARSET_INFO*
+innobase_get_fts_charset(
+/*=====================*/
+ int mysql_type, /*!< in: MySQL type */
+ uint charset_number);/*!< in: number of the charset */
+/******************************************************************//**
+Returns true if transaction should be flagged as read-only.
+@return true if the thd is marked as read-only */
+UNIV_INTERN
+ibool
+thd_trx_is_read_only(
+/*=================*/
+ THD* thd); /*!< in/out: thread handle */
+
+/******************************************************************//**
+Check if the transaction is an auto-commit transaction. TRUE also
+implies that it is a SELECT (read-only) transaction.
+@return true if the transaction is an auto commit read-only transaction. */
+UNIV_INTERN
+ibool
+thd_trx_is_auto_commit(
+/*===================*/
+ THD* thd); /*!< in: thread handle, or NULL */
+
+/*****************************************************************//**
+A wrapper around innobase_convert_name(): converts a table or index
+name to the MySQL system_charset_info (UTF-8) and quotes it if needed.
+The converted name is written to buf; the function returns nothing. */
+UNIV_INTERN
+void
+innobase_format_name(
+/*==================*/
+ char* buf, /*!< out: buffer for converted
+ identifier */
+ ulint buflen, /*!< in: length of buf, in bytes */
+ const char* name, /*!< in: index or table name
+ to format */
+ ibool is_index_name) /*!< in: index name */
+ __attribute__((nonnull));
+
+/** Corresponds to Sql_condition::enum_warning_level. */
+enum ib_log_level_t {
+ IB_LOG_LEVEL_INFO,
+ IB_LOG_LEVEL_WARN,
+ IB_LOG_LEVEL_ERROR,
+ IB_LOG_LEVEL_FATAL
+};
+
+/******************************************************************//**
+Use this when the args are first converted to a formatted string and then
+passed to the format string from errmsg-utf8.txt. The error message format
+must be: "Some string ... %s".
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_errf(
+/*====*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+ __attribute__((format(printf, 4, 5)));
+
+/******************************************************************//**
+Use this when the args are passed to the format string from
+errmsg-utf8.txt directly as is.
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_senderrf(
+/*========*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ ...); /*!< Args */
+
+/******************************************************************//**
+Write a message to the MySQL log, prefixed with "InnoDB: ".
+Wrapper around sql_print_information() */
+UNIV_INTERN
+void
+ib_logf(
+/*====*/
+ ib_log_level_t level, /*!< in: warning level */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+ __attribute__((format(printf, 2, 3)));
+
+/******************************************************************//**
+Returns the NUL terminated value of glob_hostname.
+@return pointer to glob_hostname. */
+UNIV_INTERN
+const char*
+server_get_hostname();
+/*=================*/
+
+/******************************************************************//**
+Get the error message format string.
+@return the format string or 0 if not found. */
+UNIV_INTERN
+const char*
+innobase_get_err_msg(
+/*=================*/
+ int error_code); /*!< in: MySQL error code */
+
+/*********************************************************************//**
+Compute the next autoinc value.
+
+For MySQL replication the autoincrement values can be partitioned among
+the nodes. The offset is the start or origin of the autoincrement value
+for a particular node. For n nodes the increment will be n and the offset
+will be in the interval [1, n]. The formula tries to allocate the next
+value for a particular node.
+
+Note: This function is also called with increment set to the number of
+values we want to reserve for multi-value inserts e.g.,
+
+ INSERT INTO T VALUES(), (), ();
+
+innobase_next_autoinc() will be called with increment set to 3 where
+autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
+the multi-value INSERT above.
+@return the next value */
+UNIV_INTERN
+ulonglong
+innobase_next_autoinc(
+/*==================*/
+ ulonglong current, /*!< in: Current value */
+ ulonglong need, /*!< in: count of values needed */
+ ulonglong step, /*!< in: AUTOINC increment step */
+ ulonglong offset, /*!< in: AUTOINC offset */
+ ulonglong max_value) /*!< in: max value for type */
+ __attribute__((pure, warn_unused_result));
+
+/********************************************************************//**
+Get the upper limit of the MySQL integral and floating-point type.
+@return maximum allowed value for the field */
+UNIV_INTERN
+ulonglong
+innobase_get_int_col_max_value(
+/*===========================*/
+ const Field* field) /*!< in: MySQL field */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************************
Check if the length of the identifier exceeds the maximum allowed.
@@ -365,4 +622,4 @@ innobase_convert_to_filename_charset(
ulint len); /* in: length of 'to', in bytes */
-#endif
+#endif /* HA_INNODB_PROTOTYPES_H */
diff --git a/storage/xtradb/include/handler0alter.h b/storage/xtradb/include/handler0alter.h
index 3107fb32881..66b963ae39a 100644
--- a/storage/xtradb/include/handler0alter.h
+++ b/storage/xtradb/include/handler0alter.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,11 +27,34 @@ UNIV_INTERN
void
innobase_rec_to_mysql(
/*==================*/
- struct TABLE* table, /*!< in/out: MySQL table */
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets); /*!< in: rec_get_offsets(
- rec, index, ...) */
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: index */
+ const ulint* offsets)/*!< in: rec_get_offsets(
+ rec, index, ...) */
+ __attribute__((nonnull));
+
+/*************************************************************//**
+Copies an InnoDB index entry to table->record[0]. */
+UNIV_INTERN
+void
+innobase_fields_to_mysql(
+/*=====================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_index_t* index, /*!< in: InnoDB index */
+ const dfield_t* fields) /*!< in: InnoDB index fields */
+ __attribute__((nonnull));
+
+/*************************************************************//**
+Copies an InnoDB row to table->record[0]. */
+UNIV_INTERN
+void
+innobase_row_to_mysql(
+/*==================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_table_t* itab, /*!< in: InnoDB table */
+ const dtuple_t* row) /*!< in: InnoDB row */
+ __attribute__((nonnull));
/*************************************************************//**
Resets table->record[0]. */
@@ -39,4 +62,53 @@ UNIV_INTERN
void
innobase_rec_reset(
/*===============*/
- struct TABLE* table); /*!< in/out: MySQL table */
+ struct TABLE* table) /*!< in/out: MySQL table */
+ __attribute__((nonnull));
+
+/** Generate the next autoinc based on a snapshot of the session
+auto_increment_increment and auto_increment_offset variables. */
+struct ib_sequence_t {
+
+ /** Constructor.
+ @param thd - the session
+ @param start_value - the lower bound
+ @param max_value - the upper bound (inclusive) */
+ ib_sequence_t(THD* thd, ulonglong start_value, ulonglong max_value);
+
+ /**
+ Postfix increment
+ @return the value to insert */
+ ulonglong operator++(int) UNIV_NOTHROW;
+
+ /** Check if the autoinc "sequence" is exhausted.
+ @return true if the sequence is exhausted */
+ bool eof() const UNIV_NOTHROW
+ {
+ return(m_eof);
+ }
+
+ /** Accessor for m_next_value; asserts via ut_ad() that it is non-zero.
+ @return the next value in the sequence */
+ ulonglong last() const UNIV_NOTHROW
+ {
+ ut_ad(m_next_value > 0);
+
+ return(m_next_value);
+ }
+
+ /** Maximum column value if adding an AUTOINC column else 0. NOTE(review):
+ this said "set to ~0 at end of sequence", but the member is const; confirm. */
+ const ulonglong m_max_value;
+
+ /** Value of auto_increment_increment */
+ ulong m_increment;
+
+ /** Value of auto_increment_offset */
+ ulong m_offset;
+
+ /** Next value in the sequence */
+ ulonglong m_next_value;
+
+ /** true if no more values left in the sequence */
+ bool m_eof;
+};
diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h
index 05b538ed5f5..a6fe4e680a1 100644
--- a/storage/xtradb/include/hash0hash.h
+++ b/storage/xtradb/include/hash0hash.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -30,16 +30,29 @@ Created 5/20/1997 Heikki Tuuri
#include "mem0mem.h"
#ifndef UNIV_HOTBACKUP
# include "sync0sync.h"
+# include "sync0rw.h"
#endif /* !UNIV_HOTBACKUP */
-typedef struct hash_table_struct hash_table_t;
-typedef struct hash_cell_struct hash_cell_t;
+struct hash_table_t;
+struct hash_cell_t;
typedef void* hash_node_t;
/* Fix Bug #13859: symbol collision between imap/mysql */
#define hash_create hash0_create
+/* Different types of hash_table based on the synchronization
+method used for it. */
+enum hash_table_sync_t {
+ HASH_TABLE_SYNC_NONE = 0, /*!< Don't use any internal
+ synchronization objects for
+ this hash_table. */
+ HASH_TABLE_SYNC_MUTEX, /*!< Use mutexes to control
+ access to this hash_table. */
+ HASH_TABLE_SYNC_RW_LOCK /*!< Use rw_locks to control
+ access to this hash_table. */
+};
+
/*************************************************************//**
Creates a hash table with >= n array cells. The actual number
of cells is chosen to be a prime number slightly bigger than n.
@@ -51,21 +64,29 @@ hash_create(
ulint n); /*!< in: number of array cells */
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
-Creates a mutex array to protect a hash table. */
+Creates a sync object array to protect a hash table.
+::sync_obj can be mutexes or rw_locks depending on the type of
+hash table. */
UNIV_INTERN
void
-hash_create_mutexes_func(
-/*=====================*/
- hash_table_t* table, /*!< in: hash table */
+hash_create_sync_obj_func(
+/*======================*/
+ hash_table_t* table, /*!< in: hash table */
+ enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX
+ or HASH_TABLE_SYNC_RW_LOCK */
#ifdef UNIV_SYNC_DEBUG
- ulint sync_level, /*!< in: latching order level of the
- mutexes: used in the debug version */
+ ulint sync_level,/*!< in: latching order level
+ of the mutexes: used in the
+ debug version */
#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes); /*!< in: number of mutexes */
+ ulint n_sync_obj);/*!< in: number of sync objects,
+ must be a power of 2 */
#ifdef UNIV_SYNC_DEBUG
-# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n)
+# define hash_create_sync_obj(t, s, n, level) \
+ hash_create_sync_obj_func(t, s, level, n)
#else /* UNIV_SYNC_DEBUG */
-# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n)
+# define hash_create_sync_obj(t, s, n, level) \
+ hash_create_sync_obj_func(t, s, n)
#endif /* UNIV_SYNC_DEBUG */
#endif /* !UNIV_HOTBACKUP */
@@ -87,11 +108,12 @@ hash_calc_hash(
hash_table_t* table); /*!< in: hash table */
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
-Assert that the mutex for the table in a hash operation is owned. */
-# define HASH_ASSERT_OWNED(TABLE, FOLD) \
-ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));
+Assert that the mutex for the table is held */
+# define HASH_ASSERT_OWN(TABLE, FOLD) \
+ ut_ad((TABLE)->type != HASH_TABLE_SYNC_MUTEX \
+ || (mutex_own(hash_get_mutex((TABLE), FOLD))));
#else /* !UNIV_HOTBACKUP */
-# define HASH_ASSERT_OWNED(TABLE, FOLD)
+# define HASH_ASSERT_OWN(TABLE, FOLD)
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
@@ -102,7 +124,7 @@ do {\
hash_cell_t* cell3333;\
TYPE* struct3333;\
\
- HASH_ASSERT_OWNED(TABLE, FOLD)\
+ HASH_ASSERT_OWN(TABLE, FOLD)\
\
(DATA)->NAME = NULL;\
\
@@ -124,7 +146,7 @@ do {\
#ifdef UNIV_HASH_DEBUG
# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1)
-# define HASH_INVALIDATE(DATA, NAME) DATA->NAME = (void*) -1
+# define HASH_INVALIDATE(DATA, NAME) *(void**) (&DATA->NAME) = (void*) -1
#else
# define HASH_ASSERT_VALID(DATA) do {} while (0)
# define HASH_INVALIDATE(DATA, NAME) do {} while (0)
@@ -138,7 +160,7 @@ do {\
hash_cell_t* cell3333;\
TYPE* struct3333;\
\
- HASH_ASSERT_OWNED(TABLE, FOLD)\
+ HASH_ASSERT_OWN(TABLE, FOLD)\
\
cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
\
@@ -175,7 +197,7 @@ Looks for a struct in a hash table. */
#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\
{\
\
- HASH_ASSERT_OWNED(TABLE, FOLD)\
+ HASH_ASSERT_OWN(TABLE, FOLD)\
\
(DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
HASH_ASSERT_VALID(DATA);\
@@ -259,7 +281,7 @@ do {\
\
HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\
\
- top_node111 = (TYPE*)mem_heap_get_top(\
+ top_node111 = (TYPE*) mem_heap_get_top(\
hash_get_heap(TABLE, fold111),\
sizeof(TYPE));\
\
@@ -284,11 +306,12 @@ do {\
} else {\
/* We have to look for the predecessor of the top\
node */\
- node111 = cell111->node;\
+ node111 = static_cast<TYPE*>(cell111->node);\
\
while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
\
- node111 = HASH_GET_NEXT(NAME, node111);\
+ node111 = static_cast<TYPE*>(\
+ HASH_GET_NEXT(NAME, node111));\
}\
\
/* Now we have the predecessor node */\
@@ -329,12 +352,12 @@ do {\
} while (0)
/************************************************************//**
-Gets the mutex index for a fold value in a hash table.
-@return mutex number */
+Gets the sync object index for a fold value in a hash table.
+@return index */
UNIV_INLINE
ulint
-hash_get_mutex_no(
-/*==============*/
+hash_get_sync_obj_index(
+/*====================*/
hash_table_t* table, /*!< in: hash table */
ulint fold); /*!< in: fold */
/************************************************************//**
@@ -359,21 +382,39 @@ hash_get_heap(
Gets the nth mutex in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_prio_mutex_t*
hash_get_nth_mutex(
/*===============*/
hash_table_t* table, /*!< in: hash table */
ulint i); /*!< in: index of the mutex */
/************************************************************//**
+Gets the nth rw_lock in a hash table.
+@return rw_lock */
+UNIV_INLINE
+prio_rw_lock_t*
+hash_get_nth_lock(
+/*==============*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint i); /*!< in: index of the rw_lock */
+/************************************************************//**
Gets the mutex for a fold value in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_prio_mutex_t*
hash_get_mutex(
/*===========*/
hash_table_t* table, /*!< in: hash table */
ulint fold); /*!< in: fold */
/************************************************************//**
+Gets the rw_lock for a fold value in a hash table.
+@return rw_lock */
+UNIV_INLINE
+prio_rw_lock_t*
+hash_get_lock(
+/*==========*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
Reserves the mutex for a fold value in a hash table. */
UNIV_INTERN
void
@@ -403,39 +444,127 @@ void
hash_mutex_exit_all(
/*================*/
hash_table_t* table); /*!< in: hash table */
+/************************************************************//**
+Releases all but the passed in mutex of a hash table. */
+UNIV_INTERN
+void
+hash_mutex_exit_all_but(
+/*====================*/
+ hash_table_t* table, /*!< in: hash table */
+ ib_prio_mutex_t* keep_mutex); /*!< in: mutex to keep */
+/************************************************************//**
+s-lock a lock for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_lock_s(
+/*========*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
+x-lock a lock for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_lock_x(
+/*========*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
+unlock an s-lock for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_unlock_s(
+/*==========*/
+
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
+unlock x-lock for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_unlock_x(
+/*==========*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
+Reserves all the locks of a hash table, in an ascending order. */
+UNIV_INTERN
+void
+hash_lock_x_all(
+/*============*/
+ hash_table_t* table); /*!< in: hash table */
+/************************************************************//**
+Releases all the locks of a hash table, in an ascending order. */
+UNIV_INTERN
+void
+hash_unlock_x_all(
+/*==============*/
+ hash_table_t* table); /*!< in: hash table */
+/************************************************************//**
+Releases all but the passed in lock of a hash table. */
+UNIV_INTERN
+void
+hash_unlock_x_all_but(
+/*==================*/
+ hash_table_t* table, /*!< in: hash table */
+ prio_rw_lock_t* keep_lock); /*!< in: lock to keep */
+
#else /* !UNIV_HOTBACKUP */
# define hash_get_heap(table, fold) ((table)->heap)
# define hash_mutex_enter(table, fold) ((void) 0)
# define hash_mutex_exit(table, fold) ((void) 0)
+# define hash_mutex_enter_all(table) ((void) 0)
+# define hash_mutex_exit_all(table) ((void) 0)
+# define hash_mutex_exit_all_but(t, m) ((void) 0)
+# define hash_lock_s(t, f) ((void) 0)
+# define hash_lock_x(t, f) ((void) 0)
+# define hash_unlock_s(t, f) ((void) 0)
+# define hash_unlock_x(t, f) ((void) 0)
+# define hash_lock_x_all(t) ((void) 0)
+# define hash_unlock_x_all(t) ((void) 0)
+# define hash_unlock_x_all_but(t, l) ((void) 0)
#endif /* !UNIV_HOTBACKUP */
-struct hash_cell_struct{
+struct hash_cell_t{
void* node; /*!< hash chain node, NULL if none */
};
/* The hash table structure */
-struct hash_table_struct {
+struct hash_table_t {
+ enum hash_table_sync_t type; /*!< type of hash_table. */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
- ibool adaptive;/* TRUE if this is the hash table of the
- adaptive hash index */
+ ibool adaptive;/* TRUE if this is the hash
+ table of the adaptive hash
+ index */
# endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- ulint n_cells;/* number of cells in the hash table */
- hash_cell_t* array; /*!< pointer to cell array */
+ ulint n_cells;/* number of cells in the hash table */
+ hash_cell_t* array; /*!< pointer to cell array */
#ifndef UNIV_HOTBACKUP
- ulint n_mutexes;/* if mutexes != NULL, then the number of
- mutexes, must be a power of 2 */
- mutex_t* mutexes;/* NULL, or an array of mutexes used to
- protect segments of the hash table */
- mem_heap_t** heaps; /*!< if this is non-NULL, hash chain nodes for
- external chaining can be allocated from these
- memory heaps; there are then n_mutexes many of
- these heaps */
+ ulint n_sync_obj;/* if the sync_obj array is
+ non-NULL, then the number of
+ mutexes or the number of
+ rw_locks depending on the type.
+ Must be a power of 2 */
+ union {
+ ib_prio_mutex_t* mutexes;
+ /* NULL, or an array of mutexes
+ used to protect segments of the
+ hash table */
+ prio_rw_lock_t* rw_locks;/* NULL, or an array of rw_locks
+ used to protect segments of the
+ hash table */
+ } sync_obj;
+
+ mem_heap_t** heaps; /*!< if this is non-NULL, hash
+ chain nodes for external chaining
+ can be allocated from these memory
+ heaps; there are then n_sync_obj
+ many of these heaps */
#endif /* !UNIV_HOTBACKUP */
- mem_heap_t* heap;
+ mem_heap_t* heap;
#ifdef UNIV_DEBUG
- ulint magic_n;
+ ulint magic_n;
# define HASH_TABLE_MAGIC_N 76561114
#endif /* UNIV_DEBUG */
};
diff --git a/storage/xtradb/include/hash0hash.ic b/storage/xtradb/include/hash0hash.ic
index 2c708cc594b..e4822538e19 100644
--- a/storage/xtradb/include/hash0hash.ic
+++ b/storage/xtradb/include/hash0hash.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -87,20 +87,21 @@ hash_calc_hash(
#ifndef UNIV_HOTBACKUP
/************************************************************//**
-Gets the mutex index for a fold value in a hash table.
-@return mutex number */
+Gets the sync object index for a fold value in a hash table.
+@return index */
UNIV_INLINE
ulint
-hash_get_mutex_no(
-/*==============*/
+hash_get_sync_obj_index(
+/*====================*/
hash_table_t* table, /*!< in: hash table */
ulint fold) /*!< in: fold */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_ad(ut_is_2pow(table->n_mutexes));
+ ut_ad(table->type != HASH_TABLE_SYNC_NONE);
+ ut_ad(ut_is_2pow(table->n_sync_obj));
return(ut_2pow_remainder(hash_calc_hash(fold, table),
- table->n_mutexes));
+ table->n_sync_obj));
}
/************************************************************//**
@@ -115,7 +116,8 @@ hash_get_nth_heap(
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_ad(i < table->n_mutexes);
+ ut_ad(table->type != HASH_TABLE_SYNC_NONE);
+ ut_ad(i < table->n_sync_obj);
return(table->heaps[i]);
}
@@ -139,7 +141,7 @@ hash_get_heap(
return(table->heap);
}
- i = hash_get_mutex_no(table, fold);
+ i = hash_get_sync_obj_index(table, fold);
return(hash_get_nth_heap(table, i));
}
@@ -148,7 +150,7 @@ hash_get_heap(
Gets the nth mutex in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_prio_mutex_t*
hash_get_nth_mutex(
/*===============*/
hash_table_t* table, /*!< in: hash table */
@@ -156,16 +158,17 @@ hash_get_nth_mutex(
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_ad(i < table->n_mutexes);
+ ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
+ ut_ad(i < table->n_sync_obj);
- return(table->mutexes + i);
+ return(table->sync_obj.mutexes + i);
}
/************************************************************//**
Gets the mutex for a fold value in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_prio_mutex_t*
hash_get_mutex(
/*===========*/
hash_table_t* table, /*!< in: hash table */
@@ -176,8 +179,47 @@ hash_get_mutex(
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- i = hash_get_mutex_no(table, fold);
+ i = hash_get_sync_obj_index(table, fold);
return(hash_get_nth_mutex(table, i));
}
+
+/************************************************************//**
+Gets the nth rw_lock in a hash table of type HASH_TABLE_SYNC_RW_LOCK.
+@return pointer to element i of table->sync_obj.rw_locks */
+UNIV_INLINE
+prio_rw_lock_t*
+hash_get_nth_lock(
+/*==============*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint i) /*!< in: index of the rw_lock */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+ ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
+ ut_ad(i < table->n_sync_obj);
+
+ return(table->sync_obj.rw_locks + i);
+}
+
+/************************************************************//**
+Gets the rw_lock for a fold value in a hash table.
+@return the rw_lock at index hash_get_sync_obj_index(table, fold) */
+UNIV_INLINE
+prio_rw_lock_t*
+hash_get_lock(
+/*==========*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: fold */
+{
+ ulint i;
+
+ ut_ad(table);
+ ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+
+ i = hash_get_sync_obj_index(table, fold);
+
+ return(hash_get_nth_lock(table, i));
+}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/ibuf0ibuf.h b/storage/xtradb/include/ibuf0ibuf.h
index 03ea0629af4..f2e1c80878e 100644
--- a/storage/xtradb/include/ibuf0ibuf.h
+++ b/storage/xtradb/include/ibuf0ibuf.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -35,6 +35,10 @@ Created 7/19/1997 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
# include "ibuf0types.h"
+/** Default value for maximum on-disk size of change buffer in terms
+of percentage of the buffer pool. */
+#define CHANGE_BUFFER_DEFAULT_SIZE (25)
+
/* Possible operations buffered in the insert/whatever buffer. See
ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */
typedef enum {
@@ -98,6 +102,14 @@ void
ibuf_init_at_db_start(void);
/*=======================*/
/*********************************************************************//**
+Updates the max_size value for ibuf. */
+UNIV_INTERN
+void
+ibuf_max_size_update(
+/*=================*/
+ ulint new_val); /*!< in: new value in terms of
+ percentage of the buffer pool size */
+/*********************************************************************//**
Reads the biggest tablespace id from the high end of the insert buffer
tree and updates the counter in fil_system. */
UNIV_INTERN
@@ -364,26 +376,16 @@ will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
UNIV_INTERN
ulint
-ibuf_contract(
-/*==========*/
- ibool sync); /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-UNIV_INTERN
-ulint
-ibuf_contract_for_n_pages(
-/*======================*/
- ibool sync, /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
- ulint n_pages);/*!< in: try to read at least this many pages to
- the buffer pool and merge the ibuf contents to
- them */
+ibuf_contract_in_background(
+/*========================*/
+ table_id_t table_id, /*!< in: if merge should be done only
+ for a specific table, for all tables
+ this should be 0 */
+ ibool full); /*!< in: TRUE if the caller wants to
+ do a full contract based on PCT_IO(100).
+ If FALSE then the size of contract
+ batch is determined based on the
+ current size of the ibuf tree. */
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Parses a redo log record of an ibuf bitmap page init.
@@ -411,9 +413,9 @@ ibuf_count_get(
#endif
/******************************************************************//**
Looks if the insert buffer is empty.
-@return TRUE if empty */
+@return true if empty */
UNIV_INTERN
-ibool
+bool
ibuf_is_empty(void);
/*===============*/
/******************************************************************//**
@@ -455,6 +457,17 @@ ibuf_export_ibuf_status(
ulint* discarded_delete_marks,
ulint* discarded_deletes);
+/******************************************************************//**
+Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+ibuf_check_bitmap_on_import(
+/*========================*/
+ const trx_t* trx, /*!< in: transaction */
+ ulint space_id) /*!< in: tablespace identifier */
+ __attribute__((nonnull, warn_unused_result));
+
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
diff --git a/storage/xtradb/include/ibuf0ibuf.ic b/storage/xtradb/include/ibuf0ibuf.ic
index 043d7c472d8..21747fdceac 100644
--- a/storage/xtradb/include/ibuf0ibuf.ic
+++ b/storage/xtradb/include/ibuf0ibuf.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -28,9 +28,6 @@ Created 7/19/1997 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "buf0lru.h"
-/** Counter for ibuf_should_try() */
-extern ulint ibuf_flush_count;
-
/** An index page must contain at least UNIV_PAGE_SIZE /
IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
buffer inserts to this page. If there is this much of free space, the
@@ -62,7 +59,7 @@ ibuf_mtr_commit(
}
/** Insert buffer struct */
-struct ibuf_struct{
+struct ibuf_t{
ulint size; /*!< current size of the ibuf index
tree, in pages */
ulint max_size; /*!< recommended maximum size of the
@@ -70,10 +67,10 @@ struct ibuf_struct{
ulint seg_size; /*!< allocated pages of the file
segment containing ibuf header and
tree */
- ibool empty; /*!< Protected by the page
+ bool empty; /*!< Protected by the page
latch of the root page of the
insert buffer tree
- (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
+ (FSP_IBUF_TREE_ROOT_PAGE_NO). true
if and only if the insert
buffer tree is empty. */
ulint free_list_len; /*!< length of the free list */
@@ -127,21 +124,11 @@ ibuf_should_try(
a secondary index when we
decide */
{
- if (ibuf_use != IBUF_USE_NONE
- && !dict_index_is_clust(index)
- && (ignore_sec_unique || !dict_index_is_unique(index))) {
-
- ibuf_flush_count++;
-
- if (ibuf_flush_count % 4 == 0) {
-
- buf_LRU_try_free_flushed_blocks(NULL);
- }
-
- return(TRUE);
- }
-
- return(FALSE);
+ return(ibuf_use != IBUF_USE_NONE
+ && ibuf->max_size != 0
+ && !dict_index_is_clust(index)
+ && index->table->quiesce == QUIESCE_NONE
+ && (ignore_sec_unique || !dict_index_is_unique(index)));
}
/******************************************************************//**
@@ -174,12 +161,11 @@ ibuf_bitmap_page(
ut_ad(ut_is_2pow(zip_size));
if (!zip_size) {
- return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1))
- == FSP_IBUF_BITMAP_OFFSET));
+ return((page_no & (UNIV_PAGE_SIZE - 1))
+ == FSP_IBUF_BITMAP_OFFSET);
}
- return(UNIV_UNLIKELY((page_no & (zip_size - 1))
- == FSP_IBUF_BITMAP_OFFSET));
+ return((page_no & (zip_size - 1)) == FSP_IBUF_BITMAP_OFFSET);
}
/*********************************************************************//**
@@ -197,7 +183,7 @@ ibuf_index_page_calc_free_bits(
ulint n;
ut_ad(ut_is_2pow(zip_size));
ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
- ut_ad(zip_size <= UNIV_PAGE_SIZE);
+ ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
if (zip_size) {
n = max_ins_size
@@ -232,7 +218,7 @@ ibuf_index_page_calc_free_from_bits(
ut_ad(bits < 4);
ut_ad(ut_is_2pow(zip_size));
ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
- ut_ad(zip_size <= UNIV_PAGE_SIZE);
+ ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
if (zip_size) {
if (bits == 3) {
@@ -267,16 +253,24 @@ ibuf_index_page_calc_free_zip(
ut_ad(zip_size == buf_block_get_zip_size(block));
ut_ad(zip_size);
- max_ins_size = page_get_max_insert_size_after_reorganize(
+ /* Consider the maximum insert size on the uncompressed page
+ without reorganizing the page. We must not assume anything
+ about the compression ratio. If zip_max_ins > max_ins_size and
+ there is 1/4 garbage on the page, recompression after the
+ reorganize could fail, in theory. So, let us guarantee that
+ merging a buffered insert to a compressed page will always
+ succeed without reorganizing or recompressing the page, just
+ by using the page modification log. */
+ max_ins_size = page_get_max_insert_size(
buf_block_get_frame(block), 1);
page_zip = buf_block_get_page_zip(block);
zip_max_ins = page_zip_max_ins_size(page_zip,
FALSE/* not clustered */);
- if (UNIV_UNLIKELY(zip_max_ins < 0)) {
+ if (zip_max_ins < 0) {
return(0);
- } else if (UNIV_LIKELY(max_ins_size > (ulint) zip_max_ins)) {
+ } else if (max_ins_size > (ulint) zip_max_ins) {
max_ins_size = (ulint) zip_max_ins;
}
@@ -345,8 +339,8 @@ ibuf_update_free_bits_if_full(
before = ibuf_index_page_calc_free_bits(0, max_ins_size);
if (max_ins_size >= increase) {
-#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE
-# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE"
+#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX
+# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX"
#endif
after = ibuf_index_page_calc_free_bits(0, max_ins_size
- increase);
diff --git a/storage/xtradb/include/ibuf0types.h b/storage/xtradb/include/ibuf0types.h
index d3e6f9299da..3fdbf078b0b 100644
--- a/storage/xtradb/include/ibuf0types.h
+++ b/storage/xtradb/include/ibuf0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,6 +26,6 @@ Created 7/29/1997 Heikki Tuuri
#ifndef ibuf0types_h
#define ibuf0types_h
-typedef struct ibuf_struct ibuf_t;
+struct ibuf_t;
#endif
diff --git a/storage/xtradb/include/lock0iter.h b/storage/xtradb/include/lock0iter.h
index ce6f28dc514..0054850b526 100644
--- a/storage/xtradb/include/lock0iter.h
+++ b/storage/xtradb/include/lock0iter.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -29,13 +29,13 @@ Created July 16, 2007 Vasil Dimov
#include "univ.i"
#include "lock0types.h"
-typedef struct lock_queue_iterator_struct {
+struct lock_queue_iterator_t {
const lock_t* current_lock;
/* In case this is a record lock queue (not table lock queue)
then bit_no is the record number within the heap in which the
record is stored. */
ulint bit_no;
-} lock_queue_iterator_t;
+};
/*******************************************************************//**
Initialize lock queue iterator so that it starts to iterate from
diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h
index 1d3958e0d50..3a3a28ef525 100644
--- a/storage/xtradb/include/lock0lock.h
+++ b/storage/xtradb/include/lock0lock.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -36,13 +36,13 @@ Created 5/7/1996 Heikki Tuuri
#include "lock0types.h"
#include "read0types.h"
#include "hash0hash.h"
+#include "srv0srv.h"
#include "ut0vec.h"
#ifdef UNIV_DEBUG
extern ibool lock_print_waits;
#endif /* UNIV_DEBUG */
-/* Buffer for storing information about the most recent deadlock error */
-extern FILE* lock_latest_err_file;
+
extern ulint srv_n_lock_deadlock_count;
/*********************************************************************//**
@@ -66,18 +66,6 @@ void
lock_sys_close(void);
/*================*/
/*********************************************************************//**
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index.
-@return transaction which has the x-lock, or NULL */
-UNIV_INLINE
-trx_t*
-lock_clust_rec_some_has_impl(
-/*=========================*/
- const rec_t* rec, /*!< in: user record */
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
- __attribute__((nonnull, warn_unused_result));
-/*********************************************************************//**
Gets the heap_no of the smallest user record on a page.
@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
UNIV_INLINE
@@ -272,14 +260,15 @@ lock_rec_restore_from_page_infimum(
state; lock bits are reset on
the infimum */
/*********************************************************************//**
-Returns TRUE if there are explicit record locks on a page.
-@return TRUE if there are explicit record locks on the page */
+Determines if there are explicit record locks on a page.
+@return an explicit record lock on the page, or NULL if there are none */
UNIV_INTERN
-ibool
+lock_t*
lock_rec_expl_exist_on_page(
/*========================*/
ulint space, /*!< in: space id */
- ulint page_no);/*!< in: page number */
+ ulint page_no)/*!< in: page number */
+ __attribute__((warn_unused_result));
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate insert of
a record. If they do, first tests if the query thread should anyway
@@ -288,7 +277,7 @@ the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_rec_insert_check_and_lock(
/*===========================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
@@ -298,10 +287,11 @@ lock_rec_insert_check_and_lock(
dict_index_t* index, /*!< in: index */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit);/*!< out: set to TRUE if the new
+ ibool* inherit)/*!< out: set to TRUE if the new
inserted record maybe should inherit
LOCK_GAP type locks from the successor
record */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify (update,
delete mark, or delete unmark) of a clustered index record. If they do,
@@ -311,7 +301,7 @@ lock wait state and inserts a waiting request for a record x-lock to the
lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_modify_check_and_lock(
/*=================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -321,13 +311,14 @@ lock_clust_rec_modify_check_and_lock(
modified */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((warn_unused_result, nonnull));
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify
(delete mark or delete unmark) of a secondary index record.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_sec_rec_modify_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -339,15 +330,17 @@ lock_sec_rec_modify_check_and_lock(
clustered index record first: see the
comment below */
dict_index_t* index, /*!< in: secondary index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((warn_unused_result, nonnull(2,3,4,6)));
/*********************************************************************//**
Like lock_clust_rec_read_check_and_lock(), but reads a
secondary index record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_sec_rec_read_check_and_lock(
/*=============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -377,7 +370,7 @@ lock on the record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_clust_rec_read_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -408,7 +401,7 @@ lock_clust_rec_read_check_and_lock() that does not require the parameter
"offsets".
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -426,13 +419,14 @@ lock_clust_rec_read_check_and_lock_alt(
SELECT FOR UPDATE */
ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks that a record is seen in a consistent read.
-@return TRUE if sees, or FALSE if an earlier version of the record
+@return true if sees, or false if an earlier version of the record
should be retrieved */
UNIV_INTERN
-ibool
+bool
lock_clust_rec_cons_read_sees(
/*==========================*/
const rec_t* rec, /*!< in: user record which should be read or
@@ -444,33 +438,44 @@ lock_clust_rec_cons_read_sees(
Checks that a non-clustered index record is seen in a consistent read.
NOTE that a non-clustered index page contains so little information on
-its modifications that also in the case FALSE, the present version of
+its modifications that also in the case false, the present version of
rec may be the right, but we must check this from the clustered index
record.
-@return TRUE if certainly sees, or FALSE if an earlier version of the
+@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
UNIV_INTERN
-ulint
+bool
lock_sec_rec_cons_read_sees(
/*========================*/
const rec_t* rec, /*!< in: user record which
should be read or passed over
by a read cursor */
- const read_view_t* view); /*!< in: consistent read view */
+ const read_view_t* view) /*!< in: consistent read view */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_table(
/*=======*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
- dict_table_t* table, /*!< in: database table in dictionary cache */
+ dict_table_t* table, /*!< in/out: database table
+ in dictionary cache */
enum lock_mode mode, /*!< in: lock mode */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Creates a table IX lock object for a resurrected transaction. */
+UNIV_INTERN
+void
+lock_table_ix_resurrect(
+/*====================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx); /*!< in/out: transaction */
/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
@@ -479,19 +484,21 @@ UNIV_INTERN
void
lock_rec_unlock(
/*============*/
- trx_t* trx, /*!< in: transaction that has
+ trx_t* trx, /*!< in/out: transaction that has
set a record lock */
const buf_block_t* block, /*!< in: buffer block containing rec */
const rec_t* rec, /*!< in: record */
enum lock_mode lock_mode);/*!< in: LOCK_S or LOCK_X */
/*********************************************************************//**
-Releases transaction locks, and releases possible other transactions waiting
-because of these locks. */
+Releases a transaction's locks, and releases possible other transactions
+waiting because of these locks. Change the state of the transaction to
+TRX_STATE_COMMITTED_IN_MEMORY. */
UNIV_INTERN
void
-lock_release_off_kernel(
-/*====================*/
- trx_t* trx); /*!< in: transaction */
+lock_trx_release_locks(
+/*===================*/
+ trx_t* trx); /*!< in/out: transaction */
+
/*********************************************************************//**
Cancels a waiting lock request and releases possible other transactions
waiting behind it. */
@@ -499,7 +506,7 @@ UNIV_INTERN
void
lock_cancel_waiting_and_release(
/*============================*/
- lock_t* lock); /*!< in: waiting lock request */
+ lock_t* lock); /*!< in/out: waiting lock request */
/*********************************************************************//**
Removes locks on a table to be dropped or truncated.
@@ -573,8 +580,9 @@ UNIV_INTERN
ibool
lock_is_table_exclusive(
/*====================*/
- dict_table_t* table, /*!< in: table */
- trx_t* trx); /*!< in: transaction */
+ const dict_table_t* table, /*!< in: table */
+ const trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
@return TRUE if lock1 has to wait for lock2 to be removed */
@@ -588,18 +596,17 @@ lock_has_to_wait(
on the same record as in lock1 if the
locks are record locks */
/*********************************************************************//**
-Checks that a transaction id is sensible, i.e., not in the future.
-@return TRUE if ok */
+Reports that a transaction id is not sensible, i.e., it is in the future. */
UNIV_INTERN
-ibool
-lock_check_trx_id_sanity(
-/*=====================*/
+void
+lock_report_trx_id_insanity(
+/*========================*/
trx_id_t trx_id, /*!< in: trx id */
const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: clustered index */
+ dict_index_t* index, /*!< in: index */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- ibool has_kernel_mutex);/*!< in: TRUE if the caller owns the
- kernel mutex */
+ trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */
+ __attribute__((nonnull));
/*********************************************************************//**
Prints info of a table lock. */
UNIV_INTERN
@@ -618,16 +625,19 @@ lock_rec_print(
const lock_t* lock); /*!< in: record type lock */
/*********************************************************************//**
Prints info of locks for all transactions.
-@return FALSE if not able to obtain kernel mutex
-and exits without printing info */
+@return FALSE if not able to obtain lock mutex and exits without
+printing info */
UNIV_INTERN
ibool
lock_print_info_summary(
/*====================*/
FILE* file, /*!< in: file where to print */
- ibool nowait);/*!< in: whether to wait for the kernel mutex */
-/*************************************************************************
-Prints info of locks for each transaction. */
+ ibool nowait) /*!< in: whether to wait for the lock mutex */
+ __attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Prints info of locks for each transaction. This function assumes that the
+caller holds the lock mutex and more importantly it will release the lock
+mutex on behalf of the caller. (This should be fixed in the future). */
UNIV_INTERN
void
lock_print_info_all_transactions(
@@ -636,27 +646,14 @@ lock_print_info_all_transactions(
/*********************************************************************//**
Return approximate number or record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
-record count will not be precise. */
+record count will not be precise.
+The caller must be holding lock_sys->mutex. */
UNIV_INTERN
ulint
lock_number_of_rows_locked(
/*=======================*/
- const trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Check if a transaction holds any autoinc locks.
-@return TRUE if the transaction holds any AUTOINC locks. */
-UNIV_INTERN
-ibool
-lock_trx_holds_autoinc_locks(
-/*=========================*/
- const trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Release all the transaction's autoinc locks. */
-UNIV_INTERN
-void
-lock_release_autoinc_locks(
-/*=======================*/
- trx_t* trx); /*!< in/out: transaction */
+ const trx_lock_t* trx_lock) /*!< in: transaction locks */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Gets the type of a lock. Non-inline version for using outside of the
@@ -752,6 +749,115 @@ ulint
lock_rec_get_page_no(
/*=================*/
const lock_t* lock); /*!< in: lock */
+/*******************************************************************//**
+Check if there are any locks (table or rec) against table.
+@return TRUE if locks exist */
+UNIV_INTERN
+ibool
+lock_table_has_locks(
+/*=================*/
+ const dict_table_t* table); /*!< in: check if there are any locks
+ held on records in this table or on the
+ table itself */
+
+/*********************************************************************//**
+A thread which wakes up threads whose lock wait may have lasted too long.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(lock_wait_timeout_thread)(
+/*=====================================*/
+ void* arg); /*!< in: a dummy parameter required by
+ os_thread_create */
+
+/********************************************************************//**
+Releases a user OS thread waiting for a lock to be released, if the
+thread is already suspended. */
+UNIV_INTERN
+void
+lock_wait_release_thread_if_suspended(
+/*==================================*/
+ que_thr_t* thr); /*!< in: query thread associated with the
+ user OS thread */
+
+/***************************************************************//**
+Puts a user OS thread to wait for a lock to be released. If an error
+occurs during the wait trx->error_state associated with thr is
+!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
+are possible errors. DB_DEADLOCK is set if selective deadlock
+resolution chose this transaction as a victim. */
+UNIV_INTERN
+void
+lock_wait_suspend_thread(
+/*=====================*/
+ que_thr_t* thr); /*!< in: query thread associated with the
+ user OS thread */
+/*********************************************************************//**
+Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
+function should be called at the end of an SQL statement, by the
+connection thread that owns the transaction (trx->mysql_thd). */
+UNIV_INTERN
+void
+lock_unlock_table_autoinc(
+/*======================*/
+ trx_t* trx); /*!< in/out: transaction */
+/*********************************************************************//**
+Check whether the transaction has already been rolled back because it
+was selected as a deadlock victim, or if it has to wait then cancel
+the wait lock.
+@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+lock_trx_handle_wait(
+/*=================*/
+ trx_t* trx) /*!< in/out: trx lock state */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Get the number of locks on a table.
+@return number of locks */
+UNIV_INTERN
+ulint
+lock_table_get_n_locks(
+/*===================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Checks that a transaction id is sensible, i.e., not in the future.
+@return true if ok */
+UNIV_INTERN
+bool
+lock_check_trx_id_sanity(
+/*=====================*/
+ trx_id_t trx_id, /*!< in: trx id */
+ const rec_t* rec, /*!< in: user record */
+ dict_index_t* index, /*!< in: index */
+ const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
+ __attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Check if the transaction holds any locks on the sys tables
+or its records.
+@return the strongest lock found on any sys table or NULL for none */
+UNIV_INTERN
+const lock_t*
+lock_trx_has_sys_table_locks(
+/*=========================*/
+ const trx_t* trx) /*!< in: transaction to check */
+ __attribute__((warn_unused_result));
+
+/*******************************************************************//**
+Check if the transaction holds an exclusive lock on a record.
+@return whether the locks are held */
+UNIV_INTERN
+bool
+lock_trx_has_rec_x_lock(
+/*====================*/
+ const trx_t* trx, /*!< in: transaction to check */
+ const dict_table_t* table, /*!< in: table to check */
+ const buf_block_t* block, /*!< in: buffer block of the record */
+ ulint heap_no)/*!< in: record heap number */
+ __attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
/** Lock modes and types */
/* @{ */
@@ -815,22 +921,76 @@ lock_rec_get_page_no(
((type_mode & (LOCK_CONV_BY_OTHER | LOCK_WAIT)) == LOCK_WAIT)
/** Lock operation struct */
-typedef struct lock_op_struct lock_op_t;
-/** Lock operation struct */
-struct lock_op_struct{
+struct lock_op_t{
dict_table_t* table; /*!< table to be locked */
enum lock_mode mode; /*!< lock mode */
};
/** The lock system struct */
-struct lock_sys_struct{
- hash_table_t* rec_hash; /*!< hash table of the record locks */
+struct lock_sys_t{
+ ib_mutex_t mutex; /*!< Mutex protecting the
+ locks */
+ hash_table_t* rec_hash; /*!< hash table of the record
+ locks */
ulint rec_num;
+ ib_mutex_t wait_mutex; /*!< Mutex protecting the
+ next two fields */
+ srv_slot_t* waiting_threads; /*!< Array of user threads
+ suspended while waiting for
+ locks within InnoDB, protected
+ by the lock_sys->wait_mutex */
+ srv_slot_t* last_slot; /*!< highest slot ever used
+ in the waiting_threads array,
+ protected by
+ lock_sys->wait_mutex */
+ ibool rollback_complete;
+ /*!< TRUE if rollback of all
+ recovered transactions is
+ complete. Protected by
+ lock_sys->mutex */
+
+ ulint n_lock_max_wait_time; /*!< Max wait time */
+
+ os_event_t timeout_event; /*!< Set to the event that is
+ created in the lock wait monitor
+ thread. A value of 0 means the
+ thread is not active */
+
+ bool timeout_thread_active; /*!< True if the timeout thread
+ is running */
};
/** The lock system */
extern lock_sys_t* lock_sys;
+/** Try to acquire lock_sys->mutex without waiting. */
+#define lock_mutex_enter_nowait() mutex_enter_nowait(&lock_sys->mutex)
+
+/** Test if lock_sys->mutex is owned. */
+#define lock_mutex_own() mutex_own(&lock_sys->mutex)
+
+/** Acquire the lock_sys->mutex. */
+#define lock_mutex_enter() do { \
+ mutex_enter(&lock_sys->mutex); \
+} while (0)
+
+/** Release the lock_sys->mutex. */
+#define lock_mutex_exit() do { \
+ mutex_exit(&lock_sys->mutex); \
+} while (0)
+
+/** Test if lock_sys->wait_mutex is owned. */
+#define lock_wait_mutex_own() mutex_own(&lock_sys->wait_mutex)
+
+/** Acquire the lock_sys->wait_mutex. */
+#define lock_wait_mutex_enter() do { \
+ mutex_enter(&lock_sys->wait_mutex); \
+} while (0)
+
+/** Release the lock_sys->wait_mutex. */
+#define lock_wait_mutex_exit() do { \
+ mutex_exit(&lock_sys->wait_mutex); \
+} while (0)
#ifndef UNIV_NONINL
#include "lock0lock.ic"
diff --git a/storage/xtradb/include/lock0lock.ic b/storage/xtradb/include/lock0lock.ic
index 4e6c0c1b78c..736936954cb 100644
--- a/storage/xtradb/include/lock0lock.ic
+++ b/storage/xtradb/include/lock0lock.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -68,35 +68,6 @@ lock_rec_hash(
}
/*********************************************************************//**
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index.
-@return transaction which has the x-lock, or NULL */
-UNIV_INLINE
-trx_t*
-lock_clust_rec_some_has_impl(
-/*=========================*/
- const rec_t* rec, /*!< in: user record */
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- trx_id_t trx_id;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(dict_index_is_clust(index));
- ut_ad(page_rec_is_user_rec(rec));
-
- trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- if (trx_is_active(trx_id)) {
- /* The modifying or inserting transaction is active */
-
- return(trx_get_on_id(trx_id));
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
Gets the heap_no of the smallest user record on a page.
@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
UNIV_INLINE
diff --git a/storage/xtradb/include/lock0priv.h b/storage/xtradb/include/lock0priv.h
index 491cad95329..e564387ec53 100644
--- a/storage/xtradb/include/lock0priv.h
+++ b/storage/xtradb/include/lock0priv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -40,9 +40,7 @@ those functions in lock/ */
#include "ut0lst.h"
/** A table lock */
-typedef struct lock_table_struct lock_table_t;
-/** A table lock */
-struct lock_table_struct {
+struct lock_table_t {
dict_table_t* table; /*!< database table in dictionary
cache */
UT_LIST_NODE_T(lock_t)
@@ -51,9 +49,7 @@ struct lock_table_struct {
};
/** Record lock for a page */
-typedef struct lock_rec_struct lock_rec_t;
-/** Record lock for a page */
-struct lock_rec_struct {
+struct lock_rec_t {
ulint space; /*!< space id */
ulint page_no; /*!< page number */
ulint n_bits; /*!< number of bits in the lock
@@ -62,8 +58,8 @@ struct lock_rec_struct {
lock struct */
};
-/** Lock struct */
-struct lock_struct {
+/** Lock struct; protected by lock_sys->mutex */
+struct lock_t {
trx_t* trx; /*!< transaction owning the
lock */
UT_LIST_NODE_T(lock_t)
@@ -101,6 +97,19 @@ lock_rec_get_prev(
const lock_t* in_lock,/*!< in: record lock */
ulint heap_no);/*!< in: heap number of the record */
+/*********************************************************************//**
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index.
+@return transaction id of the transaction which has the x-lock, or 0 */
+UNIV_INLINE
+trx_id_t
+lock_clust_rec_some_has_impl(
+/*=========================*/
+ const rec_t* rec, /*!< in: user record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ __attribute__((nonnull, warn_unused_result));
+
#ifndef UNIV_NONINL
#include "lock0priv.ic"
#endif
diff --git a/storage/xtradb/include/lock0priv.ic b/storage/xtradb/include/lock0priv.ic
index 98b2189680c..6b70dc33d3c 100644
--- a/storage/xtradb/include/lock0priv.ic
+++ b/storage/xtradb/include/lock0priv.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -24,8 +24,8 @@ Created July 16, 2007 Vasil Dimov
*******************************************************/
/* This file contains only methods which are used in
-lock/lock0* files, other than lock/lock0lock.c.
-I.e. lock/lock0lock.c contains more internal inline
+lock/lock0* files, other than lock/lock0lock.cc.
+I.e. lock/lock0lock.cc contains more internal inline
methods but they are used only in that file. */
#ifndef LOCK_MODULE_IMPLEMENTATION
@@ -46,4 +46,22 @@ lock_get_type_low(
return(lock->type_mode & LOCK_TYPE_MASK);
}
+/*********************************************************************//**
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index.
+@return transaction id of the transaction which has the x-lock, or 0 */
+UNIV_INLINE
+trx_id_t
+lock_clust_rec_some_has_impl(
+/*=========================*/
+ const rec_t* rec, /*!< in: user record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(page_rec_is_user_rec(rec));
+
+ return(row_get_rec_trx_id(rec, index, offsets));
+}
+
/* vim: set filetype=c: */
diff --git a/storage/xtradb/include/lock0types.h b/storage/xtradb/include/lock0types.h
index 2eb71e2939f..cf32e72f864 100644
--- a/storage/xtradb/include/lock0types.h
+++ b/storage/xtradb/include/lock0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,8 +27,8 @@ Created 5/7/1996 Heikki Tuuri
#define lock0types_h
#define lock_t ib_lock_t
-typedef struct lock_struct lock_t;
-typedef struct lock_sys_struct lock_sys_t;
+struct lock_t;
+struct lock_sys_t;
/* Basic lock modes */
enum lock_mode {
@@ -39,7 +39,9 @@ enum lock_mode {
LOCK_AUTO_INC, /* locks the auto-inc counter of a table
in an exclusive mode */
LOCK_NONE, /* this is used elsewhere to note consistent read */
- LOCK_NUM = LOCK_NONE/* number of lock modes */
+ LOCK_NUM = LOCK_NONE, /* number of lock modes */
+ LOCK_NONE_UNSET = 255
};
+
#endif
diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h
index 31afe5d8555..bab256e5a65 100644
--- a/storage/xtradb/include/log0log.h
+++ b/storage/xtradb/include/log0log.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -43,11 +43,14 @@ Created 12/9/1995 Heikki Tuuri
/* Type used for all log sequence number storage and arithmetics */
typedef ib_uint64_t lsn_t;
+#define LSN_MAX IB_UINT64_MAX
+
+#define LSN_PF UINT64PF
/** Redo log buffer */
-typedef struct log_struct log_t;
+struct log_t;
/** Redo log group */
-typedef struct log_group_struct log_group_t;
+struct log_group_t;
#ifdef UNIV_DEBUG
/** Flag: write to log file? */
@@ -59,25 +62,27 @@ extern ibool log_debug_writes;
# define log_do_write TRUE
#endif /* UNIV_DEBUG */
+/** Magic value to use instead of log checksums when they are disabled */
+#define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
+
+typedef ulint (*log_checksum_func_t)(const byte* log_block);
+
+/** Pointer to the log checksum calculation function. Protected with
+log_sys->mutex. */
+extern log_checksum_func_t log_checksum_algorithm_ptr;
+
/** Wait modes for log_write_up_to @{ */
#define LOG_NO_WAIT 91
#define LOG_WAIT_ONE_GROUP 92
#define LOG_WAIT_ALL_GROUPS 93
/* @} */
-/** Maximum number of log groups in log_group_struct::checkpoint_buf */
+/** Maximum number of log groups in log_group_t::checkpoint_buf */
#define LOG_MAX_N_GROUPS 32
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
-so that we know that the limit has been written to a log checkpoint field
-on disk. */
-UNIV_INTERN
-void
-log_fsp_current_free_limit_set_and_checkpoint(
-/*==========================================*/
- ulint limit); /*!< in: limit to set */
-#endif /* !UNIV_HOTBACKUP */
+#define IB_ARCHIVED_LOGS_PREFIX "ib_log_archive_"
+#define IB_ARCHIVED_LOGS_PREFIX_LEN (sizeof(IB_ARCHIVED_LOGS_PREFIX) - 1)
+#define IB_ARCHIVED_LOGS_SERIAL_LEN 20
+
/*******************************************************************//**
Calculates where in log files we find a specified lsn.
@return log file number */
@@ -101,12 +106,12 @@ Writes to the log the string given. The log must be released with
log_release.
@return end lsn of the log record, zero if did not succeed */
UNIV_INLINE
-ib_uint64_t
+lsn_t
log_reserve_and_write_fast(
/*=======================*/
const void* str, /*!< in: string */
ulint len, /*!< in: string length */
- ib_uint64_t* start_lsn);/*!< out: start lsn of the log record */
+ lsn_t* start_lsn);/*!< out: start lsn of the log record */
/***********************************************************************//**
Releases the log mutex. */
UNIV_INLINE
@@ -127,7 +132,7 @@ Locks the log mutex and opens the log for log_write_low. The log must be closed
with log_close and released with log_release.
@return start lsn of the log record */
UNIV_INLINE
-ib_uint64_t
+lsn_t
log_reserve_and_open(
/*=================*/
ulint len); /*!< in: length of data to be catenated */
@@ -135,7 +140,7 @@ log_reserve_and_open(
Opens the log for log_write_low. The log must be closed with log_close.
@return start lsn of the log record */
UNIV_INTERN
-ib_uint64_t
+lsn_t
log_open(
/*=====*/
ulint len); /*!< in: length of data to be catenated */
@@ -152,14 +157,14 @@ log_write_low(
Closes the log.
@return lsn */
UNIV_INTERN
-ib_uint64_t
+lsn_t
log_close(void);
/*===========*/
/************************************************************//**
Gets the current lsn.
@return current lsn */
UNIV_INLINE
-ib_uint64_t
+lsn_t
log_get_lsn(void);
/*=============*/
/************************************************************//**
@@ -174,9 +179,17 @@ Gets the log group capacity. It is OK to read the value without
holding log_sys->mutex because it is constant.
@return log group capacity */
UNIV_INLINE
-ulint
+lsn_t
log_get_capacity(void);
/*==================*/
+/****************************************************************//**
+Get log_sys::max_modified_age_async. It is OK to read the value without
+holding log_sys::mutex because it is constant.
+@return max_modified_age_async */
+UNIV_INLINE
+lsn_t
+log_get_max_modified_age_async(void);
+/*================================*/
/******************************************************//**
Initializes the log. */
UNIV_INTERN
@@ -191,7 +204,7 @@ log_group_init(
/*===========*/
ulint id, /*!< in: group id */
ulint n_files, /*!< in: number of log files */
- ulint file_size, /*!< in: log file size in bytes */
+ lsn_t file_size, /*!< in: log file size in bytes */
ulint space_id, /*!< in: space id of the file space
which contains the log files of this
group */
@@ -216,14 +229,13 @@ UNIV_INTERN
void
log_write_up_to(
/*============*/
- ib_uint64_t lsn, /*!< in: log sequence number up to which
- the log should be written,
- IB_ULONGLONG_MAX if not specified */
- ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk);
- /*!< in: TRUE if we want the written log
- also to be flushed to disk */
+ lsn_t lsn, /*!< in: log sequence number up to which
+ the log should be written, LSN_MAX if not specified */
+ ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ or LOG_WAIT_ALL_GROUPS */
+ ibool flush_to_disk);
+ /*!< in: TRUE if we want the written log
+ also to be flushed to disk */
/****************************************************************//**
Does a synchronous flush of the log buffer to disk. */
UNIV_INTERN
@@ -240,21 +252,6 @@ void
log_buffer_sync_in_background(
/*==========================*/
 ibool flush); /*!< in: flush the logs to disk */
-/****************************************************************//**
-Advances the smallest lsn for which there are unflushed dirty blocks in the
-buffer pool and also may make a new checkpoint. NOTE: this function may only
-be called if the calling thread owns no synchronization objects!
-@return FALSE if there was a flush batch of the same type running,
-which means that we could not start this flush batch */
-UNIV_INTERN
-ibool
-log_preflush_pool_modified_pages(
-/*=============================*/
- ib_uint64_t new_oldest, /*!< in: try to advance
- oldest_modified_lsn at least
- to this lsn */
- ibool sync); /*!< in: TRUE if synchronous
- operation is desired */
/******************************************************//**
Makes a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks what is lsn of the oldest
@@ -282,16 +279,16 @@ UNIV_INTERN
void
log_make_checkpoint_at(
/*===================*/
- ib_uint64_t lsn, /*!< in: make a checkpoint at this or a
- later lsn, if IB_ULONGLONG_MAX, makes
- a checkpoint at the latest lsn */
- ibool write_always); /*!< in: the function normally checks if
- the new checkpoint would have a
- greater lsn than the previous one: if
- not, then no physical write is done;
- by setting this parameter TRUE, a
- physical write will always be made to
- log files */
+ lsn_t lsn, /*!< in: make a checkpoint at this or a
+ later lsn, if LSN_MAX, makes
+ a checkpoint at the latest lsn */
+ ibool write_always); /*!< in: the function normally checks if
+ the new checkpoint would have a
+ greater lsn than the previous one: if
+ not, then no physical write is done;
+ by setting this parameter TRUE, a
+ physical write will always be made to
+ log files */
/****************************************************************//**
Disable checkpoints. This is used when doing a volume snapshot
to ensure that we don't get checkpoint between snapshotting two
@@ -329,8 +326,7 @@ log_checkpoint_get_nth_group_info(
/*==============================*/
const byte* buf, /*!< in: buffer containing checkpoint info */
ulint n, /*!< in: nth slot */
- ulint* file_no,/*!< out: archived file number */
- ulint* offset);/*!< out: archived file offset */
+ lsn_t* file_no);/*!< out: archived file number */
/******************************************************//**
Writes checkpoint info to groups. */
UNIV_INTERN
@@ -386,8 +382,18 @@ void
log_archived_file_name_gen(
/*=======================*/
char* buf, /*!< in: buffer where to write */
+ ulint buf_len,/*!< in: buffer length */
ulint id, /*!< in: group id */
- ulint file_no);/*!< in: file number */
+ lsn_t file_no);/*!< in: file number */
+
+UNIV_INTERN
+void
+log_archived_get_offset(
+/*====================*/
+ log_group_t* group, /*!< in: log group */
+ lsn_t file_no, /*!< in: archive log file number */
+ lsn_t archived_lsn, /*!< in: last archived LSN */
+ lsn_t* offset); /*!< out: offset within archived file */
#else /* !UNIV_HOTBACKUP */
/******************************************************//**
Writes info to a buffer of a log group when log files are created in
@@ -421,8 +427,8 @@ log_group_read_log_seg(
ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */
byte* buf, /*!< in: buffer where to read */
log_group_t* group, /*!< in: log group */
- ib_uint64_t start_lsn, /*!< in: read area start */
- ib_uint64_t end_lsn, /*!< in: read area end */
+ lsn_t start_lsn, /*!< in: read area start */
+ lsn_t end_lsn, /*!< in: read area end */
ibool release_mutex); /*!< in: whether the log_sys->mutex
should be released before the read */
/******************************************************//**
@@ -435,7 +441,7 @@ log_group_write_buf(
byte* buf, /*!< in: buffer */
ulint len, /*!< in: buffer len; must be divisible
by OS_FILE_LOG_BLOCK_SIZE */
- ib_uint64_t start_lsn, /*!< in: start lsn of the buffer; must
+ lsn_t start_lsn, /*!< in: start lsn of the buffer; must
be divisible by
OS_FILE_LOG_BLOCK_SIZE */
ulint new_data_offset);/*!< in: start offset of new data in
@@ -451,14 +457,14 @@ void
log_group_set_fields(
/*=================*/
log_group_t* group, /*!< in/out: group */
- ib_uint64_t lsn); /*!< in: lsn for which the values should be
+ lsn_t lsn); /*!< in: lsn for which the values should be
set */
/******************************************************//**
Calculates the data capacity of a log group, when the log file headers are not
included.
@return capacity in bytes */
UNIV_INTERN
-ulint
+lsn_t
log_group_get_capacity(
/*===================*/
const log_group_t* group); /*!< in: log group */
@@ -550,8 +556,8 @@ UNIV_INLINE
void
log_block_init(
/*===========*/
- byte* log_block, /*!< in: pointer to the log buffer */
- ib_uint64_t lsn); /*!< in: lsn within the log block */
+ byte* log_block, /*!< in: pointer to the log buffer */
+ lsn_t lsn); /*!< in: lsn within the log block */
/************************************************************//**
Initializes a log block in the log buffer in the old, < 3.23.52 format, where
there was no checksum yet. */
@@ -559,8 +565,8 @@ UNIV_INLINE
void
log_block_init_in_old_format(
/*=========================*/
- byte* log_block, /*!< in: pointer to the log buffer */
- ib_uint64_t lsn); /*!< in: lsn within the log block */
+ byte* log_block, /*!< in: pointer to the log buffer */
+ lsn_t lsn); /*!< in: lsn within the log block */
/************************************************************//**
Converts a lsn to a log block number.
@return log block number, it is > 0 and <= 1G */
@@ -568,7 +574,7 @@ UNIV_INLINE
ulint
log_block_convert_lsn_to_no(
/*========================*/
- ib_uint64_t lsn); /*!< in: lsn of a byte within the block */
+ lsn_t lsn); /*!< in: lsn of a byte within the block */
/******************************************************//**
Prints info of the log. */
UNIV_INTERN
@@ -583,20 +589,26 @@ UNIV_INTERN
ibool
log_peek_lsn(
/*=========*/
- ib_uint64_t* lsn); /*!< out: if returns TRUE, current lsn is here */
+ lsn_t* lsn); /*!< out: if returns TRUE, current lsn is here */
/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
UNIV_INTERN
void
log_refresh_stats(void);
/*===================*/
-/**********************************************************
+/********************************************************//**
+Closes all log groups. */
+UNIV_INTERN
+void
+log_group_close_all(void);
+/*=====================*/
+/********************************************************//**
Shutdown the log system but do not release all the memory. */
UNIV_INTERN
void
log_shutdown(void);
/*==============*/
-/**********************************************************
+/********************************************************//**
Free the log system data structures. */
UNIV_INTERN
void
@@ -614,7 +626,7 @@ extern log_t* log_sys;
#define LOG_RECOVER 98887331
/* The counting of lsn's starts from this value: this must be non-zero */
-#define LOG_START_LSN ((ib_uint64_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
+#define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
@@ -661,7 +673,7 @@ extern log_t* log_sys;
/* Offsets for a checkpoint field */
#define LOG_CHECKPOINT_NO 0
#define LOG_CHECKPOINT_LSN 8
-#define LOG_CHECKPOINT_OFFSET 16
+#define LOG_CHECKPOINT_OFFSET_LOW32 16
#define LOG_CHECKPOINT_LOG_BUF_SIZE 20
#define LOG_CHECKPOINT_ARCHIVED_LSN 24
#define LOG_CHECKPOINT_GROUP_ARRAY 32
@@ -675,22 +687,38 @@ extern log_t* log_sys;
+ LOG_MAX_N_GROUPS * 8)
#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END
#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END)
+#if 0
#define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END)
- /* current fsp free limit in
+ /*!< Not used (0);
+ This used to contain the
+ current fsp free limit in
tablespace 0, in units of one
- megabyte; this information is only used
- by ibbackup to decide if it can
- truncate unused ends of
- non-auto-extending data files in space
- 0 */
+ megabyte.
+
+ This information might have been used
+ since ibbackup version 0.35 but
+ before 1.41 to decide if unused ends of
+ non-auto-extending data files
+ in space 0 can be truncated.
+
+ This information was made obsolete
+ by ibbackup --compress. */
#define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END)
- /* this magic number tells if the
+ /*!< Not used (0);
+ This magic number tells if the
checkpoint contains the above field:
the field was added to
- InnoDB-3.23.50 */
-#define LOG_CHECKPOINT_SIZE (16 + LOG_CHECKPOINT_ARRAY_END)
-
+ InnoDB-3.23.50 and
+ removed from MySQL 5.6 */
#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243
+ /*!< if LOG_CHECKPOINT_FSP_MAGIC_N
+ contains this value, then
+ LOG_CHECKPOINT_FSP_FREE_LIMIT
+ is valid */
+#endif
+#define LOG_CHECKPOINT_OFFSET_HIGH32 (16 + LOG_CHECKPOINT_ARRAY_END)
+#define LOG_CHECKPOINT_SIZE (20 + LOG_CHECKPOINT_ARRAY_END)
+
/* Offsets of a log file header */
#define LOG_GROUP_ID 0 /* log group number */
@@ -739,19 +767,19 @@ extern log_t* log_sys;
/** Log group consists of a number of log files, each of the same size; a log
group is implemented as a space in the sense of the module fil0fil. */
-struct log_group_struct{
+struct log_group_t{
/* The following fields are protected by log_sys->mutex */
ulint id; /*!< log group id */
ulint n_files; /*!< number of files in the group */
- ulint file_size; /*!< individual log file size in bytes,
+ lsn_t file_size; /*!< individual log file size in bytes,
including the log file header */
ulint space_id; /*!< file space which implements the log
group */
ulint state; /*!< LOG_GROUP_OK or
LOG_GROUP_CORRUPTED */
- ib_uint64_t lsn; /*!< lsn used to fix coordinates within
+ lsn_t lsn; /*!< lsn used to fix coordinates within
the log group */
- ulint lsn_offset; /*!< the offset of the above lsn */
+ lsn_t lsn_offset; /*!< the offset of the above lsn */
ulint n_pending_writes;/*!< number of currently pending flush
writes for this log group */
byte** file_header_bufs_ptr;/*!< unaligned buffers */
@@ -765,22 +793,22 @@ struct log_group_struct{
ulint archive_space_id;/*!< file space which
implements the log group
archive */
- ulint archived_file_no;/*!< file number corresponding to
+ lsn_t archived_file_no;/*!< file number corresponding to
log_sys->archived_lsn */
- ulint archived_offset;/*!< file offset corresponding to
+ lsn_t archived_offset;/*!< file offset corresponding to
log_sys->archived_lsn, 0 if we have
not yet written to the archive file
number archived_file_no */
- ulint next_archived_file_no;/*!< during an archive write,
+ lsn_t next_archived_file_no;/*!< during an archive write,
until the write is completed, we
store the next value for
archived_file_no here: the write
completion function then sets the new
value to ..._file_no */
- ulint next_archived_offset; /*!< like the preceding field */
+ lsn_t next_archived_offset; /*!< like the preceding field */
#endif /* UNIV_LOG_ARCHIVE */
/*-----------------------------*/
- ib_uint64_t scanned_lsn; /*!< used only in recovery: recovery scan
+ lsn_t scanned_lsn; /*!< used only in recovery: recovery scan
succeeded up to this lsn in this log
group */
byte* checkpoint_buf_ptr;/*!< unaligned checkpoint header */
@@ -791,17 +819,17 @@ struct log_group_struct{
};
/** Redo log buffer */
-struct log_struct{
+struct log_t{
byte pad[64]; /*!< padding to prevent other memory
update hotspots from residing on the
same memory cache line */
- ib_uint64_t lsn; /*!< log sequence number */
+ lsn_t lsn; /*!< log sequence number */
ulint buf_free; /*!< first free offset within the log
buffer */
#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< mutex protecting the log */
+ ib_prio_mutex_t mutex; /*!< mutex protecting the log */
- mutex_t log_flush_order_mutex;/*!< mutex to serialize access to
+ ib_mutex_t log_flush_order_mutex;/*!< mutex to serialize access to
the flush list when we are putting
dirty blocks in the list. The idea
behind this mutex is to be able
@@ -816,12 +844,14 @@ struct log_struct{
ulint max_buf_free; /*!< recommended maximum value of
buf_free, after which the buffer is
flushed */
+ #ifdef UNIV_LOG_DEBUG
ulint old_buf_free; /*!< value of buf free when log was
last time opened; only in the debug
version */
ib_uint64_t old_lsn; /*!< value of lsn when log was
last time opened; only in the
debug version */
+#endif /* UNIV_LOG_DEBUG */
ibool check_flush_or_checkpoint;
/*!< this is set to TRUE when there may
be need to flush the log buffer, or
@@ -844,13 +874,13 @@ struct log_struct{
later; this is advanced when a flush
operation is completed to all the log
groups */
- ib_uint64_t written_to_some_lsn;
+ lsn_t written_to_some_lsn;
/*!< first log sequence number not yet
written to any log group; for this to
be advanced, it is enough that the
write i/o has been completed for any
one log group */
- ib_uint64_t written_to_all_lsn;
+ lsn_t written_to_all_lsn;
/*!< first log sequence number not yet
written to some log group; for this to
be advanced, it is enough that the
@@ -866,16 +896,16 @@ struct log_struct{
flushed_to_disk_lsn or
write_lsn which are always
up-to-date and accurate. */
- ib_uint64_t write_lsn; /*!< end lsn for the current running
+ lsn_t write_lsn; /*!< end lsn for the current running
write */
ulint write_end_offset;/*!< the data in buffer has
been written up to this offset
when the current write ends:
this field will then be copied
to buf_next_to_write */
- ib_uint64_t current_flush_lsn;/*!< end lsn for the current running
+ lsn_t current_flush_lsn;/*!< end lsn for the current running
write + flush operation */
- ib_uint64_t flushed_to_disk_lsn;
+ lsn_t flushed_to_disk_lsn;
/*!< how far we have written the log
AND flushed to disk */
ulint n_pending_writes;/*!< number of currently
@@ -912,42 +942,37 @@ struct log_struct{
/* @} */
/** Fields involved in checkpoints @{ */
- ulint log_group_capacity; /*!< capacity of the log group; if
+ lsn_t log_group_capacity; /*!< capacity of the log group; if
the checkpoint age exceeds this, it is
a serious error because it is possible
we will then overwrite log and spoil
crash recovery */
- ulint max_modified_age_async;
+ lsn_t max_modified_age_async;
/*!< when this recommended
value for lsn -
buf_pool_get_oldest_modification()
is exceeded, we start an
asynchronous preflush of pool pages */
- ulint max_modified_age_sync;
+ lsn_t max_modified_age_sync;
/*!< when this recommended
value for lsn -
buf_pool_get_oldest_modification()
is exceeded, we start a
synchronous preflush of pool pages */
- ulint adm_checkpoint_interval;
- /*!< administrator-specified checkpoint
- interval in terms of log growth in
- bytes; the interval actually used by
- the database can be smaller */
- ulint max_checkpoint_age_async;
+ lsn_t max_checkpoint_age_async;
/*!< when this checkpoint age
is exceeded we start an
asynchronous writing of a new
checkpoint */
- ulint max_checkpoint_age;
+ lsn_t max_checkpoint_age;
/*!< this is the maximum allowed value
for lsn - last_checkpoint_lsn when a
new query step is started */
ib_uint64_t next_checkpoint_no;
/*!< next checkpoint number */
- ib_uint64_t last_checkpoint_lsn;
+ lsn_t last_checkpoint_lsn;
/*!< latest checkpoint lsn */
- ib_uint64_t next_checkpoint_lsn;
+ lsn_t next_checkpoint_lsn;
/*!< next checkpoint lsn */
ulint n_pending_checkpoint_writes;
/*!< number of currently pending
@@ -965,16 +990,16 @@ struct log_struct{
/** Fields involved in archiving @{ */
ulint archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING
LOG_ARCH_STOPPED, LOG_ARCH_OFF */
- ib_uint64_t archived_lsn; /*!< archiving has advanced to this
+ lsn_t archived_lsn; /*!< archiving has advanced to this
lsn */
- ulint max_archived_lsn_age_async;
+ lsn_t max_archived_lsn_age_async;
/*!< recommended maximum age of
archived_lsn, before we start
asynchronous copying to the archive */
- ulint max_archived_lsn_age;
+ lsn_t max_archived_lsn_age;
/*!< maximum allowed age for
archived_lsn */
- ib_uint64_t next_archived_lsn;/*!< during an archive write,
+ lsn_t next_archived_lsn;/*!< during an archive write,
until the write is completed, we
store the next value for
archived_lsn here: the write
@@ -990,6 +1015,7 @@ struct log_struct{
should wait for this without owning
the log mutex */
ulint archive_buf_size;/*!< size of archive_buf */
+ byte* archive_buf_ptr;/*!< unaligned archive_buf */
byte* archive_buf; /*!< log segment is written to the
archive from this buffer */
os_event_t archiving_on; /*!< if archiving has been stopped,
@@ -997,7 +1023,7 @@ struct log_struct{
become signaled */
/* @} */
#endif /* UNIV_LOG_ARCHIVE */
- ib_uint64_t tracked_lsn; /*!< log tracking has advanced to this
+ lsn_t tracked_lsn; /*!< log tracking has advanced to this
lsn. Field accessed atomically where
64-bit atomic ops are supported,
protected by the log sys mutex
diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic
index 0088df41225..7724d94b51a 100644
--- a/storage/xtradb/include/log0log.ic
+++ b/storage/xtradb/include/log0log.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,6 +26,9 @@ Created 12/9/1995 Heikki Tuuri
#include "os0file.h"
#include "mach0data.h"
#include "mtr0mtr.h"
+#include "srv0mon.h"
+#include "srv0srv.h"
+#include "ut0crc32.h"
#ifdef UNIV_LOG_DEBUG
/******************************************************//**
@@ -192,13 +195,13 @@ UNIV_INLINE
ulint
log_block_convert_lsn_to_no(
/*========================*/
- ib_uint64_t lsn) /*!< in: lsn of a byte within the block */
+ lsn_t lsn) /*!< in: lsn of a byte within the block */
{
return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) & 0x3FFFFFFFUL) + 1);
}
/************************************************************//**
-Calculates the checksum for a log block.
+Calculates the checksum for a log block using the current algorithm.
@return checksum */
UNIV_INLINE
ulint
@@ -206,6 +209,17 @@ log_block_calc_checksum(
/*====================*/
const byte* block) /*!< in: log block */
{
+ return(log_checksum_algorithm_ptr(block));
+}
+/************************************************************//**
+Calculates the checksum for a log block using the default InnoDB algorithm.
+@return checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_innodb(
+/*===========================*/
+ const byte* block) /*!< in: log block */
+{
ulint sum;
ulint sh;
ulint i;
@@ -228,6 +242,30 @@ log_block_calc_checksum(
}
/************************************************************//**
+Calculates the checksum for a log block using the CRC32 algorithm.
+@return ut_crc32() of the first OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE bytes */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_crc32(
+/*==========================*/
+ const byte* block) /*!< in: log block */
+{
+ return(ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE));
+}
+
+/************************************************************//**
+Calculates the checksum for a log block using the "no-op" algorithm.
+@return always LOG_NO_CHECKSUM_MAGIC; the block contents are not read */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_none(
+/*=========================*/
+ const byte* block) /*!< in: log block (unused) */
+{
+ return(LOG_NO_CHECKSUM_MAGIC);
+}
+
+/************************************************************//**
Gets a log block checksum field value.
@return checksum */
UNIV_INLINE
@@ -260,8 +298,8 @@ UNIV_INLINE
void
log_block_init(
/*===========*/
- byte* log_block, /*!< in: pointer to the log buffer */
- ib_uint64_t lsn) /*!< in: lsn within the log block */
+ byte* log_block, /*!< in: pointer to the log buffer */
+ lsn_t lsn) /*!< in: lsn within the log block */
{
ulint no;
@@ -282,8 +320,8 @@ UNIV_INLINE
void
log_block_init_in_old_format(
/*=========================*/
- byte* log_block, /*!< in: pointer to the log buffer */
- ib_uint64_t lsn) /*!< in: lsn within the log block */
+ byte* log_block, /*!< in: pointer to the log buffer */
+ lsn_t lsn) /*!< in: lsn within the log block */
{
ulint no;
@@ -304,12 +342,12 @@ Writes to the log the string given. The log must be released with
log_release.
@return end lsn of the log record, zero if did not succeed */
UNIV_INLINE
-ib_uint64_t
+lsn_t
log_reserve_and_write_fast(
/*=======================*/
const void* str, /*!< in: string */
ulint len, /*!< in: string length */
- ib_uint64_t* start_lsn)/*!< out: start lsn of the log record */
+ lsn_t* start_lsn)/*!< out: start lsn of the log record */
{
ulint data_len;
#ifdef UNIV_LOG_LSN_DEBUG
@@ -374,6 +412,9 @@ log_reserve_and_write_fast(
log_sys->lsn += len;
+ MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
+ log_sys->lsn - log_sys->last_checkpoint_lsn);
+
#ifdef UNIV_LOG_DEBUG
log_check_log_recs(log_sys->buf + log_sys->old_buf_free,
log_sys->buf_free - log_sys->old_buf_free,
@@ -411,11 +452,11 @@ log_release(void)
Gets the current lsn.
@return current lsn */
UNIV_INLINE
-ib_uint64_t
+lsn_t
log_get_lsn(void)
/*=============*/
{
- ib_uint64_t lsn;
+ lsn_t lsn;
mutex_enter(&(log_sys->mutex));
@@ -450,13 +491,25 @@ Gets the log group capacity. It is OK to read the value without
holding log_sys->mutex because it is constant.
@return log group capacity */
UNIV_INLINE
-ulint
+lsn_t
log_get_capacity(void)
/*==================*/
{
return(log_sys->log_group_capacity);
}
+/****************************************************************//**
+Get log_sys::max_modified_age_async. It is OK to read the value without
+holding log_sys::mutex because it is constant.
+@return max_modified_age_async */
+UNIV_INLINE
+lsn_t
+log_get_max_modified_age_async(void)
+/*================================*/
+{
+ return(log_sys->max_modified_age_async);
+}
+
/***********************************************************************//**
Checks if there is need for a log buffer flush or a new checkpoint, and does
this if yes. Any database operation should call this when it has modified
diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h
index a20eef57d7a..1ef4df7d6da 100644
--- a/storage/xtradb/include/log0online.h
+++ b/storage/xtradb/include/log0online.h
@@ -26,6 +26,7 @@ Online database log parsing for changed page tracking
#include "univ.i"
#include "os0file.h"
+#include "log0log.h"
/** Single bitmap file information */
typedef struct log_online_bitmap_file_struct log_online_bitmap_file_t;
@@ -109,9 +110,9 @@ ibool
log_online_bitmap_iterator_init(
/*============================*/
log_bitmap_iterator_t *i, /*!<in/out: iterator */
- ib_uint64_t min_lsn, /*!<in: start LSN for the
+ lsn_t min_lsn, /*!<in: start LSN for the
iterator */
- ib_uint64_t max_lsn); /*!<in: end LSN for the
+ lsn_t max_lsn); /*!<in: end LSN for the
iterator */
/*********************************************************************//**
@@ -138,7 +139,7 @@ struct log_online_bitmap_file_struct {
char name[FN_REFLEN]; /*!< Name with full path */
os_file_t file; /*!< Handle to opened file */
ib_uint64_t size; /*!< Size of the file */
- ib_uint64_t offset; /*!< Offset of the next read,
+ os_offset_t offset; /*!< Offset of the next read,
or count of already-read bytes
*/
};
@@ -147,12 +148,12 @@ struct log_online_bitmap_file_struct {
struct log_online_bitmap_file_range_struct {
size_t count; /*!< Number of files */
/*!< Dynamically-allocated array of info about individual files */
- struct {
- char name[FN_REFLEN]; /*!< Name of a file */
- ib_uint64_t start_lsn; /*!< Starting LSN of
- data in this file */
- ulong seq_num; /*!< Sequence number of
- this file */
+ struct files_t {
+ char name[FN_REFLEN]; /*!< Name of a file */
+ lsn_t start_lsn; /*!< Starting LSN of data in
+ this file */
+ ulong seq_num; /*!< Sequence number of this
+ file */
} *files;
};
@@ -171,9 +172,9 @@ struct log_bitmap_iterator_struct
ib_uint32_t bit_offset; /*!< bit offset inside
the current bitmap
block */
- ib_uint64_t start_lsn; /*!< Start LSN of the
+ lsn_t start_lsn; /*!< Start LSN of the
current bitmap block */
- ib_uint64_t end_lsn; /*!< End LSN of the
+ lsn_t end_lsn; /*!< End LSN of the
current bitmap block */
ib_uint32_t space_id; /*!< Current block
space id */
diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h
index ad30f6862c2..a1653c10999 100644
--- a/storage/xtradb/include/log0recv.h
+++ b/storage/xtradb/include/log0recv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -50,7 +50,7 @@ UNIV_INTERN
ib_uint64_t
recv_calc_lsn_on_data_add(
/*======================*/
- ib_uint64_t lsn, /*!< in: old lsn */
+ lsn_t lsn, /*!< in: old lsn */
ib_uint64_t len); /*!< in: this many bytes of data is
added, log block headers not included */
@@ -62,19 +62,17 @@ Reads the checkpoint info needed in hot backup.
@return TRUE if success */
UNIV_INTERN
ibool
-recv_read_cp_info_for_backup(
-/*=========================*/
+recv_read_checkpoint_info_for_backup(
+/*=================================*/
const byte* hdr, /*!< in: buffer containing the log group
header */
- ib_uint64_t* lsn, /*!< out: checkpoint lsn */
- ulint* offset, /*!< out: checkpoint offset in the log group */
- ulint* fsp_limit,/*!< out: fsp limit of space 0,
- 1000000000 if the database is running
- with < version 3.23.50 of InnoDB */
- ib_uint64_t* cp_no, /*!< out: checkpoint number */
- ib_uint64_t* first_header_lsn);
+ lsn_t* lsn, /*!< out: checkpoint lsn */
+ lsn_t* offset, /*!< out: checkpoint offset in the log group */
+ lsn_t* cp_no, /*!< out: checkpoint number */
+ lsn_t* first_header_lsn)
/*!< out: lsn of the start of the
first log file */
+ __attribute__((nonnull));
/*******************************************************************//**
Scans the log segment and n_bytes_scanned is set to the length of valid
log scanned. */
@@ -84,7 +82,7 @@ recv_scan_log_seg_for_backup(
/*=========================*/
byte* buf, /*!< in: buffer containing log data */
ulint buf_len, /*!< in: data length in that buffer */
- ib_uint64_t* scanned_lsn, /*!< in/out: lsn of buffer start,
+ lsn_t* scanned_lsn, /*!< in/out: lsn of buffer start,
we return scanned lsn */
ulint* scanned_checkpoint_no,
/*!< in/out: 4 lowest bytes of the
@@ -152,18 +150,18 @@ recv_recovery_from_checkpoint_finish should be called later to complete
the recovery and free the resources used in it.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
recv_recovery_from_checkpoint_start_func(
/*=====================================*/
#ifdef UNIV_LOG_ARCHIVE
ulint type, /*!< in: LOG_CHECKPOINT or
LOG_ARCHIVE */
- ib_uint64_t limit_lsn, /*!< in: recover up to this lsn
+ lsn_t limit_lsn, /*!< in: recover up to this lsn
if possible */
#endif /* UNIV_LOG_ARCHIVE */
- ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn from
+ lsn_t min_flushed_lsn,/*!< in: min flushed lsn from
data files */
- ib_uint64_t max_flushed_lsn);/*!< in: max flushed lsn from
+ lsn_t max_flushed_lsn);/*!< in: max flushed lsn from
data files */
#ifdef UNIV_LOG_ARCHIVE
/** Wrapper for recv_recovery_from_checkpoint_start_func().
@@ -239,11 +237,11 @@ recv_scan_log_recs(
const byte* buf, /*!< in: buffer containing a log
segment or garbage */
ulint len, /*!< in: buffer length */
- ib_uint64_t start_lsn, /*!< in: buffer start lsn */
- ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log
+ lsn_t start_lsn, /*!< in: buffer start lsn */
+ lsn_t* contiguous_lsn, /*!< in/out: it is known that all log
groups contain contiguous log data up
to this lsn */
- ib_uint64_t* group_scanned_lsn);/*!< out: scanning succeeded up to
+ lsn_t* group_scanned_lsn);/*!< out: scanning succeeded up to
this lsn */
/******************************************************//**
Resets the logs. The contents of log files will be lost! */
@@ -251,18 +249,18 @@ UNIV_INTERN
void
recv_reset_logs(
/*============*/
- ib_uint64_t lsn, /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_LOG_ARCHIVE
ulint arch_log_no, /*!< in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created);/*!< in: TRUE if resetting logs
+ ibool new_logs_created,/*!< in: TRUE if resetting logs
is done at the log creation;
FALSE if it is done after
archive recovery */
+#endif /* UNIV_LOG_ARCHIVE */
+ lsn_t lsn); /*!< in: reset to this lsn
+ rounded up to be divisible by
+ OS_FILE_LOG_BLOCK_SIZE, after
+ which we add
+ LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_HOTBACKUP
/******************************************************//**
Creates new log files after a backup has been restored. */
@@ -272,8 +270,8 @@ recv_reset_log_files_for_backup(
/*============================*/
const char* log_dir, /*!< in: log file directory path */
ulint n_log_files, /*!< in: number of log files */
- ulint log_file_size, /*!< in: log file size */
- ib_uint64_t lsn); /*!< in: new start lsn, must be
+ lsn_t log_file_size, /*!< in: log file size */
+ lsn_t lsn); /*!< in: new start lsn, must be
divisible by OS_FILE_LOG_BLOCK_SIZE */
#endif /* UNIV_HOTBACKUP */
/********************************************************//**
@@ -336,14 +334,14 @@ recv_apply_log_recs_for_backup(void);
Recovers from archived log files, and also from log files, if they exist.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
recv_recovery_from_archive_start(
/*=============================*/
- ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the
+ lsn_t min_flushed_lsn,/*!< in: min flushed lsn field from the
data files */
- ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if
+ lsn_t limit_lsn, /*!< in: recover up to this lsn if
possible */
- ulint first_log_no); /*!< in: number of the first archived
+ lsn_t first_log_no); /*!< in: number of the first archived
log file to use in the recovery; the
file will be searched from
INNOBASE_LOG_ARCH_DIR specified in
@@ -357,9 +355,7 @@ recv_recovery_from_archive_finish(void);
#endif /* UNIV_LOG_ARCHIVE */
/** Block of log record data */
-typedef struct recv_data_struct recv_data_t;
-/** Block of log record data */
-struct recv_data_struct{
+struct recv_data_t{
recv_data_t* next; /*!< pointer to the next block or NULL */
/*!< the log record data is stored physically
immediately after this struct, max amount
@@ -367,18 +363,16 @@ struct recv_data_struct{
};
/** Stored log record struct */
-typedef struct recv_struct recv_t;
-/** Stored log record struct */
-struct recv_struct{
+struct recv_t{
byte type; /*!< log record type */
ulint len; /*!< log record body length in bytes */
recv_data_t* data; /*!< chain of blocks containing the log record
body */
- ib_uint64_t start_lsn;/*!< start lsn of the log segment written by
+ lsn_t start_lsn;/*!< start lsn of the log segment written by
the mtr which generated this log record: NOTE
that this is not necessarily the start lsn of
this log record */
- ib_uint64_t end_lsn;/*!< end lsn of the log segment written by
+ lsn_t end_lsn;/*!< end lsn of the log segment written by
the mtr which generated this log record: NOTE
that this is not necessarily the end lsn of
this log record */
@@ -386,7 +380,7 @@ struct recv_struct{
rec_list;/*!< list of log records for this page */
};
-/** States of recv_addr_struct */
+/** States of recv_addr_t */
enum recv_addr_state {
/** not yet processed */
RECV_NOT_PROCESSED,
@@ -400,9 +394,7 @@ enum recv_addr_state {
};
/** Hashed page file address struct */
-typedef struct recv_addr_struct recv_addr_t;
-/** Hashed page file address struct */
-struct recv_addr_struct{
+struct recv_addr_t{
enum recv_addr_state state;
/*!< recovery state of the page */
unsigned space:32;/*!< space id */
@@ -413,13 +405,14 @@ struct recv_addr_struct{
};
/** Recovery system data structure */
-typedef struct recv_sys_struct recv_sys_t;
-/** Recovery system data structure */
-struct recv_sys_struct{
+struct recv_sys_t{
#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
+ ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
n_addrs, and the state field in each recv_addr
struct */
+ ib_mutex_t writer_mutex;/*!< mutex coordinating
+ flushing between recv_writer_thread and
+ the recovery thread. */
#endif /* !UNIV_HOTBACKUP */
ibool apply_log_recs;
/*!< this is TRUE when log rec application to
@@ -429,7 +422,7 @@ struct recv_sys_struct{
ibool apply_batch_on;
/*!< this is TRUE when a log rec application
batch is running */
- ib_uint64_t lsn; /*!< log sequence number */
+ lsn_t lsn; /*!< log sequence number */
ulint last_log_buf_size;
/*!< size of the log buffer when the database
last time wrote to the log */
@@ -441,12 +434,12 @@ struct recv_sys_struct{
preceding buffer */
byte* buf; /*!< buffer for parsing log records */
ulint len; /*!< amount of data in buf */
- ib_uint64_t parse_start_lsn;
+ lsn_t parse_start_lsn;
/*!< this is the lsn from which we were able to
start parsing log records and adding them to
the hash table; zero if a suitable
start point not found yet */
- ib_uint64_t scanned_lsn;
+ lsn_t scanned_lsn;
/*!< the log data has been scanned up to this
lsn */
ulint scanned_checkpoint_no;
@@ -455,10 +448,10 @@ struct recv_sys_struct{
ulint recovered_offset;
/*!< start offset of non-parsed log records in
buf */
- ib_uint64_t recovered_lsn;
+ lsn_t recovered_lsn;
/*!< the log records have been parsed up to
this lsn */
- ib_uint64_t limit_lsn;/*!< recovery should be made at most
+ lsn_t limit_lsn;/*!< recovery should be made at most
up to this lsn */
ibool found_corrupt_log;
/*!< this is set to TRUE if we during log
@@ -475,39 +468,6 @@ struct recv_sys_struct{
hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
ulint n_addrs;/*!< number of not processed hashed file
addresses in the hash table */
-
-/* If you modified the following defines at original file,
- You should also modify them. */
-/* defined in os0file.c */
-#define OS_AIO_MERGE_N_CONSECUTIVE 64
-/* defined in log0recv.c */
-#define RECV_READ_AHEAD_AREA 32
- time_t stats_recv_start_time;
- ulint stats_recv_turns;
-
- ulint stats_read_requested_pages;
- ulint stats_read_in_area[RECV_READ_AHEAD_AREA];
-
- ulint stats_read_io_pages;
- ulint stats_read_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE];
- ulint stats_write_io_pages;
- ulint stats_write_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE];
-
- ulint stats_doublewrite_check_pages;
- ulint stats_doublewrite_overwrite_pages;
-
- ulint stats_recover_pages_with_read;
- ulint stats_recover_pages_without_read;
-
- ulint stats_log_recs;
- ulint stats_log_len_sum;
-
- ulint stats_applied_log_recs;
- ulint stats_applied_log_len_sum;
- ulint stats_pages_already_new;
-
- ib_uint64_t stats_oldest_modified_lsn;
- ib_uint64_t stats_newest_modified_lsn;
};
/** The recovery system */
diff --git a/storage/xtradb/include/log0recv.ic b/storage/xtradb/include/log0recv.ic
index 62fd5c18e30..32c28dd03e6 100644
--- a/storage/xtradb/include/log0recv.ic
+++ b/storage/xtradb/include/log0recv.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -33,7 +33,7 @@ ibool
recv_recovery_is_on(void)
/*=====================*/
{
- return(UNIV_UNLIKELY(recv_recovery_on));
+ return(recv_recovery_on);
}
#ifdef UNIV_LOG_ARCHIVE
diff --git a/storage/xtradb/include/mach0data.h b/storage/xtradb/include/mach0data.h
index 81c0866f367..d0087f56aaa 100644
--- a/storage/xtradb/include/mach0data.h
+++ b/storage/xtradb/include/mach0data.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,6 +27,8 @@ Created 11/28/1995 Heikki Tuuri
#ifndef mach0data_h
#define mach0data_h
+#ifndef UNIV_INNOCHECKSUM
+
#include "univ.i"
#include "ut0byte.h"
@@ -204,7 +206,7 @@ UNIV_INLINE
void
mach_write_to_8(
/*============*/
- byte* b, /*!< in: pointer to 8 bytes where to store */
+ void* b, /*!< in: pointer to 8 bytes where to store */
ib_uint64_t n); /*!< in: 64-bit integer to be stored */
/********************************************************//**
The following function is used to fetch data from 8 consecutive
@@ -361,19 +363,53 @@ mach_write_to_2_little_endian(
/*==========================*/
byte* dest, /*!< in: where to write */
ulint n); /*!< in: unsigned long int to write */
-
/*********************************************************//**
Convert integral type from storage byte order (big endian) to
host byte order.
@return integer value */
UNIV_INLINE
-ullint
+ib_uint64_t
mach_read_int_type(
/*===============*/
const byte* src, /*!< in: where to read from */
ulint len, /*!< in: length of src */
ibool unsigned_type); /*!< in: signed or unsigned flag */
+/***********************************************************//**
+Convert integral type from host byte order to (big-endian) storage
+byte order. */
+UNIV_INLINE
+void
+mach_write_int_type(
+/*================*/
+ byte* dest, /*!< out: where to write */
+ const byte* src, /*!< in: where to read from */
+ ulint len, /*!< in: length of src */
+ bool usign); /*!< in: true for unsigned, false for signed */
+
+/*************************************************************
+Convert a ulonglong integer from host byte order to (big-endian)
+storage byte order. */
+UNIV_INLINE
+void
+mach_write_ulonglong(
+/*=================*/
+ byte* dest, /*!< out: where to write */
+ ulonglong src, /*!< in: value to write */
+ ulint len, /*!< in: length of dest */
+ bool usign); /*!< in: true for unsigned, false for signed */
+
+/********************************************************//**
+Reads a 1, 2 or 4 byte value starting at ptr.
+@return value read */
+UNIV_INLINE
+ulint
+mach_read_ulint(
+/*============*/
+ const byte* ptr, /*!< in: pointer from where to read */
+ ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES (presumably equal to 1, 2 and 4 - TODO confirm) */
+
#endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_INNOCHECKSUM */
#ifndef UNIV_NONINL
#include "mach0data.ic"
diff --git a/storage/xtradb/include/mach0data.ic b/storage/xtradb/include/mach0data.ic
index 238a56577af..27b9f62b552 100644
--- a/storage/xtradb/include/mach0data.ic
+++ b/storage/xtradb/include/mach0data.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -24,6 +24,8 @@ to the machine format.
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
+#ifndef UNIV_INNOCHECKSUM
+
#include "ut0mem.h"
/*******************************************************//**
@@ -38,7 +40,7 @@ mach_write_to_1(
ut_ad(b);
ut_ad((n | 0xFFUL) <= 0xFFUL);
- b[0] = (byte)n;
+ b[0] = (byte) n;
}
/********************************************************//**
@@ -72,19 +74,6 @@ mach_write_to_2(
}
/********************************************************//**
-The following function is used to fetch data from 2 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_2(
-/*=============*/
- const byte* b) /*!< in: pointer to 2 bytes */
-{
- return(((ulint)(b[0]) << 8) | (ulint)(b[1]));
-}
-
-/********************************************************//**
The following function is used to convert a 16-bit data item
to the canonical format, for fast bytewise equality test
against memory.
@@ -165,7 +154,22 @@ mach_write_to_4(
b[0] = (byte)(n >> 24);
b[1] = (byte)(n >> 16);
b[2] = (byte)(n >> 8);
- b[3] = (byte)n;
+ b[3] = (byte) n;
+}
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/********************************************************//**
+The following function is used to fetch data from 2 consecutive
+bytes. The most significant byte is at the lowest address.
+@return ulint integer */
+UNIV_INLINE
+ulint
+mach_read_from_2(
+/*=============*/
+ const byte* b) /*!< in: pointer to 2 bytes */
+{
+ return(((ulint)(b[0]) << 8) | (ulint)(b[1]));
}
/********************************************************//**
@@ -186,6 +190,8 @@ mach_read_from_4(
);
}
+#ifndef UNIV_INNOCHECKSUM
+
/*********************************************************//**
Writes a ulint in a compressed form where the first byte codes the
length of the stored ulint. We look at the most significant bits of
@@ -280,13 +286,13 @@ UNIV_INLINE
void
mach_write_to_8(
/*============*/
- byte* b, /*!< in: pointer to 8 bytes where to store */
+ void* b, /*!< in: pointer to 8 bytes where to store */
ib_uint64_t n) /*!< in: 64-bit integer to be stored */
{
ut_ad(b);
- mach_write_to_4(b, (ulint) (n >> 32));
- mach_write_to_4(b + 4, (ulint) n);
+ mach_write_to_4(static_cast<byte*>(b), (ulint) (n >> 32));
+ mach_write_to_4(static_cast<byte*>(b) + 4, (ulint) n);
}
/********************************************************//**
@@ -550,7 +556,7 @@ mach_double_read(
ulint i;
byte* ptr;
- ptr = (byte*)&d;
+ ptr = (byte*) &d;
for (i = 0; i < sizeof(double); i++) {
#ifdef WORDS_BIGENDIAN
@@ -575,7 +581,7 @@ mach_double_write(
ulint i;
byte* ptr;
- ptr = (byte*)&d;
+ ptr = (byte*) &d;
for (i = 0; i < sizeof(double); i++) {
#ifdef WORDS_BIGENDIAN
@@ -599,7 +605,7 @@ mach_float_read(
ulint i;
byte* ptr;
- ptr = (byte*)&d;
+ ptr = (byte*) &d;
for (i = 0; i < sizeof(float); i++) {
#ifdef WORDS_BIGENDIAN
@@ -624,7 +630,7 @@ mach_float_write(
ulint i;
byte* ptr;
- ptr = (byte*)&d;
+ ptr = (byte*) &d;
for (i = 0; i < sizeof(float); i++) {
#ifdef WORDS_BIGENDIAN
@@ -648,7 +654,6 @@ mach_read_from_n_little_endian(
ulint n = 0;
const byte* ptr;
- ut_ad(buf_size <= sizeof(ulint));
ut_ad(buf_size > 0);
ptr = buf + buf_size;
@@ -736,7 +741,7 @@ Convert integral type from storage byte order (big endian) to
host byte order.
@return integer value */
UNIV_INLINE
-ullint
+ib_uint64_t
mach_read_int_type(
/*===============*/
const byte* src, /*!< in: where to read from */
@@ -771,4 +776,106 @@ mach_read_int_type(
return(ret);
}
+/*********************************************************//**
+Copy len bytes from the source buffer to dest in reversed byte order. */
+UNIV_INLINE
+void
+mach_swap_byte_order(
+/*=================*/
+ byte* dest, /*!< out: where to write; receives len bytes */
+ const byte* from, /*!< in: where to read from */
+ ulint len) /*!< in: number of bytes to copy, 1..8 */
+{
+ ut_ad(len > 0);
+ ut_ad(len <= 8);
+
+ dest += len;
+
+ switch (len & 0x7) { /* len == 8 selects case 0; every case falls through on purpose */
+ case 0: *--dest = *from++;
+ case 7: *--dest = *from++;
+ case 6: *--dest = *from++;
+ case 5: *--dest = *from++;
+ case 4: *--dest = *from++;
+ case 3: *--dest = *from++;
+ case 2: *--dest = *from++;
+ case 1: *--dest = *from;
+ }
+}
+
+/*************************************************************
+Convert integral type from host byte order to (big-endian) storage
+byte order. */
+UNIV_INLINE
+void
+mach_write_int_type(
+/*================*/
+ byte* dest, /*!< out: where to write */
+ const byte* src, /*!< in: where to read from */
+ ulint len, /*!< in: length of src */
+ bool usign) /*!< in: true for unsigned; if false, the top bit of the first written byte is flipped */
+{
+#ifdef WORDS_BIGENDIAN
+ memcpy(dest, src, len);
+#else
+ mach_swap_byte_order(dest, src, len);
+#endif /* WORDS_BIGENDIAN */
+
+ if (!usign) {
+ *dest ^= 0x80; /* invert the most significant bit of dest[0] */
+ }
+}
+
+/*************************************************************
+Convert a ulonglong integer from host byte order to (big-endian)
+storage byte order. */
+UNIV_INLINE
+void
+mach_write_ulonglong(
+/*=================*/
+ byte* dest, /*!< out: where to write */
+ ulonglong src, /*!< in: value to write */
+ ulint len, /*!< in: number of low-order bytes of src to write to dest */
+ bool usign) /*!< in: true for unsigned; if false, the top bit of the first written byte is flipped */
+{
+ byte* ptr = reinterpret_cast<byte*>(&src);
+
+ ut_ad(len <= sizeof(ulonglong)); /* NOTE(review): len == 0 passes here, but mach_swap_byte_order() asserts len > 0 */
+
+#ifdef WORDS_BIGENDIAN
+ memcpy(dest, ptr + (sizeof(src) - len), len); /* copy the last len bytes of src unchanged */
+#else
+ mach_swap_byte_order(dest, reinterpret_cast<byte*>(ptr), len); /* reverse the first len bytes of src */
+#endif /* WORDS_BIGENDIAN */
+
+ if (!usign) {
+ *dest ^= 0x80; /* invert the most significant bit of dest[0] */
+ }
+}
+
+/********************************************************//**
+Reads a 1, 2 or 4 byte value starting at ptr.
+@return value read */
+UNIV_INLINE
+ulint
+mach_read_ulint(
+/*============*/
+ const byte* ptr, /*!< in: pointer from where to read */
+ ulint type) /*!< in: number of bytes to read: 1, 2 or 4; any other value reaches ut_error */
+{
+ switch (type) {
+ case 1:
+ return(mach_read_from_1(ptr));
+ case 2:
+ return(mach_read_from_2(ptr));
+ case 4:
+ return(mach_read_from_4(ptr));
+ default:
+ ut_error;
+ }
+
+ return(0);
+}
+
#endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/xtradb/include/mem0dbg.h b/storage/xtradb/include/mem0dbg.h
index 1c387706c98..cc339b82910 100644
--- a/storage/xtradb/include/mem0dbg.h
+++ b/storage/xtradb/include/mem0dbg.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -31,8 +31,8 @@ check fields whose sizes are given below */
# ifndef UNIV_HOTBACKUP
/* The mutex which protects in the debug version the hash table
containing the list of live memory heaps, and also the global
-variables in mem0dbg.c. */
-extern mutex_t mem_hash_mutex;
+variables in mem0dbg.cc. */
+extern ib_mutex_t mem_hash_mutex;
# endif /* !UNIV_HOTBACKUP */
#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
diff --git a/storage/xtradb/include/mem0dbg.ic b/storage/xtradb/include/mem0dbg.ic
index 72c63e0a4c4..ec60ed35337 100644
--- a/storage/xtradb/include/mem0dbg.ic
+++ b/storage/xtradb/include/mem0dbg.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/mem0mem.h b/storage/xtradb/include/mem0mem.h
index 7dff3e7a2b8..c36ef06b554 100644
--- a/storage/xtradb/include/mem0mem.h
+++ b/storage/xtradb/include/mem0mem.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -38,15 +38,12 @@ Created 6/9/1994 Heikki Tuuri
/* -------------------- MEMORY HEAPS ----------------------------- */
-/* The info structure stored at the beginning of a heap block */
-typedef struct mem_block_info_struct mem_block_info_t;
-
/* A block of a memory heap consists of the info structure
followed by an area of memory */
-typedef mem_block_info_t mem_block_t;
+typedef struct mem_block_info_t mem_block_t;
/* A memory heap is a nonempty linear list of memory blocks */
-typedef mem_block_t mem_heap_t;
+typedef mem_block_t mem_heap_t;
/* Types of allocation for memory heaps: DYNAMIC means allocation from the
dynamic memory pool of the C compiler, BUFFER means allocation from the
@@ -62,6 +59,12 @@ buffer pool; the latter method is used for very big heaps */
allocation functions can return
NULL. */
+/* Different types of heaps, according to which data structure uses them */
+#define MEM_HEAP_FOR_BTR_SEARCH (MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER)
+#define MEM_HEAP_FOR_PAGE_HASH (MEM_HEAP_DYNAMIC)
+#define MEM_HEAP_FOR_RECV_SYS (MEM_HEAP_BUFFER)
+#define MEM_HEAP_FOR_LOCK_HEAP (MEM_HEAP_BUFFER)
+
/* The following start size is used for the first block in the memory heap if
the size is not specified, i.e., 0 is given as the parameter in the call of
create. The standard size is the maximum (payload) size of the blocks used for
@@ -99,16 +102,8 @@ heap creation. */
Use this macro instead of the corresponding function! Macro for memory
heap creation. */
-#define mem_heap_create_in_buffer(N) mem_heap_create_func(\
- (N), MEM_HEAP_BUFFER, __FILE__, __LINE__)
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-#define mem_heap_create_in_btr_search(N) mem_heap_create_func(\
- (N), MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\
- __FILE__, __LINE__)
-
+#define mem_heap_create_typed(N, T) mem_heap_create_func(\
+ (N), (T), __FILE__, __LINE__)
/**************************************************************//**
Use this macro instead of the corresponding function! Macro for memory
heap freeing. */
@@ -221,7 +216,7 @@ mem_heap_get_size(
Use this macro instead of the corresponding function!
Macro for memory buffer allocation */
-#define mem_zalloc(N) memset(mem_alloc(N), 0, (N));
+#define mem_zalloc(N) memset(mem_alloc(N), 0, (N))
#define mem_alloc(N) mem_alloc_func((N), NULL, __FILE__, __LINE__)
#define mem_alloc2(N,S) mem_alloc_func((N), (S), __FILE__, __LINE__)
@@ -320,7 +315,7 @@ mem_heap_dup(
ulint len); /*!< in: length of data, in bytes */
/****************************************************************//**
-A simple (s)printf replacement that dynamically allocates the space for the
+A simple sprintf replacement that dynamically allocates the space for the
formatted string from the given heap. This supports a very limited set of
the printf syntax: types 's' and 'u' and length modifier 'l' (which is
required for the 'u' type).
@@ -345,9 +340,8 @@ mem_validate_all_blocks(void);
/*#######################################################################*/
-/* The info header of a block in a memory heap */
-
-struct mem_block_info_struct {
+/** The info structure stored at the beginning of a heap block */
+struct mem_block_info_t {
ulint magic_n;/* magic number for debugging */
char file_name[8];/* file name where the mem heap was created */
ulint line; /*!< line number where the mem heap was created */
diff --git a/storage/xtradb/include/mem0mem.ic b/storage/xtradb/include/mem0mem.ic
index 6b2e35d7387..7f0e128cc40 100644
--- a/storage/xtradb/include/mem0mem.ic
+++ b/storage/xtradb/include/mem0mem.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -193,7 +193,7 @@ mem_heap_alloc(
free = mem_block_get_free(block);
- buf = (byte*)block + free;
+ buf = (byte*) block + free;
mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
@@ -202,11 +202,11 @@ mem_heap_alloc(
n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE);
/* In the debug version write debugging info to the field */
- mem_field_init((byte*)buf, n);
+ mem_field_init((byte*) buf, n);
/* Advance buf to point at the storage which will be given to the
caller */
- buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
+ buf = (byte*) buf + MEM_FIELD_HEADER_SIZE;
#endif
UNIV_MEM_ALLOC(buf, n);
@@ -229,7 +229,7 @@ mem_heap_get_heap_top(
block = UT_LIST_GET_LAST(heap->base);
- buf = (byte*)block + mem_block_get_free(block);
+ buf = (byte*) block + mem_block_get_free(block);
return(buf);
}
@@ -247,16 +247,13 @@ mem_heap_free_heap_top(
{
mem_block_t* block;
mem_block_t* prev_block;
-#ifdef UNIV_MEM_DEBUG
+#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
ibool error;
ulint total_size;
ulint size;
-#endif
ut_ad(mem_heap_check(heap));
-#ifdef UNIV_MEM_DEBUG
-
/* Validate the heap and get its total allocated size */
mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
NULL, NULL);
@@ -272,8 +269,8 @@ mem_heap_free_heap_top(
block = UT_LIST_GET_LAST(heap->base);
while (block != NULL) {
- if (((byte*)block + mem_block_get_free(block) >= old_top)
- && ((byte*)block <= old_top)) {
+ if (((byte*) block + mem_block_get_free(block) >= old_top)
+ && ((byte*) block <= old_top)) {
/* Found the right block */
break;
@@ -292,22 +289,20 @@ mem_heap_free_heap_top(
ut_ad(block);
/* Set the free field of block */
- mem_block_set_free(block, old_top - (byte*)block);
+ mem_block_set_free(block, old_top - (byte*) block);
-#ifdef UNIV_MEM_DEBUG
ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
+ UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top);
+#if defined UNIV_MEM_DEBUG
/* In the debug version erase block from top up */
- mem_erase_buf(old_top, (byte*)block + block->len - old_top);
+ mem_erase_buf(old_top, (byte*) block + block->len - old_top);
/* Update allocated memory count */
mutex_enter(&mem_hash_mutex);
mem_current_allocated_memory -= (total_size - size);
mutex_exit(&mem_hash_mutex);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_W(old_top, (byte*)block + block->len - old_top);
#endif /* UNIV_MEM_DEBUG */
- UNIV_MEM_ALLOC(old_top, (byte*)block + block->len - old_top);
+ UNIV_MEM_ALLOC(old_top, (byte*) block + block->len - old_top);
/* If free == start, we may free the block if it is not the first
one */
@@ -326,7 +321,7 @@ mem_heap_empty(
/*===========*/
mem_heap_t* heap) /*!< in: heap to empty */
{
- mem_heap_free_heap_top(heap, (byte*)heap + mem_block_get_start(heap));
+ mem_heap_free_heap_top(heap, (byte*) heap + mem_block_get_start(heap));
#ifndef UNIV_HOTBACKUP
if (heap->free_block) {
mem_heap_free_block_free(heap);
@@ -394,7 +389,7 @@ mem_heap_free_top(
ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
/* In the debug version check the consistency, and erase field */
- mem_field_erase((byte*)block + mem_block_get_free(block), n);
+ mem_field_erase((byte*) block + mem_block_get_free(block), n);
#endif
/* If free == start, we may free the block if it is not the first
@@ -529,7 +524,7 @@ mem_alloc_func(
first block and thus we can calculate the pointer to the heap from
the pointer to the buffer when we free the memory buffer. */
- if (UNIV_LIKELY_NULL(size)) {
+ if (size) {
/* Adjust the allocation to the actual size of the
memory block. */
ulint m = mem_block_get_len(heap)
@@ -538,12 +533,13 @@ mem_alloc_func(
m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE;
#endif /* UNIV_MEM_DEBUG */
ut_ad(m >= n);
- *size = n = m;
+ n = m;
+ *size = m;
}
buf = mem_heap_alloc(heap, n);
- ut_a((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE
+ ut_a((byte*) heap == (byte*) buf - MEM_BLOCK_HEADER_SIZE
- MEM_FIELD_HEADER_SIZE);
return(buf);
}
@@ -562,7 +558,7 @@ mem_free_func(
{
mem_heap_t* heap;
- heap = (mem_heap_t*)((byte*)ptr - MEM_BLOCK_HEADER_SIZE
+ heap = (mem_heap_t*)((byte*) ptr - MEM_BLOCK_HEADER_SIZE
- MEM_FIELD_HEADER_SIZE);
mem_heap_free_func(heap, file_name, line);
}
diff --git a/storage/xtradb/include/mem0pool.h b/storage/xtradb/include/mem0pool.h
index 26bac1c814b..a65ba50fdf9 100644
--- a/storage/xtradb/include/mem0pool.h
+++ b/storage/xtradb/include/mem0pool.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -30,17 +30,14 @@ Created 6/9/1994 Heikki Tuuri
#include "os0file.h"
#include "ut0lst.h"
-/** Memory area header */
-typedef struct mem_area_struct mem_area_t;
/** Memory pool */
-typedef struct mem_pool_struct mem_pool_t;
+struct mem_pool_t;
/** The common memory pool */
extern mem_pool_t* mem_comm_pool;
/** Memory area header */
-
-struct mem_area_struct{
+struct mem_area_t{
ulint size_and_free; /*!< memory area size is obtained by
anding with ~MEM_AREA_FREE; area in
a free list if ANDing with
@@ -50,7 +47,7 @@ struct mem_area_struct{
};
/** Each memory area takes this many extra bytes for control information */
-#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_struct),\
+#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_t),\
UNIV_MEM_ALIGNMENT))
/********************************************************************//**
diff --git a/storage/xtradb/include/mem0pool.ic b/storage/xtradb/include/mem0pool.ic
index f0e724648a1..f4bafb8ba63 100644
--- a/storage/xtradb/include/mem0pool.ic
+++ b/storage/xtradb/include/mem0pool.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/mtr0log.h b/storage/xtradb/include/mtr0log.h
index 8cccb982b48..18a345d050f 100644
--- a/storage/xtradb/include/mtr0log.h
+++ b/storage/xtradb/include/mtr0log.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -32,8 +32,8 @@ Created 12/7/1995 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
/********************************************************//**
-Writes 1 - 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log if mtr is not NULL. */
UNIV_INTERN
void
mlog_write_ulint(
@@ -43,8 +43,8 @@ mlog_write_ulint(
byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************//**
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 8 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log, only if mtr is not NULL */
UNIV_INTERN
void
mlog_write_ull(
@@ -168,7 +168,7 @@ mlog_write_initial_log_record_fast(
mtr_t* mtr); /*!< in: mtr */
#else /* !UNIV_HOTBACKUP */
# define mlog_write_initial_log_record(ptr,type,mtr) ((void) 0)
-# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte *) 0)
+# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte*) 0)
#endif /* !UNIV_HOTBACKUP */
/********************************************************//**
Parses an initial log record written by mlog_write_initial_log_record.
@@ -217,12 +217,13 @@ UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* rec, /*!< in: index record or page */
- dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
- ulint size); /*!< in: requested buffer size in bytes
- (if 0, calls mlog_close() and returns NULL) */
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* rec, /*!< in: index record or page */
+ const dict_index_t* index, /*!< in: record descriptor */
+ byte type, /*!< in: log item type */
+ ulint size); /*!< in: requested buffer size in bytes
+ (if 0, calls mlog_close() and
+ returns NULL) */
#endif /* !UNIV_HOTBACKUP */
/********************************************************//**
diff --git a/storage/xtradb/include/mtr0log.ic b/storage/xtradb/include/mtr0log.ic
index 5ef3f915b94..bc49f655294 100644
--- a/storage/xtradb/include/mtr0log.ic
+++ b/storage/xtradb/include/mtr0log.ic
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,9 +26,11 @@ Created 12/7/1995 Heikki Tuuri
#include "mach0data.h"
#include "ut0lst.h"
#include "buf0buf.h"
+#include "buf0dblwr.h"
#include "fsp0types.h"
-#include "srv0srv.h"
+#include "btr0types.h"
#include "trx0sys.h"
+
/********************************************************//**
Opens a buffer to mlog. It must be closed with mlog_close.
@return buffer, NULL if log mode MTR_LOG_NONE */
@@ -201,10 +203,9 @@ mlog_write_initial_log_record_fast(
the doublewrite buffer is located in pages
FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
system tablespace */
- if ((space == TRX_SYS_SPACE
- || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
- && offset >= (ulint)FSP_EXTENT_SIZE && offset < 3 * (ulint)FSP_EXTENT_SIZE) {
- if (trx_doublewrite_buf_is_being_created) {
+ if (space == TRX_SYS_SPACE
+ && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
+ if (buf_dblwr_being_created) {
/* Do nothing: we only come to this branch in an
InnoDB database creation. We do not redo log
anything for the doublewrite buffer pages. */
diff --git a/storage/xtradb/include/mtr0mtr.h b/storage/xtradb/include/mtr0mtr.h
index 031fccd300c..fd0fb66c464 100644
--- a/storage/xtradb/include/mtr0mtr.h
+++ b/storage/xtradb/include/mtr0mtr.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,6 +40,7 @@ Created 11/26/1995 Heikki Tuuri
#define MTR_LOG_ALL 21 /* default mode: log all operations
modifying disk-based data */
#define MTR_LOG_NONE 22 /* log no operations */
+#define MTR_LOG_NO_REDO 23 /* Don't generate REDO */
/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying
file space page allocation data
(operations in fsp0fsp.* ) */
@@ -180,7 +182,11 @@ For 1 - 8 bytes, the flag value must give the length also! @{ */
#define MLOG_ZIP_WRITE_HEADER ((byte)50) /*!< write to compressed page
header */
#define MLOG_ZIP_PAGE_COMPRESS ((byte)51) /*!< compress an index page */
-#define MLOG_BIGGEST_TYPE ((byte)51) /*!< biggest value (used in
+#define MLOG_ZIP_PAGE_COMPRESS_NO_DATA ((byte)52)/*!< compress an index page
+ without logging its image */
+#define MLOG_ZIP_PAGE_REORGANIZE ((byte)53) /*!< reorganize a compressed
+ page */
+#define MLOG_BIGGEST_TYPE ((byte)53) /*!< biggest value (used in
assertions) */
/* @} */
@@ -191,6 +197,9 @@ functions). The page number parameter was originally written as 0. @{ */
MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */
/* @} */
+/* included here because it needs MLOG_LSN defined */
+#include "log0log.h"
+
/***************************************************************//**
Starts a mini-transaction. */
UNIV_INLINE
@@ -225,7 +234,7 @@ mtr_release_s_latch_at_savepoint(
/*=============================*/
mtr_t* mtr, /*!< in: mtr */
ulint savepoint, /*!< in: savepoint */
- rw_lock_t* lock); /*!< in: latch to release */
+ prio_rw_lock_t* lock); /*!< in: latch to release */
#else /* !UNIV_HOTBACKUP */
# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0)
#endif /* !UNIV_HOTBACKUP */
@@ -272,7 +281,7 @@ UNIV_INLINE
void
mtr_s_lock_func(
/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
+ prio_rw_lock_t* lock, /*!< in: rw-lock */
const char* file, /*!< in: file name */
ulint line, /*!< in: line number */
mtr_t* mtr); /*!< in: mtr */
@@ -283,16 +292,17 @@ UNIV_INLINE
void
mtr_x_lock_func(
/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
+ prio_rw_lock_t* lock, /*!< in: rw-lock */
const char* file, /*!< in: file name */
ulint line, /*!< in: line number */
mtr_t* mtr); /*!< in: mtr */
#endif /* !UNIV_HOTBACKUP */
/***************************************************//**
-Releases an object in the memo stack. */
+Releases an object in the memo stack.
+@return true if released */
UNIV_INTERN
-void
+bool
mtr_memo_release(
/*=============*/
mtr_t* mtr, /*!< in/out: mini-transaction */
@@ -357,28 +367,27 @@ mtr_memo_push(
void* object, /*!< in: object */
ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-
-/* Type definition of a mini-transaction memo stack slot. */
-typedef struct mtr_memo_slot_struct mtr_memo_slot_t;
-struct mtr_memo_slot_struct{
+/** Mini-transaction memo stack slot. */
+struct mtr_memo_slot_t{
ulint type; /*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
void* object; /*!< pointer to the object */
};
/* Mini-transaction handle and buffer */
-struct mtr_struct{
+struct mtr_t{
#ifdef UNIV_DEBUG
ulint state; /*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
#endif
dyn_array_t memo; /*!< memo stack for locks etc. */
dyn_array_t log; /*!< mini-transaction log */
- ibool inside_ibuf;
+ unsigned inside_ibuf:1;
/*!< TRUE if inside ibuf changes */
- ibool modifications;
- /* TRUE if the mtr made modifications to
- buffer pool pages */
- ibool made_dirty;/*!< TRUE if mtr has made at least
- one buffer pool page dirty */
+ unsigned modifications:1;
+ /*!< TRUE if the mini-transaction
+ modified buffer pool pages */
+ unsigned made_dirty:1;
+ /*!< TRUE if mtr has made at least
+ one buffer pool page dirty */
ulint n_log_recs;
/* count of how many page initial log records
have been written to the mtr log */
@@ -387,9 +396,9 @@ struct mtr_struct{
this mini-transaction */
ulint log_mode; /* specifies which operations should be
logged; default value MTR_LOG_ALL */
- ib_uint64_t start_lsn;/* start lsn of the possible log entry for
+ lsn_t start_lsn;/* start lsn of the possible log entry for
this mtr */
- ib_uint64_t end_lsn;/* end lsn of the possible log entry for
+ lsn_t end_lsn;/* end lsn of the possible log entry for
this mtr */
#ifdef UNIV_DEBUG
ulint magic_n;
diff --git a/storage/xtradb/include/mtr0mtr.ic b/storage/xtradb/include/mtr0mtr.ic
index 7b5d268b70f..4fe23c460ab 100644
--- a/storage/xtradb/include/mtr0mtr.ic
+++ b/storage/xtradb/include/mtr0mtr.ic
@@ -39,7 +39,6 @@ mtr_block_dirtied(
const buf_block_t* block) /*!< in: block being x-fixed */
__attribute__((nonnull,warn_unused_result));
-
/***************************************************************//**
Starts a mini-transaction. */
UNIV_INLINE
@@ -54,11 +53,11 @@ mtr_start(
dyn_array_create(&(mtr->log));
mtr->log_mode = MTR_LOG_ALL;
- mtr->modifications = FALSE;
mtr->inside_ibuf = FALSE;
+ mtr->modifications = FALSE;
+ mtr->made_dirty = FALSE;
mtr->n_log_recs = 0;
mtr->n_freed_pages = 0;
- mtr->made_dirty = FALSE;
ut_d(mtr->state = MTR_ACTIVE);
ut_d(mtr->magic_n = MTR_MAGIC_N);
@@ -77,22 +76,22 @@ mtr_memo_push(
dyn_array_t* memo;
mtr_memo_slot_t* slot;
+ ut_ad(object);
+ ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
+ ut_ad(type <= MTR_MEMO_X_LOCK);
+ ut_ad(mtr);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
/* If this mtr has x-fixed a clean page then we set
the made_dirty flag. This tells us if we need to
grab log_flush_order_mutex at mtr_commit so that we
can insert the dirtied page to the flush list. */
if (type == MTR_MEMO_PAGE_X_FIX && !mtr->made_dirty) {
mtr->made_dirty =
- mtr_block_dirtied((const buf_block_t *)object);
+ mtr_block_dirtied((const buf_block_t*) object);
}
- ut_ad(object);
- ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
- ut_ad(type <= MTR_MEMO_X_LOCK);
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
memo = &(mtr->memo);
slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot);
@@ -131,7 +130,7 @@ mtr_release_s_latch_at_savepoint(
/*=============================*/
mtr_t* mtr, /*!< in: mtr */
ulint savepoint, /*!< in: savepoint */
- rw_lock_t* lock) /*!< in: latch to release */
+ prio_rw_lock_t* lock) /*!< in: latch to release */
{
mtr_memo_slot_t* slot;
dyn_array_t* memo;
@@ -262,7 +261,7 @@ UNIV_INLINE
void
mtr_s_lock_func(
/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
+ prio_rw_lock_t* lock, /*!< in: rw-lock */
const char* file, /*!< in: file name */
ulint line, /*!< in: line number */
mtr_t* mtr) /*!< in: mtr */
@@ -281,7 +280,7 @@ UNIV_INLINE
void
mtr_x_lock_func(
/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
+ prio_rw_lock_t* lock, /*!< in: rw-lock */
const char* file, /*!< in: file name */
ulint line, /*!< in: line number */
mtr_t* mtr) /*!< in: mtr */
diff --git a/storage/xtradb/include/mtr0types.h b/storage/xtradb/include/mtr0types.h
index eb76c824666..43368c0b726 100644
--- a/storage/xtradb/include/mtr0types.h
+++ b/storage/xtradb/include/mtr0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,6 +26,6 @@ Created 11/26/1995 Heikki Tuuri
#ifndef mtr0types_h
#define mtr0types_h
-typedef struct mtr_struct mtr_t;
+struct mtr_t;
#endif
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
index e6c70edbd8f..564b579edc8 100644
--- a/storage/xtradb/include/os0file.h
+++ b/storage/xtradb/include/os0file.h
@@ -1,6 +1,6 @@
/***********************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
@@ -19,9 +19,9 @@ WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
***********************************************************************/
@@ -45,11 +45,8 @@ Created 10/21/1995 Heikki Tuuri
#endif
/** File node of a tablespace or the log data space */
-typedef struct fil_node_struct fil_node_t;
+struct fil_node_t;
-#ifdef UNIV_DO_FLUSH
-extern ibool os_do_not_call_flush_at_each_write;
-#endif /* UNIV_DO_FLUSH */
extern ibool os_has_said_disk_full;
/** Flag: enable debug printout for asynchronous i/o */
extern ibool os_aio_print_debug;
@@ -75,6 +72,8 @@ extern ulint os_n_pending_writes;
#endif
+/** File offset in bytes */
+typedef ib_uint64_t os_offset_t;
#ifdef __WIN__
#define SRV_PATH_SEPARATOR '\\'
/** File handle */
@@ -107,14 +106,28 @@ log. */
#define OS_FILE_LOG_BLOCK_SIZE srv_log_block_size
-/** Options for file_create @{ */
-#define OS_FILE_OPEN 51
-#define OS_FILE_CREATE 52
-#define OS_FILE_OVERWRITE 53
-#define OS_FILE_OPEN_RAW 54
-#define OS_FILE_CREATE_PATH 55
-#define OS_FILE_OPEN_RETRY 56 /* for os_file_create() on
- the first ibdata file */
+/** Options for os_file_create_func @{ */
+enum os_file_create_t {
+ OS_FILE_OPEN = 51, /*!< to open an existing file (if
+ doesn't exist, error) */
+ OS_FILE_CREATE, /*!< to create new file (if
+ exists, error) */
+ OS_FILE_OVERWRITE, /*!< to create a new file; if it exists,
+ then overwrite the old file */
+ OS_FILE_OPEN_RAW, /*!< to open a raw device or disk
+ partition */
+ OS_FILE_CREATE_PATH, /*!< to create the directories */
+ OS_FILE_OPEN_RETRY, /*!< open with retry */
+
+ /** Flags that can be combined with the above values. Please ensure
+ that the above values stay below 128. */
+
+ OS_FILE_ON_ERROR_NO_EXIT = 128, /*!< do not exit on unknown errors */
+ OS_FILE_ON_ERROR_SILENT = 256 /*!< don't print diagnostic messages to
+ the log unless it is a fatal error,
+ this flag is only used if
+ ON_ERROR_NO_EXIT is set */
+};
#define OS_FILE_READ_ONLY 333
#define OS_FILE_READ_WRITE 444
@@ -210,45 +223,62 @@ various file I/O operations with performance schema.
1) register_pfs_file_open_begin() and register_pfs_file_open_end() are
used to register file creation, opening, closing and renaming.
2) register_pfs_file_io_begin() and register_pfs_file_io_end() are
-used to register actual file read, write and flush */
+used to register actual file read, write and flush
+3) register_pfs_file_close_begin() and register_pfs_file_close_end()
+are used to register file deletion operations */
# define register_pfs_file_open_begin(state, locker, key, op, name, \
src_file, src_line) \
do { \
- if (PSI_server) { \
- locker = PSI_server->get_thread_file_name_locker( \
- state, key, op, name, &locker); \
- if (locker) { \
- PSI_server->start_file_open_wait( \
- locker, src_file, src_line); \
- } \
+ locker = PSI_FILE_CALL(get_thread_file_name_locker)( \
+ state, key, op, name, &locker); \
+ if (UNIV_LIKELY(locker != NULL)) { \
+ PSI_FILE_CALL(start_file_open_wait)( \
+ locker, src_file, src_line); \
} \
} while (0)
# define register_pfs_file_open_end(locker, file) \
do { \
- if (locker) { \
- PSI_server->end_file_open_wait_and_bind_to_descriptor( \
+ if (UNIV_LIKELY(locker != NULL)) { \
+ PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(\
locker, file); \
} \
} while (0)
+# define register_pfs_file_close_begin(state, locker, key, op, name, \
+ src_file, src_line) \
+do { \
+ locker = PSI_FILE_CALL(get_thread_file_name_locker)( \
+ state, key, op, name, &locker); \
+ if (UNIV_LIKELY(locker != NULL)) { \
+ PSI_FILE_CALL(start_file_close_wait)( \
+ locker, src_file, src_line); \
+ } \
+} while (0)
+
+# define register_pfs_file_close_end(locker, result) \
+do { \
+ if (UNIV_LIKELY(locker != NULL)) { \
+ PSI_FILE_CALL(end_file_close_wait)( \
+ locker, result); \
+ } \
+} while (0)
+
# define register_pfs_file_io_begin(state, locker, file, count, op, \
src_file, src_line) \
do { \
- if (PSI_server) { \
- locker = PSI_server->get_thread_file_descriptor_locker( \
- state, file, op); \
- if (locker) { \
- PSI_server->start_file_wait( \
- locker, count, src_file, src_line); \
- } \
+ locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( \
+ state, file, op); \
+ if (UNIV_LIKELY(locker != NULL)) { \
+ PSI_FILE_CALL(start_file_wait)( \
+ locker, count, src_file, src_line); \
} \
} while (0)
# define register_pfs_file_io_end(locker, count) \
do { \
- if (locker) { \
- PSI_server->end_file_wait(locker, count); \
+ if (UNIV_LIKELY(locker != NULL)) { \
+ PSI_FILE_CALL(end_file_wait)(locker, count); \
} \
} while (0)
#endif /* UNIV_PFS_IO */
@@ -286,35 +316,39 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__)
-# define os_aio(type, mode, name, file, buf, offset, offset_high, \
+# define os_aio(type, mode, name, file, buf, offset, \
n, message1, message2, space_id, trx) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \
- offset_high, n, message1, message2, space_id, trx,\
- __FILE__, __LINE__)
+ n, message1, message2, space_id, trx, \
+ __FILE__, __LINE__)
-# define os_file_read(file, buf, offset, offset_high, n) \
- pfs_os_file_read_func(file, buf, offset, offset_high, n, NULL, \
+# define os_file_read(file, buf, offset, n) \
+ pfs_os_file_read_func(file, buf, offset, n, NULL, \
__FILE__, __LINE__)
-# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
- pfs_os_file_read_func(file, buf, offset, offset_high, n, trx, \
+# define os_file_read_trx(file, buf, offset, n, trx) \
+ pfs_os_file_read_func(file, buf, offset, n, trx, \
__FILE__, __LINE__)
-# define os_file_read_no_error_handling(file, buf, offset, \
- offset_high, n) \
- pfs_os_file_read_no_error_handling_func(file, buf, offset, \
- offset_high, n, \
+# define os_file_read_no_error_handling(file, buf, offset, n) \
+ pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \
__FILE__, __LINE__)
-# define os_file_write(name, file, buf, offset, offset_high, n) \
- pfs_os_file_write_func(name, file, buf, offset, offset_high, \
+# define os_file_write(name, file, buf, offset, n) \
+ pfs_os_file_write_func(name, file, buf, offset, \
n, __FILE__, __LINE__)
-# define os_file_flush(file, metadata) \
- pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
+# define os_file_flush(file) \
+ pfs_os_file_flush_func(file, __FILE__, __LINE__)
# define os_file_rename(key, oldpath, newpath) \
pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
+
+# define os_file_delete(key, name) \
+ pfs_os_file_delete_func(key, name, __FILE__, __LINE__)
+
+# define os_file_delete_if_exists(key, name) \
+ pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__)
#else /* UNIV_PFS_IO */
/* If UNIV_PFS_IO is not defined, these I/O APIs point
@@ -322,8 +356,8 @@ to original un-instrumented file I/O APIs */
# define os_file_create(key, name, create, purpose, type, success) \
os_file_create_func(name, create, purpose, type, success)
-# define os_file_create_simple(key, name, create, access, success) \
- os_file_create_simple_func(name, create, access, success)
+# define os_file_create_simple(key, name, create_mode, access, success) \
+ os_file_create_simple_func(name, create_mode, access, success)
# define os_file_create_simple_no_error_handling( \
key, name, create_mode, access, success) \
@@ -332,40 +366,43 @@ to original un-instrumented file I/O APIs */
# define os_file_close(file) os_file_close_func(file)
-# define os_aio(type, mode, name, file, buf, offset, offset_high, \
- n, message1, message2, space_id, trx) \
- os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\
+# define os_aio(type, mode, name, file, buf, offset, n, message1, \
+ message2, space_id, trx) \
+ os_aio_func(type, mode, name, file, buf, offset, n, \
message1, message2, space_id, trx)
-# define os_file_read(file, buf, offset, offset_high, n) \
- os_file_read_func(file, buf, offset, offset_high, n, NULL)
+# define os_file_read(file, buf, offset, n) \
+ os_file_read_func(file, buf, offset, n, NULL)
-# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
- os_file_read_func(file, buf, offset, offset_high, n, trx)
+# define os_file_read_trx(file, buf, offset, n, trx) \
+ os_file_read_func(file, buf, offset, n, trx)
-# define os_file_read_no_error_handling(file, buf, offset, \
- offset_high, n) \
- os_file_read_no_error_handling_func(file, buf, offset, offset_high, n)
+# define os_file_read_no_error_handling(file, buf, offset, n) \
+ os_file_read_no_error_handling_func(file, buf, offset, n)
-# define os_file_write(name, file, buf, offset, offset_high, n) \
- os_file_write_func(name, file, buf, offset, offset_high, n)
+# define os_file_write(name, file, buf, offset, n) \
+ os_file_write_func(name, file, buf, offset, n)
-# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
+# define os_file_flush(file) os_file_flush_func(file)
# define os_file_rename(key, oldpath, newpath) \
os_file_rename_func(oldpath, newpath)
+# define os_file_delete(key, name) os_file_delete_func(name)
+
+# define os_file_delete_if_exists(key, name) \
+ os_file_delete_if_exists_func(name)
+
#endif /* UNIV_PFS_IO */
/* File types for directory entry data type */
-enum os_file_type_enum{
+enum os_file_type_t {
OS_FILE_TYPE_UNKNOWN = 0,
OS_FILE_TYPE_FILE, /* regular file */
OS_FILE_TYPE_DIR, /* directory */
OS_FILE_TYPE_LINK /* symbolic link */
};
-typedef enum os_file_type_enum os_file_type_t;
/* Maximum path string length in bytes when referring to tables within the
'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
@@ -373,16 +410,18 @@ of this size from the thread stack; that is why this should not be made much
bigger than 4000 bytes */
#define OS_FILE_MAX_PATH 4000
-/* Struct used in fetching information of a file in a directory */
-struct os_file_stat_struct{
+/** Struct used in fetching information of a file in a directory */
+struct os_file_stat_t {
char name[OS_FILE_MAX_PATH]; /*!< path to a file */
os_file_type_t type; /*!< file type */
ib_int64_t size; /*!< file size */
time_t ctime; /*!< creation time */
time_t mtime; /*!< modification time */
time_t atime; /*!< access time */
+ bool rw_perm; /*!< true if can be opened
+ in read-write mode. Only valid
+ if type == OS_FILE_TYPE_FILE */
};
-typedef struct os_file_stat_struct os_file_stat_t;
#ifdef __WIN__
typedef HANDLE os_file_dir_t; /*!< directory stream */
@@ -478,13 +517,7 @@ os_file_create_simple_func(
/*=======================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
- opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error), or
- OS_FILE_CREATE_PATH if new file
- (if exists, error) and subdirectories along
- its path are created (if needed)*/
+ ulint create_mode,/*!< in: create mode */
ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
ibool* success);/*!< out: TRUE if succeed, FALSE if error */
@@ -500,15 +533,13 @@ os_file_create_simple_no_error_handling_func(
/*=========================================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error) */
+ ulint create_mode,/*!< in: create mode */
ulint access_type,/*!< in: OS_FILE_READ_ONLY,
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
- ibool* success);/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ __attribute__((nonnull, warn_unused_result));
/****************************************************************//**
Tries to disable OS caching on an opened file descriptor. */
UNIV_INTERN
@@ -532,14 +563,7 @@ os_file_create_func(
/*================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error),
- OS_FILE_OVERWRITE if a new file is created
- or an old overwritten;
- OS_FILE_OPEN_RAW, if a raw device or disk
- partition should be opened */
+ ulint create_mode,/*!< in: create mode */
ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
non-buffered i/o is desired,
OS_FILE_NORMAL, if any normal file;
@@ -548,24 +572,27 @@ os_file_create_func(
async i/o or unbuffered i/o: look in the
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success);/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Deletes a file. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
-os_file_delete(
-/*===========*/
- const char* name); /*!< in: file path as a null-terminated string */
+bool
+os_file_delete_func(
+/*================*/
+ const char* name); /*!< in: file path as a null-terminated
+ string */
/***********************************************************************//**
Deletes a file if it exists. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
-os_file_delete_if_exists(
-/*=====================*/
- const char* name); /*!< in: file path as a null-terminated string */
+bool
+os_file_delete_if_exists_func(
+/*==========================*/
+ const char* name); /*!< in: file path as a null-terminated
+ string */
/***********************************************************************//**
NOTE! Use the corresponding macro os_file_rename(), not directly
this function!
@@ -606,18 +633,13 @@ pfs_os_file_create_simple_func(
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
- opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error), or
- OS_FILE_CREATE_PATH if new file
- (if exists, error) and subdirectories along
- its path are created (if needed)*/
+ ulint create_mode,/*!< in: create mode */
ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+ __attribute__((nonnull, warn_unused_result));
/****************************************************************//**
NOTE! Please use the corresponding macro
@@ -634,17 +656,15 @@ pfs_os_file_create_simple_no_error_handling_func(
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error) */
+ ulint create_mode, /*!< in: file create mode */
ulint access_type,/*!< in: OS_FILE_READ_ONLY,
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+ __attribute__((nonnull, warn_unused_result));
/****************************************************************//**
NOTE! Please use the corresponding macro os_file_create(), not directly
@@ -660,14 +680,7 @@ pfs_os_file_create_func(
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error),
- OS_FILE_OVERWRITE if a new file is created
- or an old overwritten;
- OS_FILE_OPEN_RAW, if a raw device or disk
- partition should be opened */
+ ulint create_mode,/*!< in: file create mode */
ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
non-buffered i/o is desired,
OS_FILE_NORMAL, if any normal file;
@@ -678,7 +691,8 @@ pfs_os_file_create_func(
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
NOTE! Please use the corresponding macro os_file_close(), not directly
@@ -704,10 +718,7 @@ pfs_os_file_read_func(
/*==================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
trx_t* trx,
const char* src_file,/*!< in: file name where func invoked */
@@ -726,10 +737,7 @@ pfs_os_file_read_no_error_handling_func(
/*====================================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
@@ -751,10 +759,7 @@ pfs_os_aio_func(
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read or from which
to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read or write */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */
fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed
@@ -782,10 +787,7 @@ pfs_os_file_write_func(
null-terminated string */
os_file_t file, /*!< in: handle to a file */
const void* buf, /*!< in: buffer from which to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to write */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to write */
ulint n, /*!< in: number of bytes to write */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
@@ -801,7 +803,6 @@ ibool
pfs_os_file_flush_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
- ibool metadata,
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
@@ -821,6 +822,38 @@ pfs_os_file_rename_func(
const char* newpath,/*!< in: new file path */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete()
+@return true if success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: file path as a null-terminated
+ string */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
+directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete_if_exists()
+@return true if success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_if_exists_func(
+/*==============================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: file path as a null-terminated
+ string */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
#endif /* UNIV_PFS_IO */
/***********************************************************************//**
@@ -833,23 +866,13 @@ os_file_close_no_error_handling(
os_file_t file); /*!< in, own: handle to a file */
/***********************************************************************//**
Gets a file size.
-@return TRUE if success */
+@return file size, or (os_offset_t) -1 on failure */
UNIV_INTERN
-ibool
+os_offset_t
os_file_get_size(
/*=============*/
- os_file_t file, /*!< in: handle to a file */
- ulint* size, /*!< out: least significant 32 bits of file
- size */
- ulint* size_high);/*!< out: most significant 32 bits of size */
-/***********************************************************************//**
-Gets file size as a 64-bit integer ib_int64_t.
-@return size in bytes, -1 if error */
-UNIV_INTERN
-ib_int64_t
-os_file_get_size_as_iblonglong(
-/*===========================*/
- os_file_t file); /*!< in: handle to a file */
+ os_file_t file) /*!< in: handle to a file */
+ __attribute__((warn_unused_result));
/***********************************************************************//**
Write the specified number of zeros to a newly created file.
@return TRUE if success */
@@ -860,9 +883,8 @@ os_file_set_size(
const char* name, /*!< in: name of the file or path as a
null-terminated string */
os_file_t file, /*!< in: handle to a file */
- ulint size, /*!< in: least significant 32 bits of file
- size */
- ulint size_high);/*!< in: most significant 32 bits of size */
+ os_offset_t size) /*!< in: file size */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Truncates a file at its current position.
@return TRUE if success */
@@ -887,8 +909,7 @@ UNIV_INTERN
ibool
os_file_flush_func(
/*===============*/
- os_file_t file, /*!< in, own: handle to a file */
- ibool metadata);
+ os_file_t file); /*!< in, own: handle to a file */
/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
@@ -899,7 +920,7 @@ UNIV_INTERN
ulint
os_file_get_last_error(
/*===================*/
- ibool report_all_errors); /*!< in: TRUE if we want an error message
+ bool report_all_errors); /*!< in: true if we want an error message
printed of all errors */
/*******************************************************************//**
NOTE! Use the corresponding macro os_file_read(), not directly this function!
@@ -911,10 +932,7 @@ os_file_read_func(
/*==============*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
trx_t* trx);
/*******************************************************************//**
@@ -940,10 +958,7 @@ os_file_read_no_error_handling_func(
/*================================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to read */
ulint n); /*!< in: number of bytes to read */
/*******************************************************************//**
@@ -959,10 +974,7 @@ os_file_write_func(
null-terminated string */
os_file_t file, /*!< in: handle to a file */
const void* buf, /*!< in: buffer from which to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to write */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to write */
ulint n); /*!< in: number of bytes to write */
/*******************************************************************//**
Check the existence and type of the given file.
@@ -978,8 +990,8 @@ os_file_status(
The function os_file_dirname returns a directory component of a
null-terminated pathname string. In the usual case, dirname returns
the string up to, but not including, the final '/', and basename
-is the component following the final '/'. Trailing '/' charac­
-ters are not counted as part of the pathname.
+is the component following the final '/'. Trailing '/' characters
+are not counted as part of the pathname.
If path does not contain a slash, dirname returns the string ".".
@@ -1008,6 +1020,60 @@ os_file_dirname(
/*============*/
const char* path); /*!< in: pathname */
/****************************************************************//**
+This function returns a new path name after replacing the basename
+in an old path with a new basename. The old_path is a full path
+name including the extension. The tablename is in the normal
+form "databasename/tablename". The new base name is found after
+the forward slash. Both input strings are null terminated.
+
+This function allocates memory to be returned. It is the caller's
+responsibility to free the return value after it is no longer needed.
+
+@return own: new full pathname */
+UNIV_INTERN
+char*
+os_file_make_new_pathname(
+/*======================*/
+ const char* old_path, /*!< in: pathname */
+ const char* new_name); /*!< in: new file name */
+/****************************************************************//**
+This function returns a remote path name by combining a data directory
+path provided in a DATA DIRECTORY clause with the tablename which is
+in the form 'database/tablename'. It strips the file basename (which
+is the tablename) found after the last directory in the path provided.
+The full filepath created will include the database name as a directory
+under the path provided. The filename is the tablename with the '.ibd'
+extension. All input and output strings are null-terminated.
+
+This function allocates memory to be returned. It is the caller's
+responsibility to free the return value after it is no longer needed.
+
+@return own: A full pathname; data_dir_path/databasename/tablename.ibd */
+UNIV_INTERN
+char*
+os_file_make_remote_pathname(
+/*=========================*/
+ const char* data_dir_path, /*!< in: pathname */
+ const char* tablename, /*!< in: tablename */
+ const char* extention); /*!< in: file extension; ibd,cfg*/
+/****************************************************************//**
+This function reduces a null-terminated full remote path name into
+the path that is sent by MySQL for DATA DIRECTORY clause. It replaces
+the 'databasename/tablename.ibd' found at the end of the path with just
+'tablename'.
+
+Since the result is always smaller than the path sent in, no new memory
+is allocated. The caller should allocate memory for the path sent in.
+This function manipulates that path in place.
+
+If the path format is not as expected, just return. The result is used
+to inform a SHOW CREATE TABLE command. */
+UNIV_INTERN
+void
+os_file_make_data_dir_path(
+/*========================*/
+ char* data_dir_path); /*!< in/out: full path/data_dir_path */
+/****************************************************************//**
Creates all missing subdirectories along the given path.
@return TRUE if call succeeded FALSE otherwise */
UNIV_INTERN
@@ -1066,10 +1132,7 @@ os_aio_func(
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read or from which
to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read or write */
- ulint offset_high, /*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */
fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed
@@ -1198,14 +1261,16 @@ os_aio_all_slots_free(void);
/*******************************************************************//**
This function returns information about the specified file
-@return TRUE if stat information found */
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-ibool
+dberr_t
os_file_get_status(
/*===============*/
- const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info); /*!< information of a file in a
+ const char* path, /*!< in: pathname of the file */
+ os_file_stat_t* stat_info, /*!< information of a file in a
directory */
+ bool check_rw_perm); /*!< in: for testing whether the
+ file can be opened in RW mode */
#if !defined(UNIV_HOTBACKUP)
/*********************************************************************//**
diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic
index 137ce59b62d..25a1397147e 100644
--- a/storage/xtradb/include/os0file.ic
+++ b/storage/xtradb/include/os0file.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -40,13 +40,7 @@ pfs_os_file_create_simple_func(
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
- opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error), or
- OS_FILE_CREATE_PATH if new file
- (if exists, error) and subdirectories along
- its path are created (if needed)*/
+ ulint create_mode,/*!< in: create mode */
ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
@@ -88,10 +82,7 @@ pfs_os_file_create_simple_no_error_handling_func(
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error) */
+ ulint create_mode, /*!< in: file create mode */
ulint access_type,/*!< in: OS_FILE_READ_ONLY,
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
@@ -133,14 +124,7 @@ pfs_os_file_create_func(
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error),
- OS_FILE_OVERWRITE if a new file is created
- or an old overwritten;
- OS_FILE_OPEN_RAW, if a raw device or disk
- partition should be opened */
+ ulint create_mode,/*!< in: file create mode */
ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
non-buffered i/o is desired,
OS_FILE_NORMAL, if any normal file;
@@ -216,10 +200,7 @@ pfs_os_aio_func(
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read or from which
to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read or write */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */
fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed
@@ -245,7 +226,7 @@ pfs_os_aio_func(
: PSI_FILE_READ,
src_file, src_line);
- result = os_aio_func(type, mode, name, file, buf, offset, offset_high,
+ result = os_aio_func(type, mode, name, file, buf, offset,
n, message1, message2, space_id, trx);
register_pfs_file_io_end(locker, n);
@@ -265,10 +246,7 @@ pfs_os_file_read_func(
/*==================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
trx_t* trx,
const char* src_file,/*!< in: file name where func invoked */
@@ -281,7 +259,7 @@ pfs_os_file_read_func(
register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
src_file, src_line);
- result = os_file_read_func(file, buf, offset, offset_high, n, trx);
+ result = os_file_read_func(file, buf, offset, n, trx);
register_pfs_file_io_end(locker, n);
@@ -302,10 +280,7 @@ pfs_os_file_read_no_error_handling_func(
/*====================================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
@@ -317,8 +292,7 @@ pfs_os_file_read_no_error_handling_func(
register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
src_file, src_line);
- result = os_file_read_no_error_handling_func(file, buf, offset,
- offset_high, n);
+ result = os_file_read_no_error_handling_func(file, buf, offset, n);
register_pfs_file_io_end(locker, n);
@@ -339,10 +313,7 @@ pfs_os_file_write_func(
null-terminated string */
os_file_t file, /*!< in: handle to a file */
const void* buf, /*!< in: buffer from which to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to write */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
+ os_offset_t offset, /*!< in: file offset where to write */
ulint n, /*!< in: number of bytes to write */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
@@ -354,7 +325,7 @@ pfs_os_file_write_func(
register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_WRITE,
src_file, src_line);
- result = os_file_write_func(name, file, buf, offset, offset_high, n);
+ result = os_file_write_func(name, file, buf, offset, n);
register_pfs_file_io_end(locker, n);
@@ -372,7 +343,6 @@ ibool
pfs_os_file_flush_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
- ibool metadata,
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -382,7 +352,7 @@ pfs_os_file_flush_func(
register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
src_file, src_line);
- result = os_file_flush_func(file, metadata);
+ result = os_file_flush_func(file);
register_pfs_file_io_end(locker, 0);
@@ -419,4 +389,64 @@ pfs_os_file_rename_func(
return(result);
}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete()
+@return true if success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: file path as a null-terminated
+ string */
+ const char* src_file, /*!< in: file name where func invoked */
+ ulint src_line) /*!< in: line where the func invoked */
+{
+ bool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
+ name, src_file, src_line);
+
+ result = os_file_delete_func(name);
+
+ register_pfs_file_close_end(locker, 0);
+
+ return(result);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
+directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete_if_exists()
+@return true if success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_if_exists_func(
+/*==============================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: file path as a null-terminated
+ string */
+ const char* src_file, /*!< in: file name where func invoked */
+ ulint src_line) /*!< in: line where the func invoked */
+{
+ bool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
+ name, src_file, src_line);
+
+ result = os_file_delete_if_exists_func(name);
+
+ register_pfs_file_close_end(locker, 0);
+
+ return(result);
+}
#endif /* UNIV_PFS_IO */
diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h
index 7cf80217bec..f9e88ff1a28 100644
--- a/storage/xtradb/include/os0proc.h
+++ b/storage/xtradb/include/os0proc.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/os0proc.ic b/storage/xtradb/include/os0proc.ic
index 6d7eb1be37c..506f4f8ce0c 100644
--- a/storage/xtradb/include/os0proc.ic
+++ b/storage/xtradb/include/os0proc.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h
index 60ee5dca08f..51c4530bb5a 100644
--- a/storage/xtradb/include/os0sync.h
+++ b/storage/xtradb/include/os0sync.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -36,28 +36,37 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i"
#include "ut0lst.h"
+#include "sync0types.h"
#ifdef __WIN__
/** Native event (slow)*/
typedef HANDLE os_native_event_t;
/** Native mutex */
-typedef CRITICAL_SECTION os_fast_mutex_t;
+typedef CRITICAL_SECTION fast_mutex_t;
/** Native condition variable. */
typedef CONDITION_VARIABLE os_cond_t;
#else
/** Native mutex */
-typedef pthread_mutex_t os_fast_mutex_t;
+typedef pthread_mutex_t fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
-/** Operating system event */
-typedef struct os_event_struct os_event_struct_t;
+/** Structure that includes Performance Schema Probe pfs_psi
+in the os_fast_mutex structure if UNIV_PFS_MUTEX is defined */
+struct os_fast_mutex_t {
+ fast_mutex_t mutex; /*!< os_fast_mutex */
+#ifdef UNIV_PFS_MUTEX
+ struct PSI_mutex* pfs_psi;/*!< The performance schema
+ instrumentation hook */
+#endif
+};
+
/** Operating system event handle */
-typedef os_event_struct_t* os_event_t;
+typedef struct os_event* os_event_t;
/** An asynchronous signal sent between threads */
-struct os_event_struct {
+struct os_event {
#ifdef __WIN__
HANDLE handle; /*!< kernel event object, slow,
used on older Windows */
@@ -72,7 +81,7 @@ struct os_event_struct {
the event becomes signaled */
os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
- UT_LIST_NODE_T(os_event_struct_t) os_event_list;
+ UT_LIST_NODE_T(os_event_t) os_event_list;
/*!< list of all created events */
};
@@ -82,13 +91,11 @@ struct os_event_struct {
/** Return value of os_event_wait_time() when the time is exceeded */
#define OS_SYNC_TIME_EXCEEDED 1
-/** Operating system mutex */
-typedef struct os_mutex_struct os_mutex_str_t;
/** Operating system mutex handle */
-typedef os_mutex_str_t* os_mutex_t;
+typedef struct os_mutex_t* os_ib_mutex_t;
/** Mutex protecting counts and the event and OS 'slow' mutex lists */
-extern os_mutex_t os_sync_mutex;
+extern os_ib_mutex_t os_sync_mutex;
/** This is incremented by 1 in os_thread_create and decremented by 1 in
os_thread_exit */
@@ -117,10 +124,8 @@ explicitly by calling sync_os_reset_event.
@return the event handle */
UNIV_INTERN
os_event_t
-os_event_create(
-/*============*/
- const char* name); /*!< in: the name of the event, if NULL
- the event is created without a name */
+os_event_create(void);
+/*==================*/
/**********************************************************//**
Sets an event semaphore to the signaled state: lets waiting threads
proceed. */
@@ -176,7 +181,7 @@ os_event_wait_low(
os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0)
-#define os_event_wait_time(e, t) os_event_wait_time_low(e, t, 0)
+#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
/**********************************************************//**
Waits for an event object until it is in the signaled state or
@@ -195,10 +200,10 @@ os_event_wait_time_low(
os_event_reset(). */
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
+mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
@return the mutex handle */
UNIV_INTERN
-os_mutex_t
+os_ib_mutex_t
os_mutex_create(void);
/*=================*/
/**********************************************************//**
@@ -207,21 +212,21 @@ UNIV_INTERN
void
os_mutex_enter(
/*===========*/
- os_mutex_t mutex); /*!< in: mutex to acquire */
+ os_ib_mutex_t mutex); /*!< in: mutex to acquire */
/**********************************************************//**
Releases ownership of a mutex. */
UNIV_INTERN
void
os_mutex_exit(
/*==========*/
- os_mutex_t mutex); /*!< in: mutex to release */
+ os_ib_mutex_t mutex); /*!< in: mutex to release */
/**********************************************************//**
Frees an mutex object. */
UNIV_INTERN
void
os_mutex_free(
/*==========*/
- os_mutex_t mutex); /*!< in: mutex to free */
+ os_ib_mutex_t mutex); /*!< in: mutex to free */
/**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same
as os_fast_mutex_lock!
@@ -231,34 +236,119 @@ ulint
os_fast_mutex_trylock(
/*==================*/
os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
+
+/**********************************************************************
+Following os_fast_ mutex APIs would be performance schema instrumented:
+
+os_fast_mutex_init
+os_fast_mutex_lock
+os_fast_mutex_unlock
+os_fast_mutex_free
+
+These mutex APIs will point to corresponding wrapper functions that contain
+the performance schema instrumentation.
+
+NOTE! The following macro should be used in mutex operation, not the
+corresponding function. */
+
+#ifdef UNIV_PFS_MUTEX
+# define os_fast_mutex_init(K, M) \
+ pfs_os_fast_mutex_init(K, M)
+
+# define os_fast_mutex_lock(M) \
+ pfs_os_fast_mutex_lock(M, __FILE__, __LINE__)
+
+# define os_fast_mutex_unlock(M) pfs_os_fast_mutex_unlock(M)
+
+# define os_fast_mutex_free(M) pfs_os_fast_mutex_free(M)
+
+/*********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly
+this function!
+A wrapper function for os_fast_mutex_init_func(). Initializes an operating
+system fast mutex semaphore. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_init(
+/*===================*/
+ PSI_mutex_key key, /*!< in: Performance Schema
+ key */
+ os_fast_mutex_t* fast_mutex); /*!< out: fast mutex */
+/**********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly
+this function!
+Wrapper function for os_fast_mutex_free_func(). Also destroys the performance
+schema probes when freeing the mutex */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_free(
+/*===================*/
+ os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to free */
+/**********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly
+this function!
+Wrapper function of os_fast_mutex_lock. Acquires ownership of a fast mutex. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_lock(
+/*===================*/
+ os_fast_mutex_t* fast_mutex, /*!< in/out: mutex to acquire */
+ const char* file_name, /*!< in: file name where
+ locked */
+ ulint line); /*!< in: line where locked */
+/**********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly
+this function!
+Wrapper function of os_fast_mutex_unlock. Releases ownership of a fast mutex. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_unlock(
+/*=====================*/
+ os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to release */
+
+#else /* UNIV_PFS_MUTEX */
+
+# define os_fast_mutex_init(K, M) \
+ os_fast_mutex_init_func(&((os_fast_mutex_t*)(M))->mutex)
+
+# define os_fast_mutex_lock(M) \
+ os_fast_mutex_lock_func(&((os_fast_mutex_t*)(M))->mutex)
+
+# define os_fast_mutex_unlock(M) \
+ os_fast_mutex_unlock_func(&((os_fast_mutex_t*)(M))->mutex)
+
+# define os_fast_mutex_free(M) \
+ os_fast_mutex_free_func(&((os_fast_mutex_t*)(M))->mutex)
+#endif /* UNIV_PFS_MUTEX */
+
/**********************************************************//**
Releases ownership of a fast mutex. */
UNIV_INTERN
void
-os_fast_mutex_unlock(
-/*=================*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to release */
+os_fast_mutex_unlock_func(
+/*======================*/
+ fast_mutex_t* fast_mutex); /*!< in: mutex to release */
/*********************************************************//**
Initializes an operating system fast mutex semaphore. */
UNIV_INTERN
void
-os_fast_mutex_init(
-/*===============*/
- os_fast_mutex_t* fast_mutex); /*!< in: fast mutex */
+os_fast_mutex_init_func(
+/*====================*/
+ fast_mutex_t* fast_mutex); /*!< in: fast mutex */
/**********************************************************//**
Acquires ownership of a fast mutex. */
UNIV_INTERN
void
-os_fast_mutex_lock(
-/*===============*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
+os_fast_mutex_lock_func(
+/*====================*/
+ fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
/**********************************************************//**
Frees an mutex object. */
UNIV_INTERN
void
-os_fast_mutex_free(
-/*===============*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to free */
+os_fast_mutex_free_func(
+/*====================*/
+ fast_mutex_t* fast_mutex); /*!< in: mutex to free */
/**********************************************************//**
Atomic compare-and-swap and increment for InnoDB. */
@@ -311,12 +401,30 @@ amount of increment. */
# define os_atomic_increment_uint64(ptr, amount) \
os_atomic_increment(ptr, amount)
+/* Returns the resulting value, ptr is pointer to target, amount is the
+amount to decrement. */
+
+# define os_atomic_decrement(ptr, amount) \
+ __sync_sub_and_fetch(ptr, amount)
+
+# define os_atomic_decrement_lint(ptr, amount) \
+ os_atomic_decrement(ptr, amount)
+
+# define os_atomic_decrement_ulint(ptr, amount) \
+ os_atomic_decrement(ptr, amount)
+
+# define os_atomic_decrement_uint64(ptr, amount) \
+ os_atomic_decrement(ptr, amount)
+
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \
__sync_lock_test_and_set(ptr, (byte) new_val)
+# define os_atomic_test_and_set_ulint(ptr, new_val) \
+ __sync_lock_test_and_set(ptr, new_val)
+
#elif defined(HAVE_IB_SOLARIS_ATOMICS)
# define HAVE_ATOMIC_BUILTINS
@@ -335,15 +443,15 @@ compare to, new_val is the value to swap in. */
(atomic_cas_ulong(ptr, old_val, new_val) == old_val)
# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- ((lint)atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val)
+ ((lint) atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val)
# ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS
# if SIZEOF_PTHREAD_T == 4
# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- ((pthread_t)atomic_cas_32(ptr, old_val, new_val) == old_val)
+ ((pthread_t) atomic_cas_32(ptr, old_val, new_val) == old_val)
# elif SIZEOF_PTHREAD_T == 8
# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- ((pthread_t)atomic_cas_64(ptr, old_val, new_val) == old_val)
+ ((pthread_t) atomic_cas_64(ptr, old_val, new_val) == old_val)
# else
# error "SIZEOF_PTHREAD_T != 4 or 8"
# endif /* SIZEOF_PTHREAD_T CHECK */
@@ -359,21 +467,36 @@ compare to, new_val is the value to swap in. */
Returns the resulting value, ptr is pointer to target, amount is the
amount of increment. */
-# define os_atomic_increment_lint(ptr, amount) \
- atomic_add_long_nv((ulong_t*) ptr, amount)
-
# define os_atomic_increment_ulint(ptr, amount) \
atomic_add_long_nv(ptr, amount)
+# define os_atomic_increment_lint(ptr, amount) \
+ os_atomic_increment_ulint((ulong_t*) ptr, amount)
+
# define os_atomic_increment_uint64(ptr, amount) \
atomic_add_64_nv(ptr, amount)
+/* Returns the resulting value, ptr is pointer to target, amount is the
+amount to decrement. */
+
+# define os_atomic_decrement_lint(ptr, amount) \
+ os_atomic_increment_ulint((ulong_t*) ptr, -(amount))
+
+# define os_atomic_decrement_ulint(ptr, amount) \
+ os_atomic_increment_ulint(ptr, -(amount))
+
+# define os_atomic_decrement_uint64(ptr, amount) \
+ os_atomic_increment_uint64(ptr, -(amount))
+
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val)
+# define os_atomic_test_and_set_ulint(ptr, new_val) \
+ atomic_swap_ulong(ptr, new_val)
+
#elif defined(HAVE_WINDOWS_ATOMICS)
# define HAVE_ATOMIC_BUILTINS
@@ -382,28 +505,66 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define HAVE_ATOMIC_BUILTINS_64
# endif
-/* On Windows, use Windows atomics / interlocked */
-# ifdef _WIN64
-# define win_cmp_and_xchg InterlockedCompareExchange64
-# define win_xchg_and_add InterlockedExchangeAdd64
-# else /* _WIN64 */
-# define win_cmp_and_xchg InterlockedCompareExchange
-# define win_xchg_and_add InterlockedExchangeAdd
-# endif
+/**********************************************************//**
+Atomic compare and exchange of signed integers (both 32 and 64 bit).
+@return value found before the exchange.
+If it is not equal to old_value the exchange did not happen. */
+UNIV_INLINE
+lint
+win_cmp_and_xchg_lint(
+/*==================*/
+ volatile lint* ptr, /*!< in/out: source/destination */
+ lint new_val, /*!< in: exchange value */
+ lint old_val); /*!< in: value to compare to */
+
+/**********************************************************//**
+Atomic addition of signed integers.
+@return Initial value of the variable pointed to by ptr */
+UNIV_INLINE
+lint
+win_xchg_and_add(
+/*=============*/
+ volatile lint* ptr, /*!< in/out: address of destination */
+ lint val); /*!< in: number to be added */
+
+/**********************************************************//**
+Atomic compare and exchange of unsigned integers.
+@return value found before the exchange.
+If it is not equal to old_value the exchange did not happen. */
+UNIV_INLINE
+ulint
+win_cmp_and_xchg_ulint(
+/*===================*/
+ volatile ulint* ptr, /*!< in/out: source/destination */
+ ulint new_val, /*!< in: exchange value */
+ ulint old_val); /*!< in: value to compare to */
+
+/**********************************************************//**
+Atomic compare and exchange of 32 bit unsigned integers.
+@return value found before the exchange.
+If it is not equal to old_value the exchange did not happen. */
+UNIV_INLINE
+DWORD
+win_cmp_and_xchg_dword(
+/*===================*/
+ volatile DWORD* ptr, /*!< in/out: source/destination */
+ DWORD new_val, /*!< in: exchange value */
+ DWORD old_val); /*!< in: value to compare to */
/**********************************************************//**
Returns true if swapped, ptr is pointer to target, old_val is value to
compare to, new_val is the value to swap in. */
# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- (win_cmp_and_xchg(ptr, new_val, old_val) == old_val)
+ (win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val)
# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- (win_cmp_and_xchg(ptr, new_val, old_val) == old_val)
+ (win_cmp_and_xchg_lint(ptr, new_val, old_val) == old_val)
/* windows thread objects can always be passed to windows atomic functions */
# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- (InterlockedCompareExchange(ptr, new_val, old_val) == old_val)
+ (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val)
+
# define INNODB_RW_LOCKS_USE_ATOMICS
# define IB_ATOMICS_STARTUP_MSG \
"Mutexes and rw_locks use Windows interlocked functions"
@@ -416,12 +577,27 @@ amount of increment. */
(win_xchg_and_add(ptr, amount) + amount)
# define os_atomic_increment_ulint(ptr, amount) \
- ((ulint) (win_xchg_and_add(ptr, amount) + amount))
+ ((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount))
# define os_atomic_increment_uint64(ptr, amount) \
((ib_uint64_t) (InterlockedExchangeAdd64( \
- (ib_int64_t*) ptr, \
- (ib_int64_t) amount) + amount))
+ (ib_int64_t*) ptr, \
+ (ib_int64_t) amount) + amount))
+
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount to decrement. There is no atomic subtract function on Windows, so
+the negated amount is added instead. The underlying call returns the value
+found before the addition, hence amount is subtracted once more to obtain
+the resulting value. NOTE: amount is expanded twice and must therefore be
+free of side effects. Every macro argument is parenthesized so that a
+compound expression such as "a + b" is negated and subtracted as a whole. */
+
+# define os_atomic_decrement_lint(ptr, amount) \
+ (win_xchg_and_add((ptr), -(lint) (amount)) - (amount))
+
+# define os_atomic_decrement_ulint(ptr, amount) \
+ ((ulint) (win_xchg_and_add((lint*) (ptr), -(lint) (amount)) - (amount)))
+
+# define os_atomic_decrement_uint64(ptr, amount) \
+ ((ib_uint64_t) (InterlockedExchangeAdd64( \
+ (ib_int64_t*) (ptr), \
+ -(ib_int64_t) (amount)) - (amount)))
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val.
@@ -431,10 +607,55 @@ clobbered */
# define os_atomic_test_and_set_byte(ptr, new_val) \
((byte) InterlockedExchange(ptr, new_val))
+/* NOTE(review): the GCC and Solaris branches of this header name the
+corresponding macro os_atomic_test_and_set_ulint(), whereas this branch
+provides os_atomic_test_and_set_ulong() -- confirm that no caller relies on
+the _ulint name when HAVE_WINDOWS_ATOMICS is defined. */
+# define os_atomic_test_and_set_ulong(ptr, new_val) \
+ InterlockedExchange(ptr, new_val)
+
#else
# define IB_ATOMICS_STARTUP_MSG \
"Mutexes and rw_locks use InnoDB's own implementation"
#endif
+#ifdef HAVE_ATOMIC_BUILTINS
+#define os_atomic_inc_ulint(m,v,d) os_atomic_increment_ulint(v, d)
+#define os_atomic_dec_ulint(m,v,d) os_atomic_decrement_ulint(v, d)
+#else
+#define os_atomic_inc_ulint(m,v,d) os_atomic_inc_ulint_func(m, v, d)
+#define os_atomic_dec_ulint(m,v,d) os_atomic_dec_ulint_func(m, v, d)
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+/**********************************************************//**
+Following macros are used to update specified counter atomically
+if HAVE_ATOMIC_BUILTINS defined. Otherwise, use mutex passed in
+for synchronization.
+mutex is the mutex object itself (its address is taken by the macro) and is
+ignored when atomic builtins are available; counter is the lvalue to update;
+amount is the value to add or subtract. In the mutex variant the underflow
+assertion is evaluated while the mutex is held, so that the value checked
+is the value that is decremented. */
+#ifdef HAVE_ATOMIC_BUILTINS
+#define os_increment_counter_by_amount(mutex, counter, amount) \
+ (void) os_atomic_increment_ulint(&(counter), (amount))
+
+#define os_decrement_counter_by_amount(mutex, counter, amount) \
+ (void) os_atomic_increment_ulint(&(counter), (-((lint) (amount))))
+#else
+#define os_increment_counter_by_amount(mutex, counter, amount) \
+ do { \
+ mutex_enter(&(mutex)); \
+ (counter) += (amount); \
+ mutex_exit(&(mutex)); \
+ } while (0)
+
+#define os_decrement_counter_by_amount(mutex, counter, amount) \
+ do { \
+ mutex_enter(&(mutex)); \
+ ut_a((counter) >= (amount)); \
+ (counter) -= (amount); \
+ mutex_exit(&(mutex)); \
+ } while (0)
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+#define os_inc_counter(mutex, counter) \
+ os_increment_counter_by_amount(mutex, counter, 1)
+
+/* Decrements counter by one through os_decrement_counter_by_amount().
+The expansion deliberately carries no trailing semicolon: the caller supplies
+it, so the macro acts as exactly one statement (e.g. between "if" and
+"else"). */
+#define os_dec_counter(mutex, counter) \
+ do { \
+ os_decrement_counter_by_amount(mutex, counter, 1);\
+ } while (0)
#ifndef UNIV_NONINL
#include "os0sync.ic"
diff --git a/storage/xtradb/include/os0sync.ic b/storage/xtradb/include/os0sync.ic
index 409ff19170a..33c238ceb47 100644
--- a/storage/xtradb/include/os0sync.ic
+++ b/storage/xtradb/include/os0sync.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -36,14 +36,10 @@ os_fast_mutex_trylock(
/*==================*/
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{
-#ifdef __WIN__
- if (TryEnterCriticalSection(fast_mutex)) {
-
- return(0);
- } else {
+ fast_mutex_t* mutex = &fast_mutex->mutex;
- return(1);
- }
+#ifdef __WIN__
+ return(!TryEnterCriticalSection(mutex));
#else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system
@@ -51,6 +47,186 @@ os_fast_mutex_trylock(
returns 1 on success (but MySQL remaps that to 0), while Linux,
FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */
- return((ulint) pthread_mutex_trylock(fast_mutex));
+ return((ulint) pthread_mutex_trylock(mutex));
+#endif
+}
+
+#ifdef UNIV_PFS_MUTEX
+/*********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly
+this function!
+A wrapper function for os_fast_mutex_init_func(). Initializes an operating
+system fast mutex semaphore.
+The instrumentation handle obtained for key is stored in fast_mutex->pfs_psi
+(NULL when HAVE_PSI_MUTEX_INTERFACE is not defined) before the native mutex
+fast_mutex->mutex is initialized. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_init(
+/*===================*/
+ PSI_mutex_key key, /*!< in: Performance Schema
+ key */
+ os_fast_mutex_t* fast_mutex) /*!< out: fast mutex */
+{
+#ifdef HAVE_PSI_MUTEX_INTERFACE
+ fast_mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, &fast_mutex->mutex);
+#else
+ fast_mutex->pfs_psi = NULL;
+#endif
+
+ os_fast_mutex_init_func(&fast_mutex->mutex);
+}
+/******************************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly
+this function!
+Wrapper function for os_fast_mutex_free_func(). Also destroys the performance
+schema probe, if one is attached, and resets fast_mutex->pfs_psi to NULL
+before the native mutex is freed */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_free(
+/*===================*/
+ os_fast_mutex_t* fast_mutex) /*!< in/out: mutex to free */
+{
+#ifdef HAVE_PSI_MUTEX_INTERFACE
+ if (fast_mutex->pfs_psi != NULL)
+ PSI_MUTEX_CALL(destroy_mutex)(fast_mutex->pfs_psi);
#endif
+ fast_mutex->pfs_psi = NULL;
+
+ os_fast_mutex_free_func(&fast_mutex->mutex);
}
+/**********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly
+this function!
+Wrapper function of os_fast_mutex_lock_func. Acquires ownership of a fast
+mutex.
+When the mutex carries an instrumentation handle (pfs_psi != NULL) the
+native lock call is bracketed by start_mutex_wait / end_mutex_wait, passing
+file_name and line to the former; otherwise the native lock is taken
+directly. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_lock(
+/*===================*/
+ os_fast_mutex_t* fast_mutex, /*!< in/out: mutex to acquire */
+ const char* file_name, /*!< in: file name where
+ locked */
+ ulint line) /*!< in: line where locked */
+{
+#ifdef HAVE_PSI_MUTEX_INTERFACE
+ if (fast_mutex->pfs_psi != NULL)
+ {
+ PSI_mutex_locker* locker;
+ PSI_mutex_locker_state state;
+
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(&state, fast_mutex->pfs_psi,
+ PSI_MUTEX_LOCK, file_name, line);
+
+ os_fast_mutex_lock_func(&fast_mutex->mutex);
+
+ if (locker != NULL)
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
+ }
+ else
+#endif
+ {
+ os_fast_mutex_lock_func(&fast_mutex->mutex);
+ }
+
+ return;
+}
+/**********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly
+this function!
+Wrapper function of os_fast_mutex_unlock_func. Releases ownership of a
+fast mutex.
+If an instrumentation handle is attached (pfs_psi != NULL), unlock_mutex is
+reported on it before the native mutex is released. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_unlock(
+/*=====================*/
+ os_fast_mutex_t* fast_mutex) /*!< in/out: mutex to release */
+{
+#ifdef HAVE_PSI_MUTEX_INTERFACE
+ if (fast_mutex->pfs_psi != NULL)
+ PSI_MUTEX_CALL(unlock_mutex)(fast_mutex->pfs_psi);
+#endif
+
+ os_fast_mutex_unlock_func(&fast_mutex->mutex);
+}
+#endif /* UNIV_PFS_MUTEX */
+
+#ifdef HAVE_WINDOWS_ATOMICS
+
+/* Use inline functions to make 64 and 32 bit versions of windows atomic
+functions so that typecasts are evaluated at compile time. Take advantage
+that lint is either __int64 or long int and windows atomic functions work
+on __int64 and LONG */
+
+/**********************************************************//**
+Atomic compare and exchange of signed integers (both 32 and 64 bit).
+Calls InterlockedCompareExchange64 when _WIN64 is defined and
+InterlockedCompareExchange otherwise.
+@return value found before the exchange.
+If it is not equal to old_val the exchange did not happen. */
+UNIV_INLINE
+lint
+win_cmp_and_xchg_lint(
+/*==================*/
+ volatile lint* ptr, /*!< in/out: source/destination */
+ lint new_val, /*!< in: exchange value */
+ lint old_val) /*!< in: value to compare to */
+{
+# ifdef _WIN64
+ return(InterlockedCompareExchange64(ptr, new_val, old_val));
+# else
+ return(InterlockedCompareExchange(ptr, new_val, old_val));
+# endif
+}
+
+/**********************************************************//**
+Atomic addition of signed integers.
+Calls InterlockedExchangeAdd64 when _WIN64 is defined and
+InterlockedExchangeAdd otherwise.
+@return Initial value of the variable pointed to by ptr */
+UNIV_INLINE
+lint
+win_xchg_and_add(
+/*=============*/
+ volatile lint* ptr, /*!< in/out: address of destination */
+ lint val) /*!< in: number to be added */
+{
+#ifdef _WIN64
+ return(InterlockedExchangeAdd64(ptr, val));
+#else
+ return(InterlockedExchangeAdd(ptr, val));
+#endif
+}
+
+/**********************************************************//**
+Atomic compare and exchange of unsigned integers.
+Implemented by casting the pointer and both values to their signed
+counterparts and delegating to win_cmp_and_xchg_lint().
+@return value found before the exchange.
+If it is not equal to old_val the exchange did not happen. */
+UNIV_INLINE
+ulint
+win_cmp_and_xchg_ulint(
+/*===================*/
+ volatile ulint* ptr, /*!< in/out: source/destination */
+ ulint new_val, /*!< in: exchange value */
+ ulint old_val) /*!< in: value to compare to */
+{
+ return((ulint) win_cmp_and_xchg_lint(
+ (volatile lint*) ptr,
+ (lint) new_val,
+ (lint) old_val));
+}
+
+/**********************************************************//**
+Atomic compare and exchange of 32-bit unsigned integers.
+The operands are cast to LONG for InterlockedCompareExchange; that DWORD and
+LONG have the same size is checked with ut_ad().
+@return value found before the exchange.
+If it is not equal to old_val the exchange did not happen. */
+UNIV_INLINE
+DWORD
+win_cmp_and_xchg_dword(
+/*===================*/
+ volatile DWORD* ptr, /*!< in/out: source/destination */
+ DWORD new_val, /*!< in: exchange value */
+ DWORD old_val) /*!< in: value to compare to */
+{
+ ut_ad(sizeof(DWORD) == sizeof(LONG)); /* We assume this. */
+ return(InterlockedCompareExchange(
+ (volatile LONG*) ptr,
+ (LONG) new_val,
+ (LONG) old_val));
+}
+
+#endif /* HAVE_WINDOWS_ATOMICS */
+
diff --git a/storage/xtradb/include/os0thread.h b/storage/xtradb/include/os0thread.h
index e8538247d10..d84eff99519 100644
--- a/storage/xtradb/include/os0thread.h
+++ b/storage/xtradb/include/os0thread.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -29,13 +29,16 @@ Created 9/8/1995 Heikki Tuuri
#include "univ.i"
+#ifdef UNIV_LINUX
+#include <sys/types.h>
+#endif
+
/* Maximum number of threads which can be created in the program;
this is also the size of the wait slot array for MySQL threads which
can wait inside InnoDB */
#define OS_THREAD_MAX_N srv_max_n_threads
-
/* Possible fixed priorities for threads */
#define OS_THREAD_PRIORITY_NONE 100
#define OS_THREAD_PRIORITY_BACKGROUND 1
@@ -44,15 +47,46 @@ can wait inside InnoDB */
#ifdef __WIN__
typedef void* os_thread_t;
-typedef unsigned long os_thread_id_t; /*!< In Windows the thread id
+typedef DWORD os_thread_id_t; /*!< In Windows the thread id
is an unsigned long int */
+typedef os_thread_id_t os_tid_t;
+extern "C" {
+typedef LPTHREAD_START_ROUTINE os_thread_func_t;
+}
+
+/** Macro for specifying a Windows thread start function. */
+#define DECLARE_THREAD(func) WINAPI func
+
+/** Required to get around a build error on Windows. Even though our functions
+are defined/declared as WINAPI f(LPVOID a); the compiler complains that they
+are defined as: os_thread_ret_t (__cdecl*)(void*). Because our functions
+don't access the arguments and don't return any value, we should be safe. */
+#define os_thread_create(f,a,i) \
+ os_thread_create_func(reinterpret_cast<os_thread_func_t>(f), a, i)
+
#else
+
typedef pthread_t os_thread_t;
typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread
handle itself as the id of
the thread */
+#ifdef UNIV_LINUX
+typedef pid_t os_tid_t; /*!< An alias for pid_t on
+ Linux, where setpriority()
+ accepts thread id of this type
+ and not pthread_t */
+#else
+typedef os_thread_id_t os_tid_t;
#endif
+extern "C" { typedef void* (*os_thread_func_t)(void*); }
+
+/** Macro for specifying a POSIX thread start function. */
+#define DECLARE_THREAD(func) func
+#define os_thread_create(f,a,i) os_thread_create_func(f, a, i)
+
+#endif /* __WIN__ */
+
/* Define a function pointer type to use in a typecast */
typedef void* (*os_posix_f_t) (void*);
@@ -88,14 +122,10 @@ thread should always use that to exit and not use return() to exit.
@return handle to the thread */
UNIV_INTERN
os_thread_t
-os_thread_create(
-/*=============*/
-#ifndef __WIN__
- os_posix_f_t start_f,
-#else
- ulint (*start_f)(void*), /*!< in: pointer to function
+os_thread_create_func(
+/*==================*/
+ os_thread_func_t func, /*!< in: pointer to function
from which to start */
-#endif
void* arg, /*!< in: argument to start
function */
os_thread_id_t* thread_id); /*!< out: id of the created
@@ -118,6 +148,15 @@ os_thread_id_t
os_thread_get_curr_id(void);
/*========================*/
/*****************************************************************//**
+Returns the system-specific thread identifier of current thread. On Linux,
+returns tid. On other systems currently returns os_thread_get_curr_id().
+
+@return current thread identifier */
+UNIV_INTERN
+os_tid_t
+os_thread_get_tid(void);
+/*=====================*/
+/*****************************************************************//**
Advises the os to give up remainder of the thread's time slice. */
UNIV_INTERN
void
@@ -130,6 +169,18 @@ void
os_thread_sleep(
/*============*/
ulint tm); /*!< in: time in microseconds */
+/*****************************************************************//**
+Set relative scheduling priority for a given thread on Linux. Currently a
+no-op on other systems.
+
+@return An actual thread priority after the update */
+UNIV_INTERN
+ulint
+os_thread_set_priority(
+/*===================*/
+ os_tid_t thread_id, /*!< in: thread id */
+ ulint relative_priority); /*!< in: system-specific
+ priority value */
#ifndef UNIV_NONINL
#include "os0thread.ic"
diff --git a/storage/xtradb/include/os0thread.ic b/storage/xtradb/include/os0thread.ic
index 5615791c77e..0622d22f2dc 100644
--- a/storage/xtradb/include/os0thread.ic
+++ b/storage/xtradb/include/os0thread.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/page0cur.h b/storage/xtradb/include/page0cur.h
index 5081a1de0ab..b1ad49b4915 100644
--- a/storage/xtradb/include/page0cur.h
+++ b/storage/xtradb/include/page0cur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -162,6 +162,12 @@ Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -170,14 +176,23 @@ page_cur_tuple_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const dtuple_t* tuple, /*!< in: pointer to a data tuple */
dict_index_t* index, /*!< in: record descriptor */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+ __attribute__((nonnull(1,2,3,4,5), warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -202,27 +217,38 @@ page_cur_insert_rec_low(
dict_index_t* index, /*!< in: record descriptor */
const rec_t* rec, /*!< in: pointer to a physical record */
ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+ __attribute__((nonnull(1,2,3,4), warn_unused_result));
/***********************************************************//**
Inserts a record next to page cursor on a compressed and uncompressed
page. Returns pointer to inserted record if succeed, i.e.,
enough space available, NULL otherwise.
The cursor stays at the same position.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INTERN
rec_t*
page_cur_insert_rec_zip(
/*====================*/
- rec_t** current_rec,/*!< in/out: pointer to current record after
- which the new record is inserted */
- buf_block_t* block, /*!< in: buffer block of *current_rec */
+ page_cur_t* cursor, /*!< in/out: page cursor */
dict_index_t* index, /*!< in: record descriptor */
const rec_t* rec, /*!< in: pointer to a physical record */
ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+ __attribute__((nonnull(1,2,3,4), warn_unused_result));
/*************************************************************//**
Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied. */
+including that record. Infimum and supremum records are not copied.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
UNIV_INTERN
void
page_copy_rec_list_end_to_created_page(
@@ -238,10 +264,11 @@ UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const ulint* offsets,/*!< in: rec_get_offsets(
+ cursor->rec, index) */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
#ifndef UNIV_HOTBACKUP
/****************************************************************//**
Searches the right position for a page cursor.
@@ -331,10 +358,24 @@ page_cur_parse_delete_rec(
buf_block_t* block, /*!< in: page or NULL */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr); /*!< in: mtr or NULL */
+/*******************************************************//**
+Removes the record from a leaf page. This function does not log
+any changes. It is used by the IMPORT tablespace functions.
+@return true if success, i.e., the page did not become too empty */
+UNIV_INTERN
+bool
+page_delete_rec(
+/*============*/
+ const dict_index_t* index, /*!< in: The index that the record
+ belongs to */
+ page_cur_t* pcur, /*!< in/out: page cursor on record
+ to delete */
+ page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
+ const ulint* offsets);/*!< in: offsets for record */
/** Index page cursor */
-struct page_cur_struct{
+struct page_cur_t{
byte* rec; /*!< pointer to a record on page */
buf_block_t* block; /*!< pointer to the block containing rec */
};
diff --git a/storage/xtradb/include/page0cur.ic b/storage/xtradb/include/page0cur.ic
index 1903fedf9e5..028d33b17aa 100644
--- a/storage/xtradb/include/page0cur.ic
+++ b/storage/xtradb/include/page0cur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,6 +27,8 @@ Created 10/4/1994 Heikki Tuuri
#include "buf0types.h"
#ifdef UNIV_DEBUG
+# include "rem0cmp.h"
+
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
@return page */
@@ -235,6 +237,12 @@ Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -243,32 +251,36 @@ page_cur_tuple_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const dtuple_t* tuple, /*!< in: pointer to a data tuple */
dict_index_t* index, /*!< in: record descriptor */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
- mem_heap_t* heap;
- ulint* offsets;
ulint size
= rec_get_converted_size(index, tuple, n_ext);
rec_t* rec;
- heap = mem_heap_create(size
- + (4 + REC_OFFS_HEADER_SIZE
- + dtuple_get_n_fields(tuple))
- * sizeof *offsets);
- rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(heap, size),
+ if (!*heap) {
+ *heap = mem_heap_create(size
+ + (4 + REC_OFFS_HEADER_SIZE
+ + dtuple_get_n_fields(tuple))
+ * sizeof **offsets);
+ }
+
+ rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(*heap, size),
index, tuple, n_ext);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+ *offsets = rec_get_offsets(
+ rec, index, *offsets, ULINT_UNDEFINED, heap);
if (buf_block_get_page_zip(cursor->block)) {
- rec = page_cur_insert_rec_zip(&cursor->rec, cursor->block,
- index, rec, offsets, mtr);
+ rec = page_cur_insert_rec_zip(
+ cursor, index, rec, *offsets, mtr);
} else {
rec = page_cur_insert_rec_low(cursor->rec,
- index, rec, offsets, mtr);
+ index, rec, *offsets, mtr);
}
- mem_heap_free(heap);
+ ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, *offsets));
return(rec);
}
#endif /* !UNIV_HOTBACKUP */
@@ -278,6 +290,12 @@ Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -290,8 +308,8 @@ page_cur_rec_insert(
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
if (buf_block_get_page_zip(cursor->block)) {
- return(page_cur_insert_rec_zip(&cursor->rec, cursor->block,
- index, rec, offsets, mtr));
+ return(page_cur_insert_rec_zip(
+ cursor, index, rec, offsets, mtr));
} else {
return(page_cur_insert_rec_low(cursor->rec,
index, rec, offsets, mtr));
diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h
index ba1ee7a7d11..80181bb5c30 100644
--- a/storage/xtradb/include/page0page.h
+++ b/storage/xtradb/include/page0page.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -518,14 +518,32 @@ page_rec_get_heap_no(
const rec_t* rec); /*!< in: the physical record */
/************************************************************//**
Determine whether the page is a B-tree leaf.
-@return TRUE if the page is a B-tree leaf */
+@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
UNIV_INLINE
-ibool
+bool
page_is_leaf(
/*=========*/
const page_t* page) /*!< in: page */
__attribute__((pure));
/************************************************************//**
+Determine whether the page is empty.
+@return true if the page is empty (PAGE_N_RECS = 0) */
+UNIV_INLINE
+bool
+page_is_empty(
+/*==========*/
+ const page_t* page) /*!< in: page */
+ __attribute__((nonnull, pure));
+/************************************************************//**
+Determine whether the page contains garbage.
+@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
+UNIV_INLINE
+bool
+page_has_garbage(
+/*=============*/
+ const page_t* page) /*!< in: page */
+ __attribute__((nonnull, pure));
+/************************************************************//**
Gets the pointer to the next record on the page.
@return pointer to next record */
UNIV_INLINE
@@ -551,15 +569,25 @@ page_rec_get_next_const(
/*====================*/
const rec_t* rec); /*!< in: pointer to record */
/************************************************************//**
+Gets the pointer to the next non delete-marked record on the page.
+If all subsequent records are delete-marked, then this function
+will return the supremum record.
+@return pointer to next non delete-marked record or pointer to supremum */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_non_del_marked(
+/*=============================*/
+ const rec_t* rec); /*!< in: pointer to record */
+/************************************************************//**
Sets the pointer to the next record on the page. */
UNIV_INLINE
void
page_rec_set_next(
/*==============*/
- rec_t* rec, /*!< in: pointer to record,
- must not be page supremum */
- rec_t* next); /*!< in: pointer to next record,
- must not be page infimum */
+ rec_t* rec, /*!< in: pointer to record,
+ must not be page supremum */
+ const rec_t* next); /*!< in: pointer to next record,
+ must not be page infimum */
/************************************************************//**
Gets the pointer to the previous record.
@return pointer to previous record */
@@ -737,11 +765,14 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- rec_t* rec, /*!< in: pointer to the (origin of) record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page,
+ or NULL */
+ rec_t* rec, /*!< in: pointer to the (origin of)
+ record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets);/*!< in: array returned by
+ rec_get_offsets() */
/**********************************************************//**
Create an uncompressed B-tree index page.
@return pointer to the page */
@@ -764,11 +795,27 @@ page_create_zip(
page is created */
dict_index_t* index, /*!< in: the index of the page */
ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-
+ trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
+/**********************************************************//**
+Empty a previously created B-tree index page. */
+UNIV_INTERN
+void
+page_create_empty(
+/*==============*/
+ buf_block_t* block, /*!< in/out: B-tree block */
+ dict_index_t* index, /*!< in: the index of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull(1,2)));
/*************************************************************//**
Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page. */
+touch the lock table and max trx id on page or compress the page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
UNIV_INTERN
void
page_copy_rec_list_end_no_locks(
@@ -782,6 +829,12 @@ page_copy_rec_list_end_no_locks(
Copies records from page to new_page, from the given record onward,
including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to the original successor of the infimum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
UNIV_INTERN
@@ -798,6 +851,12 @@ page_copy_rec_list_end(
Copies records from page to new_page, up to the given record, NOT
including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return pointer to the original predecessor of the supremum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
UNIV_INTERN
@@ -842,6 +901,12 @@ page_delete_rec_list_start(
/*************************************************************//**
Moves record list end to another page. Moved records include
split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return TRUE on success; FALSE on compression failure (new_block will
be decompressed) */
UNIV_INTERN
@@ -857,6 +922,12 @@ page_move_rec_list_end(
/*************************************************************//**
Moves record list start to another page. Moved records do not include
split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
@return TRUE on success; FALSE on compression failure */
UNIV_INTERN
ibool
@@ -1031,7 +1102,6 @@ page_find_rec_with_heap_no(
/*=======================*/
const page_t* page, /*!< in: index page */
ulint heap_no);/*!< in: heap number */
-
#ifdef UNIV_MATERIALIZE
#undef UNIV_INLINE
#define UNIV_INLINE UNIV_INLINE_ORIGINAL
diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic
index 4fe93345ce5..58add015d34 100644
--- a/storage/xtradb/include/page0page.ic
+++ b/storage/xtradb/include/page0page.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -136,7 +136,7 @@ page_header_set_field(
ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
mach_write_to_2(page + PAGE_HEADER + field, val);
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
page_zip_write_header(page_zip,
page + PAGE_HEADER + field, 2, NULL);
}
@@ -211,7 +211,7 @@ page_header_reset_last_insert(
{
ut_ad(page && mtr);
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0);
page_zip_write_header(page_zip,
page + (PAGE_HEADER + PAGE_LAST_INSERT),
@@ -233,8 +233,7 @@ page_is_comp(
/*=========*/
const page_t* page) /*!< in: index page */
{
- return(UNIV_EXPECT(page_header_get_field(page, PAGE_N_HEAP) & 0x8000,
- 0x8000));
+ return(page_header_get_field(page, PAGE_N_HEAP) & 0x8000);
}
/************************************************************//**
@@ -267,9 +266,9 @@ page_rec_get_heap_no(
/************************************************************//**
Determine whether the page is a B-tree leaf.
-@return TRUE if the page is a B-tree leaf */
+@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
UNIV_INLINE
-ibool
+bool
page_is_leaf(
/*=========*/
const page_t* page) /*!< in: page */
@@ -281,6 +280,30 @@ page_is_leaf(
}
/************************************************************//**
+Determine whether the page is empty.
+@return true if the page is empty (PAGE_N_RECS = 0) */
+UNIV_INLINE
+bool
+page_is_empty(
+/*==========*/
+ const page_t* page) /*!< in: page */
+{
+ return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_N_RECS)));
+}
+
+/************************************************************//**
+Determine whether the page contains garbage.
+@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
+UNIV_INLINE
+bool
+page_has_garbage(
+/*=============*/
+ const page_t* page) /*!< in: page */
+{
+ return(!!*(const uint16*) (page + (PAGE_HEADER + PAGE_GARBAGE)));
+}
+
+/************************************************************//**
Gets the offset of the first record on the page.
@return offset of the first record in record list, relative from page */
UNIV_INLINE
@@ -348,10 +371,10 @@ page_rec_is_user_rec_low(
#endif
ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
- return(UNIV_LIKELY(offset != PAGE_NEW_SUPREMUM)
- && UNIV_LIKELY(offset != PAGE_NEW_INFIMUM)
- && UNIV_LIKELY(offset != PAGE_OLD_INFIMUM)
- && UNIV_LIKELY(offset != PAGE_OLD_SUPREMUM));
+ return(offset != PAGE_NEW_SUPREMUM
+ && offset != PAGE_NEW_INFIMUM
+ && offset != PAGE_OLD_INFIMUM
+ && offset != PAGE_OLD_SUPREMUM);
}
/************************************************************//**
@@ -366,8 +389,8 @@ page_rec_is_supremum_low(
ut_ad(offset >= PAGE_NEW_INFIMUM);
ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
- return(UNIV_UNLIKELY(offset == PAGE_NEW_SUPREMUM)
- || UNIV_UNLIKELY(offset == PAGE_OLD_SUPREMUM));
+ return(offset == PAGE_NEW_SUPREMUM
+ || offset == PAGE_OLD_SUPREMUM);
}
/************************************************************//**
@@ -382,8 +405,7 @@ page_rec_is_infimum_low(
ut_ad(offset >= PAGE_NEW_INFIMUM);
ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
- return(UNIV_UNLIKELY(offset == PAGE_NEW_INFIMUM)
- || UNIV_UNLIKELY(offset == PAGE_OLD_INFIMUM));
+ return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM);
}
/************************************************************//**
@@ -487,12 +509,14 @@ page_cmp_dtuple_rec_with_match(
rec_offset = page_offset(rec);
- if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_INFIMUM)
- || UNIV_UNLIKELY(rec_offset == PAGE_OLD_INFIMUM)) {
+ if (rec_offset == PAGE_NEW_INFIMUM
+ || rec_offset == PAGE_OLD_INFIMUM) {
+
return(1);
- }
- if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_SUPREMUM)
- || UNIV_UNLIKELY(rec_offset == PAGE_OLD_SUPREMUM)) {
+
+ } else if (rec_offset == PAGE_NEW_SUPREMUM
+ || rec_offset == PAGE_OLD_SUPREMUM) {
+
return(-1);
}
@@ -734,21 +758,19 @@ page_rec_get_next_low(
offs = rec_get_next_offs(rec, comp);
- if (UNIV_UNLIKELY(offs >= UNIV_PAGE_SIZE)) {
+ if (offs >= UNIV_PAGE_SIZE) {
fprintf(stderr,
"InnoDB: Next record offset is nonsensical %lu"
" in record at offset %lu\n"
"InnoDB: rec address %p, space id %lu, page %lu\n",
- (ulong)offs, (ulong) page_offset(rec),
+ (ulong) offs, (ulong) page_offset(rec),
(void*) rec,
(ulong) page_get_space_id(page),
(ulong) page_get_page_no(page));
buf_page_print(page, 0, 0);
ut_error;
- }
-
- if (UNIV_UNLIKELY(offs == 0)) {
+ } else if (offs == 0) {
return(NULL);
}
@@ -781,14 +803,38 @@ page_rec_get_next_const(
}
/************************************************************//**
+Gets the pointer to the next non delete-marked record on the page.
+If all subsequent records are delete-marked, then this function
+will return the supremum record.
+@return pointer to next non delete-marked record or pointer to supremum */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_non_del_marked(
+/*=============================*/
+ const rec_t* rec) /*!< in: pointer to record */
+{
+ const rec_t* r;
+ ulint page_is_compact = page_rec_is_comp(rec);
+
+ for (r = page_rec_get_next_const(rec);
+ !page_rec_is_supremum(r)
+ && rec_get_deleted_flag(r, page_is_compact);
+ r = page_rec_get_next_const(r)) {
+ /* noop */
+ }
+
+ return(r);
+}
+
+/************************************************************//**
Sets the pointer to the next record on the page. */
UNIV_INLINE
void
page_rec_set_next(
/*==============*/
- rec_t* rec, /*!< in: pointer to record,
+ rec_t* rec, /*!< in: pointer to record,
must not be page supremum */
- rec_t* next) /*!< in: pointer to next record,
+ const rec_t* next) /*!< in: pointer to next record,
must not be page infimum */
{
ulint offs;
@@ -800,11 +846,7 @@ page_rec_set_next(
ut_ad(!next || !page_rec_is_infimum(next));
ut_ad(!next || page_align(rec) == page_align(next));
- if (UNIV_LIKELY(next != NULL)) {
- offs = page_offset(next);
- } else {
- offs = 0;
- }
+ offs = next != NULL ? page_offset(next) : 0;
if (page_rec_is_comp(rec)) {
rec_set_next_offs_new(rec, offs);
@@ -979,7 +1021,7 @@ page_get_free_space_of_empty(
/*=========================*/
ulint comp) /*!< in: nonzero=compact page layout */
{
- if (UNIV_LIKELY(comp)) {
+ if (comp) {
return((ulint)(UNIV_PAGE_SIZE
- PAGE_NEW_SUPREMUM_END
- PAGE_DIR
@@ -1094,11 +1136,14 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- rec_t* rec, /*!< in: pointer to the (origin of) record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page,
+ or NULL */
+ rec_t* rec, /*!< in: pointer to the
+ (origin of) record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets) /*!< in: array returned by
+ rec_get_offsets() */
{
rec_t* free;
ulint garbage;
@@ -1114,7 +1159,7 @@ page_mem_free(
page_header_set_field(page, page_zip, PAGE_GARBAGE,
garbage + rec_offs_size(offsets));
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
page_zip_dir_delete(page_zip, rec, index, offsets, free);
} else {
page_header_set_field(page, page_zip, PAGE_N_RECS,
diff --git a/storage/xtradb/include/page0types.h b/storage/xtradb/include/page0types.h
index 4e76e52ecfb..95143a4bb44 100644
--- a/storage/xtradb/include/page0types.h
+++ b/storage/xtradb/include/page0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,6 +26,10 @@ Created 2/2/1994 Heikki Tuuri
#ifndef page0types_h
#define page0types_h
+using namespace std;
+
+#include <map>
+
#include "univ.i"
#include "dict0types.h"
#include "mtr0types.h"
@@ -35,12 +39,10 @@ Created 2/2/1994 Heikki Tuuri
/** Type of the index page */
typedef byte page_t;
/** Index page cursor */
-typedef struct page_cur_struct page_cur_t;
+struct page_cur_t;
/** Compressed index page */
-typedef byte page_zip_t;
-/** Compressed page descriptor */
-typedef struct page_zip_des_struct page_zip_des_t;
+typedef byte page_zip_t;
/* The following definitions would better belong to page0zip.h,
but we cannot include page0zip.h from rem0rec.ic, because
@@ -49,25 +51,25 @@ page0*.h includes rem0rec.h and may include rem0rec.ic. */
/** Number of bits needed for representing different compressed page sizes */
#define PAGE_ZIP_SSIZE_BITS 3
-/** log2 of smallest compressed page size */
-#define PAGE_ZIP_MIN_SIZE_SHIFT 10
-/** Smallest compressed page size */
-#define PAGE_ZIP_MIN_SIZE (1 << PAGE_ZIP_MIN_SIZE_SHIFT)
+/** Maximum compressed page shift size */
+#define PAGE_ZIP_SSIZE_MAX \
+ (UNIV_ZIP_SIZE_SHIFT_MAX - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
-/** Number of supported compressed page sizes */
-#define PAGE_ZIP_NUM_SSIZE (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 2)
-#define PAGE_ZIP_NUM_SSIZE_MAX (UNIV_PAGE_SIZE_SHIFT_MAX - PAGE_ZIP_MIN_SIZE_SHIFT + 2)
-#if PAGE_ZIP_NUM_SSIZE_MAX > (1 << PAGE_ZIP_SSIZE_BITS)
-# error "PAGE_ZIP_NUM_SSIZE_MAX > (1 << PAGE_ZIP_SSIZE_BITS)"
+/* Make sure there are enough bits available to store the maximum zip
+ssize, which is the number of shifts from 512. */
+#if PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)
+# error "PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)"
#endif
/** Compressed page descriptor */
-struct page_zip_des_struct
+struct page_zip_des_t
{
page_zip_t* data; /*!< compressed page data */
#ifdef UNIV_DEBUG
unsigned m_start:16; /*!< start offset of modification log */
+ bool m_external; /*!< Allocated externally, not from the
+ buffer pool */
#endif /* UNIV_DEBUG */
unsigned m_end:16; /*!< end offset of modification log */
unsigned m_nonempty:1; /*!< TRUE if the modification log
@@ -76,13 +78,13 @@ struct page_zip_des_struct
columns on the page; the maximum
is 744 on a 16 KiB page */
unsigned ssize:PAGE_ZIP_SSIZE_BITS;
- /*!< 0 or compressed page size;
+ /*!< 0 or compressed page shift size;
the size in bytes is
- PAGE_ZIP_MIN_SIZE << (ssize - 1). */
+ (UNIV_ZIP_SIZE_MIN >> 1) << ssize. */
};
/** Compression statistics for a given page size */
-struct page_zip_stat_struct {
+struct page_zip_stat_t {
/** Number of page compressions */
ulint compressed;
/** Number of successful page compressions */
@@ -93,13 +95,29 @@ struct page_zip_stat_struct {
ib_uint64_t compressed_usec;
/** Duration of page decompressions in microseconds */
ib_uint64_t decompressed_usec;
+ page_zip_stat_t() :
+ /* Initialize members to 0 so that when we do
+ stlmap[key].compressed++ and element with "key" does not
+ exist it gets inserted with zeroed members. */
+ compressed(0),
+ compressed_ok(0),
+ decompressed(0),
+ compressed_usec(0),
+ decompressed_usec(0)
+ { }
};
-/** Compression statistics */
-typedef struct page_zip_stat_struct page_zip_stat_t;
-
-/** Statistics on compression, indexed by page_zip_des_struct::ssize - 1 */
-extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE_MAX - 1];
+/** Compression statistics types */
+typedef map<index_id_t, page_zip_stat_t> page_zip_stat_per_index_t;
+
+/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
+extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+/** Statistics on compression, indexed by dict_index_t::id */
+extern page_zip_stat_per_index_t page_zip_stat_per_index;
+extern ib_mutex_t page_zip_stat_per_index_mutex;
+#ifdef HAVE_PSI_INTERFACE
+extern mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
/**********************************************************************//**
Write the "deleted" flag of a record on a compressed page. The flag must
diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h
index a33407e78bc..2f9efc4a40c 100644
--- a/storage/xtradb/include/page0zip.h
+++ b/storage/xtradb/include/page0zip.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +12,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -35,9 +36,20 @@ Created June 2005 by Marko Makela
#include "page0types.h"
#include "buf0types.h"
#include "dict0types.h"
+#include "srv0srv.h"
#include "trx0types.h"
#include "mem0mem.h"
+/* Compression level to be used by zlib. Settable by user. */
+extern uint page_zip_level;
+
+/* Default compression level. */
+#define DEFAULT_COMPRESSION_LEVEL 6
+
+/* Whether or not to log compressed page images to avoid possible
+compression algorithm changes in zlib. */
+extern my_bool page_zip_log_pages;
+
/**********************************************************************//**
Determine the size of a compressed page in bytes.
@return size in bytes */
@@ -113,6 +125,7 @@ page_zip_compress(
m_start, m_end, m_nonempty */
const page_t* page, /*!< in: uncompressed page */
dict_index_t* index, /*!< in: index of the B-tree node */
+ ulint level, /*!< in: compression level */
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
__attribute__((nonnull(1,3)));
@@ -336,11 +349,12 @@ UNIV_INTERN
void
page_zip_dir_delete(
/*================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in: deleted record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- const byte* free) /*!< in: previous start of the free list */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ byte* rec, /*!< in: deleted record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const byte* free) /*!< in: previous start of
+ the free list */
__attribute__((nonnull(1,2,3,4)));
/**********************************************************************//**
@@ -446,16 +460,63 @@ ulint
page_zip_calc_checksum(
/*===================*/
const void* data, /*!< in: compressed page */
- ulint size) /*!< in: size of compressed page */
+ ulint size, /*!< in: size of compressed page */
+ srv_checksum_algorithm_t algo) /*!< in: algorithm to use */
__attribute__((nonnull));
+/**********************************************************************//**
+Verify a compressed page's checksum.
+@return TRUE if the stored checksum is valid according to the value of
+innodb_checksum_algorithm */
+UNIV_INTERN
+ibool
+page_zip_verify_checksum(
+/*=====================*/
+ const void* data, /*!< in: compressed page */
+ ulint size); /*!< in: size of compressed page */
+/**********************************************************************//**
+Write a log record of compressing an index page without the data on the page. */
+UNIV_INLINE
+void
+page_zip_compress_write_log_no_data(
+/*================================*/
+ ulint level, /*!< in: compression level */
+ const page_t* page, /*!< in: page that is compressed */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
+Parses a log record of compressing an index page without the data.
+@return end of log record or NULL */
+UNIV_INLINE
+byte*
+page_zip_parse_compress_no_data(
+/*============================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr, /*!< in: buffer end */
+ page_t* page, /*!< in: uncompressed page */
+ page_zip_des_t* page_zip, /*!< out: compressed page */
+ dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Reset the counters used for filling
+INFORMATION_SCHEMA.innodb_cmp_per_index. */
+UNIV_INLINE
+void
+page_zip_reset_stat_per_index();
+/*===========================*/
+
#ifndef UNIV_HOTBACKUP
/** Check if a pointer to an uncompressed page matches a compressed page.
+When we IMPORT a tablespace the blocks and accompanying frames are allocated
+from outside the buffer pool.
@param ptr pointer to an uncompressed page frame
@param page_zip compressed page descriptor
@return TRUE if ptr and page_zip refer to the same block */
-# define PAGE_ZIP_MATCH(ptr, page_zip) \
- (buf_frame_get_page_zip(ptr) == (page_zip))
+# define PAGE_ZIP_MATCH(ptr, page_zip) \
+ (((page_zip)->m_external \
+ && (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)) \
+ || buf_frame_get_page_zip(ptr) == (page_zip))
#else /* !UNIV_HOTBACKUP */
/** Check if a pointer to an uncompressed page matches a compressed page.
@param ptr pointer to an uncompressed page frame
diff --git a/storage/xtradb/include/page0zip.ic b/storage/xtradb/include/page0zip.ic
index e26fa3e3d94..6c7d8cd32c7 100644
--- a/storage/xtradb/include/page0zip.ic
+++ b/storage/xtradb/include/page0zip.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +12,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -29,6 +30,7 @@ Created June 2005 by Marko Makela
#endif
#include "page0zip.h"
+#include "mtr0log.h"
#include "page0page.h"
/* The format of compressed pages is as follows.
@@ -120,13 +122,13 @@ page_zip_get_size(
{
ulint size;
- if (UNIV_UNLIKELY(!page_zip->ssize)) {
+ if (!page_zip->ssize) {
return(0);
}
- size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize;
+ size = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize;
- ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+ ut_ad(size >= UNIV_ZIP_SIZE_MIN);
ut_ad(size <= UNIV_PAGE_SIZE);
return(size);
@@ -174,13 +176,13 @@ page_zip_rec_needs_ext(
ut_ad(ut_is_2pow(zip_size));
ut_ad(comp || !zip_size);
-#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE
- if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) {
+#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE
+ if (rec_size >= REC_MAX_DATA_SIZE) {
return(TRUE);
}
#endif
- if (UNIV_UNLIKELY(zip_size)) {
+ if (zip_size) {
ut_ad(comp);
/* On a compressed page, there is a two-byte entry in
the dense page directory for every record. But there
@@ -209,7 +211,7 @@ page_zip_simple_validate(
{
ut_ad(page_zip);
ut_ad(page_zip->data);
- ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE);
+ ut_ad(page_zip->ssize <= PAGE_ZIP_SSIZE_MAX);
ut_ad(page_zip_get_size(page_zip)
> PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
ut_ad(page_zip->m_start <= page_zip->m_end);
@@ -236,11 +238,11 @@ page_zip_get_trailer_len(
ut_ad(page_zip_simple_validate(page_zip));
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
- if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) {
+ if (!page_is_leaf(page_zip->data)) {
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
+ REC_NODE_PTR_SIZE;
ut_ad(!page_zip->n_blobs);
- } else if (UNIV_UNLIKELY(is_clust)) {
+ } else if (is_clust) {
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
} else {
@@ -305,7 +307,7 @@ page_zip_available(
space needed for identifying the record (encoded heap_no). */
length -= REC_N_NEW_EXTRA_BYTES - 2;
- if (UNIV_UNLIKELY(create)) {
+ if (create > 0) {
/* When a record is created, a pointer may be added to
the dense directory.
Likewise, space for the columns that will not be
@@ -316,10 +318,8 @@ page_zip_available(
trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
}
- return(UNIV_LIKELY(length
- + trailer_len
- + page_zip->m_end
- < page_zip_get_size(page_zip)));
+ return(length + trailer_len + page_zip->m_end
+ < page_zip_get_size(page_zip));
}
/**********************************************************************//**
@@ -374,13 +374,82 @@ page_zip_write_header(
/* The following would fail in page_cur_insert_rec_zip(). */
/* ut_ad(page_zip_validate(page_zip, str - pos)); */
- if (UNIV_LIKELY_NULL(mtr)) {
+ if (mtr) {
#ifndef UNIV_HOTBACKUP
page_zip_write_header_log(str, length, mtr);
#endif /* !UNIV_HOTBACKUP */
}
}
+/**********************************************************************//**
+Write a log record of compressing an index page without the data on the page. */
+UNIV_INLINE
+void
+page_zip_compress_write_log_no_data(
+/*================================*/
+ ulint level, /*!< in: compression level */
+ const page_t* page, /*!< in: page that is compressed */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ byte* log_ptr = mlog_open_and_write_index(
+ mtr, page, index, MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1);
+
+ if (log_ptr) {
+ mach_write_to_1(log_ptr, level);
+ mlog_close(mtr, log_ptr + 1);
+ }
+}
+
+/**********************************************************************//**
+Parses a log record of compressing an index page without the data.
+@return end of log record or NULL */
+UNIV_INLINE
+byte*
+page_zip_parse_compress_no_data(
+/*============================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr, /*!< in: buffer end */
+ page_t* page, /*!< in: uncompressed page */
+ page_zip_des_t* page_zip, /*!< out: compressed page */
+ dict_index_t* index) /*!< in: index */
+{
+ ulint level;
+ if (end_ptr == ptr) {
+ return(NULL);
+ }
+
+ level = mach_read_from_1(ptr);
+
+ /* If page compression fails then there must be something wrong
+ because a compress log record is logged only if the compression
+ was successful. Crash in this case. */
+
+ if (page
+ && !page_zip_compress(page_zip, page, index, level, NULL)) {
+ ut_error;
+ }
+
+ return(ptr + 1);
+}
+
+/**********************************************************************//**
+Reset the counters used for filling
+INFORMATION_SCHEMA.innodb_cmp_per_index. */
+UNIV_INLINE
+void
+page_zip_reset_stat_per_index()
+/*===========================*/
+{
+ mutex_enter(&page_zip_stat_per_index_mutex);
+
+ page_zip_stat_per_index.erase(
+ page_zip_stat_per_index.begin(),
+ page_zip_stat_per_index.end());
+
+ mutex_exit(&page_zip_stat_per_index_mutex);
+}
+
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
diff --git a/storage/xtradb/include/pars0grm.h b/storage/xtradb/include/pars0grm.h
index abaffb66c1e..8e725fe9545 100644
--- a/storage/xtradb/include/pars0grm.h
+++ b/storage/xtradb/include/pars0grm.h
@@ -1,29 +1,37 @@
-/*****************************************************************************
+/* A Bison parser, made by GNU Bison 2.3. */
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software
-Foundation, Inc.
+/* Skeleton interface for Bison's Yacc-like parsers in C
-As a special exception, when this file is copied by Bison into a
-Bison output file, you may use that output file without restriction.
-This special exception was added by the Free Software Foundation
-in version 1.24 of Bison.
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
-*****************************************************************************/
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
-/* A Bison parser, made by GNU Bison 1.875d. */
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
/* Tokens. */
#ifndef YYTOKENTYPE
@@ -123,9 +131,19 @@ this program; if not, write to the Free Software Foundation, Inc.,
PARS_LOCK_TOKEN = 347,
PARS_SHARE_TOKEN = 348,
PARS_MODE_TOKEN = 349,
- NEG = 350
+ PARS_LIKE_TOKEN = 350,
+ PARS_LIKE_TOKEN_EXACT = 351,
+ PARS_LIKE_TOKEN_PREFIX = 352,
+ PARS_LIKE_TOKEN_SUFFIX = 353,
+ PARS_LIKE_TOKEN_SUBSTR = 354,
+ PARS_TABLE_NAME_TOKEN = 355,
+ PARS_COMPACT_TOKEN = 356,
+ PARS_BLOCK_SIZE_TOKEN = 357,
+ PARS_BIGINT_TOKEN = 358,
+ NEG = 359
};
#endif
+/* Tokens. */
#define PARS_INT_LIT 258
#define PARS_FLOAT_LIT 259
#define PARS_STR_LIT 260
@@ -218,12 +236,21 @@ this program; if not, write to the Free Software Foundation, Inc.,
#define PARS_LOCK_TOKEN 347
#define PARS_SHARE_TOKEN 348
#define PARS_MODE_TOKEN 349
-#define NEG 350
+#define PARS_LIKE_TOKEN 350
+#define PARS_LIKE_TOKEN_EXACT 351
+#define PARS_LIKE_TOKEN_PREFIX 352
+#define PARS_LIKE_TOKEN_SUFFIX 353
+#define PARS_LIKE_TOKEN_SUBSTR 354
+#define PARS_TABLE_NAME_TOKEN 355
+#define PARS_COMPACT_TOKEN 356
+#define PARS_BLOCK_SIZE_TOKEN 357
+#define PARS_BIGINT_TOKEN 358
+#define NEG 359
-#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef int YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
@@ -232,5 +259,3 @@ typedef int YYSTYPE;
extern YYSTYPE yylval;
-
-
diff --git a/storage/xtradb/include/pars0opt.h b/storage/xtradb/include/pars0opt.h
index fd6b9726019..1084d644c90 100644
--- a/storage/xtradb/include/pars0opt.h
+++ b/storage/xtradb/include/pars0opt.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/pars0opt.ic b/storage/xtradb/include/pars0opt.ic
index f303fe91d3b..786d911ca3d 100644
--- a/storage/xtradb/include/pars0opt.ic
+++ b/storage/xtradb/include/pars0opt.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/pars0pars.h b/storage/xtradb/include/pars0pars.h
index eb79dcb18c1..65ff7533828 100644
--- a/storage/xtradb/include/pars0pars.h
+++ b/storage/xtradb/include/pars0pars.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -38,7 +38,7 @@ Created 11/19/1996 Heikki Tuuri
and varies in type, while 'user_arg' is a user-supplied argument. The
meaning of the return type also varies. See the individual use cases, e.g.
the FETCH statement, for details on them. */
-typedef void* (*pars_user_func_cb_t)(void* arg, void* user_arg);
+typedef ibool (*pars_user_func_cb_t)(void* arg, void* user_arg);
/** If the following is set TRUE, the parser will emit debugging
information */
@@ -74,6 +74,7 @@ extern pars_res_word_t pars_distinct_token;
extern pars_res_word_t pars_binary_token;
extern pars_res_word_t pars_blob_token;
extern pars_res_word_t pars_int_token;
+extern pars_res_word_t pars_bigint_token;
extern pars_res_word_t pars_char_token;
extern pars_res_word_t pars_float_token;
extern pars_res_word_t pars_update_token;
@@ -105,13 +106,13 @@ pars_sql(
pars_info_t* info, /*!< in: extra information, or NULL */
const char* str); /*!< in: SQL string */
/*************************************************************//**
-Retrieves characters to the lexical analyzer. */
+Retrieves characters to the lexical analyzer.
+@return number of characters copied or 0 on EOF */
UNIV_INTERN
-void
+int
pars_get_lex_chars(
/*===============*/
char* buf, /*!< in/out: buffer where to copy */
- int* result, /*!< out: number of characters copied or EOF */
int max_size); /*!< in: maximum number of characters which fit
in the buffer */
/*************************************************************//**
@@ -140,6 +141,17 @@ pars_func(
/*======*/
que_node_t* res_word,/*!< in: function name reserved word */
que_node_t* arg); /*!< in: first argument in the argument list */
+/*************************************************************************
+Rebind a LIKE search string. NOTE: We ignore any '%' characters embedded
+within the search string.
+@return own: function node in a query tree */
+UNIV_INTERN
+int
+pars_like_rebind(
+/*=============*/
+ sym_node_t* node, /* in: The search string node.*/
+ const byte* ptr, /* in: literal to (re) bind */
+ ulint len); /* in: length of literal to (re) bind*/
/*********************************************************************//**
Parses an operator expression.
@return own: function node in a query tree */
@@ -397,7 +409,10 @@ pars_create_table(
sym_node_t* table_sym, /*!< in: table name node in the symbol
table */
sym_node_t* column_defs, /*!< in: list of column names */
- void* not_fit_in_memory);/*!< in: a non-NULL pointer means that
+ sym_node_t* compact, /* in: non-NULL if COMPACT table. */
+ sym_node_t* block_size, /* in: block size (can be NULL) */
+ void* not_fit_in_memory);
+ /*!< in: a non-NULL pointer means that
this is a table which in simulations
should be simulated as not fitting
in memory; thread is put to sleep
@@ -454,9 +469,10 @@ que_thr_t*
pars_complete_graph_for_exec(
/*=========================*/
que_node_t* node, /*!< in: root node for an incomplete
- query graph */
+ query graph, or NULL for dummy graph */
trx_t* trx, /*!< in: transaction handle */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ __attribute__((nonnull(2,3), warn_unused_result));
/****************************************************************//**
Create parser info struct.
@@ -498,7 +514,76 @@ pars_info_add_str_literal(
pars_info_t* info, /*!< in: info struct */
const char* name, /*!< in: name */
const char* str); /*!< in: string */
+/********************************************************************
+If the literal value already exists then it rebinds otherwise it
+creates a new entry.*/
+UNIV_INTERN
+void
+pars_info_bind_literal(
+/*===================*/
+ pars_info_t* info, /* in: info struct */
+ const char* name, /* in: name */
+ const void* address, /* in: address */
+ ulint length, /* in: length of data */
+ ulint type, /* in: type, e.g. DATA_FIXBINARY */
+ ulint prtype); /* in: precise type, e.g. DATA_UNSIGNED */
+/********************************************************************
+If the literal value already exists then it rebinds otherwise it
+creates a new entry.*/
+UNIV_INTERN
+void
+pars_info_bind_varchar_literal(
+/*===========================*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const byte* str, /*!< in: string */
+ ulint str_len); /*!< in: string length */
+/****************************************************************//**
+Equivalent to:
+char buf[4];
+mach_write_to_4(buf, val);
+pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
+
+except that the buffer is dynamically allocated from the info struct's
+heap. */
+UNIV_INTERN
+void
+pars_info_bind_int4_literal(
+/*=======================*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const ib_uint32_t* val); /*!< in: value */
+/********************************************************************
+If the literal value already exists then it rebinds otherwise it
+creates a new entry. */
+UNIV_INTERN
+void
+pars_info_bind_int8_literal(
+/*=======================*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const ib_uint64_t* val); /*!< in: value */
+/****************************************************************//**
+Add user function. */
+UNIV_INTERN
+void
+pars_info_bind_function(
+/*===================*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: function name */
+ pars_user_func_cb_t func, /*!< in: function address */
+ void* arg); /*!< in: user-supplied argument */
+/****************************************************************//**
+Add bound id. */
+UNIV_INTERN
+void
+pars_info_bind_id(
+/*=============*/
+ pars_info_t* info, /*!< in: info struct */
+ ibool copy_name,/* in: make a copy of name if TRUE */
+ const char* name, /*!< in: name */
+ const char* id); /*!< in: id */
/****************************************************************//**
Equivalent to:
@@ -532,16 +617,18 @@ pars_info_add_ull_literal(
pars_info_t* info, /*!< in: info struct */
const char* name, /*!< in: name */
ib_uint64_t val); /*!< in: value */
+
/****************************************************************//**
-Add user function. */
+If the literal value already exists then it rebinds otherwise it
+creates a new entry. */
UNIV_INTERN
void
-pars_info_add_function(
-/*===================*/
+pars_info_bind_ull_literal(
+/*=======================*/
pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: function name */
- pars_user_func_cb_t func, /*!< in: function address */
- void* arg); /*!< in: user-supplied argument */
+ const char* name, /*!< in: name */
+ const ib_uint64_t* val) /*!< in: value */
+ __attribute__((nonnull));
/****************************************************************//**
Add bound id. */
@@ -554,16 +641,6 @@ pars_info_add_id(
const char* id); /*!< in: id */
/****************************************************************//**
-Get user function with the given name.
-@return user func, or NULL if not found */
-UNIV_INTERN
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name); /*!< in: function name to find*/
-
-/****************************************************************//**
Get bound literal with the given name.
@return bound literal, or NULL if not found */
UNIV_INTERN
@@ -591,7 +668,7 @@ pars_lexer_close(void);
/*==================*/
/** Extra information supplied for pars_sql(). */
-struct pars_info_struct {
+struct pars_info_t {
mem_heap_t* heap; /*!< our own memory heap */
ib_vector_t* funcs; /*!< user functions, or NULL
@@ -606,39 +683,40 @@ struct pars_info_struct {
};
/** User-supplied function and argument. */
-struct pars_user_func_struct {
+struct pars_user_func_t {
const char* name; /*!< function name */
pars_user_func_cb_t func; /*!< function address */
void* arg; /*!< user-supplied argument */
};
/** Bound literal. */
-struct pars_bound_lit_struct {
+struct pars_bound_lit_t {
const char* name; /*!< name */
const void* address; /*!< address */
ulint length; /*!< length of data */
ulint type; /*!< type, e.g. DATA_FIXBINARY */
ulint prtype; /*!< precise type, e.g. DATA_UNSIGNED */
+ sym_node_t* node; /*!< symbol node */
};
/** Bound identifier. */
-struct pars_bound_id_struct {
+struct pars_bound_id_t {
const char* name; /*!< name */
const char* id; /*!< identifier */
};
/** Struct used to denote a reserved word in a parsing tree */
-struct pars_res_word_struct{
+struct pars_res_word_t{
int code; /*!< the token code for the reserved word from
pars0grm.h */
};
/** A predefined function or operator node in a parsing tree; this construct
is also used for some non-functions like the assignment ':=' */
-struct func_node_struct{
+struct func_node_t{
que_common_t common; /*!< type: QUE_NODE_FUNC */
int func; /*!< token code of the function name */
- ulint class; /*!< class of the function */
+ ulint fclass; /*!< class of the function */
que_node_t* args; /*!< argument(s) of the function */
UT_LIST_NODE_T(func_node_t) cond_list;
/*!< list of comparison conditions; defined
@@ -650,14 +728,14 @@ struct func_node_struct{
};
/** An order-by node in a select */
-struct order_node_struct{
+struct order_node_t{
que_common_t common; /*!< type: QUE_NODE_ORDER */
sym_node_t* column; /*!< order-by column */
ibool asc; /*!< TRUE if ascending, FALSE if descending */
};
/** Procedure definition node */
-struct proc_node_struct{
+struct proc_node_t{
que_common_t common; /*!< type: QUE_NODE_PROC */
sym_node_t* proc_id; /*!< procedure name symbol in the symbol
table of this same procedure */
@@ -667,14 +745,14 @@ struct proc_node_struct{
};
/** elsif-element node */
-struct elsif_node_struct{
+struct elsif_node_t{
que_common_t common; /*!< type: QUE_NODE_ELSIF */
que_node_t* cond; /*!< if condition */
que_node_t* stat_list; /*!< statement list */
};
/** if-statement node */
-struct if_node_struct{
+struct if_node_t{
que_common_t common; /*!< type: QUE_NODE_IF */
que_node_t* cond; /*!< if condition */
que_node_t* stat_list; /*!< statement list */
@@ -683,14 +761,14 @@ struct if_node_struct{
};
/** while-statement node */
-struct while_node_struct{
+struct while_node_t{
que_common_t common; /*!< type: QUE_NODE_WHILE */
que_node_t* cond; /*!< while condition */
que_node_t* stat_list; /*!< statement list */
};
/** for-loop-statement node */
-struct for_node_struct{
+struct for_node_t{
que_common_t common; /*!< type: QUE_NODE_FOR */
sym_node_t* loop_var; /*!< loop variable: this is the
dereferenced symbol from the
@@ -707,24 +785,24 @@ struct for_node_struct{
};
/** exit statement node */
-struct exit_node_struct{
+struct exit_node_t{
que_common_t common; /*!< type: QUE_NODE_EXIT */
};
/** return-statement node */
-struct return_node_struct{
+struct return_node_t{
que_common_t common; /*!< type: QUE_NODE_RETURN */
};
/** Assignment statement node */
-struct assign_node_struct{
+struct assign_node_t{
que_common_t common; /*!< type: QUE_NODE_ASSIGNMENT */
sym_node_t* var; /*!< variable to set */
que_node_t* val; /*!< value to assign */
};
/** Column assignment node */
-struct col_assign_node_struct{
+struct col_assign_node_t{
que_common_t common; /*!< type: QUE_NODE_COL_ASSIGN */
sym_node_t* col; /*!< column to set */
que_node_t* val; /*!< value to assign */
diff --git a/storage/xtradb/include/pars0pars.ic b/storage/xtradb/include/pars0pars.ic
index 558d1093bfe..4c88337a265 100644
--- a/storage/xtradb/include/pars0pars.ic
+++ b/storage/xtradb/include/pars0pars.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/pars0sym.h b/storage/xtradb/include/pars0sym.h
index 9241aff3be1..bcf73639228 100644
--- a/storage/xtradb/include/pars0sym.h
+++ b/storage/xtradb/include/pars0sym.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -67,7 +67,7 @@ sym_node_t*
sym_tab_add_str_lit(
/*================*/
sym_tab_t* sym_tab, /*!< in: symbol table */
- byte* str, /*!< in: string with no quotes around
+ const byte* str, /*!< in: string with no quotes around
it */
ulint len); /*!< in: string length */
/******************************************************************//**
@@ -80,6 +80,16 @@ sym_tab_add_bound_lit(
sym_tab_t* sym_tab, /*!< in: symbol table */
const char* name, /*!< in: name of bound literal */
ulint* lit_type); /*!< out: type of literal (PARS_*_LIT) */
+/**********************************************************************
+Rebind literal to a node in the symbol table. */
+
+sym_node_t*
+sym_tab_rebind_lit(
+/*===============*/
+ /* out: symbol table node */
+ sym_node_t* node, /* in: node that is bound to literal*/
+ const void* address, /* in: pointer to data */
+ ulint length); /* in: length of data */
/******************************************************************//**
Adds an SQL null literal to a symbol table.
@return symbol table node */
@@ -109,18 +119,21 @@ sym_tab_add_bound_id(
sym_tab_t* sym_tab, /*!< in: symbol table */
const char* name); /*!< in: name of bound id */
-/** Index of sym_node_struct::field_nos corresponding to the clustered index */
+/** Index of sym_node_t::field_nos corresponding to the clustered index */
#define SYM_CLUST_FIELD_NO 0
-/** Index of sym_node_struct::field_nos corresponding to a secondary index */
+/** Index of sym_node_t::field_nos corresponding to a secondary index */
#define SYM_SEC_FIELD_NO 1
/** Types of a symbol table node */
enum sym_tab_entry {
+ SYM_UNSET, /*!< Unset entry. */
SYM_VAR = 91, /*!< declared parameter or local
variable of a procedure */
SYM_IMPLICIT_VAR, /*!< storage for a intermediate result
of a calculation */
SYM_LIT, /*!< literal */
+ SYM_TABLE_REF_COUNTED, /*!< database table name, ref counted. Must
+ be closed explicitly. */
SYM_TABLE, /*!< database table name */
SYM_COLUMN, /*!< database table name */
SYM_CURSOR, /*!< named cursor */
@@ -130,7 +143,7 @@ enum sym_tab_entry {
};
/** Symbol table node */
-struct sym_node_struct{
+struct sym_node_t{
que_common_t common; /*!< node type:
QUE_NODE_SYMBOL */
/* NOTE: if the data field in 'common.val' is not NULL and the symbol
@@ -210,10 +223,11 @@ struct sym_node_struct{
the symbol table */
UT_LIST_NODE_T(sym_node_t) sym_list; /*!< list of symbol
nodes */
+ sym_node_t* like_node; /* LIKE operator node*/
};
/** Symbol table */
-struct sym_tab_struct{
+struct sym_tab_t{
que_t* query_graph;
/*!< query graph generated by the
parser */
diff --git a/storage/xtradb/include/pars0sym.ic b/storage/xtradb/include/pars0sym.ic
index ecf014908a9..266c1a6310d 100644
--- a/storage/xtradb/include/pars0sym.ic
+++ b/storage/xtradb/include/pars0sym.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/pars0types.h b/storage/xtradb/include/pars0types.h
index 4f3b2c06db6..47f4b432d20 100644
--- a/storage/xtradb/include/pars0types.h
+++ b/storage/xtradb/include/pars0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,24 +26,24 @@ Created 1/11/1998 Heikki Tuuri
#ifndef pars0types_h
#define pars0types_h
-typedef struct pars_info_struct pars_info_t;
-typedef struct pars_user_func_struct pars_user_func_t;
-typedef struct pars_bound_lit_struct pars_bound_lit_t;
-typedef struct pars_bound_id_struct pars_bound_id_t;
-typedef struct sym_node_struct sym_node_t;
-typedef struct sym_tab_struct sym_tab_t;
-typedef struct pars_res_word_struct pars_res_word_t;
-typedef struct func_node_struct func_node_t;
-typedef struct order_node_struct order_node_t;
-typedef struct proc_node_struct proc_node_t;
-typedef struct elsif_node_struct elsif_node_t;
-typedef struct if_node_struct if_node_t;
-typedef struct while_node_struct while_node_t;
-typedef struct for_node_struct for_node_t;
-typedef struct exit_node_struct exit_node_t;
-typedef struct return_node_struct return_node_t;
-typedef struct assign_node_struct assign_node_t;
-typedef struct col_assign_node_struct col_assign_node_t;
+struct pars_info_t;
+struct pars_user_func_t;
+struct pars_bound_lit_t;
+struct pars_bound_id_t;
+struct sym_node_t;
+struct sym_tab_t;
+struct pars_res_word_t;
+struct func_node_t;
+struct order_node_t;
+struct proc_node_t;
+struct elsif_node_t;
+struct if_node_t;
+struct while_node_t;
+struct for_node_t;
+struct exit_node_t;
+struct return_node_t;
+struct assign_node_t;
+struct col_assign_node_t;
typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t;
diff --git a/storage/xtradb/include/que0que.h b/storage/xtradb/include/que0que.h
index 8de221580fd..e5b2a1ba3fc 100644
--- a/storage/xtradb/include/que0que.h
+++ b/storage/xtradb/include/que0que.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -29,6 +29,7 @@ Created 5/27/1996 Heikki Tuuri
#include "univ.i"
#include "data0data.h"
#include "dict0types.h"
+#include "btr0sea.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "srv0srv.h"
@@ -41,14 +42,9 @@ Created 5/27/1996 Heikki Tuuri
of SQL execution in the UNIV_SQL_DEBUG version */
extern ibool que_trace_on;
-/***********************************************************************//**
-Adds a query graph to the session's list of graphs. */
-UNIV_INTERN
-void
-que_graph_publish(
-/*==============*/
- que_t* graph, /*!< in: graph */
- sess_t* sess); /*!< in: session */
+/** Mutex protecting the query threads. */
+extern ib_mutex_t que_thr_mutex;
+
/***********************************************************************//**
Creates a query graph fork node.
@return own: fork node */
@@ -114,8 +110,8 @@ que_graph_free(
afterwards! */
/**********************************************************************//**
Stops a query thread if graph or trx is in a state requiring it. The
-conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
-to be reserved.
+conditions are tested in the order (1) graph, (2) trx. The lock_sys_t::mutex
+has to be reserved.
@return TRUE if stopped */
UNIV_INTERN
ibool
@@ -143,7 +139,7 @@ que_thr_stop_for_mysql_no_error(
/**********************************************************************//**
A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
query thread is stopped and made inactive, except in the case where
-it was put to the lock wait state in lock0lock.c, but the lock has already
+it was put to the lock wait state in lock0lock.cc, but the lock has already
been granted or the transaction chosen as a victim in deadlock resolution. */
UNIV_INTERN
void
@@ -158,44 +154,17 @@ que_run_threads(
/*============*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
-After signal handling is finished, returns control to a query graph error
-handling routine. (Currently, just returns the control to the root of the
-graph so that the graph can communicate an error message to the client.) */
-UNIV_INTERN
-void
-que_fork_error_handle(
-/*==================*/
- trx_t* trx, /*!< in: trx */
- que_t* fork); /*!< in: query graph which was run before signal
- handling started, NULL not allowed */
-/**********************************************************************//**
-Moves a suspended query thread to the QUE_THR_RUNNING state and releases
-a single worker thread to execute it. This function should be used to end
+Moves a suspended query thread to the QUE_THR_RUNNING state and releases
+a worker thread to execute it. This function should be used to end
the wait state of a query thread waiting for a lock or a stored procedure
-completion. */
+completion.
+@return query thread instance of thread to wakeup or NULL */
UNIV_INTERN
-void
-que_thr_end_wait(
-/*=============*/
- que_thr_t* thr, /*!< in: query thread in the
- QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
- que_thr_t** next_thr); /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/**********************************************************************//**
-Same as que_thr_end_wait, but no parameter next_thr available. */
-UNIV_INTERN
-void
-que_thr_end_wait_no_next_thr(
-/*=========================*/
- que_thr_t* thr); /*!< in: query thread in the
- QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
+que_thr_t*
+que_thr_end_lock_wait(
+/*==================*/
+ trx_t* trx); /*!< in: transaction in the
+ QUE_THR_LOCK_WAIT state */
/**********************************************************************//**
Starts execution of a command in a query fork. Picks a query thread which
is not in the QUE_THR_RUNNING state and moves it to that state. If none
@@ -296,6 +265,14 @@ que_node_list_add_last(
/*===================*/
que_node_t* node_list, /*!< in: node list, or NULL */
que_node_t* node); /*!< in: node */
+/*************************************************************************
+Get the last node from the list.*/
+UNIV_INLINE
+que_node_t*
+que_node_list_get_last(
+/*===================*/
+ /* out: last node from list.*/
+ que_node_t* node_list); /* in: node list, must not be NULL */
/*********************************************************************//**
Gets a query graph node list length.
@return length, for NULL list 0 */
@@ -308,7 +285,7 @@ que_node_list_get_len(
Checks if graph, trx, or session is in a state where the query thread should
be stopped.
@return TRUE if should be stopped; NOTE that if the peek is made
-without reserving the kernel mutex, then another peek with the mutex
+without reserving the trx_t::mutex, then another peek with the mutex
reserved is necessary before deciding the actual stopping */
UNIV_INLINE
ibool
@@ -334,7 +311,7 @@ que_node_print_info(
Evaluate the given SQL
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
que_eval_sql(
/*=========*/
pars_info_t* info, /*!< in: info struct, or NULL */
@@ -344,33 +321,50 @@ que_eval_sql(
dict_sys->mutex around call to pars_sql. */
trx_t* trx); /*!< in: trx */
-/* Query graph query thread node: the fields are protected by the kernel
-mutex with the exceptions named below */
+/**********************************************************************//**
+Round robin scheduler.
+@return a query thread of the graph moved to QUE_THR_RUNNING state, or
+NULL; the query thread should be executed by que_run_threads by the
+caller */
+UNIV_INTERN
+que_thr_t*
+que_fork_scheduler_round_robin(
+/*===========================*/
+ que_fork_t* fork, /*!< in: a query fork */
+ que_thr_t* thr); /*!< in: current pos */
+
+/*********************************************************************//**
+Initialise the query sub-system. */
+UNIV_INTERN
+void
+que_init(void);
+/*==========*/
-struct que_thr_struct{
+/*********************************************************************//**
+Close the query sub-system. */
+UNIV_INTERN
+void
+que_close(void);
+/*===========*/
+
+/* Query graph query thread node: the fields are protected by the
+trx_t::mutex with the exceptions named below */
+
+struct que_thr_t{
que_common_t common; /*!< type: QUE_NODE_THR */
ulint magic_n; /*!< magic number to catch memory
corruption */
que_node_t* child; /*!< graph child node */
que_t* graph; /*!< graph where this node belongs */
+ ulint state; /*!< state of the query thread */
ibool is_active; /*!< TRUE if the thread has been set
to the run state in
que_thr_move_to_run_state, but not
deactivated in
que_thr_dec_reference_count */
- ulint state; /*!< state of the query thread */
- UT_LIST_NODE_T(que_thr_t)
- thrs; /*!< list of thread nodes of the fork
- node */
- UT_LIST_NODE_T(que_thr_t)
- trx_thrs; /*!< lists of threads in wait list of
- the trx */
- UT_LIST_NODE_T(que_thr_t)
- queue; /*!< list of runnable thread nodes in
- the server task queue */
/*------------------------------*/
/* The following fields are private to the OS thread executing the
- query thread, and are not protected by the kernel mutex: */
+ query thread, and are not protected by any mutex: */
que_node_t* run_node; /*!< pointer to the node where the
subgraph down from this node is
@@ -381,6 +375,21 @@ struct que_thr_struct{
thus far */
ulint lock_state; /*!< lock state of thread (table or
row) */
+ struct srv_slot_t*
+ slot; /* The thread slot in the wait
+ array in srv_sys_t */
+ /*------------------------------*/
+ /* The following fields are links for the various lists that
+ this type can be on. */
+ UT_LIST_NODE_T(que_thr_t)
+ thrs; /*!< list of thread nodes of the fork
+ node */
+ UT_LIST_NODE_T(que_thr_t)
+ trx_thrs; /*!< lists of threads in wait list of
+ the trx */
+ UT_LIST_NODE_T(que_thr_t)
+ queue; /*!< list of runnable thread nodes in
+ the server task queue */
ulint fk_cascade_depth; /*!< maximum cascading call depth
supported for foreign key constraint
related delete/updates */
@@ -389,8 +398,8 @@ struct que_thr_struct{
#define QUE_THR_MAGIC_N 8476583
#define QUE_THR_MAGIC_FREED 123461526
-/* Query graph fork node: its fields are protected by the kernel mutex */
-struct que_fork_struct{
+/* Query graph fork node: its fields are protected by the query thread mutex */
+struct que_fork_t{
que_common_t common; /*!< type: QUE_NODE_FORK */
que_t* graph; /*!< query graph of this node */
ulint fork_type; /*!< fork type */
@@ -492,8 +501,6 @@ struct que_fork_struct{
#define QUE_NODE_CALL 31
#define QUE_NODE_EXIT 32
-#define QUE_NODE_INSERT_STATS 34
-
/* Query thread states */
#define QUE_THR_RUNNING 1
#define QUE_THR_PROCEDURE_WAIT 2
@@ -504,7 +511,6 @@ struct que_fork_struct{
thread has done its task */
#define QUE_THR_COMMAND_WAIT 4
#define QUE_THR_LOCK_WAIT 5
-#define QUE_THR_SIG_REPLY_WAIT 6
#define QUE_THR_SUSPENDED 7
#define QUE_THR_ERROR 8
@@ -518,7 +524,6 @@ struct que_fork_struct{
#define QUE_CUR_START 2
#define QUE_CUR_END 3
-
#ifndef UNIV_NONINL
#include "que0que.ic"
#endif
diff --git a/storage/xtradb/include/que0que.ic b/storage/xtradb/include/que0que.ic
index 2de679e3894..eff5a86d958 100644
--- a/storage/xtradb/include/que0que.ic
+++ b/storage/xtradb/include/que0que.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -88,7 +88,7 @@ que_node_get_type(
{
ut_ad(node);
- return(((que_common_t*)node)->type);
+ return(((que_common_t*) node)->type);
}
/***********************************************************************//**
@@ -101,7 +101,7 @@ que_node_get_val(
{
ut_ad(node);
- return(&(((que_common_t*)node)->val));
+ return(&(((que_common_t*) node)->val));
}
/***********************************************************************//**
@@ -115,7 +115,7 @@ que_node_get_val_buf_size(
{
ut_ad(node);
- return(((que_common_t*)node)->val_buf_size);
+ return(((que_common_t*) node)->val_buf_size);
}
/***********************************************************************//**
@@ -129,7 +129,7 @@ que_node_set_val_buf_size(
{
ut_ad(node);
- ((que_common_t*)node)->val_buf_size = size;
+ ((que_common_t*) node)->val_buf_size = size;
}
/***********************************************************************//**
@@ -143,7 +143,7 @@ que_node_set_parent(
{
ut_ad(node);
- ((que_common_t*)node)->parent = parent;
+ ((que_common_t*) node)->parent = parent;
}
/***********************************************************************//**
@@ -192,6 +192,28 @@ que_node_list_add_last(
return(node_list);
}
+/*************************************************************************
+Gets the last node from the list.*/
+UNIV_INLINE
+que_node_t*
+que_node_list_get_last(
+/*===================*/
+ /* out: last node in list.*/
+ que_node_t* node_list) /* in: node list */
+{
+ que_common_t* node;
+
+ ut_a(node_list != NULL);
+
+ node = (que_common_t*) node_list;
+
+ /* We need the last element */
+ while (node->brother != NULL) {
+ node = (que_common_t*) node->brother;
+ }
+
+ return(node);
+}
/*********************************************************************//**
Gets the next list node in a list of query graph nodes.
@return next node in a list of nodes */
@@ -201,7 +223,7 @@ que_node_get_next(
/*==============*/
que_node_t* node) /*!< in: node in a list */
{
- return(((que_common_t*)node)->brother);
+ return(((que_common_t*) node)->brother);
}
/*********************************************************************//**
@@ -236,14 +258,14 @@ que_node_get_parent(
/*================*/
que_node_t* node) /*!< in: node */
{
- return(((que_common_t*)node)->parent);
+ return(((que_common_t*) node)->parent);
}
/**********************************************************************//**
Checks if graph, trx, or session is in a state where the query thread should
be stopped.
@return TRUE if should be stopped; NOTE that if the peek is made
-without reserving the kernel mutex, then another peek with the mutex
+without reserving the trx mutex, then another peek with the mutex
reserved is necessary before deciding the actual stopping */
UNIV_INLINE
ibool
@@ -258,9 +280,9 @@ que_thr_peek_stop(
trx = graph->trx;
if (graph->state != QUE_FORK_ACTIVE
- || trx->que_state == TRX_QUE_LOCK_WAIT
- || (UT_LIST_GET_LEN(trx->signals) > 0
- && trx->que_state == TRX_QUE_RUNNING)) {
+ || trx->lock.que_state == TRX_QUE_LOCK_WAIT
+ || (trx->lock.que_state != TRX_QUE_ROLLING_BACK
+ && trx->lock.que_state != TRX_QUE_RUNNING)) {
return(TRUE);
}
diff --git a/storage/xtradb/include/que0types.h b/storage/xtradb/include/que0types.h
index 69fb0557d8b..0f11cad301a 100644
--- a/storage/xtradb/include/que0types.h
+++ b/storage/xtradb/include/que0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -32,18 +32,15 @@ Created 5/27/1996 Heikki Tuuri
/* Pseudotype for all graph nodes */
typedef void que_node_t;
-typedef struct que_fork_struct que_fork_t;
-
/* Query graph root is a fork node */
-typedef que_fork_t que_t;
+typedef struct que_fork_t que_t;
-typedef struct que_thr_struct que_thr_t;
-typedef struct que_common_struct que_common_t;
+struct que_thr_t;
/* Common struct at the beginning of each query graph node; the name of this
substruct must be 'common' */
-struct que_common_struct{
+struct que_common_t{
ulint type; /*!< query node type */
que_node_t* parent; /*!< back pointer to parent node, or NULL */
que_node_t* brother;/* pointer to a possible brother node */
diff --git a/storage/xtradb/include/read0i_s.h b/storage/xtradb/include/read0i_s.h
new file mode 100644
index 00000000000..11b63affe09
--- /dev/null
+++ b/storage/xtradb/include/read0i_s.h
@@ -0,0 +1,54 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010-2012, Percona Inc. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+#ifndef read0i_s_h
+#define read0i_s_h
+
+#include <trx0types.h>
+
+struct i_s_xtradb_read_view_struct {
+ undo_no_t undo_no;/*!< 0 or if type is
+ VIEW_HIGH_GRANULARITY
+ transaction undo_no when this high-granularity
+ consistent read view was created */
+ trx_id_t low_limit_no;
+ /*!< The view does not need to see the undo
+ logs for transactions whose transaction number
+ is strictly smaller (<) than this value: they
+ can be removed in purge if not needed by other
+ views */
+ trx_id_t low_limit_id;
+ /*!< The read should not see any transaction
+ with trx id >= this value. In other words,
+ this is the "high water mark". */
+ trx_id_t up_limit_id;
+ /*!< The read should see all trx ids which
+ are strictly smaller (<) than this value.
+ In other words,
+ this is the "low water mark". */
+};
+
+typedef struct i_s_xtradb_read_view_struct i_s_xtradb_read_view_t;
+
+UNIV_INTERN
+i_s_xtradb_read_view_t*
+read_fill_i_s_xtradb_read_view(i_s_xtradb_read_view_t *rv);
+
+
+#endif /* read0i_s_h */
diff --git a/storage/xtradb/include/read0read.h b/storage/xtradb/include/read0read.h
index c6ba9557d32..e17d49b1321 100644
--- a/storage/xtradb/include/read0read.h
+++ b/storage/xtradb/include/read0read.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -31,6 +31,7 @@ Created 2/16/1997 Heikki Tuuri
#include "ut0byte.h"
#include "ut0lst.h"
+#include "btr0types.h"
#include "trx0trx.h"
#include "trx0sys.h"
#include "read0types.h"
@@ -45,10 +46,8 @@ read_view_open_now(
/*===============*/
trx_id_t cr_trx_id, /*!< in: trx_id of creating
transaction, or 0 used in purge */
- read_view_t* view, /*!< in: current read view or NULL if it
- doesn't exist yet */
- ibool exclude_self); /*!< in: TRUE, if cr_trx_id should be
- excluded from the resulting view */
+ read_view_t*& view); /*!< in,out: pre-allocated view array or
+ NULL if a new one needs to be created */
/*********************************************************************//**
Makes a copy of the oldest existing read view, or opens a new. The view
@@ -56,26 +55,29 @@ must be closed with ..._close.
@return own: read view struct */
UNIV_INTERN
read_view_t*
-read_view_oldest_copy_or_open_new(
-/*==============================*/
- trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or 0 used in purge */
- read_view_t* view); /*!< in: pre-allocated view array or
+read_view_purge_open(
+/*=================*/
+ read_view_t*& clone_view, /*!< in,out: pre-allocated view that
+ will be used to clone the oldest view if
+ exists */
+ read_view_t*& view); /*!< in,out: pre-allocated view array or
NULL if a new one needs to be created */
/*********************************************************************//**
-Closes a read view. */
-UNIV_INTERN
+Remove a read view from the trx_sys->view_list. */
+UNIV_INLINE
void
-read_view_close(
-/*============*/
- read_view_t* view); /*!< in: read view */
+read_view_remove(
+/*=============*/
+ read_view_t* view, /*!< in: read view, can be 0 */
+ bool own_mutex); /*!< in: true if caller owns the
+ trx_sys_t::mutex */
/*********************************************************************//**
Frees memory allocated by a read view. */
UNIV_INTERN
void
read_view_free(
/*===========*/
- read_view_t* view); /*< in: read view */
+ read_view_t*& view); /*!< in,out: read view */
/*********************************************************************//**
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
@@ -86,20 +88,21 @@ read_view_close_for_mysql(
trx_t* trx); /*!< in: trx which has a read view */
/*********************************************************************//**
Checks if a read view sees the specified transaction.
-@return TRUE if sees */
+@return true if sees */
UNIV_INLINE
-ibool
+bool
read_view_sees_trx_id(
/*==================*/
const read_view_t* view, /*!< in: read view */
- trx_id_t trx_id);/*!< in: trx id */
+ trx_id_t trx_id) /*!< in: trx id */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
-Prints a read view to stderr. */
+Prints a read view to file. */
UNIV_INTERN
void
read_view_print(
/*============*/
- FILE* file,
+ FILE* file, /*!< in: file to print to */
const read_view_t* view); /*!< in: read view */
/*********************************************************************//**
Create a consistent cursor view for mysql to be used in cursors. In this
@@ -133,7 +136,7 @@ read_cursor_set_for_mysql(
/** Read view lists the trx ids of those transactions for which a consistent
read should not see the modifications to the database. */
-struct read_view_struct{
+struct read_view_t{
ulint type; /*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
undo_no_t undo_no;/*!< 0 or if type is
VIEW_HIGH_GRANULARITY
@@ -160,14 +163,14 @@ struct read_view_struct{
/*!< Maximum number of cells in the trx_ids
array */
trx_id_t* descriptors;
- /*!< Array of trx descriptors which the read
- should not see: typically, these are the active
- transactions at the time when the read is
- serialized, except the reading transaction
+ /*!< Additional trx ids which the read should
+ not see: typically, these are the read-write
+ active transactions at the time when the read
+ is serialized, except the reading transaction
itself; the trx ids in this array are in a
- descending order. These trx_ids should be
- between the "low" and "high" water marks, that
- is, up_limit_id and low_limit_id. */
+ ascending order. These trx_ids should be
+ between the "low" and "high" water marks,
+ that is, up_limit_id and low_limit_id. */
trx_id_t creator_trx_id;
/*!< trx id of creating transaction, or
0 used in purge */
@@ -191,7 +194,7 @@ struct read_view_struct{
cursors. This struct holds both heap where consistent read view
is allocated and pointer to a read view. */
-struct cursor_view_struct{
+struct cursor_view_t{
mem_heap_t* heap;
/*!< Memory heap for the cursor view */
read_view_t* read_view;
diff --git a/storage/xtradb/include/read0read.ic b/storage/xtradb/include/read0read.ic
index 62c47e05b9d..66bef8866c9 100644
--- a/storage/xtradb/include/read0read.ic
+++ b/storage/xtradb/include/read0read.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -23,50 +23,66 @@ Cursor read
Created 2/16/1997 Heikki Tuuri
*******************************************************/
+#include "trx0sys.h"
+
+#ifdef UNIV_DEBUG
/*********************************************************************//**
-Gets the nth trx id in a read view.
+Validates a read view object. */
+static
+bool
+read_view_validate(
+/*===============*/
+ const read_view_t* view) /*!< in: view to validate */
+{
+ ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(view->max_descr >= view->n_descr);
+ ut_ad(view->descriptors == NULL || view->max_descr > 0);
-Upstream code stores array of trx_ids in the descending order. Percona Server
-keeps it in the ascending order for performance reasons. Let us keep the
-semantics.
+ /* Check that the view->descriptors array is in ascending order. */
+ for (ulint i = 1; i < view->n_descr; ++i) {
-@return trx id */
-UNIV_INLINE
-trx_id_t
-read_view_get_nth_trx_id(
-/*=====================*/
- const read_view_t* view, /*!< in: read view */
- ulint n) /*!< in: position */
-{
- ut_ad(n < view->n_descr);
+ ut_a(view->descriptors[i] > view->descriptors[i - 1]);
+ }
- return(view->descriptors[view->n_descr - 1 - n]);
+ return(true);
}
-/*********************************************************************//**
-Sets the nth trx id in a read view.
+/** Functor to validate the view list. */
+struct ViewCheck {
-Upstream code stores array of trx_ids in the descending order. Percona Server
-keeps it in the ascending order for performance reasons. Let us keep the
-semantics. */
-UNIV_INLINE
-void
-read_view_set_nth_trx_id(
-/*=====================*/
- read_view_t* view, /*!< in: read view */
- ulint n, /*!< in: position */
- trx_id_t trx_id) /*!< in: trx id to set */
+ ViewCheck() : m_prev_view(0) { }
+
+ void operator()(const read_view_t* view)
+ {
+ ut_a(m_prev_view == NULL
+ || m_prev_view->low_limit_no >= view->low_limit_no);
+
+ m_prev_view = view;
+ }
+
+ const read_view_t* m_prev_view;
+};
+
+/*********************************************************************//**
+Validates a read view list. */
+static
+bool
+read_view_list_validate(void)
+/*=========================*/
{
- ut_ad(n < view->n_descr);
+ ut_ad(mutex_own(&trx_sys->mutex));
- view->descriptors[view->n_descr - 1 - n] = trx_id;
+ ut_list_map(trx_sys->view_list, &read_view_t::view_list, ViewCheck());
+
+ return(true);
}
+#endif /* UNIV_DEBUG */
/*********************************************************************//**
Checks if a read view sees the specified transaction.
-@return TRUE if sees */
+@return true if sees */
UNIV_INLINE
-ibool
+bool
read_view_sees_trx_id(
/*==================*/
const read_view_t* view, /*!< in: read view */
@@ -74,12 +90,10 @@ read_view_sees_trx_id(
{
if (trx_id < view->up_limit_id) {
- return(TRUE);
- }
-
- if (trx_id >= view->low_limit_id) {
+ return(true);
+ } else if (trx_id >= view->low_limit_id) {
- return(FALSE);
+ return(false);
}
/* Do a binary search over this view's descriptors array */
@@ -87,3 +101,31 @@ read_view_sees_trx_id(
return(trx_find_descriptor(view->descriptors, view->n_descr,
trx_id) == NULL);
}
+
+/*********************************************************************//**
+Remove a read view from the trx_sys->view_list. */
+UNIV_INLINE
+void
+read_view_remove(
+/*=============*/
+ read_view_t* view, /*!< in: read view, can be 0 */
+ bool own_mutex) /*!< in: true if caller owns the
+ trx_sys_t::mutex */
+{
+ if (view != 0) {
+ if (!own_mutex) {
+ mutex_enter(&trx_sys->mutex);
+ }
+
+ ut_ad(read_view_validate(view));
+
+ UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
+
+ ut_ad(read_view_list_validate());
+
+ if (!own_mutex) {
+ mutex_exit(&trx_sys->mutex);
+ }
+ }
+}
+
diff --git a/storage/xtradb/include/read0types.h b/storage/xtradb/include/read0types.h
index 4bb9618448b..969f4ebb637 100644
--- a/storage/xtradb/include/read0types.h
+++ b/storage/xtradb/include/read0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,7 +26,7 @@ Created 2/16/1997 Heikki Tuuri
#ifndef read0types_h
#define read0types_h
-typedef struct read_view_struct read_view_t;
-typedef struct cursor_view_struct cursor_view_t;
+struct read_view_t;
+struct cursor_view_t;
#endif
diff --git a/storage/xtradb/include/rem0cmp.h b/storage/xtradb/include/rem0cmp.h
index c5ef0d5438a..cb3c85ac2c8 100644
--- a/storage/xtradb/include/rem0cmp.h
+++ b/storage/xtradb/include/rem0cmp.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -75,6 +75,63 @@ cmp_data_data_slow(
const byte* data2, /*!< in: data field (== a pointer to a memory
buffer) */
ulint len2); /*!< in: data field length or UNIV_SQL_NULL */
+
+/*****************************************************************
+This function is used to compare two data fields for which we know the
+data type to be VARCHAR.
+@return 1, 0, -1, if lhs is greater, equal, less than rhs, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow_varchar(
+/*=======================*/
+ const byte* lhs, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint lhs_len,/* in: data field length or UNIV_SQL_NULL */
+ const byte* rhs, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint rhs_len);/* in: data field length or UNIV_SQL_NULL */
+/*****************************************************************
+This function is used to compare two varchar/char fields. The comparison
+is for the LIKE operator.
+@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow_like_prefix(
+/*===========================*/
+ const byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ const byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2); /* in: data field length or UNIV_SQL_NULL */
+/*****************************************************************
+This function is used to compare two varchar/char fields. The comparison
+is for the LIKE operator.
+@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow_like_suffix(
+/*===========================*/
+ const byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ const byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2); /* in: data field length or UNIV_SQL_NULL */
+/*****************************************************************
+This function is used to compare two varchar/char fields. The comparison
+is for the LIKE operator.
+@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow_like_substr(
+/*===========================*/
+ const byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ const byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2); /* in: data field length or UNIV_SQL_NULL */
/*************************************************************//**
This function is used to compare two dfields where at least the first
has its data type field set.
@@ -99,21 +156,28 @@ respectively, when only the common first fields are compared, or until
the first externally stored field in rec */
UNIV_INTERN
int
-cmp_dtuple_rec_with_match(
-/*======================*/
+cmp_dtuple_rec_with_match_low(
+/*==========================*/
const dtuple_t* dtuple, /*!< in: data tuple */
const rec_t* rec, /*!< in: physical record which differs from
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
+ ulint n_cmp, /*!< in: number of fields to compare */
+ ulint* matched_fields,
+ /*!< in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
- ulint* matched_bytes); /*!< in/out: number of already matched
+ ulint* matched_bytes)
+ /*!< in/out: number of already matched
bytes within the first field not completely
matched; when function returns, contains the
value for current comparison */
+ __attribute__((nonnull));
+#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields,bytes) \
+ cmp_dtuple_rec_with_match_low( \
+ tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields,bytes)
/**************************************************************//**
Compares a data tuple to a physical record.
@see cmp_dtuple_rec_with_match
@@ -139,7 +203,9 @@ cmp_dtuple_is_prefix_of_rec(
/*************************************************************//**
Compare two physical records that contain the same number of columns,
none of which are stored externally.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
+@retval 1 if rec1 (including non-ordering columns) is greater than rec2
+@retval -1 if rec1 (including non-ordering columns) is less than rec2
+@retval 0 if rec1 is a duplicate of rec2 */
UNIV_INTERN
int
cmp_rec_rec_simple(
@@ -149,8 +215,10 @@ cmp_rec_rec_simple(
const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
const dict_index_t* index, /*!< in: data dictionary index */
- ibool* null_eq);/*!< out: set to TRUE if
- found matching null values */
+ struct TABLE* table) /*!< in: MySQL table, for reporting
+ duplicate key value if applicable,
+ or NULL */
+ __attribute__((nonnull(1,2,3,4), warn_unused_result));
/*************************************************************//**
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
@@ -192,6 +260,39 @@ cmp_rec_rec(
const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
dict_index_t* index); /*!< in: data dictionary index */
+/*****************************************************************
+This function is used to compare two dfields where at least the first
+has its data type field set. */
+UNIV_INTERN
+int
+cmp_dfield_dfield_like_prefix(
+/*==========================*/
+ /* out: 1, 0, -1, if dfield1 is greater, equal,
+ less than dfield2, respectively */
+ dfield_t* dfield1,/* in: data field; must have type field set */
+ dfield_t* dfield2);/* in: data field */
+/*****************************************************************
+This function is used to compare two dfields where at least the first
+has its data type field set. */
+UNIV_INLINE
+int
+cmp_dfield_dfield_like_substr(
+/*==========================*/
+ /* out: 1, 0, -1, if dfield1 is greater, equal,
+ less than dfield2, respectively */
+ dfield_t* dfield1,/* in: data field; must have type field set */
+ dfield_t* dfield2);/* in: data field */
+/*****************************************************************
+This function is used to compare two dfields where at least the first
+has its data type field set. */
+UNIV_INLINE
+int
+cmp_dfield_dfield_like_suffix(
+/*==========================*/
+ /* out: 1, 0, -1, if dfield1 is greater, equal,
+ less than dfield2, respectively */
+ dfield_t* dfield1,/* in: data field; must have type field set */
+ dfield_t* dfield2);/* in: data field */
#ifndef UNIV_NONINL
#include "rem0cmp.ic"
diff --git a/storage/xtradb/include/rem0cmp.ic b/storage/xtradb/include/rem0cmp.ic
index 22db4b0cd47..67a2dcacba1 100644
--- a/storage/xtradb/include/rem0cmp.ic
+++ b/storage/xtradb/include/rem0cmp.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -43,6 +43,60 @@ cmp_data_data(
return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2));
}
+/*****************************************************************
+This function is used to compare two (CHAR) data fields for the LIKE
+operator. */
+UNIV_INLINE
+int
+cmp_data_data_like_prefix(
+/*======================*/
+ /* out: 1, 0, -1, if data1 is greater, equal,
+ less than data2, respectively */
+ byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2) /* in: data field length or UNIV_SQL_NULL */
+{
+ return(cmp_data_data_slow_like_prefix(data1, len1, data2, len2));
+}
+/*****************************************************************
+This function is used to compare two (CHAR) data fields for the LIKE
+operator. */
+UNIV_INLINE
+int
+cmp_data_data_like_suffix(
+/*======================*/
+ /* out: 1, 0, -1, if data1 is greater, equal,
+ less than data2, respectively */
+ byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2) /* in: data field length or UNIV_SQL_NULL */
+{
+ return(cmp_data_data_slow_like_suffix(data1, len1, data2, len2));
+}
+/*****************************************************************
+This function is used to compare two (CHAR) data fields for the LIKE
+operator. */
+UNIV_INLINE
+int
+cmp_data_data_like_substr(
+/*======================*/
+ /* out: 1, 0, -1, if data1 is greater, equal,
+ less than data2, respectively */
+ byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2) /* in: data field length or UNIV_SQL_NULL */
+{
+ return(cmp_data_data_slow_like_substr(data1, len1, data2, len2));
+}
/*************************************************************//**
This function is used to compare two dfields where at least the first
has its data type field set.
@@ -68,6 +122,47 @@ cmp_dfield_dfield(
dfield_get_len(dfield2)));
}
+/*****************************************************************
+This function is used to compare two dfields where at least the first
+has its data type field set. */
+UNIV_INLINE
+int
+cmp_dfield_dfield_like_suffix(
+/*==========================*/
+ /* out: 1, 0, -1, if dfield1 is greater, equal,
+ less than dfield2, respectively */
+ dfield_t* dfield1,/* in: data field; must have type field set */
+ dfield_t* dfield2)/* in: data field */
+{
+ ut_ad(dfield_check_typed(dfield1));
+
+ return(cmp_data_data_like_suffix(
+ (byte*) dfield_get_data(dfield1),
+ dfield_get_len(dfield1),
+ (byte*) dfield_get_data(dfield2),
+ dfield_get_len(dfield2)));
+}
+
+/*****************************************************************
+This function is used to compare two dfields where at least the first
+has its data type field set. */
+UNIV_INLINE
+int
+cmp_dfield_dfield_like_substr(
+/*==========================*/
+ /* out: 1, 0, -1, if dfield1 is greater, equal,
+ less than dfield2, respectively */
+ dfield_t* dfield1,/* in: data field; must have type field set */
+ dfield_t* dfield2)/* in: data field */
+{
+ ut_ad(dfield_check_typed(dfield1));
+
+ return(cmp_data_data_like_substr(
+ (byte*) dfield_get_data(dfield1),
+ dfield_get_len(dfield1),
+ (byte*) dfield_get_data(dfield2),
+ dfield_get_len(dfield2)));
+}
/*************************************************************//**
This function is used to compare two physical records. Only the common
first fields are compared.
diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h
index 9dd96f609ea..2a84aee7a6f 100644
--- a/storage/xtradb/include/rem0rec.h
+++ b/storage/xtradb/include/rem0rec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -54,7 +54,7 @@ in addition to the data and the offsets */
#define REC_STATUS_INFIMUM 2
#define REC_STATUS_SUPREMUM 3
-/* The following four constants are needed in page0zip.c in order to
+/* The following four constants are needed in page0zip.cc in order to
efficiently compress and decompress pages. */
/* The offset of heap_no in a compact record */
@@ -66,6 +66,15 @@ The status is stored in the low-order bits. */
/* Length of a B-tree node pointer, in bytes */
#define REC_NODE_PTR_SIZE 4
+/** SQL null flag in a 1-byte offset of ROW_FORMAT=REDUNDANT records */
+#define REC_1BYTE_SQL_NULL_MASK 0x80UL
+/** SQL null flag in a 2-byte offset of ROW_FORMAT=REDUNDANT records */
+#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
+
+/** In a 2-byte offset of ROW_FORMAT=REDUNDANT records, the second most
+significant bit denotes that the tail of a field is stored off-page. */
+#define REC_2BYTE_EXTERN_MASK 0x4000UL
+
#ifdef UNIV_DEBUG
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 4
@@ -88,7 +97,8 @@ const rec_t*
rec_get_next_ptr_const(
/*===================*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the pointer of the next chained record
on the same page.
@@ -98,7 +108,8 @@ rec_t*
rec_get_next_ptr(
/*=============*/
rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the offset of the
next chained record on the same page.
@@ -108,7 +119,8 @@ ulint
rec_get_next_offs(
/*==============*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the next record offset field
of an old-style record. */
@@ -117,7 +129,8 @@ void
rec_set_next_offs_old(
/*==================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint next); /*!< in: offset of the next record */
+ ulint next) /*!< in: offset of the next record */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to set the next record offset field
of a new-style record. */
@@ -126,7 +139,8 @@ void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /*!< in/out: new-style physical record */
- ulint next); /*!< in: offset of the next record */
+ ulint next) /*!< in: offset of the next record */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to get the number of fields
in an old-style record.
@@ -135,7 +149,8 @@ UNIV_INLINE
ulint
rec_get_n_fields_old(
/*=================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the number of fields
in a record.
@@ -145,7 +160,8 @@ ulint
rec_get_n_fields(
/*=============*/
const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index); /*!< in: record descriptor */
+ const dict_index_t* index) /*!< in: record descriptor */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
@@ -154,7 +170,8 @@ UNIV_INLINE
ulint
rec_get_n_owned_old(
/*================*/
- const rec_t* rec); /*!< in: old-style physical record */
+ const rec_t* rec) /*!< in: old-style physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
@@ -162,7 +179,8 @@ void
rec_set_n_owned_old(
/*================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint n_owned); /*!< in: the number of owned */
+ ulint n_owned) /*!< in: the number of owned */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
@@ -171,7 +189,8 @@ UNIV_INLINE
ulint
rec_get_n_owned_new(
/*================*/
- const rec_t* rec); /*!< in: new-style physical record */
+ const rec_t* rec) /*!< in: new-style physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
@@ -180,7 +199,8 @@ rec_set_n_owned_new(
/*================*/
rec_t* rec, /*!< in/out: new-style physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n_owned);/*!< in: the number of owned */
+ ulint n_owned)/*!< in: the number of owned */
+ __attribute__((nonnull(1)));
/******************************************************//**
The following function is used to retrieve the info bits of
a record.
@@ -190,7 +210,8 @@ ulint
rec_get_info_bits(
/*==============*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the info bits of a record. */
UNIV_INLINE
@@ -198,7 +219,8 @@ void
rec_set_info_bits_old(
/*==================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to set the info bits of a record. */
UNIV_INLINE
@@ -206,7 +228,8 @@ void
rec_set_info_bits_new(
/*==================*/
rec_t* rec, /*!< in/out: new-style physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function retrieves the status bits of a new-style record.
@return status bits */
@@ -214,7 +237,8 @@ UNIV_INLINE
ulint
rec_get_status(
/*===========*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the status bits of a new-style record. */
@@ -223,7 +247,8 @@ void
rec_set_status(
/*===========*/
rec_t* rec, /*!< in/out: physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to retrieve the info and status
@@ -234,7 +259,8 @@ ulint
rec_get_info_and_status_bits(
/*=========================*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the info and status
bits of a record. (Only compact records have status bits.) */
@@ -243,7 +269,8 @@ void
rec_set_info_and_status_bits(
/*=========================*/
rec_t* rec, /*!< in/out: compact physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function tells if record is delete marked.
@@ -253,7 +280,8 @@ ulint
rec_get_deleted_flag(
/*=================*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the deleted bit. */
UNIV_INLINE
@@ -261,7 +289,8 @@ void
rec_set_deleted_flag_old(
/*=====================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint flag); /*!< in: nonzero if delete marked */
+ ulint flag) /*!< in: nonzero if delete marked */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to set the deleted bit. */
UNIV_INLINE
@@ -270,7 +299,8 @@ rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /*!< in/out: new-style physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint flag); /*!< in: nonzero if delete marked */
+ ulint flag) /*!< in: nonzero if delete marked */
+ __attribute__((nonnull(1)));
/******************************************************//**
The following function tells if a new-style record is a node pointer.
@return TRUE if node pointer */
@@ -278,7 +308,8 @@ UNIV_INLINE
ibool
rec_get_node_ptr_flag(
/*==================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the order number
of an old-style record in the heap of the index page.
@@ -287,7 +318,8 @@ UNIV_INLINE
ulint
rec_get_heap_no_old(
/*================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in an old-style record. */
@@ -296,7 +328,8 @@ void
rec_set_heap_no_old(
/*================*/
rec_t* rec, /*!< in: physical record */
- ulint heap_no);/*!< in: the heap number */
+ ulint heap_no)/*!< in: the heap number */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to get the order number
of a new-style record in the heap of the index page.
@@ -305,7 +338,8 @@ UNIV_INLINE
ulint
rec_get_heap_no_new(
/*================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in a new-style record. */
@@ -314,7 +348,8 @@ void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /*!< in/out: physical record */
- ulint heap_no);/*!< in: the heap number */
+ ulint heap_no)/*!< in: the heap number */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to test whether the data offsets
in the record are stored in one-byte or two-byte format.
@@ -323,7 +358,57 @@ UNIV_INLINE
ibool
rec_get_1byte_offs_flag(
/*====================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+The following function is used to set the 1-byte offsets flag. */
+UNIV_INLINE
+void
+rec_set_1byte_offs_flag(
+/*====================*/
+ rec_t* rec, /*!< in/out: physical record */
+ ibool flag) /*!< in: TRUE if 1byte form */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 1-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return offset of the end of the field, SQL null flag ORed */
+UNIV_INLINE
+ulint
+rec_1_get_field_end_info(
+/*=====================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 2-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return offset of the end of the field, SQL null flag and extern
+storage flag ORed */
+UNIV_INLINE
+ulint
+rec_2_get_field_end_info(
+/*=====================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Returns nonzero if the field is stored off-page.
+@retval 0 if the field is stored in-page
+@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
+UNIV_INLINE
+ulint
+rec_2_is_field_extern(
+/*==================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Determine how many of the first n columns in a compact
@@ -333,9 +418,10 @@ UNIV_INTERN
ulint
rec_get_n_extern_new(
/*=================*/
- const rec_t* rec, /*!< in: compact physical record */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n); /*!< in: number of columns to scan */
+ const rec_t* rec, /*!< in: compact physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint n) /*!< in: number of columns to scan */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************//**
The following function determines the offsets to each field
@@ -356,7 +442,8 @@ rec_get_offsets_func(
(ULINT_UNDEFINED if all fields) */
mem_heap_t** heap, /*!< in/out: memory heap */
const char* file, /*!< in: file name where called */
- ulint line); /*!< in: line number where called */
+ ulint line) /*!< in: line number where called */
+ __attribute__((nonnull(1,2,5,6),warn_unused_result));
#define rec_get_offsets(rec,index,offsets,n,heap) \
rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
@@ -375,9 +462,10 @@ rec_get_offsets_reverse(
const dict_index_t* index, /*!< in: record descriptor */
ulint node_ptr,/*!< in: nonzero=node pointer,
0=leaf node */
- ulint* offsets);/*!< in/out: array consisting of
+ ulint* offsets)/*!< in/out: array consisting of
offsets[0] allocated elements */
-
+ __attribute__((nonnull));
+#ifdef UNIV_DEBUG
/************************************************************//**
Validates offsets returned by rec_get_offsets().
@return TRUE if valid */
@@ -387,9 +475,9 @@ rec_offs_validate(
/*==============*/
const rec_t* rec, /*!< in: record or NULL */
const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets);/*!< in: array returned by
+ const ulint* offsets)/*!< in: array returned by
rec_get_offsets() */
-#ifdef UNIV_DEBUG
+ __attribute__((nonnull(3), warn_unused_result));
/************************************************************//**
Updates debug data in offsets, in order to avoid bogus
rec_offs_validate() failures. */
@@ -399,8 +487,9 @@ rec_offs_make_valid(
/*================*/
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets);/*!< in: array returned by
+ ulint* offsets)/*!< in/out: array returned by
 rec_get_offsets() */
+ __attribute__((nonnull));
#else
# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
#endif /* UNIV_DEBUG */
@@ -415,8 +504,9 @@ rec_get_nth_field_offs_old(
/*=======================*/
const rec_t* rec, /*!< in: record */
ulint n, /*!< in: index of the field */
- ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
+ ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
+ __attribute__((nonnull));
#define rec_get_nth_field_old(rec, n, len) \
((rec) + rec_get_nth_field_offs_old(rec, n, len))
/************************************************************//**
@@ -429,7 +519,8 @@ ulint
rec_get_nth_field_size(
/*===================*/
const rec_t* rec, /*!< in: record */
- ulint n); /*!< in: index of the field */
+ ulint n) /*!< in: index of the field */
+ __attribute__((nonnull, pure, warn_unused_result));
/************************************************************//**
The following function is used to get an offset to the nth
data field in a record.
@@ -440,8 +531,9 @@ rec_get_nth_field_offs(
/*===================*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index of the field */
- ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
+ ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
+ __attribute__((nonnull));
#define rec_get_nth_field(rec, offsets, n, len) \
((rec) + rec_get_nth_field_offs(offsets, n, len))
/******************************************************//**
@@ -452,7 +544,8 @@ UNIV_INLINE
ulint
rec_offs_comp(
/*==========*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Determine if the offsets are for a record containing
externally stored columns.
@@ -461,8 +554,8 @@ UNIV_INLINE
ulint
rec_offs_any_extern(
/*================*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-#ifdef UNIV_BLOB_NULL_DEBUG
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
@@ -472,8 +565,7 @@ rec_offs_any_null_extern(
/*=====================*/
const rec_t* rec, /*!< in: record */
const ulint* offsets) /*!< in: rec_get_offsets(rec) */
- __attribute__((nonnull, warn_unused_result));
-#endif /* UNIV_BLOB_NULL_DEBUG */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */
@@ -482,7 +574,8 @@ ulint
rec_offs_nth_extern(
/*================*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
+ ulint n) /*!< in: nth field */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Returns nonzero if the SQL NULL bit is set in nth field of rec.
@return nonzero if SQL NULL */
@@ -491,7 +584,8 @@ ulint
rec_offs_nth_sql_null(
/*==================*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
+ ulint n) /*!< in: nth field */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Gets the physical size of a field.
@return length of field */
@@ -500,7 +594,8 @@ ulint
rec_offs_nth_size(
/*==============*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
+ ulint n) /*!< in: nth field */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Returns the number of extern bits set in a record.
@@ -509,7 +604,8 @@ UNIV_INLINE
ulint
rec_offs_n_extern(
/*==============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/***********************************************************//**
This is used to modify the value of an already existing field in a record.
The previous value must have exactly the same size as the new value. If len
@@ -524,7 +620,12 @@ rec_set_nth_field(
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index number of the field */
const void* data, /*!< in: pointer to the data if not SQL null */
- ulint len); /*!< in: length of the data or UNIV_SQL_NULL */
+ ulint len) /*!< in: length of the data or UNIV_SQL_NULL.
+ If not SQL null, must have the same
+ length as the previous value.
+ If SQL null, previous value must be
+ SQL null. */
+ __attribute__((nonnull(1,2)));
/**********************************************************//**
The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
@@ -535,7 +636,8 @@ UNIV_INLINE
ulint
rec_get_data_size_old(
/*==================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
The following function returns the number of allocated elements
for an array of offsets.
@@ -544,7 +646,8 @@ UNIV_INLINE
ulint
rec_offs_get_n_alloc(
/*=================*/
- const ulint* offsets);/*!< in: array for rec_get_offsets() */
+ const ulint* offsets)/*!< in: array for rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
The following function sets the number of allocated elements
for an array of offsets. */
@@ -554,7 +657,8 @@ rec_offs_set_n_alloc(
/*=================*/
ulint* offsets, /*!< out: array for rec_get_offsets(),
must be allocated */
- ulint n_alloc); /*!< in: number of elements */
+ ulint n_alloc) /*!< in: number of elements */
+ __attribute__((nonnull));
#define rec_offs_init(offsets) \
rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
/**********************************************************//**
@@ -564,7 +668,8 @@ UNIV_INLINE
ulint
rec_offs_n_fields(
/*==============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
The following function returns the data size of a physical
record, that is the sum of field lengths. SQL null fields
@@ -575,7 +680,8 @@ UNIV_INLINE
ulint
rec_offs_data_size(
/*===============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
Returns the total size of record minus data size of record.
The value returned by the function is the distance from record
@@ -585,7 +691,8 @@ UNIV_INLINE
ulint
rec_offs_extra_size(
/*================*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
Returns the total size of a physical record.
@return size */
@@ -593,7 +700,8 @@ UNIV_INLINE
ulint
rec_offs_size(
/*==========*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
#ifdef UNIV_DEBUG
/**********************************************************//**
Returns a pointer to the start of the record.
@@ -603,7 +711,8 @@ byte*
rec_get_start(
/*==========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
Returns a pointer to the end of the record.
@return pointer to end */
@@ -612,7 +721,8 @@ byte*
rec_get_end(
/*========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
#else /* UNIV_DEBUG */
# define rec_get_start(rec, offsets) ((rec) - rec_offs_extra_size(offsets))
# define rec_get_end(rec, offsets) ((rec) + rec_offs_data_size(offsets))
@@ -683,7 +793,8 @@ rec_copy_prefix_to_buf(
byte** buf, /*!< in/out: memory buffer
for the copied prefix,
or NULL */
- ulint* buf_size); /*!< in/out: buffer size */
+ ulint* buf_size) /*!< in/out: buffer size */
+ __attribute__((nonnull));
/************************************************************//**
Folds a prefix of a physical record to a ulint.
@return the folded value */
@@ -699,7 +810,7 @@ rec_fold(
ulint n_bytes, /*!< in: number of bytes to fold
in an incomplete last field */
index_id_t tree_id) /*!< in: index tree id */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/*********************************************************//**
Builds a physical record out of a data tuple and
@@ -713,8 +824,9 @@ rec_convert_dtuple_to_rec(
physical record */
const dict_index_t* index, /*!< in: record descriptor */
const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext); /*!< in: number of
+ ulint n_ext) /*!< in: number of
externally stored columns */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Returns the extra size of an old-style physical record if we know its
data size and number of fields.
@@ -726,7 +838,7 @@ rec_get_converted_extra_size(
ulint data_size, /*!< in: data size */
ulint n_fields, /*!< in: number of fields */
ulint n_ext) /*!< in: number of externally stored columns */
- __attribute__((const));
+ __attribute__((const));
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@return total size */
@@ -737,7 +849,8 @@ rec_get_converted_size_comp_prefix(
const dict_index_t* index, /*!< in: record descriptor */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
- ulint* extra); /*!< out: extra size */
+ ulint* extra) /*!< out: extra size */
+ __attribute__((warn_unused_result, nonnull(1,2)));
/**********************************************************//**
Determines the size of a data tuple in ROW_FORMAT=COMPACT.
@return total size */
@@ -752,7 +865,8 @@ rec_get_converted_size_comp(
ulint status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
- ulint* extra); /*!< out: extra size */
+ ulint* extra) /*!< out: extra size */
+ __attribute__((nonnull(1,3)));
/**********************************************************//**
The following function returns the size of a data tuple when converted to
a physical record.
@@ -763,7 +877,8 @@ rec_get_converted_size(
/*===================*/
dict_index_t* index, /*!< in: record descriptor */
const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext); /*!< in: number of externally stored columns */
+ ulint n_ext) /*!< in: number of externally stored columns */
+ __attribute__((warn_unused_result, nonnull));
#ifndef UNIV_HOTBACKUP
/**************************************************************//**
Copies the first n fields of a physical record to a data tuple.
@@ -777,7 +892,8 @@ rec_copy_prefix_to_dtuple(
const dict_index_t* index, /*!< in: record descriptor */
ulint n_fields, /*!< in: number of fields
to copy */
- mem_heap_t* heap); /*!< in: memory heap */
+ mem_heap_t* heap) /*!< in: memory heap */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***************************************************************//**
Validates the consistency of a physical record.
@@ -787,7 +903,8 @@ ibool
rec_validate(
/*=========*/
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
/***************************************************************//**
Prints an old-style physical record. */
UNIV_INTERN
@@ -795,7 +912,8 @@ void
rec_print_old(
/*==========*/
FILE* file, /*!< in: file where to print */
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/***************************************************************//**
Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
@@ -806,7 +924,8 @@ rec_print_comp(
/*===========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
/***************************************************************//**
Prints a physical record. */
UNIV_INTERN
@@ -815,7 +934,8 @@ rec_print_new(
/*==========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
/***************************************************************//**
Prints a physical record. */
UNIV_INTERN
@@ -824,7 +944,21 @@ rec_print(
/*======*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index); /*!< in: record descriptor */
+ const dict_index_t* index) /*!< in: record descriptor */
+ __attribute__((nonnull));
+
+# ifdef UNIV_DEBUG
+/************************************************************//**
+Reads the DB_TRX_ID of a clustered index record.
+@return the value of DB_TRX_ID */
+UNIV_INTERN
+trx_id_t
+rec_get_trx_id(
+/*===========*/
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index) /*!< in: clustered index */
+ __attribute__((nonnull, warn_unused_result));
+# endif /* UNIV_DEBUG */
#endif /* UNIV_HOTBACKUP */
/* Maximum lengths for the data in a physical record if the offsets
diff --git a/storage/xtradb/include/rem0rec.ic b/storage/xtradb/include/rem0rec.ic
index b14366312e0..a539320dd2a 100644
--- a/storage/xtradb/include/rem0rec.ic
+++ b/storage/xtradb/include/rem0rec.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -103,7 +103,7 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_OLD_HEAP_NO 5
#define REC_HEAP_NO_MASK 0xFFF8UL
-#if 0 /* defined in rem0rec.h for use of page0zip.c */
+#if 0 /* defined in rem0rec.h for use of page0zip.cc */
#define REC_NEW_HEAP_NO 4
#define REC_HEAP_NO_SHIFT 3
#endif
@@ -118,17 +118,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_INFO_BITS_MASK 0xF0UL
#define REC_INFO_BITS_SHIFT 0
-/* The following masks are used to filter the SQL null bit from
-one-byte and two-byte offsets */
-
-#define REC_1BYTE_SQL_NULL_MASK 0x80UL
-#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
-
-/* In a 2-byte offset the second most significant bit denotes
-a field stored to another page: */
-
-#define REC_2BYTE_EXTERN_MASK 0x4000UL
-
#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \
^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \
^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \
@@ -264,13 +253,13 @@ rec_get_next_ptr_const(
field_value = mach_read_from_2(rec - REC_NEXT);
- if (UNIV_UNLIKELY(field_value == 0)) {
+ if (field_value == 0) {
return(NULL);
}
- if (UNIV_LIKELY(comp != 0)) {
-#if UNIV_PAGE_SIZE <= 32768
+ if (comp) {
+#if UNIV_PAGE_SIZE_MAX <= 32768
/* Note that for 64 KiB pages, field_value can 'wrap around'
and the debug assertion is not valid */
@@ -313,7 +302,7 @@ rec_get_next_ptr(
rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
{
- return((rec_t*) rec_get_next_ptr_const(rec, comp));
+ return(const_cast<rec_t*>(rec_get_next_ptr_const(rec, comp)));
}
/******************************************************//**
@@ -337,8 +326,8 @@ rec_get_next_offs(
field_value = mach_read_from_2(rec - REC_NEXT);
- if (UNIV_LIKELY(comp != 0)) {
-#if UNIV_PAGE_SIZE <= 32768
+ if (comp) {
+#if UNIV_PAGE_SIZE_MAX <= 32768
/* Note that for 64 KiB pages, field_value can 'wrap around'
and the debug assertion is not valid */
@@ -354,7 +343,7 @@ rec_get_next_offs(
+ ut_align_offset(rec, UNIV_PAGE_SIZE)
< UNIV_PAGE_SIZE);
#endif
- if (UNIV_UNLIKELY(field_value == 0)) {
+ if (field_value == 0) {
return(0);
}
@@ -410,7 +399,7 @@ rec_set_next_offs_new(
ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next);
- if (UNIV_UNLIKELY(!next)) {
+ if (!next) {
field_value = 0;
} else {
/* The following two statements calculate
@@ -418,7 +407,7 @@ rec_set_next_offs_new(
as a non-negative number */
field_value = (ulint)
- ((lint) next
+ ((lint) next
- (lint) ut_align_offset(rec, UNIV_PAGE_SIZE));
field_value &= REC_NEXT_MASK;
}
@@ -572,9 +561,7 @@ rec_set_n_owned_new(
{
rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
- if (UNIV_LIKELY_NULL(page_zip)
- && UNIV_LIKELY(rec_get_status(rec)
- != REC_STATUS_SUPREMUM)) {
+ if (page_zip && rec_get_status(rec) != REC_STATUS_SUPREMUM) {
page_zip_rec_set_owned(page_zip, rec, n_owned);
}
}
@@ -648,7 +635,7 @@ rec_get_info_and_status_bits(
& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
#endif
- if (UNIV_LIKELY(comp != 0)) {
+ if (comp) {
bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec);
} else {
bits = rec_get_info_bits(rec, FALSE);
@@ -684,16 +671,14 @@ rec_get_deleted_flag(
const rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
{
- if (UNIV_LIKELY(comp != 0)) {
- return(UNIV_UNLIKELY(
- rec_get_bit_field_1(rec, REC_NEW_INFO_BITS,
- REC_INFO_DELETED_FLAG,
- REC_INFO_BITS_SHIFT)));
+ if (comp) {
+ return(rec_get_bit_field_1(rec, REC_NEW_INFO_BITS,
+ REC_INFO_DELETED_FLAG,
+ REC_INFO_BITS_SHIFT));
} else {
- return(UNIV_UNLIKELY(
- rec_get_bit_field_1(rec, REC_OLD_INFO_BITS,
- REC_INFO_DELETED_FLAG,
- REC_INFO_BITS_SHIFT)));
+ return(rec_get_bit_field_1(rec, REC_OLD_INFO_BITS,
+ REC_INFO_DELETED_FLAG,
+ REC_INFO_BITS_SHIFT));
}
}
@@ -741,7 +726,7 @@ rec_set_deleted_flag_new(
rec_set_info_bits_new(rec, val);
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
page_zip_rec_set_deleted(page_zip, rec, flag);
}
}
@@ -887,6 +872,20 @@ rec_2_get_field_end_info(
return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
}
+/******************************************************//**
+Returns nonzero if the field is stored off-page.
+@retval 0 if the field is stored in-page
+@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
+UNIV_INLINE
+ulint
+rec_2_is_field_extern(
+/*==================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+{
+ return(rec_2_get_field_end_info(rec, n) & REC_2BYTE_EXTERN_MASK);
+}
+
/* Get the base address of offsets. The extra_size is stored at
this position, and following positions hold the end offsets of
the fields. */
@@ -1041,7 +1040,7 @@ rec_get_nth_field_offs(
ut_ad(n < rec_offs_n_fields(offsets));
ut_ad(len);
- if (UNIV_UNLIKELY(n == 0)) {
+ if (n == 0) {
offs = 0;
} else {
offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK;
@@ -1085,10 +1084,9 @@ rec_offs_any_extern(
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL));
+ return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL);
}
-#ifdef UNIV_BLOB_NULL_DEBUG
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
@@ -1124,7 +1122,6 @@ rec_offs_any_null_extern(
return(NULL);
}
-#endif /* UNIV_BLOB_NULL_DEBUG */
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@@ -1138,8 +1135,7 @@ rec_offs_nth_extern(
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
ut_ad(n < rec_offs_n_fields(offsets));
- return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n]
- & REC_OFFS_EXTERNAL));
+ return(rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL);
}
/******************************************************//**
@@ -1154,8 +1150,7 @@ rec_offs_nth_sql_null(
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
ut_ad(n < rec_offs_n_fields(offsets));
- return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n]
- & REC_OFFS_SQL_NULL));
+ return(rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL);
}
/******************************************************//**
@@ -1394,7 +1389,7 @@ rec_set_nth_field(
ut_ad(rec);
ut_ad(rec_offs_validate(rec, NULL, offsets));
- if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) {
+ if (len == UNIV_SQL_NULL) {
if (!rec_offs_nth_sql_null(offsets, n)) {
ut_a(!rec_offs_comp(offsets));
rec_set_nth_field_sql_null(rec, n);
@@ -1513,7 +1508,7 @@ rec_get_end(
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
- return((rec_t*) rec + rec_offs_data_size(offsets));
+ return(const_cast<rec_t*>(rec + rec_offs_data_size(offsets)));
}
/**********************************************************//**
@@ -1527,7 +1522,7 @@ rec_get_start(
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
- return((rec_t*) rec - rec_offs_extra_size(offsets));
+ return(const_cast<rec_t*>(rec - rec_offs_extra_size(offsets)));
}
#endif /* UNIV_DEBUG */
@@ -1546,7 +1541,7 @@ rec_copy(
ulint data_len;
ut_ad(rec && buf);
- ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(rec_validate(rec, offsets));
extra_len = rec_offs_extra_size(offsets);
@@ -1554,7 +1549,7 @@ rec_copy(
ut_memcpy(buf, rec - extra_len, extra_len + data_len);
- return((byte*)buf + extra_len);
+ return((byte*) buf + extra_len);
}
/**********************************************************//**
@@ -1596,7 +1591,7 @@ rec_get_converted_size(
ut_ad(dtuple);
ut_ad(dtuple_check_typed(dtuple));
- ut_ad(index->type & DICT_UNIVERSAL
+ ut_ad(dict_index_is_univ(index)
|| dtuple_get_n_fields(dtuple)
== (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
== REC_STATUS_NODE_PTR)
@@ -1616,6 +1611,41 @@ rec_get_converted_size(
extra_size = rec_get_converted_extra_size(
data_size, dtuple_get_n_fields(dtuple), n_ext);
+#if 0
+ /* This code is inactive since it may be the wrong place to add
+ in the size of node pointers used in parent pages AND it is not
+ currently needed since ha_innobase::max_supported_key_length()
+ ensures that the key size limit for each page size is well below
+ the actual limit ((free space on page / 4) - record overhead).
+ But those limits will need to be raised when InnoDB can
+ support multiple page sizes. At that time, we will need
+ to consider the node pointer on these universal btrees. */
+
+ if (dict_index_is_univ(index)) {
+ /* This is for the insert buffer B-tree.
+ All fields in the leaf tuple ascend to the
+ parent node plus the child page pointer. */
+
+ /* ibuf cannot contain externally stored fields */
+ ut_ad(n_ext == 0);
+
+ /* Add the data pointer and recompute extra_size
+ based on one more field. */
+ data_size += REC_NODE_PTR_SIZE;
+ extra_size = rec_get_converted_extra_size(
+ data_size,
+ dtuple_get_n_fields(dtuple) + 1,
+ 0);
+
+ /* Be sure dtuple->n_fields has this node ptr
+ accounted for. This function should correspond to
+ what rec_convert_dtuple_to_rec() needs in storage.
+ In optimistic insert or update-not-in-place, we will
+ have to ensure that if the record is converted to a
+ node pointer, it will not become too large.*/
+ }
+#endif
+
return(data_size + extra_size);
}
diff --git a/storage/xtradb/include/rem0types.h b/storage/xtradb/include/rem0types.h
index 248ce27eee3..f8133f77466 100644
--- a/storage/xtradb/include/rem0types.h
+++ b/storage/xtradb/include/rem0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -34,6 +34,15 @@ typedef byte rec_t;
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
#define REC_MAX_N_OWNED (16 - 1)
+/* Maximum number of user-defined fields/columns. The reserved columns
+are the ones InnoDB adds internally: DB_ROW_ID, DB_TRX_ID, DB_ROLL_PTR.
+We need "* 2" because mlog_parse_index() creates a dummy table object
+that may already contain some of the system columns, and then adds the 3
+system columns (again) using dict_table_add_system_columns(). The problem
+is that mlog_parse_index() cannot recognize the system columns from
+n_fields, n_uniq and the lengths of the columns alone. */
+#define REC_MAX_N_USER_FIELDS (REC_MAX_N_FIELDS - DATA_N_SYS_COLS * 2)
+
/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
indexed field length (or indexed prefix length) for indexes on tables of
ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format.
@@ -45,10 +54,21 @@ This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
#define REC_ANTELOPE_MAX_INDEX_COL_LEN 768
-/** Maximum indexed field length for table format DICT_TF_FORMAT_ZIP and
+/** Maximum indexed field length for table format UNIV_FORMAT_B and
beyond.
This (3072) is the maximum index row length allowed, so we cannot create index
prefix column longer than that. */
#define REC_VERSION_56_MAX_INDEX_COL_LEN 3072
+/** InnoDB row types are a subset of the MySQL global enum row_type.
+They are made into their own enum so that switch statements can account
+for each of them. */
+enum rec_format_enum {
+ REC_FORMAT_REDUNDANT = 0, /*!< REDUNDANT row format */
+ REC_FORMAT_COMPACT = 1, /*!< COMPACT row format */
+ REC_FORMAT_COMPRESSED = 2, /*!< COMPRESSED row format */
+ REC_FORMAT_DYNAMIC = 3 /*!< DYNAMIC row format */
+};
+typedef enum rec_format_enum rec_format_t;
+
#endif
diff --git a/storage/xtradb/include/row0ext.h b/storage/xtradb/include/row0ext.h
index 71c7b6ecce4..a098e2f9b29 100644
--- a/storage/xtradb/include/row0ext.h
+++ b/storage/xtradb/include/row0ext.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -84,7 +84,7 @@ row_ext_lookup(
DICT_MAX_FIELD_LEN_BY_FORMAT() */
/** Prefixes of externally stored columns */
-struct row_ext_struct{
+struct row_ext_t{
ulint n_ext; /*!< number of externally stored columns */
const ulint* ext; /*!< col_no's of externally stored columns */
byte* buf; /*!< backing store of the column prefix cache */
diff --git a/storage/xtradb/include/row0ext.ic b/storage/xtradb/include/row0ext.ic
index 56e71d9a968..39e150d91d5 100644
--- a/storage/xtradb/include/row0ext.ic
+++ b/storage/xtradb/include/row0ext.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -48,7 +48,7 @@ row_ext_lookup_ith(
ut_ad(*len <= ext->max_len);
ut_ad(ext->max_len > 0);
- if (UNIV_UNLIKELY(*len == 0)) {
+ if (*len == 0) {
/* The BLOB could not be fetched to the cache. */
return(field_ref_zero);
} else {
diff --git a/storage/xtradb/include/row0ftsort.h b/storage/xtradb/include/row0ftsort.h
new file mode 100644
index 00000000000..4a486450efc
--- /dev/null
+++ b/storage/xtradb/include/row0ftsort.h
@@ -0,0 +1,275 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ftsort.h
+Create Full Text Index with (parallel) merge sort
+
+Created 10/13/2010 Jimmy Yang
+*******************************************************/
+
+#ifndef row0ftsort_h
+#define row0ftsort_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "row0mysql.h"
+#include "fts0fts.h"
+#include "fts0types.h"
+#include "fts0priv.h"
+#include "row0merge.h"
+
+/** This structure defines the information that the scan thread fetches
+and puts on the linked list for the parallel tokenization/sort threads
+to process */
+typedef struct fts_doc_item fts_doc_item_t;
+
+/** One document (field data plus Doc ID), linked into a list of doc items */
+struct fts_doc_item {
+ dfield_t* field; /*!< field that contains the document string */
+ doc_id_t doc_id; /*!< document ID */
+ UT_LIST_NODE_T(fts_doc_item_t) doc_list;
+ /*!< list of doc items */
+};
+
+/** This defines the list type that scan thread would feed the parallel
+tokenization threads and sort threads. */
+typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t;
+
+#define FTS_NUM_AUX_INDEX 6
+#define FTS_PLL_MERGE 1
+
+/** Sort information passed to each individual parallel sort thread */
+struct fts_psort_t;
+
+/** Common info passed to each parallel sort thread */
+struct fts_psort_common_t {
+ row_merge_dup_t* dup; /*!< descriptor of FTS index */
+ dict_table_t* new_table; /*!< source table */
+ trx_t* trx; /*!< transaction */
+ fts_psort_t* all_info; /*!< all parallel sort info */
+ os_event_t sort_event; /*!< sort event */
+ os_event_t merge_event; /*!< merge event */
+ ibool opt_doc_id_size;/*!< whether to use 4 bytes
+ instead of 8 bytes integer to
+ store Doc ID during sort, if
+ Doc ID will not be big enough
+ to use 8 bytes value */
+};
+
+struct fts_psort_t {
+ ulint psort_id; /*!< Parallel sort ID */
+ row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX];
+ /*!< sort buffers */
+ merge_file_t* merge_file[FTS_NUM_AUX_INDEX];
+ /*!< sort files */
+ row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX];
+ /*!< buffers to write to file */
+ row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX];
+ /*!< allocated buffers */
+ ulint child_status; /*!< child thread status */
+ ulint state; /*!< child thread state */
+ fts_doc_list_t fts_doc_list; /*!< doc list to process */
+ fts_psort_common_t* psort_common; /*!< ptr to the common psort info */
+ os_thread_t thread_hdl; /*!< thread handle */
+};
+
+/** Structure that stores information from a string tokenization operation */
+struct fts_tokenize_ctx {
+ ulint processed_len; /*!< processed string length */
+ ulint init_pos; /*!< doc start position */
+ ulint buf_used; /*!< the sort buffer (ID) in use when
+ tokenization stops, which could be
+ due to the sort buffer being full */
+ ulint rows_added[FTS_NUM_AUX_INDEX];
+ /*!< number of rows added for
+ each FTS index partition */
+ ib_rbt_t* cached_stopword;/*!< in: stopword list */
+ dfield_t sort_field[FTS_NUM_FIELDS_SORT];
+ /*!< in: sort field */
+};
+
+typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
+
+/** Structure stores information needed for the insertion phase of FTS
+parallel sort. */
+struct fts_psort_insert {
+ trx_t* trx; /*!< Transaction used for insertion */
+ que_t** ins_graph; /*!< insert graph */
+ fts_table_t fts_table; /*!< auxiliary table */
+ CHARSET_INFO* charset; /*!< charset info */
+ mem_heap_t* heap; /*!< heap */
+ ibool opt_doc_id_size;/*!< Whether to use smaller (4 bytes)
+ integer for Doc ID */
+};
+
+typedef struct fts_psort_insert fts_psort_insert_t;
+
+
+/** status bit used for communication between parent and child thread */
+#define FTS_PARENT_COMPLETE 1
+#define FTS_CHILD_COMPLETE 1
+#define FTS_CHILD_EXITING 2
+
+/** Print some debug information */
+#define FTSORT_PRINT
+
+#ifdef FTSORT_PRINT
+#define DEBUG_FTS_SORT_PRINT(str) \
+ do { \
+ ut_print_timestamp(stderr); \
+ fputs(str, stderr); /* verbatim: str is not a format string */ \
+ } while (0)
+#else
+#define DEBUG_FTS_SORT_PRINT(str)
+#endif /* FTSORT_PRINT */
+
+/*************************************************************//**
+Create a temporary "fts sort index" used to merge sort the
+tokenized doc string. The index has three "fields":
+
+1) Tokenized word,
+2) Doc ID
+3) Word's position in original 'doc'.
+
+@return dict_index_t structure for the fts sort index */
+UNIV_INTERN
+dict_index_t*
+row_merge_create_fts_sort_index(
+/*============================*/
+ dict_index_t* index, /*!< in: Original FTS index
+ based on which this sort index
+ is created */
+ const dict_table_t* table, /*!< in: table that FTS index
+ is being created on */
+ ibool* opt_doc_id_size);
+ /*!< out: whether to use 4 bytes
+ instead of 8 bytes integer to
+ store Doc ID during sort */
+
+/********************************************************************//**
+Initialize FTS parallel sort structures.
+@return TRUE if all successful */
+UNIV_INTERN
+ibool
+row_fts_psort_info_init(
+/*====================*/
+ trx_t* trx, /*!< in: transaction */
+ row_merge_dup_t* dup, /*!< in,own: descriptor of
+ FTS index being created */
+ const dict_table_t* new_table,/*!< in: table where indexes are
+ created */
+ ibool opt_doc_id_size,
+ /*!< in: whether to use 4 bytes
+ instead of 8 bytes integer to
+ store Doc ID during sort */
+ fts_psort_t** psort, /*!< out: parallel sort info to be
+ instantiated */
+ fts_psort_t** merge) /*!< out: parallel merge info
+ to be instantiated */
+ __attribute__((nonnull));
+/********************************************************************//**
+Clean up and deallocate FTS parallel sort structures, and close
+temporary merge sort files */
+UNIV_INTERN
+void
+row_fts_psort_info_destroy(
+/*=======================*/
+ fts_psort_t* psort_info, /*!< parallel sort info */
+ fts_psort_t* merge_info); /*!< parallel merge info */
+/********************************************************************//**
+Free up merge buffers when merge sort is done */
+UNIV_INTERN
+void
+row_fts_free_pll_merge_buf(
+/*=======================*/
+ fts_psort_t* psort_info); /*!< in: parallel sort info */
+
+/*********************************************************************//**
+Function performs parallel tokenization of the incoming doc strings.
+@return OS_THREAD_DUMMY_RETURN */
+UNIV_INTERN
+os_thread_ret_t
+fts_parallel_tokenization(
+/*======================*/
+ void* arg); /*!< in: psort_info for the thread */
+/*********************************************************************//**
+Start the parallel tokenization and parallel merge sort */
+UNIV_INTERN
+void
+row_fts_start_psort(
+/*================*/
+ fts_psort_t* psort_info); /*!< in: parallel sort info */
+/*********************************************************************//**
+Function performs the merge and insertion of the sorted records.
+@return OS_THREAD_DUMMY_RETURN */
+UNIV_INTERN
+os_thread_ret_t
+fts_parallel_merge(
+/*===============*/
+ void* arg); /*!< in: parallel merge info */
+/*********************************************************************//**
+Kick off the parallel merge and insert thread */
+UNIV_INTERN
+void
+row_fts_start_parallel_merge(
+/*=========================*/
+ fts_psort_t* merge_info); /*!< in: parallel sort info */
+/********************************************************************//**
+Read sorted FTS data files and insert data tuples to auxiliary tables.
+Declared void: no status code is returned to the caller. */
+UNIV_INTERN
+void
+row_fts_insert_tuple(
+/*=================*/
+ fts_psort_insert_t*
+ ins_ctx, /*!< in: insert context */
+ fts_tokenizer_word_t* word, /*!< in: last processed
+ tokenized word */
+ ib_vector_t* positions, /*!< in: word position */
+ doc_id_t* in_doc_id, /*!< in: last item doc id */
+ dtuple_t* dtuple); /*!< in: entry to insert */
+/********************************************************************//**
+Propagate a newly added record up one level in the selection tree
+@return parent to which this value was propagated */
+UNIV_INTERN
+int
+row_merge_fts_sel_propagate(
+/*========================*/
+ int propogated, /*!< in: tree node propagated */
+ int* sel_tree, /*!< in: selection tree */
+ ulint level, /*!< in: selection tree level */
+ const mrec_t** mrec, /*!< in: sort record */
+ ulint** offsets, /*!< in: record offsets */
+ dict_index_t* index); /*!< in: FTS index */
+/********************************************************************//**
+Read sorted file containing index data tuples and insert these data
+tuples to the index
+@return DB_SUCCESS or error number */
+UNIV_INTERN
+dberr_t
+row_fts_merge_insert(
+/*=================*/
+ dict_index_t* index, /*!< in: index */
+ dict_table_t* table, /*!< in: new table */
+ fts_psort_t* psort_info, /*!< parallel sort info */
+ ulint id) /*!< in: which auxiliary table's data
+ to insert to */
+ __attribute__((nonnull));
+#endif /* row0ftsort_h */
diff --git a/storage/xtradb/include/row0import.h b/storage/xtradb/include/row0import.h
new file mode 100644
index 00000000000..aa46fdb7c27
--- /dev/null
+++ b/storage/xtradb/include/row0import.h
@@ -0,0 +1,91 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0import.h
+Header file for import tablespace functions.
+
+Created 2012-02-08 by Sunny Bains
+*******************************************************/
+
+#ifndef row0import_h
+#define row0import_h
+
+#include "univ.i"
+#include "db0err.h"
+#include "dict0types.h"
+
+// Forward declarations
+struct trx_t;
+struct dict_table_t;
+struct row_prebuilt_t;
+
+/*****************************************************************//**
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_import_for_mysql(
+/*=================*/
+ dict_table_t* table, /*!< in/out: table */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct
+ in MySQL */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_import_update_discarded_flag(
+/*=============================*/
+ trx_t* trx, /*!< in/out: transaction that
+ covers the update */
+ table_id_t table_id, /*!< in: Table for which we want
+ to set the root table->flags2 */
+ bool discarded, /*!< in: set MIX_LEN column bit
+ to discarded, if true */
+ bool dict_locked) /*!< in: Set to true if the
+ caller already owns the
+ dict_sys_t:: mutex. */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Update the (space, root page) of a table's indexes from the values
+in the data dictionary.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_import_update_index_root(
+/*=========================*/
+ trx_t* trx, /*!< in/out: transaction that
+ covers the update */
+ const dict_table_t* table, /*!< in: Table for which we want
+ to set the root page_no */
+ bool reset, /*!< in: if true then set to
+ FIL_NULL */
+ bool dict_locked) /*!< in: Set to true if the
+ caller already owns the
+ dict_sys_t:: mutex. */
+ __attribute__((nonnull, warn_unused_result));
+#ifndef UNIV_NONINL
+#include "row0import.ic"
+#endif
+
+#endif /* row0import_h */
diff --git a/storage/xtradb/include/row0import.ic b/storage/xtradb/include/row0import.ic
new file mode 100644
index 00000000000..c5bbab49f6f
--- /dev/null
+++ b/storage/xtradb/include/row0import.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0import.ic
+
+Import tablespace inline functions.
+
+Created 2012-02-08 Sunny Bains
+*******************************************************/
diff --git a/storage/xtradb/include/row0ins.h b/storage/xtradb/include/row0ins.h
index 1da3ef48a81..2a892d2f5df 100644
--- a/storage/xtradb/include/row0ins.h
+++ b/storage/xtradb/include/row0ins.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -40,7 +40,7 @@ the caller must have a shared latch on dict_foreign_key_check_lock.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
DB_ROW_IS_REFERENCED */
UNIV_INTERN
-ulint
+dberr_t
row_ins_check_foreign_constraint(
/*=============================*/
ibool check_ref,/*!< in: TRUE If we want to check that
@@ -52,7 +52,8 @@ row_ins_check_foreign_constraint(
dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign
table, else the referenced table */
dtuple_t* entry, /*!< in: index entry for index */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Creates an insert node struct.
@return own: insert node struct */
@@ -74,21 +75,110 @@ ins_node_set_new_row(
ins_node_t* node, /*!< in: insert node */
dtuple_t* row); /*!< in: new row (or first row) for the node */
/***************************************************************//**
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+Tries to insert an entry into a clustered index, ignoring foreign key
+constraints. If a record with the same unique key is found, the other
+record is necessarily marked deleted by a committed transaction, or a
+unique key violation error occurs. The delete marked record is then
+updated to an existing record, and we must write an undo log record on
+the delete marked record.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
UNIV_INTERN
-ulint
-row_ins_index_entry(
-/*================*/
- dict_index_t* index, /*!< in: index */
+dberr_t
+row_ins_clust_index_entry_low(
+/*==========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /*!< in: clustered index */
+ ulint n_uniq, /*!< in: 0 or index->n_uniq */
dtuple_t* entry, /*!< in/out: index entry to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- ibool foreign,/*!< in: TRUE=check foreign key constraints
- (foreign=FALSE only during CREATE INDEX) */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread or NULL */
+ __attribute__((nonnull(3, 5), warn_unused_result));
+/***************************************************************//**
+Tries to insert an entry into a secondary index. If a record with exactly the
+same fields is found, the other record is necessarily marked deleted.
+It is then unmarked. Otherwise, the entry is just inserted to the index.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry_low(
+/*========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /*!< in: secondary index */
+ mem_heap_t* offsets_heap,
+ /*!< in/out: memory heap that can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during
+ row_log_table_apply(), or 0 */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Tries to insert the externally stored fields (off-page columns)
+of a clustered index entry.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
+dberr_t
+row_ins_index_entry_big_rec_func(
+/*=============================*/
+ const dtuple_t* entry, /*!< in/out: index entry to insert */
+ const big_rec_t* big_rec,/*!< in: externally stored fields */
+ ulint* offsets,/*!< in/out: rec offsets */
+ mem_heap_t** heap, /*!< in/out: memory heap */
+ dict_index_t* index, /*!< in: index */
+ const char* file, /*!< in: file name of caller */
+#ifndef DBUG_OFF
+ const void* thd, /*!< in: connection, or NULL */
+#endif /* DBUG_OFF */
+ ulint line) /*!< in: line number of caller */
+ __attribute__((nonnull(1,2,3,4,5,6), warn_unused_result));
+#ifdef DBUG_OFF
+# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
+ row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,line)
+#else /* DBUG_OFF */
+# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
+ row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,thd,line)
+#endif /* DBUG_OFF */
+/***************************************************************//**
+Inserts an entry into a clustered index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_clust_index_entry(
+/*======================*/
+ dict_index_t* index, /*!< in: clustered index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr, /*!< in: query thread */
+ ulint n_ext) /*!< in: number of externally stored columns */
+ __attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Inserts an entry into a secondary index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry(
+/*====================*/
+ dict_index_t* index, /*!< in: secondary index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Inserts a row to a table. This is a high-level function used in
SQL execution graphs.
@@ -98,17 +188,10 @@ que_thr_t*
row_ins_step(
/*=========*/
que_thr_t* thr); /*!< in: query thread */
-/***********************************************************//**
-Creates an entry template for each index of a table. */
-UNIV_INTERN
-void
-ins_node_create_entry_list(
-/*=======================*/
- ins_node_t* node); /*!< in: row insert node */
/* Insert node structure */
-struct ins_node_struct{
+struct ins_node_t{
que_common_t common; /*!< node type: QUE_NODE_INSERT */
ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
dtuple_t* row; /*!< row to insert */
diff --git a/storage/xtradb/include/row0ins.ic b/storage/xtradb/include/row0ins.ic
index 6e96e9fd675..9c191d869a2 100644
--- a/storage/xtradb/include/row0ins.ic
+++ b/storage/xtradb/include/row0ins.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/row0log.h b/storage/xtradb/include/row0log.h
new file mode 100644
index 00000000000..41dac63963d
--- /dev/null
+++ b/storage/xtradb/include/row0log.h
@@ -0,0 +1,238 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0log.h
+Modification log for online index creation and online table rebuild
+
+Created 2011-05-26 Marko Makela
+*******************************************************/
+
+#ifndef row0log_h
+#define row0log_h
+
+#include "univ.i"
+#include "mtr0types.h"
+#include "row0types.h"
+#include "rem0types.h"
+#include "data0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+
+/******************************************************//**
+Allocate the row log for an index and flag the index
+for online creation.
+@return true if success, false if not */
+UNIV_INTERN
+bool
+row_log_allocate(
+/*=============*/
+ dict_index_t* index, /*!< in/out: index */
+ dict_table_t* table, /*!< in/out: new table being rebuilt,
+ or NULL when creating a secondary index */
+ bool same_pk,/*!< in: whether the definition of the
+ PRIMARY KEY has remained the same */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map)/*!< in: mapping of old column
+ numbers to new ones, or NULL if !table */
+ __attribute__((nonnull(1), warn_unused_result));
+
+/******************************************************//**
+Free the row log for an index that was being created online. */
+UNIV_INTERN
+void
+row_log_free(
+/*=========*/
+ row_log_t*& log) /*!< in,own: row log */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Free the row log for an index on which online creation was aborted. */
+UNIV_INLINE
+void
+row_log_abort_sec(
+/*==============*/
+ dict_index_t* index) /*!< in/out: index (x-latched) */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Try to log an operation to a secondary index that is
+(or was) being created.
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
+UNIV_INLINE
+bool
+row_log_online_op_try(
+/*==================*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************//**
+Logs an operation to a secondary index that is (or was) being created. */
+UNIV_INTERN
+void
+row_log_online_op(
+/*==============*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Gets the error status of the online index rebuild log.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_log_table_get_error(
+/*====================*/
+ const dict_index_t* index) /*!< in: clustered index of a table
+ that is being rebuilt online */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Logs a delete operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_delete(). */
+UNIV_INTERN
+void
+row_log_table_delete(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ bool purge, /*!< in: true=purging BLOBs */
+ trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before
+ it was deleted */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Logs an update operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_update(). */
+UNIV_INTERN
+void
+row_log_table_update(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const dtuple_t* old_pk) /*!< in: row_log_table_get_pk()
+ before the update */
+ UNIV_COLD __attribute__((nonnull(1,2,3)));
+
+/******************************************************//**
+Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
+of a table that is being rebuilt.
+@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
+or NULL if the PRIMARY KEY definition does not change */
+UNIV_INTERN
+const dtuple_t*
+row_log_table_get_pk(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index),
+ or NULL */
+ mem_heap_t** heap) /*!< in/out: memory heap where allocated */
+ UNIV_COLD __attribute__((nonnull(1,2,4), warn_unused_result));
+
+/******************************************************//**
+Logs an insert to a table that is being rebuilt.
+This will be merged in row_log_table_apply_insert(). */
+UNIV_INTERN
+void
+row_log_table_insert(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
+ UNIV_COLD __attribute__((nonnull));
+/******************************************************//**
+Notes that a BLOB is being freed during online ALTER TABLE. */
+UNIV_INTERN
+void
+row_log_table_blob_free(
+/*====================*/
+ dict_index_t* index, /*!< in/out: clustered index, X-latched */
+ ulint page_no)/*!< in: starting page number of the BLOB */
+ UNIV_COLD __attribute__((nonnull));
+/******************************************************//**
+Notes that a BLOB is being allocated during online ALTER TABLE. */
+UNIV_INTERN
+void
+row_log_table_blob_alloc(
+/*=====================*/
+ dict_index_t* index, /*!< in/out: clustered index, X-latched */
+ ulint page_no)/*!< in: starting page number of the BLOB */
+ UNIV_COLD __attribute__((nonnull));
+/******************************************************//**
+Apply the row_log_table log to a table upon completing rebuild.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_table_apply(
+/*================*/
+ que_thr_t* thr, /*!< in: query graph */
+ dict_table_t* old_table,
+ /*!< in: old table */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Get the latest transaction ID that has invoked row_log_online_op()
+during online creation.
+@return latest transaction ID, or 0 if nothing was logged */
+UNIV_INTERN
+trx_id_t
+row_log_get_max_trx(
+/*================*/
+ dict_index_t* index) /*!< in: index, must be locked */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Merge the row log to the index upon completing index creation.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_apply(
+/*==========*/
+ trx_t* trx, /*!< in: transaction (for checking if
+ the operation was interrupted) */
+ dict_index_t* index, /*!< in/out: secondary index */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+ __attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_NONINL
+#include "row0log.ic"
+#endif
+
+#endif /* row0log.h */
diff --git a/storage/xtradb/include/row0log.ic b/storage/xtradb/include/row0log.ic
new file mode 100644
index 00000000000..b0f37dbd8e7
--- /dev/null
+++ b/storage/xtradb/include/row0log.ic
@@ -0,0 +1,84 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0log.ic
+Modification log for online index creation and online table rebuild
+
+Created 2012-10-18 Marko Makela
+*******************************************************/
+
+#include "dict0dict.h"
+
+/******************************************************//**
+Free the row log for an index on which online creation was aborted.
+The index is first flagged as ONLINE_INDEX_ABORTED and then
+index->online_log is handed to row_log_free(). The ut_ad() checks
+require that the index is not a clustered index and, when
+UNIV_SYNC_DEBUG is defined, that the index lock is owned in
+RW_LOCK_EX mode. */
+UNIV_INLINE
+void
+row_log_abort_sec(
+/*===============*/
+ dict_index_t* index) /*!< in/out: index (x-latched) */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(!dict_index_is_clust(index)); /* secondary indexes only */
+ dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
+ row_log_free(index->online_log); /* status is set before the log is freed */
+}
+
+/******************************************************//**
+Try to log an operation to a secondary index that is
+(or was) being created.
+The decision is made from dict_index_get_online_status(index):
+ONLINE_INDEX_COMPLETE returns false without logging,
+ONLINE_INDEX_CREATION forwards the operation to row_log_online_op(),
+and ONLINE_INDEX_ABORTED / ONLINE_INDEX_ABORTED_DROPPED log nothing.
+The switch has no default label, so any status not listed there
+also ends up returning true.
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
+UNIV_INLINE
+bool
+row_log_online_op_try(
+/*==================*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
+ || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_COMPLETE:
+ /* This is a normal index. Do not log anything.
+ The caller must perform the operation on the
+ index tree directly. */
+ return(false);
+ case ONLINE_INDEX_CREATION:
+ /* The index is being created online. Log the
+ operation; tuple and trx_id are passed through
+ unchanged. */
+ row_log_online_op(index, tuple, trx_id);
+ break;
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ /* The index was created online, but the operation was
+ aborted. Do not log the operation and tell the caller
+ to skip the operation. */
+ break;
+ }
+
+ return(true); /* logged (creation) or to be skipped (aborted) */
+}
diff --git a/storage/xtradb/include/row0merge.h b/storage/xtradb/include/row0merge.h
index 22786fd7e49..390c0ce038b 100644
--- a/storage/xtradb/include/row0merge.h
+++ b/storage/xtradb/include/row0merge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -38,111 +38,210 @@ Created 13/06/2005 Jan Lindstrom
#include "btr0types.h"
#include "row0mysql.h"
#include "lock0types.h"
+#include "srv0srv.h"
+
+// Forward declaration
+struct ib_sequence_t;
+
+/** @brief Block size for I/O operations in merge sort.
+
+The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
+rounded to a power of 2.
+
+When not creating a PRIMARY KEY that contains column prefixes, this
+can be set as small as UNIV_PAGE_SIZE / 2. */
+typedef byte row_merge_block_t;
+
+/** @brief Secondary buffer for I/O operations of merge records.
+
+This buffer is used for writing or reading a record that spans two
+row_merge_block_t. Thus, it must be able to hold one merge record,
+whose maximum size is the same as the minimum size of
+row_merge_block_t. */
+typedef byte mrec_buf_t[UNIV_PAGE_SIZE_MAX];
+
+/** @brief Merge record in row_merge_block_t.
+
+The format is the same as a record in ROW_FORMAT=COMPACT with the
+exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
+typedef byte mrec_t;
+
+/** Merge record in row_merge_buf_t */
+struct mtuple_t {
+ dfield_t* fields; /*!< data fields */
+};
+
+/** Buffer for sorting in main memory. */
+struct row_merge_buf_t {
+ mem_heap_t* heap; /*!< memory heap where allocated */
+ dict_index_t* index; /*!< the index the tuples belong to */
+ ulint total_size; /*!< total amount of data bytes */
+ ulint n_tuples; /*!< number of data tuples */
+ ulint max_tuples; /*!< maximum number of data tuples */
+ mtuple_t* tuples; /*!< array of data tuples */
+ mtuple_t* tmp_tuples; /*!< temporary copy of tuples,
+ for sorting */
+};
+
+/** Information about a temporary file used in merge sort */
+struct merge_file_t {
+ int fd; /*!< file descriptor */
+ ulint offset; /*!< file offset (end of file);
+ NOTE(review): presumably counted in
+ row_merge_block_t elements, like the
+ offset of row_merge_write() -- confirm */
+ ib_uint64_t n_rec; /*!< number of records in the file */
+};
/** Index field definition */
-struct merge_index_field_struct {
+struct index_field_t {
+ ulint col_no; /*!< column offset */
ulint prefix_len; /*!< column prefix length, or 0
if indexing the whole column */
- const char* field_name; /*!< field name */
};
-/** Index field definition */
-typedef struct merge_index_field_struct merge_index_field_t;
-
/** Definition of an index being created */
-struct merge_index_def_struct {
- const char* name; /*!< index name */
- ulint ind_type; /*!< 0, DICT_UNIQUE,
- or DICT_CLUSTERED */
- ulint n_fields; /*!< number of fields
- in index */
- merge_index_field_t* fields; /*!< field definitions */
+struct index_def_t {
+ const char* name; /*!< index name */
+ ulint ind_type; /*!< 0, DICT_UNIQUE,
+ or DICT_CLUSTERED */
+ ulint key_number; /*!< MySQL key number,
+ or ULINT_UNDEFINED if none */
+ ulint n_fields; /*!< number of fields in index */
+ index_field_t* fields; /*!< field definitions */
};
-/** Definition of an index being created */
-typedef struct merge_index_def_struct merge_index_def_t;
+/** Structure for reporting duplicate records. */
+struct row_merge_dup_t {
+ dict_index_t* index; /*!< index being sorted */
+ struct TABLE* table; /*!< MySQL table object */
+ const ulint* col_map;/*!< mapping of column numbers
+ in table to the rebuilt table
+ (index->table), or NULL if not
+ rebuilding table */
+ ulint n_dup; /*!< number of duplicates */
+};
+/*************************************************************//**
+Report a duplicate key. */
+UNIV_INTERN
+void
+row_merge_dup_report(
+/*=================*/
+ row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
+ const dfield_t* entry) /*!< in: duplicate index entry */
+ __attribute__((nonnull));
/*********************************************************************//**
Sets an exclusive lock on a table, for the duration of creating indexes.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_merge_lock_table(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode); /*!< in: LOCK_X or LOCK_S */
+ enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
-Drop an index from the InnoDB system tables. The data dictionary must
-have been locked exclusively by the caller, because the transaction
-will not be committed. */
+Drop indexes that were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
UNIV_INTERN
void
-row_merge_drop_index(
-/*=================*/
- dict_index_t* index, /*!< in: index to be removed */
- dict_table_t* table, /*!< in: table */
- trx_t* trx); /*!< in: transaction handle */
+row_merge_drop_indexes_dict(
+/*========================*/
+ trx_t* trx, /*!< in/out: dictionary transaction */
+ table_id_t table_id)/*!< in: table identifier */
+ __attribute__((nonnull));
/*********************************************************************//**
-Drop those indexes which were created before an error occurred when
-building an index. The data dictionary must have been locked
-exclusively by the caller, because the transaction will not be
-committed. */
+Drop those indexes which were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
UNIV_INTERN
void
row_merge_drop_indexes(
/*===================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: table containing the indexes */
- dict_index_t** index, /*!< in: indexes to drop */
- ulint num_created); /*!< in: number of elements in index[] */
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in/out: table containing the indexes */
+ ibool locked) /*!< in: TRUE=table locked,
+ FALSE=may need to do a lazy drop */
+ __attribute__((nonnull));
/*********************************************************************//**
Drop all partially created indexes during crash recovery. */
UNIV_INTERN
void
row_merge_drop_temp_indexes(void);
/*=============================*/
+
+/*********************************************************************//**
+Create a temporary merge file and, if UNIV_PFS_IO is defined, register
+the file descriptor with Performance Schema.
+@return File descriptor */
+UNIV_INTERN
+int
+row_merge_file_create_low(void)
+/*===========================*/
+ __attribute__((warn_unused_result));
+/*********************************************************************//**
+Destroy a merge file and, if UNIV_PFS_IO is defined, de-register the
+file from Performance Schema.
+UNIV_INTERN
+void
+row_merge_file_destroy_low(
+/*=======================*/
+ int fd); /*!< in: merge file descriptor */
+
+/*********************************************************************//**
+Provide a new pathname for a table that is being renamed if it belongs to
+a file-per-table tablespace. The caller is responsible for freeing the
+memory allocated for the return value.
+@return new pathname of tablespace file, or NULL if space = 0 */
+UNIV_INTERN
+char*
+row_make_new_pathname(
+/*==================*/
+ dict_table_t* table, /*!< in: table to be renamed */
+ const char* new_name); /*!< in: new name */
/*********************************************************************//**
Rename the tables in the data dictionary. The data dictionary must
have been locked exclusively by the caller, because the transaction
will not be committed.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
-row_merge_rename_tables(
-/*====================*/
+dberr_t
+row_merge_rename_tables_dict(
+/*=========================*/
dict_table_t* old_table, /*!< in/out: old table, renamed to
tmp_name */
dict_table_t* new_table, /*!< in/out: new table, renamed to
old_table->name */
const char* tmp_name, /*!< in: new name for old_table */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in/out: dictionary transaction */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
-Create a temporary table for creating a primary key, using the definition
-of an existing table.
-@return table, or NULL on error */
+Rename an index in the dictionary that was created. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-dict_table_t*
-row_merge_create_temporary_table(
-/*=============================*/
- const char* table_name, /*!< in: new table name */
- const merge_index_def_t*index_def, /*!< in: the index definition
- of the primary key */
- const dict_table_t* table, /*!< in: old table definition */
- trx_t* trx); /*!< in/out: transaction
- (sets error_state) */
-/*********************************************************************//**
-Rename the temporary indexes in the dictionary to permanent ones. The
-data dictionary must have been locked exclusively by the caller,
-because the transaction will not be committed.
+dberr_t
+row_merge_rename_index_to_add(
+/*==========================*/
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Rename an index in the dictionary that is to be dropped. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
-row_merge_rename_indexes(
-/*=====================*/
+dberr_t
+row_merge_rename_index_to_drop(
+/*===========================*/
trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table); /*!< in/out: table with new indexes */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
+ __attribute__((nonnull));
/*********************************************************************//**
Create the index and load in to the dictionary.
@return index, or NULL on error */
@@ -152,7 +251,7 @@ row_merge_create_index(
/*===================*/
trx_t* trx, /*!< in/out: trx (sets error_state) */
dict_table_t* table, /*!< in: the index is on this table */
- const merge_index_def_t*index_def);
+ const index_def_t* index_def);
/*!< in: the index definition */
/*********************************************************************//**
Check if a transaction can use an index.
@@ -164,23 +263,25 @@ row_merge_is_index_usable(
const trx_t* trx, /*!< in: transaction */
const dict_index_t* index); /*!< in: index to check */
/*********************************************************************//**
-If there are views that refer to the old table name then we "attach" to
-the new instance of the table else we drop it immediately.
+Drop a table. The caller must have ensured that the background stats
+thread is not processing the table. This can be done by calling
+dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
+before calling this function.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_drop_table(
/*=================*/
trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table instance to drop */
-
+ dict_table_t* table) /*!< in: table instance to drop */
+ __attribute__((nonnull));
/*********************************************************************//**
Build indexes on a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_build_indexes(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -189,9 +290,141 @@ row_merge_build_indexes(
dict_table_t* new_table, /*!< in: table where indexes are
created; identical to old_table
unless creating a PRIMARY KEY */
+ bool online, /*!< in: true if creating indexes
+ online */
dict_index_t** indexes, /*!< in: indexes to be created */
+ const ulint* key_numbers, /*!< in: MySQL key numbers */
ulint n_indexes, /*!< in: size of indexes[] */
- struct TABLE* table); /*!< in/out: MySQL table, for
+ struct TABLE* table, /*!< in/out: MySQL table, for
reporting erroneous key value
if applicable */
+ const dtuple_t* add_cols, /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map, /*!< in: mapping of old column
+ numbers to new ones, or NULL
+ if old_table == new_table */
+ ulint add_autoinc, /*!< in: number of added
+ AUTO_INCREMENT column, or
+ ULINT_UNDEFINED if none is added */
+ ib_sequence_t& sequence) /*!< in/out: autoinc sequence */
+ __attribute__((nonnull(1,2,3,5,6,8), warn_unused_result));
+/********************************************************************//**
+Write a buffer to a block. */
+UNIV_INTERN
+void
+row_merge_buf_write(
+/*================*/
+ const row_merge_buf_t* buf, /*!< in: sorted buffer */
+ const merge_file_t* of, /*!< in: output file */
+ row_merge_block_t* block) /*!< out: buffer for writing to file */
+ __attribute__((nonnull));
+/********************************************************************//**
+Sort a buffer. */
+UNIV_INTERN
+void
+row_merge_buf_sort(
+/*===============*/
+ row_merge_buf_t* buf, /*!< in/out: sort buffer */
+ row_merge_dup_t* dup) /*!< in/out: reporter of duplicates
+ (NULL if non-unique index) */
+ __attribute__((nonnull(1)));
+/********************************************************************//**
+Write a merge block to the file system.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
+ibool
+row_merge_write(
+/*============*/
+ int fd, /*!< in: file descriptor */
+ ulint offset, /*!< in: offset where to write,
+ in number of row_merge_block_t elements */
+ const void* buf); /*!< in: data */
+/********************************************************************//**
+Empty a sort buffer.
+@return sort buffer */
+UNIV_INTERN
+row_merge_buf_t*
+row_merge_buf_empty(
+/*================*/
+ row_merge_buf_t* buf) /*!< in,own: sort buffer */
+ __attribute__((warn_unused_result, nonnull));
+/*********************************************************************//**
+Create a merge file.
+@return file descriptor, or -1 on failure */
+UNIV_INTERN
+int
+row_merge_file_create(
+/*==================*/
+ merge_file_t* merge_file) /*!< out: merge file structure */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Merge disk files.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_merge_sort(
+/*===========*/
+ trx_t* trx, /*!< in: transaction */
+ const row_merge_dup_t* dup, /*!< in: descriptor of
+ index being created */
+ merge_file_t* file, /*!< in/out: file containing
+ index entries */
+ row_merge_block_t* block, /*!< in/out: 3 buffers */
+ int* tmpfd) /*!< in/out: temporary file handle */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Allocate a sort buffer.
+@return own: sort buffer */
+UNIV_INTERN
+row_merge_buf_t*
+row_merge_buf_create(
+/*=================*/
+ dict_index_t* index) /*!< in: secondary index */
+ __attribute__((warn_unused_result, nonnull, malloc));
+/*********************************************************************//**
+Deallocate a sort buffer. */
+UNIV_INTERN
+void
+row_merge_buf_free(
+/*===============*/
+ row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Destroy a merge file. */
+UNIV_INTERN
+void
+row_merge_file_destroy(
+/*===================*/
+ merge_file_t* merge_file) /*!< in/out: merge file structure */
+ __attribute__((nonnull));
+/********************************************************************//**
+Read a merge block from the file system.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
+ibool
+row_merge_read(
+/*===========*/
+ int fd, /*!< in: file descriptor */
+ ulint offset, /*!< in: offset where to read
+ in number of row_merge_block_t
+ elements */
+ row_merge_block_t* buf); /*!< out: data */
+/********************************************************************//**
+Read a merge record.
+@return pointer to next record, or NULL on I/O error or end of list */
+UNIV_INTERN
+const byte*
+row_merge_read_rec(
+/*===============*/
+ row_merge_block_t* block, /*!< in/out: file buffer */
+ mrec_buf_t* buf, /*!< in/out: secondary buffer */
+ const byte* b, /*!< in: pointer to record */
+ const dict_index_t* index, /*!< in: index of the record */
+ int fd, /*!< in: file descriptor */
+ ulint* foffs, /*!< in/out: file offset */
+ const mrec_t** mrec, /*!< out: pointer to merge record,
+ or NULL on end of list
+ (non-NULL on I/O error) */
+ ulint* offsets)/*!< out: offsets of mrec */
+ __attribute__((nonnull, warn_unused_result));
#endif /* row0merge.h */
diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h
index 35378bf3302..cd37a2f69bb 100644
--- a/storage/xtradb/include/row0mysql.h
+++ b/storage/xtradb/include/row0mysql.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -36,9 +36,12 @@ Created 9/17/2000 Heikki Tuuri
#include "btr0pcur.h"
#include "trx0types.h"
+// Forward declaration
+struct SysIndexCallback;
+
extern ibool row_rollback_on_timeout;
-typedef struct row_prebuilt_struct row_prebuilt_t;
+struct row_prebuilt_t;
/*******************************************************************//**
Frees the blob heap in prebuilt when no longer needed. */
@@ -116,7 +119,7 @@ row_mysql_pad_col(
/**************************************************************//**
Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.c.
+row0sel.cc.
@return up to which byte we used buf in the conversion */
UNIV_INTERN
byte*
@@ -127,7 +130,10 @@ row_mysql_store_col_in_innobase_format(
this function is called! */
byte* buf, /*!< in/out: buffer for a converted
integer value; this must be at least
- col_len long then! */
+ col_len long then! NOTE that dfield
+ may also get a pointer to 'buf',
+ therefore do not discard this as long
+ as dfield is used! */
ibool row_format_col, /*!< TRUE if the mysql_data is from
a MySQL row, FALSE if from a MySQL
key value;
@@ -149,18 +155,19 @@ row_mysql_store_col_in_innobase_format(
ulint comp); /*!< in: nonzero=compact format */
/****************************************************************//**
Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running the
+@return true if it was a lock wait and we should continue running the
query thread */
UNIV_INTERN
-ibool
+bool
row_mysql_handle_errors(
/*====================*/
- ulint* new_err,/*!< out: possible new error encountered in
+ dberr_t* new_err,/*!< out: possible new error encountered in
rollback, or the old error which was
during the function entry */
trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept);/*!< in: savepoint */
+ que_thr_t* thr, /*!< in: query thread, or NULL */
+ trx_savept_t* savept) /*!< in: savepoint, or NULL */
+ __attribute__((nonnull(1,2)));
/********************************************************************//**
Create a prebuilt struct for a MySQL table handle.
@return own: a prebuilt struct */
@@ -190,15 +197,6 @@ row_update_prebuilt_trx(
in MySQL handle */
trx_t* trx); /*!< in: transaction handle */
/*********************************************************************//**
-Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
-function should be called at the the end of an SQL statement, by the
-connection thread that owns the transaction (trx->mysql_thd). */
-UNIV_INTERN
-void
-row_unlock_table_autoinc_for_mysql(
-/*===============================*/
- trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
AUTO_INC lock gives exclusive access to the auto-inc counter of the
table. The lock is reserved only for the duration of an SQL statement.
@@ -206,16 +204,17 @@ It is not compatible with another AUTO_INC or exclusive lock on the
table.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_autoinc_for_mysql(
/*=============================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in the MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
table handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets a table lock on the table mentioned in prebuilt.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_for_mysql(
/*=====================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
@@ -224,19 +223,20 @@ row_lock_table_for_mysql(
if prebuilt->table should be
locked as
prebuilt->select_lock_type */
- ulint mode); /*!< in: lock mode of table
+ ulint mode) /*!< in: lock mode of table
(ignored if table==NULL) */
-
+ __attribute__((nonnull(1)));
/*********************************************************************//**
Does an insert for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_insert_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: row in the MySQL format */
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Builds a dummy query graph used in selects. */
UNIV_INTERN
@@ -269,13 +269,14 @@ row_table_got_default_clust_index(
Does an update or delete of a row for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_update_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: the row to be updated, in
the MySQL format */
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
session is using a READ COMMITTED or READ UNCOMMITTED isolation level.
@@ -284,19 +285,31 @@ initialized prebuilt->new_rec_locks to store the information which new
record locks really were set. This function removes a newly set
clustered index record lock under prebuilt->pcur or
prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
-releases the latest clustered index record lock we set.
-@return error code or DB_SUCCESS */
+releases the latest clustered index record lock we set. */
UNIV_INTERN
-int
+void
row_unlock_for_mysql(
/*=================*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL
handle */
- ibool has_latches_on_recs);/*!< in: TRUE if called
+ ibool has_latches_on_recs)/*!< in: TRUE if called
so that we have the latches on
the records under pcur and
clust_pcur, and we do not need
to reposition the cursors. */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Checks if a table name contains the string "/#sql" which denotes temporary
+tables in MySQL.
+@return true if temporary table */
+UNIV_INTERN
+bool
+row_is_mysql_tmp_table_name(
+/*========================*/
+ const char* name) __attribute__((warn_unused_result));
+ /*!< in: table name in the form
+ 'database/tablename' */
+
/*********************************************************************//**
Creates a query graph node of 'update' type to be used in the MySQL
interface.
@@ -311,13 +324,14 @@ row_create_update_node_for_mysql(
Does a cascaded delete or set null in a foreign key operation.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_update_cascade_for_mysql(
/*=========================*/
que_thr_t* thr, /*!< in: query thread */
upd_node_t* node, /*!< in: update node used in the cascade
or set null operation */
- dict_table_t* table); /*!< in: table where we do the operation */
+ dict_table_t* table) /*!< in: table where we do the operation */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Locks the data dictionary exclusively for performing a table create or other
data dictionary modification operation. */
@@ -361,49 +375,38 @@ Creates a table for MySQL. If the name of the table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also start the printing of monitor
output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate().
+InnoDB will try to invoke mem_validate(). On failure the transaction will
+be rolled back.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_table_for_mysql(
/*=======================*/
- dict_table_t* table, /*!< in, own: table definition
- (will be freed) */
- trx_t* trx); /*!< in: transaction handle */
+ dict_table_t* table, /*!< in, own: table definition
+ (will be freed, or on DB_SUCCESS
+ added to the data dictionary cache) */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: if true, commit the transaction */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Does an index creation operation for MySQL. TODO: currently failure
to create an index results in dropping the whole table! This is no problem
currently as all indexes must be created at the same time as the table.
@return error number or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_index_for_mysql(
/*=======================*/
dict_index_t* index, /*!< in, own: index definition
(will be freed) */
trx_t* trx, /*!< in: transaction handle */
- const ulint* field_lengths); /*!< in: if not NULL, must contain
+ const ulint* field_lengths) /*!< in: if not NULL, must contain
dict_index_get_n_fields(index)
actual field lengths for the
index columns, which are
then checked for not being too
large. */
-/*********************************************************************//**
-*/
-UNIV_INTERN
-int
-row_insert_stats_for_mysql(
-/*=======================*/
- dict_index_t* index,
- trx_t* trx);
-/*********************************************************************//**
-*/
-UNIV_INTERN
-int
-row_delete_stats_for_mysql(
-/*=======================*/
- dict_index_t* index,
- trx_t* trx);
+ __attribute__((nonnull(1,2), warn_unused_result));
/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
@@ -413,7 +416,7 @@ bot participating tables. The indexes are allowed to contain more
fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_table_add_foreign_constraints(
/*==============================*/
trx_t* trx, /*!< in: transaction */
@@ -426,12 +429,12 @@ row_table_add_foreign_constraints(
const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks); /*!< in: if TRUE, fail with error
+ ibool reject_fks) /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
-
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
-The master thread in srv0srv.c calls this regularly to drop tables which
+The master thread in srv0srv.cc calls this regularly to drop tables which
we must drop in background after queries to them have ended. Such lazy
dropping of tables is needed in ALTER TABLE on Unix.
@return how many tables dropped + remaining tables in list */
@@ -448,14 +451,28 @@ ulint
row_get_background_drop_list_len_low(void);
/*======================================*/
/*********************************************************************//**
+Sets a table lock (LOCK_X or LOCK_S, as given by mode) on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_lock_table(
+/*=================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in: table to lock */
+ enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
+ const char* op_info) /*!< in: string for trx->op_info */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
Truncates a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_truncate_table_for_mysql(
/*=========================*/
dict_table_t* table, /*!< in: table handle */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Drops a table for MySQL. If the name of the dropped table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
@@ -465,12 +482,16 @@ by the transaction, the transaction will be committed. Otherwise, the
data dictionary will remain locked.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_table_for_mysql(
/*=====================*/
const char* name, /*!< in: table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool drop_db);/*!< in: TRUE=dropping whole database */
+ trx_t* trx, /*!< in: dictionary transaction handle */
+ bool drop_db,/*!< in: true=dropping whole database */
+ bool nonatomic = true)
+ /*!< in: whether it is permitted
+ to release and reacquire dict_operation_lock */
+ __attribute__((nonnull));
/*********************************************************************//**
Drop all temporary tables during crash recovery. */
UNIV_INTERN
@@ -484,73 +505,102 @@ means that this function deletes the .ibd file and assigns a new table id for
the table. Also the flag table->ibd_file_missing is set TRUE.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_discard_tablespace_for_mysql(
/*=============================*/
const char* name, /*!< in: table name */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_import_tablespace_for_mysql(
/*============================*/
- const char* name, /*!< in: table name */
- trx_t* trx); /*!< in: transaction handle */
+ dict_table_t* table, /*!< in/out: table */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Drops a database for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_database_for_mysql(
/*========================*/
const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull));
/*********************************************************************//**
Renames a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_rename_table_for_mysql(
/*=======================*/
const char* old_name, /*!< in: old table name */
const char* new_name, /*!< in: new table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool commit); /*!< in: if TRUE then commit trx */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: whether to commit trx */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks that the index contains entries in an ascending order, unique
constraint is not broken, and calculates the number of index entries
in the read view of the current transaction.
-@return DB_SUCCESS if ok */
+@return true if ok */
UNIV_INTERN
-ulint
+bool
row_check_index_for_mysql(
/*======================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
in MySQL handle */
const dict_index_t* index, /*!< in: index */
- ulint* n_rows); /*!< out: number of entries
+ ulint* n_rows) /*!< out: number of entries
seen in the consistent read */
-
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Determines if a table is a magic monitor table.
-@return TRUE if monitor table */
+@return true if monitor table */
UNIV_INTERN
-ibool
+bool
row_is_magic_monitor_table(
/*=======================*/
- const char* table_name); /*!< in: name of the table, in the
+ const char* table_name) /*!< in: name of the table, in the
form database/table_name */
+ __attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Initialize this module */
+UNIV_INTERN
+void
+row_mysql_init(void);
+/*================*/
+
+/*********************************************************************//**
+Close this module */
+UNIV_INTERN
+void
+row_mysql_close(void);
+/*=================*/
+
+/*********************************************************************//**
+Reassigns the table identifier of a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_table_id_reassign(
+/*========================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t* new_id) /*!< out: new table id */
+ __attribute__((nonnull, warn_unused_result));
/* A struct describing a place for an individual column in the MySQL
row format which is presented to the table handler in ha_innobase.
This template struct is used to speed up row transformations between
Innobase and MySQL. */
-typedef struct mysql_row_templ_struct mysql_row_templ_t;
-struct mysql_row_templ_struct {
+struct mysql_row_templ_t {
ulint col_no; /*!< column number of the column */
ulint rec_field_no; /*!< field number of the column in an
Innobase record in the current index;
@@ -606,7 +656,7 @@ struct mysql_row_templ_struct {
handle used within MySQL; these are used to save CPU time. */
-struct row_prebuilt_struct {
+struct row_prebuilt_t {
ulint magic_n; /*!< this magic number is set to
ROW_PREBUILT_ALLOCATED when created,
or ROW_PREBUILT_FREED when the
@@ -691,8 +741,11 @@ struct row_prebuilt_struct {
columns in the table */
upd_node_t* upd_node; /*!< Innobase SQL update node used
to perform updates and deletes */
+ trx_id_t trx_id; /*!< The table->def_trx_id when
+ ins_graph was built */
que_fork_t* ins_graph; /*!< Innobase SQL query graph used
- in inserts */
+ in inserts. Will be rebuilt on
+ trx_id or n_indexes mismatch. */
que_fork_t* upd_graph; /*!< Innobase SQL query graph used
in updates or deletes */
btr_pcur_t pcur; /*!< persistent cursor used in selects
@@ -707,6 +760,12 @@ struct row_prebuilt_struct {
generated, the row id of the
last row fetched is stored
here */
+ doc_id_t fts_doc_id; /* if the table has an FTS index on
+ it then we fetch the doc_id.
+ FTS-FIXME: Currently we fetch it always
+ but in the future we must only fetch
+ it when FTS columns are being
+ updated */
dtuple_t* clust_ref; /*!< prebuilt dtuple used in
sel/upd/del */
ulint select_lock_type;/*!< LOCK_NONE, LOCK_S, or LOCK_X */
@@ -783,6 +842,7 @@ struct row_prebuilt_struct {
to this heap */
mem_heap_t* old_vers_heap; /*!< memory heap where a previous
version is built in consistent read */
+ bool in_fts_query; /*!< Whether we are in a FTS query */
/*----------------------*/
ulonglong autoinc_last_value;
/*!< last value of AUTO-INC interval */
@@ -793,7 +853,7 @@ struct row_prebuilt_struct {
ulonglong autoinc_offset; /*!< The offset passed to
get_auto_increment() by MySQL. Required
to calculate the next value */
- ulint autoinc_error; /*!< The actual error code encountered
+ dberr_t autoinc_error; /*!< The actual error code encountered
while trying to init or read the
autoinc value from the table. We
store it here so that we can return
@@ -808,6 +868,20 @@ struct row_prebuilt_struct {
/*----------------------*/
ulint magic_n2; /*!< this should be the same as
magic_n */
+ /*----------------------*/
+ unsigned innodb_api:1; /*!< whether this is a InnoDB API
+ query */
+ const rec_t* innodb_api_rec; /*!< InnoDB API search result */
+};
+
+/** Callback for row_mysql_sys_index_iterate() */
+struct SysIndexCallback {
+ virtual ~SysIndexCallback() { }
+
+ /** Callback method
+ @param mtr - current mini transaction
+ @param pcur - persistent cursor. */
+ virtual void operator()(mtr_t* mtr, btr_pcur_t* pcur) throw() = 0;
};
#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
@@ -831,4 +905,4 @@ struct row_prebuilt_struct {
#include "row0mysql.ic"
#endif
-#endif
+#endif /* row0mysql.h */
diff --git a/storage/xtradb/include/row0mysql.ic b/storage/xtradb/include/row0mysql.ic
index 878523528b2..2eb60898c46 100644
--- a/storage/xtradb/include/row0mysql.ic
+++ b/storage/xtradb/include/row0mysql.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2001, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2001, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/row0purge.h b/storage/xtradb/include/row0purge.h
index fa9c9291d5d..93dcf9cf49b 100644
--- a/storage/xtradb/include/row0purge.h
+++ b/storage/xtradb/include/row0purge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -34,6 +34,8 @@ Created 3/14/1997 Heikki Tuuri
#include "trx0types.h"
#include "que0types.h"
#include "row0types.h"
+#include "row0purge.h"
+#include "ut0vec.h"
/********************************************************************//**
Creates a purge node to a query graph.
@@ -42,8 +44,10 @@ UNIV_INTERN
purge_node_t*
row_purge_node_create(
/*==================*/
- que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
- mem_heap_t* heap); /*!< in: memory heap where created */
+ que_thr_t* parent, /*!< in: parent node, i.e., a
+ thr node */
+ mem_heap_t* heap) /*!< in: memory heap where created */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
@@ -53,19 +57,20 @@ is newer than the purge view.
NOTE: This function should only be called by the purge thread, only
while holding a latch on the leaf page of the secondary index entry
(or keeping the buffer pool watch on the page). It is possible that
-this function first returns TRUE and then FALSE, if a user transaction
+this function first returns true and then false, if a user transaction
inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
-@return TRUE if the secondary index record can be purged */
+@return true if the secondary index record can be purged */
UNIV_INTERN
-ibool
+bool
row_purge_poss_sec(
/*===============*/
purge_node_t* node, /*!< in/out: row purge node */
dict_index_t* index, /*!< in: secondary index */
- const dtuple_t* entry); /*!< in: secondary index entry */
+ const dtuple_t* entry) /*!< in: secondary index entry */
+ __attribute__((nonnull, warn_unused_result));
/***************************************************************
Does the purge operation for a single undo log record. This is a high-level
function used in an SQL execution graph.
@@ -74,29 +79,26 @@ UNIV_INTERN
que_thr_t*
row_purge_step(
/*===========*/
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/* Purge node structure */
-struct purge_node_struct{
+struct purge_node_t{
que_common_t common; /*!< node type: QUE_NODE_PURGE */
/*----------------------*/
/* Local storage for this graph node */
roll_ptr_t roll_ptr;/* roll pointer to undo log record */
- trx_undo_rec_t* undo_rec;/* undo log record */
- trx_undo_inf_t* reservation;/* reservation for the undo log record in
- the purge array */
+ ib_vector_t* undo_recs;/*!< Undo recs to purge */
+
undo_no_t undo_no;/* undo number of the record */
+
ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
... */
- btr_pcur_t pcur; /*!< persistent cursor used in searching the
- clustered index record */
- ibool found_clust;/* TRUE if the clustered index record
- determined by ref was found in the clustered
- index, and we were able to position pcur on
- it */
dict_table_t* table; /*!< table where purge is done */
+
ulint cmpl_info;/* compiler analysis info of an update */
+
upd_t* update; /*!< update vector for a clustered index
record */
dtuple_t* ref; /*!< NULL, or row reference to the next row to
@@ -109,6 +111,14 @@ struct purge_node_struct{
mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
row; this must be emptied after a successful
purge of a row */
+ ibool found_clust;/* TRUE if the clustered index record
+ determined by ref was found in the clustered
+ index, and we were able to position pcur on
+ it */
+ btr_pcur_t pcur; /*!< persistent cursor used in searching the
+ clustered index record */
+ ibool done; /* Debug flag */
+
};
#ifndef UNIV_NONINL
diff --git a/storage/xtradb/include/row0purge.ic b/storage/xtradb/include/row0purge.ic
index 6465c2ca971..700106d1048 100644
--- a/storage/xtradb/include/row0purge.ic
+++ b/storage/xtradb/include/row0purge.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/row0quiesce.h b/storage/xtradb/include/row0quiesce.h
new file mode 100644
index 00000000000..1d6d11291b8
--- /dev/null
+++ b/storage/xtradb/include/row0quiesce.h
@@ -0,0 +1,74 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0quiesce.h
+
+Header file for tablespace quiesce functions.
+
+Created 2012-02-08 by Sunny Bains
+*******************************************************/
+
+#ifndef row0quiesce_h
+#define row0quiesce_h
+
+#include "univ.i"
+#include "dict0types.h"
+
+struct trx_t;
+
+/** The version number of the export meta-data text file. */
+#define IB_EXPORT_CFG_VERSION_V1 0x1UL
+
+/*********************************************************************//**
+Quiesce the tablespace that the table resides in. */
+UNIV_INTERN
+void
+row_quiesce_table_start(
+/*====================*/
+ dict_table_t* table, /*!< in: quiesce this table */
+ trx_t* trx) /*!< in/out: transaction/session */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Set a table's quiesce state.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_quiesce_set_state(
+/*==================*/
+ dict_table_t* table, /*!< in: quiesce this table */
+ ib_quiesce_t state, /*!< in: quiesce state to set */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Cleanup after table quiesce. */
+UNIV_INTERN
+void
+row_quiesce_table_complete(
+/*=======================*/
+ dict_table_t* table, /*!< in: quiesce this table */
+ trx_t* trx) /*!< in/out: transaction/session */
+ __attribute__((nonnull));
+
+#ifndef UNIV_NONINL
+#include "row0quiesce.ic"
+#endif
+
+#endif /* row0quiesce_h */
diff --git a/storage/xtradb/include/row0quiesce.ic b/storage/xtradb/include/row0quiesce.ic
new file mode 100644
index 00000000000..f570a6aed05
--- /dev/null
+++ b/storage/xtradb/include/row0quiesce.ic
@@ -0,0 +1,26 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0quiesce.ic
+
+Quiesce a tablespace.
+
+Created 2012-02-08 Sunny Bains
+*******************************************************/
+
diff --git a/storage/xtradb/include/row0row.h b/storage/xtradb/include/row0row.h
index bf135217bd0..a4e5e0dd2fa 100644
--- a/storage/xtradb/include/row0row.h
+++ b/storage/xtradb/include/row0row.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -73,20 +73,41 @@ row_get_rec_roll_ptr(
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged
+@retval NULL if the externally stored columns in the clustered index record
+are unavailable and ext != NULL, or row is missing some needed columns. */
+UNIV_INTERN
+dtuple_t*
+row_build_index_entry_low(
+/*======================*/
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
+ __attribute__((warn_unused_result, nonnull(1,3,4)));
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
@return index entry which should be inserted or purged, or NULL if the
externally stored columns in the clustered index record are
unavailable and ext != NULL */
-UNIV_INTERN
+UNIV_INLINE
dtuple_t*
row_build_index_entry(
/*==================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- row_ext_t* ext, /*!< in: externally stored column prefixes,
- or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap); /*!< in: memory heap from which the memory for
- the index entry is allocated */
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
+ __attribute__((warn_unused_result, nonnull(1,3,4)));
/*******************************************************************//**
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index.
@@ -124,11 +145,17 @@ row_build(
consulted instead; the user
columns in this table should be
the same columns as in index->table */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map,/*!< in: mapping of old column
+ numbers to new ones, or NULL */
row_ext_t** ext, /*!< out, own: cache of
externally stored column
prefixes, or NULL */
- mem_heap_t* heap); /*!< in: memory heap from which
+ mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
+ __attribute__((nonnull(2,3,9)));
/*******************************************************************//**
Converts an index record to a typed data tuple.
@return index entry built; does not set info_bits, and the data fields
@@ -142,37 +169,25 @@ row_rec_to_index_entry_low(
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint* n_ext, /*!< out: number of externally
stored columns */
- mem_heap_t* heap); /*!< in: memory heap from which
+ mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap.
-@return own: index entry built; see the NOTE below! */
+@return own: index entry built */
UNIV_INTERN
dtuple_t*
row_rec_to_index_entry(
/*===================*/
- ulint type, /*!< in: ROW_COPY_DATA, or
- ROW_COPY_POINTERS: the former
- copies also the data fields to
- heap as the latter only places
- pointers to data fields on the
- index page */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the dtuple is used! */
+ const rec_t* rec, /*!< in: record in the index */
const dict_index_t* index, /*!< in: index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec) */
ulint* n_ext, /*!< out: number of externally
stored columns */
- mem_heap_t* heap); /*!< in: memory heap from which
+ mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record.
@@ -193,8 +208,9 @@ row_build_row_ref(
the buffer page of this record must be
at least s-latched and the latch held
as long as the row reference is used! */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
+ mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
@@ -215,7 +231,8 @@ row_build_row_ref_in_tuple(
const dict_index_t* index, /*!< in: secondary index */
ulint* offsets,/*!< in: rec_get_offsets(rec, index)
or NULL */
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction or NULL */
+ __attribute__((nonnull(1,2,3)));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
@@ -245,7 +262,8 @@ row_search_on_row_ref(
ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
const dict_table_t* table, /*!< in: table */
const dtuple_t* ref, /*!< in: row reference */
- mtr_t* mtr); /*!< in/out: mtr */
+ mtr_t* mtr) /*!< in/out: mtr */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Fetches the clustered index record for a secondary index record. The latches
on the secondary index record are preserved.
@@ -258,7 +276,8 @@ row_get_clust_rec(
const rec_t* rec, /*!< in: record in a secondary index */
dict_index_t* index, /*!< in: secondary index */
dict_index_t** clust_index,/*!< out: clustered index */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
/** Result of row_search_index_entry */
enum row_search_result {
@@ -285,8 +304,8 @@ row_search_index_entry(
ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
be closed by the caller */
- mtr_t* mtr); /*!< in: mtr */
-
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
#define ROW_COPY_DATA 1
#define ROW_COPY_POINTERS 2
@@ -294,10 +313,7 @@ row_search_index_entry(
/* The allowed latching order of index records is the following:
(1) a secondary index record ->
(2) the clustered index record ->
-(3) rollback segment data for the clustered index record.
-
-No new latches may be obtained while the kernel mutex is reserved.
-However, the kernel mutex can be reserved while latches are owned. */
+(3) rollback segment data for the clustered index record. */
/*******************************************************************//**
Formats the raw data in "data" (in InnoDB on-disk format) using
@@ -316,8 +332,9 @@ row_raw_format(
in bytes */
const dict_field_t* dict_field, /*!< in: index field */
char* buf, /*!< out: output buffer */
- ulint buf_size); /*!< in: output buffer size
+ ulint buf_size) /*!< in: output buffer size
in bytes */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_NONINL
#include "row0row.ic"
diff --git a/storage/xtradb/include/row0row.ic b/storage/xtradb/include/row0row.ic
index 831c2339d96..ac62422be1f 100644
--- a/storage/xtradb/include/row0row.ic
+++ b/storage/xtradb/include/row0row.ic
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -104,6 +104,33 @@ row_get_rec_roll_ptr(
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
}
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged, or NULL if
+row_build_index_entry_low() returns NULL (documented there as: externally
+stored columns unavailable and ext != NULL, or row missing needed columns) */
+UNIV_INLINE
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
+{
+ dtuple_t* entry;
+
+ ut_ad(dtuple_check_typed(row)); /* sanity check on the input row */
+ entry = row_build_index_entry_low(row, ext, index, heap); /* does the actual work */
+ ut_ad(!entry || dtuple_check_typed(entry)); /* entry may be NULL; check it only if built */
+ return(entry);
+}
+
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
diff --git a/storage/xtradb/include/row0sel.h b/storage/xtradb/include/row0sel.h
index 830615effc2..c8be80f89d9 100644
--- a/storage/xtradb/include/row0sel.h
+++ b/storage/xtradb/include/row0sel.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -148,7 +148,7 @@ position and fetch next or fetch prev must not be tried to the cursor!
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
UNIV_INTERN
-ulint
+dberr_t
row_search_for_mysql(
/*=================*/
byte* buf, /*!< in/out: buffer for the fetched
@@ -163,11 +163,12 @@ row_search_for_mysql(
'mode' */
ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
ROW_SEL_EXACT_PREFIX */
- ulint direction); /*!< in: 0 or ROW_SEL_NEXT or
+ ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
ROW_SEL_PREV; NOTE: if this is != 0,
then prebuilt must have a pcur
with stored position! In opening of a
cursor 'direction' should be 0. */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Checks if MySQL at the moment is allowed for this table to retrieve a
consistent read result, or store it to the query cache.
@@ -183,15 +184,16 @@ row_search_check_if_query_cache_permitted(
Read the max AUTOINC value from an index.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
row_search_max_autoinc(
/*===================*/
dict_index_t* index, /*!< in: index to search */
const char* col_name, /*!< in: autoinc column name */
- ib_uint64_t* value); /*!< out: AUTOINC value read */
+ ib_uint64_t* value) /*!< out: AUTOINC value read */
+ __attribute__((nonnull, warn_unused_result));
/** A structure for caching column values for prefetched rows */
-struct sel_buf_struct{
+struct sel_buf_t{
byte* data; /*!< data, or NULL; if not NULL, this field
has allocated memory which must be explicitly
freed; can be != NULL even when len is
@@ -204,7 +206,7 @@ struct sel_buf_struct{
};
/** Query plan */
-struct plan_struct{
+struct plan_t{
dict_table_t* table; /*!< table struct in the dictionary
cache */
dict_index_t* index; /*!< table index used in the search */
@@ -290,7 +292,7 @@ enum sel_node_state {
};
/** Select statement node */
-struct sel_node_struct{
+struct sel_node_t{
que_common_t common; /*!< node type: QUE_NODE_SELECT */
enum sel_node_state
state; /*!< node state */
@@ -343,7 +345,7 @@ struct sel_node_struct{
};
/** Fetch statement node */
-struct fetch_node_struct{
+struct fetch_node_t{
que_common_t common; /*!< type: QUE_NODE_FETCH */
sel_node_t* cursor_def; /*!< cursor definition */
sym_node_t* into_list; /*!< variables to set */
@@ -370,7 +372,7 @@ enum open_node_op {
};
/** Open or close cursor statement node */
-struct open_node_struct{
+struct open_node_t{
que_common_t common; /*!< type: QUE_NODE_OPEN */
enum open_node_op
op_type; /*!< operation type: open or
@@ -379,7 +381,7 @@ struct open_node_struct{
};
/** Row printf statement node */
-struct row_printf_node_struct{
+struct row_printf_node_t{
que_common_t common; /*!< type: QUE_NODE_ROW_PRINTF */
sel_node_t* sel_node; /*!< select */
};
diff --git a/storage/xtradb/include/row0sel.ic b/storage/xtradb/include/row0sel.ic
index 03c30e80dfe..d83a3448832 100644
--- a/storage/xtradb/include/row0sel.ic
+++ b/storage/xtradb/include/row0sel.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -92,7 +92,7 @@ open_step(
}
}
- if (UNIV_EXPECT(err, DB_SUCCESS) != DB_SUCCESS) {
+ if (err != DB_SUCCESS) {
/* SQL error detected */
fprintf(stderr, "SQL error %lu\n", (ulong) err);
diff --git a/storage/xtradb/include/row0types.h b/storage/xtradb/include/row0types.h
index b40094d05d6..52c89cb01fa 100644
--- a/storage/xtradb/include/row0types.h
+++ b/storage/xtradb/include/row0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,32 +26,28 @@ Created 12/27/1996 Heikki Tuuri
#ifndef row0types_h
#define row0types_h
-typedef struct plan_struct plan_t;
+struct plan_t;
-typedef struct upd_struct upd_t;
+struct upd_t;
+struct upd_field_t;
+struct upd_node_t;
+struct del_node_t;
+struct ins_node_t;
+struct sel_node_t;
+struct open_node_t;
+struct fetch_node_t;
-typedef struct upd_field_struct upd_field_t;
+struct row_printf_node_t;
+struct sel_buf_t;
-typedef struct upd_node_struct upd_node_t;
+struct undo_node_t;
-typedef struct del_node_struct del_node_t;
+struct purge_node_t;
-typedef struct ins_node_struct ins_node_t;
+struct row_ext_t;
-typedef struct sel_node_struct sel_node_t;
-
-typedef struct open_node_struct open_node_t;
-
-typedef struct fetch_node_struct fetch_node_t;
-
-typedef struct row_printf_node_struct row_printf_node_t;
-typedef struct sel_buf_struct sel_buf_t;
-
-typedef struct undo_node_struct undo_node_t;
-
-typedef struct purge_node_struct purge_node_t;
-
-typedef struct row_ext_struct row_ext_t;
+/** Buffer for logging modifications during online index creation */
+struct row_log_t;
/* MySQL data types */
struct TABLE;
diff --git a/storage/xtradb/include/row0uins.h b/storage/xtradb/include/row0uins.h
index 6809c6d9317..ebf4881208a 100644
--- a/storage/xtradb/include/row0uins.h
+++ b/storage/xtradb/include/row0uins.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -42,11 +42,11 @@ if it figures out that an index record will be removed in the purge
anyway, it will remove it in the rollback.
@return DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_undo_ins(
/*=========*/
- undo_node_t* node); /*!< in: row undo node */
-
+ undo_node_t* node) /*!< in: row undo node */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_NONINL
#include "row0uins.ic"
#endif
diff --git a/storage/xtradb/include/row0uins.ic b/storage/xtradb/include/row0uins.ic
index fb8a335191d..54da2e49874 100644
--- a/storage/xtradb/include/row0uins.ic
+++ b/storage/xtradb/include/row0uins.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/row0umod.h b/storage/xtradb/include/row0umod.h
index aca35ce2170..f89d5a334fc 100644
--- a/storage/xtradb/include/row0umod.h
+++ b/storage/xtradb/include/row0umod.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -38,12 +38,12 @@ Created 2/27/1997 Heikki Tuuri
Undoes a modify operation on a row of a table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_undo_mod(
/*=========*/
undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr); /*!< in: query thread */
-
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_NONINL
#include "row0umod.ic"
diff --git a/storage/xtradb/include/row0umod.ic b/storage/xtradb/include/row0umod.ic
index dd9e217fa20..00a8cd86e01 100644
--- a/storage/xtradb/include/row0umod.ic
+++ b/storage/xtradb/include/row0umod.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/row0undo.h b/storage/xtradb/include/row0undo.h
index d783c94a110..5dddfb4eae1 100644
--- a/storage/xtradb/include/row0undo.h
+++ b/storage/xtradb/include/row0undo.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -95,7 +95,7 @@ enum undo_exec {
};
/** Undo node structure */
-struct undo_node_struct{
+struct undo_node_t{
que_common_t common; /*!< node type: QUE_NODE_UNDO */
enum undo_exec state; /*!< node execution state */
trx_t* trx; /*!< trx for which undo is done */
diff --git a/storage/xtradb/include/row0undo.ic b/storage/xtradb/include/row0undo.ic
index 21723c88ecb..b97ffca590e 100644
--- a/storage/xtradb/include/row0undo.ic
+++ b/storage/xtradb/include/row0undo.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/row0upd.h b/storage/xtradb/include/row0upd.h
index 16c069d5ae8..27dedeb65a7 100644
--- a/storage/xtradb/include/row0upd.h
+++ b/storage/xtradb/include/row0upd.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -101,7 +101,7 @@ byte*
row_upd_write_sys_vals_to_log(
/*==========================*/
dict_index_t* index, /*!< in: clustered index */
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
in mlog */
@@ -118,8 +118,9 @@ row_upd_rec_sys_fields(
uncompressed part will be updated, or NULL */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */
+ const trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record,
+ can be 0 during IMPORT */
/*********************************************************************//**
Sets the trx id or roll ptr field of a clustered index entry. */
UNIV_INTERN
@@ -165,6 +166,15 @@ row_upd_changes_field_size_or_external(
dict_index_t* index, /*!< in: index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
const upd_t* update);/*!< in: update vector */
+/***********************************************************//**
+Returns true if row update contains disowned external fields.
+@return true if the update contains disowned external fields. */
+UNIV_INTERN
+bool
+row_upd_changes_disowned_external(
+/*==============================*/
+ const upd_t* update) /*!< in: update vector */
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Replaces the new column values stored in the update vector to the
@@ -192,11 +202,12 @@ UNIV_INTERN
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
+ const rec_t* rec, /*!< in: secondary index record */
dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: secondary index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ __attribute__((warn_unused_result, nonnull));
/***************************************************************//**
Builds an update vector from those fields, excluding the roll ptr and
trx id fields, which in an index entry differ from a record that has
@@ -204,14 +215,19 @@ the equal ordering fields. NOTE: we compare the fields as binary strings!
@return own: update vector of differing fields, excluding roll ptr and
trx id */
UNIV_INTERN
-upd_t*
+const upd_t*
row_upd_build_difference_binary(
/*============================*/
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* entry, /*!< in: entry to insert */
const rec_t* rec, /*!< in: clustered index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
+ bool no_sys, /*!< in: skip the system columns
+ DB_TRX_ID and DB_ROLL_PTR */
+ trx_t* trx, /*!< in: transaction (for diagnostics),
+ or NULL */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ __attribute__((nonnull(1,2,3,7), warn_unused_result));
/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
given. */
@@ -304,6 +320,26 @@ row_upd_changes_ord_field_binary_func(
row_upd_changes_ord_field_binary_func(index,update,row,ext)
#endif /* UNIV_DEBUG */
/***********************************************************//**
+Checks if an FTS indexed column is affected by an UPDATE.
+@return offset within fts_t::indexes if FTS indexed column updated else
+ULINT_UNDEFINED */
+UNIV_INTERN
+ulint
+row_upd_changes_fts_column(
+/*=======================*/
+ dict_table_t* table, /*!< in: table */
+ upd_field_t* upd_field); /*!< in: field to check */
+/***********************************************************//**
+Checks if an FTS Doc ID column is affected by an UPDATE.
+@return whether Doc ID column is affected */
+UNIV_INTERN
+bool
+row_upd_changes_doc_id(
+/*===================*/
+ dict_table_t* table, /*!< in: table */
+ upd_field_t* upd_field) /*!< in: field to check */
+ __attribute__((nonnull, warn_unused_result));
+/***********************************************************//**
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
fields in the index is small. Otherwise, this can be quadratic.
@@ -366,10 +402,10 @@ row_upd_index_parse(
/* Update vector field */
-struct upd_field_struct{
+struct upd_field_t{
unsigned field_no:16; /*!< field number in an index, usually
the clustered index, but in updating
- a secondary index record in btr0cur.c
+ a secondary index record in btr0cur.cc
this is the position in the secondary
index */
#ifndef UNIV_HOTBACKUP
@@ -385,7 +421,7 @@ struct upd_field_struct{
};
/* Update vector structure */
-struct upd_struct{
+struct upd_t{
ulint info_bits; /*!< new value of info bits to record;
default is 0 */
ulint n_fields; /*!< number of update fields */
@@ -396,7 +432,7 @@ struct upd_struct{
/* Update node structure which also implements the delete operation
of a row */
-struct upd_node_struct{
+struct upd_node_t{
que_common_t common; /*!< node type: QUE_NODE_UPDATE */
ibool is_delete;/* TRUE if delete, FALSE if update */
ibool searched_update;
diff --git a/storage/xtradb/include/row0upd.ic b/storage/xtradb/include/row0upd.ic
index 9b699455665..618a77fa4bf 100644
--- a/storage/xtradb/include/row0upd.ic
+++ b/storage/xtradb/include/row0upd.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -44,12 +44,11 @@ upd_create(
{
upd_t* update;
- update = (upd_t*) mem_heap_alloc(heap, sizeof(upd_t));
+ update = (upd_t*) mem_heap_zalloc(heap, sizeof(upd_t));
- update->info_bits = 0;
update->n_fields = n;
update->fields = (upd_field_t*)
- mem_heap_alloc(heap, sizeof(upd_field_t) * n);
+ mem_heap_zalloc(heap, sizeof(upd_field_t) * n);
return(update);
}
@@ -103,7 +102,7 @@ upd_field_set_field_no(
upd_field->field_no = field_no;
upd_field->orig_len = 0;
- if (UNIV_UNLIKELY(field_no >= dict_index_get_n_fields(index))) {
+ if (field_no >= dict_index_get_n_fields(index)) {
fprintf(stderr,
"InnoDB: Error: trying to access field %lu in ",
(ulong) field_no);
@@ -111,6 +110,7 @@ upd_field_set_field_no(
fprintf(stderr, "\n"
"InnoDB: but index only has %lu fields\n",
(ulong) dict_index_get_n_fields(index));
+ ut_ad(0);
}
dict_col_copy_type(dict_index_get_nth_col(index, field_no),
@@ -152,13 +152,14 @@ row_upd_rec_sys_fields(
uncompressed part will be updated, or NULL */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record */
+ const trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record,
+ can be 0 during IMPORT */
{
ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
ulint pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets,
pos, trx->id, roll_ptr);
@@ -172,8 +173,14 @@ row_upd_rec_sys_fields(
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
#endif
- ut_ad(lock_check_trx_id_sanity(trx_read_trx_id(rec + offset),
- rec, index, offsets, FALSE));
+ /* During IMPORT the trx id in the record can be in the
+ future, if the .ibd file is being imported from another
+ instance. During IMPORT roll_ptr will be 0. */
+ ut_ad(roll_ptr == 0
+ || lock_check_trx_id_sanity(
+ trx_read_trx_id(rec + offset),
+ rec, index, offsets));
+
trx_write_trx_id(rec + offset, trx->id);
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
}
diff --git a/storage/xtradb/include/row0vers.h b/storage/xtradb/include/row0vers.h
index 48d5fc43fd1..1df5b4d3e98 100644
--- a/storage/xtradb/include/row0vers.h
+++ b/storage/xtradb/include/row0vers.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -37,13 +37,15 @@ Created 2/6/1997 Heikki Tuuri
/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
-index record. NOTE: the kernel mutex is temporarily released in this
-function!
-@return NULL if committed, else the active transaction */
+index record.
+@return 0 if committed, else the active transaction id;
+NOTE that this function can return false positives but never false
+negatives. The caller must confirm all positive results by calling
+trx_is_active() while holding lock_sys->mutex. */
UNIV_INTERN
-trx_t*
-row_vers_impl_x_locked_off_kernel(
-/*==============================*/
+trx_id_t
+row_vers_impl_x_locked(
+/*===================*/
const rec_t* rec, /*!< in: record in a secondary index */
dict_index_t* index, /*!< in: the secondary index */
const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
@@ -85,7 +87,7 @@ read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
-ulint
+dberr_t
row_vers_build_for_consistent_read(
/*===============================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -104,16 +106,17 @@ row_vers_build_for_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers);/*!< out, own: old version, or NULL if the
- record does not exist in the view, that is,
+ rec_t** old_vers)/*!< out, own: old version, or NULL
+ if the history is missing or the record
+ does not exist in the view, that is,
it was freshly inserted afterwards */
+ __attribute__((nonnull(1,2,3,4,5,6,7)));
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
+which should be seen by a semi-consistent read. */
UNIV_INTERN
-ulint
+void
row_vers_build_for_semi_consistent_read(
/*====================================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -130,9 +133,10 @@ row_vers_build_for_semi_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- const rec_t** old_vers);/*!< out: rec, old version, or NULL if the
+ const rec_t** old_vers)/*!< out: rec, old version, or NULL if the
record does not exist in the view, that is,
it was freshly inserted afterwards */
+ __attribute__((nonnull(1,2,3,4,5)));
#ifndef UNIV_NONINL
diff --git a/storage/xtradb/include/row0vers.ic b/storage/xtradb/include/row0vers.ic
index 2687d1a9e15..ef43a55bf70 100644
--- a/storage/xtradb/include/row0vers.ic
+++ b/storage/xtradb/include/row0vers.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/srv0conc.h b/storage/xtradb/include/srv0conc.h
new file mode 100644
index 00000000000..9aee1b17bf0
--- /dev/null
+++ b/storage/xtradb/include/srv0conc.h
@@ -0,0 +1,111 @@
+/*****************************************************************************
+
+Copyright (c) 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file srv/srv0conc.h
+
+InnoDB concurrency manager header file
+
+Created 2011/04/18 Sunny Bains
+*******************************************************/
+
+#ifndef srv_conc_h
+#define srv_conc_h
+
+/** We are prepared for a situation that we have this many threads waiting for
+a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
+value. */
+
+extern ulint srv_max_n_threads;
+
+/** The following controls how many threads we let inside InnoDB concurrently:
+threads waiting for locks are not counted into the number because otherwise
+we could get a deadlock. Value of 0 will disable the concurrency check. */
+
+extern ulong srv_thread_concurrency;
+
+/*********************************************************************//**
+Initialise the concurrency management data structures */
+void
+srv_conc_init(void);
+/*===============*/
+
+/*********************************************************************//**
+Free the concurrency management data structures */
+void
+srv_conc_free(void);
+/*===============*/
+
+/*********************************************************************//**
+Puts an OS thread to wait if there are too many concurrent threads
+(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
+UNIV_INTERN
+void
+srv_conc_enter_innodb(
+/*==================*/
+ trx_t* trx); /*!< in: transaction object associated
+ with the thread */
+
+/*********************************************************************//**
+This lets a thread enter InnoDB regardless of the number of threads inside
+InnoDB. This must be called when a thread ends a lock wait. */
+UNIV_INTERN
+void
+srv_conc_force_enter_innodb(
+/*========================*/
+ trx_t* trx); /*!< in: transaction object associated with
+ the thread */
+
+/*********************************************************************//**
+This must be called when a thread exits InnoDB in a lock wait or at the
+end of an SQL statement. */
+UNIV_INTERN
+void
+srv_conc_force_exit_innodb(
+/*=======================*/
+ trx_t* trx); /*!< in: transaction object associated with
+ the thread */
+
+/*********************************************************************//**
+Get the count of threads waiting inside InnoDB. */
+UNIV_INTERN
+ulint
+srv_conc_get_waiting_threads(void);
+/*==============================*/
+
+/*********************************************************************//**
+Get the count of threads active inside InnoDB. */
+UNIV_INTERN
+ulint
+srv_conc_get_active_threads(void);
+/*==============================*/
+
+#endif /* srv_conc_h */
diff --git a/storage/xtradb/include/srv0mon.h b/storage/xtradb/include/srv0mon.h
new file mode 100644
index 00000000000..209894833a0
--- /dev/null
+++ b/storage/xtradb/include/srv0mon.h
@@ -0,0 +1,892 @@
+/***********************************************************************
+
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file include/srv0mon.h
+Server monitor counter related defines
+
+Created 12/15/2009 Jimmy Yang
+*******************************************************/
+
+#ifndef srv0mon_h
+#define srv0mon_h
+
+#include "univ.i"
+#ifndef UNIV_HOTBACKUP
+
+
+/** Possible status values for "mon_status" in "struct monitor_value" */
+enum monitor_running_status {
+ MONITOR_STARTED = 1, /*!< Monitor has been turned on */
+ MONITOR_STOPPED = 2 /*!< Monitor has been turned off */
+};
+
+typedef enum monitor_running_status monitor_running_t;
+
+/** Monitor counter value type */
+typedef ib_int64_t mon_type_t;
+
+/** Two monitor structures are defined in this file. One is
+"monitor_value_t" which contains dynamic counter values for each
+counter. The other is "monitor_info_t", which contains
+static information (counter name, desc etc.) for each counter.
+In addition, an enum datatype "monitor_id_t" is also defined,
+it identifies each monitor with an internally used symbol, whose
+integer value indexes into the above two structures for its dynamic
+and static information.
+Developers who intend to add new counters are required to
+fill in counter information as described in "monitor_info_t" and
+create the internal counter ID in "monitor_id_t". */
+
+/** Structure containing the actual values of a monitor counter. */
+struct monitor_value_t {
+ ib_time_t mon_start_time; /*!< Start time of monitoring */
+ ib_time_t mon_stop_time; /*!< Stop time of monitoring */
+ ib_time_t mon_reset_time; /*!< Time the counter was reset */
+ mon_type_t mon_value; /*!< Current counter Value */
+ mon_type_t mon_max_value; /*!< Current Max value */
+ mon_type_t mon_min_value; /*!< Current Min value */
+ mon_type_t mon_value_reset;/*!< value at last reset */
+ mon_type_t mon_max_value_start; /*!< Max value since start */
+ mon_type_t mon_min_value_start; /*!< Min value since start */
+ mon_type_t mon_start_value;/*!< Value at the start time */
+ mon_type_t mon_last_value; /*!< Last set of values */
+ monitor_running_t mon_status; /*!< whether monitor still running */
+};
+
+/** Following defines are possible values for "monitor_type" field in
+"struct monitor_info" */
+enum monitor_type_t {
+ MONITOR_NONE = 0, /*!< No monitoring */
+ MONITOR_MODULE = 1, /*!< This is a monitor module type,
+ not a counter */
+ MONITOR_EXISTING = 2, /*!< The monitor carries information from
+ an existing system status variable */
+ MONITOR_NO_AVERAGE = 4, /*!< Set this status if we don't want to
+ calculate the average value for the counter */
+ MONITOR_DISPLAY_CURRENT = 8, /*!< Display current value of the
+ counter, rather than incremental value
+ over the period. Mostly for counters
+ displaying current resource usage */
+ MONITOR_GROUP_MODULE = 16, /*!< Monitor can be turned on/off
+ only as a module, but not individually */
+ MONITOR_DEFAULT_ON = 32,/*!< Monitor will be turned on by default at
+ server start up */
+ MONITOR_SET_OWNER = 64, /*!< Owner of "monitor set", a set of
+ monitor counters */
+ MONITOR_SET_MEMBER = 128,/*!< Being part of a "monitor set" */
+ MONITOR_HIDDEN = 256 /*!< Do not display this monitor in the
+ metrics table */
+};
+
+/** Counter minimum value is initialized to be max value of
+ mon_type_t (ib_int64_t) */
+#define MIN_RESERVED ((mon_type_t) (IB_UINT64_MAX >> 1))
+#define MAX_RESERVED (~MIN_RESERVED)
+
+/** This enumeration defines the monitor identifiers used internally
+to identify each particular counter. Its value indexes into two arrays,
+one is the "innodb_counter_value" array which records actual monitor
+counter values, the other is "innodb_counter_info" array which describes
+each counter's basic information (name, desc etc.). A couple of
+naming rules here:
+1) If the monitor defines a module, it starts with MONITOR_MODULE
+2) If the monitor uses existing counters from "status variable", its ID
+name shall start with MONITOR_OVLD
+
+Please refer to "innodb_counter_info" in srv/srv0mon.cc for detailed
+information for each monitor counter */
+
+enum monitor_id_t {
+ /* This is to identify the default value set by the metrics
+ control global variables */
+ MONITOR_DEFAULT_START = 0,
+
+ /* Start of Metadata counter */
+ MONITOR_MODULE_METADATA,
+ MONITOR_TABLE_OPEN,
+ MONITOR_TABLE_CLOSE,
+ MONITOR_TABLE_REFERENCE,
+ MONITOR_OVLD_META_MEM_POOL,
+
+ /* Lock manager related counters */
+ MONITOR_MODULE_LOCK,
+ MONITOR_DEADLOCK,
+ MONITOR_TIMEOUT,
+ MONITOR_LOCKREC_WAIT,
+ MONITOR_TABLELOCK_WAIT,
+ MONITOR_NUM_RECLOCK_REQ,
+ MONITOR_RECLOCK_CREATED,
+ MONITOR_RECLOCK_REMOVED,
+ MONITOR_NUM_RECLOCK,
+ MONITOR_TABLELOCK_CREATED,
+ MONITOR_TABLELOCK_REMOVED,
+ MONITOR_NUM_TABLELOCK,
+ MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT,
+ MONITOR_OVLD_LOCK_WAIT_TIME,
+ MONITOR_OVLD_LOCK_MAX_WAIT_TIME,
+ MONITOR_OVLD_ROW_LOCK_WAIT,
+ MONITOR_OVLD_LOCK_AVG_WAIT_TIME,
+
+ /* Buffer and I/O related counters. */
+ MONITOR_MODULE_BUFFER,
+ MONITOR_OVLD_BUFFER_POOL_SIZE,
+ MONITOR_OVLD_BUF_POOL_READS,
+ MONITOR_OVLD_BUF_POOL_READ_REQUESTS,
+ MONITOR_OVLD_BUF_POOL_WRITE_REQUEST,
+ MONITOR_OVLD_BUF_POOL_WAIT_FREE,
+ MONITOR_OVLD_BUF_POOL_READ_AHEAD,
+ MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED,
+ MONITOR_OVLD_BUF_POOL_PAGE_TOTAL,
+ MONITOR_OVLD_BUF_POOL_PAGE_MISC,
+ MONITOR_OVLD_BUF_POOL_PAGES_DATA,
+ MONITOR_OVLD_BUF_POOL_BYTES_DATA,
+ MONITOR_OVLD_BUF_POOL_PAGES_DIRTY,
+ MONITOR_OVLD_BUF_POOL_BYTES_DIRTY,
+ MONITOR_OVLD_BUF_POOL_PAGES_FREE,
+ MONITOR_OVLD_PAGE_CREATED,
+ MONITOR_OVLD_PAGES_WRITTEN,
+ MONITOR_OVLD_PAGES_READ,
+ MONITOR_OVLD_BYTE_READ,
+ MONITOR_OVLD_BYTE_WRITTEN,
+ MONITOR_FLUSH_BATCH_SCANNED,
+ MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
+ MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
+ MONITOR_FLUSH_HP_RESCAN,
+ MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+ MONITOR_FLUSH_BATCH_COUNT,
+ MONITOR_FLUSH_BATCH_PAGES,
+ MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
+ MONITOR_FLUSH_NEIGHBOR_COUNT,
+ MONITOR_FLUSH_NEIGHBOR_PAGES,
+ MONITOR_FLUSH_N_TO_FLUSH_REQUESTED,
+ MONITOR_FLUSH_AVG_PAGE_RATE,
+ MONITOR_FLUSH_LSN_AVG_RATE,
+ MONITOR_FLUSH_PCT_FOR_DIRTY,
+ MONITOR_FLUSH_PCT_FOR_LSN,
+ MONITOR_FLUSH_SYNC_WAITS,
+ MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_COUNT,
+ MONITOR_FLUSH_ADAPTIVE_PAGES,
+ MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_COUNT,
+ MONITOR_FLUSH_SYNC_PAGES,
+ MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
+ MONITOR_FLUSH_BACKGROUND_COUNT,
+ MONITOR_FLUSH_BACKGROUND_PAGES,
+ MONITOR_LRU_BATCH_SCANNED,
+ MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_BATCH_SCANNED_PER_CALL,
+ MONITOR_LRU_BATCH_TOTAL_PAGE,
+ MONITOR_LRU_BATCH_COUNT,
+ MONITOR_LRU_BATCH_PAGES,
+ MONITOR_LRU_SINGLE_FLUSH_SCANNED,
+ MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL,
+ MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL,
+ MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT,
+ MONITOR_LRU_GET_FREE_SEARCH,
+ MONITOR_LRU_SEARCH_SCANNED,
+ MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_SEARCH_SCANNED_PER_CALL,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL,
+
+ /* Buffer Page I/O specific counters. */
+ MONITOR_MODULE_BUF_PAGE,
+ MONITOR_INDEX_LEAF_PAGE_READ,
+ MONITOR_INDEX_NON_LEAF_PAGE_READ,
+ MONITOR_INDEX_IBUF_LEAF_PAGE_READ,
+ MONITOR_INDEX_IBUF_NON_LEAF_PAGE_READ,
+ MONITOR_UNDO_LOG_PAGE_READ,
+ MONITOR_INODE_PAGE_READ,
+ MONITOR_IBUF_FREELIST_PAGE_READ,
+ MONITOR_IBUF_BITMAP_PAGE_READ,
+ MONITOR_SYSTEM_PAGE_READ,
+ MONITOR_TRX_SYSTEM_PAGE_READ,
+ MONITOR_FSP_HDR_PAGE_READ,
+ MONITOR_XDES_PAGE_READ,
+ MONITOR_BLOB_PAGE_READ,
+ MONITOR_ZBLOB_PAGE_READ,
+ MONITOR_ZBLOB2_PAGE_READ,
+ MONITOR_OTHER_PAGE_READ,
+ MONITOR_INDEX_LEAF_PAGE_WRITTEN,
+ MONITOR_INDEX_NON_LEAF_PAGE_WRITTEN,
+ MONITOR_INDEX_IBUF_LEAF_PAGE_WRITTEN,
+ MONITOR_INDEX_IBUF_NON_LEAF_PAGE_WRITTEN,
+ MONITOR_UNDO_LOG_PAGE_WRITTEN,
+ MONITOR_INODE_PAGE_WRITTEN,
+ MONITOR_IBUF_FREELIST_PAGE_WRITTEN,
+ MONITOR_IBUF_BITMAP_PAGE_WRITTEN,
+ MONITOR_SYSTEM_PAGE_WRITTEN,
+ MONITOR_TRX_SYSTEM_PAGE_WRITTEN,
+ MONITOR_FSP_HDR_PAGE_WRITTEN,
+ MONITOR_XDES_PAGE_WRITTEN,
+ MONITOR_BLOB_PAGE_WRITTEN,
+ MONITOR_ZBLOB_PAGE_WRITTEN,
+ MONITOR_ZBLOB2_PAGE_WRITTEN,
+ MONITOR_OTHER_PAGE_WRITTEN,
+
+ /* OS level counters (I/O) */
+ MONITOR_MODULE_OS,
+ MONITOR_OVLD_OS_FILE_READ,
+ MONITOR_OVLD_OS_FILE_WRITE,
+ MONITOR_OVLD_OS_FSYNC,
+ MONITOR_OS_PENDING_READS,
+ MONITOR_OS_PENDING_WRITES,
+ MONITOR_OVLD_OS_LOG_WRITTEN,
+ MONITOR_OVLD_OS_LOG_FSYNC,
+ MONITOR_OVLD_OS_LOG_PENDING_FSYNC,
+ MONITOR_OVLD_OS_LOG_PENDING_WRITES,
+
+ /* Transaction related counters */
+ MONITOR_MODULE_TRX,
+ MONITOR_TRX_RW_COMMIT,
+ MONITOR_TRX_RO_COMMIT,
+ MONITOR_TRX_NL_RO_COMMIT,
+ MONITOR_TRX_COMMIT_UNDO,
+ MONITOR_TRX_ROLLBACK,
+ MONITOR_TRX_ROLLBACK_SAVEPOINT,
+ MONITOR_TRX_ROLLBACK_ACTIVE,
+ MONITOR_TRX_ACTIVE,
+ MONITOR_RSEG_HISTORY_LEN,
+ MONITOR_NUM_UNDO_SLOT_USED,
+ MONITOR_NUM_UNDO_SLOT_CACHED,
+ MONITOR_RSEG_CUR_SIZE,
+
+ /* Purge related counters */
+ MONITOR_MODULE_PURGE,
+ MONITOR_N_DEL_ROW_PURGE,
+ MONITOR_N_UPD_EXIST_EXTERN,
+ MONITOR_PURGE_INVOKED,
+ MONITOR_PURGE_N_PAGE_HANDLED,
+ MONITOR_DML_PURGE_DELAY,
+ MONITOR_PURGE_STOP_COUNT,
+ MONITOR_PURGE_RESUME_COUNT,
+
+ /* Recovery related counters */
+ MONITOR_MODULE_RECOVERY,
+ MONITOR_NUM_CHECKPOINT,
+ MONITOR_OVLD_LSN_FLUSHDISK,
+ MONITOR_OVLD_LSN_CHECKPOINT,
+ MONITOR_OVLD_LSN_CURRENT,
+ MONITOR_LSN_CHECKPOINT_AGE,
+ MONITOR_OVLD_BUF_OLDEST_LSN,
+ MONITOR_OVLD_MAX_AGE_ASYNC,
+ MONITOR_OVLD_MAX_AGE_SYNC,
+ MONITOR_PENDING_LOG_WRITE,
+ MONITOR_PENDING_CHECKPOINT_WRITE,
+ MONITOR_LOG_IO,
+ MONITOR_OVLD_LOG_WAITS,
+ MONITOR_OVLD_LOG_WRITE_REQUEST,
+ MONITOR_OVLD_LOG_WRITES,
+
+ /* Page Manager related counters */
+ MONITOR_MODULE_PAGE,
+ MONITOR_PAGE_COMPRESS,
+ MONITOR_PAGE_DECOMPRESS,
+ MONITOR_PAD_INCREMENTS,
+ MONITOR_PAD_DECREMENTS,
+
+ /* Index related counters */
+ MONITOR_MODULE_INDEX,
+ MONITOR_INDEX_SPLIT,
+ MONITOR_INDEX_MERGE,
+
+ /* Adaptive Hash Index related counters */
+ MONITOR_MODULE_ADAPTIVE_HASH,
+ MONITOR_OVLD_ADAPTIVE_HASH_SEARCH,
+ MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE,
+ MONITOR_ADAPTIVE_HASH_PAGE_ADDED,
+ MONITOR_ADAPTIVE_HASH_PAGE_REMOVED,
+ MONITOR_ADAPTIVE_HASH_ROW_ADDED,
+ MONITOR_ADAPTIVE_HASH_ROW_REMOVED,
+ MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND,
+ MONITOR_ADAPTIVE_HASH_ROW_UPDATED,
+
+ /* Tablespace related counters */
+ MONITOR_MODULE_FIL_SYSTEM,
+ MONITOR_OVLD_N_FILE_OPENED,
+
+ /* InnoDB Change Buffer related counters */
+ MONITOR_MODULE_IBUF_SYSTEM,
+ MONITOR_OVLD_IBUF_MERGE_INSERT,
+ MONITOR_OVLD_IBUF_MERGE_DELETE,
+ MONITOR_OVLD_IBUF_MERGE_PURGE,
+ MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT,
+ MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE,
+ MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE,
+ MONITOR_OVLD_IBUF_MERGES,
+ MONITOR_OVLD_IBUF_SIZE,
+
+ /* Counters for server operations */
+ MONITOR_MODULE_SERVER,
+ MONITOR_MASTER_THREAD_SLEEP,
+ MONITOR_OVLD_SERVER_ACTIVITY,
+ MONITOR_MASTER_ACTIVE_LOOPS,
+ MONITOR_MASTER_IDLE_LOOPS,
+ MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
+ MONITOR_SRV_IBUF_MERGE_MICROSECOND,
+ MONITOR_SRV_LOG_FLUSH_MICROSECOND,
+ MONITOR_SRV_MEM_VALIDATE_MICROSECOND,
+ MONITOR_SRV_PURGE_MICROSECOND,
+ MONITOR_SRV_DICT_LRU_MICROSECOND,
+ MONITOR_SRV_CHECKPOINT_MICROSECOND,
+ MONITOR_OVLD_SRV_DBLWR_WRITES,
+ MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN,
+ MONITOR_OVLD_SRV_PAGE_SIZE,
+ MONITOR_OVLD_RWLOCK_S_SPIN_WAITS,
+ MONITOR_OVLD_RWLOCK_X_SPIN_WAITS,
+ MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS,
+ MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS,
+ MONITOR_OVLD_RWLOCK_S_OS_WAITS,
+ MONITOR_OVLD_RWLOCK_X_OS_WAITS,
+
+ /* Data DML related counters */
+ MONITOR_MODULE_DML_STATS,
+ MONITOR_OLVD_ROW_READ,
+ MONITOR_OLVD_ROW_INSERTED,
+ MONITOR_OLVD_ROW_DELETED,
+ MONITOR_OLVD_ROW_UPDTATED,
+
+ /* Data DDL related counters */
+ MONITOR_MODULE_DDL_STATS,
+ MONITOR_BACKGROUND_DROP_INDEX,
+ MONITOR_BACKGROUND_DROP_TABLE,
+ MONITOR_ONLINE_CREATE_INDEX,
+ MONITOR_PENDING_ALTER_TABLE,
+
+ MONITOR_MODULE_ICP,
+ MONITOR_ICP_ATTEMPTS,
+ MONITOR_ICP_NO_MATCH,
+ MONITOR_ICP_OUT_OF_RANGE,
+ MONITOR_ICP_MATCH,
+
+ /* This is used only for control system to turn
+ on/off and reset all monitor counters */
+ MONITOR_ALL_COUNTER,
+
+ /* This must be the last member */
+ NUM_MONITOR
+};
+
+/** This informs the monitor control system to turn
+on/off and reset monitor counters through wild card match */
+#define MONITOR_WILDCARD_MATCH (NUM_MONITOR + 1)
+
+/** Cannot find monitor counter with a specified name */
+#define MONITOR_NO_MATCH (NUM_MONITOR + 2)
+
+/** struct monitor_info describes the basic/static information
+about each monitor counter. */
+struct monitor_info_t {
+ const char* monitor_name; /*!< Monitor name */
+ const char* monitor_module; /*!< Sub Module the monitor
+ belongs to */
+ const char* monitor_desc; /*!< Brief desc of monitor counter */
+ monitor_type_t monitor_type; /*!< Type of Monitor Info */
+ monitor_id_t monitor_related_id;/*!< Monitor ID of counter that
+ related to this monitor. This is
+ set when the monitor belongs to
+ a "monitor set" */
+ monitor_id_t monitor_id; /*!< Monitor ID as defined in enum
+ monitor_id_t */
+};
+
+/** Following are the "set_option" values allowed for the
+srv_mon_process_existing_counter() function, which are used
+to turn on/off/reset the monitor counters. */
+enum mon_option_t {
+ MONITOR_TURN_ON = 1, /*!< Turn on the counter */
+ MONITOR_TURN_OFF, /*!< Turn off the counter */
+ MONITOR_RESET_VALUE, /*!< Reset current values */
+ MONITOR_RESET_ALL_VALUE, /*!< Reset all values */
+ MONITOR_GET_VALUE /*!< Option for
+ srv_mon_process_existing_counter()
+ function */
+};
+
+/** Number of bits in a ulint datatype */
+#define NUM_BITS_ULINT (sizeof(ulint) * CHAR_BIT)
+
+/** This "monitor_set_tbl" is a bitmap that records whether a particular
+monitor counter has been turned on or off */
+extern ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) /
+ NUM_BITS_ULINT];
+
+/** Macros to turn on/off the control bit in monitor_set_tbl for a monitor
+counter option. The argument may be any integer expression. */
+#define MONITOR_ON(monitor) \
+ (monitor_set_tbl[(monitor) / NUM_BITS_ULINT] |= \
+ ((ulint)1 << ((monitor) % NUM_BITS_ULINT)))
+
+#define MONITOR_OFF(monitor) \
+ (monitor_set_tbl[(monitor) / NUM_BITS_ULINT] &= \
+ ~((ulint)1 << ((monitor) % NUM_BITS_ULINT)))
+
+/** Non-zero if the requested monitor's control bit is set (turned on) */
+#define MONITOR_IS_ON(monitor) \
+ (monitor_set_tbl[(monitor) / NUM_BITS_ULINT] & \
+ ((ulint)1 << ((monitor) % NUM_BITS_ULINT)))
+
+/** The actual monitor counter array that records each monitor counter
+value */
+extern monitor_value_t innodb_counter_value[NUM_MONITOR];
+
+/** Following are macro defines for basic monitor counter manipulations.
+Please note we do not provide any synchronization for these monitor
+operations due to performance consideration. Most counters can
+be placed under existing mutex protections in respective code
+module. */
+
+/** Macros to access various fields of a monitor counter */
+#define MONITOR_FIELD(monitor, field) \
+ (innodb_counter_value[monitor].field)
+
+#define MONITOR_VALUE(monitor) \
+ MONITOR_FIELD(monitor, mon_value)
+
+#define MONITOR_MAX_VALUE(monitor) \
+ MONITOR_FIELD(monitor, mon_max_value)
+
+#define MONITOR_MIN_VALUE(monitor) \
+ MONITOR_FIELD(monitor, mon_min_value)
+
+#define MONITOR_VALUE_RESET(monitor) \
+ MONITOR_FIELD(monitor, mon_value_reset)
+
+#define MONITOR_MAX_VALUE_START(monitor) \
+ MONITOR_FIELD(monitor, mon_max_value_start)
+
+#define MONITOR_MIN_VALUE_START(monitor) \
+ MONITOR_FIELD(monitor, mon_min_value_start)
+
+#define MONITOR_LAST_VALUE(monitor) \
+ MONITOR_FIELD(monitor, mon_last_value)
+
+#define MONITOR_START_VALUE(monitor) \
+ MONITOR_FIELD(monitor, mon_start_value)
+
+#define MONITOR_VALUE_SINCE_START(monitor) \
+ (MONITOR_VALUE(monitor) + MONITOR_VALUE_RESET(monitor))
+
+#define MONITOR_STATUS(monitor) \
+ MONITOR_FIELD(monitor, mon_status)
+
+#define MONITOR_SET_START(monitor) \
+ do { \
+ MONITOR_STATUS(monitor) = MONITOR_STARTED; \
+ MONITOR_FIELD((monitor), mon_start_time) = time(NULL); \
+ } while (0)
+
+#define MONITOR_SET_OFF(monitor) \
+ do { \
+ MONITOR_STATUS(monitor) = MONITOR_STOPPED; \
+ MONITOR_FIELD((monitor), mon_stop_time) = time(NULL); \
+ } while (0)
+
+#define MONITOR_INIT_ZERO_VALUE 0
+
+/** Max and min values are initialized when we first turn on the monitor
+counter, and set the MONITOR_STATUS. */
+#define MONITOR_MAX_MIN_NOT_INIT(monitor) \
+ (MONITOR_STATUS(monitor) == MONITOR_INIT_ZERO_VALUE \
+ && MONITOR_MIN_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE \
+ && MONITOR_MAX_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE)
+
+#define MONITOR_INIT(monitor) \
+ if (MONITOR_MAX_MIN_NOT_INIT(monitor)) { \
+ MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; \
+ MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED; \
+ MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; \
+ MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED; \
+ }
+
+/** Macros to increment/decrement the counters. The normal
+monitor counter operation expects appropriate synchronization
+already exists. No additional mutex is necessary when operating
+on the counters */
+#define MONITOR_INC(monitor) \
+ if (MONITOR_IS_ON(monitor)) { \
+ MONITOR_VALUE(monitor)++; \
+ if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+ } \
+ }
+
+/** Increment a monitor counter under mutex protection.
+Use MONITOR_INC if appropriate mutex protection already exists.
+The caller must not already own the mutex (asserted through ut_ad); the
+mutex is acquired and released only when the monitor is on.
+@param mutex mutex to acquire and release
+@param monitor monitor to be incremented by 1 */
+# define MONITOR_MUTEX_INC(mutex, monitor) \
+ ut_ad(!mutex_own(mutex)); \
+ if (MONITOR_IS_ON(monitor)) { \
+ mutex_enter(mutex); \
+ if (++MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor); \
+ } \
+ mutex_exit(mutex); \
+ }
+/** Decrement a monitor counter under mutex protection.
+Use MONITOR_DEC if appropriate mutex protection already exists.
+The caller must not already own the mutex (asserted through ut_ad); the
+mutex is acquired and released only when the monitor is on.
+@param mutex mutex to acquire and release
+@param monitor monitor to be decremented by 1 */
+# define MONITOR_MUTEX_DEC(mutex, monitor) \
+ ut_ad(!mutex_own(mutex)); \
+ if (MONITOR_IS_ON(monitor)) { \
+ mutex_enter(mutex); \
+ if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
+ MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \
+ } \
+ mutex_exit(mutex); \
+ }
+
+#if defined HAVE_ATOMIC_BUILTINS_64
+/** Atomically increment a monitor counter.
+Use MONITOR_INC if appropriate mutex protection exists.
+Does nothing when the monitor is off. Only the increment itself goes
+through os_atomic_increment_uint64(); the follow-up update of the max
+value is a plain compare-and-store and may race, as noted in the body.
+@param monitor monitor to be incremented by 1 */
+# define MONITOR_ATOMIC_INC(monitor) \
+ if (MONITOR_IS_ON(monitor)) { \
+ ib_uint64_t value; \
+ value = os_atomic_increment_uint64( \
+ (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \
+ /* Note: This is not 100% accurate because of the \
+ inherent race, we ignore it due to performance. */ \
+ if (value > (ib_uint64_t) MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = value; \
+ } \
+ }
+
+/** Atomically decrement a monitor counter.
+Use MONITOR_DEC if appropriate mutex protection exists.
+Does nothing when the monitor is off. Only the decrement itself goes
+through os_atomic_decrement_uint64(); the follow-up update of the min
+value is a plain compare-and-store and may race, as noted in the body.
+NOTE(review): the comparison is done on values cast to ib_uint64_t, i.e.
+unsigned — confirm this is intended if the counter can go below zero.
+@param monitor monitor to be decremented by 1 */
+# define MONITOR_ATOMIC_DEC(monitor) \
+ if (MONITOR_IS_ON(monitor)) { \
+ ib_uint64_t value; \
+ value = os_atomic_decrement_uint64( \
+ (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \
+ /* Note: This is not 100% accurate because of the \
+ inherent race, we ignore it due to performance. */ \
+ if (value < (ib_uint64_t) MONITOR_MIN_VALUE(monitor)) { \
+ MONITOR_MIN_VALUE(monitor) = value; \
+ } \
+ }
+/* When HAVE_ATOMIC_BUILTINS_64 is defined, monitor subsystem creation and
+teardown expand to no-ops (presumably because monitor_mutex exists only in
+the non-atomic branch below -- confirm). */
+# define srv_mon_create() ((void) 0)
+# define srv_mon_free() ((void) 0)
+#else /* HAVE_ATOMIC_BUILTINS_64 */
+/** Mutex protecting atomic operations on platforms that lack
+built-in operations for atomic memory access */
+extern ib_mutex_t monitor_mutex;
+/****************************************************************//**
+Initialize the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_create(void);
+/*================*/
+/****************************************************************//**
+Close the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_free(void);
+/*==============*/
+
+/** Atomically increment a monitor counter.
+Use MONITOR_INC if appropriate mutex protection exists.
+Without HAVE_ATOMIC_BUILTINS_64 this is implemented as
+MONITOR_MUTEX_INC under monitor_mutex.
+@param monitor monitor to be incremented by 1 */
+# define MONITOR_ATOMIC_INC(monitor) MONITOR_MUTEX_INC(&monitor_mutex, monitor)
+/** Atomically decrement a monitor counter.
+Use MONITOR_DEC if appropriate mutex protection exists.
+Without HAVE_ATOMIC_BUILTINS_64 this is implemented as
+MONITOR_MUTEX_DEC under monitor_mutex.
+@param monitor monitor to be decremented by 1 */
+# define MONITOR_ATOMIC_DEC(monitor) MONITOR_MUTEX_DEC(&monitor_mutex, monitor)
+#endif /* HAVE_ATOMIC_BUILTINS_64 */
+
+/** Decrement a monitor counter by one and lower its recorded min value
+when the new value drops below it. Does nothing when the monitor is off.
+No mutex is taken; the caller provides any needed synchronization.
+@param monitor monitor to be decremented by 1 */
+#define MONITOR_DEC(monitor) \
+ if (MONITOR_IS_ON(monitor)) { \
+     if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
+         MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \
+     } \
+ }
+
+/** Check that a value handed to a monitor macro is defined.
+With UNIV_DEBUG_VALGRIND the value is copied into a local mon_type_t and
+that copy is passed to UNIV_MEM_ASSERT_RW; otherwise this is a no-op.
+@param value value to be checked */
+#ifdef UNIV_DEBUG_VALGRIND
+# define MONITOR_CHECK_DEFINED(value) do { \
+ mon_type_t m = value; \
+ UNIV_MEM_ASSERT_RW(&m, sizeof m); \
+} while (0)
+#else /* UNIV_DEBUG_VALGRIND */
+# define MONITOR_CHECK_DEFINED(value) (void) 0
+#endif /* UNIV_DEBUG_VALGRIND */
+
+/** Add "value" to a monitor counter and raise its recorded max value when
+the new value exceeds it. Does nothing (apart from the
+MONITOR_CHECK_DEFINED check) when the monitor is off.
+@param monitor monitor to be incremented
+@param value amount to add; cast to mon_type_t */
+#define MONITOR_INC_VALUE(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
+ if (MONITOR_IS_ON(monitor)) { \
+ MONITOR_VALUE(monitor) += (mon_type_t) (value); \
+ if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+ } \
+ }
+
+/** Subtract "value" from a monitor counter and lower its recorded min
+value when the new value drops below it. Does nothing (apart from the
+MONITOR_CHECK_DEFINED check) when the monitor is off.
+The ut_ad() assertion requires that the counter is at least "value"
+before the subtraction.
+@param monitor monitor to be decremented
+@param value amount to subtract; cast to mon_type_t */
+#define MONITOR_DEC_VALUE(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
+ if (MONITOR_IS_ON(monitor)) { \
+ ut_ad(MONITOR_VALUE(monitor) >= (mon_type_t) (value)); \
+ MONITOR_VALUE(monitor) -= (mon_type_t) (value); \
+ if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
+ MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\
+ } \
+ }
+
+/* Increment/decrement a counter without checking the monitor on/off bit,
+which could already have been checked for the module as a group. */
+
+/** Increment a monitor counter by one, unconditionally, and raise its
+recorded max value when the new value exceeds it.
+@param monitor monitor to be incremented by 1 */
+#define MONITOR_INC_NOCHECK(monitor) \
+ do { \
+ MONITOR_VALUE(monitor)++; \
+ if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+ } \
+ } while (0)
+
+/** Decrement a monitor counter by one without checking whether the
+monitor is on, and lower its recorded min value when the new value drops
+below it.
+@param monitor monitor to be decremented by 1 */
+#define MONITOR_DEC_NOCHECK(monitor) \
+ do { \
+     if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
+         MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \
+     } \
+ } while (0)
+
+/** Directly set a monitor counter's value, then update both the recorded
+max and the recorded min value against the new value. Does nothing (apart
+from the MONITOR_CHECK_DEFINED check) when the monitor is off.
+@param monitor monitor to be set
+@param value new counter value; cast to mon_type_t */
+#define MONITOR_SET(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
+ if (MONITOR_IS_ON(monitor)) { \
+ MONITOR_VALUE(monitor) = (mon_type_t) (value); \
+ if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+ } \
+ if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
+ MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\
+ } \
+ }
+
+/** Add the time elapsed since "value" to the monitor counter and refresh
+"value" with the current time. The current time is read with
+ut_time_us(), so "value" is expected to hold an earlier ut_time_us()
+reading and the difference is in microseconds (as the macro name says),
+not seconds. When the monitor is off neither the counter nor "value" is
+modified.
+@param monitor monitor to update for the time difference
+@param value in/out: modifiable lvalue holding the start time; it is
+ overwritten with the current ut_time_us() reading */
+#define MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
+ if (MONITOR_IS_ON(monitor)) { \
+ ullint old_time = (value); \
+ (value) = ut_time_us(NULL); \
+ MONITOR_VALUE(monitor) += (mon_type_t) ((value) - old_time); \
+ }
+
+/** This macro updates 3 counters in one call. However, it only checks the
+main/first monitor counter 'monitor' to see whether it is on or off when
+deciding whether to do the update; the on/off state of the other two
+counters is not consulted.
+@param monitor the main monitor counter to update. It accounts for
+ the accumulative value for the counter.
+@param monitor_n_calls counter that counts number of times this macro is
+ called
+@param monitor_per_call counter that records the current and max value of
+ each incremental value
+@param value incremental value to record this time */
+#define MONITOR_INC_VALUE_CUMULATIVE( \
+ monitor, monitor_n_calls, monitor_per_call, value) \
+ MONITOR_CHECK_DEFINED(value); \
+ if (MONITOR_IS_ON(monitor)) { \
+ MONITOR_VALUE(monitor_n_calls)++; \
+ MONITOR_VALUE(monitor_per_call) = (mon_type_t) (value); \
+ if (MONITOR_VALUE(monitor_per_call) \
+ > MONITOR_MAX_VALUE(monitor_per_call)) { \
+ MONITOR_MAX_VALUE(monitor_per_call) = \
+ (mon_type_t) (value); \
+ } \
+ MONITOR_VALUE(monitor) += (mon_type_t) (value); \
+ if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+ } \
+ }
+
+/** Directly set a monitor counter's value and update only the recorded
+max value; intended for values that are monotonically increasing, where
+the min value does not need updating. Does nothing (apart from the
+MONITOR_CHECK_DEFINED check) when the monitor is off.
+@param monitor monitor to be set
+@param value new counter value; cast to mon_type_t */
+#define MONITOR_SET_UPD_MAX_ONLY(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
+ if (MONITOR_IS_ON(monitor)) { \
+ MONITOR_VALUE(monitor) = (mon_type_t) (value); \
+ if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+ } \
+ }
+
+/** Directly set a monitor counter's value without touching max/min.
+Some values, such as the log sequence number, are monotonically
+increasing numbers and do not need max/min values recorded. Does nothing
+(apart from the MONITOR_CHECK_DEFINED check) when the monitor is off.
+@param monitor monitor to be set
+@param value new counter value; cast to mon_type_t */
+#define MONITOR_SET_SIMPLE(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
+ if (MONITOR_IS_ON(monitor)) { \
+ MONITOR_VALUE(monitor) = (mon_type_t) (value); \
+ }
+
+/** Reset every value of a monitor counter to its initial state.
+The current value, the reset baseline, the last value and the
+start/stop/reset timestamps become MONITOR_INIT_ZERO_VALUE; the max/min
+values (current and since-start) become MAX_RESERVED / MIN_RESERVED.
+The macro itself does not check the monitor status: the reset
+operation is meant to be conducted only when the counter is turned off.
+@param monitor monitor counter id */
+#define MONITOR_RESET_ALL(monitor) \
+ do { \
+ MONITOR_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE; \
+ MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; \
+ MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; \
+ MONITOR_VALUE_RESET(monitor) = MONITOR_INIT_ZERO_VALUE; \
+ MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED; \
+ MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED; \
+ MONITOR_LAST_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE; \
+ MONITOR_FIELD(monitor, mon_start_time) = \
+ MONITOR_INIT_ZERO_VALUE; \
+ MONITOR_FIELD(monitor, mon_stop_time) = \
+ MONITOR_INIT_ZERO_VALUE; \
+ MONITOR_FIELD(monitor, mon_reset_time) = \
+ MONITOR_INIT_ZERO_VALUE; \
+ } while (0)
+
+/** The following macros define the operations necessary to fetch and
+consolidate information from existing system status variables. */
+
+/** Record a baseline in the mon_start_value field of a monitor counter:
+the passed-in value minus MONITOR_VALUE_RESET(monitor). The monitor's
+on/off status is not checked.
+@param monitor monitor counter id
+@param value value to derive the baseline from; cast to mon_type_t */
+#define MONITOR_SAVE_START(monitor, value) do { \
+ MONITOR_CHECK_DEFINED(value); \
+ (MONITOR_START_VALUE(monitor) = \
+ (mon_type_t) (value) - MONITOR_VALUE_RESET(monitor)); \
+ } while (0)
+
+/** Copy the monitor's current value (MONITOR_VALUE) into its
+mon_last_value field and add that same value to mon_start_value.
+The macro takes no value argument; it works on the counter's own value.
+@param monitor monitor counter id */
+#define MONITOR_SAVE_LAST(monitor) \
+ do { \
+ MONITOR_LAST_VALUE(monitor) = MONITOR_VALUE(monitor); \
+ MONITOR_START_VALUE(monitor) += MONITOR_VALUE(monitor); \
+ } while (0)
+
+/** Set the monitor value to the difference of "value" and mon_start_value,
+compensated by mon_last_value if accumulated value is required. The
+stored value is
+ value - MONITOR_VALUE_RESET - mon_start_value + mon_last_value
+and it is stored through MONITOR_SET_UPD_MAX_ONLY, so only the max value
+is updated and nothing is stored while the monitor is off.
+@param monitor monitor counter id
+@param value current reading of the underlying counter */
+#define MONITOR_SET_DIFF(monitor, value) \
+ MONITOR_SET_UPD_MAX_ONLY(monitor, ((value) \
+ - MONITOR_VALUE_RESET(monitor) \
+ - MONITOR_FIELD(monitor, mon_start_value) \
+ + MONITOR_FIELD(monitor, mon_last_value)))
+
+/****************************************************************//**
+Get monitor's monitor_info_t by its monitor id (index into the
+innodb_counter_info array)
+@return pointer to the corresponding monitor_info_t, or NULL if no such
+monitor */
+UNIV_INTERN
+monitor_info_t*
+srv_mon_get_info(
+/*=============*/
+ monitor_id_t monitor_id); /*!< id index into the
+ innodb_counter_info array */
+/****************************************************************//**
+Get monitor's name by its monitor id (index into the
+innodb_counter_info array)
+@return corresponding monitor name, or NULL if no such
+monitor */
+UNIV_INTERN
+const char*
+srv_mon_get_name(
+/*=============*/
+ monitor_id_t monitor_id); /*!< id index into the
+ innodb_counter_info array */
+
+/****************************************************************//**
+Turn on/off/reset monitor counters in a module. If module_id
+is NUM_MONITOR then turn on all monitor counters.
+NOTE: the function is declared void and returns no status; an earlier
+description of a return value (0 if successful, or the first monitor that
+cannot be turned on because it is already turned on) does not match the
+declaration. */
+UNIV_INTERN
+void
+srv_mon_set_module_control(
+/*=======================*/
+ monitor_id_t module_id, /*!< in: Module ID as in
+ monitor_counter_id. If it is
+ set to NUM_MONITOR, this means
+ we shall turn on all the counters */
+ mon_option_t set_option); /*!< in: Turn on/off reset the
+ counter */
+/****************************************************************//**
+This function consolidates some existing server counters used
+by "system status variables". These existing system variables do not have
+mechanism to start/stop and reset the counters, so we simulate these
+controls by remembering the corresponding counter values when the
+corresponding monitors are turned on/off/reset, and do appropriate
+mathematics to deduct the actual value. */
+UNIV_INTERN
+void
+srv_mon_process_existing_counter(
+/*=============================*/
+ monitor_id_t monitor_id, /*!< in: the monitor's ID as in
+ monitor_counter_id */
+ mon_option_t set_option); /*!< in: Turn on/off reset the
+ counter */
+/*************************************************************//**
+This function is used to calculate the maximum counter value
+since the start of monitor counter
+@return max counter value since start. */
+UNIV_INLINE
+mon_type_t
+srv_mon_calc_max_since_start(
+/*=========================*/
+ monitor_id_t monitor); /*!< in: monitor id */
+/*************************************************************//**
+This function is used to calculate the minimum counter value
+since the start of monitor counter
+@return min counter value since start. */
+UNIV_INLINE
+mon_type_t
+srv_mon_calc_min_since_start(
+/*=========================*/
+ monitor_id_t monitor); /*!< in: monitor id*/
+/*************************************************************//**
+Reset a monitor, create a new base line with the current monitor
+value. This baseline is recorded by MONITOR_VALUE_RESET(monitor) */
+UNIV_INTERN
+void
+srv_mon_reset(
+/*==========*/
+ monitor_id_t monitor); /*!< in: monitor id*/
+/*************************************************************//**
+This function resets all values of a monitor counter */
+UNIV_INLINE
+void
+srv_mon_reset_all(
+/*==============*/
+ monitor_id_t monitor); /*!< in: monitor id*/
+/*************************************************************//**
+Turn on monitor counters that are marked as default ON. */
+UNIV_INTERN
+void
+srv_mon_default_on(void);
+/*====================*/
+
+#ifndef UNIV_NONINL
+#include "srv0mon.ic"
+#endif
+#else /* !UNIV_HOTBACKUP */
+# define MONITOR_INC(x) ((void) 0)
+# define MONITOR_DEC(x) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/xtradb/include/srv0mon.ic b/storage/xtradb/include/srv0mon.ic
new file mode 100644
index 00000000000..17411d77a8b
--- /dev/null
+++ b/storage/xtradb/include/srv0mon.ic
@@ -0,0 +1,113 @@
+/*****************************************************************************
+
+Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/srv0mon.ic
+Server monitoring system
+
+Created 1/20/2010 Jimmy Yang
+************************************************************************/
+
+/*************************************************************//**
+Compute the maximum counter value seen since the monitor was started and
+cache it in MONITOR_MAX_VALUE_START(monitor).
+@return max counter value since start. */
+UNIV_INLINE
+mon_type_t
+srv_mon_calc_max_since_start(
+/*=========================*/
+ monitor_id_t monitor) /*!< in: monitor id */
+{
+ /* First call after (re)initialization: the since-start max is
+ simply whatever MONITOR_MAX_VALUE currently holds. */
+ if (MONITOR_MAX_VALUE_START(monitor) == MAX_RESERVED) {
+     MONITOR_MAX_VALUE_START(monitor) = MONITOR_MAX_VALUE(monitor);
+     return(MONITOR_MAX_VALUE_START(monitor));
+ }
+
+ /* No max recorded since the last reset: keep the cached value. */
+ if (MONITOR_MAX_VALUE(monitor) == MAX_RESERVED) {
+     return(MONITOR_MAX_VALUE_START(monitor));
+ }
+
+ /* The max since reset plus the reset baseline is the candidate
+ for a new since-start max. */
+ if (MONITOR_MAX_VALUE(monitor) + MONITOR_VALUE_RESET(monitor)
+     > MONITOR_MAX_VALUE_START(monitor)) {
+     MONITOR_MAX_VALUE_START(monitor) =
+         MONITOR_MAX_VALUE(monitor)
+         + MONITOR_VALUE_RESET(monitor);
+ }
+
+ return(MONITOR_MAX_VALUE_START(monitor));
+}
+
+/*************************************************************//**
+Compute the minimum counter value seen since the monitor was started and
+cache it in MONITOR_MIN_VALUE_START(monitor).
+@return min counter value since start. */
+UNIV_INLINE
+mon_type_t
+srv_mon_calc_min_since_start(
+/*=========================*/
+ monitor_id_t monitor) /*!< in: monitor id */
+{
+ /* First call after (re)initialization: the since-start min is
+ simply whatever MONITOR_MIN_VALUE currently holds. */
+ if (MONITOR_MIN_VALUE_START(monitor) == MIN_RESERVED) {
+     MONITOR_MIN_VALUE_START(monitor) = MONITOR_MIN_VALUE(monitor);
+     return(MONITOR_MIN_VALUE_START(monitor));
+ }
+
+ /* No min recorded since the last reset: keep the cached value. */
+ if (MONITOR_MIN_VALUE(monitor) == MIN_RESERVED) {
+     return(MONITOR_MIN_VALUE_START(monitor));
+ }
+
+ /* The min since reset plus the reset baseline is the candidate
+ for a new since-start min. */
+ if (MONITOR_MIN_VALUE(monitor) + MONITOR_VALUE_RESET(monitor)
+     < MONITOR_MIN_VALUE_START(monitor)) {
+     MONITOR_MIN_VALUE_START(monitor) =
+         MONITOR_MIN_VALUE(monitor)
+         + MONITOR_VALUE_RESET(monitor);
+ }
+
+ return(MONITOR_MIN_VALUE_START(monitor));
+}
+
+/*************************************************************//**
+Reset every value of a monitor counter, provided the counter is off.
+If the monitor is still on, nothing is reset and a message naming the
+counter is printed to stderr instead. */
+UNIV_INLINE
+void
+srv_mon_reset_all(
+/*==============*/
+ monitor_id_t monitor) /*!< in: monitor id */
+{
+ if (!(MONITOR_IS_ON(monitor))) {
+     /* Counter is off: all of its values can be reset. */
+     MONITOR_RESET_ALL(monitor);
+     return;
+ }
+
+ /* Do not reset all counter values if monitor is still on. */
+ fprintf(stderr, "InnoDB: Cannot reset all values for "
+     "monitor counter %s while it is on. Please "
+     "turn it off and retry. \n",
+     srv_mon_get_name(monitor));
+}
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index 330e3c412ae..d278782daa8 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, 2009, Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -26,8 +26,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -43,31 +43,119 @@ Created 10/10/1995 Heikki Tuuri
#include "univ.i"
#ifndef UNIV_HOTBACKUP
+#include "log0log.h"
#include "sync0sync.h"
#include "os0sync.h"
#include "que0types.h"
#include "trx0types.h"
+#include "srv0conc.h"
+#include "buf0checksum.h"
+#include "ut0counter.h"
+
+/* Global counters used inside InnoDB. Every member is an ib_counter_t
+instantiation (declared via the typedefs below). */
+struct srv_stats_t {
+ /* Counter typedefs. NOTE(review): the second template argument is
+ presumably the number of counter slots and single_indexer_t the slot
+ selection policy -- confirm against ut0counter.h. */
+ typedef ib_counter_t<lsn_t, 1, single_indexer_t> lsn_ctr_1_t;
+ typedef ib_counter_t<ulint, 1, single_indexer_t> ulint_ctr_1_t;
+ typedef ib_counter_t<lint, 1, single_indexer_t> lint_ctr_1_t;
+ typedef ib_counter_t<ulint, 64> ulint_ctr_64_t;
+ typedef ib_counter_t<ib_int64_t, 1, single_indexer_t> ib_int64_ctr_1_t;
+
+ /** Count the amount of data written in total (in bytes) */
+ ulint_ctr_1_t data_written;
+
+ /** Number of the log write requests done */
+ ulint_ctr_1_t log_write_requests;
+
+ /** Number of physical writes to the log performed */
+ ulint_ctr_1_t log_writes;
+
+ /** Amount of data written to the log files in bytes */
+ lsn_ctr_1_t os_log_written;
+
+ /** Number of writes being done to the log files */
+ lint_ctr_1_t os_log_pending_writes;
+
+ /** We increase this counter, when we don't have enough
+ space in the log buffer and have to flush it */
+ ulint_ctr_1_t log_waits;
+
+ /** Count the number of times the doublewrite buffer was flushed */
+ ulint_ctr_1_t dblwr_writes;
+
+ /** Store the number of pages that have been flushed to the
+ doublewrite buffer */
+ ulint_ctr_1_t dblwr_pages_written;
+
+ /** Store the number of write requests issued */
+ ulint_ctr_1_t buf_pool_write_requests;
+
+ /** Store the number of times when we had to wait for a free page
+ in the buffer pool. It happens when the buffer pool is full and we
+ need to make a flush, in order to be able to read or create a page. */
+ ulint_ctr_1_t buf_pool_wait_free;
+
+ /** Count the number of pages that were written from buffer
+ pool to the disk */
+ ulint_ctr_1_t buf_pool_flushed;
+
+ /** Number of buffer pool reads that led to the reading of
+ a disk page */
+ ulint_ctr_1_t buf_pool_reads;
+
+ /** Number of data read in total (in bytes) */
+ ulint_ctr_1_t data_read;
+
+ /** Wait time of database locks */
+ ib_int64_ctr_1_t n_lock_wait_time;
+
+ /** Number of database lock waits */
+ ulint_ctr_1_t n_lock_wait_count;
+
+ /** Number of threads currently waiting on database locks */
+ lint_ctr_1_t n_lock_wait_current_count;
+
+ /** Number of rows read. */
+ ulint_ctr_64_t n_rows_read;
+
+ /** Number of rows updated */
+ ulint_ctr_64_t n_rows_updated;
+
+ /** Number of rows deleted */
+ ulint_ctr_64_t n_rows_deleted;
+
+ /** Number of rows inserted */
+ ulint_ctr_64_t n_rows_inserted;
+
+ /** NOTE(review): undocumented in the original; presumably the number
+ of lock deadlocks detected -- confirm against the code updating it */
+ ulint_ctr_1_t lock_deadlock_count;
+
+ /** NOTE(review): undocumented in the original; presumably the longest
+ lock wait time observed (units not visible here) -- confirm */
+ ulint_ctr_1_t n_lock_max_wait_time;
+};
extern const char* srv_main_thread_op_info;
/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
-extern const char srv_mysql50_table_name_prefix[9];
-
-/* When this event is set the lock timeout and InnoDB monitor
-thread starts running */
-extern os_event_t srv_lock_timeout_thread_event;
+extern const char srv_mysql50_table_name_prefix[10];
/* The monitor thread waits on this event. */
extern os_event_t srv_monitor_event;
-/* The lock timeout thread waits on this event. */
-extern os_event_t srv_timeout_event;
-
/* The error monitor thread waits on this event. */
extern os_event_t srv_error_event;
-/* This event is set at shutdown to wakeup threads from sleep */
-extern os_event_t srv_shutdown_event;
+/** The buffer pool dump/load thread waits on this event. */
+extern os_event_t srv_buf_dump_event;
+
+/** The buffer pool dump/load file name */
+#define SRV_BUF_DUMP_FILENAME_DEFAULT "ib_buffer_pool"
+extern char* srv_buf_dump_filename;
+
+/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
+and/or load it during startup. */
+extern char srv_buffer_pool_dump_at_shutdown;
+extern char srv_buffer_pool_load_at_startup;
+
+/* Whether to disable file system cache if it is defined */
+extern char srv_disable_sort_file_cache;
/* This event is set on checkpoint completion to wake the redo log parser
thread */
@@ -82,87 +170,112 @@ at a time */
#define SRV_AUTO_EXTEND_INCREMENT \
(srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
+/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
+extern ib_mutex_t srv_monitor_file_mutex;
+
/* prototypes for new functions added to ha_innodb.cc */
ibool innobase_get_slow_log();
-/* Mutex for locking srv_monitor_file */
-extern mutex_t srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
extern FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
+/* Mutex for locking srv_dict_tmpfile. Only created if !srv_read_only_mode.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
-extern mutex_t srv_dict_tmpfile_mutex;
+extern ib_mutex_t srv_dict_tmpfile_mutex;
/* Temporary file for output from the data dictionary */
extern FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
+/* Mutex for locking srv_misc_tmpfile. Only created if !srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
-extern mutex_t srv_misc_tmpfile_mutex;
+extern ib_mutex_t srv_misc_tmpfile_mutex;
/* Temporary file for miscellanous diagnostic output */
extern FILE* srv_misc_tmpfile;
/* Server parameters which are read from the initfile */
extern char* srv_data_home;
+
#ifdef UNIV_LOG_ARCHIVE
extern char* srv_arch_dir;
#endif /* UNIV_LOG_ARCHIVE */
+/** Set if InnoDB must operate in read-only mode. We don't do any
+recovery and open all tables in RO mode instead of RW mode. We don't
+sync the max trx id to disk either. */
+extern my_bool srv_read_only_mode;
/** store to its own file each table created by an user; data
dictionary tables are in the system tablespace 0 */
-#ifndef UNIV_HOTBACKUP
extern my_bool srv_file_per_table;
-#else
-extern ibool srv_file_per_table;
-#endif /* UNIV_HOTBACKUP */
+/** Sleep delay for threads waiting to enter InnoDB. In micro-seconds. */
+extern ulong srv_thread_sleep_delay;
+#if defined(HAVE_ATOMIC_BUILTINS)
+/** Maximum sleep delay (in micro-seconds), value of 0 disables it.*/
+extern ulong srv_adaptive_max_sleep_delay;
+#endif /* HAVE_ATOMIC_BUILTINS */
+
/** The file format to use on new *.ibd files. */
extern ulint srv_file_format;
/** Whether to check file format during startup. A value of
-DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
+UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
set it to the highest format we support. */
extern ulint srv_max_file_format_at_startup;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
+/** Sort buffer size in index creation */
+extern ulong srv_sort_buf_size;
+/** Maximum modification log file size for online index creation */
+extern unsigned long long srv_online_max_size;
+
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio;
-#endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
extern ibool srv_use_native_conditions;
-#endif
+#endif /* __WIN__ */
+#endif /* !UNIV_HOTBACKUP */
+
+/** Server undo tablespaces directory, can be absolute path. */
+extern char* srv_undo_dir;
+
+/** Number of undo tablespaces to use. */
+extern ulong srv_undo_tablespaces;
+
+/** The number of UNDO tablespaces that are open and ready to use. */
+extern ulint srv_undo_tablespaces_open;
+
+/* The number of undo segments to use */
+extern ulong srv_undo_logs;
+
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
extern ulint* srv_data_file_is_raw_partition;
-extern char* srv_doublewrite_file;
-
-extern ibool srv_recovery_stats;
-
extern my_bool srv_track_changed_pages;
-extern ib_uint64_t srv_max_bitmap_file_size;
+extern ulonglong srv_max_bitmap_file_size;
extern
ulonglong srv_max_changed_pages;
extern ibool srv_auto_extend_last_data_file;
extern ulint srv_last_file_size_max;
-extern char** srv_log_group_home_dirs;
+extern char* srv_log_group_home_dir;
#ifndef UNIV_HOTBACKUP
extern ulong srv_auto_extend_increment;
extern ibool srv_created_new_raw;
-extern ulint srv_n_log_groups;
-extern ulint srv_n_log_files;
-extern ulint srv_log_file_size;
+/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
+#define SRV_N_LOG_FILES_MAX 100
+extern ulong srv_n_log_files;
+extern ib_uint64_t srv_log_file_size;
+extern ib_uint64_t srv_log_file_size_requested;
extern ulint srv_log_buffer_size;
-//extern ulong srv_flush_log_at_trx_commit;
+extern uint srv_flush_log_at_timeout;
extern char srv_use_global_flush_log_at_trx_commit;
extern char srv_adaptive_flushing;
@@ -185,12 +298,55 @@ extern ibool srv_use_sys_malloc;
extern ulint srv_buf_pool_size; /*!< requested size in bytes */
extern my_bool srv_buf_pool_populate; /*!< virtual page preallocation */
extern ulint srv_buf_pool_instances; /*!< requested number of buffer pool instances */
+extern ulong srv_n_page_hash_locks; /*!< number of locks to
+ protect buf_pool->page_hash */
+extern ulong srv_LRU_scan_depth; /*!< Scan depth for LRU
+ flush batch */
+extern ulong srv_flush_neighbors; /*!< whether or not to flush
+ neighbors of a block */
extern ulint srv_buf_pool_old_size; /*!< previously requested size */
extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
-extern ibool srv_thread_concurrency_timer_based;
+extern ulong srv_foreground_preflush;/*!< Query thread preflush algorithm */
+
+extern ulint srv_cleaner_max_lru_time;/*!< the maximum time limit for a
+ single LRU tail flush iteration by the
+ page cleaner thread */
+
+extern ulint srv_cleaner_max_flush_time;/*!< the maximum time limit for a
+ single flush list flush iteration by
+ the page cleaner thread */
+
+extern ulint srv_cleaner_flush_chunk_size;
+ /*!< page cleaner flush list flush
+ batches are further divided into this
+ chunk size */
+
+extern ulint srv_cleaner_lru_chunk_size;
+ /*!< page cleaner LRU list flush
+ batches are further divided into this
+ chunk size */
+
+extern ulint srv_cleaner_free_list_lwm;/*!< if free list length is lower
+ than this percentage of
+ srv_LRU_scan_depth, page cleaner LRU
+ flushes will issue flush batches to the
+ same instance in a row */
+
+extern my_bool srv_cleaner_eviction_factor;
+ /*!< if TRUE, page cleaner heuristics
+ use evicted instead of flushed page
+ counts for its heuristics */
+
+extern ulong srv_cleaner_lsn_age_factor;
+ /*!< page cleaner LSN age factor
+ formula option */
+
+extern ulong srv_empty_free_list_algorithm;
+ /*!< Empty free list for a query thread
+ handling algorithm option */
extern ulint srv_n_file_io_threads;
extern my_bool srv_random_read_ahead;
@@ -200,10 +356,16 @@ extern ulint srv_n_write_io_threads;
/* Number of IO operations per second the server can do */
extern ulong srv_io_capacity;
+
+/* We use this dummy default value at startup for max_io_capacity.
+The real value is set based on the value of io_capacity. */
+#define SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT (~0UL)
+#define SRV_MAX_IO_CAPACITY_LIMIT (~0UL)
+extern ulong srv_max_io_capacity;
/* Returns the number of IO operations that is X percent of the
capacity. PCT_IO(5) -> returns the number of IO operations that
is 5% of the max where max is srv_io_capacity. */
-#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0)))
+#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) (p) / 100.0)))
/* The "innodb_stats_method" setting, decides how InnoDB is going
to treat NULL value when collecting statistics. It is not defined
@@ -222,63 +384,57 @@ extern ulint srv_win_file_flush_method;
extern ulint srv_max_n_open_files;
-extern ulint srv_max_dirty_pages_pct;
-
-extern ulint srv_force_recovery;
-extern ulong srv_thread_concurrency;
+extern ulong srv_max_dirty_pages_pct;
+extern ulong srv_max_dirty_pages_pct_lwm;
-extern ulint srv_max_n_threads;
+extern ulong srv_adaptive_flushing_lwm;
+extern ulong srv_flushing_avg_loops;
-extern lint srv_conc_n_threads;
+extern ulong srv_force_recovery;
+#ifndef DBUG_OFF
+extern ulong srv_force_recovery_crash;
+#endif /* !DBUG_OFF */
-extern ulint srv_fast_shutdown; /* If this is 1, do not do a
- purge and index buffer merge.
- If this 2, do not even flush the
- buffer pool to data files at the
- shutdown: we effectively 'crash'
- InnoDB (but lose no committed
- transactions). */
+extern ulint srv_fast_shutdown; /*!< If this is 1, do not do a
+ purge and index buffer merge.
+ If this is 2, do not even flush the
+ buffer pool to data files at the
+ shutdown: we effectively 'crash'
+ InnoDB (but lose no committed
+ transactions). */
extern ibool srv_innodb_status;
-extern unsigned long long srv_stats_sample_pages;
-extern ulint srv_stats_auto_update;
-extern ulint srv_stats_update_need_lock;
-extern ibool srv_use_sys_stats_table;
-#ifdef UNIV_DEBUG
-extern ulong srv_sys_stats_root_page;
-#endif
+extern unsigned long long srv_stats_transient_sample_pages;
+extern my_bool srv_stats_persistent;
+extern unsigned long long srv_stats_persistent_sample_pages;
+extern my_bool srv_stats_auto_recalc;
extern ibool srv_use_doublewrite_buf;
+extern ulong srv_doublewrite_batch_size;
extern ibool srv_use_atomic_writes;
#ifdef HAVE_POSIX_FALLOCATE
extern ibool srv_use_posix_fallocate;
#endif
+extern ulong srv_checksum_algorithm;
-extern ibool srv_use_checksums;
-extern ibool srv_fast_checksum;
+extern ulong srv_log_arch_expire_sec;
extern ulong srv_max_buf_pool_modified_pct;
extern ulong srv_max_purge_lag;
+extern ulong srv_max_purge_lag_delay;
extern ulong srv_replication_delay;
-extern long long srv_ibuf_max_size;
-extern ulong srv_ibuf_active_contract;
-extern ulong srv_ibuf_accel_rate;
-extern ulint srv_checkpoint_age_target;
-extern ulong srv_flush_neighbor_pages;
-extern ulint srv_deprecated_enable_unsafe_group_commit;
-extern ulong srv_read_ahead;
-extern ulong srv_adaptive_flushing_method;
+extern my_bool srv_use_stacktrace;
-extern ulong srv_expand_import;
extern ulong srv_pass_corrupt_table;
-extern my_bool srv_use_stacktrace;
+extern ulong srv_log_checksum_algorithm;
/* Helper macro to support srv_pass_corrupt_table checks. If 'cond' is FALSE,
execute 'code' if srv_pass_corrupt_table is non-zero, or trigger a fatal error
-otherwise. The break statement in 'code' will obviously not work as expected. */
+otherwise. The break statement in 'code' will obviously not work as
+expected. */
#define SRV_CORRUPT_TABLE_CHECK(cond,code) \
do { \
@@ -291,14 +447,8 @@ otherwise. The break statement in 'code' will obviously not work as expected. */
} \
} while(0)
-extern ulint srv_dict_size_limit;
/*-------------------------------------------*/
-extern ulint srv_n_rows_inserted;
-extern ulint srv_n_rows_updated;
-extern ulint srv_n_rows_deleted;
-extern ulint srv_n_rows_read;
-
extern ulint srv_read_views_memory;
extern ulint srv_descriptors_memory;
@@ -306,12 +456,21 @@ extern ibool srv_print_innodb_monitor;
extern ibool srv_print_innodb_lock_monitor;
extern ibool srv_print_innodb_tablespace_monitor;
extern ibool srv_print_verbose_log;
+#define DEPRECATED_MSG_INNODB_TABLE_MONITOR \
+ "Using innodb_table_monitor is deprecated and it may be removed " \
+ "in future releases. Please use the InnoDB INFORMATION_SCHEMA " \
+ "tables instead, see " REFMAN "innodb-i_s-tables.html"
extern ibool srv_print_innodb_table_monitor;
-extern ibool srv_lock_timeout_active;
extern ibool srv_monitor_active;
extern ibool srv_error_monitor_active;
+/* TRUE during the lifetime of the buffer pool dump/load thread */
+extern ibool srv_buf_dump_thread_active;
+
+/* TRUE during the lifetime of the stats thread */
+extern ibool srv_dict_stats_thread_active;
+
extern ulong srv_n_spin_wait_rounds;
extern ulong srv_n_free_tickets_to_enter;
extern ulong srv_thread_sleep_delay;
@@ -319,6 +478,7 @@ extern ulong srv_spin_wait_delay;
extern ibool srv_priority_boost;
extern ulint srv_truncated_status_writes;
+extern ulint srv_available_undo_logs;
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
@@ -337,113 +497,97 @@ extern ibool srv_print_latch_waits;
# define srv_print_latch_waits FALSE
#endif /* UNIV_DEBUG */
-extern ulint srv_activity_count;
-extern ulint srv_fatal_semaphore_wait_threshold;
-#define SRV_SEMAPHORE_WAIT_EXTENSION 7200
-extern ulint srv_dml_needed_delay;
-extern long long srv_kill_idle_transaction;
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+extern my_bool srv_ibuf_disable_background_merge;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
#ifdef UNIV_DEBUG
extern my_bool srv_purge_view_update_only_debug;
#endif /* UNIV_DEBUG */
-extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
- query threads, and lock table: we allocate
- it from dynamic memory to get it to the
- same DRAM page as other hotspot semaphores */
-#define kernel_mutex (*kernel_mutex_temp)
+extern ulint srv_fatal_semaphore_wait_threshold;
+#define SRV_SEMAPHORE_WAIT_EXTENSION 7200
+extern ulint srv_dml_needed_delay;
+extern long long srv_kill_idle_transaction;
+
+#ifndef HAVE_ATOMIC_BUILTINS
+/** Mutex protecting some server global variables. */
+extern ib_mutex_t server_mutex;
+#endif /* !HAVE_ATOMIC_BUILTINS */
#define SRV_MAX_N_IO_THREADS 130
+#define SRV_MAX_N_PURGE_THREADS 32
+
/* Array of English strings describing the current state of an
i/o handler thread */
extern const char* srv_io_thread_op_info[];
extern const char* srv_io_thread_function[];
-/* the number of the log write requests done */
-extern ulint srv_log_write_requests;
+/* The tid of the cleaner thread */
+extern os_tid_t srv_cleaner_tid;
-/* the number of physical writes to the log performed */
-extern ulint srv_log_writes;
+/* The tids of the purge threads */
+extern os_tid_t srv_purge_tids[];
-/* amount of data written to the log files in bytes */
-extern ulint srv_os_log_written;
+/* The tids of the I/O threads */
+extern os_tid_t srv_io_tids[];
-/* amount of writes being done to the log files */
-extern ulint srv_os_log_pending_writes;
+/* The tid of the master thread */
+extern os_tid_t srv_master_tid;
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-extern ulint srv_log_waits;
+/* The relative scheduling priority of the cleaner thread */
+extern ulint srv_sched_priority_cleaner;
-/* the number of purge threads to use from the worker pool (currently 0 or 1) */
-extern ulong srv_n_purge_threads;
+/* The relative scheduling priority of the purge threads */
+extern ulint srv_sched_priority_purge;
-/* the number of pages to purge in one batch */
-extern ulong srv_purge_batch_size;
+/* The relative scheduling priority of the I/O threads */
+extern ulint srv_sched_priority_io;
-/* the number of rollback segments to use */
-extern ulong srv_rollback_segments;
+/* The relative scheduling priority of the master thread */
+extern ulint srv_sched_priority_master;
-/* variable that counts amount of data read in total (in bytes) */
-extern ulint srv_data_read;
+/* The relative priority of the purge coordinator and worker threads. */
+extern my_bool srv_purge_thread_priority;
-/* here we count the amount of data written in total (in bytes) */
-extern ulint srv_data_written;
+/* The relative priority of the I/O threads. */
+extern my_bool srv_io_thread_priority;
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-extern ulint srv_dblwr_writes;
+/* The relative priority of the cleaner thread. */
+extern my_bool srv_cleaner_thread_priority;
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-extern ulint srv_dblwr_pages_written;
+/* The relative priority of the master thread. */
+extern my_bool srv_master_thread_priority;
-/* in this variable we store the number of write requests issued */
-extern ulint srv_buf_pool_write_requests;
+/* the number of purge threads to use from the worker pool (currently 0 or 1) */
+extern ulong srv_n_purge_threads;
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-extern ulint srv_buf_pool_wait_free;
+/* the number of pages to purge in one batch */
+extern ulong srv_purge_batch_size;
-/* variable to count the number of pages that were written from the
-buffer pool to disk */
-extern ulint srv_buf_pool_flushed;
+/* the number of sync wait arrays */
+extern ulong srv_sync_array_size;
-extern ulint buf_lru_flush_page_count;
+/* print all user-level transactions deadlocks to mysqld stderr */
+extern my_bool srv_print_all_deadlocks;
-/** Number of buffer pool reads that led to the
-reading of a disk page */
-extern ulint srv_buf_pool_reads;
+extern my_bool srv_cmp_per_index_enabled;
-/** Time in seconds between automatic buffer pool dumps */
-extern uint srv_auto_lru_dump;
+/** Status variables to be passed to MySQL */
+extern struct export_var_t export_vars;
-/** Whether startup should be blocked until buffer pool is fully restored */
-extern ibool srv_blocking_lru_restore;
+/** Global counters */
+extern srv_stats_t srv_stats;
/** When TRUE, fake change transcations take S rather than X row locks.
When FALSE, row locks are not taken at all. */
extern my_bool srv_fake_changes_locks;
-/** print all user-level transactions deadlocks to mysqld stderr */
-extern my_bool srv_print_all_deadlocks;
-
-/** Status variables to be passed to MySQL */
-typedef struct export_var_struct export_struc;
-
-/** Status variables to be passed to MySQL */
-extern export_struc export_vars;
-
-/** The server system */
-typedef struct srv_sys_struct srv_sys_t;
-
-/** The server system */
-extern srv_sys_t* srv_sys;
# ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */
+extern mysql_pfs_key_t buf_page_cleaner_thread_key;
extern mysql_pfs_key_t trx_rollback_clean_thread_key;
extern mysql_pfs_key_t io_handler_thread_key;
extern mysql_pfs_key_t srv_lock_timeout_thread_key;
@@ -451,26 +595,21 @@ extern mysql_pfs_key_t srv_error_monitor_thread_key;
extern mysql_pfs_key_t srv_monitor_thread_key;
extern mysql_pfs_key_t srv_master_thread_key;
extern mysql_pfs_key_t srv_purge_thread_key;
+extern mysql_pfs_key_t recv_writer_thread_key;
extern mysql_pfs_key_t srv_log_tracking_thread_key;
/* This macro register the current thread and its key with performance
schema */
# define pfs_register_thread(key) \
do { \
- if (PSI_server) { \
- struct PSI_thread* psi = PSI_server->new_thread(key, NULL, 0);\
- if (psi) { \
- PSI_server->set_thread(psi); \
- } \
- } \
+ struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
+ PSI_THREAD_CALL(set_thread)(psi); \
} while (0)
/* This macro delist the current thread from performance schema */
# define pfs_delete_thread() \
do { \
- if (PSI_server) { \
- PSI_server->delete_current_thread(); \
- } \
+ PSI_THREAD_CALL(delete_current_thread)(); \
} while (0)
# endif /* UNIV_PFS_THREAD */
@@ -494,8 +633,22 @@ enum {
after writing to log files */
SRV_UNIX_NOSYNC, /*!< do not flush after writing */
SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
- data files */
- SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */
+ data files. This implies using
+ non-buffered IO but still using fsync,
+ the reason for which is that some FS
+ do not flush meta-data when
+ unbuffered IO happens */
+ SRV_UNIX_O_DIRECT_NO_FSYNC,
+ /*!< do not use fsync() when using
+ direct IO i.e.: it can be set to avoid
+ the fsync() call that we make when
+ using SRV_UNIX_O_DIRECT. However, in
+ this case user/DBA should be sure about
+ the integrity of the meta-data */
+ SRV_UNIX_ALL_O_DIRECT /*!< similar to O_DIRECT, invokes
+ os_file_set_nocache() on data and log files.
+ This implies using non-buffered IO but still
+ using fsync for data but not log files. */
};
/** Alternatives for file i/o in Windows */
@@ -544,17 +697,19 @@ typedef enum srv_stats_method_name_enum srv_stats_method_name_t;
#ifndef UNIV_HOTBACKUP
/** Types of threads existing in the system. */
enum srv_thread_type {
- SRV_WORKER = 0, /**< threads serving parallelized queries and
- queries released from lock wait */
- SRV_MASTER /**< the master thread, (whose type number must
- be biggest) */
+ SRV_NONE, /*!< None */
+ SRV_WORKER, /*!< threads serving parallelized
+ queries and queries released from
+ lock wait */
+ SRV_PURGE, /*!< Purge coordinator thread */
+ SRV_MASTER /*!< the master thread, (whose type
+ number must be biggest) */
};
/*********************************************************************//**
-Boots Innobase server.
-@return DB_SUCCESS or error code */
+Boots Innobase server. */
UNIV_INTERN
-ulint
+void
srv_boot(void);
/*==========*/
/*********************************************************************//**
@@ -577,21 +732,6 @@ void
srv_general_init(void);
/*==================*/
/*********************************************************************//**
-Gets the number of threads in the system.
-@return sum of srv_n_threads[] */
-UNIV_INTERN
-ulint
-srv_get_n_threads(void);
-/*===================*/
-/*********************************************************************//**
-Check whether thread type has reserved a slot.
-@return slot number or UNDEFINED if not found*/
-UNIV_INTERN
-ulint
-srv_thread_has_reserved_slot(
-/*=========================*/
- enum srv_thread_type type); /*!< in: thread type to check */
-/*********************************************************************//**
Sets the info describing an i/o thread current state. */
UNIV_INTERN
void
@@ -601,31 +741,21 @@ srv_set_io_thread_op_info(
const char* str); /*!< in: constant char string describing the
state */
/*********************************************************************//**
-Releases threads of the type given from suspension in the thread table.
-NOTE! The server mutex has to be reserved by the caller!
-@return number of threads released: this may be less than n if not
-enough threads were suspended at the moment */
-UNIV_INTERN
-ulint
-srv_release_threads(
-/*================*/
- enum srv_thread_type type, /*!< in: thread type */
- ulint n); /*!< in: number of threads to release */
-/*********************************************************************//**
-The master thread controlling the server.
-@return a dummy parameter */
+Resets the info describing an i/o thread current state. */
UNIV_INTERN
-os_thread_ret_t
-srv_master_thread(
-/*==============*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
+void
+srv_reset_io_thread_op_info();
+/*=========================*/
/*******************************************************************//**
-Wakes up the purge thread if it's not already awake. */
+Tells the purge thread that there has been activity in the database
+and wakes up the purge thread if it is suspended (not sleeping). Note
+that there is a small chance that the purge thread stays suspended
+(we do not protect our operation with the srv_sys_t::mutex, for
+performance reasons). */
UNIV_INTERN
void
-srv_wake_purge_thread(void);
-/*=======================*/
+srv_wake_purge_thread_if_not_active(void);
+/*=====================================*/
/*******************************************************************//**
Tells the Innobase server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
@@ -642,174 +772,184 @@ UNIV_INTERN
void
srv_wake_master_thread(void);
/*========================*/
-/*******************************************************************//**
-Tells the purge thread that there has been activity in the database
-and wakes up the purge thread if it is suspended (not sleeping). Note
-that there is a small chance that the purge thread stays suspended
-(we do not protect our operation with the kernel mutex, for
-performace reasons). */
+/******************************************************************//**
+A thread which follows the redo log and outputs the changed page bitmap.
+@return a dummy value */
+extern "C"
UNIV_INTERN
-void
-srv_wake_purge_thread_if_not_active(void);
-/*=====================================*/
-/*********************************************************************//**
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
+os_thread_ret_t
+DECLARE_THREAD(srv_redo_log_follow_thread)(
+/*=======================*/
+ void* arg); /*!< in: a dummy parameter required by
+ os_thread_create */
+/******************************************************************//**
+Outputs to a file the output of the InnoDB Monitor.
+@return FALSE if not all information printed
+due to failure to obtain necessary mutex */
UNIV_INTERN
-void
-srv_conc_enter_innodb(
-/*==================*/
- trx_t* trx); /*!< in: transaction object associated with the
- thread */
-/*********************************************************************//**
-This lets a thread enter InnoDB regardless of the number of threads inside
-InnoDB. This must be called when a thread ends a lock wait. */
+ibool
+srv_printf_innodb_monitor(
+/*======================*/
+ FILE* file, /*!< in: output stream */
+ ibool nowait, /*!< in: whether to wait for the
+ lock_sys_t::mutex */
+ ulint* trx_start, /*!< out: file position of the start of
+ the list of active transactions */
+ ulint* trx_end); /*!< out: file position of the end of
+ the list of active transactions */
+
+/******************************************************************//**
+Function to pass InnoDB status variables to MySQL */
UNIV_INTERN
void
-srv_conc_force_enter_innodb(
+srv_export_innodb_status(void);
+/*==========================*/
+/*************************************************************//**
+Removes old archived transaction log files.
+Both parameters cannot be provided at the same time.
+@return DB_SUCCESS on success, otherwise DB_ERROR */
+UNIV_INTERN
+dberr_t
+purge_archived_logs(
+ time_t before_date, /*!< in: all files modified
+ before timestamp should be removed */
+ lsn_t before_lsn); /*!< in: files with this lsn in name
+ and earlier should be removed */
+/*==========================*/
+/*******************************************************************//**
+Get current server activity count. We don't hold srv_sys::mutex while
+reading this value as it is only used in heuristics.
+@return activity count. */
+UNIV_INTERN
+ulint
+srv_get_activity_count(void);
/*========================*/
- trx_t* trx); /*!< in: transaction object associated with the
- thread */
-/*********************************************************************//**
-This must be called when a thread exits InnoDB in a lock wait or at the
-end of an SQL statement. */
+/*******************************************************************//**
+Check if there has been any activity.
+@return FALSE if no change in activity counter. */
UNIV_INTERN
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
- trx_t* trx); /*!< in: transaction object associated with the
- thread */
-/*********************************************************************//**
-This must be called when a thread exits InnoDB. */
+ibool
+srv_check_activity(
+/*===============*/
+ ulint old_activity_count); /*!< old activity count */
+/******************************************************************//**
+Increment the server activity counter. */
UNIV_INTERN
void
-srv_conc_exit_innodb(
-/*=================*/
- trx_t* trx); /*!< in: transaction object associated with the
- thread */
-/***************************************************************//**
-Puts a MySQL OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim. */
+srv_inc_activity_count(void);
+/*=========================*/
+
+/**********************************************************************//**
+Enqueues a task to server task queue and releases a worker thread, if there
+is a suspended one. */
UNIV_INTERN
void
-srv_suspend_mysql_thread(
+srv_que_task_enqueue_low(
/*=====================*/
- que_thr_t* thr); /*!< in: query thread associated with the MySQL
- OS thread */
-/********************************************************************//**
-Releases a MySQL OS thread waiting for a lock to be released, if the
-thread is already suspended. */
+ que_thr_t* thr); /*!< in: query thread */
+
+/**********************************************************************//**
+Check whether any background thread is active. If so, return the thread
+type.
+@return SRV_NONE if all are suspended or have exited, thread
+type if any are still active. */
UNIV_INTERN
-void
-srv_release_mysql_thread_if_suspended(
-/*==================================*/
- que_thr_t* thr); /*!< in: query thread associated with the
- MySQL OS thread */
+enum srv_thread_type
+srv_get_active_thread_type(void);
+/*============================*/
+
+extern "C" {
+
/*********************************************************************//**
-A thread which wakes up threads whose lock wait may have lasted too long.
+A thread which prints the info output by various InnoDB monitors.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
-srv_lock_timeout_thread(
-/*====================*/
+DECLARE_THREAD(srv_monitor_thread)(
+/*===============================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
+
/*********************************************************************//**
-A thread which prints the info output by various InnoDB monitors.
+The master thread controlling the server.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
-srv_monitor_thread(
-/*===============*/
+DECLARE_THREAD(srv_master_thread)(
+/*==============================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
+
/*************************************************************************
A thread which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
-srv_error_monitor_thread(
-/*=====================*/
+DECLARE_THREAD(srv_error_monitor_thread)(
+/*=====================================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
+
/*********************************************************************//**
-A thread which restores the buffer pool from a dump file on startup and does
-periodic buffer pool dumps.
+Purge coordinator thread that schedules the purge tasks.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
-srv_LRU_dump_restore_thread(
-/*====================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-/******************************************************************//**
-A thread which follows the redo log and outputs the changed page bitmap.
-@return a dummy value */
+DECLARE_THREAD(srv_purge_coordinator_thread)(
+/*=========================================*/
+ void* arg __attribute__((unused))); /*!< in: a dummy parameter
+ required by os_thread_create */
+
+/*********************************************************************//**
+Worker thread that reads tasks from the work queue and executes them.
+@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
-srv_redo_log_follow_thread(
-/*=======================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-/******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor.
-@return FALSE if not all information printed
-due to failure to obtain necessary mutex */
-UNIV_INTERN
-ibool
-srv_printf_innodb_monitor(
-/*======================*/
- FILE* file, /*!< in: output stream */
- ibool nowait, /*!< in: whether to wait for kernel mutex */
- ulint* trx_start, /*!< out: file position of the start of
- the list of active transactions */
- ulint* trx_end); /*!< out: file position of the end of
- the list of active transactions */
+DECLARE_THREAD(srv_worker_thread)(
+/*==============================*/
+ void* arg __attribute__((unused))); /*!< in: a dummy parameter
+ required by os_thread_create */
+} /* extern "C" */
-/******************************************************************//**
-Function to pass InnoDB status variables to MySQL */
+/**********************************************************************//**
+Get count of tasks in the queue.
+@return number of tasks in queue */
UNIV_INTERN
-void
-srv_export_innodb_status(void);
-/*==========================*/
+ulint
+srv_get_task_queue_length(void);
+/*===========================*/
/*********************************************************************//**
-Asynchronous purge thread.
-@return a dummy parameter */
+Releases threads of the type given from suspension in the thread table.
+NOTE! The server mutex has to be reserved by the caller!
+@return number of threads released: this may be less than n if not
+enough threads were suspended at the moment */
UNIV_INTERN
-os_thread_ret_t
-srv_purge_thread(
-/*=============*/
- void* arg __attribute__((unused))); /*!< in: a dummy parameter
- required by os_thread_create */
+ulint
+srv_release_threads(
+/*================*/
+ enum srv_thread_type type, /*!< in: thread type */
+ ulint n); /*!< in: number of threads to release */
/**********************************************************************//**
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
+Check whether any background threads are active. If so, print which thread
+is active. Send the threads wakeup signal.
+@return name of thread that is active or NULL */
UNIV_INTERN
-void
-srv_que_task_enqueue_low(
-/*=====================*/
- que_thr_t* thr); /*!< in: query thread */
+const char*
+srv_any_background_threads_are_active(void);
+/*=======================================*/
/**********************************************************************//**
-Check whether any background thread is active. If so, return the thread
-type.
-@return ULINT_UNDEFINED if all are are suspended or have exited, thread
-type if any are still active. */
+Wakeup the purge threads. */
UNIV_INTERN
-ulint
-srv_get_active_thread_type(void);
-/*============================*/
+void
+srv_purge_wakeup(void);
+/*==================*/
/** Status variables to be passed to MySQL */
-struct export_var_struct{
- ulint innodb_adaptive_hash_cells;
- ulint innodb_adaptive_hash_heap_buffers;
+struct export_var_t{
ulint innodb_adaptive_hash_hash_searches;
ulint innodb_adaptive_hash_non_hash_searches;
ulint innodb_background_log_sync;
@@ -821,7 +961,8 @@ struct export_var_struct{
ulint innodb_data_writes; /*!< I/O write requests */
ulint innodb_data_written; /*!< Data bytes written */
ulint innodb_data_reads; /*!< I/O read requests */
- ulint innodb_dict_tables;
+ char innodb_buffer_pool_dump_status[512];/*!< Buf pool dump status */
+ char innodb_buffer_pool_load_status[512];/*!< Buf pool load status */
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
ulint innodb_buffer_pool_pages_data; /*!< Data pages */
ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
@@ -846,7 +987,6 @@ struct export_var_struct{
ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
ulint innodb_checkpoint_age;
ulint innodb_checkpoint_max_age;
- ulint innodb_checkpoint_target_age;
ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */
ulint innodb_deadlocks;
@@ -865,14 +1005,12 @@ struct export_var_struct{
ulint innodb_log_waits; /*!< srv_log_waits */
ulint innodb_log_write_requests; /*!< srv_log_write_requests */
ulint innodb_log_writes; /*!< srv_log_writes */
- ib_int64_t innodb_lsn_current;
- ib_int64_t innodb_lsn_flushed;
- ib_int64_t innodb_lsn_last_checkpoint;
- ulint innodb_master_thread_1_second_loops;
- ulint innodb_master_thread_10_second_loops;
- ulint innodb_master_thread_background_loops;
- ulint innodb_master_thread_main_flush_loops;
- ulint innodb_master_thread_sleeps;
+ lsn_t innodb_os_log_written; /*!< srv_os_log_written */
+ lsn_t innodb_lsn_current;
+ lsn_t innodb_lsn_flushed;
+ lsn_t innodb_lsn_last_checkpoint;
+ ulint innodb_master_thread_active_loops;/*!< srv_main_active_loops */
+ ulint innodb_master_thread_idle_loops; /*!< srv_main_idle_loops */
ib_int64_t innodb_max_trx_id;
ulint innodb_mem_adaptive_hash;
ulint innodb_mem_dictionary;
@@ -881,7 +1019,6 @@ struct export_var_struct{
ib_int64_t innodb_mutex_spin_rounds;
ib_int64_t innodb_mutex_spin_waits;
ib_int64_t innodb_oldest_view_low_limit_trx_id;
- ulint innodb_os_log_written; /*!< srv_os_log_written */
ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */
ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */
@@ -905,7 +1042,9 @@ struct export_var_struct{
ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */
ulint innodb_rows_updated; /*!< srv_n_rows_updated */
ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
+ ulint innodb_num_open_files; /*!< fil_n_file_opened */
ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */
+ ulint innodb_available_undo_logs; /*!< srv_available_undo_logs */
ulint innodb_read_views_memory; /*!< srv_read_views_memory */
ulint innodb_descriptors_memory; /*!< srv_descriptors_memory */
ib_int64_t innodb_s_lock_os_waits;
@@ -915,32 +1054,43 @@ struct export_var_struct{
ib_int64_t innodb_x_lock_spin_rounds;
ib_int64_t innodb_x_lock_spin_waits;
#ifdef UNIV_DEBUG
- ulint innodb_purge_trx_id_age; /*!< max_trx_id - purged trx_id */
+ ulint innodb_purge_trx_id_age; /*!< rw_max_trx_id - purged trx_id */
ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
- purged view's min trx_id */
#endif /* UNIV_DEBUG */
};
-/** Thread slot in the thread table */
-typedef struct srv_slot_struct srv_slot_t;
-
-/** Thread table is an array of slots */
-typedef srv_slot_t srv_table_t;
-
-/** The server system struct */
-struct srv_sys_struct{
- srv_table_t* threads; /*!< server thread table */
- UT_LIST_BASE_NODE_T(que_thr_t)
- tasks; /*!< task queue */
+/** Thread slot in the thread table. */
+struct srv_slot_t{
+ srv_thread_type type; /*!< thread type: user,
+ utility etc. */
+ ibool in_use; /*!< TRUE if this slot
+ is in use */
+ ibool suspended; /*!< TRUE if the thread is
+ waiting for the event of this
+ slot */
+ ib_time_t suspend_time; /*!< time when the thread was
+ suspended. Initialized by
+ lock_wait_table_reserve_slot()
+ for lock wait */
+ ulong wait_timeout; /*!< wait time that if exceeded
+ the thread will be timed out.
+ Initialized by
+ lock_wait_table_reserve_slot()
+ for lock wait */
+ os_event_t event; /*!< event used in suspending
+ the thread when it has nothing
+ to do */
+ que_thr_t* thr; /*!< suspended query thread
+ (only used for user threads) */
};
-extern ulint srv_n_threads_active[];
#else /* !UNIV_HOTBACKUP */
# define srv_use_adaptive_hash_indexes FALSE
-# define srv_use_checksums TRUE
# define srv_use_native_aio FALSE
# define srv_force_recovery 0UL
# define srv_set_io_thread_op_info(t,info) ((void) 0)
+# define srv_reset_io_thread_op_info() ((void) 0)
# define srv_is_being_started 0
# define srv_win_file_flush_method SRV_WIN_IO_UNBUFFERED
# define srv_unix_file_flush_method SRV_UNIX_O_DSYNC
diff --git a/storage/xtradb/include/srv0srv.ic b/storage/xtradb/include/srv0srv.ic
index 19ba62cc3c2..53405c06f97 100644
--- a/storage/xtradb/include/srv0srv.ic
+++ b/storage/xtradb/include/srv0srv.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h
index ffbb0dafa5d..40d502f4459 100644
--- a/storage/xtradb/include/srv0start.h
+++ b/storage/xtradb/include/srv0start.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,8 +27,15 @@ Created 10/10/1995 Heikki Tuuri
#define srv0start_h
#include "univ.i"
+#include "log0log.h"
#include "ut0byte.h"
+#ifdef __WIN__
+#define SRV_PATH_SEPARATOR '\\'
+#else
+#define SRV_PATH_SEPARATOR '/'
+#endif
+
/*********************************************************************//**
Normalizes a directory path for Windows: converts slashes to backslashes. */
UNIV_INTERN
@@ -46,15 +53,6 @@ srv_parse_data_file_paths_and_sizes(
/*================================*/
char* str); /*!< in/out: the data file path string */
/*********************************************************************//**
-Reads log group home directories from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
- char* str); /*!< in/out: character string */
-/*********************************************************************//**
Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
and srv_parse_log_group_home_dirs(). */
UNIV_INTERN
@@ -76,20 +74,54 @@ Starts Innobase and creates a new database if database files
are not found and the user wants.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_start_or_create_for_mysql(void);
/*====================================*/
/****************************************************************//**
Shuts down the Innobase database.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_shutdown_for_mysql(void);
+
+/********************************************************************
+Signal all per-table background threads to shut down, and wait for them to do
+so. */
+UNIV_INTERN
+void
+srv_shutdown_table_bg_threads(void);
/*=============================*/
+
+/*************************************************************//**
+Copy the file path component of the physical file to the destination
+buffer. It will copy up to and including the terminating path separator.
+@return number of bytes copied or ULINT_UNDEFINED if destination buffer
+ is smaller than the path to be copied. */
+UNIV_INTERN
+ulint
+srv_path_copy(
+/*==========*/
+ char* dest, /*!< out: destination buffer */
+ ulint dest_len, /*!< in: max bytes to copy */
+ const char* basedir, /*!< in: base directory */
+ const char* table_name) /*!< in: source table name */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Get the meta-data filename from the table name. */
+UNIV_INTERN
+void
+srv_get_meta_data_filename(
+/*======================*/
+ dict_table_t* table, /*!< in: table */
+ char* filename, /*!< out: filename */
+ ulint max_len) /*!< in: filename max length */
+ __attribute__((nonnull));
+
/** Log sequence number at shutdown */
-extern ib_uint64_t srv_shutdown_lsn;
+extern lsn_t srv_shutdown_lsn;
/** Log sequence number immediately after startup */
-extern ib_uint64_t srv_start_lsn;
+extern lsn_t srv_start_lsn;
#ifdef HAVE_DARWIN_THREADS
/** TRUE if the F_FULLFSYNC option is available */
@@ -113,6 +145,11 @@ enum srv_shutdown_state {
SRV_SHUTDOWN_NONE = 0, /*!< Database running normally */
SRV_SHUTDOWN_CLEANUP, /*!< Cleaning up in
logs_empty_and_mark_files_at_shutdown() */
+ SRV_SHUTDOWN_FLUSH_PHASE,/*!< At this phase the master and the
+ purge threads must have completed their
+ work. Once we enter this phase the
+ page_cleaner can clean up the buffer
+ pool and exit */
SRV_SHUTDOWN_LAST_PHASE,/*!< Last phase after ensuring that
the buffer pool can be freed: flush
all file spaces and close all files */
@@ -127,7 +164,4 @@ extern enum srv_shutdown_state srv_shutdown_state;
/** Log 'spaces' have id's >= this */
#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
-/** reserved for extra system tables */
-#define SRV_EXTRA_SYS_SPACE_FIRST_ID 0xFFFFFFE0UL
-
#endif
diff --git a/storage/xtradb/include/sync0arr.h b/storage/xtradb/include/sync0arr.h
index 4bce9435577..bb4d1037a62 100644
--- a/storage/xtradb/include/sync0arr.h
+++ b/storage/xtradb/include/sync0arr.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -32,36 +32,10 @@ Created 9/5/1995 Heikki Tuuri
#include "os0thread.h"
/** Synchronization wait array cell */
-typedef struct sync_cell_struct sync_cell_t;
+struct sync_cell_t;
/** Synchronization wait array */
-typedef struct sync_array_struct sync_array_t;
-
-/** Parameters for sync_array_create() @{ */
-#define SYNC_ARRAY_OS_MUTEX 1 /*!< protected by os_mutex_t */
-#define SYNC_ARRAY_MUTEX 2 /*!< protected by mutex_t */
-/* @} */
-
-/*******************************************************************//**
-Creates a synchronization wait array. It is protected by a mutex
-which is automatically reserved when the functions operating on it
-are called.
-@return own: created wait array */
-UNIV_INTERN
-sync_array_t*
-sync_array_create(
-/*==============*/
- ulint n_cells, /*!< in: number of cells in the array
- to create */
- ulint protection); /*!< in: either SYNC_ARRAY_OS_MUTEX or
- SYNC_ARRAY_MUTEX: determines the type
- of mutex protecting the data structure */
-/******************************************************************//**
-Frees the resources in a wait array. */
-UNIV_INTERN
-void
-sync_array_free(
-/*============*/
- sync_array_t* arr); /*!< in, own: sync wait array */
+struct sync_array_t;
+
/******************************************************************//**
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state. */
@@ -99,9 +73,9 @@ sync_array_free_cell(
Note that one of the wait objects was signalled. */
UNIV_INTERN
void
-sync_array_object_signalled(
-/*========================*/
- sync_array_t* arr); /*!< in: wait array */
+sync_array_object_signalled(void);
+/*=============================*/
+
/**********************************************************************//**
If the wakeup algorithm does not work perfectly at semaphore relases,
this function will do the waking (see the comment in mutex_exit). This
@@ -132,11 +106,30 @@ sync_array_validate(
Prints info of the wait array. */
UNIV_INTERN
void
-sync_array_print_info(
+sync_array_print(
+/*=============*/
+ FILE* file); /*!< in: file where to print */
+
+/**********************************************************************//**
+Create the primary system wait array(s); they are protected by an OS mutex. */
+UNIV_INTERN
+void
+sync_array_init(
+/*============*/
+ ulint n_threads); /*!< in: Number of slots to create */
+/**********************************************************************//**
+Close sync array wait sub-system. */
+UNIV_INTERN
+void
+sync_array_close(void);
/*==================*/
- FILE* file, /*!< in: file where to print */
- sync_array_t* arr); /*!< in: wait array */
+/**********************************************************************//**
+Get an instance of the sync wait array. */
+UNIV_INTERN
+sync_array_t*
+sync_array_get(void);
+/*================*/
#ifndef UNIV_NONINL
#include "sync0arr.ic"
diff --git a/storage/xtradb/include/sync0arr.ic b/storage/xtradb/include/sync0arr.ic
index b49dce34017..0114a1ff5a2 100644
--- a/storage/xtradb/include/sync0arr.ic
+++ b/storage/xtradb/include/sync0arr.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -24,4 +24,3 @@ Inline code
Created 9/5/1995 Heikki Tuuri
*******************************************************/
-
diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h
index 414d7ea43dc..ace3a0993c8 100644
--- a/storage/xtradb/include/sync0rw.h
+++ b/storage/xtradb/include/sync0rw.h
@@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -36,6 +36,7 @@ Created 9/11/1995 Heikki Tuuri
#include "univ.i"
#ifndef UNIV_HOTBACKUP
#include "ut0lst.h"
+#include "ut0counter.h"
#include "sync0sync.h"
#include "os0sync.h"
@@ -44,6 +45,43 @@ in MySQL: */
#undef rw_lock_t
#endif /* !UNIV_HOTBACKUP */
+/** Counters for RW locks. */
+struct rw_lock_stats_t {
+ typedef ib_counter_t<ib_int64_t, IB_N_SLOTS> ib_int64_counter_t;
+
+ /** number of spin waits on rw-latches,
+ resulted during shared (read) locks */
+ ib_int64_counter_t rw_s_spin_wait_count;
+
+ /** number of spin loop rounds on rw-latches,
+ resulted during shared (read) locks */
+ ib_int64_counter_t rw_s_spin_round_count;
+
+ /** number of OS waits on rw-latches,
+ resulted during shared (read) locks */
+ ib_int64_counter_t rw_s_os_wait_count;
+
+ /** number of unlocks (that unlock shared locks),
+ set only when UNIV_SYNC_PERF_STAT is defined */
+ ib_int64_counter_t rw_s_exit_count;
+
+ /** number of spin waits on rw-latches,
+ resulted during exclusive (write) locks */
+ ib_int64_counter_t rw_x_spin_wait_count;
+
+ /** number of spin loop rounds on rw-latches,
+ resulted during exclusive (write) locks */
+ ib_int64_counter_t rw_x_spin_round_count;
+
+ /** number of OS waits on rw-latches,
+ resulted during exclusive (write) locks */
+ ib_int64_counter_t rw_x_os_wait_count;
+
+ /** number of unlocks (that unlock exclusive locks),
+ set only when UNIV_SYNC_PERF_STAT is defined */
+ ib_int64_counter_t rw_x_exit_count;
+};
+
/* Latch types; these are used also in btr0btr.h: keep the numerical values
smaller than 30 and the order of the numerical values like below! */
#define RW_S_LATCH 1
@@ -57,22 +95,23 @@ of concurrent read locks before the rw_lock breaks. The current value of
0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
#define X_LOCK_DECR 0x00100000
-typedef struct rw_lock_struct rw_lock_t;
+struct rw_lock_t;
+struct prio_rw_lock_t;
#ifdef UNIV_SYNC_DEBUG
-typedef struct rw_lock_debug_struct rw_lock_debug_t;
+struct rw_lock_debug_t;
#endif /* UNIV_SYNC_DEBUG */
typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
extern rw_lock_list_t rw_lock_list;
-extern mutex_t rw_lock_list_mutex;
+extern ib_mutex_t rw_lock_list_mutex;
#ifdef UNIV_SYNC_DEBUG
/* The global mutex which protects debug info lists of all rw-locks.
To modify the debug info list of an rw-lock, this mutex has to be
acquired in addition to the mutex protecting the lock. */
-extern mutex_t rw_lock_debug_mutex;
+extern ib_mutex_t rw_lock_debug_mutex;
extern os_event_t rw_lock_debug_event; /*!< If deadlock detection does
not get immediately the mutex it
may wait for this event */
@@ -80,30 +119,8 @@ extern ibool rw_lock_debug_waiters; /*!< This is set to TRUE, if
there may be waiters for the event */
#endif /* UNIV_SYNC_DEBUG */
-/** number of spin waits on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_s_spin_wait_count;
-/** number of spin loop rounds on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_s_spin_round_count;
-/** number of unlocks (that unlock shared locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-extern ib_int64_t rw_s_exit_count;
-/** number of OS waits on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_s_os_wait_count;
-/** number of spin waits on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_x_spin_wait_count;
-/** number of spin loop rounds on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_x_spin_round_count;
-/** number of OS waits on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_x_os_wait_count;
-/** number of unlocks (that unlock exclusive locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-extern ib_int64_t rw_x_exit_count;
+/** Counters for RW locks. */
+extern rw_lock_stats_t rw_lock_stats;
#ifdef UNIV_PFS_RWLOCK
/* Following are rwlock keys used to register with MySQL
@@ -112,18 +129,22 @@ performance schema */
extern mysql_pfs_key_t archive_lock_key;
# endif /* UNIV_LOG_ARCHIVE */
extern mysql_pfs_key_t btr_search_latch_key;
-extern mysql_pfs_key_t buf_pool_page_hash_key;
extern mysql_pfs_key_t buf_block_lock_key;
# ifdef UNIV_SYNC_DEBUG
extern mysql_pfs_key_t buf_block_debug_latch_key;
# endif /* UNIV_SYNC_DEBUG */
extern mysql_pfs_key_t dict_operation_lock_key;
-extern mysql_pfs_key_t fil_space_latch_key;
extern mysql_pfs_key_t checkpoint_lock_key;
+extern mysql_pfs_key_t fil_space_latch_key;
+extern mysql_pfs_key_t fts_cache_rw_lock_key;
+extern mysql_pfs_key_t fts_cache_init_rw_lock_key;
extern mysql_pfs_key_t trx_i_s_cache_lock_key;
extern mysql_pfs_key_t trx_purge_latch_key;
extern mysql_pfs_key_t index_tree_rw_lock_key;
+extern mysql_pfs_key_t index_online_log_key;
extern mysql_pfs_key_t dict_table_stats_latch_key;
+extern mysql_pfs_key_t trx_sys_rw_lock_key;
+extern mysql_pfs_key_t hash_table_rw_lock_key;
#endif /* UNIV_PFS_RWLOCK */
@@ -279,6 +300,24 @@ rw_lock_create_func(
#endif /* UNIV_DEBUG */
const char* cmutex_name); /*!< in: mutex name */
/******************************************************************//**
+Creates, or rather, initializes a priority rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed. */
+UNIV_INTERN
+void
+rw_lock_create_func(
+/*================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to memory */
+#ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline, /*!< in: file line where created */
+#endif /* UNIV_DEBUG */
+ const char* cmutex_name); /*!< in: mutex name */
+/******************************************************************//**
Calling this function is obligatory only if the memory buffer containing
the rw-lock is freed. Removes an rw-lock object from the global list. The
rw-lock is checked to be in the non-locked state. */
@@ -287,6 +326,15 @@ void
rw_lock_free_func(
/*==============*/
rw_lock_t* lock); /*!< in: rw-lock */
+/******************************************************************//**
+Calling this function is obligatory only if the memory buffer containing
+the priority rw-lock is freed. Removes an rw-lock object from the global list.
+The rw-lock is checked to be in the non-locked state. */
+UNIV_INTERN
+void
+rw_lock_free_func(
+/*==============*/
+ prio_rw_lock_t* lock); /*!< in: rw-lock */
#ifdef UNIV_DEBUG
/******************************************************************//**
Checks that the rw-lock has been initialized and that there are no
@@ -297,6 +345,15 @@ ibool
rw_lock_validate(
/*=============*/
rw_lock_t* lock); /*!< in: rw-lock */
+/******************************************************************//**
+Checks that the priority rw-lock has been initialized and that there are no
+simultaneous shared and exclusive locks.
+@return TRUE */
+UNIV_INTERN
+ibool
+rw_lock_validate(
+/*=============*/
+ prio_rw_lock_t* lock); /*!< in: rw-lock */
#endif /* UNIV_DEBUG */
/******************************************************************//**
Low-level function which tries to lock an rw-lock in s-mode. Performs no
@@ -329,6 +386,22 @@ rw_lock_s_lock_func(
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function, except if
+you supply the file name and line number. Lock a priority rw-lock in shared
+mode for the current thread, using the relative thread priority. If the
+rw-lock is locked in exclusive mode, or there is an exclusive lock request
+waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
+waiting for the lock, before suspending the thread. */
+UNIV_INLINE
+void
+rw_lock_s_lock_func(
+/*================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread if the lock can be
obtained immediately.
@@ -353,6 +426,17 @@ rw_lock_s_unlock_func(
rw_lock_t* lock); /*!< in/out: rw-lock */
/******************************************************************//**
+Releases a shared mode priority lock. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_func(
+/*==================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ prio_rw_lock_t* lock); /*!< in/out: rw-lock */
+/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread. If the rw-lock is locked
in shared or exclusive mode, or there is an exclusive lock request waiting,
@@ -365,7 +449,30 @@ UNIV_INTERN
void
rw_lock_x_lock_func(
/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line, /*!< in: line where requested */
+ bool priority_lock = false,
+ /*!< in: whether the lock is a priority lock */
+ bool high_priority = false);
+ /*!< in: whether we are acquiring a priority
+ lock with high priority */
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock a priority
+rw-lock in exclusive mode for the current thread. If the rw-lock is locked
+in shared or exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the lock, before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeeds, with the following exception: if pass != 0,
+only a single x-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+UNIV_INTERN
+void
+rw_lock_x_lock_func(
+/*================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
@@ -381,30 +488,17 @@ rw_lock_x_unlock_func(
been passed to another thread to unlock */
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
-
-
-/******************************************************************//**
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line); /*!< in: line where lock requested */
/******************************************************************//**
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
+Releases an exclusive mode priority lock. */
UNIV_INLINE
void
-rw_lock_x_lock_direct(
+rw_lock_x_unlock_func(
/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line); /*!< in: line where lock requested */
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ prio_rw_lock_t* lock); /*!< in/out: rw-lock */
/******************************************************************//**
This function is used in the insert buffer to move the ownership of an
x-latch on a buffer frame to the current thread. The x-latch was set by
@@ -420,22 +514,6 @@ rw_lock_x_lock_move_ownership(
rw_lock_t* lock); /*!< in: lock which was x-locked in the
buffer read */
/******************************************************************//**
-Releases a shared mode lock when we know there are no waiters and none
-else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_direct(
-/*====================*/
- rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock durint the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_direct(
-/*====================*/
- rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
Returns the value of writer_count for the lock. Does not reserve the lock
mutex, so the caller must be sure it is not changed during the call.
@return value of writer_count */
@@ -444,6 +522,15 @@ ulint
rw_lock_get_x_lock_count(
/*=====================*/
const rw_lock_t* lock); /*!< in: rw-lock */
+/******************************************************************//**
+Returns the value of writer_count for the priority lock. Does not reserve the
+lock mutex, so the caller must be sure it is not changed during the call.
+@return value of writer_count */
+UNIV_INLINE
+ulint
+rw_lock_get_x_lock_count(
+/*=====================*/
+ const prio_rw_lock_t* lock); /*!< in: rw-lock */
/********************************************************************//**
Check if there are threads waiting for the rw-lock.
@return 1 if waiters, 0 otherwise */
@@ -452,6 +539,14 @@ ulint
rw_lock_get_waiters(
/*================*/
const rw_lock_t* lock); /*!< in: rw-lock */
+/********************************************************************//**
+Check if there are threads waiting for the priority rw-lock.
+@return 1 if waiters, 0 otherwise */
+UNIV_INLINE
+ulint
+rw_lock_get_waiters(
+/*================*/
+ const prio_rw_lock_t* lock); /*!< in: rw-lock */
/******************************************************************//**
Returns the write-status of the lock - this function made more sense
with the old rw_lock implementation.
@@ -462,6 +557,15 @@ rw_lock_get_writer(
/*===============*/
const rw_lock_t* lock); /*!< in: rw-lock */
/******************************************************************//**
+Returns the write-status of the priority lock - this function made more sense
+with the old rw_lock implementation.
+@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
+UNIV_INLINE
+ulint
+rw_lock_get_writer(
+/*===============*/
+ const prio_rw_lock_t* lock); /*!< in: rw-lock */
+/******************************************************************//**
Returns the number of readers.
@return number of readers */
UNIV_INLINE
@@ -470,6 +574,14 @@ rw_lock_get_reader_count(
/*=====================*/
const rw_lock_t* lock); /*!< in: rw-lock */
/******************************************************************//**
+Returns the number of readers.
+@return number of readers */
+UNIV_INLINE
+ulint
+rw_lock_get_reader_count(
+/*=====================*/
+ const prio_rw_lock_t* lock); /*!< in: rw-lock */
+/******************************************************************//**
Decrements lock_word the specified amount if it is greater than 0.
This is used by both s_lock and x_lock operations.
@return TRUE if decr occurs */
@@ -516,6 +628,17 @@ rw_lock_own(
ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
__attribute__((warn_unused_result));
+/******************************************************************//**
+Checks if the thread has locked the priority rw-lock in the specified mode,
+with the pass value == 0. */
+UNIV_INTERN
+ibool
+rw_lock_own(
+/*========*/
+ prio_rw_lock_t* lock, /*!< in: rw-lock */
+ ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
+ RW_LOCK_EX */
+ __attribute__((warn_unused_result));
#endif /* UNIV_SYNC_DEBUG */
/******************************************************************//**
Checks if somebody has locked the rw-lock in the specified mode. */
@@ -588,7 +711,7 @@ shared locks are allowed. To prevent starving of a writer blocked by
readers, a writer may queue for x-lock by decrementing lock_word: no
new readers will be let in while the thread waits for readers to
exit. */
-struct rw_lock_struct {
+struct rw_lock_t {
volatile lint lock_word;
/*!< Holds the state of the lock. */
volatile ulint waiters;/*!< 1: there are waiters */
@@ -608,12 +731,12 @@ struct rw_lock_struct {
/*!< Thread id of writer thread. Is only
guaranteed to have sane and non-stale
value iff recursive flag is set. */
- os_event_t event; /*!< Used by sync0arr.c for thread queueing */
+ os_event_t event; /*!< Used by sync0arr.cc for thread queueing */
os_event_t wait_ex_event;
/*!< Event for next-writer to wait on. A thread
must decrement lock_word before waiting. */
#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_t mutex; /*!< The mutex protecting rw_lock_struct */
+ ib_mutex_t mutex; /*!< The mutex protecting rw_lock_t */
#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
UT_LIST_NODE_T(rw_lock_t) list;
@@ -646,15 +769,36 @@ struct rw_lock_struct {
unsigned last_x_line:14; /*!< Line number where last time x-locked */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< RW_LOCK_MAGIC_N */
-/** Value of rw_lock_struct::magic_n */
+/** Value of rw_lock_t::magic_n */
#define RW_LOCK_MAGIC_N 22643
#endif /* UNIV_DEBUG */
};
+/** The structure implementing a priority rw lock. */
+struct prio_rw_lock_t {
+ struct rw_lock_t base_lock; /* The regular rw latch
+ provides the lock word etc. for
+ the priority rw lock */
+ volatile ulint high_priority_s_waiters;
+ /* If 1, high priority S
+ waiters exist */
+ os_event_t high_priority_s_event; /* High priority wait
+ array event for S waiters */
+ volatile ulint high_priority_x_waiters;
+ /* If 1, high priority X
+ waiters exist */
+ os_event_t high_priority_x_event;
+ /* High priority wait array
+ event for X waiters */
+ volatile ulint high_priority_wait_ex_waiter;
+ /* If 1, a waiting next-writer
+ exists and is high-priority */
+};
+
#ifdef UNIV_SYNC_DEBUG
/** The structure for storing debug info of an rw-lock. All access to this
structure must be protected by rw_lock_debug_mutex_enter(). */
-struct rw_lock_debug_struct {
+struct rw_lock_debug_t {
os_thread_id_t thread_id; /*!< The thread id of the thread which
locked the rw-lock */
@@ -691,9 +835,6 @@ rw_lock_s_lock_gen()
rw_lock_s_lock_nowait()
rw_lock_s_unlock_gen()
rw_lock_free()
-
-Two function APIs rw_lock_x_unlock_direct() and rw_lock_s_unlock_direct()
-do not have any caller/user, they are not instrumented.
*/
#ifdef UNIV_PFS_RWLOCK
@@ -718,6 +859,26 @@ pfs_rw_lock_create_func(
const char* cmutex_name); /*!< in: mutex name */
/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_create_func()
+NOTE! Please use the corresponding macro rw_lock_create(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_create_func(
+/*====================*/
+ PSI_rwlock_key key, /*!< in: key registered with
+ performance schema */
+ prio_rw_lock_t* lock, /*!< in: rw lock */
+#ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline, /*!< in: file line where created */
+#endif /* UNIV_DEBUG */
+ const char* cmutex_name); /*!< in: mutex name */
+
+/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_lock_func()
NOTE! Please use the corresponding macro rw_lock_x_lock(), not
directly this function! */
@@ -730,6 +891,21 @@ pfs_rw_lock_x_lock_func(
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_lock_func()
+NOTE! Please use the corresponding macro rw_lock_x_lock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_lock_func(
+/*====================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+
/******************************************************************//**
Performance schema instrumented wrap function for
rw_lock_x_lock_func_nowait()
@@ -742,6 +918,7 @@ pfs_rw_lock_x_lock_func_nowait(
rw_lock_t* lock, /*!< in: pointer to rw-lock */
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
+
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
@@ -755,6 +932,21 @@ pfs_rw_lock_s_lock_func(
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_func()
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_lock_func(
+/*====================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
@@ -771,6 +963,21 @@ pfs_rw_lock_s_lock_low(
const char* file_name, /*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_low()
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
+this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_s_lock_low(
+/*===================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock will be passed to another
+ thread to unlock */
+ const char* file_name, /*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_lock_func()
NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
this function! */
@@ -784,6 +991,19 @@ pfs_rw_lock_x_lock_func(
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_lock_func()
+NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_lock_func(
+/*====================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_unlock_func()
NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly
this function! */
@@ -799,6 +1019,20 @@ pfs_rw_lock_s_unlock_func(
rw_lock_t* lock); /*!< in/out: rw-lock */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif
+ prio_rw_lock_t* lock); /*!< in/out: rw-lock */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_unlock_func()
NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
this function! */
UNIV_INLINE
@@ -812,6 +1046,20 @@ pfs_rw_lock_x_unlock_func(
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif
+ prio_rw_lock_t* lock); /*!< in/out: rw-lock */
+/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_free_func()
NOTE! Please use the corresponding macro rw_lock_free(), not directly
this function! */
@@ -820,6 +1068,15 @@ void
pfs_rw_lock_free_func(
/*==================*/
rw_lock_t* lock); /*!< in: rw-lock */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_free_func()
+NOTE! Please use the corresponding macro rw_lock_free(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_free_func(
+/*==================*/
+ prio_rw_lock_t* lock); /*!< in: rw-lock */
#endif /* UNIV_PFS_RWLOCK */
diff --git a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic
index 706ccbc00de..c625ee39035 100644
--- a/storage/xtradb/include/sync0rw.ic
+++ b/storage/xtradb/include/sync0rw.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -31,17 +31,22 @@ Created 9/11/1995 Heikki Tuuri
*******************************************************/
/******************************************************************//**
-Lock an rw-lock in shared mode for the current thread. If the rw-lock is
-locked in exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
+Lock a regular or priority rw-lock in shared mode for the current thread. If
+the rw-lock is locked in exclusive mode, or there is an exclusive lock request
+waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
waiting for the lock before suspending the thread. */
UNIV_INTERN
void
rw_lock_s_lock_spin(
/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ void* _lock, /*!< in: pointer to rw-lock */
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
+ bool priority_lock,
+ /*!< in: whether the lock is a priority lock */
+ bool high_priority,
+ /*!< in: whether we are acquiring a priority
+ lock with high priority */
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
#ifdef UNIV_SYNC_DEBUG
@@ -80,6 +85,20 @@ rw_lock_get_waiters(
}
/********************************************************************//**
+Check if there are threads waiting for the priority rw-lock.
+@return 1 if waiters, 0 otherwise */
+UNIV_INLINE
+ulint
+rw_lock_get_waiters(
+/*================*/
+ const prio_rw_lock_t* lock) /*!< in: rw-lock */
+{
+ return rw_lock_get_waiters(&lock->base_lock)
+ | lock->high_priority_s_waiters
+ | lock->high_priority_x_waiters;
+}
+
+/********************************************************************//**
Sets lock->waiters to 1. It is not an error if lock->waiters is already
1. On platforms where ATOMIC builtins are used this function enforces a
memory barrier. */
@@ -128,15 +147,28 @@ rw_lock_get_writer(
/* return NOT_LOCKED in s-lock state, like the writer
member of the old lock implementation. */
return(RW_LOCK_NOT_LOCKED);
- } else if (((-lock_word) % X_LOCK_DECR) == 0) {
+ } else if ((lock_word == 0) || (lock_word <= -X_LOCK_DECR)) {
return(RW_LOCK_EX);
} else {
- ut_ad(lock_word > -X_LOCK_DECR);
+ ut_ad(lock_word > -X_LOCK_DECR);
return(RW_LOCK_WAIT_EX);
}
}
/******************************************************************//**
+Returns the write-status of the priority lock - this function made more sense
+with the old rw_lock implementation.
+@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
+UNIV_INLINE
+ulint
+rw_lock_get_writer(
+/*===============*/
+ const prio_rw_lock_t* lock) /*!< in: rw-lock */
+{
+ return(rw_lock_get_writer(&lock->base_lock));
+}
+
+/******************************************************************//**
Returns the number of readers.
@return number of readers */
UNIV_INLINE
@@ -156,9 +188,21 @@ rw_lock_get_reader_count(
return(0);
}
+/******************************************************************//**
+Returns the number of readers.
+@return number of readers */
+UNIV_INLINE
+ulint
+rw_lock_get_reader_count(
+/*=====================*/
+ const prio_rw_lock_t* lock) /*!< in: rw-lock */
+{
+ return(rw_lock_get_reader_count(&lock->base_lock));
+}
+
#ifndef INNODB_RW_LOCKS_USE_ATOMICS
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
rw_lock_get_mutex(
/*==============*/
rw_lock_t* lock)
@@ -178,11 +222,23 @@ rw_lock_get_x_lock_count(
const rw_lock_t* lock) /*!< in: rw-lock */
{
lint lock_copy = lock->lock_word;
- /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
- if (lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
+ if ((lock_copy != 0) && (lock_copy > -X_LOCK_DECR)) {
return(0);
}
- return(((-lock_copy) / X_LOCK_DECR) + 1);
+ return((lock_copy == 0) ? 1 : (2 - (lock_copy + X_LOCK_DECR)));
+}
+
+/******************************************************************//**
+Returns the value of writer_count for the priority lock. Does not reserve the
+lock mutex, so the caller must be sure it is not changed during the call.
+@return value of writer_count */
+UNIV_INLINE
+ulint
+rw_lock_get_x_lock_count(
+/*=====================*/
+ const prio_rw_lock_t* lock) /*!< in: rw-lock */
+{
+ return(rw_lock_get_x_lock_count(&lock->base_lock));
}
/******************************************************************//**
@@ -200,7 +256,7 @@ rw_lock_lock_word_decr(
ulint amount) /*!< in: amount to decrement */
{
#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- lint local_lock_word = lock->lock_word;
+ lint local_lock_word = lock->lock_word;
while (local_lock_word > 0) {
if (os_compare_and_swap_lint(&lock->lock_word,
local_lock_word,
@@ -244,7 +300,7 @@ rw_lock_lock_word_incr(
mutex_exit(&(lock->mutex));
- return(local_lock_word);
+ return(local_lock_word);
#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
}
@@ -308,7 +364,6 @@ rw_lock_s_lock_low(
const char* file_name, /*!< in: file name where lock requested */
ulint line) /*!< in: line where requested */
{
- /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
if (!rw_lock_lock_word_decr(lock, 1)) {
/* Locking did not succeed */
return(FALSE);
@@ -318,7 +373,7 @@ rw_lock_s_lock_low(
rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
#endif
/* These debugging values are not set safely: they may be incorrect
- or even refer to a line that is invalid for the file name. */
+ or even refer to a line that is invalid for the file name. */
lock->last_s_file_name = file_name;
lock->last_s_line = line;
@@ -326,58 +381,6 @@ rw_lock_s_lock_low(
}
/******************************************************************//**
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line) /*!< in: line where lock requested */
-{
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- /* Indicate there is a new reader by decrementing lock_word */
- lock->lock_word--;
-
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line);
-#endif
-}
-
-/******************************************************************//**
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_x_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line) /*!< in: line where lock requested */
-{
- ut_ad(rw_lock_validate(lock));
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- lock->lock_word -= X_LOCK_DECR;
- lock->writer_thread = os_thread_get_curr_id();
- lock->recursive = TRUE;
-
- lock->last_x_file_name = file_name;
- lock->last_x_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
-}
-
-/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in shared mode for the current thread. If the rw-lock is locked
in exclusive mode, or there is an exclusive lock request waiting, the
@@ -409,14 +412,81 @@ rw_lock_s_lock_func(
ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
return; /* Success */
} else {
/* Did not succeed, try spin wait */
- rw_lock_s_lock_spin(lock, pass, file_name, line);
+ rw_lock_s_lock_spin(lock, pass, false, false, file_name, line);
+
+ return;
+ }
+}
+
+/******************************************************************//**
+Return true if waiters of higher priority than the current thread
+exist.
+@return true if waiters of higher priority exist */
+UNIV_INLINE
+bool
+rw_lock_higher_prio_waiters_exist(
+/*==============================*/
+ bool priority_lock, /*!< in: whether the lock is a priority lock */
+ bool high_priority, /*!< in: whether we are acquiring a priority
+ lock with high priority */
+ void* lock) /*!< in: rw lock */
+{
+ if (high_priority || !priority_lock) {
+ ut_ad(!(!priority_lock && high_priority));
+ return(false);
+ }
+
+ ut_ad(priority_lock && !high_priority);
+
+ prio_rw_lock_t *prio_rw_lock = (prio_rw_lock_t *) lock;
+ return prio_rw_lock->high_priority_wait_ex_waiter > 0
+ || prio_rw_lock->high_priority_s_waiters > 0
+ || prio_rw_lock->high_priority_x_waiters > 0;
+}
+
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function, except if
+you supply the file name and line number. Lock a priority rw-lock in shared
+mode for the current thread, using the relative thread priority. If the
+rw-lock is locked in exclusive mode, or there is an exclusive lock request
+waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
+waiting for the lock, before suspending the thread. */
+UNIV_INLINE
+void
+rw_lock_s_lock_func(
+/*================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
+ ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ bool high_priority = srv_current_thread_priority > 0;
+
+ /* Do not attempt to acquire a low-priority S latch if there are
+ high-priority waiters even if such attempt would be successful. This
+ is to prevent a high priority X request from being starved by a
+ sequence of overlapping regular priority S requests. */
+
+ if (!rw_lock_higher_prio_waiters_exist(true, high_priority, lock)
+ && rw_lock_s_lock_low(&lock->base_lock, pass, file_name, line)) {
+
+ return; /* Success */
+ } else {
+ /* Did not succeed, try spin wait */
+ rw_lock_s_lock_spin(lock, pass, true, high_priority, file_name,
+ line);
return;
}
@@ -435,8 +505,6 @@ rw_lock_x_lock_func_nowait(
const char* file_name,/*!< in: file name where lock requested */
ulint line) /*!< in: line where requested */
{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
ibool success;
#ifdef INNODB_RW_LOCKS_USE_ATOMICS
@@ -456,13 +524,19 @@ rw_lock_x_lock_func_nowait(
rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
} else if (lock->recursive
- && os_thread_eq(lock->writer_thread, curr_thread)) {
+ && os_thread_eq(lock->writer_thread,
+ os_thread_get_curr_id())) {
/* Relock: this lock_word modification is safe since no other
threads can modify (lock, unlock, or reserve) lock_word while
there is an exclusive writer and this is the writer thread. */
- lock->lock_word -= X_LOCK_DECR;
+ if (lock->lock_word == 0) {
+ lock->lock_word = -X_LOCK_DECR;
+ } else {
+ lock->lock_word--;
+ }
- ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
+ /* Watch for too many recursive locks */
+ ut_ad(lock->lock_word < 0);
} else {
/* Failure */
@@ -492,7 +566,9 @@ rw_lock_s_unlock_func(
#endif
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
+ ut_ad(lock->lock_word > -X_LOCK_DECR);
+ ut_ad(lock->lock_word != 0);
+ ut_ad(lock->lock_word < X_LOCK_DECR);
#ifdef UNIV_SYNC_DEBUG
rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
@@ -502,10 +578,10 @@ rw_lock_s_unlock_func(
if (rw_lock_lock_word_incr(lock, 1) == 0) {
/* wait_ex waiter exists. It may not be asleep, but we signal
- anyway. We do not wake other waiters, because they can't
- exist without wait_ex waiter and wait_ex waiter goes first.*/
+ anyway. We do not wake other waiters, because they can't
+ exist without wait_ex waiter and wait_ex waiter goes first.*/
os_event_set(lock->wait_ex_event);
- sync_array_object_signalled(sync_primary_wait_array);
+ sync_array_object_signalled();
}
@@ -517,43 +593,94 @@ rw_lock_s_unlock_func(
}
/******************************************************************//**
-Releases a shared mode lock when we know there are no waiters and none
-else will access the lock during the time this function is executed. */
+Releases a shared mode priority lock. */
UNIV_INLINE
void
-rw_lock_s_unlock_direct(
-/*====================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
+rw_lock_s_unlock_func(
+/*==================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ prio_rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad(lock->lock_word < X_LOCK_DECR);
+ lint lock_word;
+
+ ut_ad(lock->base_lock.lock_word > -X_LOCK_DECR);
+ ut_ad(lock->base_lock.lock_word != 0);
+ ut_ad(lock->base_lock.lock_word < X_LOCK_DECR);
#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
+ rw_lock_remove_debug_info(&lock->base_lock, pass, RW_LOCK_SHARED);
#endif
- /* Decrease reader count by incrementing lock_word */
- lock->lock_word++;
+ /* Increment lock_word to indicate 1 less reader */
+ lock_word = rw_lock_lock_word_incr(&lock->base_lock, 1);
+ if (lock_word == 0) {
+
+ /* A waiting next-writer exists, either high priority or
+ regular. Wake up the first waiter in this order: 1) high
+ priority next-writer; 2) high priority X waiters; 3) high
+ priority S waiters; 4) regular priority next-writer. This
+ allows high priority requests to overtake an already-waiting
+ regular priority next-writer. */
+ if (lock->high_priority_wait_ex_waiter) {
+
+ lock->high_priority_wait_ex_waiter = 0;
+ /* Note that we do not have a separate high priority
+ next-waiter event. There can be only one such waiter,
+ here we already know it's high priority, no
+ regular-priority wakeup may happen. */
+ os_event_set(lock->base_lock.wait_ex_event);
+ } else if (lock->high_priority_x_waiters) {
+
+ lock->high_priority_x_waiters = 0;
+ os_event_set(lock->high_priority_x_event);
+ } else if (lock->high_priority_s_waiters) {
+
+ lock->high_priority_s_waiters = 0;
+ os_event_set(lock->high_priority_s_event);
+ } else {
+
+ os_event_set(lock->base_lock.wait_ex_event);
+ }
+ sync_array_object_signalled();
+ } else if (lock_word == X_LOCK_DECR) {
+
+ /* S-waiters may exist during an S unlock if a high-priority
+ thread released it, because low-priority threads are prevented
+ from acquiring S lock while high-priority thread holds it. */
+ if (lock->base_lock.waiters) {
+
+ rw_lock_reset_waiter_flag(&lock->base_lock);
+ os_event_set(lock->base_lock.event);
+ sync_array_object_signalled();
+ }
+ }
- ut_ad(!lock->waiters);
ut_ad(rw_lock_validate(lock));
+
#ifdef UNIV_SYNC_PERF_STAT
rw_s_exit_count++;
#endif
}
/******************************************************************//**
-Releases an exclusive mode lock. */
+Prepares an exclusive mode lock release: resets the recursion flag and removes
+the debug information if needed and returns the required lock word increment
+value.
+@return lock word increment value to perform the unlock */
UNIV_INLINE
-void
-rw_lock_x_unlock_func(
-/*==================*/
+ulint
+rw_lock_x_prepare_unlock(
+/*=====================*/
#ifdef UNIV_SYNC_DEBUG
ulint pass, /*!< in: pass value; != 0, if the lock may have
been passed to another thread to unlock */
#endif
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+ ut_ad(lock->lock_word == 0 || lock->lock_word <= -X_LOCK_DECR);
/* lock->recursive flag also indicates if lock->writer_thread is
valid or stale. If we are the last of the recursive callers
@@ -570,14 +697,46 @@ rw_lock_x_unlock_func(
rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
#endif
- if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
+ ulint x_lock_incr;
+ if (lock->lock_word == 0) {
+ x_lock_incr = X_LOCK_DECR;
+ } else if (lock->lock_word == -X_LOCK_DECR) {
+ x_lock_incr = X_LOCK_DECR;
+ } else {
+ ut_ad(lock->lock_word < -X_LOCK_DECR);
+ x_lock_incr = 1;
+ }
+
+ return(x_lock_incr);
+}
+
+/******************************************************************//**
+Releases an exclusive mode lock. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_func(
+/*==================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ rw_lock_t* lock) /*!< in/out: rw-lock */
+{
+ ulint x_lock_incr = rw_lock_x_prepare_unlock(
+#ifdef UNIV_SYNC_DEBUG
+ pass,
+#endif
+ lock);
+
+ if (rw_lock_lock_word_incr(lock, x_lock_incr) == X_LOCK_DECR) {
/* Lock is now free. May have to signal read/write waiters.
- We do not need to signal wait_ex waiters, since they cannot
- exist when there is a writer. */
+ We do not need to signal wait_ex waiters, since they cannot
+ exist when there is a writer. */
+
if (lock->waiters) {
rw_lock_reset_waiter_flag(lock);
os_event_set(lock->event);
- sync_array_object_signalled(sync_primary_wait_array);
+ sync_array_object_signalled();
}
}
@@ -589,30 +748,50 @@ rw_lock_x_unlock_func(
}
/******************************************************************//**
-Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock during the time this function is executed. */
+Releases an exclusive mode priority lock. */
UNIV_INLINE
void
-rw_lock_x_unlock_direct(
-/*====================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
+rw_lock_x_unlock_func(
+/*==================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ prio_rw_lock_t* lock) /*!< in/out: rw-lock */
{
- /* Reset the exclusive lock if this thread no longer has an x-mode
- lock */
-
- ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
-
+ ulint x_lock_incr = rw_lock_x_prepare_unlock(
#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
+ pass,
#endif
+ &lock->base_lock);
- if (lock->lock_word == 0) {
- lock->recursive = FALSE;
- }
+ if (rw_lock_lock_word_incr(&lock->base_lock, x_lock_incr)
+ == X_LOCK_DECR) {
+
+ /* Priority lock is now free. Signal any waiters in this
+ order: 1) high priority X waiters; 2) high priority S waiters;
+ 3) regular priority waiters.
+ We do not need to signal wait_ex waiters, since they cannot
+ exist when there is a writer. */
+
+ if (lock->high_priority_x_waiters) {
- lock->lock_word += X_LOCK_DECR;
+ lock->high_priority_x_waiters = 0;
+ os_event_set(lock->high_priority_x_event);
+ sync_array_object_signalled();
+ } else if (lock->high_priority_s_waiters) {
+
+ lock->high_priority_s_waiters = 0;
+ os_event_set(lock->high_priority_s_event);
+ sync_array_object_signalled();
+ } else if (lock->base_lock.waiters) {
+
+ rw_lock_reset_waiter_flag(&lock->base_lock);
+ os_event_set(lock->base_lock.event);
+ sync_array_object_signalled();
+ }
+ }
- ut_ad(!lock->waiters);
ut_ad(rw_lock_validate(lock));
#ifdef UNIV_SYNC_PERF_STAT
@@ -643,9 +822,42 @@ pfs_rw_lock_create_func(
const char* cmutex_name) /*!< in: mutex name */
{
/* Initialize the rwlock for performance schema */
- lock->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key))
- ? PSI_server->init_rwlock(key, lock)
- : NULL;
+ lock->pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock);
+
+ /* The actual function to initialize an rwlock */
+ rw_lock_create_func(lock,
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ level,
+# endif /* UNIV_SYNC_DEBUG */
+ cfile_name,
+ cline,
+# endif /* UNIV_DEBUG */
+ cmutex_name);
+}
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_create_func().
+NOTE! Please use the corresponding macro rw_lock_create(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_create_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: key registered with
+ performance schema */
+ prio_rw_lock_t* lock, /*!< in: pointer to memory */
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline, /*!< in: file line where created */
+# endif /* UNIV_DEBUG */
+ const char* cmutex_name) /*!< in: mutex name */
+{
+ /* Initialize the rwlock for performance schema */
+ lock->base_lock.pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock);
/* The actual function to initialize an rwlock */
rw_lock_create_func(lock,
@@ -658,6 +870,7 @@ pfs_rw_lock_create_func(
# endif /* UNIV_DEBUG */
cmutex_name);
}
+
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_lock_func()
NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
@@ -672,26 +885,61 @@ pfs_rw_lock_x_lock_func(
const char* file_name,/*!< in: file name where lock requested */
ulint line) /*!< in: line where requested */
{
- struct PSI_rwlock_locker* locker = NULL;
- PSI_rwlock_locker_state state;
+ if (lock->pfs_psi != NULL)
+ {
+ PSI_rwlock_locker* locker;
+ PSI_rwlock_locker_state state;
- /* Record the entry of rw x lock request in performance schema */
- if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
- locker = PSI_server->get_thread_rwlock_locker(
- &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK);
+ /* Record the entry of rw x lock request in performance schema */
+ locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
+ &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, file_name, line);
- if (locker) {
- PSI_server->start_rwlock_wrwait(locker,
- file_name, line);
- }
+ rw_lock_x_lock_func(lock, pass, file_name, line);
+
+ if (locker != NULL)
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
}
+ else
+ {
+ rw_lock_x_lock_func(lock, pass, file_name, line);
+ }
+}
- rw_lock_x_lock_func(lock, pass, file_name, line);
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_lock_func()
+NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_lock_func(
+/*====================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
+{
+ if (lock->base_lock.pfs_psi != NULL)
+ {
+ PSI_rwlock_locker* locker;
+ PSI_rwlock_locker_state state;
+
+ /* Record the entry of rw x lock request in performance schema */
+ locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
+ &state, lock->base_lock.pfs_psi, PSI_RWLOCK_WRITELOCK,
+ file_name, line);
- if (locker) {
- PSI_server->end_rwlock_wrwait(locker, 0);
+ rw_lock_x_lock_func(lock, pass, file_name, line);
+
+ if (locker != NULL)
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
+ }
+ else
+ {
+ rw_lock_x_lock_func(lock, pass, file_name, line);
}
}
+
/******************************************************************//**
Performance schema instrumented wrap function for
rw_lock_x_lock_func_nowait()
@@ -707,25 +955,25 @@ pfs_rw_lock_x_lock_func_nowait(
requested */
ulint line) /*!< in: line where requested */
{
- struct PSI_rwlock_locker* locker = NULL;
- PSI_rwlock_locker_state state;
ibool ret;
- /* Record the entry of rw x lock request in performance schema */
- if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
- locker = PSI_server->get_thread_rwlock_locker(
- &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK);
+ if (lock->pfs_psi != NULL)
+ {
+ PSI_rwlock_locker* locker;
+ PSI_rwlock_locker_state state;
- if (locker) {
- PSI_server->start_rwlock_wrwait(locker,
- file_name, line);
- }
- }
+ /* Record the entry of rw x lock request in performance schema */
+ locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
+ &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, file_name, line);
- ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
+ ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
- if (locker) {
- PSI_server->end_rwlock_wrwait(locker, 0);
+ if (locker != NULL)
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, ret);
+ }
+ else
+ {
+ ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
}
return(ret);
@@ -740,13 +988,34 @@ pfs_rw_lock_free_func(
/*==================*/
rw_lock_t* lock) /*!< in: pointer to rw-lock */
{
- if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
- PSI_server->destroy_rwlock(lock->pfs_psi);
+ if (lock->pfs_psi != NULL)
+ {
+ PSI_RWLOCK_CALL(destroy_rwlock)(lock->pfs_psi);
lock->pfs_psi = NULL;
}
rw_lock_free_func(lock);
}
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_free_func()
+NOTE! Please use the corresponding macro rw_lock_free(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_free_func(
+/*==================*/
+ prio_rw_lock_t* lock) /*!< in: pointer to rw-lock */
+{
+ if (lock->base_lock.pfs_psi != NULL)
+ {
+ PSI_RWLOCK_CALL(destroy_rwlock)(lock->base_lock.pfs_psi);
+ lock->base_lock.pfs_psi = NULL;
+ }
+
+ rw_lock_free_func(lock);
+}
+
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not
@@ -763,25 +1032,67 @@ pfs_rw_lock_s_lock_func(
requested */
ulint line) /*!< in: line where requested */
{
- struct PSI_rwlock_locker* locker = NULL;
- PSI_rwlock_locker_state state;
+ if (lock->pfs_psi != NULL)
+ {
+ PSI_rwlock_locker* locker;
+ PSI_rwlock_locker_state state;
- /* Instrumented to inform we are aquiring a shared rwlock */
- if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
- locker = PSI_server->get_thread_rwlock_locker(
- &state, lock->pfs_psi, PSI_RWLOCK_READLOCK);
- if (locker) {
- PSI_server->start_rwlock_rdwait(locker,
- file_name, line);
- }
+ /* Instrumented to inform we are acquiring a shared rwlock */
+ locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
+ &state, lock->pfs_psi, PSI_RWLOCK_READLOCK, file_name, line);
+
+ rw_lock_s_lock_func(lock, pass, file_name, line);
+
+ if (locker != NULL)
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
+ }
+ else
+ {
+ rw_lock_s_lock_func(lock, pass, file_name, line);
}
- rw_lock_s_lock_func(lock, pass, file_name, line);
+ return;
+}
- if (locker) {
- PSI_server->end_rwlock_rdwait(locker, 0);
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_func()
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_lock_func(
+/*====================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock will be passed to another
+ thread to unlock */
+ const char* file_name,/*!< in: file name where lock
+ requested */
+ ulint line) /*!< in: line where requested */
+{
+ if (lock->base_lock.pfs_psi != NULL)
+ {
+ PSI_rwlock_locker* locker;
+ PSI_rwlock_locker_state state;
+
+ /* Instrumented to inform we are acquiring a shared rwlock */
+ locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
+ &state, lock->base_lock.pfs_psi, PSI_RWLOCK_READLOCK,
+ file_name, line);
+
+ rw_lock_s_lock_func(lock, pass, file_name, line);
+
+ if (locker != NULL)
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
}
+ else
+ {
+ rw_lock_s_lock_func(lock, pass, file_name, line);
+ }
+
+ return;
}
+
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not
@@ -798,30 +1109,51 @@ pfs_rw_lock_s_lock_low(
const char* file_name, /*!< in: file name where lock requested */
ulint line) /*!< in: line where requested */
{
- struct PSI_rwlock_locker* locker = NULL;
- PSI_rwlock_locker_state state;
ibool ret;
- /* Instrumented to inform we are aquiring a shared rwlock */
- if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
- locker = PSI_server->get_thread_rwlock_locker(
- &state, lock->pfs_psi, PSI_RWLOCK_READLOCK);
- if (locker) {
- PSI_server->start_rwlock_rdwait(locker,
- file_name, line);
- }
- }
+ if (lock->pfs_psi != NULL)
+ {
+ PSI_rwlock_locker* locker;
+ PSI_rwlock_locker_state state;
- ret = rw_lock_s_lock_low(lock, pass, file_name, line);
+ /* Instrumented to inform we are acquiring a shared rwlock */
+ locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
+ &state, lock->pfs_psi, PSI_RWLOCK_READLOCK, file_name, line);
- if (locker) {
- PSI_server->end_rwlock_rdwait(locker, 0);
+ ret = rw_lock_s_lock_low(lock, pass, file_name, line);
+
+ if (locker != NULL)
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, ret);
+ }
+ else
+ {
+ ret = rw_lock_s_lock_low(lock, pass, file_name, line);
}
return(ret);
}
/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_low()
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not
+directly this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_s_lock_low(
+/*===================*/
+ prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock will be passed to another
+ thread to unlock */
+ const char* file_name, /*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
+{
+ return(pfs_rw_lock_s_lock_low(&lock->base_lock, pass,
+ file_name, line));
+}
+
+/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_unlock_func()
NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
this function! */
@@ -837,9 +1169,34 @@ pfs_rw_lock_x_unlock_func(
rw_lock_t* lock) /*!< in/out: rw-lock */
{
/* Inform performance schema we are unlocking the lock */
- if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
- PSI_server->unlock_rwlock(lock->pfs_psi);
- }
+ if (lock->pfs_psi != NULL)
+ PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
+
+ rw_lock_x_unlock_func(
+#ifdef UNIV_SYNC_DEBUG
+ pass,
+#endif
+ lock);
+}
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif
+ prio_rw_lock_t* lock) /*!< in/out: rw-lock */
+{
+ /* Inform performance schema we are unlocking the lock */
+ if (lock->base_lock.pfs_psi != NULL)
+ PSI_RWLOCK_CALL(unlock_rwlock)(lock->base_lock.pfs_psi);
rw_lock_x_unlock_func(
#ifdef UNIV_SYNC_DEBUG
@@ -864,9 +1221,8 @@ pfs_rw_lock_s_unlock_func(
rw_lock_t* lock) /*!< in/out: rw-lock */
{
/* Inform performance schema we are unlocking the lock */
- if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
- PSI_server->unlock_rwlock(lock->pfs_psi);
- }
+ if (lock->pfs_psi != NULL)
+ PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
rw_lock_s_unlock_func(
#ifdef UNIV_SYNC_DEBUG
@@ -875,4 +1231,32 @@ pfs_rw_lock_s_unlock_func(
lock);
}
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_unlock_func()
+NOTE! Please use the corresponding macro pfs_rw_lock_s_unlock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif
+ prio_rw_lock_t* lock) /*!< in/out: rw-lock */
+{
+ /* Inform performance schema we are unlocking the lock */
+ if (lock->base_lock.pfs_psi != NULL)
+ PSI_RWLOCK_CALL(unlock_rwlock)(lock->base_lock.pfs_psi);
+
+ rw_lock_s_unlock_func(
+#ifdef UNIV_SYNC_DEBUG
+ pass,
+#endif
+ lock);
+
+}
+
#endif /* UNIV_PFS_RWLOCK */
diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h
index b3b99b10630..f54c6d59af9 100644
--- a/storage/xtradb/include/sync0sync.h
+++ b/storage/xtradb/include/sync0sync.h
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2012, Facebook Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -40,9 +41,10 @@ Created 9/5/1995 Heikki Tuuri
#include "os0thread.h"
#include "os0sync.h"
#include "sync0arr.h"
+#include "ut0counter.h"
#if defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP)
-extern my_bool timed_mutexes;
+extern "C" my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#ifdef _WIN32
@@ -53,25 +55,19 @@ typedef byte lock_word_t;
#endif
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
-/* There are mutexes/rwlocks that we want to exclude from
-instrumentation even if their corresponding performance schema
-define is set. And this PFS_NOT_INSTRUMENTED is used
-as the key value to dentify those objects that would
-be excluded from instrumentation. */
-# define PFS_NOT_INSTRUMENTED ULINT32_UNDEFINED
-
-# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED)
/* By default, buffer mutexes and rwlocks will be excluded from
instrumentation due to their large number of instances. */
# define PFS_SKIP_BUFFER_MUTEX_RWLOCK
+/* By default, event->mutex will also be excluded from instrumentation */
+# define PFS_SKIP_EVENT_MUTEX
+
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
/* Key defines to register InnoDB mutexes with performance schema */
extern mysql_pfs_key_t autoinc_mutex_key;
-extern mysql_pfs_key_t btr_search_enabled_mutex_key;
extern mysql_pfs_key_t buffer_block_mutex_key;
extern mysql_pfs_key_t buf_pool_mutex_key;
extern mysql_pfs_key_t buf_pool_zip_mutex_key;
@@ -79,12 +75,17 @@ extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
+extern mysql_pfs_key_t buf_pool_flush_state_mutex_key;
extern mysql_pfs_key_t cache_last_read_mutex_key;
extern mysql_pfs_key_t dict_foreign_err_mutex_key;
extern mysql_pfs_key_t dict_sys_mutex_key;
extern mysql_pfs_key_t file_format_max_mutex_key;
extern mysql_pfs_key_t fil_system_mutex_key;
extern mysql_pfs_key_t flush_list_mutex_key;
+extern mysql_pfs_key_t fts_bg_threads_mutex_key;
+extern mysql_pfs_key_t fts_delete_mutex_key;
+extern mysql_pfs_key_t fts_optimize_mutex_key;
+extern mysql_pfs_key_t fts_doc_id_mutex_key;
extern mysql_pfs_key_t hash_table_mutex_key;
extern mysql_pfs_key_t ibuf_bitmap_mutex_key;
extern mysql_pfs_key_t ibuf_mutex_key;
@@ -92,7 +93,9 @@ extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
extern mysql_pfs_key_t log_bmp_sys_mutex_key;
extern mysql_pfs_key_t log_sys_mutex_key;
extern mysql_pfs_key_t log_flush_order_mutex_key;
-extern mysql_pfs_key_t kernel_mutex_key;
+# ifndef HAVE_ATOMIC_BUILTINS
+extern mysql_pfs_key_t server_mutex_key;
+# endif /* !HAVE_ATOMIC_BUILTINS */
# ifdef UNIV_MEM_DEBUG
extern mysql_pfs_key_t mem_hash_mutex_key;
# endif /* UNIV_MEM_DEBUG */
@@ -100,6 +103,7 @@ extern mysql_pfs_key_t mem_pool_mutex_key;
extern mysql_pfs_key_t mutex_list_mutex_key;
extern mysql_pfs_key_t purge_sys_bh_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
+extern mysql_pfs_key_t recv_writer_mutex_key;
extern mysql_pfs_key_t rseg_mutex_key;
# ifdef UNIV_SYNC_DEBUG
extern mysql_pfs_key_t rw_lock_debug_mutex_key;
@@ -109,13 +113,29 @@ extern mysql_pfs_key_t rw_lock_mutex_key;
extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
+extern mysql_pfs_key_t srv_threads_mutex_key;
extern mysql_pfs_key_t srv_monitor_file_mutex_key;
-extern mysql_pfs_key_t syn_arr_mutex_key;
# ifdef UNIV_SYNC_DEBUG
extern mysql_pfs_key_t sync_thread_mutex_key;
# endif /* UNIV_SYNC_DEBUG */
-extern mysql_pfs_key_t trx_doublewrite_mutex_key;
+extern mysql_pfs_key_t buf_dblwr_mutex_key;
extern mysql_pfs_key_t trx_undo_mutex_key;
+extern mysql_pfs_key_t trx_mutex_key;
+extern mysql_pfs_key_t lock_sys_mutex_key;
+extern mysql_pfs_key_t lock_sys_wait_mutex_key;
+extern mysql_pfs_key_t trx_sys_mutex_key;
+extern mysql_pfs_key_t srv_sys_mutex_key;
+extern mysql_pfs_key_t srv_sys_tasks_mutex_key;
+#ifndef HAVE_ATOMIC_BUILTINS
+extern mysql_pfs_key_t srv_conc_mutex_key;
+#endif /* !HAVE_ATOMIC_BUILTINS */
+#ifndef HAVE_ATOMIC_BUILTINS_64
+extern mysql_pfs_key_t monitor_mutex_key;
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
+extern mysql_pfs_key_t event_os_mutex_key;
+extern mysql_pfs_key_t ut_list_mutex_key;
+extern mysql_pfs_key_t os_mutex_key;
+extern mysql_pfs_key_t zip_pad_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/******************************************************************//**
@@ -140,6 +160,8 @@ if "UNIV_PFS_MUTEX" is defined:
mutex_create
mutex_enter
+mutex_enter_first
+mutex_enter_last
mutex_exit
mutex_enter_nowait
mutex_free
@@ -175,6 +197,12 @@ necessary only if the memory block containing it is freed. */
# define mutex_enter_nowait(M) \
pfs_mutex_enter_nowait_func((M), __FILE__, __LINE__)
+# define mutex_enter_first(M) \
+ pfs_mutex_enter_func((M), __FILE__, __LINE__, HIGH_PRIO)
+
+# define mutex_enter_last(M) \
+ pfs_mutex_enter_func((M), __FILE__, __LINE__, LOW_PRIO)
+
# define mutex_exit(M) pfs_mutex_exit_func(M)
# define mutex_free(M) pfs_mutex_free_func(M)
@@ -201,6 +229,12 @@ original non-instrumented functions */
# define mutex_enter_nowait(M) \
mutex_enter_nowait_func((M), __FILE__, __LINE__)
+# define mutex_enter_first(M) \
+ mutex_enter_func((M), __FILE__, __LINE__, HIGH_PRIO)
+
+# define mutex_enter_last(M) \
+ mutex_enter_func((M), __FILE__, __LINE__, LOW_PRIO)
+
# define mutex_exit(M) mutex_exit_func(M)
# define mutex_free(M) mutex_free_func(M)
@@ -216,7 +250,7 @@ UNIV_INTERN
void
mutex_create_func(
/*==============*/
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
@@ -227,6 +261,26 @@ mutex_create_func(
const char* cmutex_name); /*!< in: mutex name */
/******************************************************************//**
+Creates, or rather, initializes a priority mutex object in a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed. */
+UNIV_INTERN
+void
+mutex_create_func(
+/*==============*/
+ ib_prio_mutex_t* mutex, /*!< in: pointer to memory */
+#ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where
+ created */
+ ulint cline, /*!< in: file line where
+ created */
+#endif /* UNIV_DEBUG */
+ const char* cmutex_name); /*!< in: mutex name */
+/******************************************************************//**
NOTE! Use the corresponding macro mutex_free(), not directly this function!
Calling this function is obligatory only if the memory buffer containing
the mutex is freed. Removes a mutex object from the mutex list. The mutex
@@ -235,7 +289,17 @@ UNIV_INTERN
void
mutex_free_func(
/*============*/
- mutex_t* mutex); /*!< in: mutex */
+ ib_mutex_t* mutex); /*!< in: mutex */
+/******************************************************************//**
+NOTE! Use the corresponding macro mutex_free(), not directly this function!
+Calling this function is obligatory only if the memory buffer containing
+the mutex is freed. Removes a priority mutex object from the mutex list. The
+mutex is checked to be in the reset state. */
+UNIV_INTERN
+void
+mutex_free_func(
+/*============*/
+ ib_prio_mutex_t* mutex); /*!< in: mutex */
/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
@@ -252,9 +316,29 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line); /*!< in: line where locked */
+/******************************************************************//**
+NOTE! Use the corresponding macro in the header file, not this function
+directly. Locks a priority mutex for the current thread. If the mutex is
+reserved the function spins a preset time (controlled by SYNC_SPIN_ROUNDS)
+waiting for the mutex before suspending the thread. If the thread is suspended,
+the priority argument value determines the relative order for its wake up. Any
+HIGH_PRIO waiters will be woken up before any LOW_PRIO waiters. In case of
+DEFAULT_PRIO, the relative priority will be set according to
+srv_current_thread_priority. */
+UNIV_INLINE
+void
+mutex_enter_func(
+/*=============*/
+ ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where
+ locked */
+ ulint line, /*!< in: line where locked */
+ enum ib_sync_priority priority = DEFAULT_PRIO);
+ /*!< in: mutex acquisition
+ priority */
/********************************************************************//**
NOTE! Use the corresponding macro in the header file, not this function
directly. Tries to lock the mutex for the current thread. If the lock is not
@@ -264,10 +348,24 @@ UNIV_INTERN
ulint
mutex_enter_nowait_func(
/*====================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
+/********************************************************************//**
+NOTE! Use the corresponding macro in the header file, not this function
+directly. Tries to lock the mutex for the current thread. If the lock is not
+acquired immediately, returns with return value 1.
+@return 0 if succeed, 1 if not */
+UNIV_INTERN
+ulint
+mutex_enter_nowait_func(
+/*====================*/
+ ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where mutex
+ requested */
+ ulint line); /*!< in: line where
+ requested */
/******************************************************************//**
NOTE! Use the corresponding macro mutex_exit(), not directly this function!
Unlocks a mutex owned by the current thread. */
@@ -275,7 +373,15 @@ UNIV_INLINE
void
mutex_exit_func(
/*============*/
- mutex_t* mutex); /*!< in: pointer to mutex */
+ ib_mutex_t* mutex); /*!< in: pointer to mutex */
+/******************************************************************//**
+NOTE! Use the corresponding macro mutex_exit(), not directly this function!
+Unlocks a priority mutex owned by the current thread. */
+UNIV_INLINE
+void
+mutex_exit_func(
+/*============*/
+ ib_prio_mutex_t* mutex); /*!< in: pointer to mutex */
#ifdef UNIV_PFS_MUTEX
@@ -290,7 +396,7 @@ void
pfs_mutex_create_func(
/*==================*/
PSI_mutex_key key, /*!< in: Performance Schema key */
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
# ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
@@ -300,6 +406,29 @@ pfs_mutex_create_func(
# endif /* UNIV_DEBUG */
const char* cmutex_name);
/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_create(), not directly
+this function!
+A wrapper function for mutex_create_func(), registers the mutex
+with performance schema if "UNIV_PFS_MUTEX" is defined when
+creating the priority mutex */
+UNIV_INLINE
+void
+pfs_mutex_create_func(
+/*==================*/
+ PSI_mutex_key key, /*!< in: Performance Schema
+ key */
+ ib_prio_mutex_t* mutex, /*!< in: pointer to memory */
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where
+ created */
+ ulint cline, /*!< in: file line where
+ created */
+# endif /* UNIV_DEBUG */
+ const char* cmutex_name);
+/******************************************************************//**
NOTE! Please use the corresponding macro mutex_enter(), not directly
this function!
This is a performance schema instrumented wrapper function for
@@ -308,9 +437,25 @@ UNIV_INLINE
void
pfs_mutex_enter_func(
/*=================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line); /*!< in: line where locked */
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_func(). */
+UNIV_INLINE
+void
+pfs_mutex_enter_func(
+/*=================*/
+ ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where
+ locked */
+ ulint line, /*!< in: line where locked */
+ enum ib_sync_priority priority = DEFAULT_PRIO);
+ /*!< in: mutex acquisition
+ priority */
/********************************************************************//**
NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
this function!
@@ -321,10 +466,25 @@ UNIV_INLINE
ulint
pfs_mutex_enter_nowait_func(
/*========================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
+/********************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_nowait_func.
+@return 0 if succeed, 1 if not */
+UNIV_INLINE
+ulint
+pfs_mutex_enter_nowait_func(
+/*========================*/
+ ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where mutex
+ requested */
+ ulint line); /*!< in: line where
+ requested */
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_exit(), not directly
this function!
@@ -334,7 +494,17 @@ UNIV_INLINE
void
pfs_mutex_exit_func(
/*================*/
- mutex_t* mutex); /*!< in: pointer to mutex */
+ ib_mutex_t* mutex); /*!< in: pointer to mutex */
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_exit(), not directly
+this function!
+A wrap function of mutex_exit_func() with performance schema instrumentation.
+Unlocks a priority mutex owned by the current thread. */
+UNIV_INLINE
+void
+pfs_mutex_exit_func(
+/*================*/
+ ib_prio_mutex_t* mutex); /*!< in: pointer to mutex */
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_free(), not directly
@@ -345,7 +515,17 @@ UNIV_INLINE
void
pfs_mutex_free_func(
/*================*/
- mutex_t* mutex); /*!< in: mutex */
+ ib_mutex_t* mutex); /*!< in: mutex */
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_free(), not directly
+this function!
+Wrapper function for mutex_free_func(). Also destroys the performance
+schema probes when freeing the priority mutex */
+UNIV_INLINE
+void
+pfs_mutex_free_func(
+/*================*/
+ ib_prio_mutex_t* mutex); /*!< in: mutex */
#endif /* UNIV_PFS_MUTEX */
@@ -383,7 +563,7 @@ UNIV_INTERN
ibool
mutex_validate(
/*===========*/
- const mutex_t* mutex); /*!< in: mutex */
+ const ib_mutex_t* mutex); /*!< in: mutex */
/******************************************************************//**
Checks that the current thread owns the mutex. Works only
in the debug version.
@@ -392,7 +572,17 @@ UNIV_INTERN
ibool
mutex_own(
/*======*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
+ __attribute__((warn_unused_result));
+/******************************************************************//**
+Checks that the current thread owns the priority mutex. Works only
+in the debug version.
+@return TRUE if owns */
+UNIV_INTERN
+ibool
+mutex_own(
+/*======*/
+ const ib_prio_mutex_t* mutex) /*!< in: priority mutex */
__attribute__((warn_unused_result));
#endif /* UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
@@ -463,7 +653,7 @@ UNIV_INTERN
void
mutex_get_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char** file_name, /*!< out: file where requested */
ulint* line, /*!< out: line where requested */
os_thread_id_t* thread_id); /*!< out: id of the thread which owns
@@ -483,7 +673,7 @@ UNIV_INLINE
lock_word_t
mutex_get_lock_word(
/*================*/
- const mutex_t* mutex); /*!< in: mutex */
+ const ib_mutex_t* mutex); /*!< in: mutex */
#ifdef UNIV_SYNC_DEBUG
/******************************************************************//**
NOT to be used outside this module except in debugging! Gets the waiters
@@ -493,7 +683,7 @@ UNIV_INLINE
ulint
mutex_get_waiters(
/*==============*/
- const mutex_t* mutex); /*!< in: mutex */
+ const ib_mutex_t* mutex); /*!< in: mutex */
#endif /* UNIV_SYNC_DEBUG */
/*
@@ -596,15 +786,28 @@ V
File system pages
|
V
-Kernel mutex If a kernel operation needs a file
-| page allocation, it must reserve the
-| fsp x-latch before acquiring the kernel
-| mutex.
+lock_sys_wait_mutex Mutex protecting lock timeout data
+|
+V
+lock_sys_mutex Mutex protecting lock_sys_t
+|
+V
+trx_sys->mutex Mutex protecting trx_sys_t
+|
+V
+Threads mutex Background thread scheduling mutex
+|
+V
+query_thr_mutex Mutex protecting query threads
+|
+V
+trx_mutex Mutex protecting trx_t fields
+|
V
Search system mutex
|
V
-Buffer pool mutex
+Buffer pool mutexes
|
V
Log mutex
@@ -614,7 +817,8 @@ Any other latch
V
Memory pool mutex */
-/* Latching order levels */
+/* Latching order levels. If you modify these, you have to also update
+sync_thread_add_level(). */
/* User transaction locks are higher than any of the latch levels below:
no latches are allowed when a thread goes to wait for a normal table
@@ -634,12 +838,14 @@ or row lock! */
trx_i_s_cache_t::last_read_mutex */
#define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the
file format tag */
-#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve
+#define SYNC_DICT_OPERATION 1010 /* table create, drop, etc. reserve
this in X-mode; implicit or backround
operations purge, rollback, foreign
key checks reserve this in S-mode */
+#define SYNC_FTS_CACHE 1005 /* FTS cache rwlock */
#define SYNC_DICT 1000
#define SYNC_DICT_AUTOINC_MUTEX 999
+#define SYNC_STATS_AUTO_RECALC 997
#define SYNC_DICT_HEADER 995
#define SYNC_IBUF_HEADER 914
#define SYNC_IBUF_PESS_INSERT_MUTEX 912
@@ -657,26 +863,34 @@ or row lock! */
#define SYNC_EXTERN_STORAGE 500
#define SYNC_FSP 400
#define SYNC_FSP_PAGE 395
-/*------------------------------------- Insert buffer headers */
+/*------------------------------------- Change buffer headers */
#define SYNC_IBUF_MUTEX 370 /* ibuf_mutex */
-/*------------------------------------- Insert buffer tree */
+/*------------------------------------- Change buffer tree */
#define SYNC_IBUF_INDEX_TREE 360
#define SYNC_IBUF_TREE_NODE_NEW 359
#define SYNC_IBUF_TREE_NODE 358
#define SYNC_IBUF_BITMAP_MUTEX 351
#define SYNC_IBUF_BITMAP 350
+/*------------------------------------- Change log for online create index */
+#define SYNC_INDEX_ONLINE_LOG 340
/*------------------------------------- MySQL query cache mutex */
/*------------------------------------- MySQL binlog mutex */
/*-------------------------------*/
-#define SYNC_KERNEL 300
-#define SYNC_REC_LOCK 299
-#define SYNC_TRX_LOCK_HEAP 298
+#define SYNC_LOCK_WAIT_SYS 300
+#define SYNC_LOCK_SYS 299
+#define SYNC_TRX_SYS 298
+#define SYNC_TRX 297
+#define SYNC_THREADS 295
+#define SYNC_REC_LOCK 294
#define SYNC_TRX_SYS_HEADER 290
#define SYNC_PURGE_QUEUE 200
#define SYNC_LOG_ONLINE 175
#define SYNC_LOG 170
-#define SYNC_LOG_FLUSH_ORDER 156
+#define SYNC_LOG_FLUSH_ORDER 147
#define SYNC_RECV 168
+#define SYNC_FTS_CACHE_INIT 166 /* Used for FTS cache initialization */
+#define SYNC_FTS_BG_THREADS 165
+#define SYNC_FTS_OPTIMIZE 164 // FIXME: is this correct number, test
#define SYNC_WORK_QUEUE 162
#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
heap that can be extended to the
@@ -684,16 +898,15 @@ or row lock! */
SYNC_SEARCH_SYS, as memory allocation
can call routines there! Otherwise
the level is SYNC_MEM_HASH. */
-#define SYNC_BUF_LRU_LIST 158
-#define SYNC_BUF_PAGE_HASH 157
-#define SYNC_BUF_BLOCK 155 /* Block mutex */
-#define SYNC_BUF_FREE_LIST 153
-#define SYNC_BUF_ZIP_FREE 152
-#define SYNC_BUF_ZIP_HASH 151
-#define SYNC_BUF_POOL 150 /* Buffer pool mutex */
-#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
-#define SYNC_DOUBLEWRITE 140
-#define SYNC_OUTER_ANY_LATCH 136
+#define SYNC_BUF_LRU_LIST 151
+#define SYNC_BUF_PAGE_HASH 149 /* buf_pool->page_hash rw_lock */
+#define SYNC_BUF_BLOCK 146 /* Block mutex */
+#define SYNC_BUF_FREE_LIST 145
+#define SYNC_BUF_ZIP_FREE 144
+#define SYNC_BUF_ZIP_HASH 143
+#define SYNC_BUF_FLUSH_STATE 142
+#define SYNC_BUF_FLUSH_LIST 141 /* Buffer flush list mutex */
+#define SYNC_DOUBLEWRITE 139
#define SYNC_ANY_LATCH 135
#define SYNC_MEM_HASH 131
#define SYNC_MEM_POOL 130
@@ -705,14 +918,17 @@ or row lock! */
#define RW_LOCK_SHARED 352
#define RW_LOCK_WAIT_EX 353
#define SYNC_MUTEX 354
+#define SYNC_PRIO_MUTEX 355
+#define PRIO_RW_LOCK_EX 356
+#define PRIO_RW_LOCK_SHARED 357
/* NOTE! The structure appears here only for the compiler to know its size.
Do not use its fields directly! The structure used in the spin lock
implementation of a mutual exclusion semaphore. */
/** InnoDB mutex */
-struct mutex_struct {
- os_event_t event; /*!< Used by sync0arr.c for the wait queue */
+struct ib_mutex_t {
+ os_event_t event; /*!< Used by sync0arr.cc for the wait queue */
volatile lock_word_t lock_word; /*!< lock_word is the target
of the atomic test-and-set instruction when
atomic operations are enabled. */
@@ -722,11 +938,11 @@ struct mutex_struct {
os_fast_mutex; /*!< We use this OS mutex in place of lock_word
when atomic operations are not enabled */
#endif
- volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
+ ulint waiters; /*!< This ulint is set to 1 if there are (or
may be) threads waiting in the global wait
array for this mutex to be released.
Otherwise, this is 0. */
- UT_LIST_NODE_T(mutex_t) list; /*!< All allocated mutexes are put into
+ UT_LIST_NODE_T(ib_mutex_t) list; /*!< All allocated mutexes are put into
a list. Pointers to the next and prev. */
#ifdef UNIV_SYNC_DEBUG
const char* file_name; /*!< File where the mutex was locked */
@@ -736,21 +952,17 @@ struct mutex_struct {
#ifdef UNIV_DEBUG
const char* cfile_name;/*!< File name where mutex created */
ulint cline; /*!< Line where created */
+#endif
+ ulong count_os_wait; /*!< count of os_wait */
+#ifdef UNIV_DEBUG
+
+/** Value of ib_mutex_t::magic_n */
+# define MUTEX_MAGIC_N 979585UL
+
os_thread_id_t thread_id; /*!< The thread id of the thread
which locked the mutex. */
ulint magic_n; /*!< MUTEX_MAGIC_N */
-/** Value of mutex_struct::magic_n */
-# define MUTEX_MAGIC_N (ulint)979585
-#endif /* UNIV_DEBUG */
- ulong count_os_wait; /*!< count of os_wait */
-#ifdef UNIV_DEBUG
- ulong count_using; /*!< count of times mutex used */
- ulong count_spin_loop; /*!< count of spin loops */
- ulong count_spin_rounds;/*!< count of spin rounds */
- ulong count_os_yield; /*!< count of os_wait */
- ulonglong lspent_time; /*!< mutex os_wait timer msec */
- ulonglong lmax_spent_time;/*!< mutex os_wait timer msec */
- ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
+ ulint ib_mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
#endif /* UNIV_DEBUG */
const char* cmutex_name; /*!< mutex name */
#ifdef UNIV_PFS_MUTEX
@@ -759,10 +971,19 @@ struct mutex_struct {
#endif
};
-/** The global array of wait cells for implementation of the databases own
-mutexes and read-write locks. */
-extern sync_array_t* sync_primary_wait_array;/* Appears here for
- debugging purposes only! */
+/** XtraDB priority mutex */
+struct ib_prio_mutex_t {
+ ib_mutex_t base_mutex; /* The regular mutex provides the lock
+ word etc. for the priority mutex */
+ os_event_t high_priority_event; /* High priority wait array
+ event */
+ volatile ulint high_priority_waiters; /* Set to 1 if there are (or
+ may be) threads that asked for this
+ mutex to be acquired with high priority
+ in the global wait array for this mutex
+ to be released. Otherwise, this is
+ 0. */
+};
/** Constant determining how long spin wait is continued before suspending
the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
@@ -770,9 +991,15 @@ to 20 microseconds. */
#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
-extern ib_int64_t mutex_spin_round_count;
-extern ib_int64_t mutex_spin_wait_count;
-extern ib_int64_t mutex_os_wait_count;
+/** The number of iterations in the mutex_spin_wait() spin loop.
+Intended for performance monitoring. */
+extern ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count;
+/** The number of mutex_spin_wait() calls. Intended for
+performance monitoring. */
+extern ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count;
+/** The number of OS waits in mutex_spin_wait(). Intended for
+performance monitoring. */
+extern ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count;
/** The number of mutex_exit calls. Intended for performance monitoring. */
extern ib_int64_t mutex_exit_count;
@@ -786,13 +1013,37 @@ extern ibool sync_order_checks_on;
extern ibool sync_initialized;
/** Global list of database mutexes (not OS mutexes) created. */
-typedef UT_LIST_BASE_NODE_T(mutex_t) ut_list_base_node_t;
+typedef UT_LIST_BASE_NODE_T(ib_mutex_t) ut_list_base_node_t;
/** Global list of database mutexes (not OS mutexes) created. */
extern ut_list_base_node_t mutex_list;
/** Mutex protecting the mutex_list variable */
-extern mutex_t mutex_list_mutex;
+extern ib_mutex_t mutex_list_mutex;
+#ifndef HAVE_ATOMIC_BUILTINS
+/**********************************************************//**
+Function that uses a mutex to decrement a variable atomically */
+UNIV_INLINE
+void
+os_atomic_dec_ulint_func(
+/*=====================*/
+ ib_mutex_t* mutex, /*!< in: mutex guarding the
+ decrement */
+ volatile ulint* var, /*!< in/out: variable to
+ decrement */
+ ulint delta); /*!< in: delta to decrement */
+/**********************************************************//**
+Function that uses a mutex to increment a variable atomically */
+UNIV_INLINE
+void
+os_atomic_inc_ulint_func(
+/*=====================*/
+ ib_mutex_t* mutex, /*!< in: mutex guarding the
+ increment */
+ volatile ulint* var, /*!< in/out: variable to
+ increment */
+ ulint delta); /*!< in: delta to increment */
+#endif /* !HAVE_ATOMIC_BUILTINS */
#ifndef UNIV_NONINL
#include "sync0sync.ic"
diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic
index 73e7379cac1..396005ec83a 100644
--- a/storage/xtradb/include/sync0sync.ic
+++ b/storage/xtradb/include/sync0sync.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -36,17 +36,20 @@ UNIV_INTERN
void
mutex_set_waiters(
/*==============*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
ulint n); /*!< in: value to set */
/******************************************************************//**
-Reserves a mutex for the current thread. If the mutex is reserved, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
+Reserves a mutex or a priority mutex for the current thread. If the mutex is
+reserved, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS)
+waiting for the mutex before suspending the thread. */
UNIV_INTERN
void
mutex_spin_wait(
/*============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ void* _mutex, /*!< in: pointer to mutex */
+ bool high_priority, /*!< in: whether the mutex is a
+ priority mutex with high priority
+ specified */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
@@ -57,7 +60,7 @@ UNIV_INTERN
void
mutex_set_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char* file_name, /*!< in: file where requested */
ulint line); /*!< in: line where requested */
#endif /* UNIV_SYNC_DEBUG */
@@ -67,7 +70,7 @@ UNIV_INTERN
void
mutex_signal_object(
/*================*/
- mutex_t* mutex); /*!< in: mutex */
+ ib_mutex_t* mutex); /*!< in: mutex */
/******************************************************************//**
Performs an atomic test-and-set instruction to the lock_word field of a
@@ -75,9 +78,9 @@ mutex.
@return the previous value of lock_word: 0 or 1 */
UNIV_INLINE
byte
-mutex_test_and_set(
+ib_mutex_test_and_set(
/*===============*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
#if defined(HAVE_ATOMIC_BUILTINS)
return(os_atomic_test_and_set_byte(&mutex->lock_word, 1));
@@ -94,7 +97,7 @@ mutex_test_and_set(
mutex->lock_word = 1;
}
- return((byte)ret);
+ return((byte) ret);
#endif
}
@@ -105,7 +108,7 @@ UNIV_INLINE
void
mutex_reset_lock_word(
/*==================*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
#if defined(HAVE_ATOMIC_BUILTINS)
/* In theory __sync_lock_release should be used to release the lock.
@@ -125,7 +128,7 @@ UNIV_INLINE
lock_word_t
mutex_get_lock_word(
/*================*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
ut_ad(mutex);
@@ -139,7 +142,7 @@ UNIV_INLINE
ulint
mutex_get_waiters(
/*==============*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
const volatile ulint* ptr; /*!< declared volatile to ensure that
the value is read from memory */
@@ -158,7 +161,7 @@ UNIV_INLINE
void
mutex_exit_func(
/*============*/
- mutex_t* mutex) /*!< in: pointer to mutex */
+ ib_mutex_t* mutex) /*!< in: pointer to mutex */
{
ut_ad(mutex_own(mutex));
@@ -192,6 +195,55 @@ mutex_exit_func(
}
/******************************************************************//**
+NOTE! Use the corresponding macro mutex_exit(), not directly this function!
+Unlocks a priority mutex owned by the current thread. */
+UNIV_INLINE
+void
+mutex_exit_func(
+/*============*/
+ ib_prio_mutex_t* mutex) /*!< in: pointer to mutex */
+{
+ ut_ad(mutex_own(mutex));
+
+ ut_d(mutex->base_mutex.thread_id = (os_thread_id_t) ULINT_UNDEFINED);
+
+#ifdef UNIV_SYNC_DEBUG
+ sync_thread_reset_level(&mutex->base_mutex);
+#endif
+ mutex_reset_lock_word(&mutex->base_mutex);
+
+ /* A problem: we assume that mutex_reset_lock_word
+ is a memory barrier, that is when we read the waiters
+ field next, the read must be serialized in memory
+ after the reset. A speculative processor might
+ perform the read first, which could leave a waiting
+ thread hanging indefinitely.
+
+ Our current solution is to call
+ sync_arr_wake_threads_if_sema_free() every second
+ to wake up any hanging threads that were
+ missed in mutex_signal_object. */
+
+ /* Wake up any high priority waiters first. */
+ if (mutex->high_priority_waiters != 0) {
+
+ mutex->high_priority_waiters = 0;
+ os_event_set(mutex->high_priority_event);
+ sync_array_object_signalled();
+
+ } else if (mutex_get_waiters(&mutex->base_mutex) != 0) {
+
+ mutex_signal_object(&mutex->base_mutex);
+ }
+
+#ifdef UNIV_SYNC_PERF_STAT
+ mutex_exit_count++;
+#endif
+
+}
+
+
+/******************************************************************//**
Locks a mutex for the current thread. If the mutex is reserved, the function
spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
before suspending the thread. */
@@ -199,7 +251,7 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line) /*!< in: line where locked */
{
@@ -209,9 +261,7 @@ mutex_enter_func(
/* Note that we do not peek at the value of lock_word before trying
the atomic test_and_set; we could peek, and possibly save time. */
- ut_d(mutex->count_using++);
-
- if (!mutex_test_and_set(mutex)) {
+ if (!ib_mutex_test_and_set(mutex)) {
ut_d(mutex->thread_id = os_thread_get_curr_id());
#ifdef UNIV_SYNC_DEBUG
mutex_set_debug_info(mutex, file_name, line);
@@ -219,9 +269,55 @@ mutex_enter_func(
return; /* Succeeded! */
}
- mutex_spin_wait(mutex, file_name, line);
+ mutex_spin_wait(mutex, false, file_name, line);
}
+/******************************************************************//**
+NOTE! Use the corresponding macro in the header file, not this function
+directly. Locks a priority mutex for the current thread. If the mutex is
+reserved the function spins a preset time (controlled by SYNC_SPIN_ROUNDS)
+waiting for the mutex before suspending the thread. If the thread is suspended,
+the priority argument value determines the relative order for its wake up. Any
+HIGH_PRIO waiters will be woken up before any LOW_PRIO waiters. In case of
+DEFAULT_PRIO, the relative priority will be set according to
+srv_current_thread_priority. */
+UNIV_INLINE
+void
+mutex_enter_func(
+/*=============*/
+ ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where
+ locked */
+ ulint line, /*!< in: line where locked */
+ enum ib_sync_priority priority)
+ /*!<in: mutex acquisition
+ priority */
+{
+ bool high_priority;
+
+ ut_ad(mutex_validate(&mutex->base_mutex));
+ ut_ad(!mutex_own(mutex));
+
+ /* Note that we do not peek at the value of lock_word before trying
+ the atomic test_and_set; we could peek, and possibly save time. */
+
+ if (!ib_mutex_test_and_set(&mutex->base_mutex)) {
+ ut_d(mutex->base_mutex.thread_id = os_thread_get_curr_id());
+#ifdef UNIV_SYNC_DEBUG
+ mutex_set_debug_info(&mutex->base_mutex, file_name, line);
+#endif
+ return; /* Succeeded! */
+ }
+
+ if (UNIV_LIKELY(priority == DEFAULT_PRIO)) {
+ high_priority = srv_current_thread_priority;
+ } else {
+ high_priority = (priority == HIGH_PRIO);
+ }
+ mutex_spin_wait(mutex, high_priority, file_name, line);
+}
+
+
#ifdef UNIV_PFS_MUTEX
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_enter(), not directly
@@ -232,28 +328,62 @@ UNIV_INLINE
void
pfs_mutex_enter_func(
/*=================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line) /*!< in: line where locked */
{
- struct PSI_mutex_locker* locker = NULL;
- PSI_mutex_locker_state state;
- int result = 0;
-
- if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
- locker = PSI_server->get_thread_mutex_locker(
- &state, mutex->pfs_psi, PSI_MUTEX_LOCK);
- if (locker) {
- PSI_server->start_mutex_wait(locker, file_name, line);
+ if (mutex->pfs_psi != NULL) {
+ PSI_mutex_locker* locker;
+ PSI_mutex_locker_state state;
+
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(
+ &state, mutex->pfs_psi,
+ PSI_MUTEX_LOCK, file_name, line);
+
+ mutex_enter_func(mutex, file_name, line);
+
+ if (locker != NULL) {
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
}
+ } else {
+ mutex_enter_func(mutex, file_name, line);
}
+}
- mutex_enter_func(mutex, file_name, line);
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_func(). */
+UNIV_INLINE
+void
+pfs_mutex_enter_func(
+/*=================*/
+ ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where
+ locked */
+ ulint line, /*!< in: line where locked */
+ enum ib_sync_priority priority) /*!<in: mutex acquisition
+ priority */
+{
+ if (mutex->base_mutex.pfs_psi != NULL) {
+ PSI_mutex_locker* locker;
+ PSI_mutex_locker_state state;
- if (locker) {
- PSI_server->end_mutex_wait(locker, result);
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(
+ &state, mutex->base_mutex.pfs_psi,
+ PSI_MUTEX_LOCK, file_name, line);
+
+ mutex_enter_func(mutex, file_name, line, priority);
+
+ if (locker != NULL) {
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
+ }
+ } else {
+ mutex_enter_func(mutex, file_name, line, priority);
}
}
+
/********************************************************************//**
NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
this function!
@@ -264,31 +394,53 @@ UNIV_INLINE
ulint
pfs_mutex_enter_nowait_func(
/*========================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line) /*!< in: line where requested */
{
- ulint ret;
- struct PSI_mutex_locker* locker = NULL;
- PSI_mutex_locker_state state;
-
- if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
- locker = PSI_server->get_thread_mutex_locker(
- &state, mutex->pfs_psi, PSI_MUTEX_TRYLOCK);
- if (locker) {
- PSI_server->start_mutex_wait(locker, file_name, line);
- }
- }
+ ulint ret;
+
+ if (mutex->pfs_psi != NULL) {
+ PSI_mutex_locker* locker;
+ PSI_mutex_locker_state state;
+
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(
+ &state, mutex->pfs_psi,
+ PSI_MUTEX_TRYLOCK, file_name, line);
- ret = mutex_enter_nowait_func(mutex, file_name, line);
+ ret = mutex_enter_nowait_func(mutex, file_name, line);
- if (locker) {
- PSI_server->end_mutex_wait(locker, ret);
+ if (locker != NULL) {
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, (int) ret);
+ }
+ } else {
+ ret = mutex_enter_nowait_func(mutex, file_name, line);
}
return(ret);
}
+
+/********************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_nowait_func.
+@return 0 if succeed, 1 if not */
+UNIV_INLINE
+ulint
+pfs_mutex_enter_nowait_func(
+/*========================*/
+ ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where mutex
+ requested */
+ ulint line) /*!< in: line where
+ requested */
+{
+ return pfs_mutex_enter_nowait_func(&mutex->base_mutex, file_name,
+ line);
+}
+
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_exit(), not directly
this function!
@@ -298,15 +450,34 @@ UNIV_INLINE
void
pfs_mutex_exit_func(
/*================*/
- mutex_t* mutex) /*!< in: pointer to mutex */
+ ib_mutex_t* mutex) /*!< in: pointer to mutex */
+{
+ if (mutex->pfs_psi != NULL) {
+ PSI_MUTEX_CALL(unlock_mutex)(mutex->pfs_psi);
+ }
+
+ mutex_exit_func(mutex);
+}
+
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_exit(), not directly
+this function!
+A wrapper function of mutex_exit_func() with performance schema instrumentation.
+Unlocks a priority mutex owned by the current thread. */
+UNIV_INLINE
+void
+pfs_mutex_exit_func(
+/*================*/
+ ib_prio_mutex_t* mutex) /*!< in: pointer to mutex */
{
- if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
- PSI_server->unlock_mutex(mutex->pfs_psi);
+ if (mutex->base_mutex.pfs_psi != NULL) {
+ PSI_MUTEX_CALL(unlock_mutex)(mutex->base_mutex.pfs_psi);
}
mutex_exit_func(mutex);
}
+
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_create(), not directly
this function!
@@ -318,7 +489,7 @@ void
pfs_mutex_create_func(
/*==================*/
mysql_pfs_key_t key, /*!< in: Performance Schema key */
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
# ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
@@ -328,9 +499,7 @@ pfs_mutex_create_func(
# endif /* UNIV_DEBUG */
const char* cmutex_name) /*!< in: mutex name */
{
- mutex->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key))
- ? PSI_server->init_mutex(key, mutex)
- : NULL;
+ mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex);
mutex_create_func(mutex,
# ifdef UNIV_DEBUG
@@ -342,6 +511,45 @@ pfs_mutex_create_func(
# endif /* UNIV_DEBUG */
cmutex_name);
}
+
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_create(), not directly
+this function!
+A wrapper function for mutex_create_func(), registers the mutex
+with performance schema if "UNIV_PFS_MUTEX" is defined when
+creating the priority mutex */
+UNIV_INLINE
+void
+pfs_mutex_create_func(
+/*==================*/
+ PSI_mutex_key key, /*!< in: Performance Schema
+ key */
+ ib_prio_mutex_t* mutex, /*!< in: pointer to memory */
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where
+ created */
+ ulint cline, /*!< in: file line where
+ created */
+# endif /* UNIV_DEBUG */
+ const char* cmutex_name)
+{
+ mutex->base_mutex.pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex);
+
+ mutex_create_func(mutex,
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ level,
+# endif /* UNIV_SYNC_DEBUG */
+ cfile_name,
+ cline,
+# endif /* UNIV_DEBUG */
+ cmutex_name);
+}
+
+
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_free(), not directly
this function!
@@ -351,14 +559,74 @@ UNIV_INLINE
void
pfs_mutex_free_func(
/*================*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
- if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
- PSI_server->destroy_mutex(mutex->pfs_psi);
+ if (mutex->pfs_psi != NULL) {
+ PSI_MUTEX_CALL(destroy_mutex)(mutex->pfs_psi);
mutex->pfs_psi = NULL;
}
mutex_free_func(mutex);
}
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_free(), not directly
+this function!
+Wrapper function for mutex_free_func(). Also destroys the performance
+schema probes when freeing the priority mutex */
+UNIV_INLINE
+void
+pfs_mutex_free_func(
+/*================*/
+ ib_prio_mutex_t* mutex) /*!< in: mutex */
+{
+ if (mutex->base_mutex.pfs_psi != NULL) {
+ PSI_MUTEX_CALL(destroy_mutex)(mutex->base_mutex.pfs_psi);
+ mutex->base_mutex.pfs_psi = NULL;
+ }
+
+ mutex_free_func(mutex);
+}
+
+
#endif /* UNIV_PFS_MUTEX */
+
+#ifndef HAVE_ATOMIC_BUILTINS
+/**********************************************************//**
+Function that uses a mutex to decrement a variable atomically */
+UNIV_INLINE
+void
+os_atomic_dec_ulint_func(
+/*=====================*/
+ ib_mutex_t* mutex, /*!< in: mutex guarding the dec */
+ volatile ulint* var, /*!< in/out: variable to decrement */
+ ulint delta) /*!< in: delta to decrement */
+{
+ mutex_enter(mutex);
+
+ /* I don't think we will encounter a situation where
+ this check will not be required. */
+ ut_ad(*var >= delta);
+
+ *var -= delta;
+
+ mutex_exit(mutex);
+}
+
+/**********************************************************//**
+Function that uses a mutex to increment a variable atomically */
+UNIV_INLINE
+void
+os_atomic_inc_ulint_func(
+/*=====================*/
+ ib_mutex_t* mutex, /*!< in: mutex guarding the increment */
+ volatile ulint* var, /*!< in/out: variable to increment */
+ ulint delta) /*!< in: delta to increment */
+{
+ mutex_enter(mutex);
+
+ *var += delta;
+
+ mutex_exit(mutex);
+}
+#endif /* !HAVE_ATOMIC_BUILTINS */
diff --git a/storage/xtradb/include/sync0types.h b/storage/xtradb/include/sync0types.h
index 5e800240888..67f613ab8ae 100644
--- a/storage/xtradb/include/sync0types.h
+++ b/storage/xtradb/include/sync0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,9 +26,19 @@ Created 9/5/1995 Heikki Tuuri
#ifndef sync0types_h
#define sync0types_h
-/** Rename mutex_t to avoid name space collision on some systems */
-#define mutex_t ib_mutex_t
-/** InnoDB mutex */
-typedef struct mutex_struct mutex_t;
+struct ib_mutex_t;
+
+/* The relative priority of the current thread. If 0, low priority; if 1, high
+priority. */
+extern UNIV_THREAD_LOCAL ulint srv_current_thread_priority;
+
+struct ib_prio_mutex_t;
+
+/** Priority mutex and rwlatch acquisition priorities */
+enum ib_sync_priority {
+ DEFAULT_PRIO,
+ LOW_PRIO,
+ HIGH_PRIO
+};
#endif
diff --git a/storage/xtradb/include/trx0i_s.h b/storage/xtradb/include/trx0i_s.h
index c67227369a7..ac5e00c6834 100644
--- a/storage/xtradb/include/trx0i_s.h
+++ b/storage/xtradb/include/trx0i_s.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -66,36 +66,34 @@ do { \
strncpy(buff, data, constraint); \
buff[constraint] = '\0'; \
\
- field = ha_storage_put_memlim( \
+ field = static_cast<const char*>( \
+ ha_storage_put_memlim( \
(tcache)->storage, buff, constraint + 1,\
- MAX_ALLOWED_FOR_STORAGE(tcache)); \
+ MAX_ALLOWED_FOR_STORAGE(tcache))); \
} else { \
- field = ha_storage_put_str_memlim( \
+ field = static_cast<const char*>( \
+ ha_storage_put_str_memlim( \
(tcache)->storage, data, \
- MAX_ALLOWED_FOR_STORAGE(tcache)); \
+ MAX_ALLOWED_FOR_STORAGE(tcache))); \
} \
} while (0)
/** A row of INFORMATION_SCHEMA.innodb_locks */
-typedef struct i_s_locks_row_struct i_s_locks_row_t;
-/** A row of INFORMATION_SCHEMA.innodb_trx */
-typedef struct i_s_trx_row_struct i_s_trx_row_t;
-/** A row of INFORMATION_SCHEMA.innodb_lock_waits */
-typedef struct i_s_lock_waits_row_struct i_s_lock_waits_row_t;
+struct i_s_locks_row_t;
/** Objects of trx_i_s_cache_t::locks_hash */
-typedef struct i_s_hash_chain_struct i_s_hash_chain_t;
+struct i_s_hash_chain_t;
/** Objects of this type are added to the hash table
trx_i_s_cache_t::locks_hash */
-struct i_s_hash_chain_struct {
+struct i_s_hash_chain_t {
i_s_locks_row_t* value; /*!< row of
INFORMATION_SCHEMA.innodb_locks*/
i_s_hash_chain_t* next; /*!< next item in the hash chain */
};
/** This structure represents INFORMATION_SCHEMA.innodb_locks row */
-struct i_s_locks_row_struct {
+struct i_s_locks_row_t {
trx_id_t lock_trx_id; /*!< transaction identifier */
const char* lock_mode; /*!< lock mode from
lock_get_mode_str() */
@@ -126,16 +124,16 @@ struct i_s_locks_row_struct {
};
/** This structure represents INFORMATION_SCHEMA.innodb_trx row */
-struct i_s_trx_row_struct {
+struct i_s_trx_row_t {
trx_id_t trx_id; /*!< transaction identifier */
const char* trx_state; /*!< transaction state from
trx_get_que_state_str() */
- ib_time_t trx_started; /*!< trx_struct::start_time */
+ ib_time_t trx_started; /*!< trx_t::start_time */
const i_s_locks_row_t* requested_lock_row;
/*!< pointer to a row
in innodb_locks if trx
is waiting, or NULL */
- ib_time_t trx_wait_started; /*!< trx_struct::wait_started */
+ ib_time_t trx_wait_started; /*!< trx_t::wait_started */
ullint trx_weight; /*!< TRX_WEIGHT() */
ulint trx_mysql_thread_id; /*!< thd_get_thread_id() */
const char* trx_query; /*!< MySQL statement being
@@ -143,46 +141,49 @@ struct i_s_trx_row_struct {
struct charset_info_st* trx_query_cs;
/*!< charset encode the MySQL
statement */
- const char* trx_operation_state; /*!< trx_struct::op_info */
+ const char* trx_operation_state; /*!< trx_t::op_info */
ulint trx_tables_in_use;/*!< n_mysql_tables_in_use in
- trx_struct */
+ trx_t */
ulint trx_tables_locked;
/*!< mysql_n_tables_locked in
- trx_struct */
+ trx_t */
ulint trx_lock_structs;/*!< list len of trx_locks in
- trx_struct */
+ trx_t */
ulint trx_lock_memory_bytes;
/*!< mem_heap_get_size(
trx->lock_heap) */
ulint trx_rows_locked;/*!< lock_number_of_rows_locked() */
- ullint trx_rows_modified;/*!< trx_struct::undo_no */
+ ullint trx_rows_modified;/*!< trx_t::undo_no */
ulint trx_concurrency_tickets;
/*!< n_tickets_to_enter_innodb in
- trx_struct */
+ trx_t */
const char* trx_isolation_level;
- /*!< isolation_level in trx_struct*/
+ /*!< isolation_level in trx_t */
ibool trx_unique_checks;
- /*!< check_unique_secondary in
- trx_struct*/
+ /*!< check_unique_secondary in trx_t*/
ibool trx_foreign_key_checks;
- /*!< check_foreigns in trx_struct */
+ /*!< check_foreigns in trx_t */
const char* trx_foreign_key_error;
- /*!< detailed_error in trx_struct */
+ /*!< detailed_error in trx_t */
ibool trx_has_search_latch;
- /*!< has_search_latch in trx_struct */
+ /*!< has_search_latch in trx_t */
ulint trx_search_latch_timeout;
- /*!< search_latch_timeout in
- trx_struct */
+ /*!< search_latch_timeout in trx_t */
+ ulint trx_is_read_only;
+ /*!< trx_t::read_only */
+ ulint trx_is_autocommit_non_locking;
+ /*!< trx_is_autocommit_non_locking(trx)
+ */
};
/** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */
-struct i_s_lock_waits_row_struct {
+struct i_s_lock_waits_row_t {
const i_s_locks_row_t* requested_lock_row; /*!< requested lock */
const i_s_locks_row_t* blocking_lock_row; /*!< blocking lock */
};
/** Cache of INFORMATION_SCHEMA table data */
-typedef struct trx_i_s_cache_struct trx_i_s_cache_t;
+struct trx_i_s_cache_t;
/** Auxiliary enum used by functions that need to select one of the
INFORMATION_SCHEMA tables */
@@ -307,4 +308,8 @@ trx_i_s_create_lock_id(
ulint lock_id_size);/*!< in: size of the lock id
buffer */
+UNIV_INTERN
+void
+trx_i_s_get_lock_sys_memory_usage(ulint *constant, ulint *variable);
+
#endif /* trx0i_s_h */
diff --git a/storage/xtradb/include/trx0purge.h b/storage/xtradb/include/trx0purge.h
index f8f662125a7..a862523c092 100644
--- a/storage/xtradb/include/trx0purge.h
+++ b/storage/xtradb/include/trx0purge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -52,17 +52,6 @@ trx_purge_get_log_from_hist(
/*========================*/
fil_addr_t node_addr); /*!< in: file address of the history
list node of the log */
-/*****************************************************************//**
-Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system.
-@return TRUE if is sure that it is preserved, also if the function
-returns FALSE, it is possible that the undo log still exists in the
-system */
-UNIV_INTERN
-ibool
-trx_purge_update_undo_must_exist(
-/*=============================*/
- trx_id_t trx_id);/*!< in: transaction id */
/********************************************************************//**
Creates the global purge system control structure and inits the history
mutex. */
@@ -70,7 +59,8 @@ UNIV_INTERN
void
trx_purge_sys_create(
/*=================*/
- ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/
+ ulint n_purge_threads,/*!< in: number of purge threads */
+ ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/
/********************************************************************//**
Frees the global purge system control structure. */
UNIV_INTERN
@@ -88,26 +78,6 @@ trx_purge_add_update_undo_to_history(
page_t* undo_page, /*!< in: update undo log header page,
x-latched */
mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function.
-@return copy of an undo log record or pointer to trx_purge_dummy_rec,
-if the whole undo log can skipped in purge; NULL if none left */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_purge_fetch_next_rec(
-/*=====================*/
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- trx_undo_inf_t** cell, /*!< out: storage cell for the record in the
- purge array */
- mem_heap_t* heap); /*!< in: memory heap where copied */
-/*******************************************************************//**
-Releases a reserved purge undo record. */
-UNIV_INTERN
-void
-trx_purge_rec_release(
-/*==================*/
- trx_undo_inf_t* cell); /*!< in: storage cell */
/*******************************************************************//**
This function runs a purge batch.
@return number of undo log pages handled in the batch */
@@ -115,48 +85,102 @@ UNIV_INTERN
ulint
trx_purge(
/*======*/
- ulint limit); /*!< in: the maximum number of records to
- purge in one batch */
-/******************************************************************//**
-Prints information of the purge system to stderr. */
+ ulint n_purge_threads, /*!< in: number of purge tasks to
+ submit to task queue. */
+ ulint limit, /*!< in: the maximum number of
+ records to purge in one batch */
+ bool truncate); /*!< in: truncate history if true */
+/*******************************************************************//**
+Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
UNIV_INTERN
void
-trx_purge_sys_print(void);
-/*======================*/
+trx_purge_stop(void);
+/*================*/
+/*******************************************************************//**
+Resume purge, move to PURGE_STATE_RUN. */
+UNIV_INTERN
+void
+trx_purge_run(void);
+/*================*/
+
+/** Purge states */
+enum purge_state_t {
+ PURGE_STATE_INIT, /*!< Purge instance created */
+ PURGE_STATE_RUN, /*!< Purge should be running */
+ PURGE_STATE_STOP, /*!< Purge should be stopped */
+ PURGE_STATE_EXIT, /*!< Purge has been shutdown */
+ PURGE_STATE_DISABLED /*!< Purge was never started */
+};
+
+/*******************************************************************//**
+Get the purge state.
+@return purge state. */
+UNIV_INTERN
+purge_state_t
+trx_purge_state(void);
+/*=================*/
+
+/** This is the purge pointer/iterator. We need both the undo no and the
+transaction no up to which purge has parsed and applied the records. */
+struct purge_iter_t {
+ trx_id_t trx_no; /*!< Purge has advanced past all
+ transactions whose number is less
+ than this */
+ undo_no_t undo_no; /*!< Purge has advanced past all records
+ whose undo number is less than this */
+};
/** The control structure used in the purge operation */
-struct trx_purge_struct{
- ulint state; /*!< Purge system state */
+struct trx_purge_t{
sess_t* sess; /*!< System session running the purge
query */
trx_t* trx; /*!< System transaction running the
- purge
- query: this trx is not in the trx list
- of the trx system and it never ends */
- que_t* query; /*!< The query graph which will do the
- parallelized purge operation */
- rw_lock_t latch; /*!< The latch protecting the purge
- view. A purge operation must acquire
- an x-latch here for the instant at which
+ purge query: this trx is not in the
+ trx list of the trx system and it
+ never ends */
+ prio_rw_lock_t latch; /*!< The latch protecting the purge
+ view. A purge operation must acquire an
+ x-latch here for the instant at which
it changes the purge view: an undo
log operation can prevent this by
- obtaining an s-latch here. */
+ obtaining an s-latch here. It also
+ protects state and running */
+ os_event_t event; /*!< State signal event */
+ ulint n_stop; /*!< Counter to track number of stops */
+ volatile bool running; /*!< true, if purge is active,
+ we check this without the latch too */
+ volatile purge_state_t state; /*!< Purge coordinator thread states,
+ we check this in several places
+ without holding the latch. */
+ que_t* query; /*!< The query graph which will do the
+ parallelized purge operation */
read_view_t* view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
+ read_view_t* prebuilt_clone; /*!< Pre-built view which is used as a
+ temporary clone of the oldest view in
+ read_view_purge_open() */
read_view_t* prebuilt_view; /*!< Pre-built view array */
- ulonglong n_pages_handled;/*!< Approximate number of undo log
- pages processed in purge */
- ulonglong handle_limit; /*!< Target of how many pages to get
- processed in the current purge */
+ volatile ulint n_submitted; /*!< Count of total tasks submitted
+ to the task queue */
+ volatile ulint n_completed; /*!< Count of total tasks completed */
+
/*------------------------------*/
/* The following two fields form the 'purge pointer' which advances
during a purge, and which is used in history list truncation */
- trx_id_t purge_trx_no; /*!< Purge has advanced past all
- transactions whose number is less
- than this */
- undo_no_t purge_undo_no; /*!< Purge has advanced past all records
- whose undo number is less than this */
+ purge_iter_t iter; /* Limit up to which we have read and
+ parsed the UNDO log records. Not
+ necessarily purged from the indexes.
+ Note that this can never be less than
+ the limit below, we check for this
+ invariant in trx0purge.cc */
+ purge_iter_t limit; /* The 'purge pointer' which advances
+ during a purge, and which is used in
+ history list truncation */
+#ifdef UNIV_DEBUG
+ purge_iter_t done; /* Indicate 'purge pointer' which have
+ purged already accurately. */
+#endif /* UNIV_DEBUG */
/*-----------------------------*/
ibool next_stored; /*!< TRUE if the info of the next record
to purge is stored below: if yes, then
@@ -175,9 +199,6 @@ struct trx_purge_struct{
the next record to purge belongs */
ulint hdr_offset; /*!< Header byte offset on the page */
/*-----------------------------*/
- trx_undo_arr_t* arr; /*!< Array of transaction numbers and
- undo numbers of the undo records
- currently under processing in purge */
mem_heap_t* heap; /*!< Temporary storage used during a
purge: can be emptied after purge
completes */
@@ -185,12 +206,15 @@ struct trx_purge_struct{
ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on
rseg_queue_t::trx_no. It is protected
by the bh_mutex */
- mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
+ ib_mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
+};
+
+/** Info required to purge a record */
+struct trx_purge_rec_t {
+ trx_undo_rec_t* undo_rec; /*!< Record to purge */
+ roll_ptr_t roll_ptr; /*!< File pointer to UNDO record */
};
-#define TRX_PURGE_ON 1 /* purge operation is running */
-#define TRX_STOP_PURGE 2 /* purge operation is stopped, or
- it should be stopped */
#ifndef UNIV_NONINL
#include "trx0purge.ic"
#endif
diff --git a/storage/xtradb/include/trx0purge.ic b/storage/xtradb/include/trx0purge.ic
index 800d26ba51b..ca9cc1fb894 100644
--- a/storage/xtradb/include/trx0purge.ic
+++ b/storage/xtradb/include/trx0purge.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -41,3 +41,22 @@ trx_purge_get_log_from_hist(
return(node_addr);
}
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Checks the invariant that the purge limit has not passed the purge iterator.
+@return TRUE if purge_sys->limit <= purge_sys->iter */
+UNIV_INLINE
+ibool
+trx_purge_check_limit(void)
+/*=======================*/
+{
+ ut_ad(purge_sys->limit.trx_no <= purge_sys->iter.trx_no);
+
+ if (purge_sys->limit.trx_no == purge_sys->iter.trx_no) {
+ ut_ad(purge_sys->limit.undo_no <= purge_sys->iter.undo_no);
+ }
+
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
diff --git a/storage/xtradb/include/trx0rec.h b/storage/xtradb/include/trx0rec.h
index a6e54d6dfd1..50da55d2ea3 100644
--- a/storage/xtradb/include/trx0rec.h
+++ b/storage/xtradb/include/trx0rec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -105,10 +105,11 @@ trx_undo_rec_get_pars(
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /*!< out: compiler info, relevant only
for update type records */
- ibool* updated_extern, /*!< out: TRUE if we updated an
+ bool* updated_extern, /*!< out: true if we updated an
externally stored field */
undo_no_t* undo_no, /*!< out: undo log record number */
- table_id_t* table_id); /*!< out: table id */
+ table_id_t* table_id) /*!< out: table id */
+ __attribute__((nonnull));
/*******************************************************************//**
Builds a row reference from an undo log record.
@return pointer to remaining part of undo record */
@@ -178,8 +179,9 @@ trx_undo_update_rec_get_update(
needed is allocated */
upd_t** upd); /*!< out, own: update vector */
/*******************************************************************//**
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table.
+Builds a partial row from an update undo log record, for purge.
+It contains the columns which occur as ordering in any index of the table.
+Any missing columns are indicated by col->mtype == DATA_MISSING.
@return pointer to remaining part of undo record */
UNIV_INTERN
byte*
@@ -197,8 +199,9 @@ trx_undo_rec_get_partial_row(
ibool ignore_prefix, /*!< in: flag to indicate if we
expect blob prefixes in undo. Used
only in the assertion. */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
+ mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
@@ -206,7 +209,7 @@ transaction and in consistent reads that must look to the history of this
transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_report_row_operation(
/*==========================*/
ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
@@ -225,10 +228,12 @@ trx_undo_report_row_operation(
const rec_t* rec, /*!< in: case of an update or delete
marking, the record in the clustered
index, otherwise NULL */
- roll_ptr_t* roll_ptr); /*!< out: rollback pointer to the
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
inserted undo log record,
0 if BTR_NO_UNDO_LOG
flag was specified */
+ __attribute__((nonnull(3,4,10), warn_unused_result));
/******************************************************************//**
Copies an undo record to heap. This function can be called if we know that
the undo log record exists.
@@ -238,35 +243,17 @@ trx_undo_rec_t*
trx_undo_get_undo_rec_low(
/*======================*/
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- mem_heap_t* heap); /*!< in: memory heap where copied */
-/******************************************************************//**
-Copies an undo record to heap.
-
-NOTE: the caller must have latches on the clustered index page and
-purge_view.
-
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
-truncated and we cannot fetch the old version */
-UNIV_INTERN
-ulint
-trx_undo_get_undo_rec(
-/*==================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- trx_id_t trx_id, /*!< in: id of the trx that generated
- the roll pointer: it points to an
- undo log of this transaction */
- trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
- mem_heap_t* heap); /*!< in: memory heap where copied */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
-Build a previous version of a clustered index record. This function checks
-that the caller has a latch on the index page of the clustered index record
-and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
-earlier than purge_view, which means that it may have been removed,
-DB_ERROR if corrupted record */
+Build a previous version of a clustered index record. The caller must
+hold a latch on the index page of the clustered index record.
+@retval true if previous version was built, or if it was an insert
+or the table has been rebuilt
+@retval false if the previous version is earlier than purge_view,
+which means that it may have been removed */
UNIV_INTERN
-ulint
+bool
trx_undo_prev_version_build(
/*========================*/
const rec_t* index_rec,/*!< in: clustered index record in the
@@ -275,12 +262,13 @@ trx_undo_prev_version_build(
index_rec page and purge_view */
const rec_t* rec, /*!< in: version of a clustered index record */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
- rec_t** old_vers);/*!< out, own: previous version, or NULL if
+ rec_t** old_vers)/*!< out, own: previous version, or NULL if
rec is the first inserted version, or if
history data has been deleted */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of adding an undo log record.
diff --git a/storage/xtradb/include/trx0rec.ic b/storage/xtradb/include/trx0rec.ic
index 4fc5a7147f9..08704f6b821 100644
--- a/storage/xtradb/include/trx0rec.ic
+++ b/storage/xtradb/include/trx0rec.ic
@@ -90,7 +90,7 @@ trx_undo_rec_get_offset(
/*====================*/
undo_no_t undo_no) /*!< in: undo no read from node */
{
- return (3 + mach_ull_get_much_compressed_size(undo_no));
+ return(3 + mach_ull_get_much_compressed_size(undo_no));
}
/***********************************************************************//**
@@ -108,6 +108,6 @@ trx_undo_rec_copy(
len = mach_read_from_2(undo_rec)
- ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
ut_ad(len < UNIV_PAGE_SIZE);
- return(mem_heap_dup(heap, undo_rec, len));
+ return((trx_undo_rec_t*) mem_heap_dup(heap, undo_rec, len));
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/trx0roll.h b/storage/xtradb/include/trx0roll.h
index db68ae0a8d6..aa3dbb1f6cd 100644
--- a/storage/xtradb/include/trx0roll.h
+++ b/storage/xtradb/include/trx0roll.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -27,13 +27,12 @@ Created 3/26/1996 Heikki Tuuri
#define trx0roll_h
#include "univ.i"
+#include "btr0types.h"
#include "trx0trx.h"
#include "trx0types.h"
#include "mtr0mtr.h"
#include "trx0sys.h"
-#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL)
-
/*******************************************************************//**
Determines if this transaction is rolling back an incomplete transaction
in crash recovery.
@@ -53,12 +52,6 @@ trx_savept_take(
/*============*/
trx_t* trx); /*!< in: transaction */
/*******************************************************************//**
-Creates an undo number array. */
-UNIV_INTERN
-trx_undo_arr_t*
-trx_undo_arr_create(void);
-/*=====================*/
-/*******************************************************************//**
Frees an undo number array. */
UNIV_INTERN
void
@@ -74,13 +67,6 @@ trx_undo_arr_get_nth_info(
/*======================*/
trx_undo_arr_t* arr, /*!< in: undo number array */
ulint n); /*!< in: position */
-/***********************************************************************//**
-Tries truncate the undo logs. */
-UNIV_INTERN
-void
-trx_roll_try_truncate(
-/*==================*/
- trx_t* trx); /*!< in/out: transaction */
/********************************************************************//**
Pops the topmost record when the two undo logs of a transaction are seen
as a single stack of records ordered by their undo numbers. Inserts the
@@ -116,19 +102,6 @@ trx_undo_rec_release(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
undo_no_t undo_no);/*!< in: undo number */
-/*********************************************************************//**
-Starts a rollback operation. */
-UNIV_INTERN
-void
-trx_rollback(
-/*=========*/
- trx_t* trx, /*!< in: transaction */
- trx_sig_t* sig, /*!< in: signal starting the rollback */
- que_thr_t** next_thr);/*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
@@ -147,38 +120,13 @@ committed, then we clean up a possible insert undo log. If the
transaction was not yet committed, then we roll it back.
Note: this is done in a background thread.
@return a dummy parameter */
-UNIV_INTERN
+extern "C" UNIV_INTERN
os_thread_ret_t
-trx_rollback_or_clean_all_recovered(
-/*================================*/
+DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
+/*================================================*/
void* arg __attribute__((unused)));
/*!< in: a dummy parameter required by
os_thread_create */
-/****************************************************************//**
-Finishes a transaction rollback. */
-UNIV_INTERN
-void
-trx_finish_rollback_off_kernel(
-/*===========================*/
- que_t* graph, /*!< in: undo graph which can now be freed */
- trx_t* trx, /*!< in: transaction */
- que_thr_t** next_thr);/*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if this parameter is
- NULL, it is ignored */
-/****************************************************************//**
-Builds an undo 'query' graph for a transaction. The actual rollback is
-performed by executing this query graph like a query subprocedure call.
-The reply about the completion of the rollback will be sent by this
-graph.
-@return own: the query graph */
-UNIV_INTERN
-que_t*
-trx_roll_graph_build(
-/*=================*/
- trx_t* trx); /*!< in: trx handle */
/*********************************************************************//**
Creates a rollback command node struct.
@return own: rollback node struct */
@@ -199,29 +147,32 @@ trx_rollback_step(
Rollback a transaction used in MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_for_mysql(
/*===================*/
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
/*******************************************************************//**
Rollback the latest SQL statement for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
/*******************************************************************//**
-Rollback a transaction used in MySQL.
+Rollback a transaction to a given savepoint or do a complete rollback.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
-trx_general_rollback_for_mysql(
-/*===========================*/
+dberr_t
+trx_rollback_to_savepoint(
+/*======================*/
trx_t* trx, /*!< in: transaction handle */
- trx_savept_t* savept);/*!< in: pointer to savepoint undo number, if
+ trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
partial rollback requested, or NULL for
complete rollback */
+ __attribute__((nonnull(1)));
/*******************************************************************//**
Rolls back a transaction back to a named savepoint. Modifications after the
savepoint are undone but InnoDB does NOT release the corresponding locks
@@ -232,17 +183,18 @@ were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_rollback_to_savepoint_for_mysql(
/*================================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t* mysql_binlog_cache_pos);/*!< out: the MySQL binlog cache
+ ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
position corresponding to this
savepoint; MySQL needs this
information to remove the
binlog entries of the queries
executed after the savepoint */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Creates a named savepoint. If the transaction is not yet started, starts it.
If there is already a savepoint of the same name, this call erases that old
@@ -250,40 +202,30 @@ savepoint and replaces it with a new. Savepoints are deleted in a transaction
commit or rollback.
@return always DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_savepoint_for_mysql(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t binlog_cache_pos); /*!< in: MySQL binlog cache
+ ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
position corresponding to this
connection at the time of the
savepoint */
-
+ __attribute__((nonnull));
/*******************************************************************//**
Releases a named savepoint. Savepoints which
were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_release_savepoint_for_mysql(
/*============================*/
trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name); /*!< in: savepoint name */
-
-/*******************************************************************//**
-Frees a single savepoint struct. */
-UNIV_INTERN
-void
-trx_roll_savepoint_free(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_named_savept_t* savep); /*!< in: savepoint to free */
-
+ const char* savepoint_name) /*!< in: savepoint name */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
-Frees savepoint structs starting from savep, if savep == NULL then
-free all savepoints. */
+Frees savepoint structs starting from savep. */
UNIV_INTERN
void
trx_roll_savepoints_free(
@@ -293,34 +235,35 @@ trx_roll_savepoints_free(
if this is NULL, free all savepoints
of trx */
-/** A cell of trx_undo_arr_struct; used during a rollback and a purge */
-struct trx_undo_inf_struct{
+/** A cell of trx_undo_arr_t; used during a rollback and a purge */
+struct trx_undo_inf_t{
+ ibool in_use; /*!< true if cell is being used */
trx_id_t trx_no; /*!< transaction number: not defined during
a rollback */
undo_no_t undo_no;/*!< undo number of an undo record */
- ibool in_use; /*!< TRUE if the cell is in use */
};
/** During a rollback and a purge, undo numbers of undo records currently being
processed are stored in this array */
-struct trx_undo_arr_struct{
+struct trx_undo_arr_t{
ulint n_cells; /*!< number of cells in the array */
- ulint n_used; /*!< number of cells currently in use */
+ ulint n_used; /*!< number of cells in use */
trx_undo_inf_t* infos; /*!< the array of undo infos */
mem_heap_t* heap; /*!< memory heap from which allocated */
};
/** Rollback node states */
enum roll_node_state {
- ROLL_NODE_SEND = 1, /*!< about to send a rollback signal to
- the transaction */
- ROLL_NODE_WAIT /*!< rollback signal sent to the transaction,
- waiting for completion */
+ ROLL_NODE_NONE = 0, /*!< Unknown state */
+ ROLL_NODE_SEND, /*!< about to send a rollback signal to
+ the transaction */
+ ROLL_NODE_WAIT /*!< rollback signal sent to the
+ transaction, waiting for completion */
};
/** Rollback command node in a query graph */
-struct roll_node_struct{
+struct roll_node_t{
que_common_t common; /*!< node type: QUE_NODE_ROLLBACK */
enum roll_node_state state; /*!< node execution state */
ibool partial;/*!< TRUE if we want a partial
@@ -328,10 +271,11 @@ struct roll_node_struct{
trx_savept_t savept; /*!< savepoint to which to
roll back, in the case of a
partial rollback */
+ que_thr_t* undo_thr;/*!< undo query graph */
};
/** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
-struct trx_named_savept_struct{
+struct trx_named_savept_t{
char* name; /*!< savepoint name */
trx_savept_t savept; /*!< the undo number corresponding to
the savepoint */
diff --git a/storage/xtradb/include/trx0roll.ic b/storage/xtradb/include/trx0roll.ic
index 6a4a5f54459..178e9bb730a 100644
--- a/storage/xtradb/include/trx0roll.ic
+++ b/storage/xtradb/include/trx0roll.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/trx0rseg.h b/storage/xtradb/include/trx0rseg.h
index 703b6e411a5..b9c84ef2b06 100644
--- a/storage/xtradb/include/trx0rseg.h
+++ b/storage/xtradb/include/trx0rseg.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -29,6 +29,7 @@ Created 3/26/1996 Heikki Tuuri
#include "univ.i"
#include "trx0types.h"
#include "trx0sys.h"
+#include "ut0bh.h"
/******************************************************************//**
Gets a rollback segment header.
@@ -86,11 +87,11 @@ trx_rsegf_undo_find_free(
/******************************************************************//**
Looks for a rollback segment, based on the rollback segment id.
@return rollback segment */
-UNIV_INTERN
+UNIV_INLINE
trx_rseg_t*
trx_rseg_get_on_id(
/*===============*/
- ulint id); /*!< in: rollback segment id */
+ ulint id); /*!< in: rollback segment id */
/****************************************************************//**
Creates a rollback segment header. This function is called only when
a new rollback segment is created in the database.
@@ -107,30 +108,42 @@ trx_rseg_header_create(
mtr_t* mtr); /*!< in: mtr */
/*********************************************************************//**
Creates the memory copies for rollback segments and initializes the
-rseg list and array in trx_sys at a database startup. */
+rseg array in trx_sys at a database startup. */
UNIV_INTERN
void
-trx_rseg_list_and_array_init(
-/*=========================*/
- trx_sysf_t* sys_header, /*!< in: trx system header */
+trx_rseg_array_init(
+/*================*/
+ trx_sysf_t* sys_header, /*!< in/out: trx system header */
ib_bh_t* ib_bh, /*!< in: rseg queue */
- mtr_t* mtr); /*!< in: mtr */
-
+ mtr_t* mtr); /*!< in/out: mtr */
/***************************************************************************
Frees an instance of the rollback segment in memory. */
UNIV_INTERN
void
trx_rseg_mem_free(
/*==============*/
- trx_rseg_t* rseg); /* in, own: instance to free */
+ trx_rseg_t* rseg); /*!< in, own: instance to free */
/*********************************************************************
Creates a rollback segment. */
UNIV_INTERN
trx_rseg_t*
-trx_rseg_create(void);
-/*==================*/
-
+trx_rseg_create(
+/*============*/
+ ulint space); /*!< in: id of UNDO tablespace */
+
+/********************************************************************
+Get the number of unique rollback tablespaces in use except space id 0.
+The last space id will be the sentinel value ULINT_UNDEFINED. The array
+will be sorted on space id. Note: space_ids should have space for
+TRX_SYS_N_RSEGS + 1 elements.
+@return number of unique rollback tablespaces in use. */
+UNIV_INTERN
+ulint
+trx_rseg_get_n_undo_tablespaces(
+/*============================*/
+ ulint* space_ids); /*!< out: array of space ids of
+ UNDO tablespaces */
/* Number of undo log slots in a rollback segment file copy */
#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16)
@@ -138,11 +151,11 @@ trx_rseg_create(void);
#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
/* The rollback segment memory object */
-struct trx_rseg_struct{
+struct trx_rseg_t{
/*--------------------------------------------------------*/
ulint id; /*!< rollback segment id == the index of
its slot in the trx system file copy */
- mutex_t mutex; /*!< mutex protecting the fields in this
+ ib_prio_mutex_t mutex; /*!< mutex protecting the fields in this
struct except id, which is constant */
ulint space; /*!< space where the rollback segment is
header is placed */
@@ -176,20 +189,14 @@ struct trx_rseg_struct{
yet purged log */
ibool last_del_marks; /*!< TRUE if the last not yet purged log
needs purging */
- /*--------------------------------------------------------*/
- UT_LIST_NODE_T(trx_rseg_t) rseg_list;
- /* the list of the rollback segment
- memory objects */
};
/** For prioritising the rollback segments for purge. */
-struct rseg_queue_struct {
- trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
- trx_rseg_t* rseg; /*!< Rollback segment */
+struct rseg_queue_t {
+ trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
+ trx_rseg_t* rseg; /*!< Rollback segment */
};
-typedef struct rseg_queue_struct rseg_queue_t;
-
/* Undo log segment slot in a rollback segment header */
/*-------------------------------------------------------------*/
#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
diff --git a/storage/xtradb/include/trx0rseg.ic b/storage/xtradb/include/trx0rseg.ic
index bb2684576d3..30743da9b8c 100644
--- a/storage/xtradb/include/trx0rseg.ic
+++ b/storage/xtradb/include/trx0rseg.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -86,7 +86,7 @@ trx_rsegf_get_nth_undo(
ulint n, /*!< in: index of slot */
mtr_t* mtr) /*!< in: mtr */
{
- if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
+ if (n >= TRX_RSEG_N_SLOTS) {
fprintf(stderr,
"InnoDB: Error: trying to get slot %lu of rseg\n",
(ulong) n);
@@ -108,7 +108,7 @@ trx_rsegf_set_nth_undo(
ulint page_no,/*!< in: page number of the undo log segment */
mtr_t* mtr) /*!< in: mtr */
{
- if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
+ if (n >= TRX_RSEG_N_SLOTS) {
fprintf(stderr,
"InnoDB: Error: trying to set slot %lu of rseg\n",
(ulong) n);
@@ -150,3 +150,18 @@ trx_rsegf_undo_find_free(
return(ULINT_UNDEFINED);
}
+
+/******************************************************************//**
+Looks for a rollback segment, based on the rollback segment id.
+@return rollback segment */
+UNIV_INLINE
+trx_rseg_t*
+trx_rseg_get_on_id(
+/*===============*/
+ ulint id) /*!< in: rollback segment id */
+{
+ ut_a(id < TRX_SYS_N_RSEGS);
+
+ return(trx_sys->rseg_array[id]);
+}
+
diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h
index f284790630c..7b97c6e99cd 100644
--- a/storage/xtradb/include/trx0sys.h
+++ b/storage/xtradb/include/trx0sys.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -41,6 +41,9 @@ Created 3/26/1996 Heikki Tuuri
#include "ut0bh.h"
#include "read0types.h"
#include "page0types.h"
+#include "ut0bh.h"
+
+typedef UT_LIST_BASE_NODE_T(trx_t) trx_list_t;
/** In a MySQL replication slave, in crash recovery we store the master log
file name and position here. */
@@ -53,9 +56,6 @@ there was no master log position info inside InnoDB.*/
extern ib_int64_t trx_sys_mysql_master_log_pos;
/* @} */
-extern char trx_sys_mysql_relay_log_name[];
-extern ib_int64_t trx_sys_mysql_relay_log_pos;
-
/** If this MySQL server uses binary logging, after InnoDB has been inited
and if it has done a crash recovery, we store the binlog file name and position
here. */
@@ -69,53 +69,6 @@ extern ib_int64_t trx_sys_mysql_bin_log_pos;
/** The transaction system */
extern trx_sys_t* trx_sys;
-/** Doublewrite system */
-extern trx_doublewrite_t* trx_doublewrite;
-/** The following is set to TRUE when we are upgrading from pre-4.1
-format data files to the multiple tablespaces format data files */
-extern ibool trx_doublewrite_must_reset_space_ids;
-/** Set to TRUE when the doublewrite buffer is being created */
-extern ibool trx_doublewrite_buf_is_being_created;
-/** The following is TRUE when we are using the database in the
-post-4.1 format, i.e., we have successfully upgraded, or have created
-a new database installation */
-extern ibool trx_sys_multiple_tablespace_format;
-
-/****************************************************************//**
-Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-UNIV_INTERN
-void
-trx_sys_create_doublewrite_buf(void);
-/*================================*/
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function uses a possible doublewrite buffer to restore
-half-written pages in the data files. */
-UNIV_INTERN
-void
-trx_sys_doublewrite_init_or_restore_pages(
-/*======================================*/
- ibool restore_corrupt_pages); /*!< in: TRUE=restore pages */
-/****************************************************************//**
-Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
-multiple tablespace format. */
-UNIV_INTERN
-void
-trx_sys_mark_upgraded_to_multiple_tablespaces(void);
-/*===============================================*/
-/****************************************************************//**
-Determines if a page number is located inside the doublewrite buffer.
-@return TRUE if the location is inside the two blocks of the
-doublewrite buffer */
-UNIV_INTERN
-ibool
-trx_doublewrite_page_inside(
-/*========================*/
- ulint page_no); /*!< in: page number */
/***************************************************************//**
Checks if a page address is the trx sys header page.
@return TRUE if trx sys header page */
@@ -125,42 +78,26 @@ trx_sys_hdr_page(
/*=============*/
ulint space, /*!< in: space */
ulint page_no);/*!< in: page number */
-/***************************************************************//**
-Checks if a space is the system tablespaces.
-@return TRUE if system tablespace */
-UNIV_INLINE
-ibool
-trx_sys_sys_space(
-/*==============*/
- ulint space); /*!< in: space */
-/***************************************************************//**
-Checks if a space is the doublewrite tablespace.
-@return TRUE if doublewrite tablespace */
-UNIV_INLINE
-ibool
-trx_sys_doublewrite_space(
-/*======================*/
- ulint space); /*!< in: space */
/*****************************************************************//**
Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started. */
+system. This is called when the database is started.
+@return min binary heap of rsegs to purge */
UNIV_INTERN
-void
+ib_bh_t*
trx_sys_init_at_db_start(void);
/*==========================*/
/*****************************************************************//**
-Creates and initializes the transaction system at the database creation. */
+Creates the trx_sys instance and initializes ib_bh and mutex. */
UNIV_INTERN
void
trx_sys_create(void);
/*================*/
/*****************************************************************//**
-Creates and initializes the dummy transaction system page for tablespace. */
+Creates and initializes the transaction system at the database creation. */
UNIV_INTERN
void
-trx_sys_dummy_create(
-/*=================*/
- ulint space);
+trx_sys_create_sys_pages(void);
+/*==========================*/
/****************************************************************//**
Looks for a free slot for a rollback segment in the trx system file copy.
@return slot index or ULINT_UNDEFINED if not found */
@@ -178,16 +115,6 @@ trx_sys_get_nth_rseg(
/*=================*/
trx_sys_t* sys, /*!< in: trx system */
ulint n); /*!< in: index of slot */
-/***************************************************************//**
-Sets the pointer in the nth slot of the rseg array. */
-UNIV_INLINE
-void
-trx_sys_set_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n, /*!< in: index of slot */
- trx_rseg_t* rseg); /*!< in: pointer to rseg object, NULL if slot
- not in use */
/**********************************************************************//**
Gets a pointer to the transaction system file copy and x-locks its page.
@return pointer to system file copy, page x-locked */
@@ -248,6 +175,14 @@ UNIV_INLINE
trx_id_t
trx_sys_get_new_trx_id(void);
/*========================*/
+/*****************************************************************//**
+Determines the maximum transaction id.
+@return maximum currently allocated trx id; will be stale after the
+next call to trx_sys_get_new_trx_id() */
+UNIV_INLINE
+trx_id_t
+trx_sys_get_max_trx_id(void);
+/*========================*/
/*************************************************************//**
Find a slot for a given trx ID in a descriptors array.
@@ -286,39 +221,75 @@ trx_read_trx_id(
/*============*/
const byte* ptr); /*!< in: pointer to memory from where to read */
/****************************************************************//**
-Looks for the trx handle with the given id in trx_list.
-@return the trx handle or NULL if not found */
+Looks for the trx instance with the given id in the rw trx_list.
+The caller must be holding trx_sys->mutex.
+@return the trx handle or NULL if not found;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
UNIV_INLINE
trx_t*
-trx_get_on_id(
-/*==========*/
+trx_get_rw_trx_by_id(
+/*=================*/
trx_id_t trx_id);/*!< in: trx id to search for */
/****************************************************************//**
-Returns the minumum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->conc_state to
+Returns the minimum trx id in rw trx list. This is the smallest id for which
+the trx can possibly be active. (But, you must look at the trx->state to
find out if the minimum trx id transaction itself is active, or already
committed.)
@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
UNIV_INLINE
trx_id_t
-trx_list_get_min_trx_id(void);
-/*=========================*/
+trx_rw_min_trx_id(void);
+/*===================*/
/****************************************************************//**
-Checks if a transaction with the given id is active.
-@return TRUE if active */
+Returns pointer to a transaction instance if a rw transaction with the given id
+is active. Caller must hold trx_sys->mutex. If the caller is not holding
+lock_sys->mutex, the transaction may already have been committed.
+@return transaction instance if active, or NULL;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
UNIV_INLINE
-ibool
-trx_is_active(
-/*==========*/
- trx_id_t trx_id);/*!< in: trx id of the transaction */
+trx_t*
+trx_rw_get_active_trx_by_id(
+/*========================*/
+ trx_id_t trx_id, /*!< in: trx id of the transaction */
+ ibool* corrupt); /*!< in: NULL or pointer to a flag
+ that will be set if corrupt */
+/****************************************************************//**
+Checks if a rw transaction with the given id is active. Caller must hold
+trx_sys->mutex. If the caller is not holding lock_sys->mutex, the
+transaction may already have been committed.
+@return true if a rw transaction with the given id is active. */
+UNIV_INLINE
+bool
+trx_rw_is_active_low(
+/*=================*/
+ trx_id_t trx_id, /*!< in: trx id of the transaction */
+ ibool* corrupt); /*!< in: NULL or pointer to a flag
+ that will be set if corrupt */
+/****************************************************************//**
+Checks if a rw transaction with the given id is active. If the caller is
+not holding lock_sys->mutex, the transaction may already have been
+committed.
+@return true if a rw transaction with the given id is active. */
+UNIV_INLINE
+bool
+trx_rw_is_active(
+/*=============*/
+ trx_id_t trx_id, /*!< in: trx id of the transaction */
+ ibool* corrupt); /*!< in: NULL or pointer to a flag
+ that will be set if corrupt */
+#ifdef UNIV_DEBUG
/****************************************************************//**
-Checks that trx is in the trx list.
+Checks whether a trx is in one of rw_trx_list or ro_trx_list.
@return TRUE if is in */
UNIV_INTERN
ibool
trx_in_trx_list(
/*============*/
- trx_t* in_trx);/*!< in: trx */
+ const trx_t* in_trx) /*!< in: transaction */
+ __attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
/***********************************************************//**
Assert that a transaction has been recovered.
@@ -339,8 +310,7 @@ UNIV_INTERN
void
trx_sys_update_mysql_binlog_offset(
/*===============================*/
- trx_sysf_t* sys_header,
- const char* file_name_in,/*!< in: MySQL log file name */
+ const char* file_name,/*!< in: MySQL log file name */
ib_int64_t offset, /*!< in: position in that log file */
ulint field, /*!< in: offset of the MySQL log info field in
the trx sys header */
@@ -353,14 +323,6 @@ void
trx_sys_print_mysql_binlog_offset(void);
/*===================================*/
/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header
-COMMIT set of fields if the magic number shows it valid and stores it
-in global variables. */
-UNIV_INTERN
-void
-trx_sys_print_committed_mysql_master_log_pos(void);
-/*==============================================*/
-/*****************************************************************//**
Prints to stderr the MySQL master log offset info in the trx system header if
the magic number shows it valid. */
UNIV_INTERN
@@ -388,14 +350,12 @@ UNIV_INTERN
void
trx_sys_file_format_tag_init(void);
/*==============================*/
-#ifndef UNIV_HOTBACKUP
/*****************************************************************//**
Shutdown/Close the transaction system. */
UNIV_INTERN
void
trx_sys_close(void);
/*===============*/
-#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Get the name representation of the file format from its id.
@return pointer to the name */
@@ -415,31 +375,30 @@ trx_sys_file_format_max_set(
ulint format_id, /*!< in: file format id */
const char** name); /*!< out: max file format name or
NULL if not needed. */
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
+/*********************************************************************
+Creates the rollback segments
+@return number of rollback segments that are active. */
UNIV_INTERN
-const char*
-trx_sys_file_format_max_get(void);
-/*=============================*/
+ulint
+trx_sys_create_rsegs(
+/*=================*/
+ ulint n_spaces, /*!< number of tablespaces for UNDO logs */
+ ulint n_rsegs); /*!< number of rollback segments to create */
/*****************************************************************//**
-Check for the max file format tag stored on disk.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+Get the number of transactions in the system, independent of their state.
+@return count of transactions in trx_sys_t::trx_list */
+UNIV_INLINE
ulint
-trx_sys_file_format_max_check(
-/*==========================*/
- ulint max_format_id); /*!< in: the max format id to check */
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
+trx_sys_get_n_rw_trx(void);
+/*======================*/
+
+/*********************************************************************
+Check if there are any active (non-prepared) transactions.
+@return total number of active transactions or 0 if none */
UNIV_INTERN
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
- const char** name, /*!< out: max file format name */
- ulint format_id); /*!< in: file format identifier */
+ulint
+trx_sys_any_active_transactions(void);
+/*=================================*/
#else /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Prints to stderr the MySQL binlog info in the system header if the
@@ -476,6 +435,32 @@ trx_sys_read_pertable_file_format_id(
datafile */
ulint *format_id); /*!< out: file format of the per-table
data file */
+#endif /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return pointer to the max format name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_max_get(void);
+/*=============================*/
+/*****************************************************************//**
+Check for the max file format tag stored on disk.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+trx_sys_file_format_max_check(
+/*==========================*/
+ ulint max_format_id); /*!< in: the max format id to check */
+/********************************************************************//**
+Update the file format tag in the system tablespace only if the given
+format id is greater than the known max id.
+@return TRUE if format_id was bigger than the known max id */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_upgrade(
+/*============================*/
+ const char** name, /*!< out: max file format name */
+ ulint format_id); /*!< in: file format identifier */
/*****************************************************************//**
Get the name representation of the file format from its id.
@return pointer to the name */
@@ -485,22 +470,20 @@ trx_sys_file_format_id_to_name(
/*===========================*/
const ulint id); /*!< in: id of the file format */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************
-Creates the rollback segments */
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Validate the trx_sys_t::trx_list. */
UNIV_INTERN
-void
-trx_sys_create_rsegs(
-/*=================*/
- ulint n_rsegs); /*!< number of rollback segments to create */
+ibool
+trx_sys_validate_trx_list(void);
+/*===========================*/
+#endif /* UNIV_DEBUG */
/* The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0
/* Space id and page no where the trx system file copy resides */
#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
-#define TRX_DOUBLEWRITE_SPACE 0xFFFFFFE0UL /* the doublewrite buffer tablespace if used */
-#define TRX_SYS_SPACE_MAX 9 /* reserved max space id for system tablespaces */
#include "fsp0fsp.h"
#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
@@ -545,23 +528,15 @@ We must remember this limit in order to keep file compatibility. */
@see trx_sys_mysql_master_log_name
@see trx_sys_mysql_bin_log_name */
#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
-#define TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN 480 /* (500 - 12) is dead line. */
/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
-//#if UNIV_PAGE_SIZE < 4096
-//# error "UNIV_PAGE_SIZE < 4096"
-//#endif
+#if UNIV_PAGE_SIZE_MIN < 4096
+# error "UNIV_PAGE_SIZE_MIN < 4096"
+#endif
/** The offset of the MySQL replication info in the trx system header;
-this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below. These are
-written at prepare time and are the main copy. */
+this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
-#define TRX_SYS_MYSQL_RELAY_LOG_INFO (UNIV_PAGE_SIZE - 1500)
-
-/** The copy of the above which is made at transaction COMMIT time. If binlog
-crash recovery rollbacks a PREPAREd transaction, they are copied back. */
-#define TRX_SYS_COMMIT_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 3000)
-#define TRX_SYS_COMMIT_RELAY_LOG_INFO (UNIV_PAGE_SIZE - 2500)
/** The offset of the MySQL binlog offset info in the trx system header */
#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
@@ -613,7 +588,7 @@ crash recovery rollbacks a PREPAREd transaction, they are copied back. */
/** If this is not yet set to TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
we must reset the doublewrite buffer, because starting from 4.1.x the
space id of a data page is stored into
-FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */
+FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
/*-------------------------------------------------------------*/
@@ -647,29 +622,22 @@ identifier is added to this 64-bit constant. */
#define TRX_DESCR_ARRAY_INITIAL_SIZE 1000
#ifndef UNIV_HOTBACKUP
-/** Doublewrite control struct */
-struct trx_doublewrite_struct{
- mutex_t mutex; /*!< mutex protecting the first_free field and
- write_buf */
- ulint block1; /*!< the page number of the first
- doublewrite block (64 pages) */
- ulint block2; /*!< page number of the second block */
- ulint first_free; /*!< first free position in write_buf measured
- in units of UNIV_PAGE_SIZE */
- byte* write_buf; /*!< write buffer used in writing to the
- doublewrite buffer, aligned to an
- address divisible by UNIV_PAGE_SIZE
- (which is required by Windows aio) */
- byte* write_buf_unaligned;
- /*!< pointer to write_buf, but unaligned */
- buf_page_t**
- buf_block_arr; /*!< array to store pointers to the buffer
- blocks which have been cached to write_buf */
-};
-
-/** The transaction system central memory data structure; protected by the
-kernel mutex */
-struct trx_sys_struct{
+/** The transaction system central memory data structure. */
+struct trx_sys_t{
+
+ ib_mutex_t mutex; /*!< mutex protecting most fields in
+ this structure except when noted
+ otherwise */
+ ulint n_prepared_trx; /*!< Number of transactions currently
+ in the XA PREPARED state */
+ ulint n_prepared_recovered_trx; /*!< Number of transactions
+ currently in XA PREPARED state that are
+ also recovered. Such transactions cannot
+ be added during runtime. They can only
+ occur after recovery if mysqld crashed
+ while there were XA PREPARED
+ transactions. We disable query cache
+ if such transactions exist. */
trx_id_t max_trx_id; /*!< The smallest number not yet
assigned as a transaction id or
transaction number */
@@ -685,35 +653,53 @@ struct trx_sys_struct{
descriptors array. */
char pad3[64]; /*!< Ensure descriptors do not share
cache line with other fields */
- UT_LIST_BASE_NODE_T(trx_t) trx_list;
- /*!< List of active and committed in
- memory transactions, sorted on trx id,
- biggest first */
+#ifdef UNIV_DEBUG
+ trx_id_t rw_max_trx_id; /*!< Max trx id of read-write transactions
+ which exist or existed */
+#endif
+ trx_list_t rw_trx_list; /*!< List of active and committed in
+ memory read-write transactions, sorted
+ on trx id, biggest first. Recovered
+ transactions are always on this list. */
char pad4[64]; /*!< Ensure list base nodes do not
share cache line with other fields */
- UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list;
- /*!< List of transactions created
- for MySQL */
+ trx_list_t ro_trx_list; /*!< List of active and committed in
+ memory read-only transactions, sorted
+ on trx id, biggest first. NOTE:
+ The order for read-only transactions
+ is not necessary. We should exploit
+ this and increase concurrency during
+ add/remove. */
char pad5[64]; /*!< Ensure list base nodes do not
share cache line with other fields */
- UT_LIST_BASE_NODE_T(trx_t) trx_serial_list;
+ trx_list_t mysql_trx_list; /*!< List of transactions created
+ for MySQL. All transactions on
+ ro_trx_list are on mysql_trx_list. The
+ rw_trx_list can contain system
+ transactions and recovered transactions
+ that will not be in the mysql_trx_list.
+ There can be active non-locking
+ auto-commit read only transactions that
+ are on this list but not on ro_trx_list.
+ mysql_trx_list may additionally contain
+ transactions that have not yet been
+ started in InnoDB. */
+ char pad6[64]; /*!< Ensure list base nodes do not
+ share cache line with other fields */
+ trx_list_t trx_serial_list;
/*!< trx->no ordered List of
transactions in either TRX_PREPARED or
TRX_ACTIVE which have already been
assigned a serialization number */
- char pad6[64]; /*!< Ensure trx_serial_list does not
- share cache line with other fields */
- UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list;
- /*!< List of rollback segment
- objects */
char pad7[64]; /*!< Ensure list base nodes do not
share cache line with other fields */
- trx_rseg_t* latest_rseg; /*!< Latest rollback segment in the
- round-robin assignment of rollback
- segments to transactions */
- trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
+ trx_rseg_t* const rseg_array[TRX_SYS_N_RSEGS];
/*!< Pointer array to rollback
- segments; NULL if slot not in use */
+ segments; NULL if slot not in use;
+ created and destroyed in
+ single-threaded mode; not protected
+ by any mutex, because it is read-only
+ during multi-threaded operation */
ulint rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY
list (update undo logs for committed
transactions), protected by
diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic
index 17c94105cee..699148cff6d 100644
--- a/storage/xtradb/include/trx0sys.ic
+++ b/storage/xtradb/include/trx0sys.ic
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -71,40 +71,6 @@ trx_sys_hdr_page(
}
/***************************************************************//**
-Checks if a space is the system tablespaces.
-@return TRUE if system tablespace */
-UNIV_INLINE
-ibool
-trx_sys_sys_space(
-/*==============*/
- ulint space) /*!< in: space */
-{
- if (srv_doublewrite_file) {
- /* several spaces are reserved */
- return((ibool)(space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE));
- } else {
- return((ibool)(space == TRX_SYS_SPACE));
- }
-}
-
-/***************************************************************//**
-Checks if a space is the doublewrite tablespace.
-@return TRUE if doublewrite tablespace */
-UNIV_INLINE
-ibool
-trx_sys_doublewrite_space(
-/*======================*/
- ulint space) /*!< in: space */
-{
- if (srv_doublewrite_file) {
- /* doublewrite buffer is separated */
- return((ibool)(space == TRX_DOUBLEWRITE_SPACE));
- } else {
- return((ibool)(space == TRX_SYS_SPACE));
- }
-}
-
-/***************************************************************//**
Gets the pointer in the nth slot of the rseg array.
@return pointer to rseg object, NULL if slot not in use */
UNIV_INLINE
@@ -114,28 +80,11 @@ trx_sys_get_nth_rseg(
trx_sys_t* sys, /*!< in: trx system */
ulint n) /*!< in: index of slot */
{
- ut_ad(mutex_own(&(kernel_mutex)));
ut_ad(n < TRX_SYS_N_RSEGS);
return(sys->rseg_array[n]);
}
-/***************************************************************//**
-Sets the pointer in the nth slot of the rseg array. */
-UNIV_INLINE
-void
-trx_sys_set_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n, /*!< in: index of slot */
- trx_rseg_t* rseg) /*!< in: pointer to rseg object, NULL if slot
- not in use */
-{
- ut_ad(n < TRX_SYS_N_RSEGS);
-
- sys->rseg_array[n] = rseg;
-}
-
/**********************************************************************//**
Gets a pointer to the transaction system header and x-latches its page.
@return pointer to system header, page x-latched. */
@@ -171,7 +120,6 @@ trx_sysf_rseg_get_space(
ulint i, /*!< in: slot index == rseg id */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(mutex_own(&(kernel_mutex)));
ut_ad(sys_header);
ut_ad(i < TRX_SYS_N_RSEGS);
@@ -193,7 +141,6 @@ trx_sysf_rseg_get_page_no(
mtr_t* mtr) /*!< in: mtr */
{
ut_ad(sys_header);
- ut_ad(mutex_own(&(kernel_mutex)));
ut_ad(i < TRX_SYS_N_RSEGS);
return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
@@ -213,7 +160,6 @@ trx_sysf_rseg_set_space(
ulint space, /*!< in: space id */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(mutex_own(&(kernel_mutex)));
ut_ad(sys_header);
ut_ad(i < TRX_SYS_N_RSEGS);
@@ -237,7 +183,6 @@ trx_sysf_rseg_set_page_no(
slot is reset to unused */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(mutex_own(&(kernel_mutex)));
ut_ad(sys_header);
ut_ad(i < TRX_SYS_N_RSEGS);
@@ -285,30 +230,96 @@ trx_read_trx_id(
}
/****************************************************************//**
-Looks for the trx handle with the given id in trx_list.
-@return the trx handle or NULL if not found */
+Looks for the trx handle with the given id in rw_trx_list.
+The caller must be holding trx_sys->mutex.
+@return the trx handle or NULL if not found;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
UNIV_INLINE
trx_t*
-trx_get_on_id(
-/*==========*/
+trx_get_rw_trx_by_id(
+/*=================*/
trx_id_t trx_id) /*!< in: trx id to search for */
{
- trx_t* trx;
+ trx_t* trx;
+ ulint len;
+ trx_t* first;
+
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ len = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
- ut_ad(mutex_own(&(kernel_mutex)));
+ if (len == 0) {
+ return(NULL);
+ }
+
+ /* Because the list is ordered on trx id in descending order,
+ we try to speed things up a bit. */
+
+ trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+ assert_trx_in_rw_list(trx);
+
+ if (trx_id == trx->id) {
+ return(trx);
+ } else if (len == 1 || trx_id > trx->id) {
+ return(NULL);
+ }
+
+ first = trx;
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+ trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
+ assert_trx_in_rw_list(trx);
- while (trx != NULL) {
- if (trx_id == trx->id) {
+ if (trx_id == trx->id) {
+ return(trx);
+ } else if (len == 2 || trx_id < trx->id) {
+ return(NULL);
+ }
- return(trx);
+ /* Search the list from the lower end (tail). */
+ if (trx_id < (first->id + trx->id) >> 1) {
+ for (trx = UT_LIST_GET_PREV(trx_list, trx);
+ trx != NULL && trx_id > trx->id;
+ trx = UT_LIST_GET_PREV(trx_list, trx)) {
+ assert_trx_in_rw_list(trx);
+ }
+ } else {
+ for (trx = UT_LIST_GET_NEXT(trx_list, first);
+ trx != NULL && trx_id < trx->id;
+ trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+ assert_trx_in_rw_list(trx);
}
+ }
+
+ return((trx != NULL && trx->id == trx_id) ? trx : NULL);
+}
- trx = UT_LIST_GET_NEXT(trx_list, trx);
+/****************************************************************//**
+Returns the minimum trx id in trx list. This is the smallest id for which
+the trx can possibly be active. (But, you must look at the trx->state
+to find out if the minimum trx id transaction itself is active, or already
+committed.) The caller must be holding trx_sys_t::mutex.
+@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
+UNIV_INLINE
+trx_id_t
+trx_rw_min_trx_id_low(void)
+/*=======================*/
+{
+ trx_id_t id;
+ const trx_t* trx;
+
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
+
+ if (trx == NULL) {
+ id = trx_sys->max_trx_id;
+ } else {
+ assert_trx_in_rw_list(trx);
+ id = trx->id;
}
- return(NULL);
+ return(id);
}
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
@@ -321,62 +332,138 @@ trx_assert_recovered(
/*=================*/
trx_id_t trx_id) /*!< in: transaction identifier */
{
- trx_t* trx;
+ const trx_t* trx;
- mutex_enter(&kernel_mutex);
- trx = trx_get_on_id(trx_id);
- ut_a(trx);
+ mutex_enter(&trx_sys->mutex);
+
+ trx = trx_get_rw_trx_by_id(trx_id);
ut_a(trx->is_recovered);
- mutex_exit(&kernel_mutex);
+
+ mutex_exit(&trx_sys->mutex);
return(TRUE);
}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
/****************************************************************//**
-Returns the minumum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->conc_state to
-find out if the minimum trx id transaction itself is active, or already
+Returns the minimum trx id in rw trx list. This is the smallest id for which
+the rw trx can possibly be active. (But, you must look at the trx->state
+to find out if the minimum trx id transaction itself is active, or already
committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
+@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
UNIV_INLINE
trx_id_t
-trx_list_get_min_trx_id(void)
-/*=========================*/
+trx_rw_min_trx_id(void)
+/*===================*/
{
- trx_t* trx;
+ trx_id_t id;
- ut_ad(mutex_own(&(kernel_mutex)));
+ mutex_enter(&trx_sys->mutex);
- trx = UT_LIST_GET_LAST(trx_sys->trx_list);
+ id = trx_rw_min_trx_id_low();
- if (trx == NULL) {
+ mutex_exit(&trx_sys->mutex);
+
+ return(id);
+}
+
+/****************************************************************//**
+Returns pointer to a transaction instance if a rw transaction with the given id
+is active. Caller must hold trx_sys->mutex. If the caller is not holding
+lock_sys->mutex, the transaction may already have been committed.
+@return transaction instance if active, or NULL;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
+UNIV_INLINE
+trx_t*
+trx_rw_get_active_trx_by_id(
+/*========================*/
+ trx_id_t trx_id, /*!< in: trx id of the transaction */
+ ibool* corrupt) /*!< in: NULL or pointer to a flag
+ that will be set if corrupt */
+{
+ trx_t* trx;
+
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ if (trx_id < trx_rw_min_trx_id_low()) {
+
+ trx = NULL;
+ } else if (trx_id >= trx_sys->max_trx_id) {
+
+ /* There must be corruption: we let the caller handle the
+ diagnostic prints in this case. */
- return(trx_sys->max_trx_id);
+ trx = NULL;
+ if (corrupt != NULL) {
+ *corrupt = TRUE;
+ }
+ } else {
+ trx = trx_get_rw_trx_by_id(trx_id);
+
+ if (trx != NULL
+ && trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
+
+ trx = NULL;
+ }
}
- return(trx->id);
+ return(trx);
}
/****************************************************************//**
-Checks if a transaction with the given id is active.
-@return TRUE if active */
+Checks if a rw transaction with the given id is active. Caller must hold
+trx_sys->mutex. If the caller is not holding lock_sys->mutex, the
+transaction may already have been committed.
+@return true if a rw transaction with the given id is active. */
UNIV_INLINE
-ibool
-trx_is_active(
-/*==========*/
- trx_id_t trx_id) /*!< in: trx id of the transaction */
+bool
+trx_rw_is_active_low(
+/*=================*/
+ trx_id_t trx_id, /*!< in: trx id of the transaction */
+ ibool* corrupt) /*!< in: NULL or pointer to a flag
+ that will be set if corrupt */
{
- ut_ad(mutex_own(&(kernel_mutex)));
+ ut_ad(mutex_own(&trx_sys->mutex));
- if (trx_find_descriptor(trx_sys->descriptors,
- trx_sys->descr_n_used,
- trx_id)) {
+ if (UNIV_UNLIKELY(trx_id >= trx_sys->max_trx_id)) {
- return(TRUE);
+ /* There must be corruption: we let the caller handle the
+ diagnostic prints in this case. */
+
+ if (corrupt != NULL) {
+ *corrupt = TRUE;
+ }
+
+ return(false);
}
- return(FALSE);
+ return(trx_find_descriptor(trx_sys->descriptors, trx_sys->descr_n_used,
+ trx_id) != NULL);
+}
+
+/****************************************************************//**
+Checks if a rw transaction with the given id is active. If the caller is
+not holding lock_sys->mutex, the transaction may already have been
+committed.
+@return true if a rw transaction with the given id is active. */
+UNIV_INLINE
+bool
+trx_rw_is_active(
+/*=============*/
+ trx_id_t trx_id, /*!< in: trx id of the transaction */
+ ibool* corrupt) /*!< in: NULL or pointer to a flag
+ that will be set if corrupt */
+{
+ bool res;
+
+ mutex_enter(&trx_sys->mutex);
+
+ res = trx_rw_is_active_low(trx_id, corrupt);
+
+ mutex_exit(&trx_sys->mutex);
+
+ return(res);
}
/*****************************************************************//**
@@ -387,9 +474,7 @@ trx_id_t
trx_sys_get_new_trx_id(void)
/*========================*/
{
- trx_id_t id;
-
- ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(mutex_own(&trx_sys->mutex));
/* VERY important: after the database is started, max_trx_id value is
divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
@@ -398,14 +483,60 @@ trx_sys_get_new_trx_id(void)
Thus trx id values will not overlap when the database is
repeatedly started! */
- if ((ulint) trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN == 0) {
+ if (!(trx_sys->max_trx_id % (trx_id_t) TRX_SYS_TRX_ID_WRITE_MARGIN)) {
trx_sys_flush_max_trx_id();
}
- id = trx_sys->max_trx_id++;
+ return(trx_sys->max_trx_id++);
+}
- return(id);
+/*****************************************************************//**
+Determines the maximum transaction id.
+@return maximum currently allocated trx id; will be stale after the
+next call to trx_sys_get_new_trx_id() */
+UNIV_INLINE
+trx_id_t
+trx_sys_get_max_trx_id(void)
+/*========================*/
+{
+#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
+ trx_id_t max_trx_id;
+#endif
+
+ ut_ad(!mutex_own(&trx_sys->mutex));
+
+#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
+ /* Avoid torn reads. */
+ mutex_enter(&trx_sys->mutex);
+ max_trx_id = trx_sys->max_trx_id;
+ mutex_exit(&trx_sys->mutex);
+ return(max_trx_id);
+#else
+ /* Perform a dirty read. Callers should be prepared for stale
+ values, and we know that the value fits in a machine word, so
+ that it will be read and written atomically. */
+ return(trx_sys->max_trx_id);
+#endif
+}
+
+/*****************************************************************//**
+Get the number of transactions in the system, independent of their state.
+@return count of transactions in trx_sys_t::rw_trx_list */
+UNIV_INLINE
+ulint
+trx_sys_get_n_rw_trx(void)
+/*======================*/
+{
+ ulint n_trx;
+
+ mutex_enter(&trx_sys->mutex);
+
+ n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+
+ mutex_exit(&trx_sys->mutex);
+
+ return(n_trx);
}
@@ -418,10 +549,10 @@ trx_find_descriptor(
/*================*/
const trx_id_t* descriptors, /*!< in: descriptors array */
ulint n_descr, /*!< in: array size */
- trx_id_t trx_id) /*!< in: trx pointer */
+ trx_id_t trx_id) /*!< in: trx id */
{
ut_ad(descriptors != trx_sys->descriptors ||
- mutex_own(&kernel_mutex));
+ mutex_own(&trx_sys->mutex));
if (UNIV_UNLIKELY(n_descr == 0)) {
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
index 4ab8e5b2cc5..82e9a90fcfb 100644
--- a/storage/xtradb/include/trx0trx.h
+++ b/storage/xtradb/include/trx0trx.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -31,23 +31,18 @@ Created 3/26/1996 Heikki Tuuri
#include "dict0types.h"
#ifndef UNIV_HOTBACKUP
#include "lock0types.h"
+#include "log0log.h"
#include "usr0types.h"
#include "que0types.h"
#include "mem0mem.h"
#include "read0types.h"
#include "trx0xa.h"
#include "ut0vec.h"
+#include "fts0fts.h"
/** Dummy session used currently in MySQL interface */
extern sess_t* trx_dummy_sess;
-/** Number of transactions currently allocated for MySQL: protected by
-the kernel mutex */
-extern ulint trx_n_mysql_transactions;
-/** Number of transactions currently in the XA PREPARED state: protected by
-the kernel mutex */
-extern ulint trx_n_prepared;
-
/********************************************************************//**
In XtraDB it is impossible for a transaction to own a search latch outside of
InnoDB code, so there is nothing to release on demand. We keep this function to
@@ -82,15 +77,6 @@ const dict_index_t*
trx_get_error_info(
/*===============*/
const trx_t* trx); /*!< in: trx object */
-/****************************************************************//**
-Creates and initializes a transaction object.
-@return own: the transaction */
-UNIV_INTERN
-trx_t*
-trx_create(
-/*=======*/
- sess_t* sess) /*!< in: session */
- __attribute__((nonnull));
/********************************************************************//**
Creates a transaction object for MySQL.
@return own: transaction object */
@@ -106,11 +92,11 @@ trx_t*
trx_allocate_for_background(void);
/*=============================*/
/********************************************************************//**
-Frees a transaction object. */
+Frees a transaction object of a background operation of the master thread. */
UNIV_INTERN
void
-trx_free(
-/*=====*/
+trx_free_for_background(
+/*====================*/
trx_t* trx); /*!< in, own: trx object */
/********************************************************************//**
At shutdown, frees a transaction object that is in the PREPARED state. */
@@ -127,13 +113,6 @@ void
trx_free_for_mysql(
/*===============*/
trx_t* trx); /*!< in, own: trx object */
-/********************************************************************//**
-Frees a transaction object of a background operation of the master thread. */
-UNIV_INTERN
-void
-trx_free_for_background(
-/*====================*/
- trx_t* trx); /*!< in, own: trx object */
/****************************************************************//**
Creates trx objects for transactions and initializes the trx list of
trx_sys at database start. Rollback segment and undo log lists must
@@ -144,51 +123,87 @@ UNIV_INTERN
void
trx_lists_init_at_db_start(void);
/*============================*/
-/****************************************************************//**
-Starts a new transaction.
-@return TRUE if success, FALSE if the rollback segment could not
-support this many transactions */
-UNIV_INTERN
-ibool
-trx_start(
-/*======*/
- trx_t* trx, /*!< in: transaction */
- ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-/****************************************************************//**
-Starts a new transaction.
-@return TRUE */
-UNIV_INTERN
-ibool
-trx_start_low(
-/*==========*/
- trx_t* trx, /*!< in: transaction */
- ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
+
+#ifdef UNIV_DEBUG
+#define trx_start_if_not_started_xa(t) \
+ { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_if_not_started_xa_low((t)); \
+ }
+#else
+#define trx_start_if_not_started_xa(t) \
+ trx_start_if_not_started_xa_low((t))
+#endif /* UNIV_DEBUG */
+
/*************************************************************//**
Starts the transaction if it is not yet started. */
-UNIV_INLINE
+UNIV_INTERN
void
-trx_start_if_not_started(
-/*=====================*/
+trx_start_if_not_started_xa_low(
+/*============================*/
trx_t* trx); /*!< in: transaction */
/*************************************************************//**
-Starts the transaction if it is not yet started. Assumes we have reserved
-the kernel mutex! */
-UNIV_INLINE
+Starts the transaction if it is not yet started. */
+UNIV_INTERN
void
trx_start_if_not_started_low(
/*=========================*/
trx_t* trx); /*!< in: transaction */
+
+#ifdef UNIV_DEBUG
+#define trx_start_if_not_started(t) \
+ { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_if_not_started_low((t)); \
+ }
+#else
+#define trx_start_if_not_started(t) \
+ trx_start_if_not_started_low((t))
+#endif /* UNIV_DEBUG */
+
+/*************************************************************//**
+Starts the transaction for a DDL operation. */
+UNIV_INTERN
+void
+trx_start_for_ddl_low(
+/*==================*/
+ trx_t* trx, /*!< in/out: transaction */
+ trx_dict_op_t op) /*!< in: dictionary operation type */
+ __attribute__((nonnull));
+
+#ifdef UNIV_DEBUG
+#define trx_start_for_ddl(t, o) \
+ { \
+ ut_ad((t)->start_file == 0); \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_for_ddl_low((t), (o)); \
+ }
+#else
+#define trx_start_for_ddl(t, o) \
+ trx_start_for_ddl_low((t), (o))
+#endif /* UNIV_DEBUG */
+
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx); /*!< in: transaction */
+trx_commit(
+/*=======*/
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
+/****************************************************************//**
+Commits a transaction and a mini-transaction. */
+UNIV_INTERN
+void
+trx_commit_low(
+/*===========*/
+ trx_t* trx, /*!< in/out: transaction */
+ mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
+ or NULL if trx made no modifications */
+ __attribute__((nonnull(1)));
/****************************************************************//**
Cleans up a transaction at database startup. The cleanup is needed if
the transaction already got to the middle of a commit when the database
@@ -202,18 +217,17 @@ trx_cleanup_at_db_startup(
Does the transaction commit for MySQL.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
trx_commit_for_mysql(
/*=================*/
- trx_t* trx); /*!< in: trx handle */
+ trx_t* trx); /*!< in/out: transaction */
/**********************************************************************//**
-Does the transaction prepare for MySQL.
-@return 0 or error number */
+Does the transaction prepare for MySQL. */
UNIV_INTERN
-ulint
+void
trx_prepare_for_mysql(
/*==================*/
- trx_t* trx); /*!< in: trx handle */
+ trx_t* trx); /*!< in/out: trx handle */
/**********************************************************************//**
This function is used to find number of prepared transactions and
their transaction objects for a recovery.
@@ -227,7 +241,9 @@ trx_recover_for_mysql(
/*******************************************************************//**
This function is used to find one X/Open XA distributed transaction
which is in the prepared state
-@return trx or NULL; on match, the trx->xid will be invalidated */
+@return trx or NULL; on match, the trx->xid will be invalidated;
+note that the trx may have been committed, unless the caller is
+holding lock_sys->mutex */
UNIV_INTERN
trx_t *
trx_get_trx_by_xid(
@@ -235,13 +251,13 @@ trx_get_trx_by_xid(
const XID* xid); /*!< in: X/Open XA transaction identifier */
/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE.
-@return 0 or error number */
+with trx->flush_log_later == TRUE. */
UNIV_INTERN
-ulint
+void
trx_commit_complete_for_mysql(
/*==========================*/
- trx_t* trx); /*!< in: trx handle */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
/**********************************************************************//**
Marks the latest SQL statement ended. */
UNIV_INTERN
@@ -259,86 +275,20 @@ read_view_t*
trx_assign_read_view(
/*=================*/
trx_t* trx); /*!< in: active transaction */
-/***********************************************************//**
-The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
-the TRX_QUE_RUNNING state and releases query threads which were
-waiting for a lock in the wait_thrs list. */
-UNIV_INTERN
-void
-trx_end_lock_wait(
-/*==============*/
- trx_t* trx); /*!< in: transaction */
/****************************************************************//**
-Sends a signal to a trx object. */
+Prepares a transaction for commit/rollback. */
UNIV_INTERN
void
-trx_sig_send(
-/*=========*/
- trx_t* trx, /*!< in: trx handle */
- ulint type, /*!< in: signal type */
- ulint sender, /*!< in: TRX_SIG_SELF or
- TRX_SIG_OTHER_SESS */
- que_thr_t* receiver_thr, /*!< in: query thread which wants the
- reply, or NULL; if type is
- TRX_SIG_END_WAIT, this must be NULL */
- trx_savept_t* savept, /*!< in: possible rollback savepoint, or
- NULL */
- que_thr_t** next_thr); /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the parameter
- is NULL, it is ignored */
-/****************************************************************//**
-Send the reply message when a signal in the queue of the trx has
-been handled. */
-UNIV_INTERN
-void
-trx_sig_reply(
-/*==========*/
- trx_sig_t* sig, /*!< in: signal */
- que_thr_t** next_thr); /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/****************************************************************//**
-Removes the signal object from a trx signal queue. */
-UNIV_INTERN
-void
-trx_sig_remove(
-/*===========*/
- trx_t* trx, /*!< in: trx handle */
- trx_sig_t* sig); /*!< in, own: signal */
-/****************************************************************//**
-Starts handling of a trx signal. */
-UNIV_INTERN
-void
-trx_sig_start_handle(
-/*=================*/
- trx_t* trx, /*!< in: trx handle */
- que_thr_t** next_thr); /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/****************************************************************//**
-Ends signal handling. If the session is in the error state, and
-trx->graph_before_signal_handling != NULL, returns control to the error
-handling routine of the graph (currently only returns the control to the
-graph root which then sends an error message to the client). */
-UNIV_INTERN
-void
-trx_end_signal_handling(
-/*====================*/
- trx_t* trx); /*!< in: trx */
+trx_commit_or_rollback_prepare(
+/*===========================*/
+ trx_t* trx); /*!< in/out: transaction */
/*********************************************************************//**
Creates a commit command node struct.
@return own: commit node struct */
UNIV_INTERN
commit_node_t*
-commit_node_create(
-/*===============*/
+trx_commit_node_create(
+/*===================*/
mem_heap_t* heap); /*!< in: mem heap where created */
/***********************************************************//**
Performs an execution step for a commit type node in a query graph.
@@ -350,37 +300,59 @@ trx_commit_step(
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
-Prints info about a transaction to the given file. The caller must own the
-kernel mutex. */
+Prints info about a transaction.
+Caller must hold trx_sys->mutex. */
+UNIV_INTERN
+void
+trx_print_low(
+/*==========*/
+ FILE* f,
+ /*!< in: output stream */
+ const trx_t* trx,
+ /*!< in: transaction */
+ ulint max_query_len,
+ /*!< in: max query length to print,
+ or 0 to use the default max length */
+ ulint n_rec_locks,
+ /*!< in: lock_number_of_rows_locked(&trx->lock) */
+ ulint n_trx_locks,
+ /*!< in: length of trx->lock.trx_locks */
+ ulint heap_size)
+ /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
+ __attribute__((nonnull));
+
+/**********************************************************************//**
+Prints info about a transaction.
+The caller must hold lock_sys->mutex and trx_sys->mutex.
+When possible, use trx_print() instead. */
+UNIV_INTERN
+void
+trx_print_latched(
+/*==============*/
+ FILE* f, /*!< in: output stream */
+ const trx_t* trx, /*!< in: transaction */
+ ulint max_query_len) /*!< in: max query length to print,
+ or 0 to use the default max length */
+ __attribute__((nonnull));
+
+/**********************************************************************//**
+Prints info about a transaction.
+Acquires and releases lock_sys->mutex and trx_sys->mutex. */
UNIV_INTERN
void
trx_print(
/*======*/
- FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- ulint max_query_len); /*!< in: max query length to print, or 0 to
- use the default max length */
-
-/** Type of data dictionary operation */
-typedef enum trx_dict_op {
- /** The transaction is not modifying the data dictionary. */
- TRX_DICT_OP_NONE = 0,
- /** The transaction is creating a table or an index, or
- dropping a table. The table must be dropped in crash
- recovery. This and TRX_DICT_OP_NONE are the only possible
- operation modes in crash recovery. */
- TRX_DICT_OP_TABLE = 1,
- /** The transaction is creating or dropping an index in an
- existing table. In crash recovery, the data dictionary
- must be locked, but the table must not be dropped. */
- TRX_DICT_OP_INDEX = 2
-} trx_dict_op_t;
+ FILE* f, /*!< in: output stream */
+ const trx_t* trx, /*!< in: transaction */
+ ulint max_query_len) /*!< in: max query length to print,
+ or 0 to use the default max length */
+ __attribute__((nonnull));
/**********************************************************************//**
Determine if a transaction is a dictionary operation.
@return dictionary operation mode */
UNIV_INLINE
-enum trx_dict_op
+enum trx_dict_op_t
trx_get_dict_operation(
/*===================*/
const trx_t* trx) /*!< in: transaction */
@@ -392,18 +364,49 @@ void
trx_set_dict_operation(
/*===================*/
trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op op); /*!< in: operation, not
+ enum trx_dict_op_t op); /*!< in: operation, not
TRX_DICT_OP_NONE */
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
+Determines if a transaction is in the given state.
+The caller must hold trx_sys->mutex, or it must be the thread
+that is serving a running transaction.
+A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
+unless it is a non-locking autocommit read only transaction, which is only
+in trx_sys->mysql_trx_list.
+@return TRUE if trx->state == state */
+UNIV_INLINE
+ibool
+trx_state_eq(
+/*=========*/
+ const trx_t* trx, /*!< in: transaction */
+ trx_state_t state) /*!< in: state;
+ if state != TRX_STATE_NOT_STARTED
+ asserts that
+ trx->state != TRX_STATE_NOT_STARTED */
+ __attribute__((nonnull, warn_unused_result));
+# ifdef UNIV_DEBUG
+/**********************************************************************//**
+Asserts that a transaction has been started.
+The caller must hold trx_sys->mutex.
+@return TRUE if started */
+UNIV_INTERN
+ibool
+trx_assert_started(
+/*===============*/
+ const trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull, warn_unused_result));
+# endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
Determines if the currently running transaction has been interrupted.
@return TRUE if interrupted */
UNIV_INTERN
ibool
trx_is_interrupted(
/*===============*/
- trx_t* trx); /*!< in: transaction */
+ const trx_t* trx); /*!< in: transaction */
/**********************************************************************//**
Determines if the currently running transaction is in strict mode.
@return TRUE if strict */
@@ -421,7 +424,7 @@ Calculates the "weight" of a transaction. The weight of one transaction
is estimated as the number of altered rows + the number of locked rows.
@param t transaction
@return transaction weight */
-#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->trx_locks))
+#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
/*******************************************************************//**
Compares the "weight" (or size) of two transactions. Transactions that
@@ -449,6 +452,16 @@ trx_get_que_state_str(
/*==================*/
const trx_t* trx); /*!< in: transaction */
+/****************************************************************//**
+Assign a read-only transaction a rollback-segment, if it is attempting
+to write to a TEMPORARY table. */
+UNIV_INTERN
+void
+trx_assign_rseg(
+/*============*/
+ trx_t* trx); /*!< A read-only transaction that
+ needs to be assigned a RBS. */
+
/*************************************************************//**
Callback function for trx_find_descriptor() to compare trx IDs. */
UNIV_INTERN
@@ -466,53 +479,309 @@ trx_release_descriptor(
/*===================*/
trx_t* trx); /*!< in: trx pointer */
-/* Signal to a transaction */
-struct trx_sig_struct{
- unsigned type:3; /*!< signal type */
- unsigned sender:1; /*!< TRX_SIG_SELF or
- TRX_SIG_OTHER_SESS */
- que_thr_t* receiver; /*!< non-NULL if the sender of the signal
- wants reply after the operation induced
- by the signal is completed */
- trx_savept_t savept; /*!< possible rollback savepoint */
- UT_LIST_NODE_T(trx_sig_t)
- signals; /*!< queue of pending signals to the
- transaction */
- UT_LIST_NODE_T(trx_sig_t)
- reply_signals; /*!< list of signals for which the sender
- transaction is waiting a reply */
+/*******************************************************************//**
+Transactions that aren't started by the MySQL server don't set
+the trx_t::mysql_thd field. For such transactions we set the lock
+wait timeout to 0 instead of the user configured value that comes
+from innodb_lock_wait_timeout via trx_t::mysql_thd.
+@param trx transaction
+@return lock wait timeout in seconds */
+#define trx_lock_wait_timeout_get(trx) \
+ ((trx)->mysql_thd != NULL \
+ ? thd_lock_wait_timeout((trx)->mysql_thd) \
+ : 0)
+
+/*******************************************************************//**
+Determine if the transaction is a non-locking autocommit select
+(implied read-only).
+@param t transaction
+@return true if non-locking autocommit select transaction. */
+#define trx_is_autocommit_non_locking(t) \
+((t)->auto_commit && (t)->will_lock == 0)
+
+/*******************************************************************//**
+Determine if the transaction is a non-locking autocommit select
+with an explicit check for the read-only status.
+@param t transaction
+@return true if non-locking autocommit read-only transaction. */
+#define trx_is_ac_nl_ro(t) \
+((t)->read_only && trx_is_autocommit_non_locking((t)))
+
+/*******************************************************************//**
+Assert that the transaction is in the trx_sys_t::rw_trx_list */
+#define assert_trx_in_rw_list(t) do { \
+ ut_ad(!(t)->read_only); \
+ assert_trx_in_list(t); \
+} while (0)
+
+/*******************************************************************//**
+Assert that the transaction is either in trx_sys->ro_trx_list or
+trx_sys->rw_trx_list but not both and it cannot be an autocommit
+non-locking select */
+#define assert_trx_in_list(t) do { \
+ ut_ad((t)->in_ro_trx_list == (t)->read_only); \
+ ut_ad((t)->in_rw_trx_list == !(t)->read_only); \
+ ut_ad(!trx_is_autocommit_non_locking((t))); \
+ switch ((t)->state) { \
+ case TRX_STATE_PREPARED: \
+ /* fall through */ \
+ case TRX_STATE_ACTIVE: \
+ case TRX_STATE_COMMITTED_IN_MEMORY: \
+ continue; \
+ case TRX_STATE_NOT_STARTED: \
+ break; \
+ } \
+ ut_error; \
+} while (0)
+
+#ifdef UNIV_DEBUG
+/*******************************************************************//**
+Assert that an autocommit non-locking select cannot be in the
+ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
+The transaction must be in the mysql_trx_list. */
+# define assert_trx_nonlocking_or_in_list(t) \
+ do { \
+ if (trx_is_autocommit_non_locking(t)) { \
+ trx_state_t t_state = (t)->state; \
+ ut_ad((t)->read_only); \
+ ut_ad(!(t)->is_recovered); \
+ ut_ad(!(t)->in_ro_trx_list); \
+ ut_ad(!(t)->in_rw_trx_list); \
+ ut_ad((t)->in_mysql_trx_list); \
+ ut_ad(t_state == TRX_STATE_NOT_STARTED \
+ || t_state == TRX_STATE_ACTIVE); \
+ } else { \
+ assert_trx_in_list(t); \
+ } \
+ } while (0)
+#else /* UNIV_DEBUG */
+/*******************************************************************//**
+Assert that an autocommit non-locking select cannot be in the
+ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
+The transaction must be in the mysql_trx_list. */
+# define assert_trx_nonlocking_or_in_list(trx) ((void)0)
+#endif /* UNIV_DEBUG */
+
+/*******************************************************************//**
+Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state
+captures the state of the query thread during the execution of a query.
+This is different from a transaction state. The query state of a transaction
+can be updated asynchronously by other threads. The other threads can be
+system threads, like the timeout monitor thread or user threads executing
+other queries. Another thing to be mindful of is that there is a delay between
+when a query thread is put into LOCK_WAIT state and before it actually starts
+waiting. Between these two events it is possible that the query thread is
+granted the lock it was waiting for, which implies that the state can be changed
+asynchronously.
+
+All these operations take place within the context of locking. Therefore state
+changes within the locking code must acquire both the lock mutex and the
+trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or
+trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient
+to only acquire the trx->mutex.
+To query the state either of the mutexes is sufficient within the locking
+code and no mutex is required when the query thread is no longer waiting. */
+
+/** The locks and state of an active transaction. Protected by
+lock_sys->mutex, trx->mutex or both. */
+struct trx_lock_t {
+ ulint n_active_thrs; /*!< number of active query threads */
+
+ trx_que_t que_state; /*!< valid when trx->state
+ == TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
+ TRX_QUE_LOCK_WAIT, ... */
+
+ lock_t* wait_lock; /*!< if trx execution state is
+ TRX_QUE_LOCK_WAIT, this points to
+ the lock request, otherwise this is
+ NULL; set to non-NULL when holding
+ both trx->mutex and lock_sys->mutex;
+ set to NULL when holding
+ lock_sys->mutex; readers should
+ hold lock_sys->mutex, except when
+ they are holding trx->mutex and
+ wait_lock==NULL */
+ ib_uint64_t deadlock_mark; /*!< A mark field that is initialized
+ to and checked against lock_mark_counter
+ by lock_deadlock_recursive(). */
+ ibool was_chosen_as_deadlock_victim;
+ /*!< when the transaction decides to
+ wait for a lock, it sets this to FALSE;
+ if another transaction chooses this
+ transaction as a victim in deadlock
+ resolution, it sets this to TRUE.
+ Protected by trx->mutex. */
+ time_t wait_started; /*!< lock wait started at this time,
+ protected only by lock_sys->mutex */
+
+ que_thr_t* wait_thr; /*!< query thread belonging to this
+ trx that is in QUE_THR_LOCK_WAIT
+ state. For threads suspended in a
+ lock wait, this is protected by
+ lock_sys->mutex. Otherwise, this may
+ only be modified by the thread that is
+ serving the running transaction. */
+
+ mem_heap_t* lock_heap; /*!< memory heap for trx_locks;
+ protected by lock_sys->mutex */
+
+ UT_LIST_BASE_NODE_T(lock_t)
+ trx_locks; /*!< locks requested
+ by the transaction;
+ insertions are protected by trx->mutex
+ and lock_sys->mutex; removals are
+ protected by lock_sys->mutex */
+
+ ib_vector_t* table_locks; /*!< All table locks requested by this
+ transaction, including AUTOINC locks */
+
+ ibool cancel; /*!< TRUE if the transaction is being
+ rolled back either via deadlock
+ detection or due to lock timeout. The
+ caller has to acquire the trx_t::mutex
+ in order to cancel the locks. In
+ lock_trx_table_locks_remove() we
+ check for this cancel of a transaction's
+ locks and avoid reacquiring the trx
+ mutex to prevent recursive deadlocks.
+ Protected by both the lock sys mutex
+ and the trx_t::mutex. */
};
#define TRX_MAGIC_N 91118598
-/* The transaction handle; every session has a trx object which is freed only
-when the session is freed; in addition there may be session-less transactions
-rolling back after a database recovery */
+/** The transaction handle
+
+Normally, there is a 1:1 relationship between a transaction handle
+(trx) and a session (client connection). One session is associated
+with exactly one user transaction. There are some exceptions to this:
+
+* For DDL operations, a subtransaction is allocated that modifies the
+data dictionary tables. Lock waits and deadlocks are prevented by
+acquiring the dict_operation_lock before starting the subtransaction
+and releasing it after committing the subtransaction.
+
+* The purge system uses a special transaction that is not associated
+with any session.
+
+* If the system crashed or it was quickly shut down while there were
+transactions in the ACTIVE or PREPARED state, these transactions would
+no longer be associated with a session when the server is restarted.
+
+A session may be served by at most one thread at a time. The serving
+thread of a session might change in some MySQL implementations.
+Therefore we do not have os_thread_get_curr_id() assertions in the code.
+
+Normally, only the thread that is currently associated with a running
+transaction may access (read and modify) the trx object, and it may do
+so without holding any mutex. The following are exceptions to this:
+
+* trx_rollback_resurrected() may access resurrected (connectionless)
+transactions while the system is already processing new user
+transactions. The trx_sys->mutex prevents a race condition between it
+and lock_trx_release_locks() [invoked by trx_commit()].
-struct trx_struct{
+* trx_print_low() may access transactions not associated with the current
+thread. The caller must be holding trx_sys->mutex and lock_sys->mutex.
+
+* When a transaction handle is in the trx_sys->mysql_trx_list or
+trx_sys->trx_list, some of its fields must not be modified without
+holding trx_sys->mutex exclusively.
+
+* The locking code (in particular, lock_deadlock_recursive() and
+lock_rec_convert_impl_to_expl()) will access transactions associated
+with other connections. The locks of transactions are protected by
+lock_sys->mutex and sometimes by trx->mutex. */
+
+struct trx_t{
ulint magic_n;
+ ib_mutex_t mutex; /*!< Mutex protecting the fields
+ state and lock
+ (except some fields of lock, which
+ are protected by lock_sys->mutex) */
+
+ /** State of the trx from the point of view of concurrency control
+ and the valid state transitions.
+
+ Possible states:
+
+ TRX_STATE_NOT_STARTED
+ TRX_STATE_ACTIVE
+ TRX_STATE_PREPARED
+ TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
+
+ Valid state transitions are:
+
+ Regular transactions:
+ * NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
+
+ Auto-commit non-locking read-only:
+ * NOT_STARTED -> ACTIVE -> NOT_STARTED
+
+ XA (2PC):
+ * NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
+
+ Recovered XA:
+ * NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
+
+ XA (2PC) (shutdown before ROLLBACK or COMMIT):
+ * NOT_STARTED -> PREPARED -> (freed)
+
+ Latching and various transaction lists membership rules:
+
+ XA (2PC) transactions are always treated as non-autocommit.
+
+ Transitions to ACTIVE or NOT_STARTED occur when
+ !in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed).
+
+ Autocommit non-locking read-only transactions move between states
+ without holding any mutex. They are !in_rw_trx_list, !in_ro_trx_list.
+
+ When a transaction is NOT_STARTED, it can be in_mysql_trx_list if
+ it is a user transaction. It cannot be in ro_trx_list or rw_trx_list.
+
+ ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list.
+ The transition ACTIVE->PREPARED is protected by trx_sys->mutex.
+
+ ACTIVE->COMMITTED is possible when the transaction is in
+ ro_trx_list or rw_trx_list.
+
+ Transitions to COMMITTED are protected by both lock_sys->mutex
+ and trx->mutex.
+
+ NOTE: Some of these state change constraints are an overkill,
+ currently only required for a consistent view for printing stats.
+ This unnecessarily adds a huge cost for the general case.
+
+ NOTE: In the future we should add read only transactions to the
+ ro_trx_list the first time they try to acquire a lock, i.e. by default
+ we treat all read-only transactions as non-locking. */
+ trx_state_t state;
+
+ trx_lock_t lock; /*!< Information about the transaction
+ locks and state. Protected by
+ trx->mutex or lock_sys->mutex
+ or both */
+ ulint is_recovered; /*!< 0=normal transaction,
+ 1=recovered, must be rolled back,
+ protected by trx_sys->mutex when
+ trx->in_rw_trx_list holds */
+
/* These fields are not protected by any mutex. */
const char* op_info; /*!< English text describing the
current operation, or an empty
string */
- ulint state; /*!< state of the trx from the point of
- view of concurrency control: TRX_ACTIVE,
- TRX_COMMITTED_IN_MEMORY, ... This was
- called 'conc_state' in the upstream and
- has been renamed in Percona Server,
- because changing it's value to/from
- either TRX_ACTIVE or TRX_PREPARED
- requires calling
- trx_reserve_descriptor() /
- trx_release_descriptor(). Different name
- ensures we notice any new code changing
- the state. */
+ ulint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
+ ulint check_foreigns; /*!< normally TRUE, but if the user
+ wants to suppress foreign key checks,
+ (in table imports, for example) we
+ set this FALSE */
/*------------------------------*/
/* MySQL has a transaction coordinator to coordinate two phase
- commit between multiple storage engines and the binary log. When
- an engine participates in a transaction, it's responsible for
- registering itself using the trans_register_ha() API. */
+ commit between multiple storage engines and the binary log. When
+ an engine participates in a transaction, it's responsible for
+ registering itself using the trans_register_ha() API. */
unsigned is_registered:1;/* This flag is set to 1 after the
transaction has been registered with
the coordinator using the XA API, and
@@ -521,17 +790,9 @@ struct trx_struct{
this is set to 1 then registered should
also be set to 1. This is used in the
XA code */
- unsigned is_in_trx_serial_list:1;
- /* Set when transaction is in the
- trx_serial_list */
/*------------------------------*/
- ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
- ulint check_foreigns; /* normally TRUE, but if the user
- wants to suppress foreign key checks,
- (in table imports, for example) we
- set this FALSE */
ulint check_unique_secondary;
- /* normally TRUE, but if the user
+ /*!< normally TRUE, but if the user
wants to speed up inserts by
suppressing unique key checks
for secondary indexes when we decide
@@ -549,123 +810,120 @@ struct trx_struct{
defer flush of the logs to disk
until after we release the
mutex. */
- ulint must_flush_log_later;/* this flag is set to TRUE in
- trx_commit_off_kernel() if
- flush_log_later was TRUE, and there
- were modifications by the transaction;
- in that case we must flush the log
- in trx_commit_complete_for_mysql() */
+ ulint must_flush_log_later;/*!< this flag is set to TRUE in
+ trx_commit() if flush_log_later was
+ TRUE, and there were modifications by
+ the transaction; in that case we must
+ flush the log in
+ trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- ibool has_search_latch;
- /* TRUE if this trx has latched any
+ bool has_search_latch;
+ /*!< true if this trx has latched any
search system latch in S-mode */
- ulint deadlock_mark; /*!< a mark field used in deadlock
- checking algorithm. */
+ ulint search_latch_timeout;
+ /*!< If we notice that someone is
+ waiting for our S-lock on the search
+ latch to be released, we wait in
+ row0sel.cc for BTR_SEA_TIMEOUT new
+ searches until we try to keep
+ the search latch again over
+ calls from MySQL; this is intended
+ to reduce contention on the search
+ latch */
trx_dict_op_t dict_operation; /**< @see enum trx_dict_op */
/* Fields protected by the srv_conc_mutex. */
ulint declared_to_be_inside_innodb;
- /* this is TRUE if we have declared
+ /*!< this is TRUE if we have declared
this transaction in
srv_conc_enter_innodb to be inside the
InnoDB engine */
-
- /* Fields protected by dict_operation_lock. The very latch
- it is used to track. */
+ ulint n_tickets_to_enter_innodb;
+ /*!< this can be > 0 only when
+ declared_to_... is TRUE; when we come
+ to srv_conc_enter_innodb, if the value
+ here is > 0, we decrement this by 1 */
ulint dict_operation_lock_mode;
/*!< 0, RW_S_LATCH, or RW_X_LATCH:
the latch mode trx currently holds
- on dict_operation_lock */
+ on dict_operation_lock. Protected
+ by dict_operation_lock. */
+
+ trx_id_t no; /*!< transaction serialization number:
+ max trx id shortly before the
+ transaction is moved to
+ COMMITTED_IN_MEMORY state.
+ Protected by trx_sys_t::mutex
+ when trx->in_rw_trx_list. Initially
+ set to TRX_ID_MAX. */
- /* All the next fields are protected by the kernel mutex, except the
- undo logs which are protected by undo_mutex */
- ulint is_purge; /*!< 0=user transaction, 1=purge */
- ulint is_recovered; /*!< 0=normal transaction,
- 1=recovered, must be rolled back */
- ulint que_state; /*!< valid when conc_state
- == TRX_ACTIVE: TRX_QUE_RUNNING,
- TRX_QUE_LOCK_WAIT, ... */
- ulint handling_signals;/* this is TRUE as long as the trx
- is handling signals */
time_t start_time; /*!< time the trx object was created
or the state last time became
- TRX_ACTIVE */
+ TRX_STATE_ACTIVE */
trx_id_t id; /*!< transaction id */
XID xid; /*!< X/Open XA transaction
identification to identify a
transaction branch */
- trx_id_t no; /*!< transaction serialization number ==
- max trx id when the transaction is
- moved to COMMITTED_IN_MEMORY state */
- ib_uint64_t commit_lsn; /*!< lsn at the time of the commit */
+ lsn_t commit_lsn; /*!< lsn at the time of the commit */
table_id_t table_id; /*!< Table to drop iff dict_operation
- is TRUE, or 0. */
+ == TRX_DICT_OP_TABLE, or 0. */
/*------------------------------*/
- void* mysql_thd; /*!< MySQL thread handle corresponding
+ THD* mysql_thd; /*!< MySQL thread handle corresponding
to this trx, or NULL */
const char* mysql_log_file_name;
- /* if MySQL binlog is used, this field
+ /*!< if MySQL binlog is used, this field
contains a pointer to the latest file
name; this is NULL if binlog is not
used */
- ib_int64_t mysql_log_offset;/* if MySQL binlog is used, this field
- contains the end offset of the binlog
- entry */
- const char* mysql_master_log_file_name;
- /* if the database server is a MySQL
- replication slave, we have here the
- master binlog name up to which
- replication has processed; otherwise
- this is a pointer to a null
- character */
- ib_int64_t mysql_master_log_pos;
- /* if the database server is a MySQL
- replication slave, this is the
- position in the log file up to which
- replication has processed */
- const char* mysql_relay_log_file_name;
- ib_int64_t mysql_relay_log_pos;
+ ib_int64_t mysql_log_offset;
+ /*!< if MySQL binlog is used, this
+ field contains the end offset of the
+ binlog entry */
time_t idle_start;
ib_int64_t last_stmt_start;
/*------------------------------*/
- ulint n_mysql_tables_in_use; /* number of Innobase tables
+ ulint n_mysql_tables_in_use; /*!< number of Innobase tables
used in the processing of the current
SQL statement in MySQL */
ulint mysql_n_tables_locked;
- /* how many tables the current SQL
+ /*!< how many tables the current SQL
statement uses, except those
in consistent read */
- ulint search_latch_timeout;
- /* If we notice that someone is
- waiting for our S-lock on the search
- latch to be released, we wait in
- row0sel.c for BTR_SEA_TIMEOUT new
- searches until we try to keep
- the search latch again over
- calls from MySQL; this is intended
- to reduce contention on the search
- latch */
- /*------------------------------*/
- ulint n_tickets_to_enter_innodb;
- /* this can be > 0 only when
- declared_to_... is TRUE; when we come
- to srv_conc_innodb_enter, if the value
- here is > 0, we decrement this by 1 */
/*------------------------------*/
UT_LIST_NODE_T(trx_t)
- trx_list; /*!< list of transactions */
+ trx_list; /*!< list of transactions;
+ protected by trx_sys->mutex.
+ The same node is used for both
+ trx_sys_t::ro_trx_list and
+ trx_sys_t::rw_trx_list */
+#ifdef UNIV_DEBUG
+ /** The following two fields are mutually exclusive. */
+ /* @{ */
+
+ ibool in_ro_trx_list; /*!< TRUE if in trx_sys->ro_trx_list */
+ ibool in_rw_trx_list; /*!< TRUE if in trx_sys->rw_trx_list */
+ /* @} */
+#endif /* UNIV_DEBUG */
UT_LIST_NODE_T(trx_t)
mysql_trx_list; /*!< list of transactions created for
- MySQL */
+ MySQL; protected by trx_sys->mutex */
+#ifdef UNIV_DEBUG
+ ibool in_mysql_trx_list;
+ /*!< TRUE if in
+ trx_sys->mysql_trx_list */
+#endif /* UNIV_DEBUG */
UT_LIST_NODE_T(trx_t)
trx_serial_list;/*!< list node for
trx_sys->trx_serial_list */
+ bool in_trx_serial_list;
+ /* Set when transaction is in the
+ trx_serial_list */
/*------------------------------*/
- ulint error_state; /*!< 0 if no error, otherwise error
+ dberr_t error_state; /*!< 0 if no error, otherwise error
number; NOTE That ONLY the thread
doing the transaction is allowed to
set this field: this is NOT protected
- by the kernel mutex */
+ by any mutex */
const dict_index_t*error_info; /*!< if the error number indicates a
duplicate key error, a pointer to
the problematic index is stored here */
@@ -679,47 +937,8 @@ struct trx_struct{
survive over a transaction commit, if
it is a stored procedure with a COMMIT
WORK statement, for instance */
- ulint n_active_thrs; /*!< number of active query threads */
- que_t* graph_before_signal_handling;
- /* value of graph when signal handling
- for this trx started: this is used to
- return control to the original query
- graph for error processing */
- trx_sig_t sig; /*!< one signal object can be allocated
- in this space, avoiding mem_alloc */
- UT_LIST_BASE_NODE_T(trx_sig_t)
- signals; /*!< queue of processed or pending
- signals to the trx */
- UT_LIST_BASE_NODE_T(trx_sig_t)
- reply_signals; /*!< list of signals sent by the query
- threads of this trx for which a thread
- is waiting for a reply; if this trx is
- killed, the reply requests in the list
- must be canceled */
- /*------------------------------*/
- lock_t* wait_lock; /*!< if trx execution state is
- TRX_QUE_LOCK_WAIT, this points to
- the lock request, otherwise this is
- NULL */
- ibool was_chosen_as_deadlock_victim;
- /* when the transaction decides to wait
- for a lock, it sets this to FALSE;
- if another transaction chooses this
- transaction as a victim in deadlock
- resolution, it sets this to TRUE */
- time_t wait_started; /*!< lock wait started at this time */
- UT_LIST_BASE_NODE_T(que_thr_t)
- wait_thrs; /*!< query threads belonging to this
- trx that are in the QUE_THR_LOCK_WAIT
- state */
- /*------------------------------*/
- mem_heap_t* lock_heap; /*!< memory heap for the locks of the
- transaction */
- UT_LIST_BASE_NODE_T(lock_t)
- trx_locks; /*!< locks reserved by the transaction */
- /*------------------------------*/
read_view_t* global_read_view;
- /* consistent read view associated
+ /*!< consistent read view associated
to a transaction or NULL */
read_view_t* read_view; /*!< consistent read view used in the
transaction or NULL, this read view
@@ -733,7 +952,7 @@ struct trx_struct{
trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
oldest first */
/*------------------------------*/
- mutex_t undo_mutex; /*!< mutex protecting the fields in this
+ ib_mutex_t undo_mutex; /*!< mutex protecting the fields in this
section (down to undo_no_arr), EXCEPT
last_sql_stat_start, which can be
accessed only when we know that there
@@ -747,7 +966,7 @@ struct trx_struct{
the number of modified/inserted
rows in a transaction */
trx_savept_t last_sql_stat_start;
- /* undo_no when the last sql statement
+ /*!< undo_no when the last sql statement
was started: in case of an error, trx
is rolled back down to this undo
number; see note at undo_mutex! */
@@ -773,7 +992,39 @@ struct trx_struct{
transaction. Note that these are
also in the lock list trx_locks. This
vector needs to be freed explicitly
- when the trx_t instance is desrtoyed */
+ when the trx instance is destroyed.
+ Protected by lock_sys->mutex. */
+ /*------------------------------*/
+ ibool read_only; /*!< TRUE if transaction is flagged
+ as a READ-ONLY transaction.
+ if !auto_commit || will_lock > 0
+ then it will be added to the list
+ trx_sys_t::ro_trx_list. A read only
+ transaction will not be assigned an
+ UNDO log. Non-locking auto-commit
+ read-only transaction will not be on
+ either list. */
+ ibool auto_commit; /*!< TRUE if it is an autocommit */
+ ulint will_lock; /*!< Will acquire some locks. Increment
+ each time we determine that a lock will
+ be acquired by the MySQL layer. */
+ bool ddl; /*!< true if it is a transaction that
+ is being started for a DDL operation */
+ /*------------------------------*/
+ fts_trx_t* fts_trx; /*!< FTS information, or NULL if
+ transaction hasn't modified tables
+ with FTS indexes (yet). */
+ doc_id_t fts_next_doc_id;/* The document id used for updates */
+ /*------------------------------*/
+ ulint flush_tables; /*!< if "covering" the "FLUSH TABLES",
+ count of tables being flushed. */
+
+ /*------------------------------*/
+#ifdef UNIV_DEBUG
+ ulint start_line; /*!< Track where it was started from */
+ const char* start_file; /*!< Filename where it was started */
+#endif /* UNIV_DEBUG */
+
/*------------------------------*/
char detailed_error[256]; /*!< detailed error message for last
error, or empty. */
@@ -790,23 +1041,6 @@ struct trx_struct{
ibool take_stats;
};
-#define TRX_MAX_N_THREADS 32 /* maximum number of
- concurrent threads running a
- single operation of a
- transaction, e.g., a parallel
- query */
-/* Transaction concurrency states (trx->conc_state) */
-#define TRX_NOT_STARTED 0
-#define TRX_ACTIVE 1
-#define TRX_COMMITTED_IN_MEMORY 2
-#define TRX_PREPARED 3 /* Support for 2PC/XA */
-
-/* Transaction execution states when trx->conc_state == TRX_ACTIVE */
-#define TRX_QUE_RUNNING 0 /* transaction is running */
-#define TRX_QUE_LOCK_WAIT 1 /* transaction is waiting for a lock */
-#define TRX_QUE_ROLLING_BACK 2 /* transaction is rolling back */
-#define TRX_QUE_COMMITTING 3 /* transaction is committing */
-
/* Transaction isolation levels (trx->isolation_level) */
#define TRX_ISO_READ_UNCOMMITTED 0 /* dirty read: non-locking
SELECTs are performed so that
@@ -853,7 +1087,6 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_TOTAL_ROLLBACK 1
#define TRX_SIG_ROLLBACK_TO_SAVEPT 2
#define TRX_SIG_COMMIT 3
-#define TRX_SIG_ERROR_OCCURRED 4
#define TRX_SIG_BREAK_EXECUTION 5
/* Sender types of a signal */
@@ -876,13 +1109,40 @@ enum commit_node_state {
};
/** Commit command node in a query graph */
-struct commit_node_struct{
+struct commit_node_t{
que_common_t common; /*!< node type: QUE_NODE_COMMIT */
enum commit_node_state
state; /*!< node execution state */
};
+/** Test if trx->mutex is owned; t is any expression yielding a trx_t*. */
+#define trx_mutex_own(t) mutex_own(&(t)->mutex)
+
+/** Acquire the trx->mutex; t is parenthesized so any trx_t* expression works. */
+#define trx_mutex_enter(t) do { \
+ mutex_enter(&(t)->mutex); \
+} while (0)
+
+/** Release the trx->mutex; t is parenthesized so any trx_t* expression works. */
+#define trx_mutex_exit(t) do { \
+ mutex_exit(&(t)->mutex); \
+} while (0)
+
+/** @brief The latch protecting the adaptive search system
+
+This latch protects the
+(1) hash index;
+(2) columns of a record to which we have a pointer in the hash index;
+
+but does NOT protect:
+
+(3) next record offset field in a record;
+(4) next or previous records on the same page.
+
+Bear in mind (3) and (4) when using the hash index.
+*/
+extern prio_rw_lock_t* btr_search_latch_arr;
#ifndef UNIV_NONINL
#include "trx0trx.ic"
diff --git a/storage/xtradb/include/trx0trx.ic b/storage/xtradb/include/trx0trx.ic
index 97dda69f013..787931dc4b6 100644
--- a/storage/xtradb/include/trx0trx.ic
+++ b/storage/xtradb/include/trx0trx.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -23,50 +23,48 @@ The transaction
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-/********************************************************************//**
-In XtraDB it is impossible for a transaction to own a search latch outside of
-InnoDB code, so there is nothing to release on demand. We keep this function to
-simplify maintenance.*/
-UNIV_INLINE
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx __attribute__((unused))) /*!< in: transaction */
-{
- ut_ad(!trx->has_search_latch);
-}
-
-/*************************************************************//**
-Starts the transaction if it is not yet started. */
-UNIV_INLINE
-void
-trx_start_if_not_started(
-/*=====================*/
- trx_t* trx) /*!< in: transaction */
-{
- ut_ad(trx->state != TRX_COMMITTED_IN_MEMORY);
-
- if (trx->state == TRX_NOT_STARTED) {
-
- trx_start(trx, ULINT_UNDEFINED);
- }
-}
-
-/*************************************************************//**
-Starts the transaction if it is not yet started. Assumes we have reserved
-the kernel mutex! */
+/**********************************************************************//**
+Determines if a transaction is in the given state.
+The caller must hold trx_sys->mutex, or it must be the thread
+that is serving a running transaction.
+A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
+unless it is a non-locking autocommit read only transaction, which is only
+in trx_sys->mysql_trx_list.
+@return TRUE if trx->state == state */
UNIV_INLINE
-void
-trx_start_if_not_started_low(
-/*=========================*/
- trx_t* trx) /*!< in: transaction */
+ibool
+trx_state_eq(
+/*=========*/
+ const trx_t* trx, /*!< in: transaction */
+ trx_state_t state) /*!< in: state;
+ if state != TRX_STATE_NOT_STARTED
+ asserts that
+ trx->state != TRX_STATE_NOT_STARTED */
{
- ut_ad(trx->state != TRX_COMMITTED_IN_MEMORY);
-
- if (trx->state == TRX_NOT_STARTED) {
-
- trx_start_low(trx, ULINT_UNDEFINED);
+#ifdef UNIV_DEBUG
+ switch (trx->state) {
+ case TRX_STATE_PREPARED:
+ ut_ad(!trx_is_autocommit_non_locking(trx));
+ return(trx->state == state);
+
+ case TRX_STATE_ACTIVE:
+ assert_trx_nonlocking_or_in_list(trx);
+ return(state == trx->state);
+
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ assert_trx_in_list(trx);
+ return(state == trx->state);
+
+ case TRX_STATE_NOT_STARTED:
+ /* This state is not allowed for running transactions. */
+ ut_a(state == TRX_STATE_NOT_STARTED);
+ ut_ad(!trx->in_rw_trx_list);
+ ut_ad(!trx->in_ro_trx_list);
+ return(state == trx->state);
}
+ ut_error;
+#endif /* UNIV_DEBUG */
+ return(trx->state == state);
}
/****************************************************************//**
@@ -92,7 +90,7 @@ trx_get_que_state_str(
const trx_t* trx) /*!< in: transaction */
{
/* be sure to adjust TRX_QUE_STATE_STR_MAX_LEN if you change this */
- switch (trx->que_state) {
+ switch (trx->lock.que_state) {
case TRX_QUE_RUNNING:
return("RUNNING");
case TRX_QUE_LOCK_WAIT:
@@ -110,12 +108,12 @@ trx_get_que_state_str(
Determine if a transaction is a dictionary operation.
@return dictionary operation mode */
UNIV_INLINE
-enum trx_dict_op
+enum trx_dict_op_t
trx_get_dict_operation(
/*===================*/
const trx_t* trx) /*!< in: transaction */
{
- enum trx_dict_op op = (enum trx_dict_op) trx->dict_operation;
+ trx_dict_op_t op = static_cast<trx_dict_op_t>(trx->dict_operation);
#ifdef UNIV_DEBUG
switch (op) {
@@ -126,7 +124,7 @@ trx_get_dict_operation(
}
ut_error;
#endif /* UNIV_DEBUG */
- return((enum trx_dict_op) UNIV_EXPECT(op, TRX_DICT_OP_NONE));
+ return(op);
}
/**********************************************************************//**
Flag a transaction a dictionary operation. */
@@ -135,11 +133,11 @@ void
trx_set_dict_operation(
/*===================*/
trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op op) /*!< in: operation, not
+ enum trx_dict_op_t op) /*!< in: operation, not
TRX_DICT_OP_NONE */
{
#ifdef UNIV_DEBUG
- enum trx_dict_op old_op = trx_get_dict_operation(trx);
+ enum trx_dict_op_t old_op = trx_get_dict_operation(trx);
switch (op) {
case TRX_DICT_OP_NONE:
@@ -161,5 +159,19 @@ trx_set_dict_operation(
ok:
#endif /* UNIV_DEBUG */
+ trx->ddl = true;
trx->dict_operation = op;
}
+
+/********************************************************************//**
+In XtraDB it is impossible for a transaction to own a search latch outside of
+InnoDB code, so there is nothing to release on demand. We keep this function to
+simplify maintenance.*/
+UNIV_INLINE
+void
+trx_search_latch_release_if_reserved(
+/*=================================*/
+ trx_t* trx __attribute__((unused))) /*!< in: transaction */
+{
+ ut_ad(!trx->has_search_latch);
+}
diff --git a/storage/xtradb/include/trx0types.h b/storage/xtradb/include/trx0types.h
index 7303892bec4..7ca95131328 100644
--- a/storage/xtradb/include/trx0types.h
+++ b/storage/xtradb/include/trx0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -29,38 +29,70 @@ Created 3/26/1996 Heikki Tuuri
#include "ut0byte.h"
/** printf(3) format used for printing DB_TRX_ID and other system fields */
-#define TRX_ID_FMT "%llX"
+#define TRX_ID_FMT IB_ID_FMT
/** maximum length that a formatted trx_t::id could take, not including
the terminating NUL character. */
#define TRX_ID_MAX_LEN 17
+/** Transaction execution states when trx->state == TRX_STATE_ACTIVE */
+enum trx_que_t {
+ TRX_QUE_RUNNING, /*!< transaction is running */
+ TRX_QUE_LOCK_WAIT, /*!< transaction is waiting for
+ a lock */
+ TRX_QUE_ROLLING_BACK, /*!< transaction is rolling back */
+ TRX_QUE_COMMITTING /*!< transaction is committing */
+};
+
+/** Transaction states (trx_t::state) */
+enum trx_state_t {
+ TRX_STATE_NOT_STARTED,
+ TRX_STATE_ACTIVE,
+ TRX_STATE_PREPARED, /* Support for 2PC/XA */
+ TRX_STATE_COMMITTED_IN_MEMORY
+};
+
+/** Type of data dictionary operation */
+enum trx_dict_op_t {
+ /** The transaction is not modifying the data dictionary. */
+ TRX_DICT_OP_NONE = 0,
+ /** The transaction is creating a table or an index, or
+ dropping a table. The table must be dropped in crash
+ recovery. This and TRX_DICT_OP_NONE are the only possible
+ operation modes in crash recovery. */
+ TRX_DICT_OP_TABLE = 1,
+ /** The transaction is creating or dropping an index in an
+ existing table. In crash recovery, the data dictionary
+ must be locked, but the table must not be dropped. */
+ TRX_DICT_OP_INDEX = 2
+};
+
/** Memory objects */
/* @{ */
/** Transaction */
-typedef struct trx_struct trx_t;
+struct trx_t;
+/** The locks and state of an active transaction */
+struct trx_lock_t;
/** Transaction system */
-typedef struct trx_sys_struct trx_sys_t;
-/** Doublewrite information */
-typedef struct trx_doublewrite_struct trx_doublewrite_t;
+struct trx_sys_t;
/** Signal */
-typedef struct trx_sig_struct trx_sig_t;
+struct trx_sig_t;
/** Rollback segment */
-typedef struct trx_rseg_struct trx_rseg_t;
+struct trx_rseg_t;
/** Transaction undo log */
-typedef struct trx_undo_struct trx_undo_t;
+struct trx_undo_t;
/** Array of undo numbers of undo records being rolled back or purged */
-typedef struct trx_undo_arr_struct trx_undo_arr_t;
+struct trx_undo_arr_t;
/** A cell of trx_undo_arr_t */
-typedef struct trx_undo_inf_struct trx_undo_inf_t;
+struct trx_undo_inf_t;
/** The control structure used in the purge operation */
-typedef struct trx_purge_struct trx_purge_t;
+struct trx_purge_t;
/** Rollback command node in a query graph */
-typedef struct roll_node_struct roll_node_t;
+struct roll_node_t;
/** Commit command node in a query graph */
-typedef struct commit_node_struct commit_node_t;
+struct commit_node_t;
/** SAVEPOINT command node in a query graph */
-typedef struct trx_named_savept_struct trx_named_savept_t;
+struct trx_named_savept_t;
/* @} */
/** Rollback contexts */
@@ -87,10 +119,11 @@ typedef ib_id_t roll_ptr_t;
/** Undo number */
typedef ib_id_t undo_no_t;
+/** Maximum transaction identifier */
+#define TRX_ID_MAX IB_ID_MAX
+
/** Transaction savepoint */
-typedef struct trx_savept_struct trx_savept_t;
-/** Transaction savepoint */
-struct trx_savept_struct{
+struct trx_savept_t{
undo_no_t least_undo_no; /*!< least undo number to undo */
};
diff --git a/storage/xtradb/include/trx0undo.h b/storage/xtradb/include/trx0undo.h
index 4a1e40af505..61b0dabb1e6 100644
--- a/storage/xtradb/include/trx0undo.h
+++ b/storage/xtradb/include/trx0undo.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -65,6 +65,15 @@ ibool
trx_undo_roll_ptr_is_insert(
/*========================*/
roll_ptr_t roll_ptr); /*!< in: roll pointer */
+/***********************************************************************//**
+Returns true if the record is of the insert type.
+@return true if the record was freshly inserted (not updated). */
+UNIV_INLINE
+bool
+trx_undo_trx_id_is_insert(
+/*======================*/
+ const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
+ __attribute__((nonnull, pure, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Writes a roll ptr to an index page. In case that the size changes in
@@ -166,6 +175,7 @@ trx_undo_get_prev_rec(
trx_undo_rec_t* rec, /*!< in: undo record */
ulint page_no,/*!< in: undo log header page number */
ulint offset, /*!< in: undo log header offset on page */
+ bool shared, /*!< in: true=S-latch, false=X-latch */
mtr_t* mtr); /*!< in: mtr */
/***********************************************************************//**
Gets the next record in an undo log.
@@ -282,14 +292,15 @@ trx_undo_lists_init(
Assigns an undo log for a transaction. A new undo log is created or a cached
undo log reused.
@return DB_SUCCESS if undo log assign successful, possible error codes
-are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE
+are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
DB_OUT_OF_MEMORY */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_assign_undo(
/*=================*/
trx_t* trx, /*!< in: transaction */
- ulint type); /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+ ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
@return undo log segment header page, x-latched */
@@ -404,7 +415,7 @@ trx_undo_mem_free(
/** Transaction undo log memory object; this is protected by the undo_mutex
in the corresponding transaction object */
-struct trx_undo_struct{
+struct trx_undo_t{
/*-----------------------------*/
ulint id; /*!< undo log slot number within the
rollback segment */
@@ -412,8 +423,8 @@ struct trx_undo_struct{
TRX_UNDO_UPDATE */
ulint state; /*!< state of the corresponding undo log
segment */
- ibool del_marks; /*!< relevant only in an update undo log:
- this is TRUE if the transaction may
+ ibool del_marks; /*!< relevant only in an update undo
+ log: this is TRUE if the transaction may
have delete marked records, because of
a delete of a row or an update of an
indexed field; purge is then
@@ -435,8 +446,8 @@ struct trx_undo_struct{
in bytes, or 0 for uncompressed */
ulint hdr_page_no; /*!< page number of the header page in
the undo log */
- ulint hdr_offset; /*!< header offset of the undo log on the
- page */
+ ulint hdr_offset; /*!< header offset of the undo log on
+ the page */
ulint last_page_no; /*!< page number of the last page in the
undo log; this may differ from
top_page_no during a rollback */
@@ -582,8 +593,8 @@ quite a large overhead. */
#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4)
/*--------------------------------------------------------------*/
#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE)
- /*!< Total size of the undo log header
- with the XA XID */
+ /*!< Total size of the undo log header
+ with the XA XID */
/* @} */
#ifndef UNIV_NONINL
diff --git a/storage/xtradb/include/trx0undo.ic b/storage/xtradb/include/trx0undo.ic
index a12d38116b6..577759d6c3d 100644
--- a/storage/xtradb/include/trx0undo.ic
+++ b/storage/xtradb/include/trx0undo.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -101,6 +101,21 @@ trx_undo_roll_ptr_is_insert(
ut_ad(roll_ptr < (1ULL << 56));
return((ibool) (roll_ptr >> 55));
}
+
+/***********************************************************************//**
+Returns true if the record is of the insert type.
+@return true if the record was freshly inserted (not updated). */
+UNIV_INLINE
+bool
+trx_undo_trx_id_is_insert(
+/*======================*/
+ const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
+{
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error
+#endif
+ return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7));
+}
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
diff --git a/storage/xtradb/include/trx0xa.h b/storage/xtradb/include/trx0xa.h
index 97c24c899a7..7caddfb7ba4 100644
--- a/storage/xtradb/include/trx0xa.h
+++ b/storage/xtradb/include/trx0xa.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 0b105f573c2..c9447245124 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -1,8 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2009, Sun Microsystems, Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -10,12 +9,6 @@ briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
-
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
@@ -25,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -49,10 +42,16 @@ Created 1/20/1994 Heikki Tuuri
#define _IB_TO_STR(s) #s
#define IB_TO_STR(s) _IB_TO_STR(s)
-#include <mysql_version.h>
+#define INNODB_VERSION_MAJOR 5
+#define INNODB_VERSION_MINOR 6
+#define INNODB_VERSION_BUGFIX 14
+
+#ifndef PERCONA_INNODB_VERSION
+#define PERCONA_INNODB_VERSION 62.0
+#endif
-#define INNODB_VERSION_MAJOR MYSQL_MAJOR_VERSION
-#define INNODB_VERSION_MINOR MYSQL_MINOR_VERSION
+/* Enable UNIV_LOG_ARCHIVE in XtraDB */
+#define UNIV_LOG_ARCHIVE 1
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
@@ -63,15 +62,15 @@ component, i.e. we show M.N.P as M.N */
#define INNODB_VERSION_SHORT \
(INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
-#ifndef PERCONA_INNODB_VERSION
-#define PERCONA_INNODB_VERSION 31.1
-#endif
-
-#define INNODB_VERSION_STR MYSQL_SERVER_VERSION "-" IB_TO_STR(PERCONA_INNODB_VERSION)
+#define INNODB_VERSION_STR \
+ IB_TO_STR(INNODB_VERSION_MAJOR) "." \
+ IB_TO_STR(INNODB_VERSION_MINOR) "." \
+ IB_TO_STR(INNODB_VERSION_BUGFIX) "-" \
+ IB_TO_STR(PERCONA_INNODB_VERSION)
#define REFMAN "http://dev.mysql.com/doc/refman/" \
- IB_TO_STR(MYSQL_MAJOR_VERSION) "." \
- IB_TO_STR(MYSQL_MINOR_VERSION) "/en/"
+ IB_TO_STR(INNODB_VERSION_MAJOR) "." \
+ IB_TO_STR(INNODB_VERSION_MINOR) "/en/"
#ifdef MYSQL_DYNAMIC_PLUGIN
/* In the dynamic plugin, redefine some externally visible symbols
@@ -105,10 +104,10 @@ if we are compiling on Windows. */
# include <my_pthread.h>
#endif /* UNIV_HOTBACKUP */
-/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
+/* Include <sys/stat.h> to get S_I... macros defined for os0file.cc */
# include <sys/stat.h>
# if !defined(__WIN__)
-# include <sys/mman.h> /* mmap() for os0proc.c */
+# include <sys/mman.h> /* mmap() for os0proc.cc */
# endif
/* Include the header file generated by GNU autoconf */
@@ -125,21 +124,21 @@ if we are compiling on Windows. */
/* We only try to do explicit inlining of functions with gcc and
Sun Studio */
-# if !defined(__GNUC__) && !(defined(__SUNPRO_C) || defined(__SUNPRO_CC))
-# undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */
-# define UNIV_MUST_NOT_INLINE
-# endif
-
# ifdef HAVE_PREAD
# define HAVE_PWRITE
# endif
#endif /* #if (defined(WIN32) || ... */
+#ifndef __WIN__
+#define __STDC_FORMAT_MACROS /* Enable C99 printf format macros */
+#include <inttypes.h>
+#endif /* !__WIN__ */
+
/* Following defines are to enable performance schema
instrumentation in each of four InnoDB modules if
HAVE_PSI_INTERFACE is defined. */
-#ifdef HAVE_PSI_INTERFACE
+#if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP
# define UNIV_PFS_MUTEX
# define UNIV_PFS_RWLOCK
/* For I/O instrumentation, performance schema rely
@@ -151,8 +150,22 @@ resolved */
# define UNIV_PFS_IO
# endif
# define UNIV_PFS_THREAD
+
+/* There are mutexes/rwlocks that we want to exclude from
+instrumentation even if their corresponding performance schema
+define is set. And this PFS_NOT_INSTRUMENTED is used
+as the key value to identify those objects that would
+be excluded from instrumentation. */
+# define PFS_NOT_INSTRUMENTED ULINT32_UNDEFINED
+
+# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED)
+
#endif /* HAVE_PSI_INTERFACE */
+#ifdef __WIN__
+# define YY_NO_UNISTD_H 1
+#endif /* __WIN__ */
+
/* DEBUG VERSION CONTROL
===================== */
@@ -180,8 +193,6 @@ command. Not tested on Windows. */
debugging without UNIV_DEBUG */
#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column
debugging without UNIV_DEBUG */
-#define UNIV_BLOB_NULL_DEBUG /* Enable deep off-page
- column debugging */
#define UNIV_DEBUG /* Enable ut_ad() assertions
and disable UNIV_INLINE */
#define UNIV_DEBUG_LOCK_VALIDATE /* Enable
@@ -202,6 +213,9 @@ assumes that no BLOBs survive server restart */
#define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer;
this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
and the insert buffer must be empty when the database is started */
+#define UNIV_PERF_DEBUG /* debug flag that enables
+ light weight performance
+ related stuff. */
#define UNIV_SYNC_DEBUG /* debug mutex and latch
operations (very slow); also UNIV_DEBUG must be defined */
#define UNIV_SEARCH_DEBUG /* debug B-tree comparisons */
@@ -210,7 +224,7 @@ operations (very slow); also UNIV_DEBUG must be defined */
#define UNIV_SEARCH_PERF_STAT /* statistics for the
adaptive hash index */
#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output
- in sync0sync.c */
+ in sync0sync.cc */
#define UNIV_BTR_PRINT /* enable functions for
printing B-trees */
#define UNIV_ZIP_DEBUG /* extensive consistency checks
@@ -220,6 +234,11 @@ operations (very slow); also UNIV_DEBUG must be defined */
#define UNIV_AIO_DEBUG /* prints info about
submitted and reaped AIO
requests to the log. */
+#define UNIV_STATS_DEBUG /* prints various stats
+ related debug info from
+ dict0stats.cc */
+#define FTS_INTERNAL_DIAG_PRINT /* FTS internal debugging
+ info output */
#endif
#define UNIV_BTR_DEBUG /* check B-tree links */
@@ -242,7 +261,9 @@ easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. */
#else
# define UNIV_INTERN
#endif
-#if defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+#if defined(INNODB_COMPILER_HINTS) \
+ && defined __GNUC__ \
+ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3)
/** Starting with GCC 4.3, the "cold" attribute is used to inform the
compiler that a function is unlikely executed. The function is
optimized for size rather than speed and on many targets it is placed
@@ -256,40 +277,41 @@ rarely invoked function for size instead for speed. */
# define UNIV_COLD /* empty */
#endif
+#ifdef UNIV_LINUX
+# define UNIV_THREAD_LOCAL __thread
+#else
+/* FIXME: empty on other platforms, so TLS variables are silently process-wide */
+# define UNIV_THREAD_LOCAL
+#endif
+
#ifndef UNIV_MUST_NOT_INLINE
/* Definition for inline version */
-#ifdef __WIN__
-# define UNIV_INLINE __inline
-#elif defined(__SUNPRO_CC) || defined(__SUNPRO_C)
-# define UNIV_INLINE static inline
-#else
-# define UNIV_INLINE static __inline__
-#endif
+#define UNIV_INLINE static inline
-#else
+#else /* !UNIV_MUST_NOT_INLINE */
/* If we want to compile a noninlined version we use the following macro
definitions: */
#define UNIV_NONINL
#define UNIV_INLINE UNIV_INTERN
-#endif /* UNIV_DEBUG */
+#endif /* !UNIV_MUST_NOT_INLINE */
#ifdef _WIN32
#define UNIV_WORD_SIZE 4
#elif defined(_WIN64)
#define UNIV_WORD_SIZE 8
#else
-/* MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */
+/** MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */
#define UNIV_WORD_SIZE SIZEOF_LONG
#endif
-/* The following alignment is used in memory allocations in memory heap
+/** The following alignment is used in memory allocations in memory heap
management to ensure correct alignment for doubles etc. */
-#define UNIV_MEM_ALIGNMENT 8
+#define UNIV_MEM_ALIGNMENT 8
-/* The following alignment is used in aligning lints etc. */
+/** The following alignment is used in aligning lints etc. */
#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE
/*
@@ -315,16 +337,62 @@ enum innodb_file_formats_enum {
typedef enum innodb_file_formats_enum innodb_file_formats_t;
-/* The 2-logarithm of UNIV_PAGE_SIZE: */
-/* #define UNIV_PAGE_SIZE_SHIFT 14 */
-#define UNIV_PAGE_SIZE_SHIFT_MAX 14
+/** Minimum supported file format */
+#define UNIV_FORMAT_MIN UNIV_FORMAT_A
+
+/** Maximum supported file format */
+#define UNIV_FORMAT_MAX UNIV_FORMAT_B
+
+/** The 2-logarithm of UNIV_PAGE_SIZE: */
#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift
-/* The universal page size of the database */
-/* #define UNIV_PAGE_SIZE (1u << UNIV_PAGE_SIZE_SHIFT) */
-#define UNIV_PAGE_SIZE srv_page_size
-#define UNIV_PAGE_SIZE_MAX (1u << UNIV_PAGE_SIZE_SHIFT_MAX)
-/* Maximum number of parallel threads in a parallelized operation */
+/** The universal page size of the database */
+#define UNIV_PAGE_SIZE ((ulint) srv_page_size)
+
+/** log2 of smallest compressed page size (1<<10 == 1024 bytes)
+Note: This must never change! */
+#define UNIV_ZIP_SIZE_SHIFT_MIN 10
+
+/** log2 of largest compressed page size (1<<14 == 16384 bytes).
+A compressed page directory entry reserves 14 bits for the start offset
+and 2 bits for flags. This limits the uncompressed page size to 16k.
+Even though a 16k uncompressed page can theoretically be compressed
+into a larger compressed page, it is not a useful feature so we will
+limit both with this same constant. */
+#define UNIV_ZIP_SIZE_SHIFT_MAX 14
+
+/* Define the Min, Max, Default page sizes. */
+/** Minimum Page Size Shift (power of 2) */
+#define UNIV_PAGE_SIZE_SHIFT_MIN 12
+/** Maximum Page Size Shift (power of 2) */
+#define UNIV_PAGE_SIZE_SHIFT_MAX 14
+/** Default Page Size Shift (power of 2) */
+#define UNIV_PAGE_SIZE_SHIFT_DEF 14
+/** Original 16k InnoDB Page Size Shift, in case the default changes */
+#define UNIV_PAGE_SIZE_SHIFT_ORIG 14
+
+/** Minimum page size InnoDB currently supports. */
+#define UNIV_PAGE_SIZE_MIN (1 << UNIV_PAGE_SIZE_SHIFT_MIN)
+/** Maximum page size InnoDB currently supports. */
+#define UNIV_PAGE_SIZE_MAX (1 << UNIV_PAGE_SIZE_SHIFT_MAX)
+/** Default page size for InnoDB tablespaces. */
+#define UNIV_PAGE_SIZE_DEF (1 << UNIV_PAGE_SIZE_SHIFT_DEF)
+/** Original 16k page size for InnoDB tablespaces. */
+#define UNIV_PAGE_SIZE_ORIG (1 << UNIV_PAGE_SIZE_SHIFT_ORIG)
+
+/** Smallest compressed page size */
+#define UNIV_ZIP_SIZE_MIN (1 << UNIV_ZIP_SIZE_SHIFT_MIN)
+
+/** Largest compressed page size */
+#define UNIV_ZIP_SIZE_MAX (1 << UNIV_ZIP_SIZE_SHIFT_MAX)
+
+/** Number of supported page sizes (The convention 'ssize' is used
+for 'log2 minus 9' or the number of shifts starting with 512.)
+This number varies depending on UNIV_PAGE_SIZE. */
+#define UNIV_PAGE_SSIZE_MAX \
+ (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+
+/** Maximum number of parallel threads in a parallelized operation */
#define UNIV_MAX_PARALLELISM 32
/** This is the "mbmaxlen" for my_charset_filename (defined in
@@ -338,12 +406,11 @@ FILENAME_CHARSET_MAXNAMLEN (5) = 320 bytes. The number does not include a
terminating '\0'. InnoDB can handle longer names internally */
#define MAX_TABLE_NAME_LEN 320
-
-/* The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is
+/** The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is
the MySQL's NAME_LEN, see check_and_convert_db_name(). */
#define MAX_DATABASE_NAME_LEN MAX_TABLE_NAME_LEN
-/* MAX_FULL_NAME_LEN defines the full name path including the
+/** MAX_FULL_NAME_LEN defines the full name path including the
database name and table name. In addition, 14 bytes is added for:
2 for surrounding quotes around table name
1 for the separating dot (.)
@@ -351,6 +418,16 @@ database name and table name. In addition, 14 bytes is added for:
#define MAX_FULL_NAME_LEN \
(MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14)
+/** The maximum length in bytes that a database name can occupy when stored in
+UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
+mysql_com.h if you are to use this macro. */
+#define MAX_DB_UTF8_LEN (NAME_LEN + 1)
+
+/** The maximum length in bytes that a table name can occupy when stored in
+UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
+mysql_com.h if you are to use this macro. */
+#define MAX_TABLE_UTF8_LEN (NAME_LEN + sizeof(srv_mysql50_table_name_prefix))
+
/*
UNIVERSAL TYPE DEFINITIONS
==========================
@@ -359,41 +436,47 @@ database name and table name. In addition, 14 bytes is added for:
/* Note that inside MySQL 'byte' is defined as char on Linux! */
#define byte unsigned char
-/* Define an unsigned integer type that is exactly 32 bits. */
-
-#if SIZEOF_INT == 4
-typedef unsigned int ib_uint32_t;
-#elif SIZEOF_LONG == 4
-typedef unsigned long ib_uint32_t;
-#else
-#error "Neither int or long is 4 bytes"
-#endif
-
/* Another basic type we use is unsigned long integer which should be equal to
the word size of the machine, that is on a 32-bit platform 32 bits, and on a
64-bit platform 64 bits. We also give the printf format for the type as a
macro ULINTPF. */
+
+#ifdef __WIN__
+/* Use the integer types and formatting strings defined in Visual Studio. */
+# define UINT32PF "%I32u"
+# define INT64PF "%I64d"
+# define UINT64PF "%I64u"
+# define UINT64PFx "%016I64u"
+# define DBUG_LSN_PF "%llu"
+typedef __int64 ib_int64_t;
+typedef unsigned __int64 ib_uint64_t;
+typedef unsigned __int32 ib_uint32_t;
+#else
+/* Use the integer types and formatting strings defined in the C99 standard. */
+# define UINT32PF "%"PRIu32
+# define INT64PF "%"PRId64
+# define UINT64PF "%"PRIu64
+# define UINT64PFx "%016"PRIx64
+# define DBUG_LSN_PF UINT64PF
+typedef int64_t ib_int64_t;
+typedef uint64_t ib_uint64_t;
+typedef uint32_t ib_uint32_t;
+# endif /* __WIN__ */
+
+# define IB_ID_FMT UINT64PF
+
#ifdef _WIN64
typedef unsigned __int64 ulint;
-#define ULINTPF "%I64u"
typedef __int64 lint;
+# define ULINTPF UINT64PF
#define MYSQL_SYSVAR_ULINT MYSQL_SYSVAR_ULONGLONG
#else
typedef unsigned long int ulint;
-#define ULINTPF "%lu"
typedef long int lint;
+# define ULINTPF "%lu"
#define MYSQL_SYSVAR_ULINT MYSQL_SYSVAR_ULONG
-#endif
-
-#ifdef __WIN__
-typedef __int64 ib_int64_t;
-typedef unsigned __int64 ib_uint64_t;
-#elif !defined(UNIV_HOTBACKUP)
-/* Note: longlong and ulonglong come from MySQL headers. */
-typedef longlong ib_int64_t;
-typedef ulonglong ib_uint64_t;
-#endif
+#endif /* _WIN64 */
#ifndef UNIV_HOTBACKUP
typedef unsigned long long int ullint;
@@ -405,27 +488,33 @@ typedef unsigned long long int ullint;
#endif
#endif
-/* The 'undefined' value for a ulint */
+/** The 'undefined' value for a ulint */
#define ULINT_UNDEFINED ((ulint)(-1))
+#define ULONG_UNDEFINED ((ulong)(-1))
+
+/** The 'undefined' value for an ib_uint64_t */
+#define UINT64_UNDEFINED ((ib_uint64_t)(-1))
+
/** The bitmask of 32-bit unsigned integer */
#define ULINT32_MASK 0xFFFFFFFF
-/* The undefined 32-bit unsigned integer */
+/** The undefined 32-bit unsigned integer */
#define ULINT32_UNDEFINED ULINT32_MASK
-/* Maximum value for a ulint */
+/** Maximum value for a ulint */
#define ULINT_MAX ((ulint)(-2))
-/* Maximum value for ib_uint64_t */
-#define IB_ULONGLONG_MAX ((ib_uint64_t) (~0ULL))
+/** Maximum value for ib_uint64_t */
+#define IB_UINT64_MAX ((ib_uint64_t) (~0ULL))
/** The generic InnoDB system object identifier data type */
-typedef ib_uint64_t ib_id_t;
+typedef ib_uint64_t ib_id_t;
+#define IB_ID_MAX IB_UINT64_MAX
-/* The 'undefined' value for a ullint */
+/** The 'undefined' value for a ullint */
#define ULLINT_UNDEFINED ((ullint)(-1))
-/* This 'ibool' type is used within Innobase. Remember that different included
+/** This 'ibool' type is used within Innobase. Remember that different included
headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
#define ibool ulint
@@ -436,7 +525,9 @@ headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
#endif
-/* The following number as the length of a logical field means that the field
+#define UNIV_NOTHROW
+
+/** The following number as the length of a logical field means that the field
has the SQL NULL as its value. NOTE that because we assume that the length
of a field is a 32-bit integer when we store it, for example, to an undo log
on disk, we must have also this number fit in 32 bits, also in 64-bit
@@ -444,15 +535,23 @@ computers! */
#define UNIV_SQL_NULL ULINT32_UNDEFINED
-/* Lengths which are not UNIV_SQL_NULL, but bigger than the following
+/** Lengths which are not UNIV_SQL_NULL, but bigger than the following
number indicate that a field contains a reference to an externally
stored part of the field in the tablespace. The length field then
contains the sum of the following flag and the locally stored len. */
#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE_MAX)
-/* Some macros to improve branch prediction and reduce cache misses */
#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
+#define HAVE_GCC_GT_2
+/* Tell the compiler that a variable/function may be unused. */
+# define UNIV_UNUSED __attribute__ ((unused))
+#else
+# define UNIV_UNUSED
+#endif /* __GNUC__ > 2 && !__INTEL_COMPILER */
+
+/* Some macros to improve branch prediction and reduce cache misses */
+#if defined(INNODB_COMPILER_HINTS) && defined(HAVE_GCC_GT_2)
/* Tell the compiler that 'expr' probably evaluates to 'constant'. */
# define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant)
/* Tell the compiler that a pointer is likely to be NULL */
@@ -463,19 +562,30 @@ it is read. */
/* Minimize cache-miss latency by moving data at addr into a cache before
it is read or written. */
# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
+
/* Sun Studio includes sun_prefetch.h as of version 5.9 */
#elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \
|| (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590)
+
# include <sun_prefetch.h>
+
#if __SUNPRO_C >= 0x550
# undef UNIV_INTERN
# define UNIV_INTERN __hidden
#endif /* __SUNPRO_C >= 0x550 */
-/* Use sun_prefetch when compile with Sun Studio */
+
# define UNIV_EXPECT(expr,value) (expr)
# define UNIV_LIKELY_NULL(expr) (expr)
-# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr)
-# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
+
+# if defined(INNODB_COMPILER_HINTS)
+//# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr)
+# define UNIV_PREFETCH_R(addr) ((void) 0)
+# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
+# else
+# define UNIV_PREFETCH_R(addr) ((void) 0)
+# define UNIV_PREFETCH_RW(addr) ((void) 0)
+# endif /* INNODB_COMPILER_HINTS */
+
#else
/* Dummy versions of the macros */
# define UNIV_EXPECT(expr,value) (expr)
@@ -483,6 +593,7 @@ it is read or written. */
# define UNIV_PREFETCH_R(addr) ((void) 0)
# define UNIV_PREFETCH_RW(addr) ((void) 0)
#endif
+
/* Tell the compiler that cond is likely to hold */
#define UNIV_LIKELY(cond) UNIV_EXPECT(cond, TRUE)
/* Tell the compiler that cond is unlikely to hold */
@@ -513,17 +624,25 @@ typedef void* os_thread_ret_t;
# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size)
# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
-# define UNIV_MEM_DESC(addr, size, b) VALGRIND_CREATE_BLOCK(addr, size, b)
+# define UNIV_MEM_DESC(addr, size) VALGRIND_CREATE_BLOCK(addr, size, #addr)
# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b)
-# define UNIV_MEM_ASSERT_RW(addr, size) do { \
+# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do { \
const void* _p = (const void*) (ulint) \
VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \
- if (UNIV_LIKELY_NULL(_p)) \
+ if (UNIV_LIKELY_NULL(_p)) { \
fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n", \
__FILE__, __LINE__, \
(const void*) (addr), (unsigned) (size), (long) \
(((const char*) _p) - ((const char*) (addr)))); \
- } while (0)
+ if (should_abort) { \
+ ut_error; \
+ } \
+ } \
+} while (0)
+# define UNIV_MEM_ASSERT_RW(addr, size) \
+ UNIV_MEM_ASSERT_RW_LOW(addr, size, false)
+# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) \
+ UNIV_MEM_ASSERT_RW_LOW(addr, size, true)
# define UNIV_MEM_ASSERT_W(addr, size) do { \
const void* _p = (const void*) (ulint) \
VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size); \
@@ -533,15 +652,22 @@ typedef void* os_thread_ret_t;
(const void*) (addr), (unsigned) (size), (long) \
(((const char*) _p) - ((const char*) (addr)))); \
} while (0)
+# define UNIV_MEM_TRASH(addr, c, size) do { \
+ ut_d(memset(addr, c, size)); \
+ UNIV_MEM_INVALID(addr, size); \
+ } while (0)
#else
# define UNIV_MEM_VALID(addr, size) do {} while(0)
# define UNIV_MEM_INVALID(addr, size) do {} while(0)
# define UNIV_MEM_FREE(addr, size) do {} while(0)
# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
-# define UNIV_MEM_DESC(addr, size, b) do {} while(0)
+# define UNIV_MEM_DESC(addr, size) do {} while(0)
# define UNIV_MEM_UNDESC(b) do {} while(0)
+# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do {} while(0)
# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
+# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) do {} while(0)
# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
+# define UNIV_MEM_TRASH(addr, c, size) do {} while(0)
#endif
#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \
UNIV_MEM_ASSERT_W(addr, size); \
@@ -552,6 +678,7 @@ typedef void* os_thread_ret_t;
UNIV_MEM_ALLOC(addr, size); \
} while (0)
-extern ulint srv_page_size_shift;
-extern ulint srv_page_size;
+extern ulong srv_page_size_shift;
+extern ulong srv_page_size;
+
#endif
diff --git a/storage/xtradb/include/usr0sess.h b/storage/xtradb/include/usr0sess.h
index bcc2f0d1d99..b5c80b97b43 100644
--- a/storage/xtradb/include/usr0sess.h
+++ b/storage/xtradb/include/usr0sess.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -51,8 +51,9 @@ sess_close(
/*=======*/
sess_t* sess); /* in, own: session object */
-/* The session handle. All fields are protected by the kernel mutex */
-struct sess_struct{
+/* The session handle. This data structure is only used by purge and is
+not really necessary. We should get rid of it. */
+struct sess_t{
ulint state; /*!< state of the session */
trx_t* trx; /*!< transaction object permanently
assigned for the session: the
diff --git a/storage/xtradb/include/usr0sess.ic b/storage/xtradb/include/usr0sess.ic
index 1dcca8a3853..284e59537fe 100644
--- a/storage/xtradb/include/usr0sess.ic
+++ b/storage/xtradb/include/usr0sess.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/usr0types.h b/storage/xtradb/include/usr0types.h
index 6c224e6db17..6ba937cacc8 100644
--- a/storage/xtradb/include/usr0types.h
+++ b/storage/xtradb/include/usr0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,6 +26,6 @@ Created 6/25/1996 Heikki Tuuri
#ifndef usr0types_h
#define usr0types_h
-typedef struct sess_struct sess_t;
+struct sess_t;
#endif
diff --git a/storage/xtradb/include/ut0bh.h b/storage/xtradb/include/ut0bh.h
index e89d76a51b3..84ea6dd915a 100644
--- a/storage/xtradb/include/ut0bh.h
+++ b/storage/xtradb/include/ut0bh.h
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -31,7 +31,7 @@ Created 2010-05-28 by Sunny Bains
/** Comparison function for objects in the binary heap. */
typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
-typedef struct ib_bh_struct ib_bh_t;
+struct ib_bh_t;
/**********************************************************************//**
Get the number of elements in the binary heap.
@@ -138,7 +138,7 @@ ib_bh_pop(
ib_bh_t* ib_bh); /*!< in/out: instance */
/** Binary heap data structure */
-struct ib_bh_struct {
+struct ib_bh_t {
ulint max_elems; /*!< max elements allowed */
ulint n_elems; /*!< current size */
ulint sizeof_elem; /*!< sizeof element */
diff --git a/storage/xtradb/include/ut0bh.ic b/storage/xtradb/include/ut0bh.ic
index 4d04f9b6f49..a604237665d 100644
--- a/storage/xtradb/include/ut0bh.ic
+++ b/storage/xtradb/include/ut0bh.ic
@@ -1,5 +1,6 @@
/***************************************************************************//**
-Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
+
+Copyright (c) 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -10,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -122,4 +123,3 @@ ib_bh_last(
: ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
}
-
diff --git a/storage/xtradb/include/ut0byte.h b/storage/xtradb/include/ut0byte.h
index 0c23e999268..5bdd553ca80 100644
--- a/storage/xtradb/include/ut0byte.h
+++ b/storage/xtradb/include/ut0byte.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -32,7 +32,7 @@ Created 1/20/1994 Heikki Tuuri
/*******************************************************//**
Creates a 64-bit integer out of two 32-bit integers.
-@return created dulint */
+@return created integer */
UNIV_INLINE
ib_uint64_t
ut_ull_create(
diff --git a/storage/xtradb/include/ut0byte.ic b/storage/xtradb/include/ut0byte.ic
index 2892c5429fb..873d98c727e 100644
--- a/storage/xtradb/include/ut0byte.ic
+++ b/storage/xtradb/include/ut0byte.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -25,7 +25,7 @@ Created 5/30/1994 Heikki Tuuri
/*******************************************************//**
Creates a 64-bit integer out of two 32-bit integers.
-@return created dulint */
+@return created integer */
UNIV_INLINE
ib_uint64_t
ut_ull_create(
@@ -90,7 +90,7 @@ ut_align(
ut_ad(sizeof(void*) == sizeof(ulint));
- return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1)));
+ return((void*)((((ulint) ptr) + align_no - 1) & ~(align_no - 1)));
}
/*********************************************************//**
@@ -110,7 +110,7 @@ ut_align_down(
ut_ad(sizeof(void*) == sizeof(ulint));
- return((void*)((((ulint)ptr)) & ~(align_no - 1)));
+ return((void*)((((ulint) ptr)) & ~(align_no - 1)));
}
/*********************************************************//**
@@ -130,7 +130,7 @@ ut_align_offset(
ut_ad(sizeof(void*) == sizeof(ulint));
- return(((ulint)ptr) & (align_no - 1));
+ return(((ulint) ptr) & (align_no - 1));
}
/*****************************************************************//**
diff --git a/storage/xtradb/include/ut0counter.h b/storage/xtradb/include/ut0counter.h
new file mode 100644
index 00000000000..fe0f36dfff2
--- /dev/null
+++ b/storage/xtradb/include/ut0counter.h
@@ -0,0 +1,203 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0counter.h
+
+Counter utility class
+
+Created 2012/04/12 by Sunny Bains
+*******************************************************/
+
+#ifndef UT0COUNTER_H
+#define UT0COUNTER_H
+
+#include "univ.i"
+#include <string.h>
+#include "os0thread.h"
+
+/** CPU cache line size */
+#define CACHE_LINE_SIZE 64
+
+/** Default number of slots to use in ib_counter_t */
+#define IB_N_SLOTS 64
+
+/** Get the offset into the counter array. */
+template <typename Type, int N>
+struct generic_indexer_t {
+ /** Default constructor/destructor should be OK. */
+
+ /** @return offset within m_counter */
+ size_t offset(size_t index) const UNIV_NOTHROW {
+ return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type)));
+ }
+};
+
+#ifdef HAVE_SCHED_GETCPU
+#include <utmpx.h>
+/** Use the cpu id to index into the counter array. If it fails then
+use the thread id. */
+template <typename Type, int N>
+struct get_sched_indexer_t : public generic_indexer_t<Type, N> {
+	/** Default constructor/destructor should be OK. */
+
+	/** @return result from sched_getcpu(), the thread id if it fails. */
+	size_t get_rnd_index() const UNIV_NOTHROW {
+		/* sched_getcpu() returns int: test for -1 before widening. */
+		const int	cpu = sched_getcpu();
+		if (cpu == -1) {
+			return((lint) os_thread_get_curr_id());
+		}
+
+		return(static_cast<size_t>(cpu));
+	}
+};
+#endif /* HAVE_SCHED_GETCPU */
+
+/** Use the thread id to index into the counter array. */
+template <typename Type, int N>
+struct thread_id_indexer_t : public generic_indexer_t<Type, N> {
+ /** Default constructor/destructor should be OK. */
+
+ /* @return a random number, currently we use the thread id. Where
+ thread id is represented as a pointer, it may not work as
+ effectively. */
+ size_t get_rnd_index() const UNIV_NOTHROW {
+ return((lint) os_thread_get_curr_id());
+ }
+};
+
+/** For counters where N=1 */
+template <typename Type, int N=1>
+struct single_indexer_t {
+ /** Default constructor/destructor should be OK. */
+
+ /** @return offset within m_counter */
+ size_t offset(size_t index) const UNIV_NOTHROW {
+ ut_ad(N == 1);
+ return((CACHE_LINE_SIZE / sizeof(Type)));
+ }
+
+ /* @return 1 */
+ size_t get_rnd_index() const UNIV_NOTHROW {
+ ut_ad(N == 1);
+ return(1);
+ }
+};
+
+/** Class for using fuzzy counters. The counter is not protected by any
+mutex and the results are not guaranteed to be 100% accurate but close
+enough. Creates an array of counters and separates each element by the
+CACHE_LINE_SIZE bytes */
+template <
+	typename Type,
+	int N = IB_N_SLOTS,
+	template<typename, int> class Indexer = thread_id_indexer_t>
+class ib_counter_t {
+public:
+	/** Start with every element of the array, padding included, zeroed. */
+	ib_counter_t() { memset(m_counter, 0x0, sizeof(m_counter)); }
+
+	~ib_counter_t() { ut_ad(validate()); }
+
+	/** Debug-only check that the padding between slots is still zero.
+	@return always true, so that the call can be wrapped in ut_ad() */
+	bool validate() UNIV_NOTHROW {
+#ifdef UNIV_DEBUG
+		size_t	n = (CACHE_LINE_SIZE / sizeof(Type));
+		/* Element 0 of each n-element group is skipped (the indexers
+		in this file return multiples of n); 1 .. n-1 must be zero. */
+		for (size_t i = 0; i < UT_ARR_SIZE(m_counter); i += n) {
+			for (size_t j = 1; j < n; ++j) {
+				ut_ad(m_counter[i + j] == 0);
+			}
+		}
+#endif /* UNIV_DEBUG */
+		return(true);
+	}
+
+	/** If you can't use a good index id. Increment by 1. */
+	void inc() UNIV_NOTHROW { add(1); }
+
+	/** If you can't use a good index id.
+	@param n - is the amount to increment */
+	void add(Type n) UNIV_NOTHROW {
+		size_t	i = m_policy.offset(m_policy.get_rnd_index());
+
+		ut_ad(i < UT_ARR_SIZE(m_counter));
+
+		m_counter[i] += n;
+	}
+
+	/** Use this if you can use a unique identifier, saves a
+	call to get_rnd_index().
+	@param index - index into a slot
+	@param n - amount to increment */
+	void add(size_t index, Type n) UNIV_NOTHROW {
+		size_t	i = m_policy.offset(index);
+
+		ut_ad(i < UT_ARR_SIZE(m_counter));
+
+		m_counter[i] += n;
+	}
+
+	/** If you can't use a good index id. Decrement by 1. */
+	void dec() UNIV_NOTHROW { sub(1); }
+
+	/** If you can't use a good index id.
+	@param n - is the amount to decrement */
+	void sub(Type n) UNIV_NOTHROW {
+		size_t	i = m_policy.offset(m_policy.get_rnd_index());
+
+		ut_ad(i < UT_ARR_SIZE(m_counter));
+
+		m_counter[i] -= n;
+	}
+
+	/** Use this if you can use a unique identifier, saves a
+	call to get_rnd_index().
+	@param index - index into a slot
+	@param n - amount to decrement */
+	void sub(size_t index, Type n) UNIV_NOTHROW {
+		size_t	i = m_policy.offset(index);
+
+		ut_ad(i < UT_ARR_SIZE(m_counter));
+
+		m_counter[i] -= n;
+	}
+
+	/** @return total value - not 100% accurate, since it is not atomic. */
+	operator Type() const UNIV_NOTHROW {
+		Type	total = 0;
+
+		for (size_t i = 0; i < N; ++i) {
+			total += m_counter[m_policy.offset(i)];
+		}
+
+		return(total);
+	}
+
+private:
+	/** Indexer into the array */
+	Indexer<Type, N>m_policy;
+
+	/** Slot 0 is unused. */
+	Type	m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))];
+};
+
+#endif /* UT0COUNTER_H */
diff --git a/storage/xtradb/include/ut0crc32.h b/storage/xtradb/include/ut0crc32.h
new file mode 100644
index 00000000000..86217692764
--- /dev/null
+++ b/storage/xtradb/include/ut0crc32.h
@@ -0,0 +1,51 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0crc32.h
+CRC32 implementation
+
+Created Aug 10, 2011 Vasil Dimov
+*******************************************************/
+
+#ifndef ut0crc32_h
+#define ut0crc32_h
+
+#include "univ.i"
+
+/********************************************************************//**
+Initializes the data structures used by ut_crc32(). Does not do any
+allocations, would not hurt if called twice, but would be pointless. */
+UNIV_INTERN
+void
+ut_crc32_init();
+/*===========*/
+
+/********************************************************************//**
+Calculates CRC32.
+@param ptr - data over which to calculate CRC32.
+@param len - data length in bytes.
+@return CRC32 (CRC-32C, using the GF(2) primitive polynomial 0x11EDC6F41,
+or 0x1EDC6F41 without the high-order bit) */
+typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
+
+extern ib_ut_crc32_t ut_crc32;
+
+extern bool ut_crc32_sse2_enabled;
+
+#endif /* ut0crc32_h */
diff --git a/storage/xtradb/include/ut0dbg.h b/storage/xtradb/include/ut0dbg.h
index 5a854326b7b..6a4afe99597 100644
--- a/storage/xtradb/include/ut0dbg.h
+++ b/storage/xtradb/include/ut0dbg.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -26,6 +26,12 @@ Created 1/30/1994 Heikki Tuuri
#ifndef ut0dbg_h
#define ut0dbg_h
+#ifdef UNIV_INNOCHECKSUM
+#define ut_a assert
+#define ut_ad assert
+#define ut_error assert(0)
+#else /* !UNIV_INNOCHECKSUM */
+
#include "univ.i"
#include <stdlib.h>
#include "os0thread.h"
@@ -97,10 +103,10 @@ ut_dbg_assertion_failed(
#include <sys/resource.h>
/** structure used for recording usage statistics */
-typedef struct speedo_struct {
+struct speedo_t {
struct rusage ru; /*!< getrusage() result */
struct timeval tv; /*!< gettimeofday() result */
-} speedo_t;
+};
/*******************************************************************//**
Resets a speedo (records the current time in it). */
@@ -121,4 +127,6 @@ speedo_show(
#endif /* UNIV_COMPILE_TEST_FUNCS */
+#endif /* !UNIV_INNOCHECKSUM */
+
#endif
diff --git a/storage/xtradb/include/ut0list.h b/storage/xtradb/include/ut0list.h
index 4cfe4b9d8ce..29fc8669ce4 100644
--- a/storage/xtradb/include/ut0list.h
+++ b/storage/xtradb/include/ut0list.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -48,9 +48,8 @@ automatically freeing the list node when the item's heap is freed.
#include "mem0mem.h"
-typedef struct ib_list_struct ib_list_t;
-typedef struct ib_list_node_struct ib_list_node_t;
-typedef struct ib_list_helper_struct ib_list_helper_t;
+struct ib_list_t;
+struct ib_list_node_t;
/****************************************************************//**
Create a new list using mem_alloc. Lists created with this function must be
@@ -142,8 +141,17 @@ ib_list_get_last(
/*=============*/
ib_list_t* list); /*!< in: list */
+/********************************************************************
+Check if list is empty. */
+UNIV_INLINE
+ibool
+ib_list_is_empty(
+/*=============*/
+ /* out: TRUE if empty else FALSE */
+ const ib_list_t* list); /* in: list */
+
/* List. */
-struct ib_list_struct {
+struct ib_list_t {
ib_list_node_t* first; /*!< first node */
ib_list_node_t* last; /*!< last node */
ibool is_heap_list; /*!< TRUE if this list was
@@ -151,7 +159,7 @@ struct ib_list_struct {
};
/* A list node. */
-struct ib_list_node_struct {
+struct ib_list_node_t {
ib_list_node_t* prev; /*!< previous node */
ib_list_node_t* next; /*!< next node */
void* data; /*!< user data */
@@ -160,7 +168,7 @@ struct ib_list_node_struct {
/* Quite often, the only additional piece of data you need is the per-item
memory heap, so we have this generic struct available to use in those
cases. */
-struct ib_list_helper_struct {
+struct ib_list_helper_t {
mem_heap_t* heap; /*!< memory heap */
void* data; /*!< user data */
};
diff --git a/storage/xtradb/include/ut0list.ic b/storage/xtradb/include/ut0list.ic
index c8810675ca0..d9dcb2eac99 100644
--- a/storage/xtradb/include/ut0list.ic
+++ b/storage/xtradb/include/ut0list.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -46,3 +46,15 @@ ib_list_get_last(
{
return(list->last);
}
+
+/********************************************************************
+Check if list is empty. */
+UNIV_INLINE
+ibool
+ib_list_is_empty(
+/*=============*/
+ /* out: TRUE if empty else FALSE */
+ const ib_list_t* list) /* in: list */
+{
+ return(!(list->first || list->last));
+}
diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h
index 9bb4bc7723f..b53e7ade4c1 100644
--- a/storage/xtradb/include/ut0lst.h
+++ b/storage/xtradb/include/ut0lst.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -28,10 +28,17 @@ Created 9/10/1995 Heikki Tuuri
#include "univ.i"
+/*******************************************************************//**
+Return offset of F in POD T.
+@param T - POD pointer
+@param F - Field in T */
+#define IB_OFFSETOF(T, F) \
+ (reinterpret_cast<byte*>(&(T)->F) - reinterpret_cast<byte*>(T))
+
/* This module implements the two-way linear list which should be used
if a list is used in the database. Note that a single struct may belong
to two or more lists, provided that the list are given different names.
-An example of the usage of the lists can be found in fil0fil.c. */
+An example of the usage of the lists can be found in fil0fil.cc. */
/*******************************************************************//**
This macro expands to the unnamed type definition of a struct which acts
@@ -39,12 +46,16 @@ as the two-way list base node. The base node contains pointers
to both ends of the list and a count of nodes in the list (excluding
the base node from the count).
@param TYPE the name of the list node data type */
-#define UT_LIST_BASE_NODE_T(TYPE)\
-struct {\
- ulint count; /*!< count of nodes in list */\
- TYPE * start; /*!< pointer to list start, NULL if empty */\
- TYPE * end; /*!< pointer to list end, NULL if empty */\
-}\
+template <typename TYPE>
+struct ut_list_base {
+ typedef TYPE elem_type;
+
+ ulint count; /*!< count of nodes in list */
+ TYPE* start; /*!< pointer to list start, NULL if empty */
+ TYPE* end; /*!< pointer to list end, NULL if empty */
+};
+
+#define UT_LIST_BASE_NODE_T(TYPE) ut_list_base<TYPE>
/*******************************************************************//**
This macro expands to the unnamed type definition of a struct which
@@ -54,20 +65,36 @@ The name of the field in the node struct should be the name given
to the list.
@param TYPE the list node type name */
/* Example:
-typedef struct LRU_node_struct LRU_node_t;
-struct LRU_node_struct {
+struct LRU_node_t {
UT_LIST_NODE_T(LRU_node_t) LRU_list;
...
}
The example implements an LRU list of name LRU_list. Its nodes are of type
LRU_node_t. */
-#define UT_LIST_NODE_T(TYPE)\
-struct {\
- TYPE * prev; /*!< pointer to the previous node,\
- NULL if start of list */\
- TYPE * next; /*!< pointer to next node, NULL if end of list */\
-}\
+template <typename TYPE>
+struct ut_list_node {
+ TYPE* prev; /*!< pointer to the previous node,
+ NULL if start of list */
+ TYPE* next; /*!< pointer to next node, NULL if end of list */
+};
+
+#define UT_LIST_NODE_T(TYPE) ut_list_node<TYPE>
+
+/*******************************************************************//**
+Get the list node at offset.
+@param elem - list element
+@param offset - offset within element.
+@return reference to list node. */
+template <typename Type>
+ut_list_node<Type>&
+ut_elem_get_node(Type& elem, size_t offset)
+{
+ ut_a(offset < sizeof(elem));
+
+ return(*reinterpret_cast<ut_list_node<Type>*>(
+ reinterpret_cast<byte*>(&elem) + offset));
+}
/*******************************************************************//**
Initializes the base node of a two-way list.
@@ -82,108 +109,197 @@ Initializes the base node of a two-way list.
/*******************************************************************//**
Adds the node as the first element in a two-way linked list.
+@param list the base node (not a pointer to it)
+@param elem the element to add
+@param offset offset of list node in elem. */
+template <typename List, typename Type>
+void
+ut_list_prepend(
+ List& list,
+ Type& elem,
+ size_t offset)
+{
+ ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset);
+
+ elem_node.prev = 0;
+ elem_node.next = list.start;
+
+ if (list.start != 0) {
+ ut_list_node<Type>& base_node =
+ ut_elem_get_node(*list.start, offset);
+
+ ut_ad(list.start != &elem);
+
+ base_node.prev = &elem;
+ }
+
+ list.start = &elem;
+
+ if (list.end == 0) {
+ list.end = &elem;
+ }
+
+ ++list.count;
+}
+
+/*******************************************************************//**
+Adds the node as the first element in a two-way linked list.
@param NAME list name
-@param BASE the base node (not a pointer to it)
-@param N pointer to the node to be added to the list.
-*/
-#define UT_LIST_ADD_FIRST(NAME, BASE, N)\
-{\
- ut_ad(N);\
- ((BASE).count)++;\
- ((N)->NAME).next = (BASE).start;\
- ((N)->NAME).prev = NULL;\
- if (UNIV_LIKELY((BASE).start != NULL)) {\
- ut_ad((BASE).start != (N));\
- (((BASE).start)->NAME).prev = (N);\
- }\
- (BASE).start = (N);\
- if (UNIV_UNLIKELY((BASE).end == NULL)) {\
- (BASE).end = (N);\
- }\
-}\
+@param LIST the base node (not a pointer to it)
+@param ELEM the element to add */
+#define UT_LIST_ADD_FIRST(NAME, LIST, ELEM) /* ELEM: pointer to element */ \
+	ut_list_prepend(LIST, *(ELEM), IB_OFFSETOF(ELEM, NAME))
+
+/*******************************************************************//**
+Adds the node as the last element in a two-way linked list.
+@param list list
+@param elem the element to add
+@param offset offset of list node in elem */
+template <typename List, typename Type>
+void
+ut_list_append(
+ List& list,
+ Type& elem,
+ size_t offset)
+{
+ ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset);
+
+ elem_node.next = 0;
+ elem_node.prev = list.end;
+
+ if (list.end != 0) {
+ ut_list_node<Type>& base_node =
+ ut_elem_get_node(*list.end, offset);
+
+ ut_ad(list.end != &elem);
+
+ base_node.next = &elem;
+ }
+
+ list.end = &elem;
+
+ if (list.start == 0) {
+ list.start = &elem;
+ }
+
+ ++list.count;
+}
/*******************************************************************//**
Adds the node as the last element in a two-way linked list.
@param NAME list name
-@param BASE the base node (not a pointer to it)
-@param N pointer to the node to be added to the list
-*/
-#define UT_LIST_ADD_LAST(NAME, BASE, N)\
-{\
- ut_ad(N != NULL);\
- ((BASE).count)++;\
- ((N)->NAME).prev = (BASE).end;\
- ((N)->NAME).next = NULL;\
- if ((BASE).end != NULL) {\
- ut_ad((BASE).end != (N));\
- (((BASE).end)->NAME).next = (N);\
- }\
- (BASE).end = (N);\
- if ((BASE).start == NULL) {\
- (BASE).start = (N);\
- }\
-}\
+@param LIST list
+@param ELEM the element to add */
+#define UT_LIST_ADD_LAST(NAME, LIST, ELEM) /* ELEM: pointer to element */ \
+	ut_list_append(LIST, *(ELEM), IB_OFFSETOF(ELEM, NAME))
/*******************************************************************//**
-Inserts a NODE2 after NODE1 in a list.
+Inserts ELEM2 after ELEM1 in a list.
+@param list the base node
+@param elem1 node after which elem2 is inserted
+@param elem2 node being inserted after elem1
+@param offset offset of list node in elem1 and elem2 */
+template <typename List, typename Type>
+void
+ut_list_insert(
+ List& list,
+ Type& elem1,
+ Type& elem2,
+ size_t offset)
+{
+ ut_ad(&elem1 != &elem2);
+
+ ut_list_node<Type>& elem1_node = ut_elem_get_node(elem1, offset);
+ ut_list_node<Type>& elem2_node = ut_elem_get_node(elem2, offset);
+
+ elem2_node.prev = &elem1;
+ elem2_node.next = elem1_node.next;
+
+ if (elem1_node.next != NULL) {
+ ut_list_node<Type>& next_node =
+ ut_elem_get_node(*elem1_node.next, offset);
+
+ next_node.prev = &elem2;
+ }
+
+ elem1_node.next = &elem2;
+
+ if (list.end == &elem1) {
+ list.end = &elem2;
+ }
+
+ ++list.count;
+}
+
+/*******************************************************************//**
+Inserts ELEM2 after ELEM1 in a list.
@param NAME list name
-@param BASE the base node (not a pointer to it)
-@param NODE1 pointer to node after which NODE2 is inserted
-@param NODE2 pointer to node being inserted after NODE1
-*/
-#define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\
-{\
- ut_ad(NODE1);\
- ut_ad(NODE2);\
- ut_ad((NODE1) != (NODE2));\
- ((BASE).count)++;\
- ((NODE2)->NAME).prev = (NODE1);\
- ((NODE2)->NAME).next = ((NODE1)->NAME).next;\
- if (((NODE1)->NAME).next != NULL) {\
- ((((NODE1)->NAME).next)->NAME).prev = (NODE2);\
- }\
- ((NODE1)->NAME).next = (NODE2);\
- if ((BASE).end == (NODE1)) {\
- (BASE).end = (NODE2);\
- }\
-}\
+@param LIST the base node
+@param ELEM1 node after which ELEM2 is inserted
+@param ELEM2 node being inserted after ELEM1 */
+#define UT_LIST_INSERT_AFTER(NAME, LIST, ELEM1, ELEM2) /* pointers */ \
+	ut_list_insert(LIST, *(ELEM1), *(ELEM2), IB_OFFSETOF(ELEM1, NAME))
#ifdef UNIV_LIST_DEBUG
/** Invalidate the pointers in a list node.
@param NAME list name
@param N pointer to the node that was removed */
-# define UT_LIST_REMOVE_CLEAR(NAME, N) \
-((N)->NAME.prev = (N)->NAME.next = (void*) -1)
+# define UT_LIST_REMOVE_CLEAR(N) \
+ (N).next = (Type*) -1; \
+ (N).prev = (N).next
#else
/** Invalidate the pointers in a list node.
@param NAME list name
@param N pointer to the node that was removed */
-# define UT_LIST_REMOVE_CLEAR(NAME, N) do {} while (0)
-#endif
+# define UT_LIST_REMOVE_CLEAR(N)
+#endif /* UNIV_LIST_DEBUG */
/*******************************************************************//**
Removes a node from a two-way linked list.
-@param NAME list name
-@param BASE the base node (not a pointer to it)
-@param N pointer to the node to be removed from the list
-*/
-#define UT_LIST_REMOVE(NAME, BASE, N) \
-do { \
- ut_ad(N); \
- ut_a((BASE).count > 0); \
- ((BASE).count)--; \
- if (((N)->NAME).next != NULL) { \
- ((((N)->NAME).next)->NAME).prev = ((N)->NAME).prev; \
- } else { \
- (BASE).end = ((N)->NAME).prev; \
- } \
- if (((N)->NAME).prev != NULL) { \
- ((((N)->NAME).prev)->NAME).next = ((N)->NAME).next; \
- } else { \
- (BASE).start = ((N)->NAME).next; \
- } \
- UT_LIST_REMOVE_CLEAR(NAME, N); \
-} while (0)
+@param list the base node (not a pointer to it)
+@param elem node to be removed from the list
+@param offset offset of list node within elem */
+template <typename List, typename Type>
+void
+ut_list_remove(
+ List& list,
+ Type& elem,
+ size_t offset)
+{
+ ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset);
+
+ ut_a(list.count > 0);
+
+ if (elem_node.next != NULL) {
+ ut_list_node<Type>& next_node =
+ ut_elem_get_node(*elem_node.next, offset);
+
+ next_node.prev = elem_node.prev;
+ } else {
+ list.end = elem_node.prev;
+ }
+
+ if (elem_node.prev != NULL) {
+ ut_list_node<Type>& prev_node =
+ ut_elem_get_node(*elem_node.prev, offset);
+
+ prev_node.next = elem_node.next;
+ } else {
+ list.start = elem_node.next;
+ }
+
+ UT_LIST_REMOVE_CLEAR(elem_node);
+
+ --list.count;
+}
+
+/*******************************************************************//**
+Removes a node from a two-way linked list.
+@param NAME list name
+@param LIST the base node (not a pointer to it)
+@param ELEM node to be removed from the list */
+#define UT_LIST_REMOVE(NAME, LIST, ELEM) /* ELEM: pointer to element */ \
+	ut_list_remove(LIST, *(ELEM), IB_OFFSETOF(ELEM, NAME))
/********************************************************************//**
Gets the next node in a two-way list.
@@ -223,39 +339,70 @@ Gets the last node in a two-way list.
#define UT_LIST_GET_LAST(BASE)\
(BASE).end
+struct NullValidate { void operator()(const void* elem) { } };
+
+/********************************************************************//**
+Iterate over all the elements and call the functor for each element.
+@param list base node (not a pointer to it)
+@param functor Functor that is called for each element in the list
+@param node pointer to member node within list element */
+template <typename List, class Functor>
+void
+ut_list_map(
+ List& list,
+ ut_list_node<typename List::elem_type>
+ List::elem_type::*node,
+ Functor functor)
+{
+ ulint count = 0;
+
+ for (typename List::elem_type* elem = list.start;
+ elem != 0;
+ elem = (elem->*node).next, ++count) {
+
+ functor(elem);
+ }
+
+ ut_a(count == list.count);
+}
+
+/********************************************************************//**
+Checks the consistency of a two-way list.
+@param list base node (not a pointer to it)
+@param functor Functor that is called for each element in the list
+@param node pointer to member node within list element */
+template <typename List, class Functor>
+void
+ut_list_validate(
+ List& list,
+ ut_list_node<typename List::elem_type>
+ List::elem_type::*node,
+ Functor functor = NullValidate())
+{
+ ut_list_map(list, node, functor);
+
+ ulint count = 0;
+
+ for (typename List::elem_type* elem = list.end;
+ elem != 0;
+ elem = (elem->*node).prev, ++count) {
+
+ functor(elem);
+ }
+
+ ut_a(count == list.count);
+}
+
/********************************************************************//**
Checks the consistency of a two-way list.
@param NAME the name of the list
@param TYPE node type
-@param BASE base node (not a pointer to it)
-@param ASSERTION a condition on ut_list_node_313 */
-#define UT_LIST_VALIDATE(NAME, TYPE, BASE, ASSERTION) \
-do { \
- ulint ut_list_i_313; \
- TYPE* ut_list_node_313; \
- \
- ut_list_node_313 = (BASE).start; \
- \
- for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
- ut_a(ut_list_node_313); \
- ASSERTION; \
- ut_ad((ut_list_node_313->NAME).next || !ut_list_i_313); \
- ut_list_node_313 = (ut_list_node_313->NAME).next; \
- } \
- \
- ut_a(ut_list_node_313 == NULL); \
- \
- ut_list_node_313 = (BASE).end; \
- \
- for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
- ut_a(ut_list_node_313); \
- ASSERTION; \
- ut_ad((ut_list_node_313->NAME).prev || !ut_list_i_313); \
- ut_list_node_313 = (ut_list_node_313->NAME).prev; \
- } \
- \
- ut_a(ut_list_node_313 == NULL); \
-} while (0)
-
-#endif
+@param LIST base node (not a pointer to it)
+@param FUNCTOR called for each list element */
+#define UT_LIST_VALIDATE(NAME, TYPE, LIST, FUNCTOR) \
+ ut_list_validate(LIST, &TYPE::NAME, FUNCTOR)
+
+#define UT_LIST_CHECK(NAME, TYPE, LIST) \
+ ut_list_validate(LIST, &TYPE::NAME, NullValidate())
+#endif /* ut0lst.h */
diff --git a/storage/xtradb/include/ut0mem.h b/storage/xtradb/include/ut0mem.h
index 16c31c2c36c..af7eb4e9b1d 100644
--- a/storage/xtradb/include/ut0mem.h
+++ b/storage/xtradb/include/ut0mem.h
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -101,7 +101,7 @@ ut_free(
void* ptr); /*!< in, own: memory block, can be NULL */
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
-Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
+Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not
use this function because the allocation functions in mem0mem.h are the
recommended ones in InnoDB.
@@ -211,6 +211,18 @@ ut_strreplace(
const char* s1, /*!< in: string to replace */
const char* s2); /*!< in: string to replace s1 with */
+/********************************************************************
+Concatenate 3 strings.*/
+
+char*
+ut_str3cat(
+/*=======*/
+ /* out, own: concatenated string, must be
+ freed with mem_free() */
+ const char* s1, /* in: string 1 */
+ const char* s2, /* in: string 2 */
+ const char* s3); /* in: string 3 */
+
/**********************************************************************//**
Converts a raw binary data to a NUL-terminated hex string. The output is
truncated if there is not enough space in "hex", make sure "hex_size" is at
diff --git a/storage/xtradb/include/ut0mem.ic b/storage/xtradb/include/ut0mem.ic
index de701bd50e3..5c9071d52cc 100644
--- a/storage/xtradb/include/ut0mem.ic
+++ b/storage/xtradb/include/ut0mem.ic
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -280,7 +280,7 @@ ut_str_sql_format(
switch (ch) {
case '\0':
- if (UNIV_UNLIKELY(buf_size - buf_i < 4)) {
+ if (buf_size - buf_i < 4) {
goto func_exit;
}
@@ -292,7 +292,7 @@ ut_str_sql_format(
case '\'':
case '\\':
- if (UNIV_UNLIKELY(buf_size - buf_i < 4)) {
+ if (buf_size - buf_i < 4) {
goto func_exit;
}
diff --git a/storage/xtradb/include/ut0rbt.h b/storage/xtradb/include/ut0rbt.h
index 0540e1ee386..5c25104b5d7 100644
--- a/storage/xtradb/include/ut0rbt.h
+++ b/storage/xtradb/include/ut0rbt.h
@@ -1,12 +1,6 @@
/***************************************************************************//**
-Copyright (c) 2007, 2010, Innobase Oy. All Rights Reserved.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
+Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -17,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
/******************************************************************//**
@@ -50,24 +44,19 @@ Created 2007-03-20 Sunny Bains
#define FALSE 0
#endif
-/* Red black tree typedefs */
-typedef struct ib_rbt_struct ib_rbt_t;
-typedef struct ib_rbt_node_struct ib_rbt_node_t;
-/* FIXME: Iterator is a better name than _bound_ */
-typedef struct ib_rbt_bound_struct ib_rbt_bound_t;
+struct ib_rbt_node_t;
typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
+typedef int (*ib_rbt_arg_compare)(const void*, const void* p1, const void* p2);
/** Red black tree color types */
-enum ib_rbt_color_enum {
+enum ib_rbt_color_t {
IB_RBT_RED,
IB_RBT_BLACK
};
-typedef enum ib_rbt_color_enum ib_rbt_color_t;
-
/** Red black tree node */
-struct ib_rbt_node_struct {
+struct ib_rbt_node_t {
ib_rbt_color_t color; /* color of this node */
ib_rbt_node_t* left; /* points left child */
@@ -78,7 +67,7 @@ struct ib_rbt_node_struct {
};
/** Red black tree instance.*/
-struct ib_rbt_struct {
+struct ib_rbt_t {
ib_rbt_node_t* nil; /* Black colored node that is
used as a sentinel. This is
pre-allocated too.*/
@@ -90,12 +79,16 @@ struct ib_rbt_struct {
ulint n_nodes; /* Total number of data nodes */
ib_rbt_compare compare; /* Fn. to use for comparison */
+ ib_rbt_arg_compare
+ compare_with_arg; /* Fn. to use for comparison
+ with argument */
ulint sizeof_value; /* Sizeof the item in bytes */
+ void* cmp_arg; /* Compare func argument */
};
/** The result of searching for a key in the tree, this is useful for
a speedy lookup and insert if key doesn't exist.*/
-struct ib_rbt_bound_struct {
+struct ib_rbt_bound_t {
const ib_rbt_node_t*
last; /* Last node visited */
@@ -137,6 +130,18 @@ rbt_create(
size_t sizeof_value, /*!< in: size in bytes */
ib_rbt_compare compare); /*!< in: comparator */
/**********************************************************************//**
+Create an instance of a red black tree, whose comparison function takes
+an argument
+@return rb tree instance */
+UNIV_INTERN
+ib_rbt_t*
+rbt_create_arg_cmp(
+/*===============*/
+ size_t sizeof_value, /*!< in: size in bytes */
+ ib_rbt_arg_compare
+ compare, /*!< in: comparator */
+ void* cmp_arg); /*!< in: compare fn arg */
+/**********************************************************************//**
Delete a node from the red black tree, identified by key */
UNIV_INTERN
ibool
@@ -280,7 +285,10 @@ rbt_search_cmp(
const ib_rbt_t* tree, /*!< in: rb tree */
ib_rbt_bound_t* parent, /*!< in: search bounds */
const void* key, /*!< in: key to search */
- ib_rbt_compare compare); /*!< in: comparator */
+ ib_rbt_compare compare, /*!< in: comparator */
+ ib_rbt_arg_compare
+ arg_compare); /*!< in: fn to compare items
+ with argument */
/**********************************************************************//**
Clear the tree, deletes (and free's) all the nodes. */
UNIV_INTERN
diff --git a/storage/xtradb/include/ut0rnd.h b/storage/xtradb/include/ut0rnd.h
index bed2c668c60..53b769849a5 100644
--- a/storage/xtradb/include/ut0rnd.h
+++ b/storage/xtradb/include/ut0rnd.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -28,6 +28,8 @@ Created 1/20/1994 Heikki Tuuri
#include "univ.i"
+#ifndef UNIV_INNOCHECKSUM
+
#include "ut0byte.h"
/** The 'character code' for end of field or string (used
@@ -87,16 +89,6 @@ ut_hash_ulint(
ulint key, /*!< in: value to be hashed */
ulint table_size); /*!< in: hash table size */
/*************************************************************//**
-Folds a pair of ulints.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_ulint_pair(
-/*===============*/
- ulint n1, /*!< in: ulint */
- ulint n2) /*!< in: ulint */
- __attribute__((const));
-/*************************************************************//**
Folds a 64-bit integer.
@return folded value */
UNIV_INLINE
@@ -114,23 +106,6 @@ ut_fold_string(
/*===========*/
const char* str) /*!< in: null-terminated string */
__attribute__((pure));
-/*************************************************************//**
-Folds a binary string.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_binary(
-/*===========*/
- const byte* str, /*!< in: string of bytes */
- ulint len) /*!< in: length */
- __attribute__((pure));
-UNIV_INLINE
-ulint
-ut_fold_binary_32(
-/*==============*/
- const byte* str, /*!< in: string of bytes */
- ulint len) /*!< in: length */
- __attribute__((pure));
/***********************************************************//**
Looks for a prime number slightly greater than the given argument.
The prime is chosen so that it is not near any power of 2.
@@ -142,6 +117,29 @@ ut_find_prime(
ulint n) /*!< in: positive number > 100 */
__attribute__((const));
+#endif /* !UNIV_INNOCHECKSUM */
+
+/*************************************************************//**
+Folds a pair of ulints.
+@return folded value */
+UNIV_INLINE
+ulint
+ut_fold_ulint_pair(
+/*===============*/
+ ulint n1, /*!< in: ulint */
+ ulint n2) /*!< in: ulint */
+ __attribute__((const));
+/*************************************************************//**
+Folds a binary string.
+@return folded value */
+UNIV_INLINE
+ulint
+ut_fold_binary(
+/*===========*/
+ const byte* str, /*!< in: string of bytes */
+ ulint len) /*!< in: length */
+ __attribute__((pure));
+
#ifndef UNIV_NONINL
#include "ut0rnd.ic"
diff --git a/storage/xtradb/include/ut0rnd.ic b/storage/xtradb/include/ut0rnd.ic
index 30bd32726fa..024c59e553b 100644
--- a/storage/xtradb/include/ut0rnd.ic
+++ b/storage/xtradb/include/ut0rnd.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -25,6 +25,9 @@ Created 5/30/1994 Heikki Tuuri
#define UT_HASH_RANDOM_MASK 1463735687
#define UT_HASH_RANDOM_MASK2 1653893711
+
+#ifndef UNIV_INNOCHECKSUM
+
#define UT_RND1 151117737
#define UT_RND2 119785373
#define UT_RND3 85689495
@@ -156,20 +159,6 @@ ut_hash_ulint(
}
/*************************************************************//**
-Folds a pair of ulints.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_ulint_pair(
-/*===============*/
- ulint n1, /*!< in: ulint */
- ulint n2) /*!< in: ulint */
-{
- return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1)
- ^ UT_HASH_RANDOM_MASK) + n2);
-}
-
-/*************************************************************//**
Folds a 64-bit integer.
@return folded value */
UNIV_INLINE
@@ -203,6 +192,22 @@ ut_fold_string(
return(fold);
}
+#endif /* !UNIV_INNOCHECKSUM */
+
+/*************************************************************//**
+Folds a pair of ulints.
+@return folded value */
+UNIV_INLINE
+ulint
+ut_fold_ulint_pair(
+/*===============*/
+ ulint n1, /*!< in: ulint */
+ ulint n2) /*!< in: ulint */
+{
+ return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1)
+ ^ UT_HASH_RANDOM_MASK) + n2);
+}
+
/*************************************************************//**
Folds a binary string.
@return folded value */
@@ -213,40 +218,37 @@ ut_fold_binary(
const byte* str, /*!< in: string of bytes */
ulint len) /*!< in: length */
{
- const byte* str_end = str + len;
ulint fold = 0;
+ const byte* str_end = str + (len & 0xFFFFFFF8);
ut_ad(str || !len);
while (str < str_end) {
- fold = ut_fold_ulint_pair(fold, (ulint)(*str));
-
- str++;
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
}
- return(fold);
-}
-
-UNIV_INLINE
-ulint
-ut_fold_binary_32(
-/*==============*/
- const byte* str, /*!< in: string of bytes */
- ulint len) /*!< in: length */
-{
- const ib_uint32_t* str_end = (const ib_uint32_t*) (str + len);
- const ib_uint32_t* str_32 = (const ib_uint32_t*) str;
- ulint fold = 0;
-
- ut_ad(str);
- /* This function is only for word-aligned data */
- ut_ad(len % 4 == 0);
- ut_ad((ulint)str % 4 == 0);
-
- while (str_32 < str_end) {
- fold = ut_fold_ulint_pair(fold, (ulint)(*str_32));
-
- str_32++;
+ switch (len & 0x7) {
+ case 7:
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ case 6:
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ case 5:
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ case 4:
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ case 3:
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ case 2:
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+ case 1:
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
}
return(fold);
diff --git a/storage/xtradb/include/ut0sort.h b/storage/xtradb/include/ut0sort.h
index 8cc73e65b2a..75648b5c317 100644
--- a/storage/xtradb/include/ut0sort.h
+++ b/storage/xtradb/include/ut0sort.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h
index 37f1c6064b6..163dc23b363 100644
--- a/storage/xtradb/include/ut0ut.h
+++ b/storage/xtradb/include/ut0ut.h
@@ -1,13 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Sun Microsystems, Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -18,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -35,6 +28,8 @@ Created 1/20/1994 Heikki Tuuri
#include "univ.i"
+#ifndef UNIV_INNOCHECKSUM
+
#include "db0err.h"
#ifndef UNIV_HOTBACKUP
@@ -46,6 +41,8 @@ Created 1/20/1994 Heikki Tuuri
#include <ctype.h>
#endif
+#include <stdarg.h> /* for va_list */
+
/** Index name prefix in fast index creation */
#define TEMP_INDEX_PREFIX '\377'
/** Index name prefix in fast index creation, as a string constant */
@@ -55,27 +52,32 @@ Created 1/20/1994 Heikki Tuuri
typedef time_t ib_time_t;
#ifndef UNIV_HOTBACKUP
-#if defined(HAVE_PAUSE_INSTRUCTION)
+# if defined(HAVE_PAUSE_INSTRUCTION)
/* According to the gcc info page, asm volatile means that the
instruction has important side-effects and must not be removed.
Also asm volatile may trigger a memory barrier (spilling all registers
to memory). */
-# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+# ifdef __SUNPRO_CC
+# define UT_RELAX_CPU() asm ("pause" )
+# else
+# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
+# endif /* __SUNPRO_CC */
+
+# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-#elif defined(HAVE_WINDOWS_ATOMICS)
- /* In the Win32 API, the x86 PAUSE instruction is executed by calling
- the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
- independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-#elif defined(HAVE_ATOMIC_BUILTINS)
+# elif defined(HAVE_ATOMIC_BUILTINS)
# define UT_RELAX_CPU() do { \
volatile lint volatile_var; \
os_compare_and_swap_lint(&volatile_var, 0, 1); \
} while (0)
-#else
+# elif defined(HAVE_WINDOWS_ATOMICS)
+ /* In the Win32 API, the x86 PAUSE instruction is executed by calling
+ the YieldProcessor macro defined in WinNT.h. Using YieldProcessor keeps
+ this code independent of the CPU architecture. */
+# define UT_RELAX_CPU() YieldProcessor()
+# else
# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
-#endif
+# endif
/*********************************************************************//**
Delays execution for at most max_wait_us microseconds or returns earlier
@@ -94,16 +96,9 @@ do { \
} while (0)
#endif /* !UNIV_HOTBACKUP */
-/********************************************************//**
-Gets the high 32 bits in a ulint. That is makes a shift >> 32,
-but since there seem to be compiler bugs in both gcc and Visual C++,
-we do this by a special conversion.
-@return a >> 32 */
-UNIV_INTERN
-ulint
-ut_get_high32(
-/*==========*/
- ulint a); /*!< in: ulint */
+template <class T> T ut_min(T a, T b) { return(a < b ? a : b); }
+template <class T> T ut_max(T a, T b) { return(a > b ? a : b); }
+
/******************************************************//**
Calculates the minimum of two ulints.
@return minimum */
@@ -122,15 +117,6 @@ ut_max(
/*===*/
ulint n1, /*!< in: first number */
ulint n2); /*!< in: second number */
-/******************************************************//**
-Calculates the maximum of two ib_uint64_t values.
-@return the maximum */
-UNIV_INLINE
-ib_uint64_t
-ut_max_uint64(
-/*==========*/
- ib_uint64_t n1, /*!< in: first number */
- ib_uint64_t n2); /*!< in: second number */
/****************************************************************//**
Calculates minimum of two ulint-pairs. */
UNIV_INLINE
@@ -270,6 +256,16 @@ ut_time_ms(void);
#endif /* !UNIV_HOTBACKUP */
/**********************************************************//**
+Returns the number of milliseconds since some epoch. The
+value may wrap around. It should only be used for heuristic
+purposes.
+@return ms since epoch */
+UNIV_INTERN
+ulint
+ut_time_ms(void);
+/*============*/
+
+/**********************************************************//**
Returns the difference of two times in seconds.
@return time2 - time1 expressed in seconds */
UNIV_INTERN
@@ -278,6 +274,9 @@ ut_difftime(
/*========*/
ib_time_t time2, /*!< in: time */
ib_time_t time1); /*!< in: time */
+
+#endif /* !UNIV_INNOCHECKSUM */
+
/**********************************************************//**
Prints a timestamp to a file. */
UNIV_INTERN
@@ -286,6 +285,9 @@ ut_print_timestamp(
/*===============*/
FILE* file) /*!< in: file where to print */
UNIV_COLD __attribute__((nonnull));
+
+#ifndef UNIV_INNOCHECKSUM
+
/**********************************************************//**
Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
UNIV_INTERN
@@ -343,7 +345,7 @@ ut_print_filename(
#ifndef UNIV_HOTBACKUP
/* Forward declaration of transaction handle */
-struct trx_struct;
+struct trx_t;
/**********************************************************************//**
Outputs a fixed-length string, quoted as an SQL identifier.
@@ -355,7 +357,7 @@ void
ut_print_name(
/*==========*/
FILE* f, /*!< in: output stream */
- struct trx_struct*trx, /*!< in: transaction */
+ const trx_t* trx, /*!< in: transaction */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name); /*!< in: name to print */
@@ -370,13 +372,31 @@ void
ut_print_namel(
/*===========*/
FILE* f, /*!< in: output stream */
- struct trx_struct*trx, /*!< in: transaction (NULL=no quotes) */
+ const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name, /*!< in: name to print */
ulint namelen);/*!< in: length of name */
/**********************************************************************//**
+Formats a table or index name, quoted as an SQL identifier. If the name
+contains a slash '/', the result will contain two identifiers separated by
+a period (.), as in SQL database_name.identifier.
+@return pointer to 'formatted' */
+UNIV_INTERN
+char*
+ut_format_name(
+/*===========*/
+ const char* name, /*!< in: table or index name, must be
+ '\0'-terminated */
+ ibool is_table, /*!< in: if TRUE then 'name' is a table
+ name */
+ char* formatted, /*!< out: formatted result, will be
+ '\0'-terminated */
+ ulint formatted_size);/*!< in: no more than this number of
+ bytes will be written to 'formatted' */
+
+/**********************************************************************//**
Catenate files. */
UNIV_INTERN
void
@@ -388,6 +408,22 @@ ut_copy_file(
#ifdef __WIN__
/**********************************************************************//**
+A substitute for vsnprintf(3), formatted output conversion into
+a limited buffer. Note: this function DOES NOT return the number of
+characters that would have been printed if the buffer was unlimited because
+VC's _vsnprintf() returns -1 in this case and we would need to call
+_vscprintf() in addition to estimate that but we would need another copy
+of "ap" for that and VC does not provide va_copy(). */
+UNIV_INTERN
+void
+ut_vsnprintf(
+/*=========*/
+ char* str, /*!< out: string */
+ size_t size, /*!< in: str size */
+ const char* fmt, /*!< in: format */
+ va_list ap); /*!< in: format values */
+
+/**********************************************************************//**
A substitute for snprintf(3), formatted output conversion into
a limited buffer.
@return number of characters that would have been printed if the size
@@ -402,6 +438,15 @@ ut_snprintf(
...); /*!< in: format values */
#else
/**********************************************************************//**
+A wrapper for vsnprintf(3), formatted output conversion into
+a limited buffer. Note: this function DOES NOT return the number of
+characters that would have been printed if the buffer was unlimited because
+VC's _vsnprintf() returns -1 in this case and we would need to call
+_vscprintf() in addition to estimate that but we would need another copy
+of "ap" for that and VC does not provide va_copy(). */
+# define ut_vsnprintf(buf, size, fmt, ap) \
+ ((void) vsnprintf(buf, size, fmt, ap))
+/**********************************************************************//**
A wrapper for snprintf(3), formatted output conversion into
a limited buffer. */
# define ut_snprintf snprintf
@@ -415,11 +460,25 @@ UNIV_INTERN
const char*
ut_strerr(
/*======*/
- enum db_err num); /*!< in: error number */
+ dberr_t num); /*!< in: error number */
+
+/****************************************************************
+Sort function for ulint arrays. */
+UNIV_INTERN
+void
+ut_ulint_sort(
+/*==========*/
+ ulint* arr, /*!< in/out: array to sort */
+ ulint* aux_arr, /*!< in/out: aux array to use in sort */
+ ulint low, /*!< in: lower bound */
+ ulint high) /*!< in: upper bound */
+ __attribute__((nonnull));
#ifndef UNIV_NONINL
#include "ut0ut.ic"
#endif
+#endif /* !UNIV_INNOCHECKSUM */
+
#endif
diff --git a/storage/xtradb/include/ut0ut.ic b/storage/xtradb/include/ut0ut.ic
index 019b3d216cf..4e0f76e1957 100644
--- a/storage/xtradb/include/ut0ut.ic
+++ b/storage/xtradb/include/ut0ut.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -49,19 +49,6 @@ ut_max(
return((n1 <= n2) ? n2 : n1);
}
-/******************************************************//**
-Calculates the maximum of two ib_uint64_t values.
-@return the maximum */
-UNIV_INLINE
-ib_uint64_t
-ut_max_uint64(
-/*==========*/
- ib_uint64_t n1, /*!< in: first number */
- ib_uint64_t n2) /*!< in: second number */
-{
- return((n1 <= n2) ? n2 : n1);
-}
-
/****************************************************************//**
Calculates minimum of two ulint-pairs. */
UNIV_INLINE
diff --git a/storage/xtradb/include/ut0vec.h b/storage/xtradb/include/ut0vec.h
index 316ae87c2cb..432fb348a09 100644
--- a/storage/xtradb/include/ut0vec.h
+++ b/storage/xtradb/include/ut0vec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -29,59 +29,116 @@ Created 4/6/2006 Osku Salerma
#include "univ.i"
#include "mem0mem.h"
-/** An automatically resizing vector data type. */
-typedef struct ib_vector_struct ib_vector_t;
+struct ib_alloc_t;
+struct ib_vector_t;
-/* An automatically resizing vector datatype with the following properties:
+typedef void* (*ib_mem_alloc_t)(
+ /* out: Pointer to allocated memory */
+ ib_alloc_t* allocator, /* in: Pointer to allocator instance */
+ ulint size); /* in: Number of bytes to allocate */
+
+typedef void (*ib_mem_free_t)(
+ ib_alloc_t* allocator, /* in: Pointer to allocator instance */
+ void* ptr); /* in: Memory to free */
- -Contains void* items.
+typedef void* (*ib_mem_resize_t)(
+ /* out: Pointer to resized memory */
+ ib_alloc_t* allocator, /* in: Pointer to allocator */
+ void* ptr, /* in: Memory to resize */
+ ulint old_size, /* in: Old memory size in bytes */
+ ulint new_size); /* in: New size in bytes */
- -The items are owned by the caller.
+typedef int (*ib_compare_t)(const void*, const void*);
- -All memory allocation is done through a heap owned by the caller, who is
- responsible for freeing it when done with the vector.
+/* An automatically resizing vector datatype with the following properties:
- -When the vector is resized, the old memory area is left allocated since it
- uses the same heap as the new memory area, so this is best used for
- relatively small or short-lived uses.
+ -All memory allocation is done through an allocator, which is responsible for
+freeing it when done with the vector.
*/
-/****************************************************************//**
-Create a new vector with the given initial size.
-@return vector */
+/* This is useful shorthand for elements of type void* */
+#define ib_vector_getp(v, n) (*(void**) ib_vector_get(v, n))
+#define ib_vector_getp_const(v, n) (*(void**) ib_vector_get_const(v, n))
+
+#define ib_vector_allocator(v) (v->allocator)
+
+/********************************************************************
+Create a new vector with the given initial size. */
UNIV_INTERN
ib_vector_t*
ib_vector_create(
/*=============*/
- mem_heap_t* heap, /*!< in: heap */
- ulint size); /*!< in: initial size */
+ /* out: vector */
+ ib_alloc_t* alloc, /* in: Allocator */
+ /* in: size of the data item */
+ ulint sizeof_value,
+ ulint size); /* in: initial size */
-/****************************************************************//**
-Push a new element to the vector, increasing its size if necessary. */
-UNIV_INTERN
+/********************************************************************
+Destroy the vector. Make sure the vector owns the allocator, e.g.,
+the heap in the heap allocator. */
+UNIV_INLINE
void
+ib_vector_free(
+/*===========*/
+ ib_vector_t* vec); /* in/out: vector */
+
+/********************************************************************
+Push a new element to the vector, increasing its size if necessary,
+if elem is not NULL then elem is copied to the vector.*/
+UNIV_INLINE
+void*
ib_vector_push(
/*===========*/
+ /* out: pointer to the "new" element */
+ ib_vector_t* vec, /* in/out: vector */
+ const void* elem); /* in: data element */
+
+/********************************************************************
+Pop the last element from the vector.*/
+UNIV_INLINE
+void*
+ib_vector_pop(
+/*==========*/
+ /* out: pointer to the popped element */
+ ib_vector_t* vec); /* in/out: vector */
+
+/*******************************************************************//**
+Remove an element from the vector
+@return pointer to the "removed" element */
+UNIV_INLINE
+void*
+ib_vector_remove(
+/*=============*/
ib_vector_t* vec, /*!< in: vector */
- void* elem); /*!< in: data element */
+ const void* elem); /*!< in: value to remove */
-/****************************************************************//**
-Get the number of elements in the vector.
-@return number of elements in vector */
+/********************************************************************
+Get the number of elements in the vector. */
UNIV_INLINE
ulint
ib_vector_size(
/*===========*/
- const ib_vector_t* vec); /*!< in: vector */
+ /* out: number of elements in vector */
+ const ib_vector_t* vec); /* in: vector */
-/****************************************************************//**
+/********************************************************************
+Increase the size of the vector. */
+UNIV_INTERN
+void
+ib_vector_resize(
+/*=============*/
+ /* out: void */
+ ib_vector_t* vec); /* in/out: vector */
+
+/********************************************************************
Test whether a vector is empty or not.
-@return TRUE if empty */
+@return TRUE if empty */
UNIV_INLINE
ibool
ib_vector_is_empty(
/*===============*/
- const ib_vector_t* vec); /*!< in: vector */
+ const ib_vector_t* vec); /*!< in: vector */
/****************************************************************//**
Get the n'th element.
@@ -93,6 +150,15 @@ ib_vector_get(
ib_vector_t* vec, /*!< in: vector */
ulint n); /*!< in: element index to get */
+/********************************************************************
+Const version of the get n'th element.
+@return n'th element */
+UNIV_INLINE
+const void*
+ib_vector_get_const(
+/*================*/
+ const ib_vector_t* vec, /* in: vector */
+ ulint n); /* in: element index to get */
/****************************************************************//**
Get last element. The vector must not be empty.
@return last element */
@@ -101,7 +167,6 @@ void*
ib_vector_get_last(
/*===============*/
ib_vector_t* vec); /*!< in: vector */
-
/****************************************************************//**
Set the n'th element. */
UNIV_INLINE
@@ -112,33 +177,161 @@ ib_vector_set(
ulint n, /*!< in: element index to set */
void* elem); /*!< in: data element */
-/****************************************************************//**
-Remove the last element from the vector. */
+/********************************************************************
+Reset the vector size to 0 elements. */
+UNIV_INLINE
+void
+ib_vector_reset(
+/*============*/
+ ib_vector_t* vec); /* in/out: vector */
+
+/********************************************************************
+Get the last element of the vector. */
UNIV_INLINE
void*
-ib_vector_pop(
-/*==========*/
- ib_vector_t* vec); /*!< in: vector */
+ib_vector_last(
+/*===========*/
+ /* out: pointer to last element */
+ ib_vector_t* vec); /* in/out: vector */
-/****************************************************************//**
-Free the underlying heap of the vector. Note that vec is invalid
-after this call. */
+/********************************************************************
+Get the last element of the vector. */
+UNIV_INLINE
+const void*
+ib_vector_last_const(
+/*=================*/
+ /* out: pointer to last element */
+ const ib_vector_t* vec); /* in: vector */
+
+/********************************************************************
+Sort the vector elements. */
UNIV_INLINE
void
-ib_vector_free(
+ib_vector_sort(
+/*===========*/
+ ib_vector_t* vec, /* in/out: vector */
+ ib_compare_t compare); /* in: the comparator to use for sort */
+
+/********************************************************************
+The default ib_vector_t heap free. Does nothing. */
+UNIV_INLINE
+void
+ib_heap_free(
+/*=========*/
+ ib_alloc_t* allocator, /* in: allocator */
+ void* ptr); /* in: pointer to memory (unused) */
+
+/********************************************************************
+The default ib_vector_t heap malloc. Uses mem_heap_alloc(). */
+UNIV_INLINE
+void*
+ib_heap_malloc(
+/*===========*/
+ /* out: pointer to allocated memory */
+ ib_alloc_t* allocator, /* in: allocator */
+ ulint size); /* in: size in bytes */
+
+/********************************************************************
+The default ib_vector_t heap resize. Since we can't resize the heap
+we have to copy the elements from the old ptr to the new ptr.
+Uses mem_heap_alloc(). */
+UNIV_INLINE
+void*
+ib_heap_resize(
/*===========*/
- ib_vector_t* vec); /*!< in,own: vector */
-
-/** An automatically resizing vector data type. */
-struct ib_vector_struct {
- mem_heap_t* heap; /*!< heap */
- void** data; /*!< data elements */
- ulint used; /*!< number of elements currently used */
- ulint total; /*!< number of elements allocated */
+ /* out: pointer to reallocated
+ memory */
+ ib_alloc_t* allocator, /* in: allocator */
+ void* old_ptr, /* in: pointer to memory */
+ ulint old_size, /* in: old size in bytes */
+ ulint new_size); /* in: new size in bytes */
+
+/********************************************************************
+Create a heap allocator that uses the passed in heap. */
+UNIV_INLINE
+ib_alloc_t*
+ib_heap_allocator_create(
+/*=====================*/
+ /* out: heap allocator instance */
+ mem_heap_t* heap); /* in: heap to use */
+
+/********************************************************************
+Free a heap allocator. */
+UNIV_INLINE
+void
+ib_heap_allocator_free(
+/*===================*/
+ ib_alloc_t* ib_ut_alloc); /* in: alloc instance to free */
+
+/********************************************************************
+Wrapper for ut_free(). */
+UNIV_INLINE
+void
+ib_ut_free(
+/*=======*/
+ ib_alloc_t* allocator, /* in: allocator */
+ void* ptr); /* in: memory to free */
+
+/********************************************************************
+Wrapper for ut_malloc(). */
+UNIV_INLINE
+void*
+ib_ut_malloc(
+/*=========*/
+ /* out: pointer to allocated memory */
+ ib_alloc_t* allocator, /* in: allocator */
+ ulint size); /* in: size in bytes */
+
+/********************************************************************
+Wrapper for ut_realloc(). */
+UNIV_INLINE
+void*
+ib_ut_resize(
+/*=========*/
+ /* out: pointer to reallocated
+ memory */
+ ib_alloc_t* allocator, /* in: allocator */
+ void* old_ptr, /* in: pointer to memory */
+ ulint old_size, /* in: old size in bytes */
+ ulint new_size); /* in: new size in bytes */
+
+/********************************************************************
+Create a ut allocator. */
+UNIV_INLINE
+ib_alloc_t*
+ib_ut_allocator_create(void);
+/*=========================*/
+
+/********************************************************************
+Free a ut allocator. */
+UNIV_INLINE
+void
+ib_ut_allocator_free(
+/*=================*/
+ ib_alloc_t* ib_ut_alloc); /* in: alloc instance to free */
+
+/* Allocator used by ib_vector_t. */
+struct ib_alloc_t {
+ ib_mem_alloc_t mem_malloc; /* For allocating memory */
+ ib_mem_free_t mem_release; /* For freeing memory */
+ ib_mem_resize_t mem_resize; /* For resizing memory */
+ void* arg; /* Currently if not NULL then it
+ points to the heap instance */
+};
+
+/* See comment at beginning of file. */
+struct ib_vector_t {
+ ib_alloc_t* allocator; /* Allocator, because one size
+ doesn't fit all */
+ void* data; /* data elements */
+ ulint used; /* number of elements currently used */
+ ulint total; /* number of elements allocated */
+ /* Size of a data item */
+ ulint sizeof_value;
};
#ifndef UNIV_NONINL
#include "ut0vec.ic"
#endif
-#endif
+#endif /* IB_VECTOR_H */
diff --git a/storage/xtradb/include/ut0vec.ic b/storage/xtradb/include/ut0vec.ic
index fce41362d3a..f41a85e1d1d 100644
--- a/storage/xtradb/include/ut0vec.ic
+++ b/storage/xtradb/include/ut0vec.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -23,21 +23,169 @@ A vector of pointers to data items
Created 4/6/2006 Osku Salerma
************************************************************************/
-/****************************************************************//**
-Get number of elements in vector.
-@return number of elements in vector */
+#define IB_VEC_OFFSET(v, i) ((v)->sizeof_value * (i)) /* byte offset of element i in vector v */
+
+/********************************************************************
+The default ib_vector_t heap malloc. Uses mem_heap_alloc(). */
+UNIV_INLINE
+void*
+ib_heap_malloc(
+/*===========*/
+ ib_alloc_t* allocator, /* in: allocator */
+ ulint size) /* in: size in bytes */
+{
+ mem_heap_t* heap = (mem_heap_t*) allocator->arg;
+
+ return(mem_heap_alloc(heap, size));
+}
+
+/********************************************************************
+The default ib_vector_t heap free. Does nothing. */
+UNIV_INLINE
+void
+ib_heap_free(
+/*=========*/
+ ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */
+ void* ptr UNIV_UNUSED) /* in: pointer to memory (unused) */
+{
+ /* We can't free individual elements. */
+}
+
+/********************************************************************
+The default ib_vector_t heap resize. Since we can't resize the heap
+we have to copy the elements from the old ptr to the new ptr.
+Uses mem_heap_alloc(). */
+UNIV_INLINE
+void*
+ib_heap_resize(
+/*===========*/
+ ib_alloc_t* allocator, /* in: allocator */
+ void* old_ptr, /* in: pointer to memory */
+ ulint old_size, /* in: old size in bytes */
+ ulint new_size) /* in: new size in bytes */
+{
+ void* new_ptr;
+ mem_heap_t* heap = (mem_heap_t*) allocator->arg;
+
+ new_ptr = mem_heap_alloc(heap, new_size);
+ memcpy(new_ptr, old_ptr, old_size);
+
+ return(new_ptr);
+}
+
+/********************************************************************
+Create a heap allocator that uses the passed in heap. */
+UNIV_INLINE
+ib_alloc_t*
+ib_heap_allocator_create(
+/*=====================*/
+ mem_heap_t* heap) /* in: heap to use */
+{
+ ib_alloc_t* heap_alloc;
+
+ heap_alloc = (ib_alloc_t*) mem_heap_alloc(heap, sizeof(*heap_alloc));
+
+ heap_alloc->arg = heap;
+ heap_alloc->mem_release = ib_heap_free;
+ heap_alloc->mem_malloc = ib_heap_malloc;
+ heap_alloc->mem_resize = ib_heap_resize;
+
+ return(heap_alloc);
+}
+
+/********************************************************************
+Free a heap allocator. */
+UNIV_INLINE
+void
+ib_heap_allocator_free(
+/*===================*/
+ ib_alloc_t* ib_ut_alloc) /* in: alloc instance to free */
+{
+ mem_heap_free((mem_heap_t*) ib_ut_alloc->arg);
+}
+
+/********************************************************************
+Wrapper around ut_malloc(). */
+UNIV_INLINE
+void*
+ib_ut_malloc(
+/*=========*/
+ ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */
+ ulint size) /* in: size in bytes */
+{
+ return(ut_malloc(size));
+}
+
+/********************************************************************
+Wrapper around ut_free(). */
+UNIV_INLINE
+void
+ib_ut_free(
+/*=======*/
+ ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */
+ void* ptr) /* in: memory to free */
+{
+ ut_free(ptr);
+}
+
+/********************************************************************
+Wrapper around ut_realloc(). */
+UNIV_INLINE
+void*
+ib_ut_resize(
+/*=========*/
+ ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */
+ void* old_ptr, /* in: pointer to memory */
+ ulint old_size UNIV_UNUSED,/* in: old size in bytes */
+ ulint new_size) /* in: new size in bytes */
+{
+ return(ut_realloc(old_ptr, new_size));
+}
+
+/********************************************************************
+Create a ut allocator. */
+UNIV_INLINE
+ib_alloc_t*
+ib_ut_allocator_create(void)
+/*========================*/
+{
+ ib_alloc_t* ib_ut_alloc;
+
+ ib_ut_alloc = (ib_alloc_t*) ut_malloc(sizeof(*ib_ut_alloc));
+
+ ib_ut_alloc->arg = NULL;
+ ib_ut_alloc->mem_release = ib_ut_free;
+ ib_ut_alloc->mem_malloc = ib_ut_malloc;
+ ib_ut_alloc->mem_resize = ib_ut_resize;
+
+ return(ib_ut_alloc);
+}
+
+/********************************************************************
+Free a ut allocator. */
+UNIV_INLINE
+void
+ib_ut_allocator_free(
+/*=================*/
+ ib_alloc_t* ib_ut_alloc) /* in: alloc instance to free */
+{
+ ut_free(ib_ut_alloc);
+}
+
+/********************************************************************
+Get number of elements in vector. */
UNIV_INLINE
ulint
ib_vector_size(
/*===========*/
- const ib_vector_t* vec) /*!< in: vector */
+ /* out: number of elements in vector*/
+ const ib_vector_t* vec) /* in: vector */
{
return(vec->used);
}
/****************************************************************//**
-Get n'th element.
-@return n'th element */
+Get n'th element. */
UNIV_INLINE
void*
ib_vector_get(
@@ -47,9 +195,23 @@ ib_vector_get(
{
ut_a(n < vec->used);
- return(vec->data[n]);
+ return((byte*) vec->data + IB_VEC_OFFSET(vec, n));
}
+/********************************************************************
+Const version of the get n'th element.
+@return n'th element */
+UNIV_INLINE
+const void*
+ib_vector_get_const(
+/*================*/
+ const ib_vector_t* vec, /* in: vector */
+ ulint n) /* in: element index to get */
+{
+ ut_a(n < vec->used);
+
+ return((byte*) vec->data + IB_VEC_OFFSET(vec, n));
+}
/****************************************************************//**
Get last element. The vector must not be empty.
@return last element */
@@ -61,7 +223,7 @@ ib_vector_get_last(
{
ut_a(vec->used > 0);
- return(vec->data[vec->used - 1]);
+ return((byte*) ib_vector_get(vec, vec->used - 1));
}
/****************************************************************//**
@@ -74,9 +236,52 @@ ib_vector_set(
ulint n, /*!< in: element index to set */
void* elem) /*!< in: data element */
{
+ void* slot;
+
ut_a(n < vec->used);
- vec->data[n] = elem;
+ slot = ((byte*) vec->data + IB_VEC_OFFSET(vec, n));
+ memcpy(slot, elem, vec->sizeof_value);
+}
+
+/********************************************************************
+Reset the vector size to 0 elements. */
+UNIV_INLINE
+void
+ib_vector_reset(
+/*============*/
+ /* out: void */
+ ib_vector_t* vec) /* in: vector */
+{
+ vec->used = 0;
+}
+
+/********************************************************************
+Get the last element of the vector. */
+UNIV_INLINE
+void*
+ib_vector_last(
+/*===========*/
+ /* out: pointer to last element */
+ ib_vector_t* vec) /* in: vector */
+{
+ ut_a(ib_vector_size(vec) > 0);
+
+ return(ib_vector_get(vec, ib_vector_size(vec) - 1));
+}
+
+/********************************************************************
+Get the last element of the vector. */
+UNIV_INLINE
+const void*
+ib_vector_last_const(
+/*=================*/
+ /* out: pointer to last element */
+ const ib_vector_t* vec) /* in: vector */
+{
+ ut_a(ib_vector_size(vec) > 0);
+
+ return(ib_vector_get_const(vec, ib_vector_size(vec) - 1));
}
/****************************************************************//**
@@ -86,35 +291,130 @@ UNIV_INLINE
void*
ib_vector_pop(
/*==========*/
- ib_vector_t* vec) /*!< in/out: vector */
+ /* out: pointer to element */
+ ib_vector_t* vec) /* in: vector */
{
- void* elem;
+ void* elem;
ut_a(vec->used > 0);
- --vec->used;
- elem = vec->data[vec->used];
- ut_d(vec->data[vec->used] = NULL);
- UNIV_MEM_INVALID(&vec->data[vec->used], sizeof(*vec->data));
+ elem = ib_vector_last(vec);
+ --vec->used;
return(elem);
}
-/****************************************************************//**
-Free the underlying heap of the vector. Note that vec is invalid
-after this call. */
+/********************************************************************
+Append an element to the vector, if elem != NULL then copy the data
+from elem.*/
+UNIV_INLINE
+void*
+ib_vector_push(
+/*===========*/
+ /* out: pointer to the "new" element */
+ ib_vector_t* vec, /* in: vector */
+ const void* elem) /* in: element to add (can be NULL) */
+{
+ void* last;
+
+ if (vec->used >= vec->total) {
+ ib_vector_resize(vec);
+ }
+
+ last = (byte*) vec->data + IB_VEC_OFFSET(vec, vec->used);
+
+#ifdef UNIV_DEBUG
+ memset(last, 0, vec->sizeof_value);
+#endif
+
+ if (elem) {
+ memcpy(last, elem, vec->sizeof_value);
+ }
+
+ ++vec->used;
+
+ return(last);
+}
+
+/*******************************************************************//**
+Remove an element from the vector
+@return pointer to the "removed" element */
+UNIV_INLINE
+void*
+ib_vector_remove(
+/*=============*/
+ ib_vector_t* vec, /*!< in: vector */
+ const void* elem) /*!< in: value to remove */
+{
+ void* current = NULL;
+ void* next;
+ ulint i;
+ ulint old_used_count = vec->used;
+
+ for (i = 0; i < vec->used; i++) {
+ current = ib_vector_get(vec, i);
+
+ if (*(void**) current == elem) {
+ if (i == vec->used - 1) {
+ return(ib_vector_pop(vec));
+ }
+
+ next = ib_vector_get(vec, i + 1);
+ memmove(current, next, vec->sizeof_value
+ * (vec->used - i - 1));
+ --vec->used;
+ break;
+ }
+ }
+
+ return((old_used_count != vec->used) ? current : NULL);
+}
+
+/********************************************************************
+Sort the vector elements. */
+UNIV_INLINE
+void
+ib_vector_sort(
+/*===========*/
+ /* out: void */
+ ib_vector_t* vec, /* in: vector */
+ ib_compare_t compare)/* in: the comparator to use for sort */
+{
+ qsort(vec->data, vec->used, vec->sizeof_value, compare);
+}
+
+/********************************************************************
+Destroy the vector. Make sure the vector owns the allocator, e.g.,
+the heap in the heap allocator. */
UNIV_INLINE
void
ib_vector_free(
/*===========*/
- ib_vector_t* vec) /*!< in, own: vector */
+ ib_vector_t* vec) /* in, own: vector */
{
- mem_heap_free(vec->heap);
+ /* Currently we only support two types of allocators, heap
+ and ut_malloc(), when the heap is freed all the elements are
+ freed too. With ut allocator, we need to free the elements,
+ the vector instance and the allocator separately. */
+
+ /* Only the heap allocator uses the arg field. */
+ if (vec->allocator->arg) {
+ mem_heap_free((mem_heap_t*) vec->allocator->arg);
+ } else {
+ ib_alloc_t* allocator;
+
+ allocator = vec->allocator;
+
+ allocator->mem_release(allocator, vec->data);
+ allocator->mem_release(allocator, vec);
+
+ ib_ut_allocator_free(allocator);
+ }
}
-/****************************************************************//**
+/********************************************************************
Test whether a vector is empty or not.
-@return TRUE if empty */
+@return TRUE if empty */
UNIV_INLINE
ibool
ib_vector_is_empty(
diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h
index aedcc2b435d..33385ddf2d4 100644
--- a/storage/xtradb/include/ut0wqueue.h
+++ b/storage/xtradb/include/ut0wqueue.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -37,7 +37,7 @@ processing.
#include "os0sync.h"
#include "sync0types.h"
-typedef struct ib_wqueue_struct ib_wqueue_t;
+struct ib_wqueue_t;
/****************************************************************//**
Create a new work queue.
@@ -66,6 +66,16 @@ ib_wqueue_add(
mem_heap_t* heap); /*!< in: memory heap to use for allocating the
list node */
+/********************************************************************
+Check if queue is empty. */
+
+ibool
+ib_wqueue_is_empty(
+/*===============*/
+ /* out: TRUE if queue empty
+ else FALSE */
+ const ib_wqueue_t* wq); /* in: work queue */
+
/****************************************************************//**
Wait for a work item to appear in the queue.
@return work item */
@@ -75,9 +85,19 @@ ib_wqueue_wait(
/*===========*/
ib_wqueue_t* wq); /*!< in: work queue */
+/********************************************************************
+Wait for a work item to appear in the queue for specified time. */
+
+void*
+ib_wqueue_timedwait(
+/*================*/
+ /* out: work item or NULL on timeout*/
+ ib_wqueue_t* wq, /* in: work queue */
+ ib_time_t wait_in_usecs); /* in: wait time in micro seconds */
+
/* Work queue. */
-struct ib_wqueue_struct {
- mutex_t mutex; /*!< mutex protecting everything */
+struct ib_wqueue_t {
+ ib_mutex_t mutex; /*!< mutex protecting everything */
ib_list_t* items; /*!< work item list */
os_event_t event; /*!< event we use to signal additions to list */
};