summaryrefslogtreecommitdiff
path: root/innobase/fil
diff options
context:
space:
mode:
authorunknown <heikki@hundin.mysql.fi>2003-10-07 17:28:59 +0300
committerunknown <heikki@hundin.mysql.fi>2003-10-07 17:28:59 +0300
commitd1485aad0eb79559902b1af26502b7cc53f8e95a (patch)
treed53a5cc4e9736e149276ba08d53a7dd09a14b4fd /innobase/fil
parentd1ab51eb947623f45a8314cb9c0b6ddc15f0d148 (diff)
downloadmariadb-git-d1485aad0eb79559902b1af26502b7cc53f8e95a.tar.gz
Many files:
Multiple tablespaces for InnoDB sql_table.cc: Tell explicitly that InnoDB should retrieve all columns in CHECKSUM TABLE sql_update.cc, sql_select.cc, my_base.h: More descriptive flag name HA_EXTRA_RETRIEVE_ALL_COLS include/my_base.h: More descriptive flag name HA_EXTRA_RETRIEVE_ALL_COLS sql/sql_select.cc: More descriptive flag name HA_EXTRA_RETRIEVE_ALL_COLS sql/sql_update.cc: More descriptive flag name HA_EXTRA_RETRIEVE_ALL_COLS sql/sql_table.cc: Tell explicitly that InnoDB should retrieve all columns in CHECKSUM TABLE sql/sql_db.cc: Multiple tablespaces for InnoDB sql/ha_innodb.cc: Multiple tablespaces for InnoDB sql/mysqld.cc: Multiple tablespaces for InnoDB sql/set_var.cc: Multiple tablespaces for InnoDB sql/sql_cache.cc: Multiple tablespaces for InnoDB sql/ha_innodb.h: Multiple tablespaces for InnoDB innobase/include/btr0btr.ic: Multiple tablespaces for InnoDB innobase/include/btr0pcur.ic: Multiple tablespaces for InnoDB innobase/include/data0type.ic: Multiple tablespaces for InnoDB innobase/include/dyn0dyn.ic: Multiple tablespaces for InnoDB innobase/include/fut0lst.ic: Multiple tablespaces for InnoDB innobase/include/log0log.ic: Multiple tablespaces for InnoDB innobase/include/mach0data.ic: Multiple tablespaces for InnoDB innobase/include/mtr0log.ic: Multiple tablespaces for InnoDB innobase/include/rem0rec.ic: Multiple tablespaces for InnoDB innobase/include/ut0byte.ic: Multiple tablespaces for InnoDB innobase/include/ut0ut.ic: Multiple tablespaces for InnoDB innobase/include/buf0buf.h: Multiple tablespaces for InnoDB innobase/include/buf0lru.h: Multiple tablespaces for InnoDB innobase/include/buf0rea.h: Multiple tablespaces for InnoDB innobase/include/data0type.h: Multiple tablespaces for InnoDB innobase/include/db0err.h: Multiple tablespaces for InnoDB innobase/include/dict0boot.h: Multiple tablespaces for InnoDB innobase/include/dict0dict.h: Multiple tablespaces for InnoDB innobase/include/dict0load.h: Multiple tablespaces for InnoDB 
innobase/include/dict0mem.h: Multiple tablespaces for InnoDB innobase/include/fil0fil.h: Multiple tablespaces for InnoDB innobase/include/fsp0fsp.h: Multiple tablespaces for InnoDB innobase/include/ibuf0ibuf.h: Multiple tablespaces for InnoDB innobase/include/lock0lock.h: Multiple tablespaces for InnoDB innobase/include/log0log.h: Multiple tablespaces for InnoDB innobase/include/log0recv.h: Multiple tablespaces for InnoDB innobase/include/os0file.h: Multiple tablespaces for InnoDB innobase/include/page0page.h: Multiple tablespaces for InnoDB innobase/include/que0types.h: Multiple tablespaces for InnoDB innobase/include/rem0rec.h: Multiple tablespaces for InnoDB innobase/include/srv0srv.h: Multiple tablespaces for InnoDB innobase/include/srv0start.h: Multiple tablespaces for InnoDB innobase/include/sync0sync.h: Multiple tablespaces for InnoDB innobase/include/trx0sys.h: Multiple tablespaces for InnoDB innobase/include/ut0byte.h: Multiple tablespaces for InnoDB innobase/include/univ.i: Multiple tablespaces for InnoDB innobase/btr/btr0cur.c: Multiple tablespaces for InnoDB innobase/btr/btr0sea.c: Multiple tablespaces for InnoDB innobase/buf/buf0buf.c: Multiple tablespaces for InnoDB innobase/buf/buf0flu.c: Multiple tablespaces for InnoDB innobase/buf/buf0lru.c: Multiple tablespaces for InnoDB innobase/buf/buf0rea.c: Multiple tablespaces for InnoDB innobase/data/data0type.c: Multiple tablespaces for InnoDB innobase/dict/dict0boot.c: Multiple tablespaces for InnoDB innobase/dict/dict0crea.c: Multiple tablespaces for InnoDB innobase/dict/dict0dict.c: Multiple tablespaces for InnoDB innobase/dict/dict0load.c: Multiple tablespaces for InnoDB innobase/dict/dict0mem.c: Multiple tablespaces for InnoDB innobase/fil/fil0fil.c: Multiple tablespaces for InnoDB innobase/fsp/fsp0fsp.c: Multiple tablespaces for InnoDB innobase/ha/ha0ha.c: Multiple tablespaces for InnoDB innobase/ibuf/ibuf0ibuf.c: Multiple tablespaces for InnoDB innobase/log/log0log.c: Multiple tablespaces for InnoDB 
innobase/log/log0recv.c: Multiple tablespaces for InnoDB innobase/mach/mach0data.c: Multiple tablespaces for InnoDB innobase/mem/mem0dbg.c: Multiple tablespaces for InnoDB innobase/mem/mem0pool.c: Multiple tablespaces for InnoDB innobase/mtr/mtr0log.c: Multiple tablespaces for InnoDB innobase/os/os0file.c: Multiple tablespaces for InnoDB innobase/os/os0proc.c: Multiple tablespaces for InnoDB innobase/page/page0cur.c: Multiple tablespaces for InnoDB innobase/que/que0que.c: Multiple tablespaces for InnoDB innobase/row/row0ins.c: Multiple tablespaces for InnoDB innobase/row/row0mysql.c: Multiple tablespaces for InnoDB innobase/row/row0sel.c: Multiple tablespaces for InnoDB innobase/row/row0upd.c: Multiple tablespaces for InnoDB innobase/srv/srv0srv.c: Multiple tablespaces for InnoDB innobase/srv/srv0start.c: Multiple tablespaces for InnoDB innobase/sync/sync0rw.c: Multiple tablespaces for InnoDB innobase/sync/sync0sync.c: Multiple tablespaces for InnoDB innobase/trx/trx0sys.c: Multiple tablespaces for InnoDB innobase/trx/trx0trx.c: Multiple tablespaces for InnoDB innobase/trx/trx0undo.c: Multiple tablespaces for InnoDB innobase/ut/ut0byte.c: Multiple tablespaces for InnoDB innobase/ut/ut0ut.c: Multiple tablespaces for InnoDB
Diffstat (limited to 'innobase/fil')
-rw-r--r--innobase/fil/fil0fil.c2702
1 files changed, 2247 insertions, 455 deletions
diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c
index f55df90846c..2b0138ccb5a 100644
--- a/innobase/fil/fil0fil.c
+++ b/innobase/fil/fil0fil.c
@@ -1,5 +1,5 @@
/******************************************************
-The low-level file system
+The tablespace memory cache
(c) 1995 Innobase Oy
@@ -16,16 +16,19 @@ Created 10/25/1995 Heikki Tuuri
#include "mach0data.h"
#include "ibuf0ibuf.h"
#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0lru.h"
#include "log0log.h"
#include "log0recv.h"
#include "fsp0fsp.h"
#include "srv0srv.h"
+#include "srv0start.h"
/*
- IMPLEMENTATION OF THE LOW-LEVEL FILE SYSTEM
- ===========================================
+ IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
+ =============================================
-The file system is responsible for providing fast read/write access to
+The tablespace cache is responsible for providing fast read/write access to
tablespaces and logs of the database. File creation and deletion is done
in other modules which know more of the logic of the operation, however.
@@ -83,20 +86,31 @@ ulint fil_n_pending_tablespace_flushes = 0;
/* Null file address */
fil_addr_t fil_addr_null = {FIL_NULL, 0};
-/* File system file node data structure */
+/* File node of a tablespace or the log data space */
typedef struct fil_node_struct fil_node_t;
struct fil_node_struct {
- char* name; /* the file name or path */
+ fil_space_t* space; /* backpointer to the space where this node
+ belongs */
+ char* name; /* path to the file */
ibool open; /* TRUE if file open */
os_file_t handle; /* OS handle to the file, if file open */
- ulint size; /* size of the file in database pages
- (where the possible last incomplete megabyte
- is ignored) */
+ ibool is_raw_disk;/* TRUE if the 'file' is actually a raw
+ device or a raw disk partition */
+ ulint size; /* size of the file in database pages, 0 if
+ not known yet; the possible last incomplete
+ megabyte is ignored if space == 0 */
ulint n_pending;
- /* count of pending i/o-ops on this file */
- ibool is_modified; /* this is set to TRUE when we write
- to the file and FALSE when we call fil_flush
- for this file space */
+ /* count of pending i/o's on this file;
+ closing of the file is not allowed if
+ this is > 0 */
+ ulint n_pending_flushes;
+ /* count of pending flushes on this file;
+ closing of the file is not allowed if
+ this is > 0 */
+ ib_longlong modification_counter;/* when we write to the file we
+ increment this by one */
+ ib_longlong flush_counter;/* up to what modification_counter value
+ we have flushed the modifications to disk */
UT_LIST_NODE_T(fil_node_t) chain;
/* link field for the file chain */
UT_LIST_NODE_T(fil_node_t) LRU;
@@ -106,19 +120,52 @@ struct fil_node_struct {
#define FIL_NODE_MAGIC_N 89389
-/* File system tablespace or log data structure: let us call them by a common
-name space */
+/* Tablespace or log data space: let us call them by a common name space */
struct fil_space_struct {
- char* name; /* space name */
+ char* name; /* space name = the path to the first file in
+ it */
ulint id; /* space id */
+ ib_longlong tablespace_version;
+ /* in DISCARD/IMPORT this timestamp is used to
+ check if we should ignore an insert buffer
+ merge request for a page because it actually
+ was for the previous incarnation of the
+ space */
+ ibool mark; /* this is set to TRUE at database startup if
+ the space corresponds to a table in the InnoDB
+ data dictionary; so we can print a warning of
+ orphaned tablespaces */
+ ibool stop_ios;/* TRUE if we want to rename the .ibd file of
+ tablespace and want to stop temporarily
+ posting of new i/o requests on the file */
+ ibool stop_ibuf_merges;
+ /* we set this TRUE when we start deleting a
+ single-table tablespace */
+ ibool is_being_deleted;
+ /* this is set to TRUE when we start
+ deleting a single-table tablespace and its
+ file; when this flag is set no further i/o
+ or flush requests can be placed on this space,
+ though there may be such requests still being
+ processed on this space */
ulint purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
UT_LIST_BASE_NODE_T(fil_node_t) chain;
/* base node for the file chain */
- ulint size; /* space size in pages */
+ ulint size; /* space size in pages; 0 if a single-table
+ tablespace whose size we do not know yet */
ulint n_reserved_extents;
/* number of reserved free extents for
ongoing operations like B-tree page split */
+ ulint n_pending_flushes; /* this is > 0 when flushing
+ the tablespace to disk; dropping of the
+ tablespace is forbidden if this is > 0 */
+ ulint n_pending_ibuf_merges;/* this is > 0 when merging
+ insert buffer entries to a page so that we
+ may need to access the ibuf bitmap page in the
+ tablespace: dropping of the tablespace is
+ forbidden if this is > 0 */
hash_node_t hash; /* hash chain node */
+ hash_node_t name_hash;/* hash chain node for the name_hash table */
rw_lock_t latch; /* latch protecting the file space storage
allocation */
UT_LIST_NODE_T(fil_space_t) space_list;
@@ -130,80 +177,115 @@ struct fil_space_struct {
#define FIL_SPACE_MAGIC_N 89472
-/* The file system data structure */
+/* The tablespace memory cache; also the totality of logs = the log data space,
+is stored here; below we talk about tablespaces, but also the ib_logfiles
+form a 'space' and it is handled here */
typedef struct fil_system_struct fil_system_t;
struct fil_system_struct {
- mutex_t mutex; /* The mutex protecting the system */
+ mutex_t mutex; /* The mutex protecting the cache */
hash_table_t* spaces; /* The hash table of spaces in the
- system */
+ system; they are hashed on the space
+ id */
+ hash_table_t* name_hash; /* hash table based on the space
+ name */
UT_LIST_BASE_NODE_T(fil_node_t) LRU;
/* base node for the LRU list of the
- most recently used open files */
- ulint n_open_pending; /* current number of open files with
- pending i/o-ops on them */
- ulint max_n_open; /* maximum allowed open files */
- os_event_t can_open; /* this event is set to the signaled
- state when the system is capable of
- opening a new file, i.e.,
- n_open_pending < max_n_open */
+ most recently used open files with no
+ pending i/o's; if we start an i/o on
+ the file, we first remove it from this
+ list, and return it to the start of
+ the list when the i/o ends;
+ log files and the system tablespace are
+ not put to this list: they are opened
+ after the startup, and kept open until
+ shutdown */
+ ulint n_open; /* number of files currently open */
+ ulint max_n_open; /* n_open is not allowed to exceed
+ this */
+ ib_longlong modification_counter;/* when we write to a file we
+ increment this by one */
+ ulint max_assigned_id;/* maximum space id in the existing
+ tables, or assigned during the time
+ mysqld has been up; at an InnoDB
+ startup we scan the data dictionary
+ and set here the maximum of the
+ space id's of the tables there */
+ ib_longlong tablespace_version;
+ /* a counter which is incremented for
+ every space object memory creation;
+ every space mem object gets a
+ 'timestamp' from this; in DISCARD/
+ IMPORT this is used to check if we
+ should ignore an insert buffer merge
+ request */
UT_LIST_BASE_NODE_T(fil_space_t) space_list;
/* list of all file spaces */
};
-/* The file system. This variable is NULL before the module is initialized. */
+/* The tablespace memory cache. This variable is NULL before the module is
+initialized. */
fil_system_t* fil_system = NULL;
-/* The file system hash table size */
-#define FIL_SYSTEM_HASH_SIZE 500
+/* The tablespace memory cache hash table size */
+#define FIL_SYSTEM_HASH_SIZE 50 /* TODO: make bigger! */
-/***********************************************************************
-Reserves a right to open a single file. The right must be released with
-fil_release_right_to_open. */
+/************************************************************************
+NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
+Prepares a file node for i/o. Opens the file if it is closed. Updates the
+pending i/o's field in the node and the system appropriately. Takes the node
+off the LRU list if it is in the LRU list. The caller must hold the fil_sys
+mutex. */
+static
void
-fil_reserve_right_to_open(void)
-/*===========================*/
-{
-loop:
- mutex_enter(&(fil_system->mutex));
-
- if (fil_system->n_open_pending == fil_system->max_n_open) {
-
- /* It is not sure we can open the file if it is closed: wait */
-
- os_event_reset(fil_system->can_open);
-
- mutex_exit(&(fil_system->mutex));
+fil_node_prepare_for_io(
+/*====================*/
+ fil_node_t* node, /* in: file node */
+ fil_system_t* system, /* in: tablespace memory cache */
+ fil_space_t* space); /* in: space */
+/************************************************************************
+Updates the data structures when an i/o operation finishes. Updates the
+pending i/o's field in the node appropriately. */
+static
+void
+fil_node_complete_io(
+/*=================*/
+ fil_node_t* node, /* in: file node */
+ fil_system_t* system, /* in: tablespace memory cache */
+ ulint type); /* in: OS_FILE_WRITE or OS_FILE_READ; marks
+ the node as modified if
+ type == OS_FILE_WRITE */
- os_event_wait(fil_system->can_open);
- goto loop;
- }
+/***********************************************************************
+Returns the version number of a tablespace, -1 if not found. */
- fil_system->max_n_open--;
+ib_longlong
+fil_space_get_version(
+/*==================*/
+ /* out: version number, -1 if the tablespace does not
+ exist in the memory cache */
+ ulint id) /* in: space id */
+{
+ fil_system_t* system = fil_system;
+ fil_space_t* space;
+ ib_longlong version = -1;
- mutex_exit(&(fil_system->mutex));
-}
+ ut_ad(system);
-/***********************************************************************
-Releases a right to open a single file. */
+ mutex_enter(&(system->mutex));
-void
-fil_release_right_to_open(void)
-/*===========================*/
-{
- mutex_enter(&(fil_system->mutex));
-
- if (fil_system->n_open_pending == fil_system->max_n_open) {
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- os_event_set(fil_system->can_open);
+ if (space) {
+ version = space->tablespace_version;
}
- fil_system->max_n_open++;
+ mutex_exit(&(system->mutex));
- mutex_exit(&(fil_system->mutex));
+ return(version);
}
/***********************************************************************
@@ -215,8 +297,8 @@ fil_space_get_latch(
/* out: latch protecting storage allocation */
ulint id) /* in: space id */
{
- fil_space_t* space;
fil_system_t* system = fil_system;
+ fil_space_t* space;
ut_ad(system);
@@ -224,6 +306,8 @@ fil_space_get_latch(
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ ut_a(space);
+
mutex_exit(&(system->mutex));
return(&(space->latch));
@@ -238,8 +322,8 @@ fil_space_get_type(
/* out: FIL_TABLESPACE or FIL_LOG */
ulint id) /* in: space id */
{
- fil_space_t* space;
fil_system_t* system = fil_system;
+ fil_space_t* space;
ut_ad(system);
@@ -247,6 +331,8 @@ fil_space_get_type(
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ ut_a(space);
+
mutex_exit(&(system->mutex));
return(space->purpose);
@@ -261,17 +347,21 @@ fil_space_get_ibuf_data(
/* out: ibuf data for this space */
ulint id) /* in: space id */
{
+ fil_system_t* system = fil_system;
fil_space_t* space;
- fil_system_t* system = fil_system;
ut_ad(system);
+ ut_a(id == 0);
+
mutex_enter(&(system->mutex));
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
mutex_exit(&(system->mutex));
+ ut_a(space);
+
return(space->ibuf_data);
}
@@ -284,16 +374,16 @@ fil_node_create(
char* name, /* in: file name (file must be closed) */
ulint size, /* in: file size in database blocks, rounded downwards
to an integer */
- ulint id) /* in: space id where to append */
+ ulint id, /* in: space id where to append */
+ ibool is_raw) /* in: TRUE if a raw device or a raw disk partition */
{
+ fil_system_t* system = fil_system;
fil_node_t* node;
fil_space_t* space;
char* name2;
- fil_system_t* system = fil_system;
ut_a(system);
ut_a(name);
- ut_a(size > 0);
mutex_enter(&(system->mutex));
@@ -305,29 +395,119 @@ fil_node_create(
node->name = name2;
node->open = FALSE;
+
+ ut_a(!is_raw || srv_start_raw_disk_in_use);
+
+ node->is_raw_disk = is_raw;
node->size = size;
node->magic_n = FIL_NODE_MAGIC_N;
node->n_pending = 0;
+ node->n_pending_flushes = 0;
- node->is_modified = FALSE;
+ node->modification_counter = 0;
+ node->flush_counter = 0;
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ if (!space) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Error: Could not find tablespace %lu for\n"
+"InnoDB: file %s from the tablespace memory cache.\n", id, name);
+ mem_free(name2);
+
+ mem_free(node);
+
+ mutex_exit(&(system->mutex));
+
+ return;
+ }
+
space->size += size;
+ node->space = space;
+
UT_LIST_ADD_LAST(chain, space->chain, node);
mutex_exit(&(system->mutex));
}
+/************************************************************************
+Opens the file of a node of a tablespace or of the log space and increments
+the count of open files in the tablespace memory cache. If the size of the
+node is not known yet (node->size == 0), reads the file size from the OS and
+stores it, in database pages, to both the node and the space. A node of a
+tablespace other than space id 0 is also put first to the LRU list of open
+files. The caller must own the fil_system mutex. */
+static
+void
+fil_node_open_file(
+/*===============*/
+ fil_node_t* node, /* in: file node; must be closed and must
+ not have pending i/o's */
+ fil_system_t* system, /* in: tablespace memory cache */
+ fil_space_t* space) /* in: space */
+{
+ ib_longlong size_bytes;
+ ulint size_low;
+ ulint size_high;
+ ibool ret;
+
+ ut_ad(mutex_own(&(system->mutex)));
+
+ ut_a(node->n_pending == 0);
+ ut_a(node->open == FALSE);
+
+ /* printf("Opening file %s\n", node->name); */
+
+ if (space->purpose == FIL_LOG) {
+ node->handle = os_file_create(node->name, OS_FILE_OPEN,
+ OS_FILE_AIO, OS_LOG_FILE, &ret);
+ } else if (node->is_raw_disk) {
+ node->handle = os_file_create(node->name,
+ OS_FILE_OPEN_RAW,
+ OS_FILE_AIO, OS_DATA_FILE, &ret);
+ } else {
+ node->handle = os_file_create(node->name, OS_FILE_OPEN,
+ OS_FILE_AIO, OS_DATA_FILE, &ret);
+ }
+
+ ut_a(ret);
+
+ node->open = TRUE;
+
+ system->n_open++;
+
+ if (node->size == 0) {
+ /* It must be a single-table tablespace and we do not know the
+ size of the file yet.
+ NOTE(review): the return value of os_file_get_size() is not
+ checked below; if the call can fail, size_low and size_high
+ would be used uninitialized - confirm against os0file. */
+
+ ut_a(space->id != 0);
+
+ os_file_get_size(node->handle, &size_low, &size_high);
+
+ size_bytes = (((ib_longlong)size_high) << 32)
+ + (ib_longlong)size_low;
+
+ if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) { /* round
+ the size down to whole megabytes,
+ expressed in pages */
+ node->size = (ulint) ((size_bytes / (1024 * 1024))
+ * ((1024 * 1024) / UNIV_PAGE_SIZE));
+ } else { /* small file: count the whole pages in it */
+ node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+ }
+
+ space->size = node->size;
+ }
+
+ if (space->purpose == FIL_TABLESPACE && space->id != 0) {
+ /* Put the node to the LRU list; fil_node_close_file() removes
+ it from the list under the same condition */
+ UT_LIST_ADD_FIRST(LRU, system->LRU, node);
+ }
+}
+
/**************************************************************************
Closes a file. */
static
void
-fil_node_close(
-/*===========*/
+fil_node_close_file(
+/*================*/
fil_node_t* node, /* in: file node */
- fil_system_t* system) /* in: file system */
+ fil_system_t* system) /* in: tablespace memory cache */
{
ibool ret;
@@ -335,32 +515,214 @@ fil_node_close(
ut_ad(mutex_own(&(system->mutex)));
ut_a(node->open);
ut_a(node->n_pending == 0);
+ ut_a(node->n_pending_flushes == 0);
ret = os_file_close(node->handle);
ut_a(ret);
+ /* printf("Closing file %s\n", node->name); */
+
node->open = FALSE;
+ ut_a(system->n_open > 0);
+ system->n_open--;
- /* The node is in the LRU list, remove it */
- UT_LIST_REMOVE(LRU, system->LRU, node);
+ if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
+ ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
+
+ /* The node is in the LRU list, remove it */
+ UT_LIST_REMOVE(LRU, system->LRU, node);
+ }
+}
+
+/************************************************************************
+Tries to close one file in the LRU list. The list is scanned from its last
+node towards the first, and the first node found which has no unflushed
+modifications (modification_counter == flush_counter) and no pending flushes
+is closed. The caller must hold the fil_system mutex. */
+static
+ibool
+fil_try_to_close_file_in_LRU(
+/*=========================*/
+ /* out: TRUE if a file was closed, FALSE if
+ none could be closed and we should retry
+ later; since i/o's generally complete in <
+ 100 ms, and as InnoDB writes at most 128 pages
+ from the buffer pool in a batch, and then
+ immediately flushes the files, there is a good
+ chance that the next time we find a suitable
+ node from the LRU list */
+ ibool print_info) /* in: if TRUE, prints to stderr the length
+ of the LRU list and, for each node it cannot
+ close, the reason why */
+{
+ fil_system_t* system = fil_system;
+ fil_node_t* node;
+
+ ut_ad(mutex_own(&(system->mutex)));
+
+ node = UT_LIST_GET_LAST(system->LRU);
+
+ if (print_info) {
+ fprintf(stderr,
+"InnoDB: fil_sys open file LRU len %lu\n", UT_LIST_GET_LEN(system->LRU));
+ }
+
+ while (node != NULL) {
+ if (node->modification_counter == node->flush_counter
+ && node->n_pending_flushes == 0) {
+
+ fil_node_close_file(node, system);
+
+ return(TRUE);
+ }
+
+ if (print_info && node->n_pending_flushes > 0) {
+ fprintf(stderr,
+"InnoDB: cannot close file %s, because n_pending_flushes %lu\n", node->name,
+ node->n_pending_flushes);
+ }
+
+ if (print_info
+ && node->modification_counter != node->flush_counter) {
+ fprintf(stderr,
+"InnoDB: cannot close file %s, because mod_count %lld != fl_count %lld\n",
+ node->name, node->modification_counter,
+ node->flush_counter);
+ }
+
+ node = UT_LIST_GET_PREV(LRU, node);
+ }
+
+ return(FALSE);
+}
/***********************************************************************
-Frees a file node object from a file system. */
+Reserves the fil_system mutex and tries to make sure we can open at least one
+file while holding it. This should be called before calling
+fil_node_prepare_for_io(), because that function may need to open a file.
+On return the caller owns the fil_system mutex and must release it. */
+static
+void
+fil_mutex_enter_and_prepare_for_io(
+/*===============================*/
+ ulint space_id) /* in: space id */
+{
+ fil_system_t* system = fil_system;
+ fil_space_t* space;
+ ibool success;
+ ibool print_info = FALSE;
+ ulint count = 0; /* number of rounds in which we could not get
+ below the open file limit */
+ ulint count2 = 0; /* number of rounds we have waited because
+ i/o's on the space were stopped */
+
+ ut_ad(!mutex_own(&(system->mutex)));
+retry:
+ mutex_enter(&(system->mutex));
+
+ if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
+ /* We keep log files and system tablespace files always open;
+ this is important in preventing deadlocks in this module, as
+ a page read completion often performs another read from the
+ insert buffer. The insert buffer is in tablespace 0, and we
+ cannot end up waiting in this function.
+ Note that this and every other return below leaves the
+ fil_system mutex reserved for the caller. */
+
+ return;
+ }
+
+ if (system->n_open < system->max_n_open) {
+
+ return;
+ }
+
+ HASH_SEARCH(hash, system->spaces, space_id, space,
+ space->id == space_id);
+ if (space != NULL && space->stop_ios) {
+ /* We are going to do a rename file and want to stop new i/o's
+ for a while: release the mutex, sleep, and retry. After more
+ than 20000 such rounds a warning is printed on every further
+ round. */
+
+ if (count2 > 20000) {
+ fprintf(stderr,
+"InnoDB: Warning: tablespace %s has i/o ops stopped for a long time %lu\n",
+ space->name, count2);
+ }
+
+ mutex_exit(&(system->mutex));
+
+ os_thread_sleep(20000);
+
+ count2++;
+
+ goto retry;
+ }
+
+ /* If the file is already open, no need to do anything; if the space
+ does not exist, we handle the situation in the function which called
+ this function. Only the first node in the chain of the space is
+ inspected here. */
+
+ if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
+
+ return;
+ }
+
+ if (count > 1) { /* from the third round on, report why files
+ cannot be closed */
+ print_info = TRUE;
+ }
+
+ /* Too many files are open, try to close some: keep closing files from
+ the LRU list until we are below the limit or nothing more can be
+ closed */
+close_more:
+ success = fil_try_to_close_file_in_LRU(print_info);
+
+ if (success && system->n_open >= system->max_n_open) {
+
+ goto close_more;
+ }
+
+ if (system->n_open < system->max_n_open) {
+ /* Ok */
+
+ return;
+ }
+
+ if (count >= 2) { /* give up after two unsuccessful rounds and let
+ the caller exceed the limit.
+ NOTE(review): the option name printed below
+ (innodb_max_files_open) differs from the one
+ printed in
+ fil_open_log_and_system_tablespace_files()
+ (innodb_max_open_files); confirm the actual
+ option name. */
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Warning: too many (%lu) files stay open while the maximum\n"
+"InnoDB: allowed value would be %lu.\n"
+"InnoDB: You may need to raise the value of innodb_max_files_open in\n"
+"InnoDB: my.cnf.\n", system->n_open, system->max_n_open);
+
+ return;
+ }
+
+ mutex_exit(&(system->mutex));
+
+ /* Wake the i/o-handler threads to make sure pending i/o's are
+ performed */
+ os_aio_simulated_wake_handler_threads();
+
+ os_thread_sleep(20000);
+
+ /* Flush tablespaces so that we can close modified files in the LRU
+ list */
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ count++;
+
+ goto retry;
+}
+
+/***********************************************************************
+Frees a file node object from a tablespace memory cache. */
static
void
fil_node_free(
/*==========*/
fil_node_t* node, /* in, own: file node */
- fil_system_t* system, /* in: file system */
+ fil_system_t* system, /* in: tablespace memory cache */
fil_space_t* space) /* in: space where the file node is chained */
{
ut_ad(node && system && space);
ut_ad(mutex_own(&(system->mutex)));
ut_a(node->magic_n == FIL_NODE_MAGIC_N);
+ ut_a(node->n_pending == 0);
if (node->open) {
- fil_node_close(node, system);
+ fil_node_close_file(node, system);
}
space->size -= node->size;
@@ -383,9 +745,9 @@ fil_space_truncate_start(
if this does not equal to the combined size of
some initial files in the space */
{
+ fil_system_t* system = fil_system;
fil_node_t* node;
fil_space_t* space;
- fil_system_t* system = fil_system;
mutex_enter(&(system->mutex));
@@ -394,7 +756,6 @@ fil_space_truncate_start(
ut_a(space);
while (trunc_len > 0) {
-
node = UT_LIST_GET_FIRST(space->chain);
ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len);
@@ -405,17 +766,323 @@ fil_space_truncate_start(
}
mutex_exit(&(system->mutex));
-}
+}
+
+/***********************************************************************
+Creates a space memory object and puts it to the tablespace memory cache. If
+there is an error, prints an error message to the .err log (stderr).
+If a space of the same name is already in the cache and we are creating a
+tablespace with id != 0, the old space object is freed from the cache and the
+creation is retried; for id 0 or a non-tablespace purpose FALSE is returned.
+FALSE is returned also if a space with the same id is already in the cache.
+The name is copied, so the caller keeps the ownership of its own string. */
+
+ibool
+fil_space_create(
+/*=============*/
+ /* out: TRUE if success */
+ char* name, /* in: space name */
+ ulint id, /* in: space id */
+ ulint purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
+{
+ fil_system_t* system = fil_system;
+ fil_space_t* space;
+ char* name2;
+ ulint namesake_id;
+try_again:
+ /*printf(
+ "InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
+ purpose);*/
+
+ ut_a(system);
+ ut_a(name);
+
+ mutex_enter(&(system->mutex));
+
+ HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(name), space,
+ 0 == strcmp(name, space->name));
+ if (space != NULL) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Warning: trying to init to the tablespace memory cache\n"
+"InnoDB: a tablespace %lu of name %s,\n"
+"InnoDB: but a tablespace %lu of the same name %s\n"
+"InnoDB: already exists in the tablespace memory cache!\n",
+ id, name, space->id, space->name);
+
+ if (id == 0 || purpose != FIL_TABLESPACE) {
+
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
+ }
+
+ fprintf(stderr,
+"InnoDB: We assume that InnoDB did a crash recovery, and you had\n"
+"InnoDB: an .ibd file for which the table did not exist in the\n"
+"InnoDB: InnoDB internal data dictionary in the ibdata files.\n"
+"InnoDB: We assume that you later removed the .ibd and .frm files,\n"
+"InnoDB: and are now trying to recreate the table. We now remove the\n"
+"InnoDB: conflicting tablespace object from the memory cache and try\n"
+"InnoDB: the init again.\n");
+
+ namesake_id = space->id; /* save the id: we release the mutex
+ before freeing the namesake */
+
+ mutex_exit(&(system->mutex));
+
+ fil_space_free(namesake_id);
+
+ goto try_again;
+ }
+
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+ if (space != NULL) {
+ fprintf(stderr,
+"InnoDB: Error: trying to add tablespace %lu of name %s\n"
+"InnoDB: to the tablespace memory cache, but tablespace\n"
+"InnoDB: %lu of name %s already exists in the tablespace\n"
+"InnoDB: memory cache!\n", id, name, space->id, space->name);
+
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
+ }
+
+ space = mem_alloc(sizeof(fil_space_t));
+
+ name2 = mem_alloc(ut_strlen(name) + 1);
+
+ ut_strcpy(name2, name);
+
+ space->name = name2;
+ space->id = id;
+
+ system->tablespace_version++; /* every new space object gets
+ a fresh 'timestamp' */
+ space->tablespace_version =
+ system->tablespace_version;
+ space->mark = FALSE;
+
+ if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) {
+ system->max_assigned_id = id;
+ }
+
+ space->stop_ios = FALSE;
+ space->stop_ibuf_merges = FALSE;
+ space->is_being_deleted = FALSE;
+ space->purpose = purpose;
+ space->size = 0; /* grows in fil_node_create() as files are added */
+
+ space->n_reserved_extents = 0;
+
+ space->n_pending_flushes = 0;
+ space->n_pending_ibuf_merges = 0;
+
+ UT_LIST_INIT(space->chain);
+ space->magic_n = FIL_SPACE_MAGIC_N;
+
+ space->ibuf_data = NULL;
+
+ rw_lock_create(&(space->latch));
+ rw_lock_set_level(&(space->latch), SYNC_FSP);
+
+ HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
+
+ HASH_INSERT(fil_space_t, name_hash, system->name_hash,
+ ut_fold_string(name), space);
+ UT_LIST_ADD_LAST(space_list, system->space_list, space);
+
+ mutex_exit(&(system->mutex));
+
+ return(TRUE);
+}
+
+/***********************************************************************
+Assigns a new space id for a new single-table tablespace. This works simply by
+incrementing the global counter max_assigned_id under the fil_system mutex.
+Ids must stay below SRV_LOG_SPACE_FIRST_ID: a warning is printed at every
+1000000th id once more than half of that range is in use, and when the range
+is exhausted the counter is restored and ULINT_UNDEFINED is returned. If the
+available id's are not enough, we may need to recycle id's. */
+static
+ulint
+fil_assign_new_space_id(void)
+/*=========================*/
+ /* out: new tablespace id; ULINT_UNDEFINED if could
+ not assign an id */
+{
+ fil_system_t* system = fil_system;
+ ulint id;
+
+ mutex_enter(&(system->mutex));
+
+ system->max_assigned_id++;
+
+ id = system->max_assigned_id;
+
+ if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+"InnoDB: Warning: you are running out of new single-table tablespace id's.\n"
+"InnoDB: Current counter is %lu and it must not exceed %lu!\n"
+"InnoDB: To reset the counter to zero you have to dump all your tables and\n"
+"InnoDB: recreate the whole InnoDB installation.\n", id,
+ SRV_LOG_SPACE_FIRST_ID);
+ }
+
+ if (id >= SRV_LOG_SPACE_FIRST_ID) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+"InnoDB: You have run out of single-table tablespace id's!\n"
+"InnoDB: Current counter is %lu.\n"
+"InnoDB: To reset the counter to zero you have to dump all your tables and\n"
+"InnoDB: recreate the whole InnoDB installation.\n", id);
+ system->max_assigned_id--; /* undo the increment above */
+
+ id = ULINT_UNDEFINED;
+ }
+
+ mutex_exit(&(system->mutex));
+
+ return(id);
+}
+
+/***********************************************************************
+Frees a space object from the tablespace memory cache. Closes the files in
+the chain but does not delete them. There must not be any pending i/o's or
+flushes on the files. The space is removed from both hash tables and from the
+list of spaces, its file nodes are freed, and finally, after the fil_system
+mutex has been released, its latch, name and the space object itself are
+freed. */
+
+ibool
+fil_space_free(
+/*===========*/
+ /* out: TRUE if success, FALSE if the space was not
+ found in the cache */
+ ulint id) /* in: space id */
+{
+ fil_system_t* system = fil_system;
+ fil_space_t* space;
+ fil_space_t* namespace;
+ fil_node_t* fil_node;
+
+ mutex_enter(&(system->mutex));
+
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+ if (!space) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Error: trying to remove tablespace %lu from the cache but\n"
+"InnoDB: it is not there.\n", id);
+
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
+ }
+
+ HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
+
+ HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(space->name),
+ namespace, 0 == strcmp(space->name, namespace->name));
+ ut_a(namespace);
+ ut_a(space == namespace); /* the name must map to this same space */
+
+ HASH_DELETE(fil_space_t, name_hash, system->name_hash,
+ ut_fold_string(space->name), space);
+
+ UT_LIST_REMOVE(space_list, system->space_list, space);
+
+ ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
+ ut_a(0 == space->n_pending_flushes);
+
+ fil_node = UT_LIST_GET_FIRST(space->chain);
+
+ while (fil_node != NULL) {
+ fil_node_free(fil_node, system, space);
+
+ fil_node = UT_LIST_GET_FIRST(space->chain);
+ }
+
+ ut_a(0 == UT_LIST_GET_LEN(space->chain));
+
+ mutex_exit(&(system->mutex));
+
+ rw_lock_free(&(space->latch));
+
+ mem_free(space->name);
+ mem_free(space);
+
+ return(TRUE);
+}
+
+/***********************************************************************
+Returns the size of the space in pages. The tablespace must be cached in the
+memory cache; if it is not, 0 is returned. If the size of a tablespace is
+still 0, i.e., not known yet, the node of the space is prepared for i/o to
+get the size fields updated before the size is returned. */
+
+ulint
+fil_space_get_size(
+/*===============*/
+ /* out: space size, 0 if space not found */
+ ulint id) /* in: space id */
+{
+ fil_system_t* system = fil_system;
+ fil_node_t* node;
+ fil_space_t* space;
+ ulint size;
+
+ ut_ad(system);
+
+ fil_mutex_enter_and_prepare_for_io(id); /* reserves the fil_system
+ mutex; it is released on
+ both return paths below */
+
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+ if (space == NULL) {
+ mutex_exit(&(system->mutex));
+
+ return(0);
+ }
+
+ if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
+ ut_a(id != 0);
+
+ ut_a(1 == UT_LIST_GET_LEN(space->chain));
+
+ node = UT_LIST_GET_FIRST(space->chain);
+
+ /* It must be a single-table tablespace and we have not opened
+ the file yet; the following calls will open it and update the
+ size fields; no actual i/o is done between the calls */
+
+ fil_node_prepare_for_io(node, system, space);
+ fil_node_complete_io(node, system, OS_FILE_READ);
+ }
+
+ size = space->size;
+
+ mutex_exit(&(system->mutex));
+
+ return(size);
+}
+
+/***********************************************************************
+Checks if the pair space, page_no refers to an existing page in a tablespace
+file space, i.e., if page_no is smaller than the size of the space in pages.
+The tablespace must be cached in the memory cache. */
+
+ibool
+fil_check_adress_in_tablespace(
+/*===========================*/
+ /* out: TRUE if the address is meaningful; FALSE
+ also if the space is not found, because
+ fil_space_get_size() then returns 0 */
+ ulint id, /* in: space id */
+ ulint page_no)/* in: page number */
+{
+ if (fil_space_get_size(id) > page_no) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
/********************************************************************
-Creates a file system object. */
+Creates the tablespace memory cache. */
static
fil_system_t*
fil_system_create(
/*==============*/
- /* out, own: file system object */
+ /* out, own: tablespace memory cache */
ulint hash_size, /* in: hash table size */
- ulint max_n_open) /* in: maximum number of open files */
+ ulint max_n_open) /* in: maximum number of open files; must be
+ > 10 */
{
fil_system_t* system;
@@ -429,12 +1096,17 @@ fil_system_create(
mutex_set_level(&(system->mutex), SYNC_ANY_LATCH);
system->spaces = hash_create(hash_size);
+ system->name_hash = hash_create(hash_size);
UT_LIST_INIT(system->LRU);
- system->n_open_pending = 0;
+ system->n_open = 0;
system->max_n_open = max_n_open;
- system->can_open = os_event_create(NULL);
+
+ system->modification_counter = 0;
+ system->max_assigned_id = 0;
+
+ system->tablespace_version = 0;
UT_LIST_INIT(system->space_list);
@@ -442,7 +1114,7 @@ fil_system_create(
}
/********************************************************************
-Initializes the file system of this module. */
+Initializes the tablespace memory cache. */
void
fil_init(
@@ -451,11 +1123,119 @@ fil_init(
{
ut_a(fil_system == NULL);
+ /*printf("Initializing the tablespace cache with max %lu open files\n",
+ max_n_open); */
fil_system = fil_system_create(FIL_SYSTEM_HASH_SIZE, max_n_open);
}
+/***********************************************************************
+Opens all log files and system tablespace data files. They stay open until the
+database server shutdown. This should be called at a server startup after the
+space objects for the log and the system tablespace have been created. The
+purpose of this operation is to make sure we never run out of file descriptors
+if we need to read from the insert buffer or to write to the log. */
+
+void
+fil_open_log_and_system_tablespace_files(void)
+/*==========================================*/
+{
+	fil_system_t*	system	= fil_system;
+	fil_space_t*	cur_space;
+	fil_node_t*	cur_node;
+
+	mutex_enter(&(system->mutex));
+
+	for (cur_space = UT_LIST_GET_FIRST(system->space_list);
+	     cur_space != NULL;
+	     cur_space = UT_LIST_GET_NEXT(space_list, cur_space)) {
+
+		if (cur_space->purpose == FIL_TABLESPACE
+		    && cur_space->id != 0) {
+			/* A tablespace other than the system tablespace
+			(id 0): its files are not opened here */
+
+			continue;
+		}
+
+		for (cur_node = UT_LIST_GET_FIRST(cur_space->chain);
+		     cur_node != NULL;
+		     cur_node = UT_LIST_GET_NEXT(chain, cur_node)) {
+
+			if (!cur_node->open) {
+				fil_node_open_file(cur_node, system,
+								cur_space);
+			}
+
+			/* Warn if fewer than 10 descriptors remain below
+			the configured maximum */
+
+			if (system->max_n_open < 10 + system->n_open) {
+				fprintf(stderr,
+"InnoDB: Warning: you must raise the value of innodb_max_open_files in\n"
+"InnoDB: my.cnf! Remember that InnoDB keeps all log files and all system\n"
+"InnoDB: tablespace files open for the whole time mysqld is running, and\n"
+"InnoDB: needs to open also some .ibd files if the file-per-table storage\n"
+"InnoDB: model is used. Current open files %lu, max allowed open files %lu.\n",
+					system->n_open, system->max_n_open);
+			}
+		}
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
+/***********************************************************************
+Closes all open files. There must not be any pending i/o's or not flushed
+modifications in the files. */
+
+void
+fil_close_all_files(void)
+/*=====================*/
+{
+	fil_system_t*	system	= fil_system;
+	fil_space_t*	cur_space;
+	fil_node_t*	cur_node;
+
+	mutex_enter(&(system->mutex));
+
+	/* Walk every file node of every space and close those still open */
+
+	for (cur_space = UT_LIST_GET_FIRST(system->space_list);
+	     cur_space != NULL;
+	     cur_space = UT_LIST_GET_NEXT(space_list, cur_space)) {
+
+		for (cur_node = UT_LIST_GET_FIRST(cur_space->chain);
+		     cur_node != NULL;
+		     cur_node = UT_LIST_GET_NEXT(chain, cur_node)) {
+
+			if (cur_node->open) {
+				fil_node_close_file(cur_node, system);
+			}
+		}
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
+/***********************************************************************
+Sets the max tablespace id counter if the given number is bigger than the
+previous value. */
+
+void
+fil_set_max_space_id_if_bigger(
+/*===========================*/
+	ulint	max_id)	/* in: maximum known id */
+{
+	fil_system_t*	system	= fil_system;
+
+	/* Ids from SRV_LOG_SPACE_FIRST_ID upwards are not accepted here;
+	such a value is reported and then the assertion fails */
+
+	if (!(max_id < SRV_LOG_SPACE_FIRST_ID)) {
+		fprintf(stderr,
+"InnoDB: Fatal error: max tablespace id is too high, %lu\n", max_id);
+		ut_a(0);
+	}
+
+	mutex_enter(&(system->mutex));
+
+	/* The counter only ever grows */
+
+	if (max_id > system->max_assigned_id) {
+		system->max_assigned_id = max_id;
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
/********************************************************************
-Writes the flushed lsn to the header of each file space. */
+Initializes the ibuf data structure for space 0 == the system tablespace.
+This can be called after the file space headers have been created and the
+dictionary system has been initialized. */
void
fil_ibuf_init_at_db_start(void)
@@ -464,39 +1244,37 @@ fil_ibuf_init_at_db_start(void)
fil_space_t* space;
space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space) {
- if (space->purpose == FIL_TABLESPACE) {
- space->ibuf_data = ibuf_data_init_for_space(space->id);
- }
- space = UT_LIST_GET_NEXT(space_list, space);
- }
+ ut_a(space);
+ ut_a(space->purpose == FIL_TABLESPACE);
+
+ space->ibuf_data = ibuf_data_init_for_space(space->id);
}
/********************************************************************
-Writes the flushed lsn and the latest archived log number to the page
-header of the first page of a data file. */
+Writes the flushed lsn and the latest archived log number to the page header
+of the first page of a data file. */
static
ulint
fil_write_lsn_and_arch_no_to_file(
/*==============================*/
ulint space_id, /* in: space number */
- ulint sum_of_sizes, /* in: combined size of previous files in space,
- in database pages */
+ ulint sum_of_sizes, /* in: combined size of previous files in
+ space, in database pages */
dulint lsn, /* in: lsn to write */
ulint arch_log_no) /* in: archived log number to write */
{
byte* buf1;
byte* buf;
+ UT_NOT_USED(arch_log_no);
+
buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
buf = ut_align(buf1, UNIV_PAGE_SIZE);
fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
- mach_write_to_4(buf + FIL_PAGE_ARCH_LOG_NO, arch_log_no);
fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
@@ -505,7 +1283,7 @@ fil_write_lsn_and_arch_no_to_file(
/********************************************************************
Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file. */
+header of the first page of each data file in the system tablespace. */
ulint
fil_write_flushed_lsn_to_data_files(
@@ -524,18 +1302,20 @@ fil_write_flushed_lsn_to_data_files(
space = UT_LIST_GET_FIRST(fil_system->space_list);
while (space) {
- if (space->purpose == FIL_TABLESPACE) {
+ /* We only write the lsn to the system tablespace
+ (space id == 0) files */
+
+ if (space->id == 0) {
+ ut_a(space->purpose == FIL_TABLESPACE);
sum_of_sizes = 0;
node = UT_LIST_GET_FIRST(space->chain);
-
while (node) {
mutex_exit(&(fil_system->mutex));
err = fil_write_lsn_and_arch_no_to_file(
- space->id,
- sum_of_sizes,
- lsn, arch_log_no);
+ space->id, sum_of_sizes,
+ lsn, arch_log_no);
if (err != DB_SUCCESS) {
return(err);
@@ -544,11 +1324,11 @@ fil_write_flushed_lsn_to_data_files(
mutex_enter(&(fil_system->mutex));
sum_of_sizes += node->size;
-
node = UT_LIST_GET_NEXT(chain, node);
}
- }
+ break; /* there is only one space with id == 0 */
+ }
space = UT_LIST_GET_NEXT(space_list, space);
}
@@ -575,8 +1355,9 @@ fil_read_flushed_lsn_and_arch_log_no(
byte* buf;
byte* buf2;
dulint flushed_lsn;
- ulint arch_log_no;
-
+ ulint arch_log_no = 0; /* since InnoDB does not archive
+ its own logs under MySQL, this
+ parameter is not relevant */
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
/* Align the memory for a possible read from a raw device */
buf = ut_align(buf2, UNIV_PAGE_SIZE);
@@ -584,7 +1365,6 @@ fil_read_flushed_lsn_and_arch_log_no(
os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
- arch_log_no = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO);
ut_free(buf2);
@@ -611,143 +1391,959 @@ fil_read_flushed_lsn_and_arch_log_no(
}
}
+/*================ SINGLE-TABLE TABLESPACES ==========================*/
+
/***********************************************************************
-Creates a space object and puts it to the file system. */
+Increments the count of pending insert buffer page merges, if space is not
+being deleted. */
-void
-fil_space_create(
-/*=============*/
- char* name, /* in: space name */
- ulint id, /* in: space id */
- ulint purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
+ibool
+fil_inc_pending_ibuf_merges(
+/*========================*/
+ /* out: TRUE if being deleted, and ibuf merges should
+ be skipped */
+ ulint id) /* in: space id */
{
- fil_space_t* space;
- char* name2;
- fil_system_t* system = fil_system;
-
- ut_a(system);
- ut_a(name);
-
-#ifndef UNIV_BASIC_LOG_DEBUG
- /* Spaces with an odd id number are reserved to replicate spaces
- used in log debugging */
+ fil_system_t* system = fil_system;
+ fil_space_t* space;
- ut_anp((purpose == FIL_LOG) || (id % 2 == 0));
-#endif
mutex_enter(&(system->mutex));
- space = mem_alloc(sizeof(fil_space_t));
-
- name2 = mem_alloc(ut_strlen(name) + 1);
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- ut_strcpy(name2, name);
+ if (space == NULL) {
+ fprintf(stderr,
+"InnoDB: Error: trying to do ibuf merge to a dropped tablespace %lu\n", id);
+ }
- space->name = name2;
- space->id = id;
- space->purpose = purpose;
- space->size = 0;
+ if (space == NULL || space->stop_ibuf_merges) {
+ mutex_exit(&(system->mutex));
- space->n_reserved_extents = 0;
-
- UT_LIST_INIT(space->chain);
- space->magic_n = FIL_SPACE_MAGIC_N;
+ return(TRUE);
+ }
- space->ibuf_data = NULL;
-
- rw_lock_create(&(space->latch));
- rw_lock_set_level(&(space->latch), SYNC_FSP);
-
- HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
+ space->n_pending_ibuf_merges++;
- UT_LIST_ADD_LAST(space_list, system->space_list, space);
-
mutex_exit(&(system->mutex));
+
+ return(FALSE);
}
/***********************************************************************
-Frees a space object from a file system. Closes the files in the chain
-but does not delete them. */
+Decrements the count of pending insert buffer page merges. */
void
-fil_space_free(
-/*===========*/
+fil_decr_pending_ibuf_merges(
+/*========================*/
ulint id) /* in: space id */
{
+ fil_system_t* system = fil_system;
fil_space_t* space;
- fil_node_t* fil_node;
- fil_system_t* system = fil_system;
mutex_enter(&(system->mutex));
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
+ if (space == NULL) {
+ fprintf(stderr,
+"InnoDB: Error: decrementing ibuf merge of a dropped tablespace %lu\n", id);
+ }
- UT_LIST_REMOVE(space_list, system->space_list, space);
+ if (space != NULL) {
+ space->n_pending_ibuf_merges--;
+ }
- ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
+ mutex_exit(&(system->mutex));
+}
- fil_node = UT_LIST_GET_FIRST(space->chain);
+/***********************************************************************
+Deletes a single-table tablespace. The tablespace must be cached in the
+memory cache. */
- ut_d(UT_LIST_VALIDATE(chain, fil_node_t, space->chain));
+ibool
+fil_delete_tablespace(
+/*==================*/
+ /* out: TRUE if success */
+ ulint id) /* in: space id */
+{
+ fil_system_t* system = fil_system;
+ ibool success;
+ fil_space_t* space;
+ fil_node_t* node;
+ ulint count = 0;
+ char path[OS_FILE_MAX_PATH];
- while (fil_node != NULL) {
- fil_node_free(fil_node, system, space);
+ ut_a(id != 0);
+stop_ibuf_merges:
+ mutex_enter(&(system->mutex));
- fil_node = UT_LIST_GET_FIRST(space->chain);
- }
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+ if (space != NULL) {
+ space->stop_ibuf_merges = TRUE;
+
+ if (space->n_pending_ibuf_merges == 0) {
+ mutex_exit(&(system->mutex));
+
+ count = 0;
+
+ goto try_again;
+ } else {
+ if (count > 5000) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Warning: trying to delete tablespace %s,\n"
+"InnoDB: but there are %lu pending ibuf merges on it.\n"
+"InnoDB: Loop %lu.\n", space->name, space->n_pending_ibuf_merges, count);
+ }
+
+ mutex_exit(&(system->mutex));
+
+ os_thread_sleep(20000);
+ count++;
+
+ goto stop_ibuf_merges;
+ }
+ }
+
+ mutex_exit(&(system->mutex));
+ count = 0;
+
+try_again:
+ mutex_enter(&(system->mutex));
+
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+ if (space == NULL) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Error: cannot delete tablespace %lu because it is not found\n"
+"InnoDB: in the tablespace memory cache.\n", id);
+
+ mutex_exit(&(system->mutex));
- ut_d(UT_LIST_VALIDATE(chain, fil_node_t, space->chain));
- ut_ad(0 == UT_LIST_GET_LEN(space->chain));
+ return(FALSE);
+ }
+
+ ut_a(space);
+ ut_a(strlen(space->name) < OS_FILE_MAX_PATH);
+ ut_a(space->n_pending_ibuf_merges == 0);
+
+ strcpy(path, space->name);
+
+ space->is_being_deleted = TRUE;
+
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+ node = UT_LIST_GET_FIRST(space->chain);
+
+ if (space->n_pending_flushes > 0 || node->n_pending > 0) {
+ if (count > 1000) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Warning: trying to delete tablespace %s,\n"
+"InnoDB: but there are %lu flushes and %lu pending i/o's on it\n"
+"InnoDB: Loop %lu.\n", space->name, space->n_pending_flushes, node->n_pending,
+ count);
+ }
+ mutex_exit(&(system->mutex));
+ os_thread_sleep(20000);
+
+ count++;
+
+ goto try_again;
+ }
mutex_exit(&(system->mutex));
- mem_free(space->name);
- mem_free(space);
+ /* Invalidate in the buffer pool all pages belonging to the
+ tablespace. Since we have set space->is_being_deleted = TRUE, readahead
+ or ibuf merge can no longer read more pages of this tablespace to the
+ buffer pool. Thus we can clean the tablespace out of the buffer pool
+ completely and permanently. The flag is_being_deleted also prevents
+ fil_flush() from being applied to this tablespace. */
+
+ buf_LRU_invalidate_tablespace(id);
+
+ success = fil_space_free(id);
+
+ if (success) {
+ success = os_file_delete(path);
+
+ if (success) {
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
}
/***********************************************************************
-Returns the size of the space in pages. */
+Discards a single-table tablespace. The tablespace must be cached in the
+memory cache. Discarding is like deleting a tablespace, but
+1) we do not drop the table from the data dictionary;
+2) we remove all insert buffer entries for the tablespace immediately; in DROP
+TABLE they are only removed gradually in the background;
+3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
+as it originally had. */
-ulint
-fil_space_get_size(
-/*===============*/
- /* out: space size */
+ibool
+fil_discard_tablespace(
+/*===================*/
+ /* out: TRUE if success */
ulint id) /* in: space id */
{
- fil_space_t* space;
+ ibool success;
+
+ success = fil_delete_tablespace(id);
+
+ if (!success) {
+ fprintf(stderr,
+"InnoDB: Warning: cannot delete tablespace %lu in DISCARD TABLESPACE.\n"
+"InnoDB: But let us remove the insert buffer entries for this tablespace.\n",
+ id);
+ }
+
+ /* Remove all insert buffer entries for the tablespace */
+
+ ibuf_delete_for_discarded_space(id);
+
+ return(TRUE);
+}
+
+/***********************************************************************
+Renames the memory cache structures of a single-table tablespace. */
+static
+ibool
+fil_rename_tablespace_in_mem(
+/*=========================*/
+ /* out: TRUE if success */
+ fil_space_t* space, /* in: tablespace memory object */
+ fil_node_t* node, /* in: file node of that tablespace */
+ char* path) /* in: new name */
+{
fil_system_t* system = fil_system;
- ulint size;
+ fil_space_t* space2;
+ char* old_name = space->name;
+
+ HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(old_name),
+ space2, 0 == strcmp(old_name, space2->name));
+ if (space != space2) {
+ fprintf(stderr,
+"InnoDB: Error: cannot find %s in tablespace memory cache\n", old_name);
- ut_ad(system);
+ return(FALSE);
+ }
+
+ HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(path),
+ space2, 0 == strcmp(path, space2->name));
+ if (space2 != NULL) {
+ fprintf(stderr,
+"InnoDB: Error: %s is already in tablespace memory cache\n", path);
+
+ return(FALSE);
+ }
+
+ HASH_DELETE(fil_space_t, name_hash, system->name_hash,
+ ut_fold_string(space->name), space);
+ mem_free(space->name);
+ mem_free(node->name);
+
+ space->name = mem_alloc(strlen(path) + 1);
+ node->name = mem_alloc(strlen(path) + 1);
+
+ strcpy(space->name, path);
+ strcpy(node->name, path);
+
+ HASH_INSERT(fil_space_t, name_hash, system->name_hash,
+ ut_fold_string(path), space);
+ return(TRUE);
+}
+
+/***********************************************************************
+Renames a single-table tablespace. The tablespace must be cached in the
+tablespace memory cache. */
+
+ibool
+fil_rename_tablespace(
+/*==================*/
+ /* out: TRUE if success */
+ char* old_name, /* in: old table name in the standard
+ databasename/tablename format of InnoDB */
+ ulint id, /* in: space id */
+ char* new_name) /* in: new table name in the standard
+ databasename/tablename format of InnoDB */
+{
+ fil_system_t* system = fil_system;
+ ibool success;
+ fil_space_t* space;
+ fil_node_t* node;
+ ulint count = 0;
+ char old_path[OS_FILE_MAX_PATH];
+ char path[OS_FILE_MAX_PATH];
+
+ ut_a(id != 0);
+retry:
+ count++;
+
+ if (count > 1000) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Warning: problems renaming %s to %s, %lu iterations\n",
+ old_name, new_name, count);
+ }
mutex_enter(&(system->mutex));
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- size = space->size;
+ if (space == NULL) {
+ fprintf(stderr,
+"InnoDB: Error: cannot find space id %lu from the tablespace memory cache\n"
+"InnoDB: though the table %s in a rename operation should have that id\n",
+ id, old_name);
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
+ }
+
+ if (count > 25000) {
+ space->stop_ios = FALSE;
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
+ }
+
+ /* We temporarily close the .ibd file because we do not trust that
+ operating systems can rename an open file. For the closing we have to
+ wait until there are no pending i/o's or flushes on the file. */
+
+ space->stop_ios = TRUE;
+
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+ node = UT_LIST_GET_FIRST(space->chain);
+
+ if (node->n_pending > 0 || node->n_pending_flushes > 0) {
+ /* There are pending i/o's or flushes, sleep for a while and
+ retry */
+
+ mutex_exit(&(system->mutex));
+
+ os_thread_sleep(20000);
+
+ goto retry;
+
+ } else if (node->modification_counter > node->flush_counter) {
+ /* Flush the space */
+
+ mutex_exit(&(system->mutex));
+
+ os_thread_sleep(20000);
+
+ fil_flush(id);
+
+ goto retry;
+
+ } else if (node->open) {
+ /* Close the file */
+
+ fil_node_close_file(node, system);
+ }
+
+ /* Check that the old name in the space is right */
+ ut_a(strlen(old_name) < OS_FILE_MAX_PATH - 10);
+
+ sprintf(old_path, "./%s.ibd", old_name);
+
+ srv_normalize_path_for_win(old_path);
+
+ ut_a(strcmp(space->name, old_path) == 0);
+ ut_a(strcmp(node->name, old_path) == 0);
+
+ /* Rename the tablespace and the node in the memory cache */
+
+ ut_a(strlen(new_name) < OS_FILE_MAX_PATH - 10);
+
+ sprintf(path, "./%s.ibd", new_name);
+
+ srv_normalize_path_for_win(path);
+
+ success = fil_rename_tablespace_in_mem(space, node, path);
+
+ if (!success) {
+
+ goto func_exit;
+ }
+
+ success = os_file_rename(old_path, path);
+
+ if (!success) {
+ /* We have to revert the changes we made to the tablespace
+ memory cache */
+
+ ut_a(fil_rename_tablespace_in_mem(space, node, old_path));
+ }
+func_exit:
+ space->stop_ios = FALSE;
+
mutex_exit(&(system->mutex));
- return(size);
+ return(success);
}
/***********************************************************************
-Checks if the pair space, page_no refers to an existing page in a
-tablespace file space. */
+Creates a new single-table tablespace to a database directory of MySQL.
+Database directories are under the 'datadir' of MySQL. The datadir is the
+directory of a running mysqld program. We can refer to it by simply the
+path '.'. */
+
+ulint
+fil_create_new_single_table_tablespace(
+/*===================================*/
+				/* out: DB_SUCCESS or error code */
+	ulint*	space_id,	/* out: space id */
+	char*	tablename,	/* in: the table name in the usual
+				databasename/tablename format of InnoDB */
+	ulint	size)		/* in: the initial size of the tablespace file
+				in pages, must be > 0 */
+{
+	os_file_t	file;
+	ibool		ret;
+	ulint		err;
+	byte*		page;
+	char		path[OS_FILE_MAX_PATH];
+
+	/* The assertion leaves room for the "./" prefix and the ".ibd"
+	suffix added by the sprintf below */
+
+	ut_a(strlen(tablename) < OS_FILE_MAX_PATH - 10);
+
+	sprintf(path, "./%s.ibd", tablename);
+
+	srv_normalize_path_for_win(path);
+
+	file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL,
+						OS_DATA_FILE, &ret);
+	if (ret == FALSE) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error creating file %s.\n", path);
+
+		/* The following call will print an error message */
+
+		err = os_file_get_last_error(TRUE);
+
+		if (err == OS_FILE_ALREADY_EXISTS) {
+			fprintf(stderr,
+"InnoDB: The file already exists though the corresponding table did not\n"
+"InnoDB: exist in the InnoDB data dictionary. Have you moved InnoDB\n"
+"InnoDB: .ibd files around without using the SQL commands\n"
+"InnoDB: DISCARD TABLESPACE and IMPORT TABLESPACE, or did\n"
+"InnoDB: mysqld crash in the middle of CREATE TABLE? You can\n"
+"InnoDB: resolve the problem by removing the file %s\n"
+"InnoDB: under the 'datadir' of MySQL.\n", path);
+
+			return(DB_TABLESPACE_ALREADY_EXISTS);
+		}
+
+		if (err == OS_FILE_DISK_FULL) {
+
+			return(DB_OUT_OF_FILE_SPACE);
+		}
+
+		return(DB_ERROR);
+	}
+
+	/* From here on the file exists and is open: every failure before
+	the os_file_close() below goes through error_exit, which closes and
+	removes it */
+
+	ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
+
+	if (!ret) {
+		err = DB_OUT_OF_FILE_SPACE;
+
+		goto error_exit;
+	}
+
+	*space_id = fil_assign_new_space_id();
+
+	if (*space_id == ULINT_UNDEFINED) {
+		err = DB_ERROR;
+
+		goto error_exit;
+	}
+
+	/* We have to write the space id to the file immediately and flush the
+	file to disk. This is because in crash recovery we must be aware what
+	tablespaces exist and what are their space id's, so that we can apply
+	the log records to the right file. It may take quite a while until
+	buffer pool flush algorithms write anything to the file and flush it to
+	disk. If we would not write here anything, the file would be filled
+	with zeros from the call of os_file_set_size(), until a buffer pool
+	flush would write to it. */
+
+	/* The page buffer is allocated only now, so that the failure paths
+	above have nothing to free */
+
+	page = ut_malloc(UNIV_PAGE_SIZE);
+
+	memset(page, '\0', UNIV_PAGE_SIZE);
+
+	fsp_header_write_space_id(page, *space_id);
+
+	buf_flush_init_for_writing(page, ut_dulint_zero, *space_id, 0);
+
+	ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
+
+	ut_free(page);
+
+	if (!ret) {
+		fprintf(stderr,
+"InnoDB: Error: could not write the first page to tablespace %s\n", path);
+
+		err = DB_ERROR;
+
+		goto error_exit;
+	}
+
+	ret = os_file_flush(file);
+
+	if (!ret) {
+		fprintf(stderr,
+"InnoDB: Error: file flush of tablespace %s failed\n", path);
+
+		err = DB_ERROR;
+
+		goto error_exit;
+	}
+
+	os_file_close(file);
+
+	/* The file is closed now; if registering the space in the memory
+	cache fails, only the file itself has to be removed */
+
+	if (!fil_space_create(path, *space_id, FIL_TABLESPACE)) {
+		os_file_delete(path);
+
+		return(DB_ERROR);
+	}
+
+	fil_node_create(path, size, *space_id, FALSE);
+
+	return(DB_SUCCESS);
+
+error_exit:
+	os_file_close(file);
+	os_file_delete(path);
+
+	return(err);
+}
+
+/************************************************************************
+Tries to open a single-table tablespace and checks the space id is right in
+it. If it does not succeed, prints an error message to the .err log. This
+function is used to open the tablespace when we load a table definition
+to the dictionary cache. NOTE that we assume this operation is used under the
+protection of the dictionary mutex, so that two users cannot race here. This
+operation does not leave the file associated with the tablespace open, but
+closes it after we have looked at the space id in it. */
ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
- /* out: TRUE if the address is meaningful */
+fil_open_single_table_tablespace(
+/*=============================*/
+ /* out: TRUE if success */
ulint id, /* in: space id */
- ulint page_no)/* in: page number */
+ char* name) /* in: table name in the databasename/tablename
+ format */
{
+ os_file_t file;
+ char* filepath;
+ ibool success;
+ byte* page;
+ ulint space_id;
+ ibool ret = TRUE;
+
+ filepath = ut_malloc(OS_FILE_MAX_PATH);
+
+ ut_a(strlen(name) < OS_FILE_MAX_PATH - 10);
+
+ sprintf(filepath, "./%s.ibd", name);
+
+ srv_normalize_path_for_win(filepath);
+
+ file = os_file_create_simple_no_error_handling(filepath, OS_FILE_OPEN,
+ OS_FILE_READ_ONLY, &success);
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(TRUE);
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: Error: trying to open a table, but could not\n"
+"InnoDB: open the tablespace file %s!\n", filepath);
+ fprintf(stderr,
+"InnoDB: have you moved InnoDB .ibd files around without using the\n"
+"InnoDB: commands DISCARD TABLESPACE and IMPORT TABLESPACE?\n");
+
+ ut_free(filepath);
+
+ return(FALSE);
+ }
+
+ /* Read the first page of the tablespace */
+
+ page = ut_malloc(UNIV_PAGE_SIZE);
+
+ success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
+
+ /* We have to read the tablespace id from the file */
+
+ space_id = fsp_header_get_space_id(page);
+
+ if (space_id != id) {
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: Error: tablespace id in file %s is %lu, but in the InnoDB\n"
+"InnoDB: data dictionary it is %lu.\n", filepath, space_id, id);
+ fprintf(stderr,
+"InnoDB: Have you moved InnoDB .ibd files around without using the\n"
+"InnoDB: commands DISCARD TABLESPACE and IMPORT TABLESPACE?\n");
+
+ ret = FALSE;
+
+ goto func_exit;
+ }
+
+ success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
+
+ if (!success) {
+ goto func_exit;
+ }
+
+ /* We do not measure the size of the file, that is why we pass the 0
+ below */
+
+ fil_node_create(filepath, 0, space_id, FALSE);
+func_exit:
+ os_file_close(file);
+ ut_free(page);
+ ut_free(filepath);
+
+ return(ret);
+}
+
+/************************************************************************
+Opens an .ibd file and adds the associated single-table tablespace to the
+InnoDB fil0fil.c data structures. */
+static
+void
+fil_load_single_table_tablespace(
+/*=============================*/
+	char*	dbname,		/* in: database name */
+	char*	filename)	/* in: file name (not a path), including the
+				.ibd extension */
+{
+	os_file_t	file;
+	char*		filepath;
+	ibool		success;
+	byte*		page;
+	ulint		space_id;
+	ulint		size_low;
+	ulint		size_high;
+	ib_longlong	size;
+
+	filepath = ut_malloc(OS_FILE_MAX_PATH);
+
+	/* The assertion leaves room for the "./" prefix and the '/'
+	separator added by the sprintf below */
+
+	ut_a(strlen(dbname) + strlen(filename) < OS_FILE_MAX_PATH - 10);
+
+	sprintf(filepath, "./%s/%s", dbname, filename);
+
+	srv_normalize_path_for_win(filepath);
+
+	file = os_file_create_simple_no_error_handling(filepath, OS_FILE_OPEN,
+						OS_FILE_READ_ONLY, &success);
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+
+		fprintf(stderr,
+"InnoDB: Error: could not open single-table tablespace file\n"
+"InnoDB: %s!\n", filepath);
+
+		ut_free(filepath);
+
+		return;
+	}
+
+	success = os_file_get_size(file, &size_low, &size_high);
+
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+
+		fprintf(stderr,
+"InnoDB: Error: could not measure the size of single-table tablespace file\n"
+"InnoDB: %s!\n", filepath);
+
+		os_file_close(file);
+		ut_free(filepath);
+
+		return;
+	}
+
+	/* Combine the two 32-bit halves into one 64-bit byte count */
+
+	size = (((ib_longlong)size_high) << 32) + (ib_longlong)size_low;
+
+	if (size < 4 * UNIV_PAGE_SIZE) {
+		fprintf(stderr,
+"InnoDB: Error: the size of single-table tablespace file %s\n"
+"InnoDB: is only %lu %lu, should be at least %lu!\n", filepath, size_high,
+					size_low, (ulint)4 * UNIV_PAGE_SIZE);
+		os_file_close(file);
+		ut_free(filepath);
+
+		return;
+	}
+
+	/* Read the first page of the tablespace */
+
+	page = ut_malloc(UNIV_PAGE_SIZE);
+
+	success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
+
+	if (!success) {
+		/* Do not interpret the buffer if the read failed: the space
+		id taken from it would be garbage */
+
+		os_file_get_last_error(TRUE);
+
+		fprintf(stderr,
+"InnoDB: Error: could not read the first page of single-table tablespace file\n"
+"InnoDB: %s!\n", filepath);
+
+		goto func_exit;
+	}
+
+	/* We have to read the tablespace id from the file */
+
+	space_id = fsp_header_get_space_id(page);
+
+	if (space_id == ULINT_UNDEFINED || space_id == 0) {
+		fprintf(stderr,
+"InnoDB: Error: tablespace id %lu in file %s is not sensible\n", space_id,
+								filepath);
+		goto func_exit;
+	}
+
+	success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
+
+	if (!success) {
+
+		goto func_exit;
+	}
+
+	/* We do not measure the size of the file, that is why we pass the 0
+	below */
+
+	fil_node_create(filepath, 0, space_id, FALSE);
+func_exit:
+	os_file_close(file);
+	ut_free(page);
+	ut_free(filepath);
+}
+
+/************************************************************************
+At the server startup, if we need crash recovery, scans the database
+directories under the MySQL datadir, looking for .ibd files. Those files are
+single-table tablespaces. We need to know the space id in each of them so that
+we know into which file we should look to check the contents of a page stored
+in the doublewrite buffer, also to know where to apply log records where the
+space id is != 0. */
+
+ulint
+fil_load_single_table_tablespaces(void)
+/*===================================*/
+			/* out: DB_SUCCESS or error number */
+{
+	int		ret;
+	char*		dbpath;
+	os_file_dir_t	dir;
+	os_file_dir_t	dbdir;
+	os_file_stat_t	dbinfo;
+	os_file_stat_t	fileinfo;
+
+	/* The datadir of MySQL is always the default directory of mysqld */
+
+	dir = os_file_opendir((char*)".", TRUE);
+
+	if (dir == NULL) {
+
+		return(DB_ERROR);
+	}
+
+	dbpath = ut_malloc(OS_FILE_MAX_PATH);
+
+	/* Scan all directories under the datadir. They are the database
+	directories of MySQL. */
+
+	ret = os_file_readdir_next_file((char*)".", dir, &dbinfo);
+
+	while (ret == 0) {
+		if (dbinfo.type == OS_FILE_TYPE_FILE
+		    || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
+
+			goto next_datadir_item;
+		}
+
+		/* We found a symlink or a directory; try opening it to see
+		if a symlink is a directory */
+
+		ut_a(strlen(dbinfo.name) < OS_FILE_MAX_PATH - 10);
+
+		sprintf(dbpath, "./%s", dbinfo.name);
+
+		srv_normalize_path_for_win(dbpath);
+
+		dbdir = os_file_opendir(dbpath, FALSE);
+
+		if (dbdir != NULL) {
+			/* We found a database directory; loop through it,
+			looking for possible .ibd files in it */
+
+			ret = os_file_readdir_next_file(dbpath, dbdir,
+								&fileinfo);
+			while (ret == 0) {
+				/* Skip subdirectories and entries of unknown
+				type. The test must look at fileinfo, the
+				entry inside the database directory, and not
+				at dbinfo, which describes the database
+				directory itself. */
+
+				if (fileinfo.type == OS_FILE_TYPE_DIR
+				    || fileinfo.type == OS_FILE_TYPE_UNKNOWN) {
+
+					goto next_file_item;
+				}
+
+				/* We found a symlink or a file */
+				if (strlen(fileinfo.name) > 4
+				    && 0 == strcmp(fileinfo.name +
+						strlen(fileinfo.name) - 4,
+						".ibd")) {
+					/* The name ends in .ibd; try opening
+					the file */
+					fil_load_single_table_tablespace(
+						dbinfo.name, fileinfo.name);
+				}
+next_file_item:
+				ret = os_file_readdir_next_file(dbpath, dbdir,
+								&fileinfo);
+			}
+
+			if (0 != os_file_closedir(dbdir)) {
+				fprintf(stderr,
+"InnoDB: Warning: could not close database directory %s\n", dbpath);
+			}
+		}
+
+next_datadir_item:
+		ret = os_file_readdir_next_file((char*)".", dir, &dbinfo);
+	}
+
+	ut_free(dbpath);
+
+	/* At the end of directory we should get 1 as the return value, -1
+	if there was an error */
+	if (ret != 1) {
+		fprintf(stderr,
+"InnoDB: Error: os_file_readdir_next_file returned %d in MySQL datadir\n",
+						ret);
+		os_file_closedir(dir);
+
+		return(DB_ERROR);
+	}
+
+	if (0 != os_file_closedir(dir)) {
+		fprintf(stderr,
+"InnoDB: Error: could not close MySQL datadir\n");
+
+		return(DB_ERROR);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/************************************************************************
+If we need crash recovery, and we have called
+fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
+we can call this function to print an error message of orphaned .ibd files
+for which there is not a data dictionary entry with a matching table name
+and space id. */
+
+void
+fil_print_orphaned_tablespaces(void)
+/*================================*/
+{
+	fil_system_t*	system	= fil_system;
+	fil_space_t*	space;
+
+	mutex_enter(&(system->mutex));
+
+	for (space = UT_LIST_GET_FIRST(system->space_list);
+	     space != NULL;
+	     space = UT_LIST_GET_NEXT(space_list, space)) {
+
+		/* Report only tablespaces other than the system tablespace
+		(id 0) whose mark flag is not set */
+
+		if (space->purpose == FIL_TABLESPACE && space->id != 0
+		    && !space->mark) {
+			fprintf(stderr,
+"InnoDB: Warning: tablespace %s of id %lu has no matching table in\n"
+"InnoDB: the InnoDB data dictionary.\n", space->name, space->id);
+		}
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
+/***********************************************************************
+Returns TRUE if a single-table tablespace does not exist in the memory cache,
+or is being deleted there. */
+
+ibool
+fil_tablespace_deleted_or_being_deleted_in_mem(
+/*===========================================*/
+ /* out: TRUE if does not exist or is being
+ deleted */
+ ulint id, /* in: space id */
+ ib_longlong version)/* in: tablespace_version should be this; if
+ you pass -1 as the value of this, then this
+ parameter is ignored */
+{
+ fil_system_t* system = fil_system;
fil_space_t* space;
+
+ ut_ad(system);
+
+ mutex_enter(&(system->mutex));
+
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+ if (space == NULL || space->is_being_deleted) {
+ mutex_exit(&(system->mutex));
+
+ return(TRUE);
+ }
+
+ if (version != -1LL && space->tablespace_version != version) {
+ mutex_exit(&(system->mutex));
+
+ return(TRUE);
+ }
+
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
+}
+
+/***********************************************************************
+Returns TRUE if a single-table tablespace exists in the memory cache. */
+
+ibool
+fil_tablespace_exists_in_mem(
+/*=========================*/
+ /* out: TRUE if exists */
+ ulint id) /* in: space id */
+{
fil_system_t* system = fil_system;
- ulint size;
- ibool ret;
-
+ fil_space_t* space;
+
ut_ad(system);
mutex_enter(&(system->mutex));
@@ -755,24 +2351,297 @@ fil_check_adress_in_tablespace(
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
if (space == NULL) {
- ret = FALSE;
- } else {
- size = space->size;
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
+ }
+
+ mutex_exit(&(system->mutex));
+
+ return(TRUE);
+}
+
+/***********************************************************************
+Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
+cache. Note that if we have not done a crash recovery at the database startup,
+there may be many tablespaces which are not yet in the memory cache. */
- if (page_no > size) {
- ret = FALSE;
- } else if (space->purpose != FIL_TABLESPACE) {
- ret = FALSE;
+ibool
+fil_space_for_table_exists_in_mem(
+/*==============================*/
+ /* out: TRUE if a matching tablespace
+ exists in the memory cache */
+ ulint id, /* in: space id */
+ char* name, /* in: table name in the standard
+ 'databasename/tablename' format */
+ ibool mark_space, /* in: in crash recovery, at database startup
+ we mark all spaces which have an associated
+ table in the InnoDB data dictionary, so that
+ we can print a warning about orphaned
+ tablespaces */
+ ibool print_error_if_does_not_exist)
+ /* in: print detailed error information to
+ the .err log if a matching tablespace is
+ not found from memory */
+{
+ fil_system_t* system = fil_system;
+ fil_space_t* namespace;
+ fil_space_t* space;
+ char path[OS_FILE_MAX_PATH];
+
+ ut_ad(system);
+
+ mutex_enter(&(system->mutex));
+
+ /* Build the string that is compared against the cached space names:
+ "./" + name + ".ibd".
+ NOTE(review): sprintf() writes into path[OS_FILE_MAX_PATH] with no
+ length check; this is safe only if every caller bounds the length of
+ name - confirm */
+ sprintf(path, "./%s.ibd", name);
+ srv_normalize_path_for_win(path);
+
+ /* Look if there is a space with the same id */
+
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+ /* Look if there is a space with the same name; the name is the
+ directory path from the datadir to the file */
+
+ HASH_SEARCH(name_hash, system->name_hash,
+ ut_fold_string(path), namespace,
+ 0 == strcmp(namespace->name, path));
+ /* Quiet mode: succeed only if the id lookup and the name lookup found
+ the very same space object; print nothing in either case */
+ if (!print_error_if_does_not_exist) {
+ if (space && space == namespace) {
+ if (mark_space) {
+ space->mark = TRUE;
+ }
+
+ mutex_exit(&(system->mutex));
+
+ return(TRUE);
+ }
+
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
+ }
+
+ /* From here on every kind of mismatch is reported to stderr before
+ FALSE is returned */
+ if (space == NULL) {
+ if (namespace == NULL) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Error: table %s in InnoDB data dictionary has tablespace\n"
+"InnoDB: id %lu, but tablespace with that id or name does not exist. Have\n"
+"InnoDB: you deleted or moved .ibd files? We cannot open table %s now.\n",
+ name, id, name);
} else {
- ret = TRUE;
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Error: table %s in InnoDB data dictionary has tablespace\n"
+"InnoDB: id %lu, but tablespace with that id does not exist. There is\n"
+"InnoDB: a tablespace of name %s and id %lu, though. Have\n"
+"InnoDB: you deleted or moved .ibd files? We cannot open table %s now.\n",
+ name, id, namespace->name, namespace->id, name);
}
+
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
}
-
+
+ if (0 != strcmp(space->name, path)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Error: table %s in InnoDB data dictionary has tablespace\n"
+"InnoDB: id %lu, but tablespace with that id has name %s. Have you\n"
+"InnoDB: deleted or moved .ibd files? We cannot open table %s now.\n",
+ name, id, space->name, name);
+ if (namespace != NULL) {
+ fprintf(stderr,
+"InnoDB: There is a tablespace with the right name %s, but its id is %lu.\n",
+ namespace->name, namespace->id);
+ }
+
+ mutex_exit(&(system->mutex));
+
+ return(FALSE);
+ }
+
+ /* The space found by id has a name equal to path, so the lookup by
+ name is required to have returned the same object */
+ ut_a(space == namespace);
+
+ if (mark_space) {
+ space->mark = TRUE;
+ }
+
mutex_exit(&(system->mutex));
- return(ret);
+ return(TRUE);
+}
+
+/**************************************************************************
+Tries to extend a data file by the number of pages given. Fractions of 1 MB
+are ignored. The tablespace must be cached in the memory cache. */
+
+ibool
+fil_extend_last_data_file(
+/*======================*/
+				/* out: TRUE; note that TRUE is returned
+				also when a write failed part way, so the
+				caller must look at *actual_increase */
+	ulint*	actual_increase,/* out: number of pages actually added:
+				the count of successfully written
+				megabytes, expressed in pages */
+	ulint	space_id,	/* in: space id */
+	ulint	size,		/* in: current size of the space in pages, as
+				stored in the fsp header */
+	ulint	size_increase)	/* in: try to extend this many pages; only
+				whole megabytes are written */
+{
+	fil_system_t*	system		= fil_system;
+	ulint		pages_per_mb	= (1024 * 1024) / UNIV_PAGE_SIZE;
+	ulint		n_mb_to_write	= size_increase / pages_per_mb;
+	fil_node_t*	node;
+	fil_space_t*	space;
+	byte*		buf2;
+	byte*		buf;
+	ibool		success;
+	ulint		i;
+
+	fil_mutex_enter_and_prepare_for_io(space_id);
+
+	HASH_SEARCH(hash, system->spaces, space_id, space,
+		    space->id == space_id);
+	ut_a(space);
+
+	/* It is the last file in the chain of the space that grows */
+	node = UT_LIST_GET_LAST(space->chain);
+
+	fil_node_prepare_for_io(node, system, space);
+
+	if (UT_LIST_GET_LEN(space->chain) == 1 && node->size < size) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"InnoDB: Fatal error: space %s id %lu size stored in header is %lu pages\n"
+"InnoDB: but actual size is only %lu pages (possibly rounded downwards)!\n"
+"InnoDB: Cannot continue operation!\n", space->name, space->id, size,
+			node->size);
+		exit(1);
+	}
+
+	/* One megabyte of zeros, aligned with ut_align() to UNIV_PAGE_SIZE,
+	is appended to the file over and over */
+	buf2 = mem_alloc(1024 * 1024 + UNIV_PAGE_SIZE);
+	buf = ut_align(buf2, UNIV_PAGE_SIZE);
+
+	memset(buf, '\0', 1024 * 1024);
+
+	i = 0;
+
+	while (i < n_mb_to_write) {
+		/* If we use native Windows aio, then we use it also in this
+		write. The byte offset of the current end of the file is
+		passed as its low 32 bits and its high bits */
+
+		success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
+			node->name, node->handle, buf,
+			(node->size << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
+			node->size >> (32 - UNIV_PAGE_SIZE_SHIFT),
+			1024 * 1024, NULL, NULL);
+
+		if (!success) {
+			/* Stop at the first failed write; i keeps the
+			number of megabytes that did get written */
+			break;
+		}
+
+		node->size += pages_per_mb;
+		space->size += pages_per_mb;
+
+		os_has_said_disk_full = FALSE;
+
+		i++;
+	}
+
+	mem_free(buf2);
+
+	fil_node_complete_io(node, system, OS_FILE_WRITE);
+
+	mutex_exit(&(system->mutex));
+
+	*actual_increase = i * pages_per_mb;
+
+	fil_flush(space_id);
+
+	if (space_id == 0) {
+		/* For space 0 the size of the last configured data file is
+		kept up to date as well */
+		srv_data_file_sizes[srv_n_data_files - 1] += *actual_increase;
+	}
+
+	return(TRUE);
 }
+/**************************************************************************
+Tries to extend a data file so that it would accommodate the number of pages
+given. The tablespace must be cached in the memory cache. */
+
+ibool
+fil_extend_data_file_with_pages(
+/*============================*/
+				/* out: TRUE if the write of the new pages
+				succeeded */
+	ulint	space_id,	/* in: space id, must be != 0 */
+	ulint	size,		/* in: current size of the space in pages, as
+				stored in the fsp header */
+	ulint	size_after_extend)/* in: desired size in pages after the
+				extension, should be less than 4 GB (this
+				function is primarily intended for increasing
+				the data file size from < 64 pages to up to
+				64 pages) */
+{
+	fil_system_t*	system = fil_system;
+	fil_node_t*	node;
+	fil_space_t*	space;
+	byte*		buf2;
+	byte*		buf;
+	ibool		success;
+	ulint		n_new_pages;
+
+	ut_a(space_id != 0);
+	ut_a(size_after_extend < 64 * 4096);
+	ut_a(size_after_extend >= size);
+
+	/* Cannot wrap around: checked by the assertion just above */
+	n_new_pages = size_after_extend - size;
+
+	fil_mutex_enter_and_prepare_for_io(space_id);
+
+	HASH_SEARCH(hash, system->spaces, space_id, space,
+		    space->id == space_id);
+	ut_a(space);
+
+	node = UT_LIST_GET_LAST(space->chain);
+
+	fil_node_prepare_for_io(node, system, space);
+
+	if (UT_LIST_GET_LEN(space->chain) == 1 && node->size < size) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"InnoDB: Fatal error: space %s id %lu size stored in header is %lu pages\n"
+"InnoDB: but actual size is only %lu pages (possibly rounded downwards)!\n"
+"InnoDB: Cannot continue operation!\n", space->name, space_id, size,
+			node->size);
+		exit(1);
+	}
+
+	/* One page more than needed is allocated so that the start of the
+	buffer can be moved with ut_align() to a UNIV_PAGE_SIZE boundary */
+	buf2 = mem_alloc((1 + n_new_pages) * UNIV_PAGE_SIZE);
+	buf = ut_align(buf2, UNIV_PAGE_SIZE);
+
+	memset(buf, '\0', n_new_pages * UNIV_PAGE_SIZE);
+
+	/* All the new pages are written as zeros in a single synchronous
+	write that starts at the byte offset of page number 'size' */
+	success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
+			node->name, node->handle, buf,
+			UNIV_PAGE_SIZE * size, 0,
+			UNIV_PAGE_SIZE * n_new_pages,
+			NULL, NULL);
+	if (success) {
+		node->size = size_after_extend;
+		space->size = size_after_extend;
+
+		os_has_said_disk_full = FALSE;
+	}
+
+	mem_free(buf2);
+
+	fil_node_complete_io(node, system, OS_FILE_WRITE);
+
+	mutex_exit(&(system->mutex));
+
+	fil_flush(space_id);
+
+	return(success);
+}
+
+/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
+
/***********************************************************************
Tries to reserve free extents in a file space. */
@@ -784,8 +2653,8 @@ fil_space_reserve_free_extents(
ulint n_free_now, /* in: number of free extents now */
ulint n_to_reserve) /* in: how many one wants to reserve */
{
- fil_space_t* space;
fil_system_t* system = fil_system;
+ fil_space_t* space;
ibool success;
ut_ad(system);
@@ -794,6 +2663,8 @@ fil_space_reserve_free_extents(
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ ut_a(space);
+
if (space->n_reserved_extents + n_to_reserve > n_free_now) {
success = FALSE;
} else {
@@ -815,8 +2686,8 @@ fil_space_release_free_extents(
ulint id, /* in: space id */
ulint n_reserved) /* in: how many one reserved */
{
- fil_space_t* space;
fil_system_t* system = fil_system;
+ fil_space_t* space;
ut_ad(system);
@@ -824,6 +2695,7 @@ fil_space_release_free_extents(
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ ut_a(space);
ut_a(space->n_reserved_extents >= n_reserved);
space->n_reserved_extents -= n_reserved;
@@ -840,8 +2712,8 @@ fil_space_get_n_reserved_extents(
/*=============================*/
ulint id) /* in: space id */
{
- fil_space_t* space;
fil_system_t* system = fil_system;
+ fil_space_t* space;
ulint n;
ut_ad(system);
@@ -859,204 +2731,94 @@ fil_space_get_n_reserved_extents(
return(n);
}
+/*============================ FILE I/O ================================*/
+
/************************************************************************
+NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
+
Prepares a file node for i/o. Opens the file if it is closed. Updates the
pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. */
+off the LRU list if it is in the LRU list. The caller must hold the fil_sys
+mutex. */
static
void
fil_node_prepare_for_io(
/*====================*/
fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: file system */
+ fil_system_t* system, /* in: tablespace memory cache */
fil_space_t* space) /* in: space */
{
- ibool ret;
- fil_node_t* last_node;
-
ut_ad(node && system && space);
ut_ad(mutex_own(&(system->mutex)));
+ /* Exceeding the open file limit is only reported here; the i/o is
+ prepared regardless */
+ if (system->n_open > system->max_n_open + 5) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Warning: open files %lu exceeds the limit %lu\n",
+ system->n_open, system->max_n_open);
+ }
+
if (node->open == FALSE) {
- /* File is closed */
+ /* File is closed: open it */
ut_a(node->n_pending == 0);
- /* If too many files are open, close one */
-
- if (system->n_open_pending + UT_LIST_GET_LEN(system->LRU)
- == system->max_n_open) {
-
- ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
-
- last_node = UT_LIST_GET_LAST(system->LRU);
-
- if (last_node == NULL) {
- fprintf(stderr,
- "InnoDB: Error: cannot close any file to open another for i/o\n"
- "InnoDB: Pending i/o's on %lu files exist\n",
- system->n_open_pending);
-
- ut_a(0);
- }
-
- fil_node_close(last_node, system);
- }
-
- if (space->purpose == FIL_LOG) {
- node->handle = os_file_create(node->name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_LOG_FILE, &ret);
- } else {
- node->handle = os_file_create(node->name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_DATA_FILE, &ret);
- }
-
- ut_a(ret);
-
- node->open = TRUE;
-
- system->n_open_pending++;
- node->n_pending = 1;
-
- /* File was closed: the node was not in the LRU list */
-
- return;
+ fil_node_open_file(node, system, space);
}
- /* File is open */
- if (node->n_pending == 0) {
+ /* Only nodes of spaces with purpose FIL_TABLESPACE and a nonzero id
+ are taken off the LRU list here; fil_node_complete_io() puts a node
+ back under the same condition */
+ if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
+ && space->id != 0) {
/* The node is in the LRU list, remove it */
- UT_LIST_REMOVE(LRU, system->LRU, node);
-
- system->n_open_pending++;
- node->n_pending = 1;
- } else {
- /* There is already a pending i/o-op on the file: the node is
- not in the LRU list */
+ ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
- node->n_pending++;
+ UT_LIST_REMOVE(LRU, system->LRU, node);
}
+
+ /* Count this i/o as pending on the node; fil_node_complete_io()
+ decrements the count again */
+ node->n_pending++;
}
/************************************************************************
Updates the data structures when an i/o operation finishes. Updates the
-pending i/os field in the node and the system appropriately. Puts the node
-in the LRU list if there are no other pending i/os. */
+pending i/o's field in the node appropriately. */
static
void
fil_node_complete_io(
/*=================*/
fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: file system */
- ulint type) /* in: OS_FILE_WRITE or ..._READ */
+ fil_system_t* system, /* in: tablespace memory cache */
+ ulint type) /* in: OS_FILE_WRITE or OS_FILE_READ; marks
+ the node as modified if
+ type == OS_FILE_WRITE */
{
ut_ad(node);
ut_ad(system);
ut_ad(mutex_own(&(system->mutex)));
+
ut_a(node->n_pending > 0);
node->n_pending--;
- if (type != OS_FILE_READ) {
- node->is_modified = TRUE;
+ if (type == OS_FILE_WRITE) {
+ /* Stamp the node with a fresh value of the system-wide
+ counter; fil_flush() flushes a node only when this stamp is
+ greater than the flush_counter of the node */
+ system->modification_counter++;
+ node->modification_counter = system->modification_counter;
}
- if (node->n_pending == 0) {
+ /* The LRU list holds only nodes of spaces with purpose
+ FIL_TABLESPACE and a nonzero id that have no pending i/o */
+ if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
+ && node->space->id != 0) {
/* The node must be put back to the LRU list */
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
-
- ut_a(system->n_open_pending > 0);
-
- system->n_open_pending--;
-
- if (system->n_open_pending == system->max_n_open - 1) {
-
- os_event_set(system->can_open);
- }
}
}
-
-/**************************************************************************
-Tries to extend a data file by the number of pages given. Any fractions of a
-megabyte are ignored. */
-
-ibool
-fil_extend_last_data_file(
-/*======================*/
- /* out: TRUE if success, also if we run
- out of disk space we may return TRUE */
- ulint* actual_increase,/* out: number of pages we were able to
- extend, here the orginal size of the file and
- the resulting size of the file are rounded
- downwards to a full megabyte, and the
- difference expressed in pages is returned */
- ulint size_increase) /* in: try to extend this many pages */
-{
- fil_node_t* node;
- fil_space_t* space;
- fil_system_t* system = fil_system;
- byte* buf2;
- byte* buf;
- ibool success;
- ulint i;
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, 0, space, space->id == 0);
-
- ut_a(space);
-
- node = UT_LIST_GET_LAST(space->chain);
-
- fil_node_prepare_for_io(node, system, space);
-
- buf2 = mem_alloc(1024 * 1024 + UNIV_PAGE_SIZE);
- buf = ut_align(buf2, UNIV_PAGE_SIZE);
-
- memset(buf, '\0', 1024 * 1024);
-
- for (i = 0; i < size_increase / ((1024 * 1024) / UNIV_PAGE_SIZE); i++) {
-
- /* If we use native Windows aio, then also this write is
- done using it */
-
- success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
- node->name, node->handle, buf,
- (node->size << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF,
- node->size >> (32 - UNIV_PAGE_SIZE_SHIFT),
- 1024 * 1024, NULL, NULL);
-
- if (!success) {
- break;
- }
-
- node->size += ((1024 * 1024) / UNIV_PAGE_SIZE);
- space->size += ((1024 * 1024) / UNIV_PAGE_SIZE);
-
- os_has_said_disk_full = FALSE;
- }
-
- mem_free(buf2);
-
- fil_node_complete_io(node, system, OS_FILE_WRITE);
-
- mutex_exit(&(system->mutex));
-
- *actual_increase = i * ((1024 * 1024) / UNIV_PAGE_SIZE);
-
- fil_flush(0);
-
- srv_data_file_sizes[srv_n_data_files - 1] += *actual_increase;
-
- return(TRUE);
-}
/************************************************************************
Reads or writes data. This operation is asynchronous (aio). */
-void
+ulint
fil_io(
/*===*/
+ /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+ if we are trying to do i/o on a tablespace
+ which does not exist */
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
ORed to OS_FILE_LOG, if a log i/o
and ORed to OS_AIO_SIMULATED_WAKE_LATER
@@ -1081,17 +2843,15 @@ fil_io(
void* message) /* in: message for aio handler if non-sync
aio used, else ignored */
{
+ fil_system_t* system = fil_system;
ulint mode;
fil_space_t* space;
fil_node_t* node;
ulint offset_high;
ulint offset_low;
- fil_system_t* system;
- os_event_t event;
ibool ret;
ulint is_log;
ulint wake_later;
- ulint count;
is_log = type & OS_FILE_LOG;
type = type & ~OS_FILE_LOG;
@@ -1102,7 +2862,7 @@ fil_io(
ut_ad(byte_offset < UNIV_PAGE_SIZE);
ut_ad(buf);
ut_ad(len > 0);
- ut_ad((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
+ ut_a((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
ut_ad(fil_validate());
#ifndef UNIV_LOG_DEBUG
/* ibuf bitmap pages must be read in the sync aio mode: */
@@ -1124,82 +2884,45 @@ fil_io(
mode = OS_AIO_NORMAL;
}
- system = fil_system;
+ /* Reserve the fil_system mutex and make sure that we can open at
+ least one file while holding it, if the file is not already open */
- count = 0;
-loop:
- count++;
-
- /* NOTE that there is a possibility of a hang here:
- if the read i/o-handler thread needs to complete
- a read by reading from the insert buffer, it may need to
- post another read. But if the maximum number of files
- are already open, it cannot proceed from here! */
-
- mutex_enter(&(system->mutex));
+ fil_mutex_enter_and_prepare_for_io(space_id);
- if (count < 500 && !is_log && !ibuf_inside()
- && system->n_open_pending >= (3 * system->max_n_open) / 4) {
-
- /* We are not doing an ibuf operation: leave a
- safety margin of openable files for possible ibuf
- merges needed in page read completion */
-
- mutex_exit(&(system->mutex));
-
- /* Wake the i/o-handler threads to make sure pending
- i/o's are handled and eventually we can open the file */
-
- os_aio_simulated_wake_handler_threads();
-
- os_thread_sleep(100000);
-
- if (count > 50) {
- fprintf(stderr,
- "InnoDB: Warning: waiting for file closes to proceed\n"
- "InnoDB: round %lu\n", count);
- }
-
- goto loop;
- }
-
- if (system->n_open_pending == system->max_n_open) {
-
- /* It is not sure we can open the file if it is closed: wait */
-
- event = system->can_open;
- os_event_reset(event);
-
+ HASH_SEARCH(hash, system->spaces, space_id, space,
+ space->id == space_id);
+ if (!space) {
mutex_exit(&(system->mutex));
- /* Wake the i/o-handler threads to make sure pending
- i/o's are handled and eventually we can open the file */
-
- os_aio_simulated_wake_handler_threads();
-
+ ut_print_timestamp(stderr);
fprintf(stderr,
- "InnoDB: Warning: max allowed number of files is open\n");
-
- os_event_wait(event);
+" InnoDB: Error: trying to do i/o to a tablespace which does not exist.\n"
+"InnoDB: i/o type %lu, space id %lu, page no. %lu, i/o length %lu bytes\n",
+ type, space_id, block_offset, len);
- goto loop;
- }
-
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
- ut_a(space);
+ return(DB_TABLESPACE_DELETED);
+ }
ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
node = UT_LIST_GET_FIRST(space->chain);
for (;;) {
+ if (space->id != 0 && node->size == 0) {
+ /* We do not know the size of a single-table tablespace
+ before we open the file */
+
+ break;
+ }
+
if (node == NULL) {
fprintf(stderr,
- "InnoDB: Error: trying to access page number %lu in space %lu\n"
+ "InnoDB: Error: trying to access page number %lu in space %lu,\n"
+ "InnoDB: space name %s,\n"
"InnoDB: which is outside the tablespace bounds.\n"
"InnoDB: Byte offset %lu, len %lu, i/o type %lu\n",
- block_offset, space_id, byte_offset, len, type);
+ block_offset, space_id, space->name, byte_offset, len,
+ type);
ut_a(0);
}
@@ -1216,13 +2939,28 @@ loop:
/* Open file if closed */
fil_node_prepare_for_io(node, system, space);
+ /* Check that at least the start offset is within the bounds of a
+ single-table tablespace */
+ if (space->purpose == FIL_TABLESPACE && space->id != 0
+ && node->size <= block_offset) {
+
+ fprintf(stderr,
+ "InnoDB: Error: trying to access page number %lu in space %lu,\n"
+ "InnoDB: space name %s,\n"
+ "InnoDB: which is outside the tablespace bounds.\n"
+ "InnoDB: Byte offset %lu, len %lu, i/o type %lu\n",
+ block_offset, space_id, space->name, byte_offset, len,
+ type);
+ ut_a(0);
+ }
+
/* Now we have made the changes in the data structures of system */
mutex_exit(&(system->mutex));
/* Calculate the low 32 bits and the high 32 bits of the file offset */
offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
- offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF)
+ offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL)
+ byte_offset;
ut_a(node->size - block_offset >=
@@ -1250,6 +2988,8 @@ loop:
ut_ad(fil_validate());
}
+
+ return(DB_SUCCESS);
}
/************************************************************************
@@ -1257,9 +2997,12 @@ Reads data from a space to a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space. */
-void
+ulint
fil_read(
/*=====*/
+ /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+ if we are trying to do i/o on a tablespace
+ which does not exist */
ibool sync, /* in: TRUE if synchronous aio is desired */
ulint space_id, /* in: space id */
ulint block_offset, /* in: offset in number of blocks */
@@ -1273,8 +3016,8 @@ fil_read(
void* message) /* in: message for aio handler if non-sync
aio used, else ignored */
{
- fil_io(OS_FILE_READ, sync, space_id, block_offset, byte_offset, len,
- buf, message);
+ return(fil_io(OS_FILE_READ, sync, space_id, block_offset,
+ byte_offset, len, buf, message));
}
/************************************************************************
@@ -1282,9 +3025,12 @@ Writes data to a space from a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space. */
-void
+ulint
fil_write(
/*======*/
+ /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+ if we are trying to do i/o on a tablespace
+ which does not exist */
ibool sync, /* in: TRUE if synchronous aio is desired */
ulint space_id, /* in: space id */
ulint block_offset, /* in: offset in number of blocks */
@@ -1298,8 +3044,8 @@ fil_write(
void* message) /* in: message for aio handler if non-sync
aio used, else ignored */
{
- fil_io(OS_FILE_WRITE, sync, space_id, block_offset, byte_offset, len,
- buf, message);
+ return(fil_io(OS_FILE_WRITE, sync, space_id, block_offset,
+ byte_offset, len, buf, message));
}
/**************************************************************************
@@ -1314,16 +3060,16 @@ fil_aio_wait(
ulint segment) /* in: the number of the segment in the aio
array to wait for */
{
+ fil_system_t* system = fil_system;
ibool ret;
fil_node_t* fil_node;
- fil_system_t* system = fil_system;
void* message;
ulint type;
ut_ad(fil_validate());
if (os_aio_use_native_aio) {
- srv_io_thread_op_info[segment] = (char *) "native aio handle";
+ srv_io_thread_op_info[segment] = (char *) "handle native aio";
#ifdef WIN_ASYNC_IO
ret = os_aio_windows_handle(segment, 0, &fil_node, &message,
&type);
@@ -1334,7 +3080,7 @@ fil_aio_wait(
ut_a(0);
#endif
} else {
- srv_io_thread_op_info[segment] =(char *)"simulated aio handle";
+ srv_io_thread_op_info[segment] =(char *)"handle simulated aio";
ret = os_aio_simulated_handle(segment, (void**) &fil_node,
&message, &type);
@@ -1353,6 +3099,10 @@ fil_aio_wait(
ut_ad(fil_validate());
/* Do the i/o handling */
+ /* IMPORTANT: since i/o handling for reads will read also the insert
+ buffer in tablespace 0, you have to be very careful not to introduce
+ deadlocks in the i/o system. We keep tablespace 0 data files always
+ open, and use a special i/o thread to serve insert buffer requests. */
if (buf_pool_is_block(message)) {
srv_io_thread_op_info[segment] =
@@ -1365,7 +3115,8 @@ fil_aio_wait(
}
/**************************************************************************
-Flushes to disk possible writes cached by the OS. */
+Flushes to disk possible writes cached by the OS. If the space does not exist
+or is being dropped, does not do anything. */
void
fil_flush(
@@ -1377,41 +3128,79 @@ fil_flush(
fil_space_t* space;
fil_node_t* node;
os_file_t file;
+ ib_longlong old_mod_counter;
mutex_enter(&(system->mutex));
HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
- ut_a(space);
+ space->id == space_id);
+ if (!space || space->is_being_deleted) {
+ mutex_exit(&(system->mutex));
+
+ return;
+ }
+ space->n_pending_flushes++; /* prevent dropping of the space while
+ we are flushing */
node = UT_LIST_GET_FIRST(space->chain);
while (node) {
- if (node->open && node->is_modified) {
- file = node->handle;
+ if (node->modification_counter > node->flush_counter) {
+ ut_a(node->open);
+
+ /* We want to flush the changes at least up to
+ old_mod_counter */
+ old_mod_counter = node->modification_counter;
- node->is_modified = FALSE;
-
if (space->purpose == FIL_TABLESPACE) {
fil_n_pending_tablespace_flushes++;
} else {
fil_n_pending_log_flushes++;
}
+#ifdef __WIN__
+ if (node->is_raw_disk) {
- mutex_exit(&(system->mutex));
+ goto skip_flush;
+ }
+#endif
+retry:
+ if (node->n_pending_flushes > 0) {
+ /* We want to avoid calling os_file_flush() on
+ the file twice at the same time, because we do
+ not know what bugs OS's may contain in file
+ i/o; sleep for a while */
- /* Note that it is not certain, when we have
- released the mutex above, that the file of the
- handle is still open: we assume that the OS
- will not crash or trap even if we pass a handle
- to a closed file below in os_file_flush! */
+ mutex_exit(&(system->mutex));
+
+ os_thread_sleep(20000);
+
+ mutex_enter(&(system->mutex));
+
+ if (node->flush_counter >= old_mod_counter) {
+
+ goto skip_flush;
+ }
+
+ goto retry;
+ }
+
+ ut_a(node->open);
+ file = node->handle;
+ node->n_pending_flushes++;
+
+ mutex_exit(&(system->mutex));
/* printf("Flushing to file %s\n", node->name); */
-
- os_file_flush(file);
-
+ os_file_flush(file);
+
mutex_enter(&(system->mutex));
+ node->n_pending_flushes--;
+skip_flush:
+ if (node->flush_counter < old_mod_counter) {
+ node->flush_counter = old_mod_counter;
+ }
+
if (space->purpose == FIL_TABLESPACE) {
fil_n_pending_tablespace_flushes--;
} else {
@@ -1422,11 +3211,13 @@ fil_flush(
node = UT_LIST_GET_NEXT(chain, node);
}
+ space->n_pending_flushes--;
+
mutex_exit(&(system->mutex));
}
/**************************************************************************
-Flushes to disk writes in file spaces of the given type possibly cached by
+Flushes to disk the writes in file spaces of the given type possibly cached by
the OS. */
void
@@ -1443,13 +3234,17 @@ fil_flush_file_spaces(
while (space) {
if (space->purpose == purpose) {
+ space->n_pending_flushes++; /* prevent dropping of the
+ space while we are
+ flushing */
mutex_exit(&(system->mutex));
fil_flush(space->id);
mutex_enter(&(system->mutex));
- }
+ space->n_pending_flushes--;
+ }
space = UT_LIST_GET_NEXT(space_list, space);
}
@@ -1457,20 +3252,18 @@ fil_flush_file_spaces(
}
/**********************************************************************
-Checks the consistency of the file system. */
+Checks the consistency of the tablespace cache. */
ibool
fil_validate(void)
/*==============*/
/* out: TRUE if ok */
{
+ fil_system_t* system = fil_system;
fil_space_t* space;
fil_node_t* fil_node;
- ulint pending_count = 0;
- fil_system_t* system;
+ ulint n_open = 0;
ulint i;
-
- system = fil_system;
mutex_enter(&(system->mutex));
@@ -1481,36 +3274,35 @@ fil_validate(void)
space = HASH_GET_FIRST(system->spaces, i);
while (space != NULL) {
-
UT_LIST_VALIDATE(chain, fil_node_t, space->chain);
fil_node = UT_LIST_GET_FIRST(space->chain);
while (fil_node != NULL) {
-
if (fil_node->n_pending > 0) {
-
- pending_count++;
ut_a(fil_node->open);
}
+ if (fil_node->open) {
+ n_open++;
+ }
fil_node = UT_LIST_GET_NEXT(chain, fil_node);
}
-
space = HASH_GET_NEXT(hash, space);
}
}
- ut_a(pending_count == system->n_open_pending);
+ ut_a(system->n_open == n_open);
UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
fil_node = UT_LIST_GET_FIRST(system->LRU);
while (fil_node != NULL) {
-
ut_a(fil_node->n_pending == 0);
ut_a(fil_node->open);
+ ut_a(fil_node->space->purpose == FIL_TABLESPACE);
+ ut_a(fil_node->space->id != 0);
fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
}
@@ -1578,4 +3370,4 @@ fil_page_get_type(
ut_ad(page);
return(mach_read_from_2(page + FIL_PAGE_TYPE));
-}
+}