summaryrefslogtreecommitdiff
path: root/innobase/include
diff options
context:
space:
mode:
authorunknown <heikki@donna.mysql.fi>2001-08-04 19:36:14 +0300
committerunknown <heikki@donna.mysql.fi>2001-08-04 19:36:14 +0300
commitc67510f0939fbcad4f2f0efcc79272322d5ffa57 (patch)
treee6dc89cb458f496f2b93e907afb60d3cd886cc18 /innobase/include
parentee82985a98c2c302e54a81ef586ffc8a867a550c (diff)
downloadmariadb-git-c67510f0939fbcad4f2f0efcc79272322d5ffa57.tar.gz
srv0srv.h Support raw disk partitions as data files
srv0start.c Support raw disk partitions as data files srv0srv.c Support raw disk partitions as data files row0purge.c < 4 GB rows, doublewrite, hang fixes row0row.c < 4 GB rows, doublewrite, hang fixes row0sel.c < 4 GB rows, doublewrite, hang fixes row0uins.c < 4 GB rows, doublewrite, hang fixes row0umod.c < 4 GB rows, doublewrite, hang fixes row0undo.c < 4 GB rows, doublewrite, hang fixes row0upd.c < 4 GB rows, doublewrite, hang fixes srv0srv.c < 4 GB rows, doublewrite, hang fixes srv0start.c < 4 GB rows, doublewrite, hang fixes sync0rw.c < 4 GB rows, doublewrite, hang fixes sync0sync.c < 4 GB rows, doublewrite, hang fixes trx0purge.c < 4 GB rows, doublewrite, hang fixes trx0rec.c < 4 GB rows, doublewrite, hang fixes trx0sys.c < 4 GB rows, doublewrite, hang fixes btr0btr.c < 4 GB rows, doublewrite, hang fixes btr0cur.c < 4 GB rows, doublewrite, hang fixes buf0buf.c < 4 GB rows, doublewrite, hang fixes buf0flu.c < 4 GB rows, doublewrite, hang fixes buf0rea.c < 4 GB rows, doublewrite, hang fixes data0data.c < 4 GB rows, doublewrite, hang fixes fil0fil.c < 4 GB rows, doublewrite, hang fixes fsp0fsp.c < 4 GB rows, doublewrite, hang fixes ibuf0ibuf.c < 4 GB rows, doublewrite, hang fixes lock0lock.c < 4 GB rows, doublewrite, hang fixes log0log.c < 4 GB rows, doublewrite, hang fixes log0recv.c < 4 GB rows, doublewrite, hang fixes os0file.c < 4 GB rows, doublewrite, hang fixes page0cur.c < 4 GB rows, doublewrite, hang fixes pars0pars.c < 4 GB rows, doublewrite, hang fixes rem0cmp.c < 4 GB rows, doublewrite, hang fixes rem0rec.c < 4 GB rows, doublewrite, hang fixes row0ins.c < 4 GB rows, doublewrite, hang fixes row0mysql.c < 4 GB rows, doublewrite, hang fixes univ.i < 4 GB rows, doublewrite, hang fixes data0data.ic < 4 GB rows, doublewrite, hang fixes mach0data.ic < 4 GB rows, doublewrite, hang fixes rem0rec.ic < 4 GB rows, doublewrite, hang fixes row0upd.ic < 4 GB rows, doublewrite, hang fixes trx0rec.ic < 4 GB rows, doublewrite, hang fixes rem0cmp.h < 4 GB rows, 
doublewrite, hang fixes rem0rec.h < 4 GB rows, doublewrite, hang fixes row0ins.h < 4 GB rows, doublewrite, hang fixes row0mysql.h < 4 GB rows, doublewrite, hang fixes row0row.h < 4 GB rows, doublewrite, hang fixes row0upd.h < 4 GB rows, doublewrite, hang fixes srv0srv.h < 4 GB rows, doublewrite, hang fixes sync0sync.h < 4 GB rows, doublewrite, hang fixes trx0rec.h < 4 GB rows, doublewrite, hang fixes trx0sys.h < 4 GB rows, doublewrite, hang fixes trx0types.h < 4 GB rows, doublewrite, hang fixes trx0undo.h < 4 GB rows, doublewrite, hang fixes ut0dbg.h < 4 GB rows, doublewrite, hang fixes ut0ut.h < 4 GB rows, doublewrite, hang fixes btr0btr.h < 4 GB rows, doublewrite, hang fixes btr0cur.h < 4 GB rows, doublewrite, hang fixes buf0buf.h < 4 GB rows, doublewrite, hang fixes buf0flu.h < 4 GB rows, doublewrite, hang fixes data0data.h < 4 GB rows, doublewrite, hang fixes dict0mem.h < 4 GB rows, doublewrite, hang fixes fil0fil.h < 4 GB rows, doublewrite, hang fixes fsp0fsp.h < 4 GB rows, doublewrite, hang fixes os0file.h < 4 GB rows, doublewrite, hang fixes innobase/include/btr0btr.h: < 4 GB rows, doublewrite, hang fixes innobase/include/btr0cur.h: < 4 GB rows, doublewrite, hang fixes innobase/include/buf0buf.h: < 4 GB rows, doublewrite, hang fixes innobase/include/buf0flu.h: < 4 GB rows, doublewrite, hang fixes innobase/include/data0data.h: < 4 GB rows, doublewrite, hang fixes innobase/include/dict0mem.h: < 4 GB rows, doublewrite, hang fixes innobase/include/fil0fil.h: < 4 GB rows, doublewrite, hang fixes innobase/include/fsp0fsp.h: < 4 GB rows, doublewrite, hang fixes innobase/include/os0file.h: < 4 GB rows, doublewrite, hang fixes innobase/include/rem0cmp.h: < 4 GB rows, doublewrite, hang fixes innobase/include/rem0rec.h: < 4 GB rows, doublewrite, hang fixes innobase/include/row0ins.h: < 4 GB rows, doublewrite, hang fixes innobase/include/row0mysql.h: < 4 GB rows, doublewrite, hang fixes innobase/include/row0row.h: < 4 GB rows, doublewrite, hang fixes 
innobase/include/row0upd.h: < 4 GB rows, doublewrite, hang fixes innobase/include/sync0sync.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0rec.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0sys.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0types.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0undo.h: < 4 GB rows, doublewrite, hang fixes innobase/include/ut0dbg.h: < 4 GB rows, doublewrite, hang fixes innobase/include/ut0ut.h: < 4 GB rows, doublewrite, hang fixes innobase/include/data0data.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/mach0data.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/rem0rec.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/row0upd.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0rec.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/univ.i: < 4 GB rows, doublewrite, hang fixes innobase/btr/btr0btr.c: < 4 GB rows, doublewrite, hang fixes innobase/btr/btr0cur.c: < 4 GB rows, doublewrite, hang fixes innobase/buf/buf0buf.c: < 4 GB rows, doublewrite, hang fixes innobase/buf/buf0flu.c: < 4 GB rows, doublewrite, hang fixes innobase/buf/buf0rea.c: < 4 GB rows, doublewrite, hang fixes innobase/data/data0data.c: < 4 GB rows, doublewrite, hang fixes innobase/fil/fil0fil.c: < 4 GB rows, doublewrite, hang fixes innobase/fsp/fsp0fsp.c: < 4 GB rows, doublewrite, hang fixes innobase/ibuf/ibuf0ibuf.c: < 4 GB rows, doublewrite, hang fixes innobase/lock/lock0lock.c: < 4 GB rows, doublewrite, hang fixes innobase/log/log0log.c: < 4 GB rows, doublewrite, hang fixes innobase/log/log0recv.c: < 4 GB rows, doublewrite, hang fixes innobase/os/os0file.c: < 4 GB rows, doublewrite, hang fixes innobase/page/page0cur.c: < 4 GB rows, doublewrite, hang fixes innobase/pars/pars0pars.c: < 4 GB rows, doublewrite, hang fixes innobase/rem/rem0cmp.c: < 4 GB rows, doublewrite, hang fixes innobase/rem/rem0rec.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0ins.c: < 4 GB 
rows, doublewrite, hang fixes innobase/row/row0mysql.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0purge.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0row.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0sel.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0uins.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0umod.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0undo.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0upd.c: < 4 GB rows, doublewrite, hang fixes innobase/sync/sync0rw.c: < 4 GB rows, doublewrite, hang fixes innobase/sync/sync0sync.c: < 4 GB rows, doublewrite, hang fixes innobase/trx/trx0purge.c: < 4 GB rows, doublewrite, hang fixes innobase/trx/trx0rec.c: < 4 GB rows, doublewrite, hang fixes innobase/trx/trx0sys.c: < 4 GB rows, doublewrite, hang fixes innobase/srv/srv0srv.c: Support raw disk partitions as data files innobase/srv/srv0start.c: Support raw disk partitions as data files innobase/include/srv0srv.h: Support raw disk partitions as data files
Diffstat (limited to 'innobase/include')
-rw-r--r--innobase/include/btr0btr.h38
-rw-r--r--innobase/include/btr0cur.h123
-rw-r--r--innobase/include/buf0buf.h8
-rw-r--r--innobase/include/buf0flu.h2
-rw-r--r--innobase/include/data0data.h58
-rw-r--r--innobase/include/data0data.ic7
-rw-r--r--innobase/include/dict0mem.h2
-rw-r--r--innobase/include/fil0fil.h10
-rw-r--r--innobase/include/fsp0fsp.h2
-rw-r--r--innobase/include/mach0data.ic4
-rw-r--r--innobase/include/os0file.h18
-rw-r--r--innobase/include/rem0cmp.h13
-rw-r--r--innobase/include/rem0rec.h53
-rw-r--r--innobase/include/rem0rec.ic72
-rw-r--r--innobase/include/row0ins.h6
-rw-r--r--innobase/include/row0mysql.h8
-rw-r--r--innobase/include/row0row.h1
-rw-r--r--innobase/include/row0upd.h12
-rw-r--r--innobase/include/row0upd.ic5
-rw-r--r--innobase/include/srv0srv.h22
-rw-r--r--innobase/include/sync0sync.h2
-rw-r--r--innobase/include/trx0rec.h16
-rw-r--r--innobase/include/trx0rec.ic17
-rw-r--r--innobase/include/trx0sys.h70
-rw-r--r--innobase/include/trx0types.h1
-rw-r--r--innobase/include/trx0undo.h4
-rw-r--r--innobase/include/univ.i24
-rw-r--r--innobase/include/ut0dbg.h10
-rw-r--r--innobase/include/ut0ut.h3
29 files changed, 546 insertions, 65 deletions
diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h
index f8a3000ca8a..bea85565125 100644
--- a/innobase/include/btr0btr.h
+++ b/innobase/include/btr0btr.h
@@ -357,6 +357,44 @@ btr_get_size(
/* out: number of pages */
dict_index_t* index, /* in: index */
ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+/******************************************************************
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents! */
+
+page_t*
+btr_page_alloc(
+/*===========*/
+ /* out: new allocated page, x-latched;
+ NULL if out of space */
+ dict_tree_t* tree, /* in: index tree */
+ ulint hint_page_no, /* in: hint of a good page */
+ byte file_direction, /* in: direction where a possible
+ page split is made */
+ ulint level, /* in: level where the page is placed
+ in the tree */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Frees a file page used in an index tree. NOTE: cannot free field external
+storage pages because the page must contain info on its level. */
+
+void
+btr_page_free(
+/*==========*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page to be freed, x-latched */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Frees a file page used in an index tree. Can also be used to free BLOB
+external storage pages, because the page level 0 can be given as an
+argument. */
+
+void
+btr_page_free_low(
+/*==============*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page to be freed, x-latched */
+ ulint level, /* in: page level */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Prints size info of a B-tree. */
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h
index 4ce2177bfe8..ffae434a5d9 100644
--- a/innobase/include/btr0cur.h
+++ b/innobase/include/btr0cur.h
@@ -151,11 +151,14 @@ btr_cur_optimistic_insert(
ulint flags, /* in: undo logging and locking flags: if not
zero, the parameters index and thr should be
specified */
- btr_cur_t* cursor, /* in: cursor on page after which
- to insert; cursor stays valid */
+ btr_cur_t* cursor, /* in: cursor on page after which to insert;
+ cursor stays valid */
dtuple_t* entry, /* in: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
+ big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ be stored externally by the caller, or
+ NULL */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
@@ -169,13 +172,19 @@ btr_cur_pessimistic_insert(
/*=======================*/
/* out: DB_SUCCESS or error number */
ulint flags, /* in: undo logging and locking flags: if not
- zero, the parameters index and thr should be
- specified */
+ zero, the parameter thr should be
+ specified; if no undo logging is specified,
+ then the caller must have reserved enough
+ free extents in the file space so that the
+ insertion will certainly succeed */
btr_cur_t* cursor, /* in: cursor after which to insert;
- cursor does not stay valid */
+ cursor stays valid */
dtuple_t* entry, /* in: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
+ big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ be stored externally by the caller, or
+ NULL */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
@@ -228,8 +237,9 @@ btr_cur_pessimistic_update(
/* out: DB_SUCCESS or error code */
ulint flags, /* in: undo logging, locking, and rollback
flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
- cursor does not stay valid */
+ btr_cur_t* cursor, /* in: cursor on the record to update */
+ big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ be stored externally by the caller, or NULL */
upd_t* update, /* in: update vector; this is allowed also
contain trx id and roll ptr fields, but
the values in update vector have no effect */
@@ -407,6 +417,92 @@ btr_estimate_number_of_different_key_vals(
/*======================================*/
/* out: estimated number of key values */
dict_index_t* index); /* in: index */
+/***********************************************************************
+Stores the fields in big_rec_vec to the tablespace and puts pointers to
+them in rec. The fields are stored on pages allocated from leaf node
+file segment of the index tree. */
+
+ulint
+btr_store_big_rec_extern_fields(
+/*============================*/
+ /* out: DB_SUCCESS or error */
+ dict_index_t* index, /* in: index of rec; the index tree
+ MUST be X-latched */
+ rec_t* rec, /* in: record */
+ big_rec_t* big_rec_vec, /* in: vector containing fields
+ to be stored externally */
+ mtr_t* local_mtr); /* in: mtr containing the latch to
+ rec and to the tree */
+/***********************************************************************
+Frees the space in an externally stored field to the file space
+management. */
+
+void
+btr_free_externally_stored_field(
+/*=============================*/
+ dict_index_t* index, /* in: index of the data, the index
+ tree MUST be X-latched */
+ byte* data, /* in: internally stored data
+ + reference to the externally
+ stored part */
+ ulint local_len, /* in: length of data */
+ mtr_t* local_mtr); /* in: mtr containing the latch to
+ data and an X-latch to the index
+ tree */
+/***************************************************************
+Frees the externally stored fields for a record. */
+
+void
+btr_rec_free_externally_stored_fields(
+/*==================================*/
+ dict_index_t* index, /* in: index of the data, the index
+ tree MUST be X-latched */
+ rec_t* rec, /* in: record */
+ mtr_t* mtr); /* in: mini-transaction handle which contains
+ an X-latch to record page and to the index
+ tree */
+/***********************************************************************
+Copies an externally stored field of a record to mem heap. */
+
+byte*
+btr_rec_copy_externally_stored_field(
+/*=================================*/
+ /* out: the field copied to heap */
+ rec_t* rec, /* in: record */
+ ulint no, /* in: field number */
+ ulint* len, /* out: length of the field */
+ mem_heap_t* heap); /* in: mem heap */
+/***********************************************************************
+Copies an externally stored field of a record to mem heap. Parameter
+data contains a pointer to 'internally' stored part of the field:
+possibly some data, and the reference to the externally stored part in
+the last 20 bytes of data. */
+
+byte*
+btr_copy_externally_stored_field(
+/*=============================*/
+ /* out: the whole field copied to heap */
+ ulint* len, /* out: length of the whole field */
+ byte* data, /* in: 'internally' stored part of the
+ field containing also the reference to
+ the external part */
+ ulint local_len,/* in: length of data */
+ mem_heap_t* heap); /* in: mem heap */
+/***********************************************************************
+Stores the positions of the fields marked as extern storage in the update
+vector, and also those fields which are marked as extern storage in rec
+and not mentioned in updated fields. We use this function to remember
+which fields we must mark as extern storage in a record inserted for an
+update. */
+
+ulint
+btr_push_update_extern_fields(
+/*==========================*/
+ /* out: number of values stored in ext_vect */
+ ulint* ext_vect, /* in: array of ulints, must be preallocated
+ to have place for all fields in rec */
+ rec_t* rec, /* in: record */
+ upd_t* update); /* in: update vector */
/*######################################################################*/
@@ -516,6 +612,19 @@ and sleep this many microseconds in between */
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
#define BTR_CUR_RETRY_SLEEP_TIME 50000
+/* The reference in a field of which data is stored on a different page */
+/*--------------------------------------*/
+#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */
+#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */
+#define BTR_EXTERN_OFFSET 8 /* offset of BLOB header
+ on that page */
+#define BTR_EXTERN_LEN 12 /* 8 bytes containing the
+ length of the externally
+ stored part of the BLOB */
+/*--------------------------------------*/
+#define BTR_EXTERN_FIELD_REF_SIZE 20
+
+
extern ulint btr_cur_n_non_sea;
#ifndef UNIV_NONINL
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 7f3e20a4505..8b22561adf8 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -378,6 +378,14 @@ buf_calc_page_checksum(
/*===================*/
/* out: checksum */
byte* page); /* in: buffer page */
+/************************************************************************
+Checks if a page is corrupt. */
+
+ibool
+buf_page_is_corrupted(
+/*==================*/
+ /* out: TRUE if corrupted */
+ byte* read_buf); /* in: a database page */
/**************************************************************************
Gets the page number of a pointer pointing within a buffer frame containing
a file page. */
diff --git a/innobase/include/buf0flu.h b/innobase/include/buf0flu.h
index 9317950904f..cb1c0965a65 100644
--- a/innobase/include/buf0flu.h
+++ b/innobase/include/buf0flu.h
@@ -101,7 +101,7 @@ make sure that a read-ahead batch can be read efficiently in a single
sweep). */
#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
-#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4)
+#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
#ifndef UNIV_NONINL
#include "buf0flu.ic"
diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h
index d7f0986b0b6..f695e0989a5 100644
--- a/innobase/include/data0data.h
+++ b/innobase/include/data0data.h
@@ -14,6 +14,9 @@ Created 5/30/1994 Heikki Tuuri
#include "data0types.h"
#include "data0type.h"
#include "mem0mem.h"
+#include "dict0types.h"
+
+typedef struct big_rec_struct big_rec_t;
/* Some non-inlined functions used in the MySQL interface: */
void
@@ -312,6 +315,41 @@ dtuple_sprintf(
char* buf, /* in: print buffer */
ulint buf_len,/* in: buf length in bytes */
dtuple_t* tuple); /* in: tuple */
+/******************************************************************
+Moves parts of long fields in entry to the big record vector so that
+the size of tuple drops below the maximum record size allowed in the
+database. Moves data only from those fields which are not necessary
+to determine uniquely the insertion place of the tuple in the index. */
+
+big_rec_t*
+dtuple_convert_big_rec(
+/*===================*/
+ /* out, own: created big record vector,
+ NULL if we are not able to shorten
+ the entry enough, i.e., if there are
+ too many short fields in entry */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry); /* in: index entry */
+/******************************************************************
+Puts back to entry the data stored in vector. Note that to ensure the
+fields in entry can accommodate the data, vector must have been created
+from entry with dtuple_convert_big_rec. */
+
+void
+dtuple_convert_back_big_rec(
+/*========================*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: entry whose data was put to vector */
+ big_rec_t* vector);/* in, own: big rec vector; it is
+ freed in this function */
+/******************************************************************
+Frees the memory in a big rec vector. */
+
+void
+dtuple_big_rec_free(
+/*================*/
+ big_rec_t* vector); /* in, own: big rec vector; it is
+ freed in this function */
/***************************************************************
Generates a random tuple. */
@@ -396,7 +434,7 @@ dtuple_gen_search_tuple_TPC_C(
/* Structure for an SQL data field */
struct dfield_struct{
void* data; /* pointer to data */
- ulint len; /* data length; UNIV_SQL_NULL if SQL null */
+ ulint len; /* data length; UNIV_SQL_NULL if SQL null; */
dtype_t type; /* type of data */
ulint col_no; /* when building index entries, the column
number can be stored here */
@@ -423,6 +461,24 @@ struct dtuple_struct {
};
#define DATA_TUPLE_MAGIC_N 65478679
+/* A slot for a field in a big rec vector */
+
+typedef struct big_rec_field_struct big_rec_field_t;
+struct big_rec_field_struct {
+ ulint field_no; /* field number in record */
+ ulint len; /* stored data len */
+ byte* data; /* stored data */
+};
+
+/* Storage format for overflow data in a big record, that is, a record
+which needs external storage of data fields */
+
+struct big_rec_struct {
+ mem_heap_t* heap; /* memory heap from which allocated */
+ ulint n_fields; /* number of stored fields */
+ big_rec_field_t* fields; /* stored fields */
+};
+
#ifndef UNIV_NONINL
#include "data0data.ic"
#endif
diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic
index 27b5552d338..b886ad6c69c 100644
--- a/innobase/include/data0data.ic
+++ b/innobase/include/data0data.ic
@@ -307,12 +307,13 @@ dtuple_create(
/**************************************************************
The following function returns the sum of data lengths of a tuple. The space
-occupied by the field structs or the tuple struct is not counted. */
+occupied by the field structs or the tuple struct is not counted. Neither
+is possible space in externally stored parts of the field. */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
- /* out: sum of data lens */
+ /* out: sum of data lengths */
dtuple_t* tuple) /* in: typed data tuple */
{
dfield_t* field;
@@ -382,7 +383,7 @@ dtuple_datas_are_equal(
field2 = dtuple_get_nth_field(tuple2, i);
data2 = (byte*) dfield_get_data(field2);
- len2 = dfield_get_len(field2);
+ len2 = dfield_get_len(field2);
if (len1 != len2) {
diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h
index be9cd42b7be..74ecbc8bba2 100644
--- a/innobase/include/dict0mem.h
+++ b/innobase/include/dict0mem.h
@@ -143,7 +143,7 @@ struct dict_col_struct{
ulint clust_pos;/* position of the column in the
clustered index */
ulint ord_part;/* count of how many times this column
- appears in an ordering fields of an index */
+ appears in ordering fields of an index */
char* name; /* name */
dtype_t type; /* data type */
dict_table_t* table; /* back pointer to table of this column */
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
index 9905b5a2c3c..bfc322270fc 100644
--- a/innobase/include/fil0fil.h
+++ b/innobase/include/fil0fil.h
@@ -196,6 +196,16 @@ fil_space_get_size(
/* out: space size */
ulint id); /* in: space id */
/***********************************************************************
+Checks if the pair space, page_no refers to an existing page in a
+tablespace file space. */
+
+ibool
+fil_check_adress_in_tablespace(
+/*===========================*/
+ /* out: TRUE if the address is meaningful */
+ ulint id, /* in: space id */
+ ulint page_no);/* in: page number */
+/***********************************************************************
Appends a new file to the chain of files of a space.
File must be closed. */
diff --git a/innobase/include/fsp0fsp.h b/innobase/include/fsp0fsp.h
index f1be4de4d40..e7f9eab330b 100644
--- a/innobase/include/fsp0fsp.h
+++ b/innobase/include/fsp0fsp.h
@@ -70,7 +70,7 @@ page_t*
fseg_create(
/*========*/
/* out: the page where the segment header is placed,
- x-latched, FIL_NULL if could not create segment
+ x-latched, NULL if could not create segment
because of lack of space */
ulint space, /* in: space id */
ulint page, /* in: page where the segment header is placed: if
diff --git a/innobase/include/mach0data.ic b/innobase/include/mach0data.ic
index 176f3415281..1d6badd035b 100644
--- a/innobase/include/mach0data.ic
+++ b/innobase/include/mach0data.ic
@@ -115,7 +115,7 @@ mach_write_to_4(
{
ut_ad(b);
-#if notdefined && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
+#if (0 == 1) && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
/* We do not use this even on Intel, because unaligned accesses may
be slow */
@@ -143,7 +143,7 @@ mach_read_from_4(
/* out: ulint integer */
byte* b) /* in: pointer to four bytes */
{
-#if notdefined && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
+#if (0 == 1) && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
/* We do not use this even on Intel, because unaligned accesses may
be slow */
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
index c093cb92ca9..75bbbba549f 100644
--- a/innobase/include/os0file.h
+++ b/innobase/include/os0file.h
@@ -59,6 +59,10 @@ log. */
#define OS_FILE_AIO 61
#define OS_FILE_NORMAL 62
+/* Types for file create */
+#define OS_DATA_FILE 100
+#define OS_LOG_FILE 101
+
/* Error codes from os_file_get_last_error */
#define OS_FILE_NOT_FOUND 71
#define OS_FILE_DISK_FULL 72
@@ -125,6 +129,7 @@ os_file_create(
if a new file is created or an old overwritten */
ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
is desired, OS_FILE_NORMAL, if any normal file */
+ ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success);/* out: TRUE if succeed, FALSE if error */
/***************************************************************************
Closes a file handle. In case of error, error number can be retrieved with
@@ -263,6 +268,13 @@ os_aio(
operation); if mode is OS_AIO_SYNC, these
are ignored */
void* message2);
+/****************************************************************************
+Waits until there are no pending writes in os_aio_write_array. There can
+be other, synchronous, pending writes. */
+
+void
+os_aio_wait_until_no_pending_writes(void);
+/*=====================================*/
/**************************************************************************
Wakes up simulated aio i/o-handler threads if they have something to do. */
@@ -298,7 +310,8 @@ os_aio_windows_handle(
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
- void** message2);
+ void** message2,
+ ulint* type); /* out: OS_FILE_WRITE or ..._READ */
#endif
#ifdef POSIX_ASYNC_IO
/**************************************************************************
@@ -335,7 +348,8 @@ os_aio_simulated_handle(
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
- void** message2);
+ void** message2,
+ ulint* type); /* out: OS_FILE_WRITE or ..._READ */
/**************************************************************************
Validates the consistency of the aio system. */
diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h
index 77b9ef9edc8..10c428cb9ca 100644
--- a/innobase/include/rem0cmp.h
+++ b/innobase/include/rem0cmp.h
@@ -1,7 +1,7 @@
/***********************************************************************
Comparison services for records
-(c) 1994-1996 Innobase Oy
+(c) 1994-2001 Innobase Oy
Created 7/1/1994 Heikki Tuuri
************************************************************************/
@@ -31,14 +31,18 @@ This function is used to compare a data tuple to a physical record.
Only dtuple->n_fields_cmp first fields are taken into account for
the the data tuple! If we denote by n = n_fields_cmp, then rec must
have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. */
+the m fields rec has. If rec has an externally stored field we do not
+compare it but return with value 0 if such a comparison should be
+made. */
int
cmp_dtuple_rec_with_match(
/*======================*/
/* out: 1, 0, -1, if dtuple is greater, equal,
less than rec, respectively, when only the
- common first fields are compared */
+ common first fields are compared, or
+ until the first externally stored field in
+ rec */
dtuple_t* dtuple, /* in: data tuple */
rec_t* rec, /* in: physical record which differs from
dtuple in some of the common fields, or which
@@ -89,7 +93,8 @@ cmp_dtuple_rec_prefix_equal(
fields in dtuple */
/*****************************************************************
This function is used to compare two physical records. Only the common
-first fields are compared. */
+first fields are compared, and if an externally stored field is
+encountered, then 0 is returned. */
int
cmp_rec_rec_with_match(
diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h
index 62c0aa14519..12e3a8b39d6 100644
--- a/innobase/include/rem0rec.h
+++ b/innobase/include/rem0rec.h
@@ -12,6 +12,7 @@ Created 5/30/1994 Heikki Tuuri
#include "univ.i"
#include "data0data.h"
#include "rem0types.h"
+#include "mtr0types.h"
/* Maximum values for various fields (for non-blob tuples) */
#define REC_MAX_N_FIELDS (1024 - 1)
@@ -162,6 +163,49 @@ rec_get_nth_field_size(
/* out: field size in bytes */
rec_t* rec, /* in: record */
ulint n); /* in: index of the field */
+/***************************************************************
+Gets the value of the ith field extern storage bit. If it is TRUE
+it means that the field is stored on another page. */
+UNIV_INLINE
+ibool
+rec_get_nth_field_extern_bit(
+/*=========================*/
+ /* out: TRUE or FALSE */
+ rec_t* rec, /* in: record */
+ ulint i); /* in: ith field */
+/**********************************************************
+Returns TRUE if the extern bit is set in any of the fields
+of rec. */
+UNIV_INLINE
+ibool
+rec_contains_externally_stored_field(
+/*=================================*/
+ /* out: TRUE if a field is stored externally */
+ rec_t* rec); /* in: record */
+/***************************************************************
+Sets the value of the ith field extern storage bit. */
+
+void
+rec_set_nth_field_extern_bit(
+/*=========================*/
+ rec_t* rec, /* in: record */
+ ulint i, /* in: ith field */
+ ibool val, /* in: value to set */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page where
+ rec is, or NULL; in the NULL case we do not
+ write to log about the change */
+/***************************************************************
+Sets TRUE the extern storage bits of fields mentioned in an array. */
+
+void
+rec_set_field_extern_bits(
+/*======================*/
+ rec_t* rec, /* in: record */
+ ulint* vec, /* in: array of field numbers */
+ ulint n_fields, /* in: number of field numbers */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page
+ where rec is, or NULL; in the NULL case we
+ do not write to log about the change */
/****************************************************************
The following function is used to get a copy of the nth
data field in the record to a buffer. */
@@ -350,6 +394,15 @@ rec_sprintf(
#define REC_INFO_BITS 6 /* This is single byte bit-field */
+/* Maximum lengths for the data in a physical record if the offsets
+are given in one byte (resp. two byte) format. */
+#define REC_1BYTE_OFFS_LIMIT 0x7F
+#define REC_2BYTE_OFFS_LIMIT 0x7FFF
+
+/* The data size of record must be smaller than this because we reserve
+two upmost bits in a two byte offset for special purposes */
+#define REC_MAX_DATA_SIZE (16 * 1024)
+
#ifndef UNIV_NONINL
#include "rem0rec.ic"
#endif
diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic
index c63b25374dd..1e9ecb47e2e 100644
--- a/innobase/include/rem0rec.ic
+++ b/innobase/include/rem0rec.ic
@@ -25,12 +25,6 @@ significant bytes and bits are written below less significant.
4 bits info bits
*/
-
-/* Maximum lengths for the data in a physical record if the offsets
-are given as one byte (resp. two byte) format. */
-#define REC_1BYTE_OFFS_LIMIT 0x7F
-#define REC_2BYTE_OFFS_LIMIT 0x7FFF
-
/* We list the byte offsets from the origin of the record, the mask,
and the shift needed to obtain each bit-field of the record. */
@@ -66,6 +60,11 @@ one-byte and two-byte offsets */
#define REC_1BYTE_SQL_NULL_MASK 0x80
#define REC_2BYTE_SQL_NULL_MASK 0x8000
+/* In a 2-byte offset the second most significant bit denotes
+a field stored to another page: */
+
+#define REC_2BYTE_EXTERN_MASK 0x4000
+
/***************************************************************
Sets the value of the ith field SQL null bit. */
@@ -489,7 +488,7 @@ ulint
rec_2_get_field_end_info(
/*=====================*/
/* out: offset of the start of the field, SQL null
- flag ORed */
+ flag and extern storage flag ORed */
rec_t* rec, /* in: record */
ulint n) /* in: field index */
{
@@ -499,6 +498,63 @@ rec_2_get_field_end_info(
return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2)));
}
+/***************************************************************
+Gets the value of the ith field extern storage bit. If it is TRUE
+it means that the field is stored on another page. */
+UNIV_INLINE
+ibool
+rec_get_nth_field_extern_bit(
+/*=========================*/
+ /* out: TRUE or FALSE */
+ rec_t* rec, /* in: record */
+ ulint i) /* in: ith field */
+{
+ ulint info;
+
+ if (rec_get_1byte_offs_flag(rec)) {
+ /* the extern storage bit is defined only for 2-byte offsets */
+ return(FALSE);
+ }
+
+ info = rec_2_get_field_end_info(rec, i);
+
+ if (info & REC_2BYTE_EXTERN_MASK) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/**********************************************************
+Returns TRUE if the extern bit is set in any of the fields
+of rec. */
+UNIV_INLINE
+ibool
+rec_contains_externally_stored_field(
+/*=================================*/
+ /* out: TRUE if a field is stored externally */
+ rec_t* rec) /* in: record */
+{
+ ulint n;
+ ulint i;
+
+ if (rec_get_1byte_offs_flag(rec)) {
+ /* rec_get_nth_field_extern_bit is always FALSE in this format */
+ return(FALSE);
+ }
+
+ n = rec_get_n_fields(rec);
+
+ for (i = 0; i < n; i++) {
+ if (rec_get_nth_field_extern_bit(rec, i)) {
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+
/**********************************************************
Returns the offset of n - 1th field end if the record is stored in the 1-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
@@ -616,7 +672,7 @@ rec_2_get_field_start_offs(
}
return(rec_2_get_prev_field_end_info(rec, n)
- & ~REC_2BYTE_SQL_NULL_MASK);
+ & ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK));
}
/**********************************************************
diff --git a/innobase/include/row0ins.h b/innobase/include/row0ins.h
index 94b0e8dec37..612b9e8d73a 100644
--- a/innobase/include/row0ins.h
+++ b/innobase/include/row0ins.h
@@ -56,6 +56,9 @@ row_ins_index_entry_low(
pessimistic descent down the index tree */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry to insert */
+ ulint* ext_vec,/* in: array containing field numbers of
+ externally stored fields in entry, or NULL */
+ ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr); /* in: query thread */
/*******************************************************************
Inserts an index entry to index. Tries first optimistic, then pessimistic
@@ -70,6 +73,9 @@ row_ins_index_entry(
DB_DUPLICATE_KEY, or some other error code */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry to insert */
+ ulint* ext_vec,/* in: array containing field numbers of
+ externally stored fields in entry, or NULL */
+ ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr); /* in: query thread */
/***************************************************************
Inserts a row to a table. */
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index 554da2c035c..31f9e15cddc 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -189,7 +189,9 @@ row_update_for_mysql(
row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
handle */
/*************************************************************************
-Does a table creation operation for MySQL. */
+Does a table creation operation for MySQL. If the name of the created
+table ends in the characters INNODB_MONITOR, then this also starts
+printing of monitor output by the master thread. */
int
row_create_table_for_mysql(
@@ -209,7 +211,9 @@ row_create_index_for_mysql(
dict_index_t* index, /* in: index definition */
trx_t* trx); /* in: transaction handle */
/*************************************************************************
-Drops a table for MySQL. */
+Drops a table for MySQL. If the name of the dropped table ends in the
+characters INNODB_MONITOR, then this also stops printing of monitor
+output by the master thread. */
int
row_drop_table_for_mysql(
diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h
index fb1e1b01ee3..09a79e19fd7 100644
--- a/innobase/include/row0row.h
+++ b/innobase/include/row0row.h
@@ -250,6 +250,7 @@ row_search_index_entry(
#define ROW_COPY_DATA 1
#define ROW_COPY_POINTERS 2
+#define ROW_COPY_ALSO_EXTERNALS 3
/* The allowed latching order of index records is the following:
(1) a secondary index record ->
diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h
index 3046345f446..9bb73726b29 100644
--- a/innobase/include/row0upd.h
+++ b/innobase/include/row0upd.h
@@ -147,6 +147,9 @@ row_upd_build_difference(
fields, excluding roll ptr and trx id */
dict_index_t* index, /* in: clustered index */
dtuple_t* entry, /* in: entry to insert */
+ ulint* ext_vec,/* in: array containing field numbers of
+ externally stored fields in entry, or NULL */
+ ulint n_ext_vec,/* in: number of fields in ext_vec */
rec_t* rec, /* in: clustered index record */
mem_heap_t* heap); /* in: memory heap from which allocated */
/***************************************************************
@@ -262,6 +265,9 @@ struct upd_field_struct{
constants in the symbol table of the
query graph */
dfield_t new_val; /* new value for the column */
+ ibool extern_storage; /* this is set to TRUE if dfield
+ actually contains a reference to
+ an externally stored field */
};
/* Update vector structure */
@@ -318,6 +324,10 @@ struct upd_node_struct{
dtuple_t* row; /* NULL, or a copy (also fields copied to
heap) of the row to update; this must be reset
to NULL after a successful update */
+ ulint* ext_vec;/* array describing which fields are stored
+ externally in the clustered index record of
+ row */
+ ulint n_ext_vec;/* number of fields in ext_vec */
mem_heap_t* heap; /* memory heap used as auxiliary storage for
row; this must be emptied after a successful
update if node->row != NULL */
@@ -349,7 +359,7 @@ struct upd_node_struct{
looked at and updated if an ordering
field changed */
-/* Compilation info flags: these must fit within one byte */
+/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */
#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be
changed in the update and no ordering
field of the clustered index */
diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic
index b1b10bef0e8..b785e52caa0 100644
--- a/innobase/include/row0upd.ic
+++ b/innobase/include/row0upd.ic
@@ -23,6 +23,7 @@ upd_create(
mem_heap_t* heap) /* in: heap from which memory allocated */
{
upd_t* update;
+ ulint i;
update = mem_heap_alloc(heap, sizeof(upd_t));
@@ -30,6 +31,10 @@ upd_create(
update->n_fields = n;
update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n);
+ for (i = 0; i < n; i++) {
+ update->fields[i].extern_storage = 0;
+ }
+
return(update);
}
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index f80abda19c6..e635964e5ec 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -27,6 +27,9 @@ extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
extern ulint* srv_data_file_is_raw_partition;
+#define SRV_NEW_RAW 1
+#define SRV_OLD_RAW 2
+
extern char** srv_log_group_home_dirs;
extern ulint srv_n_log_groups;
@@ -52,10 +55,14 @@ extern ulint srv_lock_wait_timeout;
extern char* srv_unix_file_flush_method_str;
extern ulint srv_unix_file_flush_method;
+extern ibool srv_use_doublewrite_buf;
+
extern ibool srv_set_thread_priorities;
extern int srv_query_thread_priority;
/*-------------------------------------------*/
+
+extern ibool srv_print_innodb_monitor;
extern ulint srv_n_spin_wait_rounds;
extern ulint srv_spin_wait_delay;
extern ibool srv_priority_boost;
@@ -104,26 +111,13 @@ typedef struct srv_sys_struct srv_sys_t;
/* The server system */
extern srv_sys_t* srv_sys;
-/* Alternatives for file flush option in Unix; see the InnoDB manual about
+/* Alternatives for file flush option in Unix; see the InnoDB manual about
what these mean */
#define SRV_UNIX_FDATASYNC 1
#define SRV_UNIX_O_DSYNC 2
#define SRV_UNIX_LITTLESYNC 3
#define SRV_UNIX_NOSYNC 4
-/* Raw partition flags */
-#define SRV_OLD_RAW 1
-#define SRV_NEW_RAW 2
-
-void
-srv_mysql_thread_release(void);
-/*==========================*/
-os_event_t
-srv_mysql_thread_event_get(void);
-void
-srv_mysql_thread_slot_free(
-/*==========================*/
- os_event_t event);
/*************************************************************************
Boots Innobase server. */
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
index 4b12dd3c86d..cb86b2b815c 100644
--- a/innobase/include/sync0sync.h
+++ b/innobase/include/sync0sync.h
@@ -393,6 +393,7 @@ Memory pool mutex */
#define SYNC_RSEG_HEADER_NEW 591
#define SYNC_RSEG_HEADER 590
#define SYNC_TRX_UNDO_PAGE 570
+#define SYNC_EXTERN_STORAGE 500
#define SYNC_FSP 400
#define SYNC_FSP_PAGE 395
/*------------------------------------- Insert buffer headers */
@@ -415,6 +416,7 @@ Memory pool mutex */
the level is SYNC_MEM_HASH. */
#define SYNC_BUF_POOL 150
#define SYNC_BUF_BLOCK 149
+#define SYNC_DOUBLEWRITE 140
#define SYNC_ANY_LATCH 135
#define SYNC_MEM_HASH 131
#define SYNC_MEM_POOL 130
diff --git a/innobase/include/trx0rec.h b/innobase/include/trx0rec.h
index ea9e9f3fce5..edfc283d1b2 100644
--- a/innobase/include/trx0rec.h
+++ b/innobase/include/trx0rec.h
@@ -45,6 +45,14 @@ trx_undo_rec_get_cmpl_info(
/* out: compiler info */
trx_undo_rec_t* undo_rec); /* in: undo log record */
/**************************************************************************
+Returns TRUE if an undo log record contains an extern storage field. */
+UNIV_INLINE
+ibool
+trx_undo_rec_get_extern_storage(
+/*============================*/
+ /* out: TRUE if extern */
+ trx_undo_rec_t* undo_rec); /* in: undo log record */
+/**************************************************************************
Reads the undo log record number. */
UNIV_INLINE
dulint
@@ -65,6 +73,8 @@ trx_undo_rec_get_pars(
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /* out: compiler info, relevant only
for update type records */
+ ibool* updated_extern, /* out: TRUE if we updated an
+ externally stored field */
dulint* undo_no, /* out: undo log record number */
dulint* table_id); /* out: table id */
/***********************************************************************
@@ -272,7 +282,11 @@ record */
do not change */
#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
this and ORed to the type above */
-
+#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl
+ to denote that we updated external
+ storage fields: used by purge to
+ free the external storage */
+
/* Operation type flags used in trx_undo_report_row_operation */
#define TRX_UNDO_INSERT_OP 1
#define TRX_UNDO_MODIFY_OP 2
diff --git a/innobase/include/trx0rec.ic b/innobase/include/trx0rec.ic
index f813a52ff9c..cd02ed9e04c 100644
--- a/innobase/include/trx0rec.ic
+++ b/innobase/include/trx0rec.ic
@@ -31,6 +31,23 @@ trx_undo_rec_get_cmpl_info(
}
/**************************************************************************
+Returns TRUE if an undo log record contains an extern storage field. */
+UNIV_INLINE
+ibool
+trx_undo_rec_get_extern_storage(
+/*============================*/
+ /* out: TRUE if extern */
+ trx_undo_rec_t* undo_rec) /* in: undo log record */
+{
+ if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
+ /* the TRX_UNDO_UPD_EXTERN bit is set in the byte at offset 2 */
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/**************************************************************************
Reads the undo log record number. */
UNIV_INLINE
dulint
diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h
index d0506dd65b7..e26f7e19850 100644
--- a/innobase/include/trx0sys.h
+++ b/innobase/include/trx0sys.h
@@ -27,6 +27,23 @@ Created 3/26/1996 Heikki Tuuri
/* The transaction system */
extern trx_sys_t* trx_sys;
+/* Doublewrite system */
+extern trx_doublewrite_t* trx_doublewrite;
+
+/********************************************************************
+Creates the doublewrite buffer at a database start. The header of the
+doublewrite buffer is placed on the trx system header page. */
+
+void
+trx_sys_create_doublewrite_buf(void);
+/*================================*/
+/********************************************************************
+At database startup, uses the doublewrite buffer, if one exists, to restore
+half-written pages in the data files. */
+
+void
+trx_sys_doublewrite_restore_corrupt_pages(void);
+/*===========================================*/
/*******************************************************************
Checks if a page address is the trx sys header page. */
UNIV_INLINE
@@ -235,6 +252,59 @@ therefore 256 */
segment specification slots */
/*-------------------------------------------------------------*/
+/* The offset of the doublewrite buffer header on the trx system header page */
+#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
+/*-------------------------------------------------------------*/
+#define TRX_SYS_DOUBLEWRITE_FSEG 0 /* fseg header of the fseg
+ containing the doublewrite
+ buffer */
+#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE
+ /* 4-byte magic number which
+ shows if we already have
+ created the doublewrite
+ buffer */
+#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE)
+ /* page number of the
+ first page in the first
+ sequence of 64
+ (= FSP_EXTENT_SIZE) consecutive
+ pages in the doublewrite
+ buffer */
+#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE)
+ /* page number of the
+ first page in the second
+ sequence of 64 consecutive
+ pages in the doublewrite
+ buffer */
+#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /* we repeat the above 3
+ numbers so that if the trx
+ sys header is half-written
+ to disk, we still may be able
+ to recover the information */
+/*-------------------------------------------------------------*/
+#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855
+
+#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
+
+/* Doublewrite control struct */
+struct trx_doublewrite_struct{
+ mutex_t mutex; /* mutex protecting the first_free field and
+ write_buf */
+ ulint block1; /* page number of the first page in the
+ first doublewrite block (64 pages) */
+ ulint block2; /* page number of the first page in the second doublewrite block (64 pages) */
+ ulint first_free; /* first free position in write_buf measured
+ in units of UNIV_PAGE_SIZE */
+ byte* write_buf; /* write buffer used in writing to the
+ doublewrite buffer, aligned to an
+ address divisible by UNIV_PAGE_SIZE
+ (which is required by Windows aio) */
+ byte* write_buf_unaligned; /* pointer to write_buf, but unaligned */
+ buf_block_t**
+ buf_block_arr; /* array to store pointers to the buffer
+ blocks which have been cached to write_buf */
+};
+
/* The transaction system central memory data structure; protected by the
kernel mutex */
struct trx_sys_struct{
diff --git a/innobase/include/trx0types.h b/innobase/include/trx0types.h
index 02da1605077..b8befe7172f 100644
--- a/innobase/include/trx0types.h
+++ b/innobase/include/trx0types.h
@@ -15,6 +15,7 @@ Created 3/26/1996 Heikki Tuuri
/* Memory objects */
typedef struct trx_struct trx_t;
typedef struct trx_sys_struct trx_sys_t;
+typedef struct trx_doublewrite_struct trx_doublewrite_t;
typedef struct trx_sig_struct trx_sig_t;
typedef struct trx_rseg_struct trx_rseg_t;
typedef struct trx_undo_struct trx_undo_t;
diff --git a/innobase/include/trx0undo.h b/innobase/include/trx0undo.h
index 82c21f756e6..7f0378c68d3 100644
--- a/innobase/include/trx0undo.h
+++ b/innobase/include/trx0undo.h
@@ -341,7 +341,9 @@ struct trx_undo_struct{
have delete marked records, because of
a delete of a row or an update of an
indexed field; purge is then
- necessary. */
+ necessary; also TRUE if the transaction
+ has updated an externally stored
+ field */
dulint trx_id; /* id of the trx assigned to the undo
log */
ibool dict_operation; /* TRUE if a dict operation trx */
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
index 73bf48b1bc0..6ffbb1b8fef 100644
--- a/innobase/include/univ.i
+++ b/innobase/include/univ.i
@@ -9,11 +9,12 @@ Created 1/20/1994 Heikki Tuuri
#ifndef univ_i
#define univ_i
-#undef UNIV_INTEL_X86
-
-#if (defined(_WIN32) || defined(_WIN64)) && !defined(MYSQL_SERVER)
+#if (defined(_WIN32) || defined(_WIN64))
#define __WIN__
+
+#ifndef MYSQL_SERVER
#include <windows.h>
+#endif
/* If you want to check for errors with compiler level -W4,
comment out the above include of windows.h and let the following defines
@@ -40,10 +41,8 @@ subdirectory of 'mysql'. */
#include <global.h>
#include <my_pthread.h>
-#ifndef __WIN__
/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
#include <sys/stat.h>
-#endif
#undef PACKAGE
#undef VERSION
@@ -63,19 +62,21 @@ subdirectory of 'mysql'. */
/* DEBUG VERSION CONTROL
===================== */
+
+/*
+#define UNIV_SYNC_DEBUG
+*/
+
/* Make a non-inline debug version */
/*
#define UNIV_DEBUG
#define UNIV_MEM_DEBUG
-#define UNIV_SYNC_DEBUG
#define UNIV_SEARCH_DEBUG
#define UNIV_IBUF_DEBUG
#define UNIV_SYNC_PERF_STAT
#define UNIV_SEARCH_PERF_STAT
-
-#define UNIV_DEBUG_FILE_ACCESSES
*/
#define UNIV_LIGHT_MEM_DEBUG
@@ -192,6 +193,13 @@ headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
has the SQL NULL as its value. */
#define UNIV_SQL_NULL ULINT_UNDEFINED
+/* Lengths which are not UNIV_SQL_NULL, but bigger than the following
+number indicate that a field contains a reference to an externally
+stored part of the field in the tablespace. The length field then
+contains the sum of the following flag and the locally stored len. */
+
+#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE)
+
/* The following definition of __FILE__ removes compiler warnings
associated with const char* / char* mismatches with __FILE__ */
diff --git a/innobase/include/ut0dbg.h b/innobase/include/ut0dbg.h
index 657d1bf95b2..fc5d493ca5e 100644
--- a/innobase/include/ut0dbg.h
+++ b/innobase/include/ut0dbg.h
@@ -41,7 +41,7 @@ extern ulint* ut_dbg_null_ptr;
}\
if (ut_dbg_stop_threads) {\
fprintf(stderr,\
- "Innobase: Thread %lu stopped in file %s line %lu\n",\
+ "InnoDB: Thread %lu stopped in file %s line %lu\n",\
os_thread_get_curr_id(), IB__FILE__, (ulint)__LINE__);\
os_thread_sleep(1000000000);\
}\
@@ -50,19 +50,17 @@ extern ulint* ut_dbg_null_ptr;
#define ut_error {\
ulint dbg_i;\
fprintf(stderr,\
- "Innobase: Assertion failure in thread %lu in file %s line %lu\n",\
+ "InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\
os_thread_get_curr_id(), IB__FILE__, (ulint)__LINE__);\
fprintf(stderr,\
- "Innobase: we intentionally generate a memory trap.\n");\
+ "InnoDB: We intentionally generate a memory trap.\n");\
fprintf(stderr,\
- "Innobase: Send a bug report to mysql@lists.mysql.com\n");\
+ "InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\
ut_dbg_stop_threads = TRUE;\
dbg_i = *(ut_dbg_null_ptr);\
printf("%lu", dbg_i);\
}
-
-
#ifdef UNIV_DEBUG
#define ut_ad(EXPR) ut_a(EXPR)
#define ut_d(EXPR) {EXPR;}
diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h
index 1e93a2b8a36..e1813e763bd 100644
--- a/innobase/include/ut0ut.h
+++ b/innobase/include/ut0ut.h
@@ -11,8 +11,7 @@ Created 1/20/1994 Heikki Tuuri
#include "univ.i"
#include <time.h>
-#include <m_ctype.h>
-
+#include <ctype.h>
typedef time_t ib_time_t;