diff options
Diffstat (limited to 'innobase/include')
28 files changed, 382 insertions, 77 deletions
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index 1d17c0e952d..506877333c3 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -690,7 +690,13 @@ and sleep this many microseconds in between */ #define BTR_CUR_RETRY_DELETE_N_TIMES 100 #define BTR_CUR_RETRY_SLEEP_TIME 50000 -/* The reference in a field of which data is stored on a different page */ +/* The reference in a field for which data is stored on a different page. +The reference is at the end of the 'locally' stored part of the field. +'Locally' means storage in the index record. +We store locally a long enough prefix of each column so that we can determine +the ordering parts of each index record without looking into the externally +stored part. */ + /*--------------------------------------*/ #define BTR_EXTERN_SPACE_ID 0 /* space id where stored */ #define BTR_EXTERN_PAGE_NO 4 /* page no where stored */ diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index b613d60ebf7..d2ee1a440c7 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -388,11 +388,24 @@ to a file. Note that we must be careful to calculate the same value on 32-bit and 64-bit architectures. */ ulint -buf_calc_page_checksum( -/*===================*/ +buf_calc_page_new_checksum( +/*=======================*/ /* out: checksum */ byte* page); /* in: buffer page */ /************************************************************************ +In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only +looked at the first few bytes of the page. This calculates that old +checksum. +NOTE: we must first store the new formula checksum to +FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum +because this takes that field as an input! */ + +ulint +buf_calc_page_old_checksum( +/*=======================*/ + /* out: checksum */ + byte* page); /* in: buffer page */ +/************************************************************************ Checks if a page is corrupt. */ ibool diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h index e0fb06e5018..889d148d3fe 100644 --- a/innobase/include/data0data.h +++ b/innobase/include/data0data.h @@ -453,8 +453,6 @@ struct dfield_struct{ void* data; /* pointer to data */ ulint len; /* data length; UNIV_SQL_NULL if SQL null; */ dtype_t type; /* type of data */ - ulint col_no; /* when building index entries, the column - number can be stored here */ }; struct dtuple_struct { diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h index b53a70a8909..4da686bf2e1 100644 --- a/innobase/include/data0type.h +++ b/innobase/include/data0type.h @@ -18,14 +18,16 @@ typedef struct dtype_struct dtype_t; data type */ extern dtype_t* dtype_binary; -/* Data main types of SQL data; NOTE! character data types requiring -collation transformation must have the smallest codes! All codes must be -less than 256! */ +/* Data main types of SQL data */ #define DATA_VARCHAR 1 /* character varying */ #define DATA_CHAR 2 /* fixed length character */ #define DATA_FIXBINARY 3 /* binary string of fixed length */ #define DATA_BINARY 4 /* binary string */ -#define DATA_BLOB 5 /* binary large object */ +#define DATA_BLOB 5 /* binary large object, or a TEXT type; if + prtype & DATA_NONLATIN1 != 0 the data must + be compared by MySQL as a whole field; if + prtype & DATA_BINARY_TYPE == 0, then this is + actually a TEXT column */ #define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */ #define DATA_SYS_CHILD 7 /* address of the child page in node pointer */ #define DATA_SYS 8 /* system column */ @@ -34,35 +36,55 @@ binary strings */ #define DATA_FLOAT 9 #define DATA_DOUBLE 10 #define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */ -#define DATA_VARMYSQL 12 /* data types for which comparisons must be */ -#define DATA_MYSQL 13 /* made by MySQL */ -#define DATA_ERROR 111 /* error value */ -#define DATA_MTYPE_MAX 255 +#define DATA_VARMYSQL 12 /* non-latin1 varying length char */ +#define DATA_MYSQL 13 /* non-latin1 fixed length char */ +#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size() + requires the values are <= 63 */ /*-------------------------------------------*/ -/* Precise data types for system columns; NOTE: the values must run -from 0 up in the order given! All codes must be less than 256! */ +/* In the lowest byte in the precise type we store the MySQL type code +(not applicable for system columns). */ + +#define DATA_ENGLISH 4 /* English language character string: this + is a relic from pre-MySQL time and only used + for InnoDB's own system tables */ +#define DATA_ERROR 111 /* another relic from pre-MySQL time */ + +#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL + type from the precise type */ + +/* Precise data types for system columns and the length of those columns; +NOTE: the values must run from 0 up in the order given! All codes must +be less than 256 */ #define DATA_ROW_ID 0 /* row id: a dulint */ #define DATA_ROW_ID_LEN 6 /* stored length for row id */ + #define DATA_TRX_ID 1 /* transaction id: 6 bytes */ #define DATA_TRX_ID_LEN 6 + #define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */ #define DATA_ROLL_PTR_LEN 7 + #define DATA_MIX_ID 3 /* mixed index label: a dulint, stored in a row in a compressed form */ #define DATA_MIX_ID_LEN 9 /* maximum stored length for mix id (in a compressed dulint form) */ #define DATA_N_SYS_COLS 4 /* number of system columns defined above */ +/*-------------------------------------------*/ +/* Flags ORed to the precise data type */ #define DATA_NOT_NULL 256 /* this is ORed to the precise type when the column is declared as NOT NULL */ #define DATA_UNSIGNED 512 /* this id ORed to the precise type when we have an unsigned integer type */ +#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character + string, this is ORed to the precise type: + this only holds for tables created with + >= MySQL-4.0.14 */ +#define DATA_NONLATIN1 2048 /* if the data type is a DATA_BLOB (actually + TEXT) of a non-latin1 type, this is ORed to + the precise type: this only holds for tables + created with >= MySQL-4.0.14 */ /*-------------------------------------------*/ -/* Precise types of a char or varchar data. All codes must be less than 256! */ -#define DATA_ENGLISH 4 /* English language character string */ -#define DATA_FINNISH 5 /* Finnish */ -#define DATA_PRTYPE_MAX 255 - /* This many bytes we need to store the type information affecting the alphabetical order for a single field and decide the storage size of an SQL null*/ @@ -123,7 +145,7 @@ dtype_get_pad_char( /*===============*/ /* out: padding character code, or ULINT_UNDEFINED if no padding specified */ - dtype_t* type); /* in: typeumn */ + dtype_t* type); /* in: type */ /*************************************************************************** Returns the size of a fixed size data type, 0 if not a fixed size type. */ UNIV_INLINE @@ -150,24 +172,24 @@ dtype_is_fixed_size( /* out: TRUE if fixed size */ dtype_t* type); /* in: type */ /************************************************************************** -Stores to a type the information which determines its alphabetical -ordering. */ +Stores for a type the information which determines its alphabetical ordering +and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_store_for_order_and_null_size( /*================================*/ byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE - bytes */ + bytes where we store the info */ dtype_t* type); /* in: type struct */ /************************************************************************** -Reads of a type the stored information which determines its alphabetical -ordering. */ +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_read_for_order_and_null_size( /*===============================*/ dtype_t* type, /* in: type struct */ - byte* buf); /* in: buffer for type order info */ + byte* buf); /* in: buffer for the stored order info */ /************************************************************************* Validates a data type structure. */ diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic index d82d976d076..ddd0b0ae8cc 100644 --- a/innobase/include/data0type.ic +++ b/innobase/include/data0type.ic @@ -110,7 +110,9 @@ dtype_get_pad_char( if (type->mtype == DATA_CHAR || type->mtype == DATA_VARCHAR || type->mtype == DATA_BINARY - || type->mtype == DATA_FIXBINARY) { + || type->mtype == DATA_FIXBINARY + || type->mtype == DATA_MYSQL + || type->mtype == DATA_VARMYSQL) { /* Space is the padding character for all char and binary strings */ @@ -124,39 +126,56 @@ dtype_get_pad_char( } /************************************************************************** -Stores to a type the information which determines its alphabetical -ordering. */ +Stores for a type the information which determines its alphabetical ordering +and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_store_for_order_and_null_size( /*================================*/ byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE - bytes */ + bytes where we store the info */ dtype_t* type) /* in: type struct */ { ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE); buf[0] = (byte)(type->mtype & 0xFF); + + if (type->prtype & DATA_BINARY_TYPE) { + buf[0] = buf[0] | 128; + } + + if (type->prtype & DATA_NONLATIN1) { + buf[0] = buf[0] | 64; + } + buf[1] = (byte)(type->prtype & 0xFF); mach_write_to_2(buf + 2, type->len & 0xFFFF); } /************************************************************************** -Reads of a type the stored information which determines its alphabetical -ordering. */ +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_read_for_order_and_null_size( /*===============================*/ dtype_t* type, /* in: type struct */ - byte* buf) /* in: buffer for type order info */ + byte* buf) /* in: buffer for stored type order info */ { ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE); - type->mtype = buf[0]; + type->mtype = buf[0] & 63; type->prtype = buf[1]; + if (buf[0] & 128) { + type->prtype = type->prtype | DATA_BINARY_TYPE; + } + + if (buf[0] & 64) { + type->prtype = type->prtype | DATA_NONLATIN1; + } + type->len = mach_read_from_2(buf + 2); } diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h index ab7d0caa35c..854b9794c00 100644 --- a/innobase/include/db0err.h +++ b/innobase/include/db0err.h @@ -44,8 +44,10 @@ Created 5/24/1996 Heikki Tuuri #define DB_CORRUPTION 39 /* data structure corruption noticed */ #define DB_COL_APPEARS_TWICE_IN_INDEX 40 /* InnoDB cannot handle an index where same column appears twice */ -#define DB_CANNOT_DROP_CONSTRAINT 40 /* dropping a foreign key constraint +#define DB_CANNOT_DROP_CONSTRAINT 41 /* dropping a foreign key constraint from a table failed */ +#define DB_NO_SAVEPOINT 42 /* no savepoint exists with the given + name */ /* The following are partial failure codes */ #define DB_FAIL 1000 diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h index 97486a7c2f6..b5ec5381db2 100644 --- a/innobase/include/dict0dict.h +++ b/innobase/include/dict0dict.h @@ -569,6 +569,29 @@ dict_index_get_nth_col_pos( dict_index_t* index, /* in: index */ ulint n); /* in: column number */ /************************************************************************ +Returns TRUE if the index contains a column or a prefix of that column. */ + +ibool +dict_index_contains_col_or_prefix( +/*==============================*/ + /* out: TRUE if contains the column or its + prefix */ + dict_index_t* index, /* in: index */ + ulint n); /* in: column number */ +/************************************************************************ +Looks for a matching field in an index. The column and the prefix len has +to be the same. */ + +ulint +dict_index_get_nth_field_pos( +/*=========================*/ + /* out: position in internal representation + of the index; if not contained, returns + ULINT_UNDEFINED */ + dict_index_t* index, /* in: index from which to search */ + dict_index_t* index2, /* in: index */ + ulint n); /* in: field number in index2 */ +/************************************************************************ Looks for column n position in the clustered index. */ ulint diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic index 71ea67117a7..c5982c162a7 100644 --- a/innobase/include/dict0dict.ic +++ b/innobase/include/dict0dict.ic @@ -203,7 +203,6 @@ dict_index_get_n_fields( { ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); return(index->n_fields); } diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h index 0798541cfe0..03dc913a7c9 100644 --- a/innobase/include/dict0mem.h +++ b/innobase/include/dict0mem.h @@ -111,10 +111,13 @@ by the column name may be released only after publishing the index. */ void dict_mem_index_add_field( /*=====================*/ - dict_index_t* index, /* in: index */ - char* name, /* in: column name */ - ulint order); /* in: order criterion; 0 means an ascending - order */ + dict_index_t* index, /* in: index */ + char* name, /* in: column name */ + ulint order, /* in: order criterion; 0 means an + ascending order */ + ulint prefix_len); /* in: 0 or the column prefix length + in a MySQL index like + INDEX (textcol(25)) */ /************************************************************************** Frees an index memory object. */ @@ -158,12 +161,18 @@ struct dict_col_struct{ in some of the functions below */ }; +#define DICT_MAX_COL_PREFIX_LEN 512 + /* Data structure for a field in an index */ struct dict_field_struct{ - dict_col_t* col; /* pointer to the table column */ - char* name; /* name of the column */ - ulint order; /* flags for ordering this field: - DICT_DESCEND, ... */ + dict_col_t* col; /* pointer to the table column */ + char* name; /* name of the column */ + ulint order; /* flags for ordering this field: + DICT_DESCEND, ... */ + ulint prefix_len; /* 0 or the length of the column + prefix in a MySQL index of type, e.g., + INDEX (textcol(25)); must be smaller + than DICT_MAX_COL_PREFIX_LEN */ }; /* Data structure for an index tree */ diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h index 23ef0304b2d..ad3149f0b36 100644 --- a/innobase/include/fil0fil.h +++ b/innobase/include/fil0fil.h @@ -43,7 +43,10 @@ struct fil_addr_struct{ extern fil_addr_t fil_addr_null; /* The byte offsets on a file page for various variables */ -#define FIL_PAGE_SPACE 0 /* space id the page belongs to */ +#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the + page belongs to (== 0) but in later + versions the 'new' checksum of the + page */ #define FIL_PAGE_OFFSET 4 /* page offset inside space */ #define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor of the page, its offset */ @@ -64,7 +67,7 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_DATA 38 /* start of the data on the page */ /* File page trailer */ -#define FIL_PAGE_END_LSN 8 /* the low 4 bytes of this are used +#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used to store the page checksum, the last 4 bytes should be identical to the last 4 bytes of FIL_PAGE_LSN */ @@ -383,6 +386,14 @@ fil_space_release_free_extents( /*===========================*/ ulint id, /* in: space id */ ulint n_reserved); /* in: how many one reserved */ +/*********************************************************************** +Gets the number of reserved extents. If the database is silent, this number +should be zero. */ + +ulint +fil_space_get_n_reserved_extents( +/*=============================*/ + ulint id); /* in: space id */ typedef struct fil_space_struct fil_space_t; diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h index d3b3d55d015..5608ba020b7 100644 --- a/innobase/include/lock0lock.h +++ b/innobase/include/lock0lock.h @@ -450,6 +450,18 @@ lock_rec_get_mutex_for_addr( ulint space, /* in: space id */ ulint page_no);/* in: page number */ /************************************************************************* +Checks that a transaction id is sensible, i.e., not in the future. */ + +ibool +lock_check_trx_id_sanity( +/*=====================*/ + /* out: TRUE if ok */ + dulint trx_id, /* in: trx id */ + rec_t* rec, /* in: user record */ + dict_index_t* index, /* in: clustered index */ + ibool has_kernel_mutex);/* in: TRUE if the caller owns the + kernel mutex */ +/************************************************************************* Validates the lock queue on a single record. */ ibool diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h index 4e1404b15fe..24ec28a56e6 100644 --- a/innobase/include/log0log.h +++ b/innobase/include/log0log.h @@ -173,6 +173,12 @@ log_write_up_to( /* in: TRUE if we want the written log also to be flushed to disk */ /******************************************************************** +Does a syncronous flush of the log buffer to disk. */ + +void +log_buffer_flush_to_disk(void); +/*==========================*/ +/******************************************************************** Advances the smallest lsn for which there are unflushed dirty blocks in the buffer pool and also may make a new checkpoint. NOTE: this function may only be called if the calling thread owns no synchronization objects! */ @@ -507,6 +513,15 @@ log_print( /*======*/ char* buf, /* in/out: buffer where to print */ char* buf_end);/* in: buffer end */ +/********************************************************** +Peeks the current lsn. */ + +ibool +log_peek_lsn( +/*=========*/ + /* out: TRUE if success, FALSE if could not get the + log system mutex */ + dulint* lsn); /* out: if returns TRUE, current lsn is here */ /************************************************************************** Refreshes the statistics used to print per-second averages. */ @@ -779,6 +794,11 @@ struct log_struct{ called */ /* Fields involved in checkpoints */ + ulint log_group_capacity; /* capacity of the log group; if + the checkpoint age exceeds this, it is + a serious error because it is possible + we will then overwrite log and spoil + crash recovery */ ulint max_modified_age_async; /* when this recommended value for lsn - buf_pool_get_oldest_modification() diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h index bef42cfec1c..7b27ee34541 100644 --- a/innobase/include/log0recv.h +++ b/innobase/include/log0recv.h @@ -333,6 +333,8 @@ extern ibool recv_recovery_on; extern ibool recv_no_ibuf_operations; extern ibool recv_needed_recovery; +extern ibool recv_lsn_checks_on; + extern ibool recv_is_making_a_backup; extern ulint recv_max_parsed_page_no; diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h index 86f27a2d3eb..5c52f0e92bf 100644 --- a/innobase/include/os0file.h +++ b/innobase/include/os0file.h @@ -146,6 +146,21 @@ os_file_create_simple( ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ ibool* success);/* out: TRUE if succeed, FALSE if error */ /******************************************************************** +A simple function to open or create a file. */ + +os_file_t +os_file_create_simple_no_error_handling( +/*====================================*/ + /* out, own: handle to the file, not defined if error, + error number can be retrieved with os_get_last_error */ + char* name, /* in: name of the file or path as a null-terminated + string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened + (if does not exist, error), or OS_FILE_CREATE if a new + file is created (if exists, error) */ + ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ + ibool* success);/* out: TRUE if succeed, FALSE if error */ +/******************************************************************** Opens an existing file or creates a new. */ os_file_t @@ -160,7 +175,11 @@ os_file_create( file is created (if exists, error), OS_FILE_OVERWRITE if a new file is created or an old overwritten */ ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o - is desired, OS_FILE_NORMAL, if any normal file */ + is desired, OS_FILE_NORMAL, if any normal file; + NOTE that it also depends on type, os_aio_.. and srv_.. + variables whether we really use async i/o or + unbuffered i/o: look in the function source code for + the exact rules */ ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */ ibool* success);/* out: TRUE if succeed, FALSE if error */ /*************************************************************************** @@ -173,6 +192,14 @@ os_file_close( /* out: TRUE if success */ os_file_t file); /* in, own: handle to a file */ /*************************************************************************** +Closes a file handle. */ + +ibool +os_file_close_no_error_handling( +/*============================*/ + /* out: TRUE if success */ + os_file_t file); /* in, own: handle to a file */ +/*************************************************************************** Gets a file size. */ ibool diff --git a/innobase/include/os0sync.h b/innobase/include/os0sync.h index 634507467f9..e1cf263216e 100644 --- a/innobase/include/os0sync.h +++ b/innobase/include/os0sync.h @@ -36,8 +36,12 @@ typedef os_event_struct_t* os_event_t; struct os_event_struct { os_fast_mutex_t os_mutex; /* this mutex protects the next fields */ - ibool is_set; /* this is TRUE if the next mutex is - not reserved */ + ibool is_set; /* this is TRUE when the event is + in the signaled state, i.e., a thread + does not stop if it tries to wait for + this event */ + ib_longlong signal_count; /* this is incremented each time + the event becomes signaled */ pthread_cond_t cond_var; /* condition variable is used in waiting for the event */ UT_LIST_NODE_T(os_event_struct_t) os_event_list; diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h index b5e33af5bc0..04f771c3abd 100644 --- a/innobase/include/page0page.h +++ b/innobase/include/page0page.h @@ -666,6 +666,15 @@ page_rec_validate( /* out: TRUE if ok */ rec_t* rec); /* in: record on the page */ /******************************************************************* +Checks that the first directory slot points to the infimum record and +the last to the supremum. This function is intended to track if the +bug fixed in 4.0.14 has caused corruption to users' databases. */ + +void +page_check_dir( +/*===========*/ + page_t* page); /* in: index page */ +/******************************************************************* This function checks the consistency of an index page when we do not know the index. This is also resilient so that this should never crash even if the page is total garbage. */ diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h index 6f2a99fc8c2..712e263350e 100644 --- a/innobase/include/rem0cmp.h +++ b/innobase/include/rem0cmp.h @@ -42,6 +42,22 @@ cmp_data_data( buffer) */ ulint len2); /* in: data field length or UNIV_SQL_NULL */ /***************************************************************** +This function is used to compare two data fields for which we know the +data type. */ + +int +cmp_data_data_slow( +/*===============*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + dtype_t* cur_type,/* in: data type of the fields */ + byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** This function is used to compare two dfields where at least the first has its data type field set. */ UNIV_INLINE diff --git a/innobase/include/row0mysql.ic b/innobase/include/row0mysql.ic index e9d493da8b5..4ecd66e06ec 100644 --- a/innobase/include/row0mysql.ic +++ b/innobase/include/row0mysql.ic @@ -58,7 +58,8 @@ row_mysql_store_col_in_innobase_format( /*===================================*/ dfield_t* dfield, /* in/out: dfield */ byte* buf, /* in/out: buffer for the converted - value */ + value; this must be at least col_len + long! */ byte* mysql_data, /* in: MySQL column value, not SQL NULL; NOTE that dfield may also get a pointer to mysql_data, @@ -96,7 +97,6 @@ row_mysql_store_col_in_innobase_format( while (col_len > 0 && ptr[col_len - 1] == ' ') { col_len--; } - } else if (type == DATA_BLOB) { ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); } diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h index 09a79e19fd7..d1befbbbad3 100644 --- a/innobase/include/row0row.h +++ b/innobase/include/row0row.h @@ -86,9 +86,10 @@ dtuple_t* row_build( /*======*/ /* out, own: row built; see the NOTE below! */ - ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap as the latter only places pointers to + ulint type, /* in: ROW_COPY_POINTERS, ROW_COPY_DATA, or + ROW_COPY_ALSO_EXTERNALS, + the two last copy also the data fields to + heap as the first only places pointers to data fields on the index page, and thus is more efficient */ dict_index_t* index, /* in: clustered index */ diff --git a/innobase/include/row0sel.h b/innobase/include/row0sel.h index cfc30852b87..a35d588ad08 100644 --- a/innobase/include/row0sel.h +++ b/innobase/include/row0sel.h @@ -87,9 +87,11 @@ row_printf_step( /* out: query thread to run next or NULL */ que_thr_t* thr); /* in: query thread */ /******************************************************************** -Converts a key value stored in MySQL format to an Innobase dtuple. -The last field of the key value may be just a prefix of a fixed length -field: hence the parameter key_len. */ +Converts a key value stored in MySQL format to an Innobase dtuple. The last +field of the key value may be just a prefix of a fixed length field: hence +the parameter key_len. But currently we do not allow search keys where the +last field is only a prefix of the full key field len and print a warning if +such appears. */ void row_sel_convert_mysql_key_to_innobase( @@ -100,6 +102,7 @@ row_sel_convert_mysql_key_to_innobase( to index! */ byte* buf, /* in: buffer to use in field conversions */ + ulint buf_len, /* in: buffer length */ dict_index_t* index, /* in: index of the key value */ byte* key_ptr, /* in: MySQL key value */ ulint key_len); /* in: MySQL key value length */ diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h index 273ec6074eb..473c55c7ef9 100644 --- a/innobase/include/row0upd.h +++ b/innobase/include/row0upd.h @@ -114,13 +114,15 @@ row_upd_index_write_log( closed within this function */ mtr_t* mtr); /* in: mtr into whose log to write */ /*************************************************************** -Returns TRUE if row update changes size of some field in index. */ +Returns TRUE if row update changes size of some field in index or if some +field to be updated is stored externally in rec or update. */ ibool -row_upd_changes_field_size( -/*=======================*/ +row_upd_changes_field_size_or_external( +/*===================================*/ /* out: TRUE if the update changes the size of - some field in index */ + some field in index or the field is external + in rec or update */ rec_t* rec, /* in: record in clustered index */ dict_index_t* index, /* in: clustered index */ upd_t* update);/* in: update vector */ @@ -175,16 +177,10 @@ row_upd_index_replace_new_col_vals( dtuple_t* entry, /* in/out: index entry where replaced */ dict_index_t* index, /* in: index; NOTE that may also be a non-clustered index */ - upd_t* update); /* in: update vector */ -/*************************************************************** -Replaces the new column values stored in the update vector to the -clustered index entry given. */ - -void -row_upd_clust_index_replace_new_col_vals( -/*=====================================*/ - dtuple_t* entry, /* in/out: index entry where replaced */ - upd_t* update); /* in: update vector */ + upd_t* update, /* in: update vector */ + mem_heap_t* heap); /* in: memory heap to which we allocate and + copy the new values, set this as NULL if you + do not want allocation */ /*************************************************************** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering @@ -358,9 +354,9 @@ struct upd_node_struct{ externally in the clustered index record of row */ ulint n_ext_vec;/* number of fields in ext_vec */ - mem_heap_t* heap; /* memory heap used as auxiliary storage for - row; this must be emptied after a successful - update if node->row != NULL */ + mem_heap_t* heap; /* memory heap used as auxiliary storage; + this must be emptied after a successful + update */ /*----------------------*/ sym_node_t* table_sym;/* table node in symbol table */ que_node_t* col_assign_list; diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index 8fd0fc2dd6d..87643e87a68 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -156,6 +156,7 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, /* Array of English strings describing the current state of an i/o handler thread */ extern char* srv_io_thread_op_info[]; +extern char* srv_io_thread_function[]; typedef struct srv_sys_struct srv_sys_t; @@ -170,6 +171,7 @@ what these mean */ #define SRV_UNIX_O_DSYNC 2 #define SRV_UNIX_LITTLESYNC 3 #define SRV_UNIX_NOSYNC 4 +#define SRV_UNIX_O_DIRECT 5 /* Alternatives for file i/o in Windows */ #define SRV_WIN_IO_NORMAL 1 diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h index 820af4cd014..0d7126c9c57 100644 --- a/innobase/include/trx0roll.h +++ b/innobase/include/trx0roll.h @@ -177,6 +177,55 @@ trx_general_rollback_for_mysql( ibool partial,/* in: TRUE if partial rollback requested */ trx_savept_t* savept);/* in: pointer to savepoint undo number, if partial rollback requested */ +/*********************************************************************** +Rolls back a transaction back to a named savepoint. Modifications after the +savepoint are undone but InnoDB does NOT release the corresponding locks +which are stored in memory. If a lock is 'implicit', that is, a new inserted +row holds a lock where the lock information is carried by the trx id stored in +the row, these locks are naturally released in the rollback. Savepoints which +were set after this savepoint are deleted. */ + +ulint +trx_rollback_to_savepoint_for_mysql( +/*================================*/ + /* out: if no savepoint + of the name found then + DB_NO_SAVEPOINT, + otherwise DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong* mysql_binlog_cache_pos);/* out: the MySQL binlog cache + position corresponding to this + savepoint; MySQL needs this + information to remove the + binlog entries of the queries + executed after the savepoint */ +/*********************************************************************** +Creates a named savepoint. If the transaction is not yet started, starts it. +If there is already a savepoint of the same name, this call erases that old +savepoint and replaces it with a new. Savepoints are deleted in a transaction +commit or rollback. */ + +ulint +trx_savepoint_for_mysql( +/*====================*/ + /* out: always DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong binlog_cache_pos); /* in: MySQL binlog cache + position corresponding to this + connection at the time of the + savepoint */ +/*********************************************************************** +Frees savepoint structs. */ + +void +trx_roll_savepoints_free( +/*=====================*/ + trx_t* trx, /* in: transaction handle */ + trx_named_savept_t* savep); /* in: free all savepoints > this one; + if this is NULL, free all savepoints + of trx */ extern sess_t* trx_dummy_sess; @@ -207,6 +256,21 @@ struct roll_node_struct{ case of a partial rollback */ }; +/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command */ +struct trx_named_savept_struct{ + char* name; /* savepoint name */ + trx_savept_t savept; /* the undo number corresponding to + the savepoint */ + ib_longlong mysql_binlog_cache_pos; + /* the MySQL binlog cache position + corresponding to this savepoint, not + defined if the MySQL binlogging is not + enabled */ + UT_LIST_NODE_T(trx_named_savept_t) + trx_savepoints; /* the list of savepoints of a + transaction */ +}; + /* Rollback node states */ #define ROLL_NODE_SEND 1 #define ROLL_NODE_WAIT 2 diff --git a/innobase/include/trx0sys.ic b/innobase/include/trx0sys.ic index ada2d8cb19c..343e6d7c2fa 100644 --- a/innobase/include/trx0sys.ic +++ b/innobase/include/trx0sys.ic @@ -296,6 +296,16 @@ trx_is_active( return(FALSE); } + if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) { + + /* There must be corruption: we return TRUE because this + function is only called by lock_clust_rec_some_has_impl() + and row_vers_impl_x_locked_off_kernel() and they have + diagnostic prints in this case */ + + return(TRUE); + } + trx = trx_get_on_id(trx_id); if (trx && (trx->conc_state == TRX_ACTIVE)) { diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index 39229923375..6b08b674db8 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -381,7 +381,8 @@ struct trx_struct{ replication slave, we have here the master binlog name up to which replication has processed; otherwise - this is a pointer to a null character */ + this is a pointer to a null + character */ ib_longlong mysql_master_log_pos; /* if the database server is a MySQL replication slave, this is the @@ -501,6 +502,10 @@ struct trx_struct{ mem_heap_t* read_view_heap; /* memory heap for the read view */ read_view_t* read_view; /* consistent read view or NULL */ /*------------------------------*/ + UT_LIST_BASE_NODE_T(trx_named_savept_t) + trx_savepoints; /* savepoints set with SAVEPOINT ..., + oldest first */ + /*------------------------------*/ mutex_t undo_mutex; /* mutex protecting the fields in this section (down to undo_no_arr), EXCEPT last_sql_stat_start, which can be diff --git a/innobase/include/trx0types.h b/innobase/include/trx0types.h index b8befe7172f..2965eb4451f 100644 --- a/innobase/include/trx0types.h +++ b/innobase/include/trx0types.h @@ -24,6 +24,7 @@ typedef struct trx_undo_inf_struct trx_undo_inf_t; typedef struct trx_purge_struct trx_purge_t; typedef struct roll_node_struct roll_node_t; typedef struct commit_node_struct commit_node_t; +typedef struct trx_named_savept_struct trx_named_savept_t; /* Transaction savepoint */ typedef struct trx_savept_struct trx_savept_t; diff --git a/innobase/include/ut0dbg.h b/innobase/include/ut0dbg.h index e99dc8c09d6..802557099fc 100644 --- a/innobase/include/ut0dbg.h +++ b/innobase/include/ut0dbg.h @@ -50,6 +50,37 @@ extern ulint* ut_dbg_null_ptr; }\ } +/* This can be used if there are % characters in the assertion formula: +if we try to printf the formula gcc would complain of illegal print +format characters */ +#define ut_anp(EXPR)\ +{\ + ulint dbg_i;\ +\ + if (!((ulint)(EXPR) + ut_dbg_zero)) {\ + ut_print_timestamp(stderr);\ + fprintf(stderr,\ + " InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\ + os_thread_pf(os_thread_get_curr_id()), IB__FILE__,\ + (ulint)__LINE__);\ + fprintf(stderr,\ + "\nInnoDB: We intentionally generate a memory trap.\n");\ + fprintf(stderr,\ + "InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\ + ut_dbg_stop_threads = TRUE;\ + dbg_i = *(ut_dbg_null_ptr);\ + if (dbg_i) {\ + ut_dbg_null_ptr = NULL;\ + }\ + }\ + if (ut_dbg_stop_threads) {\ + fprintf(stderr,\ + "InnoDB: Thread %lu stopped in file %s line %lu\n",\ + os_thread_pf(os_thread_get_curr_id()), IB__FILE__, (ulint)__LINE__);\ + os_thread_sleep(1000000000);\ + }\ +} + #define ut_error {\ ulint dbg_i;\ ut_print_timestamp(stderr);\ diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h index d3d04d58596..ba6905a8618 100644 --- a/innobase/include/ut0mem.h +++ b/innobase/include/ut0mem.h @@ -67,7 +67,7 @@ ut_free( /*====*/ void* ptr); /* in, own: memory block */ /************************************************************************** -Frees all allocated memory not freed yet. */ +Frees in shutdown all allocated memory not freed yet. */ void ut_free_all_mem(void); |