diff options
author | Sergei Golubchik <sergii@pisem.net> | 2012-01-16 20:16:35 +0100 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2012-01-16 20:16:35 +0100 |
commit | 38e3ae155db08ab2e9e5c267f05f89bec0542b33 (patch) | |
tree | 7289bbef1ba3f495aa5c7cdb7d0a3f993a5bbc80 /storage | |
parent | c56483d972d023105fbcb0f47af0042ee092657c (diff) | |
parent | ed1ba992c1d3c3ecbe6a2769c51ceb5d27606d3b (diff) | |
download | mariadb-git-38e3ae155db08ab2e9e5c267f05f89bec0542b33.tar.gz |
mysql-5.5 merge
Diffstat (limited to 'storage')
42 files changed, 822 insertions, 674 deletions
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index 4008165865a..b5ec94ba503 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -830,6 +830,7 @@ uint32 ha_archive::max_row_length(const uchar *buf) ptr != end ; ptr++) { + if (!table->field[*ptr]->is_null()) length += 2 + ((Field_blob*)table->field[*ptr])->get_length(); } @@ -1186,6 +1187,17 @@ int ha_archive::unpack_row(azio_stream *file_to_read, uchar *record) /* Copy null bits */ const uchar *ptr= record_buffer->buffer; + /* + Field::unpack() is not called when field is NULL. For VARCHAR + Field::unpack() only unpacks as much bytes as occupied by field + value. In these cases respective memory area on record buffer is + not initialized. + + These uninitialized areas may be accessed by CHECKSUM TABLE or + by optimizer using temporary table (BUG#12997905). We may remove + this memset() when they're fixed. + */ + memset(record, 0, table->s->reclength); memcpy(record, ptr, table->s->null_bytes); ptr+= table->s->null_bytes; for (Field **field=table->field ; *field ; field++) @@ -1691,13 +1703,15 @@ int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt) { int rc= 0; const char *old_proc_info; - ha_rows count= share->rows_recorded; + ha_rows count; DBUG_ENTER("ha_archive::check"); old_proc_info= thd_proc_info(thd, "Checking table"); - /* Flush any waiting data */ mysql_mutex_lock(&share->mutex); - azflush(&(share->archive_write), Z_SYNC_FLUSH); + count= share->rows_recorded; + /* Flush any waiting data */ + if (share->archive_write_open) + azflush(&(share->archive_write), Z_SYNC_FLUSH); mysql_mutex_unlock(&share->mutex); if (init_archive_reader()) @@ -1707,18 +1721,34 @@ int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt) start of the file. 
*/ read_data_header(&archive); + for (ha_rows cur_count= count; cur_count; cur_count--) + { + if ((rc= get_row(&archive, table->record[0]))) + goto error; + } + /* + Now read records that may have been inserted concurrently. + Acquire share->mutex so tail of the table is not modified by + concurrent writers. + */ + mysql_mutex_lock(&share->mutex); + count= share->rows_recorded - count; + if (share->archive_write_open) + azflush(&(share->archive_write), Z_SYNC_FLUSH); while (!(rc= get_row(&archive, table->record[0]))) count--; - - thd_proc_info(thd, old_proc_info); + mysql_mutex_unlock(&share->mutex); if ((rc && rc != HA_ERR_END_OF_FILE) || count) - { - share->crashed= FALSE; - DBUG_RETURN(HA_ADMIN_CORRUPT); - } + goto error; + thd_proc_info(thd, old_proc_info); DBUG_RETURN(HA_ADMIN_OK); + +error: + thd_proc_info(thd, old_proc_info); + share->crashed= FALSE; + DBUG_RETURN(HA_ADMIN_CORRUPT); } /* diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc index 1091d88ffc8..4294f4b094f 100644 --- a/storage/federated/ha_federated.cc +++ b/storage/federated/ha_federated.cc @@ -1686,6 +1686,16 @@ int ha_federated::close(void) mysql_close(mysql); mysql= NULL; + /* + mysql_close() might return an error if a remote server's gone + for some reason. If that happens while removing a table from + the table cache, the error will be propagated to a client even + if the original query was not issued against the FEDERATED table. + So, don't propagate errors from mysql_close(). 
+ */ + if (table->in_use) + table->in_use->clear_error(); + DBUG_RETURN(free_share(share)); } diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 625721133fd..03346337d3f 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -458,8 +458,6 @@ btr_cur_search_to_nth_level( cursor->flag = BTR_CUR_BINARY; cursor->index = index; - cursor->ibuf_cnt = ULINT_UNDEFINED; - #ifndef BTR_CUR_ADAPT guess = NULL; #else @@ -747,21 +745,8 @@ retry_page_get: /* We're doing a search on an ibuf tree and we're one level above the leaf page. */ - ulint is_min_rec; - ut_ad(level == 0); - is_min_rec = rec_get_info_bits(node_ptr, 0) - & REC_INFO_MIN_REC_FLAG; - - if (!is_min_rec) { - cursor->ibuf_cnt - = ibuf_rec_get_counter(node_ptr); - - ut_a(cursor->ibuf_cnt <= 0xFFFF - || cursor->ibuf_cnt == ULINT_UNDEFINED); - } - buf_mode = BUF_GET; rw_latch = RW_NO_LATCH; goto retry_page_get; diff --git a/storage/innobase/btr/btr0pcur.c b/storage/innobase/btr/btr0pcur.c index 57d9752649f..fb153556b2f 100644 --- a/storage/innobase/btr/btr0pcur.c +++ b/storage/innobase/btr/btr0pcur.c @@ -52,12 +52,13 @@ btr_pcur_create_for_mysql(void) } /**************************************************************//** -Frees the memory for a persistent cursor object. */ +Resets a persistent cursor object, freeing ::old_rec_buf if it is +allocated and resetting the other members to their initial values. 
*/ UNIV_INTERN void -btr_pcur_free_for_mysql( -/*====================*/ - btr_pcur_t* cursor) /*!< in, own: persistent cursor */ +btr_pcur_reset( +/*===========*/ + btr_pcur_t* cursor) /*!< in, out: persistent cursor */ { if (cursor->old_rec_buf != NULL) { @@ -66,6 +67,7 @@ btr_pcur_free_for_mysql( cursor->old_rec_buf = NULL; } + cursor->btr_cur.index = NULL; cursor->btr_cur.page_cur.rec = NULL; cursor->old_rec = NULL; cursor->old_n_fields = 0; @@ -73,7 +75,17 @@ btr_pcur_free_for_mysql( cursor->latch_mode = BTR_NO_LATCHES; cursor->pos_state = BTR_PCUR_NOT_POSITIONED; +} +/**************************************************************//** +Frees the memory for a persistent cursor object. */ +UNIV_INTERN +void +btr_pcur_free_for_mysql( +/*====================*/ + btr_pcur_t* cursor) /*!< in, own: persistent cursor */ +{ + btr_pcur_reset(cursor); mem_free(cursor); } diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c index b5004a0834c..890d0282286 100644 --- a/storage/innobase/buf/buf0buf.c +++ b/storage/innobase/buf/buf0buf.c @@ -3885,6 +3885,9 @@ buf_pool_validate_instance( ut_a(rw_lock_is_locked(&block->lock, RW_LOCK_EX)); break; + + case BUF_IO_PIN: + break; } n_lru++; @@ -3914,6 +3917,7 @@ buf_pool_validate_instance( ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE); switch (buf_page_get_io_fix(b)) { case BUF_IO_NONE: + case BUF_IO_PIN: /* All clean blocks should be I/O-unfixed. */ break; case BUF_IO_READ: @@ -3953,6 +3957,7 @@ buf_pool_validate_instance( switch (buf_page_get_io_fix(b)) { case BUF_IO_NONE: case BUF_IO_READ: + case BUF_IO_PIN: break; case BUF_IO_WRITE: switch (buf_page_get_flush_type(b)) { diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c index 510f6eefba5..15b0ad40aaa 100644 --- a/storage/innobase/buf/buf0lru.c +++ b/storage/innobase/buf/buf0lru.c @@ -68,8 +68,12 @@ allowed to point to either end of the LRU list. 
*/ /** When dropping the search hash index entries before deleting an ibd file, we build a local array of pages belonging to that tablespace -in the buffer pool. Following is the size of that array. */ -#define BUF_LRU_DROP_SEARCH_HASH_SIZE 1024 +in the buffer pool. Following is the size of that array. +We also release buf_pool->mutex after scanning this many pages of the +flush_list when dropping a table. This is to ensure that other threads +are not blocked for extended period of time when using very large +buffer pools. */ +#define BUF_LRU_DROP_SEARCH_SIZE 1024 /** If we switch on the InnoDB monitor because there are too few available frames in the buffer pool, we set this to TRUE */ @@ -210,7 +214,7 @@ buf_LRU_drop_page_hash_batch( ulint i; ut_ad(arr != NULL); - ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE); + ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE); for (i = 0; i < count; ++i) { btr_search_drop_page_hash_when_freed(space_id, zip_size, @@ -244,7 +248,7 @@ buf_LRU_drop_page_hash_for_tablespace( } page_arr = ut_malloc( - sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE); + sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE); buf_pool_mutex_enter(buf_pool); num_entries = 0; @@ -283,10 +287,10 @@ next_page: /* Store the page number so that we can drop the hash index in a batch later. */ page_arr[num_entries] = bpage->offset; - ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE); + ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE); ++num_entries; - if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) { + if (num_entries < BUF_LRU_DROP_SEARCH_SIZE) { goto next_page; } @@ -331,37 +335,40 @@ next_page: } /******************************************************************//** -Invalidates all pages belonging to a given tablespace inside a specific +Remove all dirty pages belonging to a given tablespace inside a specific buffer pool instance when we are deleting the data file(s) of that -tablespace. */ +tablespace. 
The pages still remain a part of LRU and are evicted from +the list as they age towards the tail of the LRU. */ static void -buf_LRU_invalidate_tablespace_buf_pool_instance( -/*============================================*/ +buf_LRU_remove_dirty_pages_for_tablespace( +/*======================================*/ buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint id) /*!< in: space id */ { buf_page_t* bpage; ibool all_freed; + ulint i; scan_again: buf_pool_mutex_enter(buf_pool); + buf_flush_list_mutex_enter(buf_pool); all_freed = TRUE; - bpage = UT_LIST_GET_LAST(buf_pool->LRU); + for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list), i = 0; + bpage != NULL; ++i) { - while (bpage != NULL) { buf_page_t* prev_bpage; mutex_t* block_mutex = NULL; ut_a(buf_page_in_file(bpage)); - prev_bpage = UT_LIST_GET_PREV(LRU, bpage); + prev_bpage = UT_LIST_GET_PREV(list, bpage); /* bpage->space and bpage->io_fix are protected by - buf_pool_mutex and block_mutex. It is safe to check - them while holding buf_pool_mutex only. */ + buf_pool->mutex and block_mutex. It is safe to check + them while holding buf_pool->mutex only. */ if (buf_page_get_space(bpage) != id) { /* Skip this block, as it does not belong to @@ -374,79 +381,83 @@ scan_again: all_freed = FALSE; goto next_page; - } else { - block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); + } - if (bpage->buf_fix_count > 0) { + /* We have to release the flush_list_mutex to obey the + latching order. We are however guaranteed that the page + will stay in the flush_list because buf_flush_remove() + needs buf_pool->mutex as well. 
*/ + buf_flush_list_mutex_exit(buf_pool); + block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); - mutex_exit(block_mutex); - /* We cannot remove this page during - this scan yet; maybe the system is - currently reading it in, or flushing - the modifications to the file */ + if (bpage->buf_fix_count > 0) { + mutex_exit(block_mutex); + buf_flush_list_mutex_enter(buf_pool); - all_freed = FALSE; + /* We cannot remove this page during + this scan yet; maybe the system is + currently reading it in, or flushing + the modifications to the file */ - goto next_page; - } + all_freed = FALSE; + goto next_page; } - ut_ad(mutex_own(block_mutex)); + ut_ad(bpage->oldest_modification != 0); -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Dropping space %lu page %lu\n", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); - } -#endif - if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { - /* This is a compressed-only block - descriptor. Do nothing. */ - } else if (((buf_block_t*) bpage)->index) { - ulint page_no; - ulint zip_size; + buf_flush_remove(bpage); - buf_pool_mutex_exit(buf_pool); - - zip_size = buf_page_get_zip_size(bpage); - page_no = buf_page_get_page_no(bpage); + mutex_exit(block_mutex); + buf_flush_list_mutex_enter(buf_pool); +next_page: + bpage = prev_bpage; - mutex_exit(block_mutex); + if (!bpage) { + break; + } - /* Note that the following call will acquire - and release an X-latch on the page. */ + /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the + loop we release buf_pool->mutex to let other threads + do their job. */ + if (i < BUF_LRU_DROP_SEARCH_SIZE) { + continue; + } - btr_search_drop_page_hash_when_freed( - id, zip_size, page_no); - goto scan_again; + /* We IO-fix the block to make sure that the block + stays in its position in the flush_list. */ + if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + /* Block is already IO-fixed. We don't + want to change the value. Lets leave + this block alone. 
*/ + continue; } - if (bpage->oldest_modification != 0) { + buf_flush_list_mutex_exit(buf_pool); + block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); + buf_page_set_sticky(bpage); + mutex_exit(block_mutex); - buf_flush_remove(bpage); - } + /* Now it is safe to release the buf_pool->mutex. */ + buf_pool_mutex_exit(buf_pool); + os_thread_yield(); + buf_pool_mutex_enter(buf_pool); - /* Remove from the LRU list. */ + mutex_enter(block_mutex); + buf_page_unset_sticky(bpage); + mutex_exit(block_mutex); - if (buf_LRU_block_remove_hashed_page(bpage, TRUE) - != BUF_BLOCK_ZIP_FREE) { - buf_LRU_block_free_hashed_page((buf_block_t*) bpage); - mutex_exit(block_mutex); - } else { - /* The block_mutex should have been released - by buf_LRU_block_remove_hashed_page() when it - returns BUF_BLOCK_ZIP_FREE. */ - ut_ad(block_mutex == &buf_pool->zip_mutex); - ut_ad(!mutex_own(block_mutex)); - } -next_page: - bpage = prev_bpage; + buf_flush_list_mutex_enter(buf_pool); + ut_ad(bpage->in_flush_list); + + i = 0; } buf_pool_mutex_exit(buf_pool); + buf_flush_list_mutex_exit(buf_pool); + + ut_ad(buf_flush_validate(buf_pool)); if (!all_freed) { os_thread_sleep(20000); @@ -477,7 +488,7 @@ buf_LRU_invalidate_tablespace( buf_pool = buf_pool_from_array(i); buf_LRU_drop_page_hash_for_tablespace(buf_pool, id); - buf_LRU_invalidate_tablespace_buf_pool_instance(buf_pool, id); + buf_LRU_remove_dirty_pages_for_tablespace(buf_pool, id); } } @@ -1532,8 +1543,9 @@ alloc: /* Prevent buf_page_get_gen() from decompressing the block while we release buf_pool->mutex and block_mutex. 
*/ - b->buf_fix_count++; - b->io_fix = BUF_IO_READ; + mutex_enter(&buf_pool->zip_mutex); + buf_page_set_sticky(b); + mutex_exit(&buf_pool->zip_mutex); } buf_pool_mutex_exit(buf_pool); @@ -1573,8 +1585,7 @@ alloc: if (b) { mutex_enter(&buf_pool->zip_mutex); - b->buf_fix_count--; - buf_page_set_io_fix(b, BUF_IO_NONE); + buf_page_unset_sticky(b); mutex_exit(&buf_pool->zip_mutex); } diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c index 2a2c7652817..9dc3cef229e 100644 --- a/storage/innobase/dict/dict0dict.c +++ b/storage/innobase/dict/dict0dict.c @@ -911,6 +911,11 @@ dict_index_find_on_id_low( dict_table_t* table; dict_index_t* index; + /* This can happen if the system tablespace is the wrong page size */ + if (dict_sys == NULL) { + return(NULL); + } + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); while (table) { @@ -3069,10 +3074,15 @@ dict_scan_table_name( memcpy(ref, database_name, database_name_len); ref[database_name_len] = '/'; memcpy(ref + database_name_len + 1, table_name, table_name_len + 1); + } else { +#ifndef __WIN__ if (innobase_get_lower_case_table_names() == 1) { innobase_casedn_str(ref); } +#else + innobase_casedn_str(ref); +#endif /* !__WIN__ */ *table = dict_table_get_low(ref); } diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c index 0a5cfb87140..44a0ec5b338 100644 --- a/storage/innobase/dict/dict0load.c +++ b/storage/innobase/dict/dict0load.c @@ -2066,8 +2066,9 @@ static void dict_load_foreign_cols( /*===================*/ - const char* id, /*!< in: foreign constraint id as a - null-terminated string */ + const char* id, /*!< in: foreign constraint id, not + necessary '\0'-terminated */ + ulint id_len, /*!< in: id length */ dict_foreign_t* foreign)/*!< in: foreign constraint object */ { dict_table_t* sys_foreign_cols; @@ -2097,7 +2098,7 @@ dict_load_foreign_cols( tuple = dtuple_create(foreign->heap, 1); dfield = dtuple_get_nth_field(tuple, 0); - dfield_set_data(dfield, id, 
ut_strlen(id)); + dfield_set_data(dfield, id, id_len); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, @@ -2110,7 +2111,7 @@ dict_load_foreign_cols( ut_a(!rec_get_deleted_flag(rec, 0)); field = rec_get_nth_field_old(rec, 0, &len); - ut_a(len == ut_strlen(id)); + ut_a(len == id_len); ut_a(ut_memcmp(id, field, len) == 0); field = rec_get_nth_field_old(rec, 1, &len); @@ -2139,8 +2140,9 @@ static ulint dict_load_foreign( /*==============*/ - const char* id, /*!< in: foreign constraint id as a - null-terminated string */ + const char* id, /*!< in: foreign constraint id, not + necessary '\0'-terminated */ + ulint id_len, /*!< in: id length */ ibool check_charsets, /*!< in: TRUE=check charset compatibility */ ibool check_recursive) @@ -2176,7 +2178,7 @@ dict_load_foreign( tuple = dtuple_create(heap2, 1); dfield = dtuple_get_nth_field(tuple, 0); - dfield_set_data(dfield, id, ut_strlen(id)); + dfield_set_data(dfield, id, id_len); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, @@ -2188,8 +2190,8 @@ dict_load_foreign( /* Not found */ fprintf(stderr, - "InnoDB: Error A: cannot load foreign constraint %s\n", - id); + "InnoDB: Error A: cannot load foreign constraint " + "%.*s\n", (int) id_len, id); btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -2201,11 +2203,11 @@ dict_load_foreign( field = rec_get_nth_field_old(rec, 0, &len); /* Check if the id in record is the searched one */ - if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) { + if (len != id_len || ut_memcmp(id, field, len) != 0) { fprintf(stderr, - "InnoDB: Error B: cannot load foreign constraint %s\n", - id); + "InnoDB: Error B: cannot load foreign constraint " + "%.*s\n", (int) id_len, id); btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -2231,7 +2233,7 @@ dict_load_foreign( foreign->type = (unsigned int) (n_fields_and_type >> 24); foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL); - foreign->id = 
mem_heap_strdup(foreign->heap, id); + foreign->id = mem_heap_strdupl(foreign->heap, id, id_len); field = rec_get_nth_field_old(rec, 3, &len); @@ -2247,7 +2249,7 @@ dict_load_foreign( btr_pcur_close(&pcur); mtr_commit(&mtr); - dict_load_foreign_cols(id, foreign); + dict_load_foreign_cols(id, id_len, foreign); ref_table = dict_table_check_if_in_cache_low( foreign->referenced_table_name_lookup); @@ -2326,8 +2328,8 @@ dict_load_foreigns( ibool check_charsets) /*!< in: TRUE=check charset compatibility */ { + char tuple_buf[DTUPLE_EST_ALLOC(1)]; btr_pcur_t pcur; - mem_heap_t* heap; dtuple_t* tuple; dfield_t* dfield; dict_index_t* sec_index; @@ -2335,7 +2337,6 @@ dict_load_foreigns( const rec_t* rec; const byte* field; ulint len; - char* id ; ulint err; mtr_t mtr; @@ -2362,9 +2363,8 @@ dict_load_foreigns( sec_index = dict_table_get_next_index( dict_table_get_first_index(sys_foreign)); start_load: - heap = mem_heap_create(256); - tuple = dtuple_create(heap, 1); + tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1); dfield = dtuple_get_nth_field(tuple, 0); dfield_set_data(dfield, table_name, ut_strlen(table_name)); @@ -2418,7 +2418,6 @@ loop: /* Now we get a foreign key constraint id */ field = rec_get_nth_field_old(rec, 1, &len); - id = mem_heap_strdupl(heap, (char*) field, len); btr_pcur_store_position(&pcur, &mtr); @@ -2426,11 +2425,11 @@ loop: /* Load the foreign constraint definition to the dictionary cache */ - err = dict_load_foreign(id, check_charsets, check_recursive); + err = dict_load_foreign((char*) field, len, check_charsets, + check_recursive); if (err != DB_SUCCESS) { btr_pcur_close(&pcur); - mem_heap_free(heap); return(err); } @@ -2446,7 +2445,6 @@ next_rec: load_next_index: btr_pcur_close(&pcur); mtr_commit(&mtr); - mem_heap_free(heap); sec_index = dict_table_get_next_index(sec_index); diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c index 196f4bd3f42..2e4c6aeeb60 100644 --- a/storage/innobase/fil/fil0fil.c +++ 
b/storage/innobase/fil/fil0fil.c @@ -1803,36 +1803,44 @@ fil_write_flushed_lsn_to_data_files( } /*******************************************************************//** -Reads the flushed lsn and arch no fields from a data file at database -startup. */ +Reads the flushed lsn, arch no, and tablespace flag fields from a data +file at database startup. */ UNIV_INTERN void -fil_read_flushed_lsn_and_arch_log_no( -/*=================================*/ +fil_read_first_page( +/*================*/ os_file_t data_file, /*!< in: open data file */ ibool one_read_already, /*!< in: TRUE if min and max parameters below already contain sensible data */ + ulint* flags, /*!< out: tablespace flags */ #ifdef UNIV_LOG_ARCHIVE - ulint* min_arch_log_no, /*!< in/out: */ - ulint* max_arch_log_no, /*!< in/out: */ + ulint* min_arch_log_no, /*!< out: min of archived + log numbers in data files */ + ulint* max_arch_log_no, /*!< out: max of archived + log numbers in data files */ #endif /* UNIV_LOG_ARCHIVE */ - ib_uint64_t* min_flushed_lsn, /*!< in/out: */ - ib_uint64_t* max_flushed_lsn) /*!< in/out: */ + ib_uint64_t* min_flushed_lsn, /*!< out: min of flushed + lsn values in data files */ + ib_uint64_t* max_flushed_lsn) /*!< out: max of flushed + lsn values in data files */ { byte* buf; - byte* buf2; + page_t* page; ib_uint64_t flushed_lsn; - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); + buf = ut_malloc(2 * UNIV_PAGE_SIZE); /* Align the memory for a possible read from a raw device */ - buf = ut_align(buf2, UNIV_PAGE_SIZE); + page = ut_align(buf, UNIV_PAGE_SIZE); - os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE); + os_file_read(data_file, page, 0, 0, UNIV_PAGE_SIZE); - flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN); + *flags = mach_read_from_4(page + + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS); - ut_free(buf2); + flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); + + ut_free(buf); if (!one_read_already) { *min_flushed_lsn = flushed_lsn; diff --git 
a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c index f31e6c03ba1..2d626405c5c 100644 --- a/storage/innobase/fsp/fsp0fsp.c +++ b/storage/innobase/fsp/fsp0fsp.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -50,67 +50,6 @@ Created 11/29/1995 Heikki Tuuri #include "dict0mem.h" -#define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header - within a file page */ - -/* The data structures in files are defined just as byte strings in C */ -typedef byte fsp_header_t; -typedef byte xdes_t; - -/* SPACE HEADER - ============ - -File space header data structure: this data structure is contained in the -first page of a space. The space for this header is reserved in every extent -descriptor page, but used only in the first. 
*/ - -/*-------------------------------------*/ -#define FSP_SPACE_ID 0 /* space id */ -#define FSP_NOT_USED 4 /* this field contained a value up to - which we know that the modifications - in the database have been flushed to - the file space; not used now */ -#define FSP_SIZE 8 /* Current size of the space in - pages */ -#define FSP_FREE_LIMIT 12 /* Minimum page number for which the - free list has not been initialized: - the pages >= this limit are, by - definition, free; note that in a - single-table tablespace where size - < 64 pages, this number is 64, i.e., - we have initialized the space - about the first extent, but have not - physically allocted those pages to the - file */ -#define FSP_SPACE_FLAGS 16 /* table->flags & ~DICT_TF_COMPACT */ -#define FSP_FRAG_N_USED 20 /* number of used pages in the - FSP_FREE_FRAG list */ -#define FSP_FREE 24 /* list of free extents */ -#define FSP_FREE_FRAG (24 + FLST_BASE_NODE_SIZE) - /* list of partially free extents not - belonging to any segment */ -#define FSP_FULL_FRAG (24 + 2 * FLST_BASE_NODE_SIZE) - /* list of full extents not belonging - to any segment */ -#define FSP_SEG_ID (24 + 3 * FLST_BASE_NODE_SIZE) - /* 8 bytes which give the first unused - segment id */ -#define FSP_SEG_INODES_FULL (32 + 3 * FLST_BASE_NODE_SIZE) - /* list of pages containing segment - headers, where all the segment inode - slots are reserved */ -#define FSP_SEG_INODES_FREE (32 + 4 * FLST_BASE_NODE_SIZE) - /* list of pages containing segment - headers, where not all the segment - header slots are reserved */ -/*-------------------------------------*/ -/* File space header size */ -#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE) - -#define FSP_FREE_ADD 4 /* this many free extents are added - to the free list from above - FSP_FREE_LIMIT at a time */ - /* FILE SEGMENT INODE ================== @@ -332,7 +271,7 @@ fseg_alloc_free_page_low( inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR 
*/ - mtr_t* mtr); /*!< in/out: mini-transaction */ + mtr_t* mtr); /*!< in: mtr handle */ #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** @@ -1547,7 +1486,7 @@ fsp_alloc_free_page( ulint zip_size,/*!< in: compressed page size in bytes or 0 for uncompressed pages */ ulint hint, /*!< in: hint of which page would be desirable */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + mtr_t* mtr) /*!< in: mtr handle */ { fsp_header_t* header; fil_addr_t first; @@ -2576,7 +2515,7 @@ fseg_alloc_free_page_low( inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + mtr_t* mtr) /*!< in: mtr handle */ { fsp_header_t* space_header; ulint space_size; @@ -2824,7 +2763,7 @@ fseg_alloc_free_page_general( with fsp_reserve_free_extents, then there is no need to do the check for this individual page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + mtr_t* mtr) /*!< in: mtr handle */ { fseg_inode_t* inode; ulint space; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 9a8bd67e8a4..27ad19456cc 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -3790,22 +3790,9 @@ ha_innobase::open( DBUG_RETURN(1); } - /* Create buffers for packing the fields of a record. Why - table->reclength did not work here? Obviously, because char - fields when packed actually became 1 byte longer, when we also - stored the string length as the first byte. 
*/ - - upd_and_key_val_buff_len = - table->s->reclength + table->s->max_key_length - + MAX_REF_PARTS * 3; - if (!(uchar*) my_multi_malloc(MYF(MY_WME), - &upd_buff, upd_and_key_val_buff_len, - &key_val_buff, upd_and_key_val_buff_len, - NullS)) { - free_share(share); - - DBUG_RETURN(1); - } + /* Will be allocated if it is needed in ::update_row() */ + upd_buf = NULL; + upd_buf_size = 0; /* We look for pattern #P# to see if the table is partitioned MySQL table. The retry logic for partitioned tables is a @@ -3846,7 +3833,6 @@ retry: "how you can resolve the problem.\n", norm_name); free_share(share); - my_free(upd_buff); my_errno = ENOENT; DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); @@ -3862,16 +3848,14 @@ retry: "how you can resolve the problem.\n", norm_name); free_share(share); - my_free(upd_buff); my_errno = ENOENT; dict_table_decrement_handle_count(ib_table, FALSE); DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); } - prebuilt = row_create_prebuilt(ib_table); + prebuilt = row_create_prebuilt(ib_table, table->s->reclength); - prebuilt->mysql_row_len = table->s->reclength; prebuilt->default_rec = table->s->default_values; ut_ad(prebuilt->default_rec); @@ -4060,7 +4044,13 @@ ha_innobase::close(void) row_prebuilt_free(prebuilt, FALSE); - my_free(upd_buff); + if (upd_buf != NULL) { + ut_ad(upd_buf_size != 0); + my_free(upd_buf); + upd_buf = NULL; + upd_buf_size = 0; + } + free_share(share); /* Tell InnoDB server that there might be work for @@ -5327,6 +5317,23 @@ ha_innobase::update_row( ut_a(prebuilt->trx == trx); + if (upd_buf == NULL) { + ut_ad(upd_buf_size == 0); + + /* Create a buffer for packing the fields of a record. Why + table->reclength did not work here? Obviously, because char + fields when packed actually became 1 byte longer, when we also + stored the string length as the first byte. 
*/ + + upd_buf_size = table->s->reclength + table->s->max_key_length + + MAX_REF_PARTS * 3; + upd_buf = (uchar*) my_malloc(upd_buf_size, MYF(MY_WME)); + if (upd_buf == NULL) { + upd_buf_size = 0; + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + } + ha_statistic_increment(&SSV::ha_update_count); if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) @@ -5339,11 +5346,10 @@ ha_innobase::update_row( } /* Build an update vector from the modified fields in the rows - (uses upd_buff of the handle) */ + (uses upd_buf of the handle) */ calc_row_difference(uvect, (uchar*) old_row, new_row, table, - upd_buff, (ulint)upd_and_key_val_buff_len, - prebuilt, user_thd); + upd_buf, upd_buf_size, prebuilt, user_thd); /* This is not a delete */ prebuilt->upd_node->is_delete = FALSE; @@ -5720,8 +5726,7 @@ ha_innobase::index_read( row_sel_convert_mysql_key_to_innobase( prebuilt->search_tuple, - (byte*) key_val_buff, - (ulint)upd_and_key_val_buff_len, + srch_key_val1, sizeof(srch_key_val1), index, (byte*) key_ptr, (ulint) key_len, @@ -5819,7 +5824,6 @@ ha_innobase::innobase_get_index( dict_index_t* index = 0; DBUG_ENTER("innobase_get_index"); - ha_statistic_increment(&SSV::ha_read_key_count); if (keynr != MAX_KEY && table->s->keys > 0) { key = table->key_info + keynr; @@ -5833,13 +5837,13 @@ ha_innobase::innobase_get_index( table. Only print message if the index translation table exists */ if (share->idx_trans_tbl.index_mapping) { - sql_print_error("InnoDB could not find " - "index %s key no %u for " - "table %s through its " - "index translation table", - key ? key->name : "NULL", - keynr, - prebuilt->table->name); + sql_print_warning("InnoDB could not find " + "index %s key no %u for " + "table %s through its " + "index translation table", + key ? 
key->name : "NULL", + keynr, + prebuilt->table->name); } index = dict_table_get_index_on_name(prebuilt->table, @@ -7541,12 +7545,6 @@ ha_innobase::records_in_range( { KEY* key; dict_index_t* index; - uchar* key_val_buff2 = (uchar*) my_malloc( - table->s->reclength - + table->s->max_key_length + 100, - MYF(MY_FAE)); - ulint buff2_len = table->s->reclength - + table->s->max_key_length + 100; dtuple_t* range_start; dtuple_t* range_end; ib_int64_t n_rows; @@ -7598,8 +7596,8 @@ ha_innobase::records_in_range( dict_index_copy_types(range_end, index, key->key_parts); row_sel_convert_mysql_key_to_innobase( - range_start, (byte*) key_val_buff, - (ulint)upd_and_key_val_buff_len, + range_start, + srch_key_val1, sizeof(srch_key_val1), index, (byte*) (min_key ? min_key->key : (const uchar*) 0), @@ -7610,8 +7608,9 @@ ha_innobase::records_in_range( : range_start->n_fields == 0); row_sel_convert_mysql_key_to_innobase( - range_end, (byte*) key_val_buff2, - buff2_len, index, + range_end, + srch_key_val2, sizeof(srch_key_val2), + index, (byte*) (max_key ? max_key->key : (const uchar*) 0), (ulint) (max_key ? 
max_key->length : 0), @@ -7638,7 +7637,6 @@ ha_innobase::records_in_range( mem_heap_free(heap); func_exit: - my_free(key_val_buff2); prebuilt->trx->op_info = (char*)""; @@ -8322,7 +8320,10 @@ ha_innobase::check( putc('\n', stderr); #endif - if (!btr_validate_index(index, prebuilt->trx)) { + /* If this is an index being created, break */ + if (*index->name == TEMP_INDEX_PREFIX) { + break; + } else if (!btr_validate_index(index, prebuilt->trx)) { is_ok = FALSE; innobase_format_name( diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index e6c9e955827..7cce0c4a16c 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -78,13 +78,14 @@ class ha_innobase: public handler INNOBASE_SHARE* share; /*!< information for MySQL table locking */ - uchar* upd_buff; /*!< buffer used in updates */ - uchar* key_val_buff; /*!< buffer used in converting + uchar* upd_buf; /*!< buffer used in updates */ + ulint upd_buf_size; /*!< the size of upd_buf in bytes */ + uchar srch_key_val1[REC_VERSION_56_MAX_INDEX_COL_LEN + 2]; + uchar srch_key_val2[REC_VERSION_56_MAX_INDEX_COL_LEN + 2]; + /*!< buffers used in converting search key values from MySQL format - to Innodb format */ - ulong upd_and_key_val_buff_len; - /* the length of each of the previous - two buffers */ + to InnoDB format. 
"+ 2" for the two + bytes where the length is stored */ Table_flags int_table_flags; uint primary_key; ulong start_of_scan; /*!< this is set to 1 when we are diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 6d5b7b4668f..c6754660b84 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -1008,7 +1008,12 @@ ha_innobase::final_add_index( row_prebuilt_free(prebuilt, TRUE); error = row_merge_drop_table(trx, old_table); add->indexed_table->n_mysql_handles_opened++; - prebuilt = row_create_prebuilt(add->indexed_table); + prebuilt = row_create_prebuilt(add->indexed_table, + 0 /* XXX Do we know the mysql_row_len here? + Before the addition of this parameter to + row_create_prebuilt() the mysql_row_len + member was left 0 (from zalloc) in the + prebuilt object. */); } err = convert_error_code_to_mysql( diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c index 0676a7be0f7..47ec1365cb8 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -254,11 +254,20 @@ ibuf_count_check( list of the ibuf */ /* @} */ +#define IBUF_REC_FIELD_SPACE 0 /*!< in the pre-4.1 format, + the page number. later, the space_id */ +#define IBUF_REC_FIELD_MARKER 1 /*!< starting with 4.1, a marker + consisting of 1 byte that is 0 */ +#define IBUF_REC_FIELD_PAGE 2 /*!< starting with 4.1, the + page number */ +#define IBUF_REC_FIELD_METADATA 3 /* the metadata field */ +#define IBUF_REC_FIELD_USER 4 /* first user field */ + /* Various constants for checking the type of an ibuf record and extracting data from it. For details, see the description of the record format at the top of this file. */ -/** @name Format of the fourth column of an insert buffer record +/** @name Format of the IBUF_REC_FIELD_METADATA of an insert buffer record The fourth column in the MySQL 5.5 format contains an operation type, counter, and some flags. 
*/ /* @{ */ @@ -1233,13 +1242,13 @@ ibuf_rec_get_page_no_func( ut_ad(ibuf_inside(mtr)); ut_ad(rec_get_n_fields_old(rec) > 2); - field = rec_get_nth_field_old(rec, 1, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len); if (len == 1) { /* This is of the >= 4.1.x record format */ ut_a(trx_sys_multiple_tablespace_format); - field = rec_get_nth_field_old(rec, 2, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len); } else { ut_a(trx_doublewrite_must_reset_space_ids); ut_a(!trx_sys_multiple_tablespace_format); @@ -1279,13 +1288,13 @@ ibuf_rec_get_space_func( ut_ad(ibuf_inside(mtr)); ut_ad(rec_get_n_fields_old(rec) > 2); - field = rec_get_nth_field_old(rec, 1, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len); if (len == 1) { /* This is of the >= 4.1.x record format */ ut_a(trx_sys_multiple_tablespace_format); - field = rec_get_nth_field_old(rec, 0, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); ut_a(len == 4); return(mach_read_from_4(field)); @@ -1335,9 +1344,9 @@ ibuf_rec_get_info_func( || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); ut_ad(ibuf_inside(mtr)); fields = rec_get_n_fields_old(rec); - ut_a(fields > 4); + ut_a(fields > IBUF_REC_FIELD_USER); - types = rec_get_nth_field_old(rec, 3, &len); + types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len); info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE; @@ -1363,7 +1372,8 @@ ibuf_rec_get_info_func( ut_a(op_local < IBUF_OP_COUNT); ut_a((len - info_len_local) == - (fields - 4) * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + (fields - IBUF_REC_FIELD_USER) + * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); if (op) { *op = op_local; @@ -1407,7 +1417,7 @@ ibuf_rec_get_op_type_func( ut_ad(ibuf_inside(mtr)); ut_ad(rec_get_n_fields_old(rec) > 2); - (void) rec_get_nth_field_old(rec, 1, &len); + (void) rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len); if (len > 1) { /* This is a < 4.1.x format record */ @@ -1436,12 +1446,12 @@ 
ibuf_rec_get_counter( const byte* ptr; ulint len; - if (rec_get_n_fields_old(rec) < 4) { + if (rec_get_n_fields_old(rec) <= IBUF_REC_FIELD_METADATA) { return(ULINT_UNDEFINED); } - ptr = rec_get_nth_field_old(rec, 3, &len); + ptr = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len); if (len >= 2) { @@ -1666,7 +1676,7 @@ ibuf_build_entry_from_ibuf_rec_func( || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX)); ut_ad(ibuf_inside(mtr)); - data = rec_get_nth_field_old(ibuf_rec, 1, &len); + data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len); if (len > 1) { /* This a < 4.1.x format record */ @@ -1678,13 +1688,13 @@ ibuf_build_entry_from_ibuf_rec_func( ut_a(trx_sys_multiple_tablespace_format); ut_a(*data == 0); - ut_a(rec_get_n_fields_old(ibuf_rec) > 4); + ut_a(rec_get_n_fields_old(ibuf_rec) > IBUF_REC_FIELD_USER); - n_fields = rec_get_n_fields_old(ibuf_rec) - 4; + n_fields = rec_get_n_fields_old(ibuf_rec) - IBUF_REC_FIELD_USER; tuple = dtuple_create(heap, n_fields); - types = rec_get_nth_field_old(ibuf_rec, 3, &len); + types = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_METADATA, &len); ibuf_rec_get_info(mtr, ibuf_rec, NULL, &comp, &info_len, NULL); @@ -1698,7 +1708,8 @@ ibuf_build_entry_from_ibuf_rec_func( for (i = 0; i < n_fields; i++) { field = dtuple_get_nth_field(tuple, i); - data = rec_get_nth_field_old(ibuf_rec, i + 4, &len); + data = rec_get_nth_field_old( + ibuf_rec, i + IBUF_REC_FIELD_USER, &len); dfield_set_data(field, data, len); @@ -1745,7 +1756,7 @@ ibuf_rec_get_size( field_offset = 2; types_offset = DATA_ORDER_NULL_TYPE_BUF_SIZE; } else { - field_offset = 4; + field_offset = IBUF_REC_FIELD_USER; types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE; } @@ -1806,7 +1817,7 @@ ibuf_rec_get_volume_func( ut_ad(ibuf_inside(mtr)); ut_ad(rec_get_n_fields_old(ibuf_rec) > 2); - data = rec_get_nth_field_old(ibuf_rec, 1, &len); + data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len); pre_4_1 = (len > 1); if (pre_4_1) { @@ 
-1829,7 +1840,8 @@ ibuf_rec_get_volume_func( ut_a(trx_sys_multiple_tablespace_format); ut_a(*data == 0); - types = rec_get_nth_field_old(ibuf_rec, 3, &len); + types = rec_get_nth_field_old( + ibuf_rec, IBUF_REC_FIELD_METADATA, &len); ibuf_rec_get_info(mtr, ibuf_rec, &op, &comp, &info_len, NULL); @@ -1859,7 +1871,8 @@ ibuf_rec_get_volume_func( } types += info_len; - n_fields = rec_get_n_fields_old(ibuf_rec) - 4; + n_fields = rec_get_n_fields_old(ibuf_rec) + - IBUF_REC_FIELD_USER; } data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, pre_4_1, comp); @@ -1914,11 +1927,11 @@ ibuf_entry_build( n_fields = dtuple_get_n_fields(entry); - tuple = dtuple_create(heap, n_fields + 4); + tuple = dtuple_create(heap, n_fields + IBUF_REC_FIELD_USER); /* 1) Space Id */ - field = dtuple_get_nth_field(tuple, 0); + field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE); buf = mem_heap_alloc(heap, 4); @@ -1928,7 +1941,7 @@ ibuf_entry_build( /* 2) Marker byte */ - field = dtuple_get_nth_field(tuple, 1); + field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER); buf = mem_heap_alloc(heap, 1); @@ -1940,7 +1953,7 @@ ibuf_entry_build( /* 3) Page number */ - field = dtuple_get_nth_field(tuple, 2); + field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE); buf = mem_heap_alloc(heap, 4); @@ -1988,10 +2001,7 @@ ibuf_entry_build( ulint fixed_len; const dict_field_t* ifield; - /* We add 4 below because we have the 4 extra fields at the - start of an ibuf record */ - - field = dtuple_get_nth_field(tuple, i + 4); + field = dtuple_get_nth_field(tuple, i + IBUF_REC_FIELD_USER); entry_field = dtuple_get_nth_field(entry, i); dfield_copy(field, entry_field); @@ -2024,13 +2034,13 @@ ibuf_entry_build( /* 4) Type info, part #2 */ - field = dtuple_get_nth_field(tuple, 3); + field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_METADATA); dfield_set_data(field, type_info, ti - type_info); /* Set all the types in the new tuple binary */ - dtuple_set_types_binary(tuple, n_fields + 4); + 
dtuple_set_types_binary(tuple, n_fields + IBUF_REC_FIELD_USER); return(tuple); } @@ -2090,11 +2100,11 @@ ibuf_new_search_tuple_build( ut_a(trx_sys_multiple_tablespace_format); - tuple = dtuple_create(heap, 3); + tuple = dtuple_create(heap, IBUF_REC_FIELD_METADATA); /* Store the space id in tuple */ - field = dtuple_get_nth_field(tuple, 0); + field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE); buf = mem_heap_alloc(heap, 4); @@ -2104,7 +2114,7 @@ ibuf_new_search_tuple_build( /* Store the new format record marker byte */ - field = dtuple_get_nth_field(tuple, 1); + field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER); buf = mem_heap_alloc(heap, 1); @@ -2114,7 +2124,7 @@ ibuf_new_search_tuple_build( /* Store the page number in tuple */ - field = dtuple_get_nth_field(tuple, 2); + field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE); buf = mem_heap_alloc(heap, 4); @@ -2122,7 +2132,7 @@ ibuf_new_search_tuple_build( dfield_set_data(field, buf, 4); - dtuple_set_types_binary(tuple, 3); + dtuple_set_types_binary(tuple, IBUF_REC_FIELD_METADATA); return(tuple); } @@ -2789,8 +2799,10 @@ ibuf_get_volume_buffered_hash( ulint fold; ulint bitmask; - len = ibuf_rec_get_size(rec, types, rec_get_n_fields_old(rec) - 4, - FALSE, comp); + len = ibuf_rec_get_size( + rec, types, + rec_get_n_fields_old(rec) - IBUF_REC_FIELD_USER, + FALSE, comp); fold = ut_fold_binary(data, len); hash += (fold / (CHAR_BIT * sizeof *hash)) % size; @@ -2842,8 +2854,8 @@ ibuf_get_volume_buffered_count_func( ut_ad(ibuf_inside(mtr)); n_fields = rec_get_n_fields_old(rec); - ut_ad(n_fields > 4); - n_fields -= 4; + ut_ad(n_fields > IBUF_REC_FIELD_USER); + n_fields -= IBUF_REC_FIELD_USER; rec_get_nth_field_offs_old(rec, 1, &len); /* This function is only invoked when buffering new @@ -2852,7 +2864,7 @@ ibuf_get_volume_buffered_count_func( ut_a(len == 1); ut_ad(trx_sys_multiple_tablespace_format); - types = rec_get_nth_field_old(rec, 3, &len); + types = rec_get_nth_field_old(rec, 
IBUF_REC_FIELD_METADATA, &len); switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE, IBUF_REC_INFO_SIZE)) { @@ -3164,7 +3176,7 @@ ibuf_update_max_tablespace_id(void) } else { rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field_old(rec, 0, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); ut_a(len == 4); @@ -3186,10 +3198,12 @@ ibuf_update_max_tablespace_id(void) ibuf_get_entry_counter_low_func(rec,space,page_no) #endif /****************************************************************//** -Helper function for ibuf_set_entry_counter. Checks if rec is for (space, -page_no), and if so, reads counter value from it and returns that + 1. -Otherwise, returns 0. -@return new counter value, or 0 */ +Helper function for ibuf_get_entry_counter_func. Checks if rec is for +(space, page_no), and if so, reads counter value from it and returns +that + 1. +@retval ULINT_UNDEFINED if the record does not contain any counter +@retval 0 if the record is not for (space, page_no) +@retval 1 + previous counter value, otherwise */ static ulint ibuf_get_entry_counter_low_func( @@ -3210,7 +3224,7 @@ ibuf_get_entry_counter_low_func( || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); ut_ad(rec_get_n_fields_old(rec) > 2); - field = rec_get_nth_field_old(rec, 1, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len); if (UNIV_UNLIKELY(len != 1)) { /* pre-4.1 format */ @@ -3223,7 +3237,7 @@ ibuf_get_entry_counter_low_func( ut_a(trx_sys_multiple_tablespace_format); /* Check the tablespace identifier. */ - field = rec_get_nth_field_old(rec, 0, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); ut_a(len == 4); if (mach_read_from_4(field) != space) { @@ -3232,7 +3246,7 @@ ibuf_get_entry_counter_low_func( } /* Check the page offset. 
*/ - field = rec_get_nth_field_old(rec, 2, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len); ut_a(len == 4); if (mach_read_from_4(field) != page_no) { @@ -3241,7 +3255,7 @@ ibuf_get_entry_counter_low_func( } /* Check if the record contains a counter field. */ - field = rec_get_nth_field_old(rec, 3, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len); switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { default: @@ -3257,147 +3271,61 @@ ibuf_get_entry_counter_low_func( } } +#ifdef UNIV_DEBUG +# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \ + ibuf_get_entry_counter_func(space,page_no,rec,mtr,exact_leaf) +#else /* UNIV_DEBUG */ +# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \ + ibuf_get_entry_counter_func(space,page_no,rec,exact_leaf) +#endif + /****************************************************************//** -Set the counter field in entry to the correct value based on the current +Calculate the counter field for an entry based on the current last record in ibuf for (space, page_no). -@return FALSE if we should abort this insertion to ibuf */ +@return the counter field, or ULINT_UNDEFINED +if we should abort this insertion to ibuf */ static -ibool -ibuf_set_entry_counter( -/*===================*/ - dtuple_t* entry, /*!< in/out: entry to patch */ +ulint +ibuf_get_entry_counter_func( +/*========================*/ ulint space, /*!< in: space id of entry */ ulint page_no, /*!< in: page number of entry */ - btr_pcur_t* pcur, /*!< in: pcur positioned on the record - found by btr_pcur_open(.., entry, - PAGE_CUR_LE, ..., pcur, ...) 
*/ - ibool is_optimistic, /*!< in: is this an optimistic insert */ - mtr_t* mtr) /*!< in: mtr */ + const rec_t* rec, /*!< in: the record preceding the + insertion point */ +#ifdef UNIV_DEBUG + mtr_t* mtr, /*!< in: mini-transaction */ +#endif /* UNIV_DEBUG */ + ibool only_leaf) /*!< in: TRUE if this is the only + leaf page that can contain entries + for (space,page_no), that is, there + was no exact match for (space,page_no) + in the node pointer */ { - dfield_t* field; - byte* data; - ulint counter = 0; - - /* pcur points to either a user rec or to a page's infimum record. */ ut_ad(ibuf_inside(mtr)); - ut_ad(mtr_memo_contains(mtr, btr_pcur_get_block(pcur), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(page_validate(btr_pcur_get_page(pcur), ibuf->index)); - - if (btr_pcur_is_on_user_rec(pcur)) { - - counter = ibuf_get_entry_counter_low( - mtr, btr_pcur_get_rec(pcur), space, page_no); - - if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) { - /* The record lacks a counter field. - Such old records must be merged before - new records can be buffered. */ - - return(FALSE); - } - } else if (btr_pcur_is_before_first_in_tree(pcur, mtr)) { - /* Ibuf tree is either completely empty, or the insert - position is at the very first record of a non-empty tree. In - either case we have no previous records for (space, - page_no). */ - - counter = 0; - } else if (btr_pcur_is_before_first_on_page(pcur)) { - btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur); - - if (cursor->low_match < 3) { - /* If low_match < 3, we know that the father node - pointer did not contain the searched for (space, - page_no), which means that the search ended on the - right page regardless of the counter value, and - since we're at the infimum record, there are no - existing records. 
*/ - - counter = 0; - } else { - rec_t* rec; - const page_t* page; - buf_block_t* block; - page_t* prev_page; - ulint prev_page_no; - - ut_a(cursor->ibuf_cnt != ULINT_UNDEFINED); - - page = btr_pcur_get_page(pcur); - prev_page_no = btr_page_get_prev(page, mtr); - - ut_a(prev_page_no != FIL_NULL); - - block = buf_page_get( - IBUF_SPACE_ID, 0, prev_page_no, - RW_X_LATCH, mtr); + ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)); + ut_ad(page_validate(page_align(rec), ibuf->index)); - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); - - prev_page = buf_block_get_frame(block); - - rec = page_rec_get_prev( - page_get_supremum_rec(prev_page)); - - ut_ad(page_rec_is_user_rec(rec)); - - counter = ibuf_get_entry_counter_low( - mtr, rec, space, page_no); - - if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) { - /* The record lacks a counter field. - Such old records must be merged before - new records can be buffered. */ - - return(FALSE); - } - - if (counter < cursor->ibuf_cnt) { - /* Search ended on the wrong page. */ - - if (is_optimistic) { - /* In an optimistic insert, we can - shift the insert position to the left - page, since it only needs an X-latch - on the page itself, which the - original search acquired for us. */ - - btr_cur_position( - ibuf->index, rec, block, - btr_pcur_get_btr_cur(pcur)); - } else { - /* We can't shift the insert - position to the left page in a - pessimistic insert since it would - require an X-latch on the left - page's left page, so we have to - abort. */ - - return(FALSE); - } - } else { - /* The counter field in the father node is - the same as we would insert; we don't know - whether the insert should go to this page or - the left page (the later fields can differ), - so refuse the insert. */ - - return(FALSE); - } - } + if (page_rec_is_supremum(rec)) { + /* This is just for safety. The record should be a + page infimum or a user record. 
*/ + ut_ad(0); + return(ULINT_UNDEFINED); + } else if (!page_rec_is_infimum(rec)) { + return(ibuf_get_entry_counter_low(mtr, rec, space, page_no)); + } else if (only_leaf + || fil_page_get_prev(page_align(rec)) == FIL_NULL) { + /* The parent node pointer did not contain the + searched for (space, page_no), which means that the + search ended on the correct page regardless of the + counter value, and since we're at the infimum record, + there are no existing records. */ + return(0); } else { - /* The cursor is not positioned at or before a user record. */ - return(FALSE); + /* We used to read the previous page here. It would + break the latching order, because the caller has + buffer-fixed an insert buffer bitmap page. */ + return(ULINT_UNDEFINED); } - - /* Patch counter value in already built entry. */ - field = dtuple_get_nth_field(entry, 3); - data = dfield_get_data(field); - - mach_write_to_2(data + IBUF_REC_OFFSET_COUNTER, counter); - - return(TRUE); } /*********************************************************************//** @@ -3604,16 +3532,27 @@ fail_exit: } } - /* Patch correct counter value to the entry to insert. This can - change the insert position, which can result in the need to abort in - some cases. */ - if (!no_counter - && !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur, - mode == BTR_MODIFY_PREV, &mtr)) { + if (!no_counter) { + /* Patch correct counter value to the entry to + insert. This can change the insert position, which can + result in the need to abort in some cases. 
*/ + ulint counter = ibuf_get_entry_counter( + space, page_no, btr_pcur_get_rec(&pcur), &mtr, + btr_pcur_get_btr_cur(&pcur)->low_match + < IBUF_REC_FIELD_METADATA); + dfield_t* field; + + if (counter == ULINT_UNDEFINED) { bitmap_fail: - ibuf_mtr_commit(&bitmap_mtr); + ibuf_mtr_commit(&bitmap_mtr); + goto fail_exit; + } - goto fail_exit; + field = dtuple_get_nth_field( + ibuf_entry, IBUF_REC_FIELD_METADATA); + mach_write_to_2( + (byte*) dfield_get_data(field) + + IBUF_REC_OFFSET_COUNTER, counter); } /* Set the bitmap bit denoting that the insert buffer contains diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index be918439f59..4f33aacc48e 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -743,24 +743,6 @@ struct btr_cur_struct { NULL */ ulint fold; /*!< fold value used in the search if flag is BTR_CUR_HASH */ - /*----- Delete buffering -------*/ - ulint ibuf_cnt; /* in searches done on insert buffer - trees, this contains the "counter" - value (the first two bytes of the - fourth field) extracted from the - page above the leaf page, from the - father node pointer that pointed to - the leaf page. in other words, it - contains the minimum counter value - for records to be inserted on the - chosen leaf page. If for some reason - this can't be read, or if the search - ended on the leftmost leaf page in - the tree (in which case the father - node pointer had the 'minimum - record' flag set), this is - ULINT_UNDEFINED. 
*/ - /*------------------------------*/ /* @} */ btr_path_t* path_arr; /*!< in estimating the number of rows in range, we store in this array diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index 140f94466db..2ebd70a6f23 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -53,6 +53,16 @@ UNIV_INTERN btr_pcur_t* btr_pcur_create_for_mysql(void); /*============================*/ + +/**************************************************************//** +Resets a persistent cursor object, freeing ::old_rec_buf if it is +allocated and resetting the other members to their initial values. */ +UNIV_INTERN +void +btr_pcur_reset( +/*===========*/ + btr_pcur_t* cursor);/*!< in, out: persistent cursor */ + /**************************************************************//** Frees the memory for a persistent cursor object. */ UNIV_INTERN diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index c0ff7b1766b..456f077a13d 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -910,7 +910,27 @@ buf_block_set_io_fix( /*=================*/ buf_block_t* block, /*!< in/out: control block */ enum buf_io_fix io_fix);/*!< in: io_fix state */ - +/*********************************************************************//** +Makes a block sticky. A sticky block implies that even after we release +the buf_pool->mutex and the block->mutex: +* it cannot be removed from the flush_list +* the block descriptor cannot be relocated +* it cannot be removed from the LRU list +Note that: +* the block can still change its position in the LRU list +* the next and previous pointers can change. */ +UNIV_INLINE +void +buf_page_set_sticky( +/*================*/ + buf_page_t* bpage); /*!< in/out: control block */ +/*********************************************************************//** +Removes stickiness of a block. 
*/ +UNIV_INLINE +void +buf_page_unset_sticky( +/*==================*/ + buf_page_t* bpage); /*!< in/out: control block */ /********************************************************************//** Determine if a buffer block can be relocated in memory. The block can be dirty, but it must not be I/O-fixed or bufferfixed. */ diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index b65b5133c15..99e55df3312 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -414,6 +414,7 @@ buf_page_get_io_fix( case BUF_IO_NONE: case BUF_IO_READ: case BUF_IO_WRITE: + case BUF_IO_PIN: return(io_fix); } ut_error; @@ -464,6 +465,49 @@ buf_block_set_io_fix( buf_page_set_io_fix(&block->page, io_fix); } +/*********************************************************************//** +Makes a block sticky. A sticky block implies that even after we release +the buf_pool->mutex and the block->mutex: +* it cannot be removed from the flush_list +* the block descriptor cannot be relocated +* it cannot be removed from the LRU list +Note that: +* the block can still change its position in the LRU list +* the next and previous pointers can change. */ +UNIV_INLINE +void +buf_page_set_sticky( +/*================*/ + buf_page_t* bpage) /*!< in/out: control block */ +{ +#ifdef UNIV_DEBUG + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + ut_ad(buf_pool_mutex_own(buf_pool)); +#endif + ut_ad(mutex_own(buf_page_get_mutex(bpage))); + ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE); + + bpage->io_fix = BUF_IO_PIN; +} + +/*********************************************************************//** +Removes stickiness of a block. 
*/ +UNIV_INLINE +void +buf_page_unset_sticky( +/*==================*/ + buf_page_t* bpage) /*!< in/out: control block */ +{ +#ifdef UNIV_DEBUG + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + ut_ad(buf_pool_mutex_own(buf_pool)); +#endif + ut_ad(mutex_own(buf_page_get_mutex(bpage))); + ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN); + + bpage->io_fix = BUF_IO_NONE; +} + /********************************************************************//** Determine if a buffer block can be relocated in memory. The block can be dirty, but it must not be I/O-fixed or bufferfixed. */ diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index 0cc2defb3ff..12b9e22f673 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -57,7 +57,10 @@ enum buf_flush { enum buf_io_fix { BUF_IO_NONE = 0, /**< no pending I/O */ BUF_IO_READ, /**< read pending */ - BUF_IO_WRITE /**< write pending */ + BUF_IO_WRITE, /**< write pending */ + BUF_IO_PIN /**< disallow relocation of + block and its removal from + the flush_list */ }; /** Parameters of binary buddy system for compressed pages (buf0buddy.h) */ diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h index f7bdd29ed90..6d3c2988fdc 100644 --- a/storage/innobase/include/data0data.h +++ b/storage/innobase/include/data0data.h @@ -231,6 +231,26 @@ dtuple_set_n_fields_cmp( dtuple_t* tuple, /*!< in: tuple */ ulint n_fields_cmp); /*!< in: number of fields used in comparisons in rem0cmp.* */ + +/* Estimate the number of bytes that are going to be allocated when +creating a new dtuple_t object */ +#define DTUPLE_EST_ALLOC(n_fields) \ + (sizeof(dtuple_t) + (n_fields) * sizeof(dfield_t)) + +/**********************************************************//** +Creates a data tuple from an already allocated chunk of memory. +The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields).
+The default value for number of fields used in record comparisons +for this tuple is n_fields. +@return created tuple (inside buf) */ +UNIV_INLINE +dtuple_t* +dtuple_create_from_mem( +/*===================*/ + void* buf, /*!< in, out: buffer to use */ + ulint buf_size, /*!< in: buffer size */ + ulint n_fields); /*!< in: number of fields */ + /**********************************************************//** Creates a data tuple to a memory heap. The default value for number of fields used in record comparisons for this tuple is n_fields. @@ -240,7 +260,8 @@ dtuple_t* dtuple_create( /*==========*/ mem_heap_t* heap, /*!< in: memory heap where the tuple - is created */ + is created, DTUPLE_EST_ALLOC(n_fields) + bytes will be allocated from this heap */ ulint n_fields); /*!< in: number of fields */ /**********************************************************//** diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic index 5c0f8039c80..205fa397987 100644 --- a/storage/innobase/include/data0data.ic +++ b/storage/innobase/include/data0data.ic @@ -348,23 +348,25 @@ dtuple_get_nth_field( #endif /* UNIV_DEBUG */ /**********************************************************//** -Creates a data tuple to a memory heap. The default value for number -of fields used in record comparisons for this tuple is n_fields. -@return own: created tuple */ +Creates a data tuple from an already allocated chunk of memory. +The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields). +The default value for number of fields used in record comparisons +for this tuple is n_fields. 
+@return created tuple (inside buf) */ UNIV_INLINE dtuple_t* -dtuple_create( -/*==========*/ - mem_heap_t* heap, /*!< in: memory heap where the tuple - is created */ - ulint n_fields) /*!< in: number of fields */ +dtuple_create_from_mem( +/*===================*/ + void* buf, /*!< in, out: buffer to use */ + ulint buf_size, /*!< in: buffer size */ + ulint n_fields) /*!< in: number of fields */ { dtuple_t* tuple; - ut_ad(heap); + ut_ad(buf != NULL); + ut_a(buf_size >= DTUPLE_EST_ALLOC(n_fields)); - tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t) - + n_fields * sizeof(dfield_t)); + tuple = (dtuple_t*) buf; tuple->info_bits = 0; tuple->n_fields = n_fields; tuple->n_fields_cmp = n_fields; @@ -386,9 +388,38 @@ dtuple_create( dfield_get_type(field)->mtype = DATA_ERROR; } } +#endif + return(tuple); +} + +/**********************************************************//** +Creates a data tuple to a memory heap. The default value for number +of fields used in record comparisons for this tuple is n_fields. +@return own: created tuple */ +UNIV_INLINE +dtuple_t* +dtuple_create( +/*==========*/ + mem_heap_t* heap, /*!< in: memory heap where the tuple + is created, DTUPLE_EST_ALLOC(n_fields) + bytes will be allocated from this heap */ + ulint n_fields) /*!< in: number of fields */ +{ + void* buf; + ulint buf_size; + dtuple_t* tuple; + + ut_ad(heap); + + buf_size = DTUPLE_EST_ALLOC(n_fields); + buf = mem_heap_alloc(heap, buf_size); + tuple = dtuple_create_from_mem(buf, buf_size, n_fields); + +#ifdef UNIV_DEBUG UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields); #endif + return(tuple); } diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 3a6336f1a01..d50b0cb4162 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -328,18 +328,23 @@ Reads the flushed lsn and arch no fields from a data file at database startup. 
*/ UNIV_INTERN void -fil_read_flushed_lsn_and_arch_log_no( -/*=================================*/ +fil_read_first_page( +/*================*/ os_file_t data_file, /*!< in: open data file */ ibool one_read_already, /*!< in: TRUE if min and max parameters below already contain sensible data */ + ulint* flags, /*!< out: tablespace flags */ #ifdef UNIV_LOG_ARCHIVE - ulint* min_arch_log_no, /*!< in/out: */ - ulint* max_arch_log_no, /*!< in/out: */ + ulint* min_arch_log_no, /*!< out: min of archived + log numbers in data files */ + ulint* max_arch_log_no, /*!< out: max of archived + log numbers in data files */ #endif /* UNIV_LOG_ARCHIVE */ - ib_uint64_t* min_flushed_lsn, /*!< in/out: */ - ib_uint64_t* max_flushed_lsn); /*!< in/out: */ + ib_uint64_t* min_flushed_lsn, /*!< out: min of flushed + lsn values in data files */ + ib_uint64_t* max_flushed_lsn); /*!< out: max of flushed + lsn values in data files */ /*******************************************************************//** Increments the count of pending insert buffer page merges, if space is not being deleted. diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index 6e70fbf6f66..4efabacb2cb 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -34,6 +34,90 @@ Created 12/18/1995 Heikki Tuuri #include "page0types.h" #include "fsp0types.h" +/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */ + +/** Number of flag bits used to indicate the tablespace page size */ +#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4 +/** Zero relative shift position of the PAGE_SSIZE field */ +#define FSP_FLAGS_POS_PAGE_SSIZE 6 +/** Bit mask of the PAGE_SSIZE field */ +#define FSP_FLAGS_MASK_PAGE_SSIZE \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \ + << FSP_FLAGS_POS_PAGE_SSIZE) +/** Return the value of the PAGE_SSIZE field */ +#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \ + >> FSP_FLAGS_POS_PAGE_SSIZE) + +/* @} */ + +/* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */ + +/** Offset of the space header within a file page */ +#define FSP_HEADER_OFFSET FIL_PAGE_DATA + +/* The data structures in files are defined just as byte strings in C */ +typedef byte fsp_header_t; +typedef byte xdes_t; + +/* SPACE HEADER + ============ + +File space header data structure: this data structure is contained in the +first page of a space. The space for this header is reserved in every extent +descriptor page, but used only in the first. 
*/ + +/*-------------------------------------*/ +#define FSP_SPACE_ID 0 /* space id */ +#define FSP_NOT_USED 4 /* this field contained a value up to + which we know that the modifications + in the database have been flushed to + the file space; not used now */ +#define FSP_SIZE 8 /* Current size of the space in + pages */ +#define FSP_FREE_LIMIT 12 /* Minimum page number for which the + free list has not been initialized: + the pages >= this limit are, by + definition, free; note that in a + single-table tablespace where size + < 64 pages, this number is 64, i.e., + we have initialized the space + about the first extent, but have not + physically allocted those pages to the + file */ +#define FSP_SPACE_FLAGS 16 /* fsp_space_t.flags, similar to + dict_table_t::flags */ +#define FSP_FRAG_N_USED 20 /* number of used pages in the + FSP_FREE_FRAG list */ +#define FSP_FREE 24 /* list of free extents */ +#define FSP_FREE_FRAG (24 + FLST_BASE_NODE_SIZE) + /* list of partially free extents not + belonging to any segment */ +#define FSP_FULL_FRAG (24 + 2 * FLST_BASE_NODE_SIZE) + /* list of full extents not belonging + to any segment */ +#define FSP_SEG_ID (24 + 3 * FLST_BASE_NODE_SIZE) + /* 8 bytes which give the first unused + segment id */ +#define FSP_SEG_INODES_FULL (32 + 3 * FLST_BASE_NODE_SIZE) + /* list of pages containing segment + headers, where all the segment inode + slots are reserved */ +#define FSP_SEG_INODES_FREE (32 + 4 * FLST_BASE_NODE_SIZE) + /* list of pages containing segment + headers, where not all the segment + header slots are reserved */ +/*-------------------------------------*/ +/* File space header size */ +#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE) + +#define FSP_FREE_ADD 4 /* this many free extents are added + to the free list from above + FSP_FREE_LIMIT at a time */ +/* @} */ + +/* @} */ + /**********************************************************************//** Initializes the file space system. 
*/ UNIV_INTERN @@ -353,6 +437,18 @@ fseg_print( mtr_t* mtr); /*!< in: mtr */ #endif /* UNIV_BTR_PRINT */ +/********************************************************************//** +Extract the page size from tablespace flags. +This feature, storing the page_ssize into the tablespace flags, is added +to InnoDB 5.6.4. This is here only to protect against a crash if a newer +database is opened with this code branch. +@return page size of the tablespace in bytes */ +UNIV_INLINE +ulint +fsp_flags_get_page_size( +/*====================*/ + ulint flags); /*!< in: tablespace flags */ + #ifndef UNIV_NONINL #include "fsp0fsp.ic" #endif diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic index 434c370b527..c92111a9d89 100644 --- a/storage/innobase/include/fsp0fsp.ic +++ b/storage/innobase/include/fsp0fsp.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -43,3 +43,31 @@ fsp_descr_page( return(UNIV_UNLIKELY((page_no & (zip_size - 1)) == FSP_XDES_OFFSET)); } +/********************************************************************//** +Extract the page size from tablespace flags. +This feature, storing the page_ssize into the tablespace flags, is added +to InnoDB 5.6.4. This is here only to protect against a crash if a newer +database is opened with this code branch. +@return page size of the tablespace in bytes */ +UNIV_INLINE +ulint +fsp_flags_get_page_size( +/*====================*/ + ulint flags) /*!< in: tablespace flags */ +{ + ulint page_size = 0; + ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); + + /* Convert from a 'log2 minus 9' to a page size in bytes. */ + if (UNIV_UNLIKELY(ssize)) { + page_size = (512 << ssize); + + ut_ad(page_size <= UNIV_PAGE_SIZE); + } else { + /* If the page size was not stored, then it is the + original 16k. 
*/ + page_size = UNIV_PAGE_SIZE; + } + + return(page_size); +} diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic index d214c3fe6c9..c70615e1ca9 100644 --- a/storage/innobase/include/mem0mem.ic +++ b/storage/innobase/include/mem0mem.ic @@ -209,10 +209,6 @@ mem_heap_alloc( buf = (byte*)buf + MEM_FIELD_HEADER_SIZE; #endif -#ifdef UNIV_SET_MEM_TO_ZERO - UNIV_MEM_ALLOC(buf, n); - memset(buf, '\0', n); -#endif UNIV_MEM_ALLOC(buf, n); return(buf); } diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic index 960c25d6051..1db4a4bd735 100644 --- a/storage/innobase/include/mtr0mtr.ic +++ b/storage/innobase/include/mtr0mtr.ic @@ -44,6 +44,7 @@ mtr_start( mtr->log_mode = MTR_LOG_ALL; mtr->modifications = FALSE; + mtr->inside_ibuf = FALSE; mtr->n_log_recs = 0; ut_d(mtr->state = MTR_ACTIVE); diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index 6a82e820312..540ec7855bc 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -893,6 +893,7 @@ page_parse_create( ulint comp, /*!< in: nonzero=compact page format */ buf_block_t* block, /*!< in: block or NULL */ mtr_t* mtr); /*!< in: mtr or NULL */ +#ifndef UNIV_HOTBACKUP /************************************************************//** Prints record contents including the data relevant only in the index page context. */ @@ -902,6 +903,7 @@ page_rec_print( /*===========*/ const rec_t* rec, /*!< in: physical record */ const ulint* offsets);/*!< in: record descriptor */ +# ifdef UNIV_BTR_PRINT /***************************************************************//** This is used to print the contents of the directory for debugging purposes. 
*/ @@ -941,6 +943,8 @@ page_print( in directory */ ulint rn); /*!< in: print rn first and last records in directory */ +# endif /* UNIV_BTR_PRINT */ +#endif /* !UNIV_HOTBACKUP */ /***************************************************************//** The following is used to validate a record on a page. This function differs from rec_validate as it can also check the n_owned field and diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index dd619406ab9..e17fd584110 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -168,7 +168,9 @@ UNIV_INTERN row_prebuilt_t* row_create_prebuilt( /*================*/ - dict_table_t* table); /*!< in: Innobase table handle */ + dict_table_t* table, /*!< in: Innobase table handle */ + ulint mysql_row_len); /*!< in: length in bytes of a row in + the MySQL format */ /********************************************************************//** Free a prebuilt struct for a MySQL table handle. */ UNIV_INTERN @@ -672,9 +674,9 @@ struct row_prebuilt_struct { in inserts */ que_fork_t* upd_graph; /*!< Innobase SQL query graph used in updates or deletes */ - btr_pcur_t* pcur; /*!< persistent cursor used in selects + btr_pcur_t pcur; /*!< persistent cursor used in selects and updates */ - btr_pcur_t* clust_pcur; /*!< persistent cursor used in + btr_pcur_t clust_pcur; /*!< persistent cursor used in some selects and updates */ que_fork_t* sel_graph; /*!< dummy query graph used in selects */ diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h index 8544b9d08ba..1c4ea6f7244 100644 --- a/storage/innobase/include/row0sel.h +++ b/storage/innobase/include/row0sel.h @@ -128,7 +128,12 @@ row_sel_convert_mysql_key_to_innobase( in the tuple is already according to index! 
*/ byte* buf, /*!< in: buffer to use in field - conversions */ + conversions; NOTE that dtuple->data + may end up pointing inside buf so + do not discard that buffer while + the tuple is being used. See + row_mysql_store_col_in_innobase_format() + in the case of DATA_INT */ ulint buf_len, /*!< in: buffer length */ dict_index_t* index, /*!< in: index of the key value */ const byte* key_ptr, /*!< in: MySQL key value */ diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 21d1eb03b99..50837d262c3 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -154,14 +154,6 @@ resolved */ /* DEBUG VERSION CONTROL ===================== */ -/* The following flag will make InnoDB to initialize -all memory it allocates to zero. It hides Purify -warnings about reading unallocated memory unless -memory is read outside the allocated blocks. */ -/* -#define UNIV_INIT_MEM_TO_ZERO -*/ - /* When this macro is defined then additional test functions will be compiled. These functions live at the end of each relevant source file and have "test_" prefix. These functions are not called from anywhere in @@ -231,15 +223,6 @@ operations (very slow); also UNIV_DEBUG must be defined */ #define UNIV_BTR_DEBUG /* check B-tree links */ #define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */ -#ifdef HAVE_purify -/* The following sets all new allocated memory to zero before use: -this can be used to eliminate unnecessary Purify warnings, but note that -it also masks many bugs Purify could detect. For detailed Purify analysis it -is best to remove the define below and look through the warnings one -by one. 
*/ -#define UNIV_SET_MEM_TO_ZERO -#endif - /* #define UNIV_SQL_DEBUG #define UNIV_LOG_DEBUG diff --git a/storage/innobase/include/ut0mem.h b/storage/innobase/include/ut0mem.h index faf6f242883..39f5f20dc6d 100644 --- a/storage/innobase/include/ut0mem.h +++ b/storage/innobase/include/ut0mem.h @@ -78,40 +78,19 @@ ut_mem_init(void); /*=============*/ /**********************************************************************//** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined and set_to_zero is TRUE. +Allocates memory. @return own: allocated memory */ UNIV_INTERN void* ut_malloc_low( /*==========*/ ulint n, /*!< in: number of bytes to allocate */ - ibool set_to_zero, /*!< in: TRUE if allocated memory - should be set to zero if - UNIV_SET_MEM_TO_ZERO is defined */ - ibool assert_on_error); /*!< in: if TRUE, we crash mysqld if + ibool assert_on_error) /*!< in: if TRUE, we crash mysqld if the memory cannot be allocated */ + __attribute__((malloc)); /**********************************************************************//** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined. -@return own: allocated memory */ -UNIV_INTERN -void* -ut_malloc( -/*======*/ - ulint n); /*!< in: number of bytes to allocate */ -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs -out. It cannot be used if we want to return an error message. Prints to -stderr a message if fails. -@return TRUE if succeeded */ -UNIV_INTERN -ibool -ut_test_malloc( -/*===========*/ - ulint n); /*!< in: try to allocate this many bytes */ -#endif /* !UNIV_HOTBACKUP */ +Allocates memory. */ +#define ut_malloc(n) ut_malloc_low(n, TRUE) /**********************************************************************//** Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is a nop. 
*/ diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic index df99c384bbe..795b8ab7a85 100644 --- a/storage/innobase/include/ut0rnd.ic +++ b/storage/innobase/include/ut0rnd.ic @@ -114,7 +114,7 @@ ut_rnd_interval( rnd = ut_rnd_gen_ulint(); - return(low + (rnd % (high - low + 1))); + return(low + (rnd % (high - low))); } /*********************************************************//** diff --git a/storage/innobase/mem/mem0pool.c b/storage/innobase/mem/mem0pool.c index dc68cf8eb24..50dbe526d64 100644 --- a/storage/innobase/mem/mem0pool.c +++ b/storage/innobase/mem/mem0pool.c @@ -228,11 +228,7 @@ mem_pool_create( pool = ut_malloc(sizeof(mem_pool_t)); - /* We do not set the memory to zero (FALSE) in the pool, - but only when allocated at a higher level in mem0mem.c. - This is to avoid masking useful Purify warnings. */ - - pool->buf = ut_malloc_low(size, FALSE, TRUE); + pool->buf = ut_malloc_low(size, TRUE); pool->size = size; mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL); diff --git a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c index 0f56a608f38..68321e1aaf9 100644 --- a/storage/innobase/os/os0proc.c +++ b/storage/innobase/os/os0proc.c @@ -111,9 +111,6 @@ os_mem_alloc_large( os_fast_mutex_lock(&ut_list_mutex); ut_total_allocated_memory += size; os_fast_mutex_unlock(&ut_list_mutex); -# ifdef UNIV_SET_MEM_TO_ZERO - memset(ptr, '\0', size); -# endif UNIV_MEM_ALLOC(ptr, size); return(ptr); } diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c index 102274d66f3..4858929082a 100644 --- a/storage/innobase/page/page0page.c +++ b/storage/innobase/page/page0page.c @@ -1591,13 +1591,14 @@ page_rec_print( " n_owned: %lu; heap_no: %lu; next rec: %lu\n", (ulong) rec_get_n_owned_old(rec), (ulong) rec_get_heap_no_old(rec), - (ulong) rec_get_next_offs(rec, TRUE)); + (ulong) rec_get_next_offs(rec, FALSE)); } page_rec_check(rec); rec_validate(rec, offsets); } +# ifdef UNIV_BTR_PRINT 
/***************************************************************//** This is used to print the contents of the directory for debugging purposes. */ @@ -1758,6 +1759,7 @@ page_print( page_dir_print(page, dn); page_print_list(block, index, rn); } +# endif /* UNIV_BTR_PRINT */ #endif /* !UNIV_HOTBACKUP */ /***************************************************************//** diff --git a/storage/innobase/pars/pars0pars.c b/storage/innobase/pars/pars0pars.c index ef107f2896f..86f54195682 100644 --- a/storage/innobase/pars/pars0pars.c +++ b/storage/innobase/pars/pars0pars.c @@ -1857,7 +1857,7 @@ pars_sql( ut_ad(str); - heap = mem_heap_create(256); + heap = mem_heap_create(16000); /* Currently, the parser is not reentrant: */ ut_ad(mutex_own(&(dict_sys->mutex))); diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c index d06411e09f0..996a49f76e8 100644 --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -289,21 +289,21 @@ row_mysql_pad_col( /* space=0x0020 */ pad_end = pad + len; ut_a(!(len % 2)); - do { + while (pad < pad_end) { *pad++ = 0x00; *pad++ = 0x20; - } while (pad < pad_end); + }; break; case 4: /* space=0x00000020 */ pad_end = pad + len; ut_a(!(len % 4)); - do { + while (pad < pad_end) { *pad++ = 0x00; *pad++ = 0x00; *pad++ = 0x00; *pad++ = 0x20; - } while (pad < pad_end); + } break; } } @@ -667,17 +667,60 @@ UNIV_INTERN row_prebuilt_t* row_create_prebuilt( /*================*/ - dict_table_t* table) /*!< in: Innobase table handle */ + dict_table_t* table, /*!< in: Innobase table handle */ + ulint mysql_row_len) /*!< in: length in bytes of a row in + the MySQL format */ { row_prebuilt_t* prebuilt; mem_heap_t* heap; dict_index_t* clust_index; dtuple_t* ref; ulint ref_len; + ulint search_tuple_n_fields; + + search_tuple_n_fields = 2 * dict_table_get_n_cols(table); + + clust_index = dict_table_get_first_index(table); - heap = mem_heap_create(sizeof *prebuilt + 128); + /* Make sure that search_tuple is long 
enough for clustered index */ + ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields); + + ref_len = dict_index_get_n_unique(clust_index); - prebuilt = mem_heap_zalloc(heap, sizeof *prebuilt); +#define PREBUILT_HEAP_INITIAL_SIZE \ + ( \ + sizeof(*prebuilt) \ + /* allocd in this function */ \ + + DTUPLE_EST_ALLOC(search_tuple_n_fields) \ + + DTUPLE_EST_ALLOC(ref_len) \ + /* allocd in row_prebuild_sel_graph() */ \ + + sizeof(sel_node_t) \ + + sizeof(que_fork_t) \ + + sizeof(que_thr_t) \ + /* allocd in row_get_prebuilt_update_vector() */ \ + + sizeof(upd_node_t) \ + + sizeof(upd_t) \ + + sizeof(upd_field_t) \ + * dict_table_get_n_cols(table) \ + + sizeof(que_fork_t) \ + + sizeof(que_thr_t) \ + /* allocd in row_get_prebuilt_insert_row() */ \ + + sizeof(ins_node_t) \ + /* mysql_row_len could be huge and we are not \ + sure if this prebuilt instance is going to be \ + used in inserts */ \ + + (mysql_row_len < 256 ? mysql_row_len : 0) \ + + DTUPLE_EST_ALLOC(dict_table_get_n_cols(table)) \ + + sizeof(que_fork_t) \ + + sizeof(que_thr_t) \ + ) + + /* We allocate enough space for the objects that are likely to + be created later in order to minimize the number of malloc() + calls */ + heap = mem_heap_create(PREBUILT_HEAP_INITIAL_SIZE); + + prebuilt = mem_heap_zalloc(heap, sizeof(*prebuilt)); prebuilt->magic_n = ROW_PREBUILT_ALLOCATED; prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED; @@ -687,23 +730,15 @@ row_create_prebuilt( prebuilt->sql_stat_start = TRUE; prebuilt->heap = heap; - prebuilt->pcur = btr_pcur_create_for_mysql(); - prebuilt->clust_pcur = btr_pcur_create_for_mysql(); + btr_pcur_reset(&prebuilt->pcur); + btr_pcur_reset(&prebuilt->clust_pcur); prebuilt->select_lock_type = LOCK_NONE; prebuilt->stored_select_lock_type = 99999999; UNIV_MEM_INVALID(&prebuilt->stored_select_lock_type, sizeof prebuilt->stored_select_lock_type); - prebuilt->search_tuple = dtuple_create( - heap, 2 * dict_table_get_n_cols(table)); - - clust_index = dict_table_get_first_index(table); - 
- /* Make sure that search_tuple is long enough for clustered index */ - ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields); - - ref_len = dict_index_get_n_unique(clust_index); + prebuilt->search_tuple = dtuple_create(heap, search_tuple_n_fields); ref = dtuple_create(heap, ref_len); @@ -720,6 +755,8 @@ row_create_prebuilt( prebuilt->autoinc_last_value = 0; + prebuilt->mysql_row_len = mysql_row_len; + return(prebuilt); } @@ -755,8 +792,8 @@ row_prebuilt_free( prebuilt->magic_n = ROW_PREBUILT_FREED; prebuilt->magic_n2 = ROW_PREBUILT_FREED; - btr_pcur_free_for_mysql(prebuilt->pcur); - btr_pcur_free_for_mysql(prebuilt->clust_pcur); + btr_pcur_reset(&prebuilt->pcur); + btr_pcur_reset(&prebuilt->clust_pcur); if (prebuilt->mysql_template) { mem_free(prebuilt->mysql_template); @@ -1416,11 +1453,11 @@ row_update_for_mysql( clust_index = dict_table_get_first_index(table); - if (prebuilt->pcur->btr_cur.index == clust_index) { - btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur); + if (prebuilt->pcur.btr_cur.index == clust_index) { + btr_pcur_copy_stored_position(node->pcur, &prebuilt->pcur); } else { btr_pcur_copy_stored_position(node->pcur, - prebuilt->clust_pcur); + &prebuilt->clust_pcur); } ut_a(node->pcur->rel_pos == BTR_PCUR_ON); @@ -1524,8 +1561,8 @@ row_unlock_for_mysql( clust_pcur, and we do not need to reposition the cursors. */ { - btr_pcur_t* pcur = prebuilt->pcur; - btr_pcur_t* clust_pcur = prebuilt->clust_pcur; + btr_pcur_t* pcur = &prebuilt->pcur; + btr_pcur_t* clust_pcur = &prebuilt->clust_pcur; trx_t* trx = prebuilt->trx; ut_ad(prebuilt && trx); diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c index 5d8f53f68da..20d45c1884d 100644 --- a/storage/innobase/row/row0sel.c +++ b/storage/innobase/row/row0sel.c @@ -2301,7 +2301,12 @@ row_sel_convert_mysql_key_to_innobase( in the tuple is already according to index! 
*/ byte* buf, /*!< in: buffer to use in field - conversions */ + conversions; NOTE that dtuple->data + may end up pointing inside buf so + do not discard that buffer while + the tuple is being used. See + row_mysql_store_col_in_innobase_format() + in the case of DATA_INT */ ulint buf_len, /*!< in: buffer length */ dict_index_t* index, /*!< in: index of the key value */ const byte* key_ptr, /*!< in: MySQL key value */ @@ -2433,6 +2438,7 @@ row_sel_convert_mysql_key_to_innobase( /* Storing may use at most data_len bytes of buf */ if (UNIV_LIKELY(!is_null)) { + ut_a(buf + data_len <= original_buf + buf_len); row_mysql_store_col_in_innobase_format( dfield, buf, FALSE, /* MySQL key value format col */ @@ -2915,17 +2921,17 @@ row_sel_get_clust_rec_for_mysql( btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref, PAGE_CUR_LE, BTR_SEARCH_LEAF, - prebuilt->clust_pcur, 0, mtr); + &prebuilt->clust_pcur, 0, mtr); - clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur); + clust_rec = btr_pcur_get_rec(&prebuilt->clust_pcur); - prebuilt->clust_pcur->trx_if_known = trx; + prebuilt->clust_pcur.trx_if_known = trx; /* Note: only if the search ends up on a non-infimum record is the low_match value the real match to the search tuple */ if (!page_rec_is_user_rec(clust_rec) - || btr_pcur_get_low_match(prebuilt->clust_pcur) + || btr_pcur_get_low_match(&prebuilt->clust_pcur) < dict_index_get_n_unique(clust_index)) { /* In a rare case it is possible that no clust rec is found @@ -2974,7 +2980,7 @@ row_sel_get_clust_rec_for_mysql( we set a LOCK_REC_NOT_GAP type lock */ err = lock_clust_rec_read_check_and_lock( - 0, btr_pcur_get_block(prebuilt->clust_pcur), + 0, btr_pcur_get_block(&prebuilt->clust_pcur), clust_rec, clust_index, *offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); switch (err) { @@ -3052,7 +3058,7 @@ func_exit: /* We may use the cursor in update or in unlock_row(): store its position */ - btr_pcur_store_position(prebuilt->clust_pcur, mtr); + 
btr_pcur_store_position(&prebuilt->clust_pcur, mtr); } err_exit: @@ -3300,7 +3306,7 @@ row_sel_try_search_shortcut_for_mysql( { dict_index_t* index = prebuilt->index; const dtuple_t* search_tuple = prebuilt->search_tuple; - btr_pcur_t* pcur = prebuilt->pcur; + btr_pcur_t* pcur = &prebuilt->pcur; trx_t* trx = prebuilt->trx; const rec_t* rec; @@ -3389,7 +3395,7 @@ row_search_for_mysql( dict_index_t* index = prebuilt->index; ibool comp = dict_table_is_comp(index->table); const dtuple_t* search_tuple = prebuilt->search_tuple; - btr_pcur_t* pcur = prebuilt->pcur; + btr_pcur_t* pcur = &prebuilt->pcur; trx_t* trx = prebuilt->trx; dict_index_t* clust_index; que_thr_t* thr; diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c index cf11e75b8e0..edc655e93b9 100644 --- a/storage/innobase/srv/srv0start.c +++ b/storage/innobase/srv/srv0start.c @@ -733,6 +733,7 @@ open_or_create_data_files( ibool one_created = FALSE; ulint size; ulint size_high; + ulint flags; ulint rounded_size_pages; char name[10000]; @@ -914,12 +915,31 @@ open_or_create_data_files( return(DB_ERROR); } skip_size_check: - fil_read_flushed_lsn_and_arch_log_no( - files[i], one_opened, + fil_read_first_page( + files[i], one_opened, &flags, #ifdef UNIV_LOG_ARCHIVE min_arch_log_no, max_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ min_flushed_lsn, max_flushed_lsn); + + if (UNIV_PAGE_SIZE + != fsp_flags_get_page_size(flags)) { + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: data file %s" + " uses page size %lu,\n", + name, + fsp_flags_get_page_size(flags)); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: but the only supported" + " page size in this release is=%lu\n", + (ulong) UNIV_PAGE_SIZE); + + return(DB_ERROR); + } + one_opened = TRUE; } else { /* We created the data file and now write it full of diff --git a/storage/innobase/ut/ut0mem.c b/storage/innobase/ut/ut0mem.c index 303fdd6dd44..cb6b050beca 100644 --- a/storage/innobase/ut/ut0mem.c +++ 
b/storage/innobase/ut/ut0mem.c @@ -84,17 +84,13 @@ ut_mem_init(void) #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined and set_to_zero is TRUE. +Allocates memory. @return own: allocated memory */ UNIV_INTERN void* ut_malloc_low( /*==========*/ ulint n, /*!< in: number of bytes to allocate */ - ibool set_to_zero, /*!< in: TRUE if allocated memory should be - set to zero if UNIV_SET_MEM_TO_ZERO is - defined */ ibool assert_on_error)/*!< in: if TRUE, we crash mysqld if the memory cannot be allocated */ { @@ -106,12 +102,6 @@ ut_malloc_low( ret = malloc(n); ut_a(ret || !assert_on_error); -#ifdef UNIV_SET_MEM_TO_ZERO - if (set_to_zero) { - memset(ret, '\0', n); - UNIV_MEM_ALLOC(ret, n); - } -#endif return(ret); } @@ -193,12 +183,6 @@ retry: } } - if (set_to_zero) { -#ifdef UNIV_SET_MEM_TO_ZERO - memset(ret, '\0', n + sizeof(ut_mem_block_t)); -#endif - } - UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t)); ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t); @@ -215,75 +199,11 @@ retry: void* ret = malloc(n); ut_a(ret || !assert_on_error); -# ifdef UNIV_SET_MEM_TO_ZERO - if (set_to_zero) { - memset(ret, '\0', n); - } -# endif return(ret); #endif /* !UNIV_HOTBACKUP */ } /**********************************************************************//** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined. -@return own: allocated memory */ -UNIV_INTERN -void* -ut_malloc( -/*======*/ - ulint n) /*!< in: number of bytes to allocate */ -{ -#ifndef UNIV_HOTBACKUP - return(ut_malloc_low(n, TRUE, TRUE)); -#else /* !UNIV_HOTBACKUP */ - return(malloc(n)); -#endif /* !UNIV_HOTBACKUP */ -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs -out. It cannot be used if we want to return an error message. 
Prints to -stderr a message if fails. -@return TRUE if succeeded */ -UNIV_INTERN -ibool -ut_test_malloc( -/*===========*/ - ulint n) /*!< in: try to allocate this many bytes */ -{ - void* ret; - - ret = malloc(n); - - if (ret == NULL) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: cannot allocate" - " %lu bytes of memory for\n" - "InnoDB: a BLOB with malloc! Total allocated memory\n" - "InnoDB: by InnoDB %lu bytes." - " Operating system errno: %d\n" - "InnoDB: Check if you should increase" - " the swap file or\n" - "InnoDB: ulimits of your operating system.\n" - "InnoDB: On FreeBSD check you have" - " compiled the OS with\n" - "InnoDB: a big enough maximum process size.\n", - (ulong) n, - (ulong) ut_total_allocated_memory, - (int) errno); - return(FALSE); - } - - free(ret); - - return(TRUE); -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is a nop. 
*/ UNIV_INTERN diff --git a/storage/myisam/mi_close.c b/storage/myisam/mi_close.c index 6e9ee7747ae..e58c2e0f189 100644 --- a/storage/myisam/mi_close.c +++ b/storage/myisam/mi_close.c @@ -87,7 +87,12 @@ int mi_close(register MI_INFO *info) } #ifdef HAVE_MMAP if (share->file_map) - _mi_unmap_file(info); + { + if (share->options & HA_OPTION_COMPRESS_RECORD) + _mi_unmap_file(info); + else + mi_munmap_file(info); + } #endif if (share->decode_trees) { diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c index 917bc9f4409..c154ec3b817 100644 --- a/storage/myisam/mi_packrec.c +++ b/storage/myisam/mi_packrec.c @@ -1550,13 +1550,14 @@ my_bool _mi_memmap_file(MI_INFO *info) void _mi_unmap_file(MI_INFO *info) { - (void) my_munmap((char*) info->s->file_map, - (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN); + DBUG_ASSERT(info->s->options & HA_OPTION_COMPRESS_RECORD); + + (void) my_munmap((char*) info->s->file_map, (size_t) info->s->mmaped_length); if (myisam_mmap_size != SIZE_T_MAX) { mysql_mutex_lock(&THR_LOCK_myisam_mmap); - myisam_mmap_used-= info->s->mmaped_length + MEMMAP_EXTRA_MARGIN; + myisam_mmap_used-= info->s->mmaped_length; mysql_mutex_unlock(&THR_LOCK_myisam_mmap); } } |