diff options
Diffstat (limited to 'storage')
168 files changed, 5777 insertions, 7372 deletions
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index 487c0038239..bb15aa9297d 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -802,7 +802,7 @@ int ha_archive::create(const char *name, TABLE *table_arg, #endif /* HAVE_READLINK */ { if (create_info->data_file_name) - my_error(WARN_OPTION_IGNORED, MYF(ME_JUST_WARNING), "DATA DIRECTORY"); + my_error(WARN_OPTION_IGNORED, MYF(ME_WARNING), "DATA DIRECTORY"); fn_format(name_buff, name, "", ARZ, MY_REPLACE_EXT | MY_UNPACK_FILENAME); @@ -811,7 +811,7 @@ int ha_archive::create(const char *name, TABLE *table_arg, /* Archive engine never uses INDEX DIRECTORY. */ if (create_info->index_file_name) - my_error(WARN_OPTION_IGNORED, MYF(ME_JUST_WARNING), "INDEX DIRECTORY"); + my_error(WARN_OPTION_IGNORED, MYF(ME_WARNING), "INDEX DIRECTORY"); /* There is a chance that the file was "discovered". In this case diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h index 56ff566db8c..1f25fba4eed 100644 --- a/storage/archive/ha_archive.h +++ b/storage/archive/ha_archive.h @@ -108,7 +108,7 @@ public: return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_CAN_BIT_FIELD | HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_STATS_RECORDS_IS_EXACT | HA_CAN_EXPORT | - HA_HAS_RECORDS | HA_CAN_REPAIR | + HA_HAS_RECORDS | HA_CAN_REPAIR | HA_SLOW_RND_POS | HA_FILE_BASED | HA_CAN_INSERT_DELAYED | HA_CAN_GEOMETRY); } ulong index_flags(uint idx, uint part, bool all_parts) const diff --git a/storage/connect/ha_connect.cc b/storage/connect/ha_connect.cc index b781c61cdd1..d3f89aca910 100644 --- a/storage/connect/ha_connect.cc +++ b/storage/connect/ha_connect.cc @@ -2085,9 +2085,8 @@ int ha_connect::MakeRecord(char *buf) DBUG_ENTER("ha_connect::MakeRecord"); if (trace(2)) - htrc("Maps: read=%08X write=%08X vcol=%08X defr=%08X defw=%08X\n", + htrc("Maps: read=%08X write=%08X defr=%08X defw=%08X\n", *table->read_set->bitmap, *table->write_set->bitmap, - (table->vcol_set) ? 
*table->vcol_set->bitmap : 0, *table->def_read_set.bitmap, *table->def_write_set.bitmap); // Avoid asserts in field::store() for columns that are not updated @@ -2727,37 +2726,40 @@ PFIL ha_connect::CondFilter(PGLOBAL g, Item *cond) if (!i && (ismul)) return NULL; - switch (args[i]->real_type()) { - case COND::STRING_ITEM: - res= pval->val_str(&tmp); - pp->Value= PlugSubAllocStr(g, NULL, res->ptr(), res->length()); - pp->Type= (pp->Value) ? TYPE_STRING : TYPE_ERROR; - break; - case COND::INT_ITEM: - pp->Type= TYPE_INT; - pp->Value= PlugSubAlloc(g, NULL, sizeof(int)); - *((int*)pp->Value)= (int)pval->val_int(); - break; - case COND::DATE_ITEM: - pp->Type= TYPE_DATE; - pp->Value= PlugSubAlloc(g, NULL, sizeof(int)); - *((int*)pp->Value)= (int)pval->val_int_from_date(); - break; - case COND::REAL_ITEM: - pp->Type= TYPE_DOUBLE; - pp->Value= PlugSubAlloc(g, NULL, sizeof(double)); - *((double*)pp->Value)= pval->val_real(); - break; - case COND::DECIMAL_ITEM: - pp->Type= TYPE_DOUBLE; - pp->Value= PlugSubAlloc(g, NULL, sizeof(double)); - *((double*)pp->Value)= pval->val_real_from_decimal(); - break; + switch (args[i]->real_type()) { + case COND::CONST_ITEM: + switch (args[i]->cmp_type()) { + case STRING_RESULT: + res= pval->val_str(&tmp); + pp->Value= PlugSubAllocStr(g, NULL, res->ptr(), res->length()); + pp->Type= (pp->Value) ? TYPE_STRING : TYPE_ERROR; + break; + case INT_RESULT: + pp->Type= TYPE_INT; + pp->Value= PlugSubAlloc(g, NULL, sizeof(int)); + *((int*)pp->Value)= (int)pval->val_int(); + break; + case TIME_RESULT: + pp->Type= TYPE_DATE; + pp->Value= PlugSubAlloc(g, NULL, sizeof(int)); + *((int*)pp->Value)= (int) Temporal_hybrid(pval).to_longlong(); + break; + case REAL_RESULT: + case DECIMAL_RESULT: + pp->Type= TYPE_DOUBLE; + pp->Value= PlugSubAlloc(g, NULL, sizeof(double)); + *((double*)pp->Value)= pval->val_real(); + break; + case ROW_RESULT: + DBUG_ASSERT(0); + return NULL; + } + break; case COND::CACHE_ITEM: // Possible ??? 
case COND::NULL_ITEM: // TODO: handle this default: return NULL; - } // endswitch type + } // endswitch type if (trace(1)) htrc("Value type=%hd\n", pp->Type); @@ -3009,12 +3011,8 @@ PCFIL ha_connect::CheckCond(PGLOBAL g, PCFIL filp, const Item *cond) Item::Type type= args[i]->real_type(); switch (type) { - case COND::STRING_ITEM: - case COND::INT_ITEM: - case COND::REAL_ITEM: + case COND::CONST_ITEM: case COND::NULL_ITEM: - case COND::DECIMAL_ITEM: - case COND::DATE_ITEM: case COND::CACHE_ITEM: break; default: diff --git a/storage/connect/ha_connect.h b/storage/connect/ha_connect.h index 6bce46ead95..8504b585ae1 100644 --- a/storage/connect/ha_connect.h +++ b/storage/connect/ha_connect.h @@ -544,3 +544,7 @@ public: uint int_table_flags; // Inherited from MyISAM bool enable_activate_all_index; // Inherited from MyISAM }; // end of ha_connect class definition + +#if defined(JAVA_SUPPORT) || defined(CMGO_SUPPORT) +bool MongoEnabled(void); +#endif // JAVA_SUPPORT || CMGO_SUPPORT diff --git a/storage/connect/mycat.cc b/storage/connect/mycat.cc index 5aef6d9c660..c41ea0970ed 100644 --- a/storage/connect/mycat.cc +++ b/storage/connect/mycat.cc @@ -102,10 +102,6 @@ extern "C" HINSTANCE s_hModule; // Saved module handle #endif // !__WIN__ -#if defined(JAVA_SUPPORT) || defined(CMGO_SUPPORT) -bool MongoEnabled(void); -#endif // JAVA_SUPPORT || CMGO_SUPPORT - PQRYRES OEMColumns(PGLOBAL g, PTOS topt, char *tab, char *db, bool info); /***********************************************************************/ diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index 0e092e2fd90..d5db465588a 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -1385,7 +1385,7 @@ int ha_tina::rnd_end() if (mysql_file_write(update_temp_file, (uchar*) (file_buff->ptr() + (write_begin - file_buff->start())), - (size_t)write_length, MYF_RW)) + (size_t)write_length, MYF(MY_WME+MY_NABP))) goto error; temp_file_length+= write_length; } @@ -1571,7 +1571,7 @@ int ha_tina::repair(THD* 
thd, HA_CHECK_OPT* check_opt) write_end= MY_MIN(file_buff->end(), current_position); if ((write_end - write_begin) && (mysql_file_write(repair_file, (uchar*)file_buff->ptr(), - (size_t) (write_end - write_begin), MYF_RW))) + (size_t) (write_end - write_begin), MYF(MY_WME+MY_NABP)))) DBUG_RETURN(-1); write_begin= write_end; diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h index c75a64faa52..5b389d984d6 100644 --- a/storage/csv/ha_tina.h +++ b/storage/csv/ha_tina.h @@ -107,7 +107,7 @@ public: { return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_NO_AUTO_INCREMENT | HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_CAN_EXPORT | - HA_CAN_REPAIR); + HA_CAN_REPAIR | HA_SLOW_RND_POS); } ulong index_flags(uint idx, uint part, bool all_parts) const { diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index e0bc7006770..90a01540000 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -113,7 +113,6 @@ SET(INNOBASE_SOURCES row/row0purge.cc row/row0row.cc row/row0sel.cc - row/row0trunc.cc row/row0uins.cc row/row0umod.cc row/row0undo.cc @@ -179,7 +178,6 @@ IF(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") mtr/mtr0mtr.cc row/row0merge.cc row/row0mysql.cc - row/row0trunc.cc srv/srv0srv.cc COMPILE_FLAGS "-O0" ) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 96be7349b46..a1288642d63 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -435,7 +435,7 @@ btr_page_create( ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); if (page_zip) { - page_create_zip(block, index, level, 0, NULL, mtr); + page_create_zip(block, index, level, 0, mtr); } else { page_create(block, mtr, dict_table_is_comp(index->table), dict_index_is_spatial(index)); @@ -1176,21 +1176,18 @@ btr_free_root_check( /** Create the root node for a new index tree. 
@param[in] type type of the index -@param[in,out] space tablespace where created @param[in] index_id index id -@param[in] index index, or NULL when applying TRUNCATE -log record during recovery -@param[in] btr_redo_create_info used for applying TRUNCATE log -@param[in] mtr mini-transaction handle -record during recovery -@return page number of the created root, FIL_NULL if did not succeed */ +@param[in,out] space tablespace where created +@param[in] index index +@param[in,out] mtr mini-transaction +@return page number of the created root +@retval FIL_NULL if did not succeed */ ulint btr_create( ulint type, fil_space_t* space, index_id_t index_id, dict_index_t* index, - const btr_create_t* btr_redo_create_info, mtr_t* mtr) { buf_block_t* block; @@ -1205,7 +1202,7 @@ btr_create( (for an ibuf tree, not in the root, but on a separate ibuf header page) */ - if (type & DICT_IBUF) { + if (UNIV_UNLIKELY(type & DICT_IBUF)) { /* Allocate first the ibuf header page */ buf_block_t* ibuf_hdr_block = fseg_create( space, 0, @@ -1237,8 +1234,7 @@ btr_create( buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW); - flst_init(block->frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - mtr); + flst_init(block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr); } else { block = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr); @@ -1270,44 +1266,11 @@ btr_create( page_zip = buf_block_get_page_zip(block); if (page_zip) { - if (index != NULL) { - page = page_create_zip(block, index, 0, 0, NULL, mtr); - } else { - /* Create a compressed index page when applying - TRUNCATE log record during recovery */ - ut_ad(btr_redo_create_info != NULL); - - redo_page_compress_t page_comp_info; - - page_comp_info.type = type; - - page_comp_info.index_id = index_id; - - page_comp_info.n_fields = - btr_redo_create_info->n_fields; - - page_comp_info.field_len = - btr_redo_create_info->field_len; - - page_comp_info.fields = btr_redo_create_info->fields; - - page_comp_info.trx_id_pos = - 
btr_redo_create_info->trx_id_pos; - - page = page_create_zip(block, NULL, 0, 0, - &page_comp_info, mtr); - } + page = page_create_zip(block, index, 0, 0, mtr); } else { - if (index != NULL) { - page = page_create(block, mtr, - dict_table_is_comp(index->table), - dict_index_is_spatial(index)); - } else { - ut_ad(btr_redo_create_info != NULL); - page = page_create( - block, mtr, btr_redo_create_info->format_flags, - type == DICT_SPATIAL); - } + page = page_create(block, mtr, + dict_table_is_comp(index->table), + dict_index_is_spatial(index)); /* Set the level of the new index page */ btr_page_set_level(page, NULL, 0, mtr); } @@ -1319,18 +1282,14 @@ btr_create( btr_page_set_next(page, page_zip, FIL_NULL, mtr); btr_page_set_prev(page, page_zip, FIL_NULL, mtr); - /* We reset the free bits for the page to allow creation of several - trees in the same mtr, otherwise the latch on a bitmap page would - prevent it because of the latching order. - - index will be NULL if we are recreating the table during recovery - on behalf of TRUNCATE. + /* We reset the free bits for the page in a separate + mini-transaction to allow creation of several trees in the + same mtr, otherwise the latch on a bitmap page would prevent + it because of the latching order. Note: Insert Buffering is disabled for temporary tables given that most temporary tables are smaller in size and short-lived. */ - if (!(type & DICT_CLUSTERED) - && (index == NULL || !index->table->is_temporary())) { - + if (!(type & DICT_CLUSTERED) && !index->table->is_temporary()) { ibuf_reset_free_bits(block); } @@ -1672,7 +1631,7 @@ btr_page_reorganize_low( } if (page_zip - && !page_zip_compress(page_zip, page, index, z_level, NULL, mtr)) { + && !page_zip_compress(page_zip, page, index, z_level, mtr)) { /* Restore the old page and exit. 
*/ #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG @@ -1698,11 +1657,6 @@ btr_page_reorganize_low( goto func_exit; } - if (!recovery && !dict_table_is_locking_disabled(index->table)) { - /* Update the record lock bitmaps */ - lock_move_reorganize_page(block, temp_block); - } - data_size2 = page_get_data_size(page); max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1); @@ -1726,21 +1680,41 @@ btr_page_reorganize_low( ut_ad(cursor->rec == page_get_infimum_rec(page)); } -func_exit: #ifdef UNIV_ZIP_DEBUG ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ - if (!recovery && page_is_root(temp_page) - && fil_page_get_type(temp_page) == FIL_PAGE_TYPE_INSTANT) { - /* Preserve the PAGE_INSTANT information. */ - ut_ad(!page_zip); - ut_ad(index->is_instant()); - memcpy(FIL_PAGE_TYPE + page, FIL_PAGE_TYPE + temp_page, 2); - memcpy(PAGE_HEADER + PAGE_INSTANT + page, - PAGE_HEADER + PAGE_INSTANT + temp_page, 2); + if (!recovery) { + if (page_is_root(temp_page) + && fil_page_get_type(temp_page) == FIL_PAGE_TYPE_INSTANT) { + /* Preserve the PAGE_INSTANT information. 
*/ + ut_ad(!page_zip); + ut_ad(index->is_instant()); + memcpy(FIL_PAGE_TYPE + page, + FIL_PAGE_TYPE + temp_page, 2); + memcpy(PAGE_HEADER + PAGE_INSTANT + page, + PAGE_HEADER + PAGE_INSTANT + temp_page, 2); + if (!index->table->instant) { + } else if (page_is_comp(page)) { + memcpy(PAGE_NEW_INFIMUM + page, + PAGE_NEW_INFIMUM + temp_page, 8); + memcpy(PAGE_NEW_SUPREMUM + page, + PAGE_NEW_SUPREMUM + temp_page, 8); + } else { + memcpy(PAGE_OLD_INFIMUM + page, + PAGE_OLD_INFIMUM + temp_page, 8); + memcpy(PAGE_OLD_SUPREMUM + page, + PAGE_OLD_SUPREMUM + temp_page, 8); + } + } + + if (!dict_table_is_locking_disabled(index->table)) { + /* Update the record lock bitmaps */ + lock_move_reorganize_page(block, temp_block); + } } +func_exit: buf_block_free(temp_block); /* Restore logging mode */ @@ -1786,6 +1760,14 @@ func_exit: mach_read_from_2(PAGE_HEADER + PAGE_INSTANT + page), MLOG_2BYTES, mtr); + if (!index->table->instant) { + } else if (page_is_comp(page)) { + mlog_log_string(PAGE_NEW_INFIMUM + page, 8, mtr); + mlog_log_string(PAGE_NEW_SUPREMUM + page, 8, mtr); + } else { + mlog_log_string(PAGE_OLD_INFIMUM + page, 8, mtr); + mlog_log_string(PAGE_OLD_SUPREMUM + page, 8, mtr); + } } return(success); @@ -1921,7 +1903,7 @@ btr_page_empty( : 0; if (page_zip) { - page_create_zip(block, index, level, autoinc, NULL, mtr); + page_create_zip(block, index, level, autoinc, mtr); } else { page_create(block, mtr, dict_table_is_comp(index->table), dict_index_is_spatial(index)); @@ -1933,6 +1915,59 @@ btr_page_empty( } } +/** Write instant ALTER TABLE metadata to a root page. 
+@param[in,out] root clustered index root page +@param[in] index clustered index with instant ALTER TABLE +@param[in,out] mtr mini-transaction */ +void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr) +{ + ut_ad(index.n_core_fields > 0); + ut_ad(index.n_core_fields < REC_MAX_N_FIELDS); + ut_ad(index.is_instant()); + ut_ad(page_is_root(root->frame)); + + rec_t* infimum = page_get_infimum_rec(root->frame); + rec_t* supremum = page_get_supremum_rec(root->frame); + byte* page_type = root->frame + FIL_PAGE_TYPE; + uint16_t i = page_header_get_field(root->frame, PAGE_INSTANT); + + switch (mach_read_from_2(page_type)) { + case FIL_PAGE_TYPE_INSTANT: + ut_ad(page_get_instant(root->frame) == index.n_core_fields); + if (memcmp(infimum, "infimum", 8) + || memcmp(supremum, "supremum", 8)) { + ut_ad(index.table->instant); + ut_ad(!memcmp(infimum, field_ref_zero, 8)); + ut_ad(!memcmp(supremum, field_ref_zero, 7)); + ut_ad(supremum[7] == index.n_core_null_bytes); + return; + } + break; + default: + ut_ad(!"wrong page type"); + /* fall through */ + case FIL_PAGE_INDEX: + ut_ad(!page_is_comp(root->frame) + || !page_get_instant(root->frame)); + ut_ad(!memcmp(infimum, "infimum", 8)); + ut_ad(!memcmp(supremum, "supremum", 8)); + mlog_write_ulint(page_type, FIL_PAGE_TYPE_INSTANT, + MLOG_2BYTES, mtr); + ut_ad(i <= PAGE_NO_DIRECTION); + i |= index.n_core_fields << 3; + mlog_write_ulint(PAGE_HEADER + PAGE_INSTANT + root->frame, i, + MLOG_2BYTES, mtr); + break; + } + + if (index.table->instant) { + mlog_memset(root, infimum - root->frame, 8, 0, mtr); + mlog_memset(root, supremum - root->frame, 7, 0, mtr); + mlog_write_ulint(&supremum[7], index.n_core_null_bytes, + MLOG_1BYTE, mtr); + } +} + /*************************************************************//** Makes tree one level higher by splitting the root, and inserts the tuple. It is assumed that mtr contains an x-latch on the tree. 
@@ -2118,11 +2153,7 @@ btr_root_raise_and_insert( if (index->is_instant()) { ut_ad(!root_page_zip); - byte* page_type = root_block->frame + FIL_PAGE_TYPE; - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_INDEX); - mlog_write_ulint(page_type, FIL_PAGE_TYPE_INSTANT, - MLOG_2BYTES, mtr); - page_set_instant(root_block->frame, index->n_core_fields, mtr); + btr_set_instant(root_block, *index, mtr); } /* Set the next node and previous node fields, although @@ -3607,12 +3638,7 @@ btr_lift_page_up( if (page_level == 0 && index->is_instant()) { ut_ad(!father_page_zip); - byte* page_type = father_block->frame + FIL_PAGE_TYPE; - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_INDEX); - mlog_write_ulint(page_type, FIL_PAGE_TYPE_INSTANT, - MLOG_2BYTES, mtr); - page_set_instant(father_block->frame, - index->n_core_fields, mtr); + btr_set_instant(father_block, *index, mtr); } page_level++; @@ -4284,15 +4310,42 @@ btr_discard_only_page_on_level( } #endif /* UNIV_BTR_DEBUG */ + mem_heap_t* heap = NULL; + const rec_t* rec = NULL; + ulint* offsets = NULL; + if (index->table->instant) { + const rec_t* r = page_rec_get_next(page_get_infimum_rec( + block->frame)); + ut_ad(rec_is_metadata(r, *index) == index->is_instant()); + if (rec_is_alter_metadata(r, *index)) { + heap = mem_heap_create(srv_page_size); + offsets = rec_get_offsets(r, index, NULL, true, + ULINT_UNDEFINED, &heap); + rec = rec_copy(mem_heap_alloc(heap, + rec_offs_size(offsets)), + r, offsets); + rec_offs_make_valid(rec, index, true, offsets); + } + } + btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr); ut_ad(page_is_leaf(buf_block_get_frame(block))); /* btr_page_empty() is supposed to zero-initialize the field. */ ut_ad(!page_get_instant(block->frame)); if (index->is_primary()) { - /* Concurrent access is prevented by the root_block->lock - X-latch, so this should be safe. 
*/ - index->remove_instant(); + if (rec) { + DBUG_ASSERT(index->table->instant); + DBUG_ASSERT(rec_is_alter_metadata(rec, *index)); + btr_set_instant(block, *index, mtr); + rec = page_cur_insert_rec_low( + page_get_infimum_rec(block->frame), + index, rec, offsets, mtr); + ut_ad(rec); + mem_heap_free(heap); + } else if (index->is_instant()) { + index->clear_instant_add(); + } } else if (!index->table->is_temporary()) { /* We play it safe and reset the free bits for the root */ ibuf_reset_free_bits(block); @@ -4716,14 +4769,32 @@ btr_index_rec_validate( return(FALSE); } + const bool is_alter_metadata = page_is_leaf(page) + && !page_has_prev(page) + && index->is_primary() && index->table->instant + && rec == page_rec_get_next_const(page_get_infimum_rec(page)); + + if (is_alter_metadata + && !rec_is_alter_metadata(rec, page_is_comp(page))) { + btr_index_rec_validate_report(page, rec, index); + + ib::error() << "First record is not ALTER TABLE metadata"; + return FALSE; + } + if (!page_is_comp(page)) { const ulint n_rec_fields = rec_get_n_fields_old(rec); if (n_rec_fields == DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD && index->id == DICT_INDEXES_ID) { /* A record for older SYS_INDEXES table (missing merge_threshold column) is acceptable. 
*/ + } else if (is_alter_metadata) { + if (n_rec_fields != ulint(index->n_fields) + 1) { + goto n_field_mismatch; + } } else if (n_rec_fields < index->n_core_fields || n_rec_fields > index->n_fields) { +n_field_mismatch: btr_index_rec_validate_report(page, rec, index); ib::error() << "Has " << rec_get_n_fields_old(rec) @@ -4742,15 +4813,28 @@ btr_index_rec_validate( offsets = rec_get_offsets(rec, index, offsets, page_is_leaf(page), ULINT_UNDEFINED, &heap); + const dict_field_t* field = index->fields; + ut_ad(rec_offs_n_fields(offsets) + == ulint(index->n_fields) + is_alter_metadata); - for (unsigned i = 0; i < index->n_fields; i++) { - dict_field_t* field = dict_index_get_nth_field(index, i); - ulint fixed_size = dict_col_get_fixed_size( - dict_field_get_col(field), - page_is_comp(page)); - + for (unsigned i = 0; i < rec_offs_n_fields(offsets); i++) { rec_get_nth_field_offs(offsets, i, &len); + ulint fixed_size; + + if (is_alter_metadata && i == index->first_user_field()) { + fixed_size = FIELD_REF_SIZE; + if (len != FIELD_REF_SIZE + || !rec_offs_nth_extern(offsets, i)) { + goto len_mismatch; + } + + continue; + } else { + fixed_size = dict_col_get_fixed_size( + field->col, page_is_comp(page)); + } + /* Note that if fixed_size != 0, it equals the length of a fixed-size column in the clustered index. We should adjust it here. @@ -4762,8 +4846,8 @@ btr_index_rec_validate( && (field->prefix_len ? 
len > field->prefix_len : (fixed_size && len != fixed_size))) { +len_mismatch: btr_index_rec_validate_report(page, rec, index); - ib::error error; error << "Field " << i << " len is " << len @@ -4781,6 +4865,8 @@ btr_index_rec_validate( } return(FALSE); } + + field++; } #ifdef VIRTUAL_INDEX_DEBUG diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index 2ce2815acb0..5cb07af3f66 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -94,7 +94,7 @@ PageBulk::init() if (new_page_zip) { page_create_zip(new_block, m_index, m_level, 0, - NULL, &m_mtr); + &m_mtr); memset(FIL_PAGE_PREV + new_page, 0xff, 8); page_zip_write_header(new_page_zip, FIL_PAGE_PREV + new_page, @@ -374,7 +374,7 @@ PageBulk::compress() ut_ad(m_page_zip != NULL); return(page_zip_compress(m_page_zip, m_page, m_index, - page_zip_level, NULL, &m_mtr)); + page_zip_level, &m_mtr)); } /** Get node pointer diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 2ba311fce7b..95ebcfe4d1e 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -457,8 +457,8 @@ unreadable: return DB_CORRUPTION; } - if (info_bits != REC_INFO_MIN_REC_FLAG - || (comp && rec_get_status(rec) != REC_STATUS_COLUMNS_ADDED)) { + if ((info_bits & ~REC_INFO_DELETED_FLAG) != REC_INFO_MIN_REC_FLAG + || (comp && rec_get_status(rec) != REC_STATUS_INSTANT)) { incompatible: ib::error() << "Table " << index->table->name << " contains unrecognizable instant ALTER metadata"; @@ -476,6 +476,72 @@ incompatible: concurrent operations on the table, including table eviction from the cache. */ + if (info_bits & REC_INFO_DELETED_FLAG) { + /* This metadata record includes a BLOB that identifies + any dropped or reordered columns. */ + ulint trx_id_offset = index->trx_id_offset; + if (!trx_id_offset) { + /* The PRIMARY KEY contains variable-length columns. 
+ For the metadata record, variable-length columns are + always written with zero length. The DB_TRX_ID will + start right after any fixed-length columns. */ + for (uint i = index->n_uniq; i--; ) { + trx_id_offset += index->fields[0].fixed_len; + } + } + + const byte* ptr = rec + trx_id_offset + + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + + if (mach_read_from_4(ptr + BTR_EXTERN_LEN)) { + goto incompatible; + } + + uint len = mach_read_from_4(ptr + BTR_EXTERN_LEN + 4); + if (!len + || mach_read_from_4(ptr + BTR_EXTERN_OFFSET) + != FIL_PAGE_DATA + || mach_read_from_4(ptr + BTR_EXTERN_SPACE_ID) + != space->id) { + goto incompatible; + } + + buf_block_t* block = buf_page_get( + page_id_t(space->id, + mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO)), + univ_page_size, RW_S_LATCH, mtr); + buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE); + if (fil_page_get_type(block->frame) != FIL_PAGE_TYPE_BLOB + || mach_read_from_4(&block->frame[FIL_PAGE_DATA + + BTR_BLOB_HDR_NEXT_PAGE_NO]) + != FIL_NULL + || mach_read_from_4(&block->frame[FIL_PAGE_DATA + + BTR_BLOB_HDR_PART_LEN]) + != len) { + goto incompatible; + } + + /* The unused part of the BLOB page should be zero-filled. */ + for (const byte* b = block->frame + + (FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE) + len, + * const end = block->frame + srv_page_size + - BTR_EXTERN_LEN; + b < end; ) { + if (*b++) { + goto incompatible; + } + } + + if (index->table->deserialise_columns( + &block->frame[FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE], + len)) { + goto incompatible; + } + + /* Proceed to initialize the default values of + any instantly added columns. */ + } + mem_heap_t* heap = NULL; ulint* offsets = rec_get_offsets(rec, index, NULL, true, ULINT_UNDEFINED, &heap); @@ -489,7 +555,8 @@ inconsistent: record, it is also OK to perform READ UNCOMMITTED and then ignore any extra fields, provided that trx_sys.is_registered(DB_TRX_ID). 
*/ - if (rec_offs_n_fields(offsets) > index->n_fields + if (rec_offs_n_fields(offsets) + > ulint(index->n_fields) + !!index->table->instant && !trx_sys.is_registered(current_trx(), row_get_rec_trx_id(rec, index, offsets))) { @@ -497,10 +564,11 @@ inconsistent: } for (unsigned i = index->n_core_fields; i < index->n_fields; i++) { - ulint len; - const byte* data = rec_get_nth_field(rec, offsets, i, &len); dict_col_t* col = index->fields[i].col; - ut_ad(!col->is_instant()); + const unsigned o = i + !!index->table->instant; + ulint len; + const byte* data = rec_get_nth_field(rec, offsets, o, &len); + ut_ad(!col->is_added()); ut_ad(!col->def_val.data); col->def_val.len = len; switch (len) { @@ -511,7 +579,7 @@ inconsistent: continue; } ut_ad(len != UNIV_SQL_DEFAULT); - if (!rec_offs_nth_extern(offsets, i)) { + if (!rec_offs_nth_extern(offsets, o)) { col->def_val.data = mem_heap_dup( index->table->heap, data, len); } else if (len < BTR_EXTERN_FIELD_REF_SIZE @@ -588,30 +656,49 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) const uint16_t n = page_get_instant(page); - if (n < index->n_uniq + DATA_ROLL_PTR || n > index->n_fields) { + if (n < index->n_uniq + DATA_ROLL_PTR) { /* The PRIMARY KEY (or hidden DB_ROW_ID) and DB_TRX_ID,DB_ROLL_PTR columns must always be present - as 'core' fields. All fields, including those for - instantly added columns, must be present in the data - dictionary. */ + as 'core' fields. */ return true; } - if (memcmp(page_get_infimum_rec(page), "infimum", 8) - || memcmp(page_get_supremum_rec(page), "supremum", 8)) { - /* In a later format, these fields in a FIL_PAGE_TYPE_INSTANT - root page could be repurposed for something else. */ + if (n > REC_MAX_N_FIELDS) { return true; } index->n_core_fields = n; - ut_ad(!index->is_dummy); - ut_d(index->is_dummy = true); - index->n_core_null_bytes = n == index->n_fields - ? 
UT_BITS_IN_BYTES(unsigned(index->n_nullable)) - : UT_BITS_IN_BYTES(index->get_n_nullable(n)); - ut_d(index->is_dummy = false); - return false; + + const rec_t* infimum = page_get_infimum_rec(page); + const rec_t* supremum = page_get_supremum_rec(page); + + if (!memcmp(infimum, "infimum", 8) + && !memcmp(supremum, "supremum", 8)) { + if (n > index->n_fields) { + /* All fields, including those for instantly + added columns, must be present in the + data dictionary. */ + return true; + } + + ut_ad(!index->is_dummy); + ut_d(index->is_dummy = true); + index->n_core_null_bytes = UT_BITS_IN_BYTES( + index->get_n_nullable(n)); + ut_d(index->is_dummy = false); + return false; + } + + if (memcmp(infimum, field_ref_zero, 8) + || memcmp(supremum, field_ref_zero, 7)) { + /* The infimum and supremum records must either contain + the original strings, or they must be filled with zero + bytes, except for the bytes that we have repurposed. */ + return true; + } + + index->n_core_null_bytes = supremum[7]; + return index->n_core_null_bytes > 128; } /** Optimistically latches the leaf page or pages requested. @@ -2292,9 +2379,10 @@ need_opposite_intention: ut_ad(index->is_instant()); /* This may be a search tuple for btr_pcur_restore_position(). */ - ut_ad(tuple->info_bits == REC_INFO_METADATA - || tuple->info_bits == REC_INFO_MIN_REC_FLAG); - } else if (rec_is_metadata(btr_cur_get_rec(cursor), index)) { + ut_ad(tuple->is_metadata() + || (tuple->is_metadata(tuple->info_bits + ^ REC_STATUS_INSTANT))); + } else if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { /* Only user records belong in the adaptive hash index. 
*/ } else { @@ -3141,8 +3229,11 @@ btr_cur_ins_lock_and_undo( roll_ptr = roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS; if (!(flags & BTR_KEEP_SYS_FLAG)) { upd_sys: - row_upd_index_entry_sys_field(entry, index, - DATA_ROLL_PTR, roll_ptr); + dfield_t* r = dtuple_get_nth_field( + entry, index->db_roll_ptr()); + ut_ad(r->len == DATA_ROLL_PTR_LEN); + trx_write_roll_ptr(static_cast<byte*>(r->data), + roll_ptr); } } else { err = trx_undo_report_row_operation(thr, index, entry, @@ -3257,12 +3348,17 @@ btr_cur_optimistic_insert( leaf = page_is_leaf(page); + if (UNIV_UNLIKELY(entry->is_alter_metadata())) { + ut_ad(leaf); + goto convert_big_rec; + } + /* Calculate the record size when entry is converted to a record */ rec_size = rec_get_converted_size(index, entry, n_ext); if (page_zip_rec_needs_ext(rec_size, page_is_comp(page), dtuple_get_n_fields(entry), page_size)) { - +convert_big_rec: /* The record is so big that we have to store some fields externally on separate database pages */ big_rec_vec = dtuple_convert_big_rec(index, 0, entry, &n_ext); @@ -3433,7 +3529,7 @@ fail_err: } else if (index->disable_ahi) { # endif } else if (entry->info_bits & REC_INFO_MIN_REC_FLAG) { - ut_ad(entry->info_bits == REC_INFO_METADATA); + ut_ad(entry->is_metadata()); ut_ad(index->is_instant()); ut_ad(flags == BTR_NO_LOCKING_FLAG); } else { @@ -3641,7 +3737,7 @@ btr_cur_pessimistic_insert( if (index->disable_ahi); else # endif if (entry->info_bits & REC_INFO_MIN_REC_FLAG) { - ut_ad(entry->info_bits == REC_INFO_METADATA); + ut_ad(entry->is_metadata()); ut_ad(index->is_instant()); ut_ad((flags & ulint(~BTR_KEEP_IBUF_BITMAP)) == BTR_NO_LOCKING_FLAG); @@ -3726,6 +3822,50 @@ btr_cur_upd_lock_and_undo( cmpl_info, rec, offsets, roll_ptr)); } +/** Copy DB_TRX_ID,DB_ROLL_PTR to the redo log. 
+@param[in] index clustered index +@param[in] trx_id_t DB_TRX_ID +@param[in] roll_ptr DB_ROLL_PTR +@param[in,out] log_ptr redo log buffer +@return current end of the redo log buffer */ +static byte* +btr_cur_log_sys( + const dict_index_t* index, + trx_id_t trx_id, + roll_ptr_t roll_ptr, + byte* log_ptr) +{ + log_ptr += mach_write_compressed(log_ptr, index->db_trx_id()); + /* Yes, we are writing DB_ROLL_PTR,DB_TRX_ID in reverse order, + after emitting the position of DB_TRX_ID in the index. + This is how row_upd_write_sys_vals_to_log() + originally worked, and it is part of the redo log format. */ + trx_write_roll_ptr(log_ptr, roll_ptr); + log_ptr += DATA_ROLL_PTR_LEN; + log_ptr += mach_u64_write_compressed(log_ptr, trx_id); + + return log_ptr; +} + +/** Write DB_TRX_ID,DB_ROLL_PTR to a clustered index entry. +@param[in,out] entry clustered index entry +@param[in] index clustered index +@param[in] trx_id DB_TRX_ID +@param[in] roll_ptr DB_ROLL_PTR */ +static void btr_cur_write_sys( + dtuple_t* entry, + const dict_index_t* index, + trx_id_t trx_id, + roll_ptr_t roll_ptr) +{ + dfield_t* t = dtuple_get_nth_field(entry, index->db_trx_id()); + ut_ad(t->len == DATA_TRX_ID_LEN); + trx_write_trx_id(static_cast<byte*>(t->data), trx_id); + dfield_t* r = dtuple_get_nth_field(entry, index->db_roll_ptr()); + ut_ad(r->len == DATA_ROLL_PTR_LEN); + trx_write_roll_ptr(static_cast<byte*>(r->data), roll_ptr); +} + /***********************************************************//** Writes a redo log record of updating a record in-place. 
*/ void @@ -3765,8 +3905,7 @@ btr_cur_update_in_place_log( log_ptr++; if (dict_index_is_clust(index)) { - log_ptr = row_upd_write_sys_vals_to_log( - index, trx_id, roll_ptr, log_ptr, mtr); + log_ptr = btr_cur_log_sys(index, trx_id, roll_ptr, log_ptr); } else { /* Dummy system fields for a secondary index */ /* TRX_ID Position */ @@ -4140,13 +4279,11 @@ btr_cur_trim( const que_thr_t* thr) { if (!index->is_instant()) { - } else if (UNIV_UNLIKELY(update->info_bits == REC_INFO_METADATA)) { + } else if (UNIV_UNLIKELY(update->is_metadata())) { /* We are either updating a metadata record - (instantly adding columns to a table where instant ADD was + (instant ALTER TABLE on a table where instant ALTER was already executed) or rolling back such an operation. */ ut_ad(!upd_get_nth_field(update, 0)->orig_len); - ut_ad(upd_get_nth_field(update, 0)->field_no - > index->n_core_fields); if (thr->graph->trx->in_rollback) { /* This rollback can occur either as part of @@ -4163,6 +4300,19 @@ btr_cur_trim( first instantly added column logged by innobase_add_instant_try(). 
*/ ut_ad(update->n_fields > 2); + if (update->is_alter_metadata()) { + ut_ad(update->fields[0].field_no + == index->first_user_field()); + ut_ad(update->fields[0].new_val.ext); + ut_ad(update->fields[0].new_val.len + == FIELD_REF_SIZE); + ut_ad(entry->n_fields - 1 == index->n_fields); + ulint n_fields = update->fields[1].field_no; + ut_ad(n_fields <= index->n_fields); + entry->n_fields = n_fields; + return; + } + ulint n_fields = upd_get_nth_field(update, 0) ->field_no; ut_ad(n_fields + 1 >= entry->n_fields); @@ -4248,9 +4398,7 @@ btr_cur_optimistic_update( || trx_is_recv(thr_get_trx(thr))); #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - const bool is_metadata = update->info_bits == REC_INFO_METADATA; - - if (UNIV_LIKELY(!is_metadata) + if (UNIV_LIKELY(!update->is_metadata()) && !row_upd_changes_field_size_or_external(index, *offsets, update)) { @@ -4276,6 +4424,10 @@ any_extern: return(DB_OVERFLOW); } + if (rec_is_metadata(rec, *index) && index->table->instant) { + goto any_extern; + } + for (i = 0; i < upd_get_n_fields(update); i++) { if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) { @@ -4334,10 +4486,10 @@ any_extern: } /* We limit max record size to 16k even for 64k page size. 
*/ - if (new_rec_size >= COMPRESSED_REC_MAX_DATA_SIZE || - (!dict_table_is_comp(index->table) - && new_rec_size >= REDUNDANT_REC_MAX_DATA_SIZE)) { - err = DB_OVERFLOW; + if (new_rec_size >= COMPRESSED_REC_MAX_DATA_SIZE || + (!dict_table_is_comp(index->table) + && new_rec_size >= REDUNDANT_REC_MAX_DATA_SIZE)) { + err = DB_OVERFLOW; goto func_exit; } @@ -4410,8 +4562,8 @@ any_extern: lock_rec_store_on_page_infimum(block, rec); } - if (UNIV_UNLIKELY(is_metadata)) { - ut_ad(new_entry->info_bits == REC_INFO_METADATA); + if (UNIV_UNLIKELY(update->is_metadata())) { + ut_ad(new_entry->is_metadata()); ut_ad(index->is_instant()); /* This can be innobase_add_instant_try() performing a subsequent instant ADD COLUMN, or its rollback by @@ -4426,10 +4578,7 @@ any_extern: page_cur_move_to_prev(page_cursor); if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, - roll_ptr); - row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - trx_id); + btr_cur_write_sys(new_entry, index, trx_id, roll_ptr); } /* There are no externally stored columns in new_entry */ @@ -4437,7 +4586,7 @@ any_extern: cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr); ut_a(rec); /* <- We calculated above the insert would fit */ - if (UNIV_UNLIKELY(is_metadata)) { + if (UNIV_UNLIKELY(update->is_metadata())) { /* We must empty the PAGE_FREE list, because if this was a rollback, the shortened metadata record would have too many fields, and we would be unable to @@ -4631,8 +4780,25 @@ btr_cur_pessimistic_update( rec, index, *offsets, page_is_leaf(page), ULINT_UNDEFINED, offsets_heap); - dtuple_t* new_entry = row_rec_to_index_entry( - rec, index, *offsets, &n_ext, entry_heap); + dtuple_t* new_entry; + + const bool is_metadata = rec_is_metadata(rec, *index); + + if (UNIV_UNLIKELY(is_metadata)) { + ut_ad(update->is_metadata()); + ut_ad(flags & BTR_NO_LOCKING_FLAG); + ut_ad(index->is_instant()); + new_entry = row_metadata_to_tuple( + rec, index, *offsets, + &n_ext, 
entry_heap, + update->info_bits, !thr_get_trx(thr)->in_rollback); + ut_ad(new_entry->n_fields + == ulint(index->n_fields) + + update->is_alter_metadata()); + } else { + new_entry = row_rec_to_index_entry(rec, index, *offsets, + &n_ext, entry_heap); + } /* The page containing the clustered index record corresponding to new_entry is latched in mtr. If the @@ -4644,9 +4810,6 @@ btr_cur_pessimistic_update( entry_heap); btr_cur_trim(new_entry, index, update, thr); - const bool is_metadata = new_entry->info_bits - & REC_INFO_MIN_REC_FLAG; - /* We have to set appropriate extern storage bits in the new record to be inserted: we have to remember which fields were such */ @@ -4674,11 +4837,14 @@ btr_cur_pessimistic_update( } if (page_zip_rec_needs_ext( - rec_get_converted_size(index, new_entry, n_ext), - page_is_comp(page), - dict_index_get_n_fields(index), - block->page.size)) { - + rec_get_converted_size(index, new_entry, n_ext), + page_is_comp(page), + dict_index_get_n_fields(index), + block->page.size) + || (UNIV_UNLIKELY(update->is_alter_metadata()) + && !dfield_is_ext(dtuple_get_nth_field( + new_entry, + index->first_user_field())))) { big_rec_vec = dtuple_convert_big_rec(index, update, new_entry, &n_ext); if (UNIV_UNLIKELY(big_rec_vec == NULL)) { @@ -4727,10 +4893,7 @@ btr_cur_pessimistic_update( } if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, - roll_ptr); - row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - trx_id); + btr_cur_write_sys(new_entry, index, trx_id, roll_ptr); } if (!page_zip) { @@ -4739,10 +4902,10 @@ btr_cur_pessimistic_update( } if (UNIV_UNLIKELY(is_metadata)) { - ut_ad(new_entry->info_bits == REC_INFO_METADATA); + ut_ad(new_entry->is_metadata()); ut_ad(index->is_instant()); /* This can be innobase_add_instant_try() performing a - subsequent instant ADD COLUMN, or its rollback by + subsequent instant ALTER TABLE, or its rollback by row_undo_mod_clust_low(). 
*/ ut_ad(flags & BTR_NO_LOCKING_FLAG); } else { @@ -4791,7 +4954,8 @@ btr_cur_pessimistic_update( btr_cur_get_block(cursor), rec, block); } - if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) { + if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets)) + || rec_is_alter_metadata(rec, *index)) { /* The new inserted record owns its possible externally stored fields */ btr_cur_unmark_extern_fields( @@ -5001,8 +5165,7 @@ btr_cur_del_mark_set_clust_rec_log( *log_ptr++ = 0; *log_ptr++ = 1; - log_ptr = row_upd_write_sys_vals_to_log( - index, trx_id, roll_ptr, log_ptr, mtr); + log_ptr = btr_cur_log_sys(index, trx_id, roll_ptr, log_ptr); mach_write_to_2(log_ptr, page_offset(rec)); log_ptr += 2; @@ -5434,42 +5597,41 @@ btr_cur_optimistic_delete_func( if (UNIV_UNLIKELY(page_is_root(block->frame) && page_get_n_recs(block->frame) == 1 + (cursor->index->is_instant() - && !rec_is_metadata(rec, cursor->index)))) { + && !rec_is_metadata(rec, *cursor->index)))) { /* The whole index (and table) becomes logically empty. Empty the whole page. That is, if we are deleting the only user record, also delete the metadata record - if one exists (it exists if and only if is_instant()). + if one exists for instant ADD COLUMN (not generic ALTER TABLE). If we are deleting the metadata record and the table becomes empty, clean up the whole page. */ dict_index_t* index = cursor->index; + const rec_t* first_rec = page_rec_get_next_const( + page_get_infimum_rec(block->frame)); ut_ad(!index->is_instant() - || rec_is_metadata( - page_rec_get_next_const( - page_get_infimum_rec(block->frame)), - index)); - if (UNIV_UNLIKELY(rec_get_info_bits(rec, page_rec_is_comp(rec)) - & REC_INFO_MIN_REC_FLAG)) { - /* This should be rolling back instant ADD COLUMN. - If this is a recovered transaction, then - index->is_instant() will hold until the - insert into SYS_COLUMNS is rolled back. 
*/ - ut_ad(index->table->supports_instant()); - ut_ad(index->is_primary()); - } else { - lock_update_delete(block, rec); - } - btr_page_empty(block, buf_block_get_page_zip(block), - index, 0, mtr); - page_cur_set_after_last(block, btr_cur_get_page_cur(cursor)); - - if (index->is_primary()) { - /* Concurrent access is prevented by - root_block->lock X-latch, so this should be - safe. */ - index->remove_instant(); + || rec_is_metadata(first_rec, *index)); + const bool is_metadata = rec_is_metadata(rec, *index); + /* We can remove the metadata when rolling back an + instant ALTER TABLE operation, or when deleting the + last user record on the page such that only metadata for + instant ADD COLUMN (not generic ALTER TABLE) remains. */ + const bool empty_table = is_metadata + || !index->is_instant() + || (first_rec != rec + && rec_is_add_metadata(first_rec, *index)); + if (UNIV_LIKELY(empty_table)) { + if (UNIV_LIKELY(!is_metadata)) { + lock_update_delete(block, rec); + } + btr_page_empty(block, buf_block_get_page_zip(block), + index, 0, mtr); + if (index->is_instant()) { + /* MDEV-17383: free metadata BLOBs! */ + index->clear_instant_alter(); + } + page_cur_set_after_last(block, + btr_cur_get_page_cur(cursor)); + return true; } - - return true; } offsets = rec_get_offsets(rec, cursor->index, offsets, true, @@ -5649,10 +5811,10 @@ btr_cur_pessimistic_delete( } if (page_is_leaf(page)) { - const bool is_metadata = rec_get_info_bits( - rec, page_rec_is_comp(rec)) & REC_INFO_MIN_REC_FLAG; + const bool is_metadata = rec_is_metadata( + rec, page_rec_is_comp(rec)); if (UNIV_UNLIKELY(is_metadata)) { - /* This should be rolling back instant ADD COLUMN. + /* This should be rolling back instant ALTER TABLE. If this is a recovered transaction, then index->is_instant() will hold until the insert into SYS_COLUMNS is rolled back. 
*/ @@ -5668,30 +5830,33 @@ btr_cur_pessimistic_delete( goto discard_page; } } else if (page_get_n_recs(page) == 1 - + (index->is_instant() - && !rec_is_metadata(rec, index))) { + + (index->is_instant() && !is_metadata)) { /* The whole index (and table) becomes logically empty. Empty the whole page. That is, if we are deleting the only user record, also delete the metadata record - if one exists (it exists if and only if is_instant()). + if one exists for instant ADD COLUMN + (not generic ALTER TABLE). If we are deleting the metadata record and the table becomes empty, clean up the whole page. */ + + const rec_t* first_rec = page_rec_get_next_const( + page_get_infimum_rec(page)); ut_ad(!index->is_instant() - || rec_is_metadata( - page_rec_get_next_const( - page_get_infimum_rec(page)), - index)); - btr_page_empty(block, page_zip, index, 0, mtr); - page_cur_set_after_last(block, - btr_cur_get_page_cur(cursor)); - if (index->is_primary()) { - /* Concurrent access is prevented by - index->lock and root_block->lock - X-latch, so this should be safe. */ - index->remove_instant(); + || rec_is_metadata(first_rec, *index)); + if (is_metadata || !index->is_instant() + || (first_rec != rec + && rec_is_add_metadata(first_rec, *index))) { + btr_page_empty(block, page_zip, index, 0, mtr); + if (index->is_instant()) { + /* MDEV-17383: free metadata BLOBs! */ + index->clear_instant_alter(); + } + page_cur_set_after_last( + block, + btr_cur_get_page_cur(cursor)); + ret = TRUE; + goto return_after_reservations; } - ret = TRUE; - goto return_after_reservations; } if (UNIV_LIKELY(!is_metadata)) { @@ -7521,16 +7686,20 @@ btr_store_big_rec_extern_fields( + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, rec_page_no, MLOG_4BYTES, &mtr); - - /* Zero out the unused part of the page. 
*/ - memset(page + page_zip_get_size(page_zip) - - c_stream.avail_out, - 0, c_stream.avail_out); mlog_log_string(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, page_zip_get_size(page_zip) - - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + - c_stream.avail_out, &mtr); + /* Zero out the unused part of the page. */ + if (c_stream.avail_out) { + mlog_memset(block, + page_zip_get_size(page_zip) + - c_stream.avail_out, + c_stream.avail_out, + 0, &mtr); + } /* Copy the page to compressed storage, because it will be flushed to disk from there. */ diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index 41661d226e1..3f898f7d033 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -151,13 +151,20 @@ btr_pcur_store_position( rec = page_rec_get_prev(rec); ut_ad(!page_rec_is_infimum(rec)); - ut_ad(!rec_is_metadata(rec, index)); + if (UNIV_UNLIKELY(rec_is_metadata(rec, *index))) { + ut_ad(index->table->instant); + ut_ad(page_get_n_recs(block->frame) == 1); + ut_ad(page_is_leaf(page)); + ut_ad(page_get_page_no(page) == index->page); + cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE; + return; + } cursor->rel_pos = BTR_PCUR_AFTER; } else if (page_rec_is_infimum_low(offs)) { rec = page_rec_get_next(rec); - if (rec_is_metadata(rec, index)) { + if (rec_is_metadata(rec, *index)) { rec = page_rec_get_next(rec); ut_ad(!page_rec_is_supremum(rec)); } @@ -167,10 +174,25 @@ btr_pcur_store_position( cursor->rel_pos = BTR_PCUR_ON; } - cursor->old_rec = dict_index_copy_rec_order_prefix( - index, rec, &cursor->old_n_fields, - &cursor->old_rec_buf, &cursor->buf_size); + if (index->is_ibuf()) { + ut_ad(!index->table->not_redundant()); + cursor->old_n_fields = rec_get_n_fields_old(rec); + } else if (page_rec_is_leaf(rec)) { + cursor->old_n_fields = dict_index_get_n_unique_in_tree(index); + } else if (index->is_spatial()) { + ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index) + == 
DICT_INDEX_SPATIAL_NODEPTR_SIZE); + /* For R-tree, we have to compare + the child page numbers as well. */ + cursor->old_n_fields = DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; + } else { + cursor->old_n_fields = dict_index_get_n_unique_in_tree(index); + } + cursor->old_rec = rec_copy_prefix_to_buf(rec, index, + cursor->old_n_fields, + &cursor->old_rec_buf, + &cursor->buf_size); cursor->block_when_stored = block; /* Function try to check if block is S/X latch. */ diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index 9c2dedeef9e..37a839727ec 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1190,7 +1190,7 @@ retry: rec = page_get_infimum_rec(page); rec = page_rec_get_next_low(rec, page_is_comp(page)); - if (rec_is_metadata(rec, index)) { + if (rec_is_metadata(rec, *index)) { rec = page_rec_get_next_low(rec, page_is_comp(page)); } @@ -1398,7 +1398,7 @@ btr_search_build_page_hash_index( rec = page_rec_get_next_const(page_get_infimum_rec(page)); - if (rec_is_metadata(rec, index)) { + if (rec_is_metadata(rec, *index)) { rec = page_rec_get_next_const(rec); if (!--n_recs) return; } @@ -1862,7 +1862,7 @@ btr_search_update_hash_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch) n_bytes, index->id); } - if (!page_rec_is_infimum(rec) && !rec_is_metadata(rec, index)) { + if (!page_rec_is_infimum(rec) && !rec_is_metadata(rec, *index)) { offsets = rec_get_offsets( rec, index, offsets, true, btr_search_get_n_fields(n_fields, n_bytes), &heap); diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index dd9dca496fb..de97964cf53 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -6232,7 +6232,6 @@ database_corrupted: && !recv_no_ibuf_operations && (bpage->id.space() == 0 || !is_predefined_tablespace(bpage->id.space())) - && !srv_is_tablespace_truncated(bpage->id.space()) && fil_page_get_type(frame) == FIL_PAGE_INDEX && page_is_leaf(frame)) { diff --git 
a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 099a3752f7f..4ccd348062e 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -555,12 +555,9 @@ buf_dblwr_process() if (page_no >= space->size) { - /* Do not report the warning if the tablespace - is scheduled for truncation or was truncated - and we have parsed an MLOG_TRUNCATE record. */ - if (!srv_is_tablespace_truncated(space_id) - && !srv_was_tablespace_truncated(space) - && !srv_is_undo_tablespace(space_id)) { + /* Do not report the warning for undo + tablespaces, because they can be truncated in place. */ + if (!srv_is_undo_tablespace(space_id)) { ib::warn() << "A copy of page " << page_id << " in the doublewrite buffer slot " << page_no_dblwr diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 3edb6c6ee98..9454b57f59e 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -95,11 +95,9 @@ buffer buf_pool if it is not already there, in which case does nothing. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock released by an i/o-handler thread. 
-@param[out] err DB_SUCCESS, DB_TABLESPACE_DELETED or - DB_TABLESPACE_TRUNCATED if we are trying - to read from a non-existent tablespace, a - tablespace which is just now being dropped, - or a tablespace which is truncated +@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED + if we are trying + to read from a non-existent tablespace @param[in] sync true if synchronous aio is desired @param[in] type IO type, SIMULATED, IGNORE_MISSING @param[in] mode BUF_READ_IBUF_PAGES_ONLY, ..., @@ -187,20 +185,8 @@ buf_read_page_low( } if (*err != DB_SUCCESS) { - if (*err == DB_TABLESPACE_TRUNCATED) { - /* Remove the page which is outside the - truncated tablespace bounds when recovering - from a crash happened during a truncation */ - buf_read_page_handle_error(bpage); - if (recv_recovery_on) { - mutex_enter(&recv_sys->mutex); - ut_ad(recv_sys->n_addrs > 0); - recv_sys->n_addrs--; - mutex_exit(&recv_sys->mutex); - } - return(0); - } else if (IORequest::ignore_missing(type) - || *err == DB_TABLESPACE_DELETED) { + if (IORequest::ignore_missing(type) + || *err == DB_TABLESPACE_DELETED) { buf_read_page_handle_error(bpage); return(0); } @@ -369,7 +355,6 @@ read_ahead: switch (err) { case DB_SUCCESS: - case DB_TABLESPACE_TRUNCATED: case DB_ERROR: break; case DB_TABLESPACE_DELETED: @@ -472,7 +457,6 @@ buf_read_page_background( switch (err) { case DB_SUCCESS: - case DB_TABLESPACE_TRUNCATED: case DB_ERROR: break; case DB_TABLESPACE_DELETED: @@ -755,7 +739,6 @@ buf_read_ahead_linear( switch (err) { case DB_SUCCESS: - case DB_TABLESPACE_TRUNCATED: case DB_TABLESPACE_DELETED: case DB_ERROR: break; @@ -853,7 +836,6 @@ tablespace_deleted: switch(err) { case DB_SUCCESS: - case DB_TABLESPACE_TRUNCATED: case DB_ERROR: break; case DB_TABLESPACE_DELETED: diff --git a/storage/innobase/data/data0data.cc b/storage/innobase/data/data0data.cc index cc14664821f..a2785da6cee 100644 --- a/storage/innobase/data/data0data.cc +++ b/storage/innobase/data/data0data.cc @@ -58,7 +58,12 @@ void 
dtuple_t::trim(const dict_index_t& index) for (; i > index.n_core_fields; i--) { const dfield_t* dfield = dtuple_get_nth_field(this, i - 1); const dict_col_t* col = dict_index_get_nth_col(&index, i - 1); - ut_ad(col->is_instant()); + + if (col->is_dropped()) { + continue; + } + + ut_ad(col->is_added()); ulint len = dfield_get_len(dfield); if (len != col->def_val.len) { break; @@ -596,7 +601,6 @@ dtuple_convert_big_rec( mem_heap_t* heap; big_rec_t* vector; dfield_t* dfield; - dict_field_t* ifield; ulint size; ulint n_fields; ulint local_len; @@ -606,14 +610,7 @@ dtuple_convert_big_rec( return(NULL); } - if (!dict_table_has_atomic_blobs(index->table)) { - /* up to MySQL 5.1: store a 768-byte prefix locally */ - local_len = BTR_EXTERN_FIELD_REF_SIZE - + DICT_ANTELOPE_MAX_INDEX_COL_LEN; - } else { - /* new-format table: do not store any BLOB prefix locally */ - local_len = BTR_EXTERN_FIELD_REF_SIZE; - } + ut_ad(index->n_uniq > 0); ut_a(dtuple_check_typed_no_assert(entry)); @@ -636,24 +633,41 @@ dtuple_convert_big_rec( stored externally */ n_fields = 0; + ulint longest_i; + + const bool mblob = entry->is_alter_metadata(); + ut_ad(entry->n_fields >= index->first_user_field() + mblob); + ut_ad(entry->n_fields - mblob <= index->n_fields); + + if (mblob) { + longest_i = index->first_user_field(); + dfield = dtuple_get_nth_field(entry, longest_i); + local_len = BTR_EXTERN_FIELD_REF_SIZE; + goto ext_write; + } + + if (!dict_table_has_atomic_blobs(index->table)) { + /* up to MySQL 5.1: store a 768-byte prefix locally */ + local_len = BTR_EXTERN_FIELD_REF_SIZE + + DICT_ANTELOPE_MAX_INDEX_COL_LEN; + } else { + /* new-format table: do not store any BLOB prefix locally */ + local_len = BTR_EXTERN_FIELD_REF_SIZE; + } while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, *n_ext), dict_table_is_comp(index->table), dict_index_get_n_fields(index), dict_table_page_size(index->table))) { - - ulint i; - ulint longest = 0; - ulint longest_i = ULINT_MAX; - byte* data; - - for 
(i = dict_index_get_n_unique_in_tree(index); - i < dtuple_get_n_fields(entry); i++) { + longest_i = 0; + for (ulint i = index->first_user_field(), longest = 0; + i + mblob < entry->n_fields; i++) { ulint savings; + dfield = dtuple_get_nth_field(entry, i + mblob); - dfield = dtuple_get_nth_field(entry, i); - ifield = dict_index_get_nth_field(index, i); + const dict_field_t* ifield = dict_index_get_nth_field( + index, i); /* Skip fixed-length, NULL, externally stored, or short columns */ @@ -695,7 +709,7 @@ skip_field: continue; } - if (!longest) { + if (!longest_i) { /* Cannot shorten more */ mem_heap_free(heap); @@ -708,9 +722,8 @@ skip_field: We store the first bytes locally to the record. Then we can calculate all ordering fields in all indexes from locally stored data. */ - dfield = dtuple_get_nth_field(entry, longest_i); - ifield = dict_index_get_nth_field(index, longest_i); +ext_write: local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE; vector->append( @@ -721,7 +734,8 @@ skip_field: + local_prefix_len)); /* Allocate the locally stored part of the column. */ - data = static_cast<byte*>(mem_heap_alloc(heap, local_len)); + byte* data = static_cast<byte*>( + mem_heap_alloc(heap, local_len)); /* Copy the local prefix. */ memcpy(data, dfield_get_data(dfield), local_prefix_len); @@ -735,7 +749,6 @@ skip_field: UNIV_MEM_ALLOC(data + local_prefix_len, BTR_EXTERN_FIELD_REF_SIZE); #endif - dfield_set_data(dfield, data, local_len); dfield_set_ext(dfield); diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc index e5bf33593ed..7a9b8556c1a 100644 --- a/storage/innobase/dict/dict0boot.cc +++ b/storage/innobase/dict/dict0boot.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. +Copyright (c) 2016, 2018, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -64,52 +64,14 @@ dict_hdr_get_new_id( (not assigned if NULL) */ index_id_t* index_id, /*!< out: index id (not assigned if NULL) */ - ulint* space_id, /*!< out: space id + ulint* space_id) /*!< out: space id (not assigned if NULL) */ - const dict_table_t* table, /*!< in: table */ - bool disable_redo) /*!< in: if true and table - object is NULL - then disable-redo */ { dict_hdr_t* dict_hdr; ib_id_t id; mtr_t mtr; mtr_start(&mtr); - if (table) { - if (table->is_temporary()) { - mtr.set_log_mode(MTR_LOG_NO_REDO); - } - } else if (disable_redo) { - /* In non-read-only mode we need to ensure that space-id header - page is written to disk else if page is removed from buffer - cache and re-loaded it would assign temporary tablespace id - to another tablespace. - This is not a case with read-only mode as there is no new object - that is created except temporary tablespace. */ - mtr.set_log_mode(srv_read_only_mode - ? MTR_LOG_NONE : MTR_LOG_NO_REDO); - } - - /* Server started and let's say space-id = x - - table created with file-per-table - - space-id = x + 1 - - crash - Case 1: If it was redo logged then we know that it will be - restored to x + 1 - Case 2: if not redo-logged - Header will have the old space-id = x - This is OK because on restart there is no object with - space id = x + 1 - Case 3: - space-id = x (on start) - space-id = x+1 (temp-table allocation) - no redo logging - space-id = x+2 (non-temp-table allocation), this get's - redo logged. - If there is a crash there will be only 2 entries - x (original) and x+2 (new) and disk hdr will be updated - to reflect x + 2 entry. - We cannot allocate the same space id to different objects. 
*/ dict_hdr = dict_hdr_get(&mtr); if (table_id) { @@ -212,7 +174,7 @@ dict_hdr_create( /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, fil_system.sys_space, DICT_TABLES_ID, - dict_ind_redundant, NULL, mtr); + dict_ind_redundant, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -223,7 +185,7 @@ dict_hdr_create( /*--------------------------*/ root_page_no = btr_create(DICT_UNIQUE, fil_system.sys_space, DICT_TABLE_IDS_ID, - dict_ind_redundant, NULL, mtr); + dict_ind_redundant, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -234,7 +196,7 @@ dict_hdr_create( /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, fil_system.sys_space, DICT_COLUMNS_ID, - dict_ind_redundant, NULL, mtr); + dict_ind_redundant, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -245,7 +207,7 @@ dict_hdr_create( /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, fil_system.sys_space, DICT_INDEXES_ID, - dict_ind_redundant, NULL, mtr); + dict_ind_redundant, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -256,7 +218,7 @@ dict_hdr_create( /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, fil_system.sys_space, DICT_FIELDS_ID, - dict_ind_redundant, NULL, mtr); + dict_ind_redundant, mtr); if (root_page_no == FIL_NULL) { return(FALSE); diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 25a90342f78..b1ddb7032ab 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -352,10 +352,12 @@ dict_build_table_def_step( { ut_ad(mutex_own(&dict_sys->mutex)); dict_table_t* table = node->table; + trx_t* trx = thr_get_trx(thr); ut_ad(!table->is_temporary()); ut_ad(!table->space); ut_ad(table->space_id == ULINT_UNDEFINED); - dict_table_assign_new_id(table, thr_get_trx(thr)); + dict_hdr_get_new_id(&table->id, NULL, NULL); + trx->table_id = table->id; /* Always set this 
bit for all new created tables */ DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); @@ -368,8 +370,6 @@ dict_build_table_def_step( ut_ad(DICT_TF_GET_ZIP_SSIZE(table->flags) == 0 || dict_table_has_atomic_blobs(table)); - trx_t* trx = thr_get_trx(thr); - ut_ad(trx->table_id); mtr_t mtr; trx_undo_t* undo = trx->rsegs.m_redo.undo; if (undo && !undo->table_id @@ -397,7 +397,7 @@ dict_build_table_def_step( } /* Get a new tablespace ID */ ulint space_id; - dict_hdr_get_new_id(NULL, NULL, &space_id, table, false); + dict_hdr_get_new_id(NULL, NULL, &space_id); DBUG_EXECUTE_IF( "ib_create_table_fail_out_of_space_ids", @@ -745,7 +745,7 @@ dict_build_index_def_step( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - dict_hdr_get_new_id(NULL, &index->id, NULL, table, false); + dict_hdr_get_new_id(NULL, &index->id, NULL); /* Inherit the space id from the table; we store all indexes of a table in the same tablespace */ @@ -785,7 +785,7 @@ dict_build_index_def( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - dict_hdr_get_new_id(NULL, &index->id, NULL, table, false); + dict_hdr_get_new_id(NULL, &index->id, NULL); /* Note that the index was created by this transaction. */ index->trx_id = trx->id; @@ -859,7 +859,7 @@ dict_create_index_tree_step( node->page_no = btr_create( index->type, index->table->space, - index->id, index, NULL, &mtr); + index->id, index, &mtr); if (node->page_no == FIL_NULL) { err = DB_OUT_OF_FILE_SPACE; @@ -905,7 +905,7 @@ dict_create_index_tree_in_mem( ut_ad(!(index->table->flags2 & DICT_TF2_DISCARDED)); index->page = btr_create(index->type, index->table->space, - index->id, index, NULL, &mtr); + index->id, index, &mtr); mtr_commit(&mtr); index->trx_id = trx->id; @@ -971,13 +971,6 @@ dict_drop_index_tree( return(false); } - /* If tablespace is scheduled for truncate, do not try to drop - the indexes in that tablespace. There is a truncate fixup action - which will take care of it. 
*/ - if (srv_is_tablespace_truncated(space)) { - return(false); - } - btr_free_if_exists(page_id_t(space, root_page_no), page_size, mach_read_from_8(ptr), mtr); @@ -1053,7 +1046,7 @@ dict_recreate_index_tree( ulint root_page_no = (index->type & DICT_FTS) ? FIL_NULL : btr_create(type, table->space, - index_id, index, NULL, mtr); + index_id, index, mtr); index->page = unsigned(root_page_no); return root_page_no; } @@ -2134,6 +2127,8 @@ dict_create_add_foreigns_to_dictionary( return(DB_ERROR); } + error = DB_SUCCESS; + for (dict_foreign_set::const_iterator it = local_fk_set.begin(); it != local_fk_set.end(); ++it) { @@ -2145,12 +2140,11 @@ dict_create_add_foreigns_to_dictionary( table->name.m_name, foreign, trx); if (error != DB_SUCCESS) { - - return(error); + break; } } - return(DB_SUCCESS); + return error; } /****************************************************************//** @@ -2382,15 +2376,3 @@ dict_delete_tablespace_and_datafiles( return(err); } - -/** Assign a new table ID and put it into the table cache and the transaction. -@param[in,out] table Table that needs an ID -@param[in,out] trx Transaction */ -void -dict_table_assign_new_id( - dict_table_t* table, - trx_t* trx) -{ - dict_hdr_get_new_id(&table->id, NULL, NULL, table, false); - trx->table_id = table->id; -} diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index c6f6455be73..dd4a56c365e 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -406,6 +406,27 @@ dict_table_stats_unlock( } } + +/** Open a persistent table. 
+@param[in] table_id persistent table identifier +@param[in] ignore_err errors to ignore +@param[in] cached_only whether to skip loading +@return persistent table +@retval NULL if not found */ +static dict_table_t* dict_table_open_on_id_low( + table_id_t table_id, + dict_err_ignore_t ignore_err, + bool cached_only) +{ + dict_table_t* table = dict_sys->get_table(table_id); + + if (!table && !cached_only) { + table = dict_load_table_on_id(table_id, ignore_err); + } + + return table; +} + /**********************************************************************//** Try to drop any indexes after an aborted index creation. This can also be after a server kill during DROP INDEX. */ @@ -1084,20 +1105,19 @@ dict_init(void) dict_operation_lock = static_cast<rw_lock_t*>( ut_zalloc_nokey(sizeof(*dict_operation_lock))); - dict_sys = static_cast<dict_sys_t*>(ut_zalloc_nokey(sizeof(*dict_sys))); + dict_sys = new (ut_zalloc_nokey(sizeof(*dict_sys))) dict_sys_t(); UT_LIST_INIT(dict_sys->table_LRU, &dict_table_t::table_LRU); UT_LIST_INIT(dict_sys->table_non_LRU, &dict_table_t::table_LRU); mutex_create(LATCH_ID_DICT_SYS, &dict_sys->mutex); - dict_sys->table_hash = hash_create( - buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); + const ulint hash_size = buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); - dict_sys->table_id_hash = hash_create( - buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); + dict_sys->table_hash = hash_create(hash_size); + dict_sys->table_id_hash = hash_create(hash_size); + dict_sys->temp_id_hash = hash_create(hash_size); rw_lock_create(dict_operation_lock_key, dict_operation_lock, SYNC_DICT_OPERATION); @@ -1257,8 +1277,7 @@ dict_table_add_system_columns( } /** Add the table definition to the data dictionary cache */ -void -dict_table_t::add_to_cache() +void dict_table_t::add_to_cache() { ut_ad(dict_lru_validate()); ut_ad(mutex_own(&dict_sys->mutex)); @@ -1266,7 +1285,6 @@ 
dict_table_t::add_to_cache() cached = TRUE; ulint fold = ut_fold_string(name.m_name); - ulint id_fold = ut_fold_ull(id); /* Look for a table with the same name: error if such exists */ { @@ -1284,31 +1302,30 @@ dict_table_t::add_to_cache() ut_ad(table2 == NULL); #endif /* UNIV_DEBUG */ } + HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, + this); /* Look for a table with the same id: error if such exists */ + hash_table_t* id_hash = is_temporary() + ? dict_sys->temp_id_hash : dict_sys->table_id_hash; + const ulint id_fold = ut_fold_ull(id); { dict_table_t* table2; - HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold, + HASH_SEARCH(id_hash, id_hash, id_fold, dict_table_t*, table2, ut_ad(table2->cached), table2->id == id); ut_a(table2 == NULL); #ifdef UNIV_DEBUG /* Look for the same table pointer with a different id */ - HASH_SEARCH_ALL(id_hash, dict_sys->table_id_hash, + HASH_SEARCH_ALL(id_hash, id_hash, dict_table_t*, table2, ut_ad(table2->cached), table2 == this); ut_ad(table2 == NULL); #endif /* UNIV_DEBUG */ - } - - /* Add table to hash table of tables */ - HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, - this); - /* Add table to hash table of tables based on table id */ - HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold, - this); + HASH_INSERT(dict_table_t, id_hash, id_hash, id_fold, this); + } if (can_be_evicted) { UT_LIST_ADD_FIRST(dict_sys->table_LRU, this); @@ -1433,7 +1450,7 @@ dict_make_room_in_cache( ut_ad(0); } };); - dict_table_remove_from_cache_low(table, TRUE); + dict_table_remove_from_cache(table, true); ++n_evicted; } @@ -1955,6 +1972,7 @@ dict_table_change_id_in_cache( ut_ad(table); ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + ut_ad(!table->is_temporary()); /* Remove the table from the hash table of id's */ @@ -1967,14 +1985,11 @@ dict_table_change_id_in_cache( ut_fold_ull(table->id), table); } 
-/**********************************************************************//** -Removes a table object from the dictionary cache. */ -void -dict_table_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in, own: table */ - ibool lru_evict) /*!< in: TRUE if table being evicted - to make room in the table LRU list */ +/** Evict a table definition from the InnoDB data dictionary cache. +@param[in,out] table cached table definition to be evicted +@param[in] lru whether this is part of least-recently-used eviction +@param[in] keep whether to keep (not free) the object */ +void dict_table_remove_from_cache(dict_table_t* table, bool lru, bool keep) { dict_foreign_t* foreign; dict_index_t* index; @@ -2007,7 +2022,7 @@ dict_table_remove_from_cache_low( index != NULL; index = UT_LIST_GET_LAST(table->indexes)) { - dict_index_remove_from_cache_low(table, index, lru_evict); + dict_index_remove_from_cache_low(table, index, lru); } /* Remove table from the hash tables of tables */ @@ -2015,8 +2030,10 @@ dict_table_remove_from_cache_low( HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, ut_fold_string(table->name.m_name), table); - HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash, - ut_fold_ull(table->id), table); + hash_table_t* id_hash = table->is_temporary() + ? dict_sys->temp_id_hash : dict_sys->table_id_hash; + const ulint id_fold = ut_fold_ull(table->id); + HASH_DELETE(dict_table_t, id_hash, id_hash, id_fold, table); /* Remove table from LRU or non-LRU list. */ if (table->can_be_evicted) { @@ -2029,7 +2046,7 @@ dict_table_remove_from_cache_low( ut_ad(dict_lru_validate()); - if (lru_evict && table->drop_aborted) { + if (lru && table->drop_aborted) { /* When evicting the table definition, drop the orphan indexes from the data dictionary and free the index pages. 
*/ @@ -2054,17 +2071,9 @@ dict_table_remove_from_cache_low( UT_DELETE(table->vc_templ); } - dict_mem_table_free(table); -} - -/**********************************************************************//** -Removes a table object from the dictionary cache. */ -void -dict_table_remove_from_cache( -/*=========================*/ - dict_table_t* table) /*!< in, own: table */ -{ - dict_table_remove_from_cache_low(table, FALSE); + if (!keep) { + dict_mem_table_free(table); + } } /****************************************************************//** @@ -5453,46 +5462,6 @@ dict_index_build_node_ptr( return(tuple); } -/**********************************************************************//** -Copies an initial segment of a physical record, long enough to specify an -index entry uniquely. -@return pointer to the prefix record */ -rec_t* -dict_index_copy_rec_order_prefix( -/*=============================*/ - const dict_index_t* index, /*!< in: index */ - const rec_t* rec, /*!< in: record for which to - copy prefix */ - ulint* n_fields,/*!< out: number of fields copied */ - byte** buf, /*!< in/out: memory buffer for the - copied prefix, or NULL */ - ulint* buf_size)/*!< in/out: buffer size */ -{ - ulint n; - - UNIV_PREFETCH_R(rec); - - if (dict_index_is_ibuf(index)) { - ut_ad(!dict_table_is_comp(index->table)); - n = rec_get_n_fields_old(rec); - } else { - if (page_rec_is_leaf(rec)) { - n = dict_index_get_n_unique_in_tree(index); - } else if (dict_index_is_spatial(index)) { - ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index) - == DICT_INDEX_SPATIAL_NODEPTR_SIZE); - /* For R-tree, we have to compare - the child page numbers as well. */ - n = DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; - } else { - n = dict_index_get_n_unique_in_tree(index); - } - } - - *n_fields = n; - return(rec_copy_prefix_to_buf(rec, index, n, buf, buf_size)); -} - /** Convert a physical record into a search tuple. 
@param[in] rec index record (not necessarily in an index page) @param[in] index index @@ -6546,17 +6515,17 @@ dict_resize() /* all table entries are in table_LRU and table_non_LRU lists */ hash_table_free(dict_sys->table_hash); hash_table_free(dict_sys->table_id_hash); + hash_table_free(dict_sys->temp_id_hash); - dict_sys->table_hash = hash_create( - buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); - - dict_sys->table_id_hash = hash_create( - buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); + const ulint hash_size = buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); + dict_sys->table_hash = hash_create(hash_size); + dict_sys->table_id_hash = hash_create(hash_size); + dict_sys->temp_id_hash = hash_create(hash_size); for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); table; table = UT_LIST_GET_NEXT(table_LRU, table)) { + ut_ad(!table->is_temporary()); ulint fold = ut_fold_string(table->name.m_name); ulint id_fold = ut_fold_ull(table->id); @@ -6575,8 +6544,10 @@ dict_resize() HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); - HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, - id_fold, table); + hash_table_t* id_hash = table->is_temporary() + ? dict_sys->temp_id_hash : dict_sys->table_id_hash; + + HASH_INSERT(dict_table_t, id_hash, id_hash, id_fold, table); } mutex_exit(&dict_sys->mutex); @@ -6599,7 +6570,7 @@ dict_close(void) /* Free the hash elements. We don't remove them from the table because we are going to destroy the table anyway. */ - for (ulint i = 0; i < hash_get_n_cells(dict_sys->table_id_hash); i++) { + for (ulint i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) { dict_table_t* table; table = static_cast<dict_table_t*>( @@ -6620,6 +6591,7 @@ dict_close(void) /* The elements are the same instance as in dict_sys->table_hash, therefore we don't delete the individual elements. 
*/ hash_table_free(dict_sys->table_id_hash); + hash_table_free(dict_sys->temp_id_hash); mutex_exit(&dict_sys->mutex); mutex_free(&dict_sys->mutex); diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc index 3056f73f0cb..5d2052b1f6f 100644 --- a/storage/innobase/dict/dict0mem.cc +++ b/storage/innobase/dict/dict0mem.cc @@ -190,8 +190,6 @@ dict_mem_table_create( || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) { table->fts = fts_create(table); table->fts->cache = fts_cache_create(table); - } else { - table->fts = NULL; } new(&table->foreign_set) dict_foreign_set(); @@ -531,6 +529,14 @@ dict_mem_table_col_rename_low( = dict_index_get_nth_field( index, i); + ut_ad(!field->name + == field->col->is_dropped()); + if (!field->name) { + /* dropped columns lack a name */ + ut_ad(index->is_instant()); + continue; + } + /* if is_virtual and that in field->col does not match, continue */ if ((!is_virtual) != @@ -715,6 +721,7 @@ dict_mem_fill_column_struct( column->mbmaxlen = mbmaxlen; column->def_val.data = NULL; column->def_val.len = UNIV_SQL_DEFAULT; + ut_ad(!column->is_dropped()); } /**********************************************************************//** @@ -1191,293 +1198,122 @@ operator<< (std::ostream& out, const dict_foreign_set& fk_set) return(out); } -/** Adjust clustered index metadata for instant ADD COLUMN. -@param[in] clustered index definition after instant ADD COLUMN */ -inline void dict_index_t::instant_add_field(const dict_index_t& instant) +/** Reconstruct the clustered index fields. 
*/ +inline void dict_index_t::reconstruct_fields() { DBUG_ASSERT(is_primary()); - DBUG_ASSERT(instant.is_primary()); - DBUG_ASSERT(!instant.is_instant()); - DBUG_ASSERT(n_def == n_fields); - DBUG_ASSERT(instant.n_def == instant.n_fields); - - DBUG_ASSERT(type == instant.type); - DBUG_ASSERT(trx_id_offset == instant.trx_id_offset); - DBUG_ASSERT(n_user_defined_cols == instant.n_user_defined_cols); - DBUG_ASSERT(n_uniq == instant.n_uniq); - DBUG_ASSERT(instant.n_fields > n_fields); - DBUG_ASSERT(instant.n_def > n_def); - DBUG_ASSERT(instant.n_nullable >= n_nullable); - DBUG_ASSERT(instant.n_core_fields >= n_core_fields); - DBUG_ASSERT(instant.n_core_null_bytes >= n_core_null_bytes); - - n_fields = instant.n_fields; - n_def = instant.n_def; - n_nullable = instant.n_nullable; - fields = static_cast<dict_field_t*>( - mem_heap_dup(heap, instant.fields, n_fields * sizeof *fields)); - - ut_d(unsigned n_null = 0); - - for (unsigned i = 0; i < n_fields; i++) { - DBUG_ASSERT(fields[i].same(instant.fields[i])); - const dict_col_t* icol = instant.fields[i].col; - DBUG_ASSERT(!icol->is_virtual()); - dict_col_t* col = fields[i].col = &table->cols[ - icol - instant.table->cols]; - fields[i].name = col->name(*table); - ut_d(n_null += col->is_nullable()); - } - ut_ad(n_null == n_nullable); -} + n_fields += table->instant->n_dropped; + n_def += table->instant->n_dropped; -/** Adjust metadata for instant ADD COLUMN. 
-@param[in] table table definition after instant ADD COLUMN */ -void dict_table_t::instant_add_column(const dict_table_t& table) -{ - DBUG_ASSERT(!table.cached); - DBUG_ASSERT(table.n_def == table.n_cols); - DBUG_ASSERT(table.n_t_def == table.n_t_cols); - DBUG_ASSERT(n_def == n_cols); - DBUG_ASSERT(n_t_def == n_t_cols); - DBUG_ASSERT(table.n_cols > n_cols); - ut_ad(mutex_own(&dict_sys->mutex)); - - const char* end = table.col_names; - for (unsigned i = table.n_cols; i--; ) end += strlen(end) + 1; - - col_names = static_cast<char*>( - mem_heap_dup(heap, table.col_names, - ulint(end - table.col_names))); - const dict_col_t* const old_cols = cols; - const dict_col_t* const old_cols_end = cols + n_cols; - cols = static_cast<dict_col_t*>(mem_heap_dup(heap, table.cols, - table.n_cols - * sizeof *cols)); - - /* Preserve the default values of previously instantly - added columns. */ - for (unsigned i = unsigned(n_cols) - DATA_N_SYS_COLS; i--; ) { - cols[i].def_val = old_cols[i].def_val; - } + const unsigned n_first = first_user_field(); + + dict_field_t* tfields = static_cast<dict_field_t*>( + mem_heap_zalloc(heap, n_fields * sizeof *fields)); + + memcpy(tfields, fields, n_first * sizeof *fields); - /* Copy the new default values to this->heap. 
*/ - for (unsigned i = n_cols; i < table.n_cols; i++) { - dict_col_t& c = cols[i - DATA_N_SYS_COLS]; - DBUG_ASSERT(c.is_instant()); - if (c.def_val.len == 0) { - c.def_val.data = field_ref_zero; - } else if (const void*& d = c.def_val.data) { - d = mem_heap_dup(heap, d, c.def_val.len); + n_nullable = 0; + ulint n_core_null = 0; + const bool comp = dict_table_is_comp(table); + const auto* non_pk_col_map = table->instant->non_pk_col_map; + for (unsigned i = n_first, j = 0; i < n_fields; ) { + dict_field_t& f = tfields[i++]; + auto c = *non_pk_col_map++; + if (c & 1U << 15) { + f.col = &table->instant->dropped[j++]; + DBUG_ASSERT(f.col->is_dropped()); + f.fixed_len = dict_col_get_fixed_size(f.col, comp); } else { - DBUG_ASSERT(c.def_val.len == UNIV_SQL_NULL); + const auto old = std::find_if( + fields + n_first, fields + n_fields, + [c](const dict_field_t& o) + { return o.col->ind == c; }); + ut_ad(old >= &fields[n_first]); + ut_ad(old < &fields[n_fields]); + DBUG_ASSERT(!old->prefix_len); + DBUG_ASSERT(old->col == &table->cols[c]); + f = *old; } - } - const unsigned old_n_cols = n_cols; - const unsigned n_add = unsigned(table.n_cols - n_cols); - - n_t_def += n_add; - n_t_cols += n_add; - n_cols = table.n_cols; - n_def = n_cols; - - for (unsigned i = n_v_def; i--; ) { - const dict_v_col_t& v = v_cols[i]; - for (ulint n = v.num_base; n--; ) { - dict_col_t*& base = v.base_col[n]; - if (!base->is_virtual()) { - DBUG_ASSERT(base >= old_cols); - size_t n = size_t(base - old_cols); - DBUG_ASSERT(n + DATA_N_SYS_COLS < old_n_cols); - base = &cols[n]; - } + f.col->clear_instant(); + if (f.col->is_nullable()) { + n_nullable++; + n_core_null += i <= n_core_fields; } } - dict_index_t* index = dict_table_get_first_index(this); - - index->instant_add_field(*dict_table_get_first_index(&table)); - - while ((index = dict_table_get_next_index(index)) != NULL) { - for (unsigned i = 0; i < index->n_fields; i++) { - dict_field_t& field = index->fields[i]; - if (field.col < old_cols - || 
field.col >= old_cols_end) { - DBUG_ASSERT(field.col->is_virtual()); - } else { - /* Secondary indexes may contain user - columns and DB_ROW_ID (if there is - GEN_CLUST_INDEX instead of PRIMARY KEY), - but not DB_TRX_ID,DB_ROLL_PTR. */ - DBUG_ASSERT(field.col >= old_cols); - size_t n = size_t(field.col - old_cols); - DBUG_ASSERT(n + DATA_N_SYS_COLS <= old_n_cols); - if (n + DATA_N_SYS_COLS >= old_n_cols) { - /* Replace DB_ROW_ID */ - n += n_add; - } - field.col = &cols[n]; - DBUG_ASSERT(!field.col->is_virtual()); - field.name = field.col->name(*this); - } - } - } + fields = tfields; + n_core_null_bytes = UT_BITS_IN_BYTES(n_core_null); } -/** Roll back instant_add_column(). -@param[in] old_n_cols original n_cols -@param[in] old_cols original cols -@param[in] old_col_names original col_names */ -void -dict_table_t::rollback_instant( - unsigned old_n_cols, - dict_col_t* old_cols, - const char* old_col_names) +/** Reconstruct dropped or reordered columns. +@param[in] metadata data from serialise_columns() +@param[in] len length of the metadata, in bytes +@return whether parsing the metadata failed */ +bool dict_table_t::deserialise_columns(const byte* metadata, ulint len) { - ut_ad(mutex_own(&dict_sys->mutex)); - dict_index_t* index = indexes.start; - /* index->is_instant() does not necessarily hold here, because - the table may have been emptied */ - DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS); - DBUG_ASSERT(n_cols >= old_n_cols); - DBUG_ASSERT(n_cols == n_def); - DBUG_ASSERT(index->n_def == index->n_fields); - - const unsigned n_remove = n_cols - old_n_cols; - - for (unsigned i = index->n_fields - n_remove; i < index->n_fields; - i++) { - if (index->fields[i].col->is_nullable()) { - index->n_nullable--; - } - } + DBUG_ASSERT(!instant); - index->n_fields -= n_remove; - index->n_def = index->n_fields; - if (index->n_core_fields > index->n_fields) { - index->n_core_fields = index->n_fields; - index->n_core_null_bytes - = UT_BITS_IN_BYTES(unsigned(index->n_nullable)); - } 
+ unsigned num_non_pk_fields = mach_read_from_4(metadata); + metadata += 4; - const dict_col_t* const new_cols = cols; - const dict_col_t* const new_cols_end = cols + n_cols; - - cols = old_cols; - col_names = old_col_names; - n_cols = old_n_cols; - n_def = old_n_cols; - n_t_def -= n_remove; - n_t_cols -= n_remove; - - for (unsigned i = n_v_def; i--; ) { - const dict_v_col_t& v = v_cols[i]; - for (ulint n = v.num_base; n--; ) { - dict_col_t*& base = v.base_col[n]; - if (!base->is_virtual()) { - base = &cols[base - new_cols]; - } - } + if (num_non_pk_fields >= REC_MAX_N_FIELDS - 3) { + return true; } - do { - for (unsigned i = 0; i < index->n_fields; i++) { - dict_field_t& field = index->fields[i]; - if (field.col < new_cols - || field.col >= new_cols_end) { - DBUG_ASSERT(field.col->is_virtual()); - } else { - DBUG_ASSERT(field.col >= new_cols); - size_t n = size_t(field.col - new_cols); - DBUG_ASSERT(n <= n_cols); - if (n + DATA_N_SYS_COLS >= n_cols) { - n -= n_remove; - } - field.col = &cols[n]; - DBUG_ASSERT(!field.col->is_virtual()); - field.name = field.col->name(*this); - } - } - } while ((index = dict_table_get_next_index(index)) != NULL); -} + dict_index_t* index = UT_LIST_GET_FIRST(indexes); -/** Trim the instantly added columns when an insert into SYS_COLUMNS -is rolled back during ALTER TABLE or recovery. 
-@param[in] n number of surviving non-system columns */ -void dict_table_t::rollback_instant(unsigned n) -{ - ut_ad(mutex_own(&dict_sys->mutex)); - dict_index_t* index = indexes.start; - DBUG_ASSERT(index->is_instant()); - DBUG_ASSERT(index->n_def == index->n_fields); - DBUG_ASSERT(n_cols == n_def); - DBUG_ASSERT(n >= index->n_uniq); - DBUG_ASSERT(n_cols > n + DATA_N_SYS_COLS); - const unsigned n_remove = n_cols - n - DATA_N_SYS_COLS; - - char* names = const_cast<char*>(dict_table_get_col_name(this, n)); - const char* sys = names; - for (unsigned i = n_remove; i--; ) { - sys += strlen(sys) + 1; + if (num_non_pk_fields < unsigned(index->n_fields) + - index->first_user_field()) { + return true; } - static const char system[] = "DB_ROW_ID\0DB_TRX_ID\0DB_ROLL_PTR"; - DBUG_ASSERT(!memcmp(sys, system, sizeof system)); - for (unsigned i = index->n_fields - n_remove; i < index->n_fields; - i++) { - if (index->fields[i].col->is_nullable()) { - index->n_nullable--; + + uint16_t* non_pk_col_map = static_cast<uint16_t*>( + mem_heap_alloc(heap, + num_non_pk_fields * sizeof *non_pk_col_map)); + + unsigned n_dropped_cols = 0; + + for (unsigned i = 0; i < num_non_pk_fields; i++) { + non_pk_col_map[i] = mach_read_from_2(metadata); + metadata += 2; + + if (non_pk_col_map[i] & 1U << 15) { + if ((non_pk_col_map[i] & ~(3U << 14)) + > DICT_MAX_FIXED_COL_LEN + 1) { + return true; + } + n_dropped_cols++; + } else if (non_pk_col_map[i] >= n_cols) { + return true; } } - index->n_fields -= n_remove; - index->n_def = index->n_fields; - memmove(names, sys, sizeof system); - memmove(cols + n, cols + n_cols - DATA_N_SYS_COLS, - DATA_N_SYS_COLS * sizeof *cols); - n_cols -= n_remove; - n_def = n_cols; - n_t_cols -= n_remove; - n_t_def -= n_remove; - - for (unsigned i = DATA_N_SYS_COLS; i--; ) { - cols[n_cols - i].ind--; - } - if (dict_index_is_auto_gen_clust(index)) { - DBUG_ASSERT(index->n_uniq == 1); - dict_field_t* field = index->fields; - field->name = sys; - field->col = 
dict_table_get_sys_col(this, DATA_ROW_ID); - field++; - field->name = sys + sizeof "DB_ROW_ID"; - field->col = dict_table_get_sys_col(this, DATA_TRX_ID); - field++; - field->name = sys + sizeof "DB_ROW_ID\0DB_TRX_ID"; - field->col = dict_table_get_sys_col(this, DATA_ROLL_PTR); - - /* Replace the DB_ROW_ID column in secondary indexes. */ - while ((index = dict_table_get_next_index(index)) != NULL) { - field = &index->fields[index->n_fields - 1]; - DBUG_ASSERT(field->col->mtype == DATA_SYS); - DBUG_ASSERT(field->col->prtype - == DATA_NOT_NULL + DATA_TRX_ID); - field->col--; - field->name = sys; + dict_col_t* dropped_cols = static_cast<dict_col_t*>(mem_heap_zalloc( + heap, n_dropped_cols * sizeof(dict_col_t))); + instant = new (mem_heap_alloc(heap, sizeof *instant)) dict_instant_t(); + instant->n_dropped = n_dropped_cols; + instant->dropped = dropped_cols; + instant->non_pk_col_map = non_pk_col_map; + + dict_col_t* col = dropped_cols; + for (unsigned i = 0; i < num_non_pk_fields; i++) { + if (non_pk_col_map[i] & 1U << 15) { + auto fixed_len = non_pk_col_map[i] & ~(3U << 14); + DBUG_ASSERT(fixed_len <= DICT_MAX_FIXED_COL_LEN + 1); + (col++)->set_dropped(non_pk_col_map[i] & 1U << 14, + fixed_len == 1, + fixed_len > 1 ? fixed_len - 1 + : 0); } - - return; } + DBUG_ASSERT(col == &dropped_cols[n_dropped_cols]); - dict_field_t* field = &index->fields[index->n_uniq]; - field->name = sys + sizeof "DB_ROW_ID"; - field->col = dict_table_get_sys_col(this, DATA_TRX_ID); - field++; - field->name = sys + sizeof "DB_ROW_ID\0DB_TRX_ID"; - field->col = dict_table_get_sys_col(this, DATA_ROLL_PTR); + UT_LIST_GET_FIRST(indexes)->reconstruct_fields(); + return false; } - /** Check if record in clustered index is historical row. 
@param[in] rec clustered row @param[in] offsets offsets diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 0c2ee5203cf..9c4f1a0290d 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -42,7 +42,6 @@ Created 10/25/1995 Heikki Tuuri #include "os0file.h" #include "page0zip.h" #include "row0mysql.h" -#include "row0trunc.h" #include "srv0start.h" #include "trx0purge.h" #include "buf0lru.h" @@ -166,9 +165,6 @@ ulint fil_n_pending_log_flushes = 0; /** Number of pending tablespace flushes */ ulint fil_n_pending_tablespace_flushes = 0; -/** The null file address */ -const fil_addr_t fil_addr_null = {FIL_NULL, 0}; - /** The tablespace memory cache. This variable is NULL before the module is initialized. */ fil_system_t fil_system; @@ -515,7 +511,6 @@ bool fil_node_t::read_page0(bool first) ut_free(buf2); return false; } - srv_stats.page0_read.add(1); const ulint space_id = fsp_header_get_space_id(page); ulint flags = fsp_header_get_flags(page); const ulint size = fsp_header_get_field(page, FSP_SIZE); @@ -595,8 +590,7 @@ static bool fil_node_open_file(fil_node_t* node) if (first_time_open || (space->purpose == FIL_TYPE_TABLESPACE && node == UT_LIST_GET_FIRST(space->chain) - && srv_startup_is_before_trx_rollback_phase - && !undo::Truncate::was_tablespace_truncated(space->id))) { + && srv_startup_is_before_trx_rollback_phase)) { /* We do not know the size of the file yet. First we open the file in the normal mode, no async I/O here, for simplicity. 
Then do some checks, and close the @@ -4180,7 +4174,7 @@ fil_report_invalid_page_access( @param[in] message message for aio handler if non-sync aio used, else ignored @param[in] ignore_missing_space true=ignore missing space duging read -@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED +@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ dberr_t fil_io( @@ -4312,19 +4306,6 @@ fil_io( break; } else { - if (space->id != TRX_SYS_SPACE - && UT_LIST_GET_LEN(space->chain) == 1 - && (srv_is_tablespace_truncated(space->id) - || srv_was_tablespace_truncated(space)) - && req_type.is_read()) { - - /* Handle page which is outside the truncated - tablespace bounds when recovering from a crash - happened during a truncation */ - mutex_exit(&fil_system.mutex); - return(DB_TABLESPACE_TRUNCATED); - } - cur_page_no -= node->size; node = UT_LIST_GET_NEXT(chain, node); @@ -5044,116 +5025,6 @@ fil_names_clear( return(do_write); } -/** Truncate a single-table tablespace. The tablespace must be cached -in the memory cache. -@param space_id space id -@param dir_path directory path -@param tablename the table name in the usual - databasename/tablename format of InnoDB -@param flags tablespace flags -@param trunc_to_default truncate to default size if tablespace - is being newly re-initialized. 
-@return DB_SUCCESS or error */ -dberr_t -truncate_t::truncate( -/*=================*/ - ulint space_id, - const char* dir_path, - const char* tablename, - ulint flags, - bool trunc_to_default) -{ - dberr_t err = DB_SUCCESS; - char* path; - - ut_a(!is_system_tablespace(space_id)); - - if (FSP_FLAGS_HAS_DATA_DIR(flags)) { - ut_ad(dir_path != NULL); - path = fil_make_filepath(dir_path, tablename, IBD, true); - } else { - path = fil_make_filepath(NULL, tablename, IBD, false); - } - - if (path == NULL) { - return(DB_OUT_OF_MEMORY); - } - - mutex_enter(&fil_system.mutex); - - fil_space_t* space = fil_space_get_by_id(space_id); - - /* The following code must change when InnoDB supports - multiple datafiles per tablespace. */ - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - - fil_node_t* node = UT_LIST_GET_FIRST(space->chain); - - if (trunc_to_default) { - space->size = node->size = FIL_IBD_FILE_INITIAL_SIZE; - } - - const bool already_open = node->is_open(); - - if (!already_open) { - - bool ret; - - node->handle = os_file_create_simple_no_error_handling( - innodb_data_file_key, path, OS_FILE_OPEN, - OS_FILE_READ_WRITE, - space->purpose != FIL_TYPE_TEMPORARY - && srv_read_only_mode, &ret); - - if (!ret) { - ib::error() << "Failed to open tablespace file " - << path << "."; - - ut_free(path); - - return(DB_ERROR); - } - - ut_a(node->is_open()); - } - - os_offset_t trunc_size = trunc_to_default - ? FIL_IBD_FILE_INITIAL_SIZE - : space->size; - - const bool success = os_file_truncate( - path, node->handle, trunc_size << srv_page_size_shift); - - if (!success) { - ib::error() << "Cannot truncate file " << path - << " in TRUNCATE TABLESPACE."; - err = DB_ERROR; - } - - space->stop_new_ops = false; - - /* If we opened the file in this function, close it. 
*/ - if (!already_open) { - bool closed = os_file_close(node->handle); - - if (!closed) { - - ib::error() << "Failed to close tablespace file " - << path << "."; - - err = DB_ERROR; - } else { - node->handle = OS_FILE_CLOSED; - } - } - - mutex_exit(&fil_system.mutex); - - ut_free(path); - - return(err); -} - /* Unit Tests */ #ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH #define MF fil_make_filepath diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 6ef6764ebb4..09342918753 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -621,8 +621,7 @@ fsp_space_modify_check( case MTR_LOG_NO_REDO: ut_ad(space->purpose == FIL_TYPE_TEMPORARY || space->purpose == FIL_TYPE_IMPORT - || my_atomic_loadlint(&space->redo_skipped_count) - || srv_is_tablespace_truncated(space->id)); + || my_atomic_loadlint(&space->redo_skipped_count)); return; case MTR_LOG_ALL: /* We may only write redo log for a persistent tablespace. */ @@ -728,23 +727,23 @@ void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr) mlog_write_ulint(FSP_HEADER_OFFSET + FSP_SPACE_ID + block->frame, space->id, MLOG_4BYTES, mtr); - mlog_write_ulint(FSP_HEADER_OFFSET + FSP_NOT_USED + block->frame, 0, - MLOG_4BYTES, mtr); + ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_NOT_USED + + block->frame)); mlog_write_ulint(FSP_HEADER_OFFSET + FSP_SIZE + block->frame, size, MLOG_4BYTES, mtr); - mlog_write_ulint(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + block->frame, 0, - MLOG_4BYTES, mtr); + ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + + block->frame)); mlog_write_ulint(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + block->frame, space->flags & ~FSP_FLAGS_MEM_MASK, MLOG_4BYTES, mtr); - mlog_write_ulint(FSP_HEADER_OFFSET + FSP_FRAG_N_USED + block->frame, 0, - MLOG_4BYTES, mtr); + ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_FRAG_N_USED + + block->frame)); - flst_init(FSP_HEADER_OFFSET + FSP_FREE + block->frame, mtr); - flst_init(FSP_HEADER_OFFSET 
+ FSP_FREE_FRAG + block->frame, mtr); - flst_init(FSP_HEADER_OFFSET + FSP_FULL_FRAG + block->frame, mtr); - flst_init(FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL + block->frame, mtr); - flst_init(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE + block->frame, mtr); + flst_init(block, FSP_HEADER_OFFSET + FSP_FREE, mtr); + flst_init(block, FSP_HEADER_OFFSET + FSP_FREE_FRAG, mtr); + flst_init(block, FSP_HEADER_OFFSET + FSP_FULL_FRAG, mtr); + flst_init(block, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, mtr); + flst_init(block, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, mtr); mlog_write_ull(FSP_HEADER_OFFSET + FSP_SEG_ID + block->frame, 1, mtr); @@ -1078,13 +1077,6 @@ fsp_fill_free_list( mtr_start(&ibuf_mtr); ibuf_mtr.set_named_space(space); - /* Avoid logging while truncate table - fix-up is active. */ - if (srv_is_tablespace_truncated(space->id)) { - mtr_set_log_mode( - &ibuf_mtr, MTR_LOG_NO_REDO); - } - const page_id_t page_id( space->id, i + FSP_IBUF_BITMAP_OFFSET); diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index f63ae9d82d7..58bfeee9278 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -3714,13 +3714,6 @@ fts_get_max_doc_id( if (!page_is_empty(btr_pcur_get_page(&pcur))) { const rec_t* rec = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - mem_heap_t* heap = NULL; - ulint len; - const void* data; - - rec_offs_init(offsets_); do { rec = btr_pcur_get_rec(&pcur); @@ -3730,18 +3723,11 @@ fts_get_max_doc_id( } } while (btr_pcur_move_to_prev(&pcur, &mtr)); - if (!rec) { + if (!rec || rec_is_metadata(rec, *index)) { goto func_exit; } - ut_ad(!rec_is_metadata(rec, index)); - offsets = rec_get_offsets( - rec, index, offsets, true, ULINT_UNDEFINED, &heap); - - data = rec_get_nth_field(rec, offsets, 0, &len); - - doc_id = static_cast<doc_id_t>(fts_read_doc_id( - static_cast<const byte*>(data))); + doc_id = fts_read_doc_id(rec); } func_exit: @@ -5223,49 +5209,23 @@ fts_get_doc_id_from_row( } /** 
Extract the doc id from the record that belongs to index. -@param[in] table table -@param[in] rec record contains FTS_DOC_ID +@param[in] rec record containing FTS_DOC_ID @param[in] index index of rec -@param[in] heap heap memory +@param[in] offsets rec_get_offsets(rec,index) @return doc id that was extracted from rec */ doc_id_t fts_get_doc_id_from_rec( - dict_table_t* table, const rec_t* rec, const dict_index_t* index, - mem_heap_t* heap) -{ - ulint len; - const byte* data; - ulint col_no; - doc_id_t doc_id = 0; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - mem_heap_t* my_heap = heap; - - ut_a(table->fts->doc_col != ULINT_UNDEFINED); - - rec_offs_init(offsets_); - - offsets = rec_get_offsets( - rec, index, offsets, true, ULINT_UNDEFINED, &my_heap); - - col_no = dict_col_get_index_pos( - &table->cols[table->fts->doc_col], index); - - ut_ad(col_no != ULINT_UNDEFINED); - - data = rec_get_nth_field(rec, offsets, col_no, &len); - - ut_a(len == 8); - ut_ad(8 == sizeof(doc_id)); - doc_id = static_cast<doc_id_t>(mach_read_from_8(data)); - - if (my_heap && !heap) { - mem_heap_free(my_heap); - } - - return(doc_id); + const ulint* offsets) +{ + ulint f = dict_col_get_index_pos( + &index->table->cols[index->table->fts->doc_col], index); + ulint len; + doc_id_t doc_id = mach_read_from_8( + rec_get_nth_field(rec, offsets, f, &len)); + ut_ad(len == 8); + return doc_id; } /*********************************************************************//** diff --git a/storage/innobase/fut/fut0lst.cc b/storage/innobase/fut/fut0lst.cc index 3e77165ac31..05474b02cbd 100644 --- a/storage/innobase/fut/fut0lst.cc +++ b/storage/innobase/fut/fut0lst.cc @@ -58,8 +58,8 @@ flst_add_to_empty( flst_write_addr(base + FLST_LAST, node_addr, mtr); /* Set prev and next fields of node to add */ - flst_write_addr(node + FLST_PREV, fil_addr_null, mtr); - flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr); + flst_zero_addr(node + FLST_PREV, mtr); + flst_zero_addr(node + FLST_NEXT, 
mtr); /* Update len of base node */ mlog_write_ulint(base + FLST_LEN, 1, MLOG_4BYTES, mtr); diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc index be67239e177..226b5d07e5d 100644 --- a/storage/innobase/gis/gis0rtree.cc +++ b/storage/innobase/gis/gis0rtree.cc @@ -914,7 +914,7 @@ rtr_split_page_move_rec_list( mtr_set_log_mode(mtr, log_mode); if (!page_zip_compress(new_page_zip, new_page, index, - page_zip_level, NULL, mtr)) { + page_zip_level, mtr)) { ulint ret_pos; /* Before trying to reorganize the page, diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index a747038aed4..f264285ad90 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -97,7 +97,6 @@ this program; if not, write to the Free Software Foundation, Inc., #include "row0mysql.h" #include "row0quiesce.h" #include "row0sel.h" -#include "row0trunc.h" #include "row0upd.h" #include "fil0crypt.h" #include "ut0timer.h" @@ -1029,8 +1028,6 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_pages_created, SHOW_LONG}, {"pages_read", (char*) &export_vars.innodb_pages_read, SHOW_LONG}, - {"pages0_read", - (char*) &export_vars.innodb_page0_read, SHOW_LONG}, {"pages_written", (char*) &export_vars.innodb_pages_written, SHOW_LONG}, {"row_lock_current_waits", @@ -2915,7 +2912,7 @@ ha_innobase::ha_innobase( | HA_CAN_RTREEKEYS | HA_CAN_TABLES_WITHOUT_ROLLBACK | HA_CONCURRENT_OPTIMIZE - | (srv_force_primary_key ? HA_WANTS_PRIMARY_KEY : 0) + | (srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0) ), m_start_of_scan(), m_mysql_has_locked() @@ -5330,7 +5327,7 @@ ha_innobase::keys_to_use_for_scanning() /****************************************************************//** Ensures that if there's a concurrent inplace ADD INDEX, being-indexed virtual columns are computed. 
They are not marked as indexed in the old table, so the -server won't add them to the vcol_set automatically */ +server won't add them to the read_set automatically */ void ha_innobase::column_bitmaps_signal() /*================================*/ @@ -5350,7 +5347,7 @@ ha_innobase::column_bitmaps_signal() if (col->ord_part || (dict_index_is_online_ddl(clust_index) && row_log_col_is_indexed(clust_index, num_v))) { - table->mark_virtual_col(table->vfield[j]); + table->mark_virtual_column_with_deps(table->vfield[j]); } num_v++; } @@ -6056,6 +6053,14 @@ initialize_auto_increment(dict_table_t* table, const Field* field) int ha_innobase::open(const char* name, int, uint) { + /* TODO: If trx_rollback_recovered(bool all=false) is ever + removed, the first-time open() must hold (or acquire and release) + a table lock that conflicts with trx_resurrect_table_locks(), + to ensure that any recovered incomplete ALTER TABLE will have been + rolled back. Otherwise, dict_table_t::instant could be cleared by + the rollback invoking dict_index_t::clear_instant_alter() while + open table handles exist in client connections. */ + dict_table_t* ib_table; char norm_name[FN_REFLEN]; dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE; @@ -9470,12 +9475,14 @@ ha_innobase::change_active_index( } #endif } else { - dtuple_set_n_fields(m_prebuilt->search_tuple, - m_prebuilt->index->n_fields); + ulint n_fields = dict_index_get_n_unique_in_tree( + m_prebuilt->index); + + dtuple_set_n_fields(m_prebuilt->search_tuple, n_fields); dict_index_copy_types( m_prebuilt->search_tuple, m_prebuilt->index, - m_prebuilt->index->n_fields); + n_fields); /* If it's FTS query and FTS_DOC_ID exists FTS_DOC_ID field is always added to read_set. 
*/ @@ -10664,9 +10671,8 @@ prepare_vcol_for_base_setup( ut_ad(col->base_col == NULL); MY_BITMAP *old_read_set = field->table->read_set; - MY_BITMAP *old_vcol_set = field->table->vcol_set; - field->table->read_set = field->table->vcol_set = &field->table->tmp_set; + field->table->read_set = &field->table->tmp_set; bitmap_clear_all(&field->table->tmp_set); field->vcol_info->expr->walk( @@ -10678,7 +10684,6 @@ prepare_vcol_for_base_setup( * col->base_col))); } field->table->read_set= old_read_set; - field->table->vcol_set= old_vcol_set; } @@ -11042,9 +11047,8 @@ err_col: dict_table_add_system_columns(table, heap); if (table->is_temporary()) { - /* Get a new table ID. FIXME: Make this a private - sequence, not shared with persistent tables! */ - dict_table_assign_new_id(table, m_trx); + m_trx->table_id = table->id + = dict_sys->get_temporary_table_id(); ut_ad(dict_tf_get_rec_format(table->flags) != REC_FORMAT_COMPRESSED); table->space_id = SRV_TMP_SPACE_ID; @@ -11720,7 +11724,7 @@ create_table_info_t::parse_table_name( } if (m_create_info->index_file_name) { - my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING, + my_error(WARN_OPTION_IGNORED, ME_WARNING, "INDEX DIRECTORY"); } @@ -17234,7 +17238,7 @@ innodb_internal_table_validate( DBUG_EXECUTE_IF("innodb_evict_autoinc_table", mutex_enter(&dict_sys->mutex); - dict_table_remove_from_cache_low(user_table, TRUE); + dict_table_remove_from_cache(user_table, true); mutex_exit(&dict_sys->mutex); ); } @@ -20923,10 +20927,10 @@ ib_senderrf( switch (level) { case IB_LOG_LEVEL_INFO: - l = ME_JUST_INFO; + l = ME_NOTE; break; case IB_LOG_LEVEL_WARN: - l = ME_JUST_WARNING; + l = ME_WARNING; break; case IB_LOG_LEVEL_ERROR: sd_notifyf(0, "STATUS=InnoDB: Error: %s", str); diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index bc483ffa130..2c36de1dad6 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -136,6 +136,624 @@ static const 
alter_table_operations INNOBASE_ALTER_INSTANT | ALTER_COLUMN_UNVERSIONED | ALTER_DROP_VIRTUAL_COLUMN; +/** Acquire a page latch on the possible metadata record, +to prevent concurrent invocation of dict_index_t::clear_instant_alter() +by purge when the table turns out to be empty. +@param[in,out] index clustered index +@param[in,out] mtr mini-transaction */ +static void instant_metadata_lock(dict_index_t& index, mtr_t& mtr) +{ + DBUG_ASSERT(index.is_primary()); + + if (!index.is_instant()) { + /* dict_index_t::clear_instant_alter() cannot be called. + No need for a latch. */ + return; + } + + btr_cur_t btr_cur; + btr_cur_open_at_index_side(true, &index, BTR_SEARCH_LEAF, + &btr_cur, 0, &mtr); + ut_ad(page_cur_is_before_first(btr_cur_get_page_cur(&btr_cur))); + ut_ad(page_is_leaf(btr_cur_get_page(&btr_cur))); + ut_ad(!page_has_prev(btr_cur_get_page(&btr_cur))); + ut_ad(!buf_block_get_page_zip(btr_cur_get_block(&btr_cur))); +} + +/** Set is_instant() before instant_column(). +@param[in] old previous table definition +@param[in] col_map map from old.cols[] and old.v_cols[] to this +@param[out] first_alter_pos 0, or 1 + first changed column position */ +inline void dict_table_t::prepare_instant(const dict_table_t& old, + const ulint* col_map, + unsigned& first_alter_pos) +{ + DBUG_ASSERT(!is_instant()); + DBUG_ASSERT(n_dropped() == 0); + DBUG_ASSERT(old.n_cols == old.n_def); + DBUG_ASSERT(n_cols == n_def); + DBUG_ASSERT(old.supports_instant()); + /* supports_instant() does not necessarily hold here, + in case ROW_FORMAT=COMPRESSED according to the + MariaDB data dictionary, and ALTER_OPTIONS was not set. + If that is the case, the instant ALTER TABLE would keep + the InnoDB table in its current format. */ + + dict_index_t& oindex = *old.indexes.start; + dict_index_t& index = *indexes.start; + first_alter_pos = 0; + + mtr_t mtr; + mtr.start(); + /* Prevent oindex.n_core_fields and others, so that + purge cannot invoke dict_index_t::clear_instant_alter(). 
*/ + instant_metadata_lock(oindex, mtr); + + for (unsigned i = 0; i + DATA_N_SYS_COLS < old.n_cols; + i++) { + if (col_map[i] != i) { + first_alter_pos = 1 + i; + goto add_metadata; + } + } + + if (!old.instant) { + /* Columns were not dropped or reordered. + Therefore columns must have been added at the end. */ + DBUG_ASSERT(index.n_fields > oindex.n_fields); +set_core_fields: + index.n_core_fields = oindex.n_core_fields; + index.n_core_null_bytes = oindex.n_core_null_bytes; + } else { +add_metadata: + const unsigned n_old_drop = old.n_dropped(); + unsigned n_drop = n_old_drop; + for (unsigned i = old.n_cols; i--; ) { + if (col_map[i] == ULINT_UNDEFINED) { + DBUG_ASSERT(i + DATA_N_SYS_COLS + < uint(old.n_cols)); + n_drop++; + } + } + + instant = new (mem_heap_alloc(heap, sizeof(dict_instant_t))) + dict_instant_t(); + instant->n_dropped = n_drop; + if (n_drop) { + instant->dropped + = static_cast<dict_col_t*>( + mem_heap_alloc(heap, n_drop + * sizeof(dict_col_t))); + if (n_old_drop) { + memcpy(instant->dropped, old.instant->dropped, + n_old_drop * sizeof(dict_col_t)); + } + } else { + instant->dropped = NULL; + } + + unsigned d = n_old_drop; + + for (unsigned i = 0; i < old.n_cols; i++) { + if (col_map[i] == ULINT_UNDEFINED) { + (new (&instant->dropped[d++]) + dict_col_t(old.cols[i]))->set_dropped(); + } + } +#ifndef DBUG_OFF + for (unsigned i = 0; i < n_drop; i++) { + DBUG_ASSERT(instant->dropped[i].is_dropped()); + } +#endif + DBUG_ASSERT(d == n_drop); + const uint n_fields = index.n_fields + n_dropped(); + + DBUG_ASSERT(n_fields >= oindex.n_fields); + dict_field_t* fields = static_cast<dict_field_t*>( + mem_heap_zalloc(heap, n_fields * sizeof *fields)); + d = n_old_drop; + uint i = 0, j = 0, n_nullable = 0; + ut_d(uint core_null = 0); + for (; i < oindex.n_fields; i++) { + DBUG_ASSERT(j <= i); + dict_field_t&f = fields[i] = oindex.fields[i]; + if (f.col->is_dropped()) { + /* The column has been instantly + dropped earlier. 
*/ + DBUG_ASSERT(f.col >= old.instant->dropped); + { + size_t d = f.col + - old.instant->dropped; + DBUG_ASSERT(d < n_old_drop); + DBUG_ASSERT(&old.instant->dropped[d] + == f.col); + DBUG_ASSERT(!f.name); + f.col = instant->dropped + d; + } + if (f.col->is_nullable()) { +found_nullable: + n_nullable++; + ut_d(core_null + += i < oindex.n_core_fields); + } + continue; + } + + const ulint col_ind = col_map[f.col->ind]; + if (col_ind != ULINT_UNDEFINED) { + if (index.fields[j].col->ind != col_ind) { + /* The fields for instantly + added columns must be placed + last in the clustered index. + Keep pre-existing fields in + the same position. */ + uint k; + for (k = j + 1; k < index.n_fields; + k++) { + if (index.fields[k].col->ind + == col_ind) { + goto found_j; + } + } + DBUG_ASSERT(!"no such col"); +found_j: + std::swap(index.fields[j], + index.fields[k]); + } + DBUG_ASSERT(index.fields[j].col->ind + == col_ind); + fields[i] = index.fields[j++]; + DBUG_ASSERT(!fields[i].col->is_dropped()); + DBUG_ASSERT(fields[i].name + == fields[i].col->name(*this)); + if (fields[i].col->is_nullable()) { + goto found_nullable; + } + continue; + } + + /* This column is being dropped. 
*/ + DBUG_ASSERT(d < n_drop); + f.col = &instant->dropped[d++]; + f.name = NULL; + if (f.col->is_nullable()) { + goto found_nullable; + } + } + ut_ad(UT_BITS_IN_BYTES(core_null) == oindex.n_core_null_bytes); + DBUG_ASSERT(i >= oindex.n_core_fields); + DBUG_ASSERT(j <= i); + DBUG_ASSERT(n_fields - (i - j) == index.n_fields); + std::sort(index.fields + j, index.fields + index.n_fields, + [](const dict_field_t& a, const dict_field_t& b) + { return a.col->ind < b.col->ind; }); + DBUG_ASSERT(d == n_drop); + for (; i < n_fields; i++) { + fields[i] = index.fields[j++]; + n_nullable += fields[i].col->is_nullable(); + DBUG_ASSERT(!fields[i].col->is_dropped()); + DBUG_ASSERT(fields[i].name + == fields[i].col->name(*this)); + } + DBUG_ASSERT(j == index.n_fields); + index.n_fields = index.n_def = n_fields; + index.fields = fields; + DBUG_ASSERT(n_nullable >= index.n_nullable); + DBUG_ASSERT(n_nullable >= oindex.n_nullable); + index.n_nullable = n_nullable; + goto set_core_fields; + } + + DBUG_ASSERT(n_cols + n_dropped() >= old.n_cols + old.n_dropped()); + DBUG_ASSERT(n_dropped() >= old.n_dropped()); + DBUG_ASSERT(index.n_core_fields == oindex.n_core_fields); + DBUG_ASSERT(index.n_core_null_bytes == oindex.n_core_null_bytes); + mtr.commit(); +} + + +/** Adjust index metadata for instant ADD/DROP/reorder COLUMN. 
+@param[in] clustered index definition after instant ALTER TABLE */ +inline void dict_index_t::instant_add_field(const dict_index_t& instant) +{ + DBUG_ASSERT(is_primary()); + DBUG_ASSERT(instant.is_primary()); + DBUG_ASSERT(!has_virtual()); + DBUG_ASSERT(!instant.has_virtual()); + DBUG_ASSERT(instant.n_core_fields <= instant.n_fields); + DBUG_ASSERT(n_def == n_fields); + DBUG_ASSERT(instant.n_def == instant.n_fields); + DBUG_ASSERT(type == instant.type); + DBUG_ASSERT(trx_id_offset == instant.trx_id_offset); + DBUG_ASSERT(n_user_defined_cols == instant.n_user_defined_cols); + DBUG_ASSERT(n_uniq == instant.n_uniq); + DBUG_ASSERT(instant.n_fields >= n_fields); + DBUG_ASSERT(instant.n_nullable >= n_nullable); + /* dict_table_t::prepare_instant() initialized n_core_fields + to be equal. However, after that purge could have emptied the + table and invoked dict_index_t::clear_instant_alter(). */ + DBUG_ASSERT(instant.n_core_fields <= n_core_fields); + DBUG_ASSERT(instant.n_core_null_bytes <= n_core_null_bytes); + DBUG_ASSERT(instant.n_core_fields == n_core_fields + || (!is_instant() && instant.is_instant())); + DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes + || (!is_instant() && instant.is_instant())); + + /* instant will have all fields (including ones for columns + that have been or are being instantly dropped) in the same position + as this index. Fields for any added columns are appended at the end. 
*/ +#ifndef DBUG_OFF + for (unsigned i = 0; i < n_fields; i++) { + DBUG_ASSERT(fields[i].same(instant.fields[i])); + DBUG_ASSERT(fields[i].col->is_nullable() + == instant.fields[i].col->is_nullable()); + } +#endif + n_fields = instant.n_fields; + n_def = instant.n_def; + n_nullable = instant.n_nullable; + fields = static_cast<dict_field_t*>( + mem_heap_dup(heap, instant.fields, n_fields * sizeof *fields)); + + ut_d(unsigned n_null = 0); + ut_d(unsigned n_dropped = 0); + + for (unsigned i = 0; i < n_fields; i++) { + const dict_col_t* icol = instant.fields[i].col; + dict_field_t& f = fields[i]; + ut_d(n_null += icol->is_nullable()); + DBUG_ASSERT(!icol->is_virtual()); + if (icol->is_dropped()) { + ut_d(n_dropped++); + f.col->set_dropped(); + f.name = NULL; + } else { + f.col = &table->cols[icol - instant.table->cols]; + f.name = f.col->name(*table); + } + } + + ut_ad(n_null == n_nullable); + ut_ad(n_dropped == instant.table->n_dropped()); +} + +/** Adjust table metadata for instant ADD/DROP/reorder COLUMN. 
+@param[in] table altered table (with dropped columns) +@param[in] col_map mapping from cols[] and v_cols[] to table */ +inline void dict_table_t::instant_column(const dict_table_t& table, + const ulint* col_map) +{ + DBUG_ASSERT(!table.cached); + DBUG_ASSERT(table.n_def == table.n_cols); + DBUG_ASSERT(table.n_t_def == table.n_t_cols); + DBUG_ASSERT(n_def == n_cols); + DBUG_ASSERT(n_t_def == n_t_cols); + DBUG_ASSERT(n_v_def == n_v_cols); + DBUG_ASSERT(table.n_v_def == table.n_v_cols); + DBUG_ASSERT(table.n_cols + table.n_dropped() >= n_cols + n_dropped()); + ut_ad(mutex_own(&dict_sys->mutex)); + + { + const char* end = table.col_names; + for (unsigned i = table.n_cols; i--; ) end += strlen(end) + 1; + + col_names = static_cast<char*>( + mem_heap_dup(heap, table.col_names, + ulint(end - table.col_names))); + } + const dict_col_t* const old_cols = cols; + cols = static_cast<dict_col_t*>(mem_heap_dup(heap, table.cols, + table.n_cols + * sizeof *cols)); + + /* Preserve the default values of previously instantly added + columns, or copy the new default values to this->heap. 
*/ + for (ulint i = 0; i < ulint(table.n_cols); i++) { + dict_col_t& c = cols[i]; + + if (const dict_col_t* o = find(old_cols, col_map, n_cols, i)) { + c.def_val = o->def_val; + continue; + } + + DBUG_ASSERT(c.is_added()); + if (c.def_val.len <= sizeof field_ref_zero + && !memcmp(c.def_val.data, field_ref_zero, + c.def_val.len)) { + c.def_val.data = field_ref_zero; + } else if (const void*& d = c.def_val.data) { + d = mem_heap_dup(heap, d, c.def_val.len); + } else { + DBUG_ASSERT(c.def_val.len == UNIV_SQL_NULL); + } + } + + n_t_def += table.n_cols - n_cols; + n_t_cols += table.n_cols - n_cols; + n_def = table.n_cols; + + const dict_v_col_t* const old_v_cols = v_cols; + + if (const char* end = table.v_col_names) { + for (unsigned i = table.n_v_cols; i--; ) { + end += strlen(end) + 1; + } + + v_col_names = static_cast<char*>( + mem_heap_dup(heap, table.v_col_names, + ulint(end - table.v_col_names))); + v_cols = static_cast<dict_v_col_t*>( + mem_heap_dup(heap, table.v_cols, + table.n_v_cols * sizeof *v_cols)); + } else { + ut_ad(table.n_v_cols == 0); + v_col_names = NULL; + v_cols = NULL; + } + + n_t_def += table.n_v_cols - n_v_cols; + n_t_cols += table.n_v_cols - n_v_cols; + n_v_def = table.n_v_cols; + + for (unsigned i = 0; i < n_v_def; i++) { + dict_v_col_t& v = v_cols[i]; + v.v_indexes = UT_NEW_NOKEY(dict_v_idx_list()); + v.base_col = static_cast<dict_col_t**>( + mem_heap_dup(heap, v.base_col, + v.num_base * sizeof *v.base_col)); + + for (ulint n = v.num_base; n--; ) { + dict_col_t*& base = v.base_col[n]; + if (base->is_virtual()) { + } else if (base >= table.cols + && base < table.cols + table.n_cols) { + /* The base column was instantly added. 
*/ + size_t c = base - table.cols; + DBUG_ASSERT(base == &table.cols[c]); + base = &cols[c]; + } else { + DBUG_ASSERT(base >= old_cols); + size_t c = base - old_cols; + DBUG_ASSERT(c + DATA_N_SYS_COLS < n_cols); + DBUG_ASSERT(base == &old_cols[c]); + DBUG_ASSERT(col_map[c] + DATA_N_SYS_COLS + < n_cols); + base = &cols[col_map[c]]; + } + } + } + + dict_index_t* index = dict_table_get_first_index(this); + + index->instant_add_field(*dict_table_get_first_index(&table)); + + if (instant || table.instant) { + const unsigned u = index->first_user_field(); + uint16_t* non_pk_col_map = static_cast<uint16_t*>( + mem_heap_alloc(heap, (index->n_fields - u) + * sizeof *non_pk_col_map)); + /* FIXME: add instant->heap, and transfer ownership here */ + if (!instant) { + instant = new (mem_heap_zalloc(heap, sizeof *instant)) + dict_instant_t(); + goto dup_dropped; + } else if (n_dropped() < table.n_dropped()) { +dup_dropped: + instant->dropped = static_cast<dict_col_t*>( + mem_heap_dup(heap, table.instant->dropped, + table.instant->n_dropped + * sizeof *instant->dropped)); + instant->n_dropped = table.instant->n_dropped; + } else if (table.instant->n_dropped) { + memcpy(instant->dropped, table.instant->dropped, + table.instant->n_dropped + * sizeof *instant->dropped); + } + + instant->non_pk_col_map = non_pk_col_map; + ut_d(unsigned n_drop = 0); + for (unsigned i = u; i < index->n_fields; i++) { + dict_field_t* field = &index->fields[i]; + DBUG_ASSERT(dict_col_get_fixed_size( + field->col, + flags & DICT_TF_COMPACT) + <= DICT_MAX_FIXED_COL_LEN); + if (!field->col->is_dropped()) { + *non_pk_col_map++ = field->col->ind; + continue; + } + + ulint fixed_len = dict_col_get_fixed_size( + field->col, flags & DICT_TF_COMPACT); + *non_pk_col_map++ = 1U << 15 + | uint16_t(!field->col->is_nullable()) << 14 + | (fixed_len + ? 
uint16_t(fixed_len + 1) + : field->col->len > 255); + ut_ad(field->col >= table.instant->dropped); + ut_ad(field->col < table.instant->dropped + + table.instant->n_dropped); + ut_d(n_drop++); + size_t d = field->col - table.instant->dropped; + ut_ad(field->col == &table.instant->dropped[d]); + ut_ad(d <= instant->n_dropped); + field->col = &instant->dropped[d]; + } + ut_ad(n_drop == n_dropped()); + ut_ad(non_pk_col_map + == &instant->non_pk_col_map[index->n_fields - u]); + } + + while ((index = dict_table_get_next_index(index)) != NULL) { + if (index->to_be_dropped) { + continue; + } + for (unsigned i = 0; i < index->n_fields; i++) { + dict_field_t& f = index->fields[i]; + if (f.col >= table.cols + && f.col < table.cols + table.n_cols) { + /* This is an instantly added column + in a newly added index. */ + DBUG_ASSERT(!f.col->is_virtual()); + size_t c = f.col - table.cols; + DBUG_ASSERT(f.col == &table.cols[c]); + f.col = &cols[c]; + } else if (f.col >= &table.v_cols->m_col + && f.col < &table.v_cols[n_v_cols].m_col) { + /* This is an instantly added virtual column + in a newly added index. */ + DBUG_ASSERT(f.col->is_virtual()); + size_t c = reinterpret_cast<dict_v_col_t*>( + f.col) - table.v_cols; + DBUG_ASSERT(f.col == &table.v_cols[c].m_col); + f.col = &v_cols[c].m_col; + } else if (f.col < old_cols + || f.col >= old_cols + n_cols) { + DBUG_ASSERT(f.col->is_virtual()); + f.col = &v_cols[col_map[ + reinterpret_cast<dict_v_col_t*>( + f.col) + - old_v_cols + n_cols]].m_col; + } else { + f.col = &cols[col_map[f.col - old_cols]]; + DBUG_ASSERT(!f.col->is_virtual()); + } + f.name = f.col->name(*this); + if (f.col->is_virtual()) { + reinterpret_cast<dict_v_col_t*>(f.col) + ->v_indexes->push_back( + dict_v_idx_t(index, i)); + } + } + } + + n_cols = table.n_cols; + n_v_cols = table.n_v_cols; +} + +/** Find the old column number for the given new column position. 
+@param[in] col_map column map from old column to new column +@param[in] pos new column position +@param[in] n number of columns present in the column map +@return old column position for the given new column position. */ +static ulint find_old_col_no(const ulint* col_map, ulint pos, ulint n) +{ + do { + ut_ad(n); + } while (col_map[--n] != pos); + return n; +} + +/** Roll back instant_column(). +@param[in] old_n_cols original n_cols +@param[in] old_cols original cols +@param[in] old_col_names original col_names +@param[in] old_instant original instant structure +@param[in] old_fields original fields +@param[in] old_n_fields original number of fields +@param[in] old_n_v_cols original n_v_cols +@param[in] old_v_cols original v_cols +@param[in] old_v_col_names original v_col_names +@param[in] col_map column map */ +inline void dict_table_t::rollback_instant( + unsigned old_n_cols, + dict_col_t* old_cols, + const char* old_col_names, + dict_instant_t* old_instant, + dict_field_t* old_fields, + unsigned old_n_fields, + unsigned old_n_v_cols, + dict_v_col_t* old_v_cols, + const char* old_v_col_names, + const ulint* col_map) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + dict_index_t* index = indexes.start; + mtr_t mtr; + mtr.start(); + /* Prevent concurrent execution of dict_index_t::clear_instant_alter() + by acquiring a latch on the leftmost leaf page. 
*/ + instant_metadata_lock(*index, mtr); + /* index->is_instant() does not necessarily hold here, because + the table may have been emptied */ + DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS); + DBUG_ASSERT(n_cols == n_def); + DBUG_ASSERT(index->n_def == index->n_fields); + DBUG_ASSERT(index->n_core_fields <= old_n_fields); + DBUG_ASSERT(index->n_core_fields <= index->n_fields); + DBUG_ASSERT(instant || !old_instant); + + instant = old_instant; + + index->n_nullable = 0; + + for (unsigned i = old_n_fields; i--; ) { + if (old_fields[i].col->is_nullable()) { + index->n_nullable++; + } + } + + for (unsigned i = n_v_cols; i--; ) { + UT_DELETE(v_cols[i].v_indexes); + } + + index->n_def = index->n_fields = old_n_fields; + + const dict_col_t* const new_cols = cols; + const dict_col_t* const new_cols_end = cols + n_cols; + const dict_v_col_t* const new_v_cols = v_cols; + const dict_v_col_t* const new_v_cols_end = v_cols + n_v_cols; + + cols = old_cols; + col_names = old_col_names; + v_cols = old_v_cols; + v_col_names = old_v_col_names; + n_def = n_cols = old_n_cols; + n_v_def = n_v_cols = old_n_v_cols; + n_t_def = n_t_cols = n_cols + n_v_cols; + + index->fields = old_fields; + mtr.commit(); + + while ((index = dict_table_get_next_index(index)) != NULL) { + if (index->to_be_dropped) { + /* instant_column() did not adjust these indexes. 
*/ + continue; + } + + for (unsigned i = 0; i < index->n_fields; i++) { + dict_field_t& f = index->fields[i]; + if (f.col->is_virtual()) { + DBUG_ASSERT(f.col >= &new_v_cols->m_col); + DBUG_ASSERT(f.col < &new_v_cols_end->m_col); + size_t n = size_t( + reinterpret_cast<dict_v_col_t*>(f.col) + - new_v_cols); + DBUG_ASSERT(n <= n_v_cols); + + ulint old_col_no = find_old_col_no( + col_map + n_cols, n, n_v_cols); + DBUG_ASSERT(old_col_no <= n_v_cols); + f.col = &v_cols[old_col_no].m_col; + DBUG_ASSERT(f.col->is_virtual()); + } else { + DBUG_ASSERT(f.col >= new_cols); + DBUG_ASSERT(f.col < new_cols_end); + size_t n = size_t(f.col - new_cols); + DBUG_ASSERT(n <= n_cols); + + ulint old_col_no = find_old_col_no(col_map, + n, n_cols); + DBUG_ASSERT(old_col_no < n_cols); + f.col = &cols[old_col_no]; + DBUG_ASSERT(!f.col->is_virtual()); + } + f.name = f.col->name(*this); + } + } +} + struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx { /** Dummy query graph */ @@ -174,7 +792,7 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx dict_table_t* old_table; /** table where the indexes are being created or dropped */ dict_table_t* new_table; - /** table definition for instant ADD COLUMN */ + /** table definition for instant ADD/DROP/reorder COLUMN */ dict_table_t* instant_table; /** mapping of old column numbers to new ones, or NULL */ const ulint* col_map; @@ -208,7 +826,20 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx dict_col_t* const old_cols; /** original column names of the table */ const char* const old_col_names; - + /** original instantly dropped or reordered columns */ + dict_instant_t* const old_instant; + /** original index fields */ + dict_field_t* const old_fields; + /** size of old_fields */ + const unsigned old_n_fields; + /** original number of virtual columns in the table */ + const unsigned old_n_v_cols; + /** original virtual columns of the table */ + dict_v_col_t* const old_v_cols; + /** original virtual 
column names of the table */ + const char* const old_v_col_names; + /** 0, or 1 + first column whose position changes in instant ALTER */ + unsigned first_alter_pos; /** Allow non-null conversion. (1) Alter ignore should allow the conversion irrespective of sql mode. @@ -265,6 +896,13 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx old_n_cols(prebuilt_arg->table->n_cols), old_cols(prebuilt_arg->table->cols), old_col_names(prebuilt_arg->table->col_names), + old_instant(prebuilt_arg->table->instant), + old_fields(prebuilt_arg->table->indexes.start->fields), + old_n_fields(prebuilt_arg->table->indexes.start->n_fields), + old_n_v_cols(prebuilt_arg->table->n_v_cols), + old_v_cols(prebuilt_arg->table->v_cols), + old_v_col_names(prebuilt_arg->table->v_col_names), + first_alter_pos(0), allow_not_null(allow_not_null_flag), page_compression_level(page_compressed ? (page_compression_level_arg @@ -297,6 +935,9 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx rw_lock_free(&index->lock); dict_mem_index_free(index); } + for (unsigned i = old_n_v_cols; i--; ) { + UT_DELETE(old_v_cols[i].v_indexes); + } dict_mem_table_free(instant_table); } mem_heap_free(heap); @@ -321,14 +962,23 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx { DBUG_ASSERT(need_rebuild()); DBUG_ASSERT(!is_instant()); - DBUG_ASSERT(old_table->n_cols == old_table->n_def); - DBUG_ASSERT(new_table->n_cols == new_table->n_def); DBUG_ASSERT(old_table->n_cols == old_n_cols); - DBUG_ASSERT(new_table->n_cols > old_table->n_cols); - instant_table = new_table; + instant_table = new_table; new_table = old_table; export_vars.innodb_instant_alter_column++; + + instant_table->prepare_instant(*old_table, col_map, + first_alter_pos); + } + + /** Adjust table metadata for instant ADD/DROP/reorder COLUMN. 
*/ + void instant_column() + { + DBUG_ASSERT(is_instant()); + DBUG_ASSERT(old_n_fields + == old_table->indexes.start->n_fields); + old_table->instant_column(*instant_table, col_map); } /** Revert prepare_instant() if the transaction is rolled back. */ @@ -336,7 +986,12 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx { if (!is_instant()) return; old_table->rollback_instant(old_n_cols, - old_cols, old_col_names); + old_cols, old_col_names, + old_instant, + old_fields, old_n_fields, + old_n_v_cols, old_v_cols, + old_v_col_names, + col_map); } /** @return whether this is instant ALTER TABLE */ @@ -666,20 +1321,56 @@ check_v_col_in_order( } /** Determine if an instant operation is possible for altering columns. +@param[in] ib_table InnoDB table definition @param[in] ha_alter_info the ALTER TABLE operation @param[in] table table definition before ALTER TABLE */ static bool instant_alter_column_possible( + const dict_table_t& ib_table, const Alter_inplace_info* ha_alter_info, const TABLE* table) { + if (!ib_table.supports_instant()) { + return false; + } +#if 1 // MDEV-17459: adjust fts_fetch_doc_from_rec() and friends; remove this + if (ib_table.fts) { + return false; + } +#endif + const dict_index_t* index = ib_table.indexes.start; + if (ha_alter_info->handler_flags & ALTER_ADD_STORED_BASE_COLUMN) { + List_iterator_fast<Create_field> cf_it( + ha_alter_info->alter_info->create_list); + uint n_add = 0; + while (const Create_field* cf = cf_it++) { + n_add += !cf->field; + } + if (index->n_fields >= REC_MAX_N_USER_FIELDS + DATA_N_SYS_COLS + - n_add) { + return false; + } + } +#if 1 // MDEV-17468: fix bugs with indexed virtual columns & remove this + ut_ad(index->is_primary()); + ut_ad(!index->has_virtual()); + while ((index = index->indexes.next) != NULL) { + if (index->has_virtual()) { + ut_ad(ib_table.n_v_cols); + return false; + } + } +#endif // Making table system-versioned instantly is not implemented yet. 
if (ha_alter_info->handler_flags & ALTER_ADD_SYSTEM_VERSIONING) { return false; } - if (~ha_alter_info->handler_flags & ALTER_ADD_STORED_BASE_COLUMN) { + if (!(ha_alter_info->handler_flags + & (ALTER_ADD_STORED_BASE_COLUMN + | ALTER_DROP_STORED_COLUMN + | ALTER_STORED_COLUMN_ORDER))) { return false; } @@ -702,6 +1393,8 @@ instant_alter_column_possible( columns. */ if (ha_alter_info->handler_flags & ((INNOBASE_ALTER_REBUILD | INNOBASE_ONLINE_CREATE) + & ~ALTER_DROP_STORED_COLUMN + & ~ALTER_STORED_COLUMN_ORDER & ~ALTER_ADD_STORED_BASE_COLUMN & ~ALTER_OPTIONS)) { return false; } @@ -1014,6 +1707,8 @@ ha_innobase::check_if_supported_inplace_alter( DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } + const bool supports_instant = instant_alter_column_possible( + *m_prebuilt->table, ha_alter_info, table); bool add_drop_v_cols = false; /* If there is add or drop virtual columns, we will support operations @@ -1041,7 +1736,13 @@ ha_innobase::check_if_supported_inplace_alter( */ | ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX | ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX); - + if (supports_instant) { + flags &= ~(ALTER_DROP_STORED_COLUMN +#if 0 /* MDEV-17468: remove check_v_col_in_order() and fix the code */ + | ALTER_ADD_STORED_BASE_COLUMN +#endif + | ALTER_STORED_COLUMN_ORDER); + } if (flags != 0 || IF_PARTITIONING((altered_table->s->partition_info_str && altered_table->s->partition_info_str_len), 0) @@ -1222,8 +1923,8 @@ ha_innobase::check_if_supported_inplace_alter( constant DEFAULT expression. 
*/ cf_it.rewind(); Field **af = altered_table->field; - bool add_column_not_last = false; - uint n_stored_cols = 0, n_add_cols = 0; + bool fts_need_rebuild = false; + const bool need_rebuild = innobase_need_rebuild(ha_alter_info, table); while (Create_field* cf = cf_it++) { DBUG_ASSERT(cf->field @@ -1271,45 +1972,36 @@ ha_innobase::check_if_supported_inplace_alter( goto next_column; } - ha_alter_info->unsupported_reason - = my_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL); - } else if (!is_non_const_value(*af)) { - - n_add_cols++; - - if (af < &altered_table->field[table_share->fields]) { - add_column_not_last = true; - } - - if (set_default_value(*af)) { - goto next_column; + ha_alter_info->unsupported_reason = my_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL); + } else if (!is_non_const_value(*af) + && set_default_value(*af)) { + if (m_prebuilt->table->fts + && innobase_fulltext_exist(altered_table) + && !my_strcasecmp(system_charset_info, + (*af)->field_name.str, + FTS_DOC_ID_COL_NAME)) { + /* If a hidden FTS_DOC_ID column exists + (because of FULLTEXT INDEX), it cannot + be replaced with a user-created one + except when using ALGORITHM=COPY. 
*/ + goto cannot_create_many_fulltext_index; } + goto next_column; } DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); next_column: - n_stored_cols += (*af++)->stored_in_db(); - } - - if (!add_column_not_last - && uint(m_prebuilt->table->n_cols) - DATA_N_SYS_COLS + n_add_cols - == n_stored_cols - && m_prebuilt->table->supports_instant() - && instant_alter_column_possible(ha_alter_info, table)) { - - DBUG_RETURN(HA_ALTER_INPLACE_INSTANT); + af++; } - if (!(ha_alter_info->handler_flags & ~(INNOBASE_ALTER_INSTANT - | INNOBASE_INPLACE_IGNORE))) { + if (supports_instant + || !(ha_alter_info->handler_flags + & ~(INNOBASE_ALTER_INSTANT | INNOBASE_INPLACE_IGNORE))) { DBUG_RETURN(HA_ALTER_INPLACE_INSTANT); } - bool fts_need_rebuild = false; - const bool need_rebuild = innobase_need_rebuild(ha_alter_info, table); - if (!online) { /* We already determined that only a non-locking operation is possible. */ @@ -2294,9 +2986,9 @@ innobase_row_to_mysql( } } if (table->vfield) { - my_bitmap_map* old_vcol_set = tmp_use_all_columns(table, table->vcol_set); + my_bitmap_map* old_read_set = tmp_use_all_columns(table, table->read_set); table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_READ); - tmp_restore_column_map(table->vcol_set, old_vcol_set); + tmp_restore_column_map(table->read_set, old_read_set); } } @@ -3370,12 +4062,13 @@ innobase_build_col_map( } } - ut_ad(!is_v); - innobase_build_col_map_add( - heap, dtuple_get_nth_field(defaults, i), - altered_table->field[i + num_v], - NULL, - dict_table_is_comp(new_table)); + if (!is_v) { + innobase_build_col_map_add( + heap, dtuple_get_nth_field(defaults, i), + altered_table->field[i + num_v], + NULL, + dict_table_is_comp(new_table)); + } found_col: if (is_v) { num_v++; @@ -3843,13 +4536,12 @@ prepare_inplace_add_virtual( ha_innobase_inplace_ctx* ctx; ulint i = 0; ulint j = 0; - const Create_field* new_field; ctx = static_cast<ha_innobase_inplace_ctx*> (ha_alter_info->handler_ctx); - ctx->num_to_add_vcol = altered_table->s->fields - + 
ctx->num_to_drop_vcol - table->s->fields; + ctx->num_to_add_vcol = altered_table->s->virtual_fields + + ctx->num_to_drop_vcol - table->s->virtual_fields; ctx->add_vcol = static_cast<dict_v_col_t*>( mem_heap_zalloc(ctx->heap, ctx->num_to_add_vcol @@ -3861,43 +4553,21 @@ prepare_inplace_add_virtual( List_iterator_fast<Create_field> cf_it( ha_alter_info->alter_info->create_list); - while ((new_field = (cf_it++)) != NULL) { - const Field* field = new_field->field; - ulint old_i; - - for (old_i = 0; table->field[old_i]; old_i++) { - const Field* n_field = table->field[old_i]; - if (field == n_field) { - break; - } - } - - i++; + while (const Create_field* new_field = cf_it++) { + const Field* field = altered_table->field[i++]; - if (table->field[old_i]) { + if (new_field->field || !innobase_is_v_fld(field)) { continue; } - ut_ad(!field); - - ulint col_len; ulint is_unsigned; - ulint field_type; ulint charset_no; - - field = altered_table->field[i - 1]; - ulint col_type = get_innobase_type_from_mysql_type( &is_unsigned, field); - - if (!innobase_is_v_fld(field)) { - continue; - } - - col_len = field->pack_length(); - field_type = (ulint) field->type(); + ulint col_len = field->pack_length(); + ulint field_type = (ulint) field->type(); if (!field->real_maybe_null()) { field_type |= DATA_NOT_NULL; @@ -3939,7 +4609,6 @@ prepare_inplace_add_virtual( } } - ctx->add_vcol[j].m_col.prtype = dtype_form_prtype( field_type, charset_no); @@ -3958,6 +4627,7 @@ prepare_inplace_add_virtual( /* No need to track the list */ ctx->add_vcol[j].v_indexes = NULL; + /* MDEV-17468: Do this on ctx->instant_table later */ innodb_base_col_setup(ctx->old_table, field, &ctx->add_vcol[j]); j++; } @@ -4084,33 +4754,76 @@ prepare_inplace_drop_virtual( @param[in] pos virtual column column no @param[in] base_pos base column pos @param[in] trx transaction -@return DB_SUCCESS if successful, otherwise error code */ -static -dberr_t -innobase_insert_sys_virtual( +@retval false on success +@retval true on 
failure (my_error() will have been called) */ +static bool innobase_insert_sys_virtual( const dict_table_t* table, ulint pos, ulint base_pos, trx_t* trx) { pars_info_t* info = pars_info_create(); - pars_info_add_ull_literal(info, "id", table->id); - pars_info_add_int4_literal(info, "pos", pos); - pars_info_add_int4_literal(info, "base_pos", base_pos); - dberr_t error = que_eval_sql( - info, - "PROCEDURE P () IS\n" - "BEGIN\n" - "INSERT INTO SYS_VIRTUAL VALUES" - "(:id, :pos, :base_pos);\n" - "END;\n", - FALSE, trx); + if (DB_SUCCESS != que_eval_sql( + info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "INSERT INTO SYS_VIRTUAL VALUES (:id, :pos, :base_pos);\n" + "END;\n", + FALSE, trx)) { + my_error(ER_INTERNAL_ERROR, MYF(0), + "InnoDB: ADD COLUMN...VIRTUAL"); + return true; + } - return(error); + return false; +} + +/** Insert a record to the SYS_COLUMNS dictionary table. +@param[in] table_id table id +@param[in] pos position of the column +@param[in] field_name field name +@param[in] mtype main type +@param[in] prtype precise type +@param[in] len fixed length in bytes, or 0 +@param[in] n_base number of base columns of virtual columns, or 0 +@retval false on success +@retval true on failure (my_error() will have been called) */ +static bool innodb_insert_sys_columns( + table_id_t table_id, + ulint pos, + const char* field_name, + ulint mtype, + ulint prtype, + ulint len, + ulint n_base, + trx_t* trx) +{ + pars_info_t* info = pars_info_create(); + pars_info_add_ull_literal(info, "id", table_id); + pars_info_add_int4_literal(info, "pos", pos); + pars_info_add_str_literal(info, "name", field_name); + pars_info_add_int4_literal(info, "mtype", mtype); + pars_info_add_int4_literal(info, "prtype", prtype); + pars_info_add_int4_literal(info, "len", len); + pars_info_add_int4_literal(info, "base", n_base); + + if (DB_SUCCESS != que_eval_sql( + info, + "PROCEDURE ADD_COL () IS\n" + "BEGIN\n" + "INSERT INTO SYS_COLUMNS VALUES" + "(:id,:pos,:name,:mtype,:prtype,:len,:base);\n" + 
"END;\n", FALSE, trx)) { + my_error(ER_INTERNAL_ERROR, MYF(0), + "InnoDB: Insert into SYS_COLUMNS failed"); + return true; + } + + return false; } /** Update INNODB SYS_COLUMNS on new virtual columns @@ -4118,10 +4831,9 @@ innobase_insert_sys_virtual( @param[in] col_name column name @param[in] vcol virtual column @param[in] trx transaction -@return DB_SUCCESS if successful, otherwise error code */ -static -dberr_t -innobase_add_one_virtual( +@retval false on success +@retval true on failure (my_error() will have been called) */ +static bool innobase_add_one_virtual( const dict_table_t* table, const char* col_name, dict_v_col_t* vcol, @@ -4129,67 +4841,41 @@ innobase_add_one_virtual( { ulint pos = dict_create_v_col_pos(vcol->v_pos, vcol->m_col.ind); - ulint mtype = vcol->m_col.mtype; - ulint prtype = vcol->m_col.prtype; - ulint len = vcol->m_col.len; - pars_info_t* info = pars_info_create(); - - pars_info_add_ull_literal(info, "id", table->id); - - pars_info_add_int4_literal(info, "pos", pos); - - pars_info_add_str_literal(info, "name", col_name); - pars_info_add_int4_literal(info, "mtype", mtype); - pars_info_add_int4_literal(info, "prtype", prtype); - pars_info_add_int4_literal(info, "len", len); - pars_info_add_int4_literal(info, "prec", vcol->num_base); - dberr_t error = que_eval_sql( - info, - "PROCEDURE P () IS\n" - "BEGIN\n" - "INSERT INTO SYS_COLUMNS VALUES" - "(:id, :pos, :name, :mtype, :prtype, :len, :prec);\n" - "END;\n", - FALSE, trx); - - if (error != DB_SUCCESS) { - return(error); + if (innodb_insert_sys_columns(table->id, pos, col_name, + vcol->m_col.mtype, vcol->m_col.prtype, + vcol->m_col.len, vcol->num_base, trx)) { + return true; } for (ulint i = 0; i < vcol->num_base; i++) { - error = innobase_insert_sys_virtual( - table, pos, vcol->base_col[i]->ind, trx); - if (error != DB_SUCCESS) { - return(error); + if (innobase_insert_sys_virtual( + table, pos, vcol->base_col[i]->ind, trx)) { + return true; } } - return(error); + return false; } /** Update 
SYS_TABLES.N_COLS in the data dictionary. @param[in] user_table InnoDB table -@param[in] n_cols the new value of SYS_TABLES.N_COLS +@param[in] n the new value of SYS_TABLES.N_COLS @param[in] trx transaction @return whether the operation failed */ -static -bool -innodb_update_n_cols(const dict_table_t* table, ulint n_cols, trx_t* trx) +static bool innodb_update_cols(const dict_table_t* table, ulint n, trx_t* trx) { pars_info_t* info = pars_info_create(); - pars_info_add_int4_literal(info, "n", n_cols); + pars_info_add_int4_literal(info, "n", n); pars_info_add_ull_literal(info, "id", table->id); - dberr_t err = que_eval_sql(info, - "PROCEDURE UPDATE_N_COLS () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLES SET N_COLS = :n" - " WHERE ID = :id;\n" - "END;\n", FALSE, trx); - - if (err != DB_SUCCESS) { + if (DB_SUCCESS != que_eval_sql(info, + "PROCEDURE UPDATE_N_COLS () IS\n" + "BEGIN\n" + "UPDATE SYS_TABLES SET N_COLS = :n" + " WHERE ID = :id;\n" + "END;\n", FALSE, trx)) { my_error(ER_INTERNAL_ERROR, MYF(0), "InnoDB: Updating SYS_TABLES.N_COLS failed"); return true; @@ -4207,281 +4893,66 @@ innodb_update_n_cols(const dict_table_t* table, ulint n_cols, trx_t* trx) static bool innobase_add_virtual_try( - Alter_inplace_info* ha_alter_info, - const dict_table_t* user_table, - trx_t* trx) + const Alter_inplace_info* ha_alter_info, + const dict_table_t* user_table, + trx_t* trx) { - ha_innobase_inplace_ctx* ctx; - dberr_t err = DB_SUCCESS; - - ctx = static_cast<ha_innobase_inplace_ctx*>( + ha_innobase_inplace_ctx* ctx = static_cast<ha_innobase_inplace_ctx*>( ha_alter_info->handler_ctx); for (ulint i = 0; i < ctx->num_to_add_vcol; i++) { - - err = innobase_add_one_virtual( - user_table, ctx->add_vcol_name[i], - &ctx->add_vcol[i], trx); - - if (err != DB_SUCCESS) { - my_error(ER_INTERNAL_ERROR, MYF(0), - "InnoDB: ADD COLUMN...VIRTUAL"); - return(true); + if (innobase_add_one_virtual( + user_table, ctx->add_vcol_name[i], + &ctx->add_vcol[i], trx)) { + return true; } } - - ulint n_col = 
unsigned(user_table->n_cols) - DATA_N_SYS_COLS; - ulint n_v_col = unsigned(user_table->n_v_cols) - + ctx->num_to_add_vcol - ctx->num_to_drop_vcol; - ulint new_n = dict_table_encode_n_col(n_col, n_v_col) - + (unsigned(user_table->flags & DICT_TF_COMPACT) << 31); - - return innodb_update_n_cols(user_table, new_n, trx); + return false; } -/** Insert into SYS_COLUMNS and insert/update the hidden metadata record -for instant ADD COLUMN. -@param[in,out] ctx ALTER TABLE context for the current partition -@param[in] altered_table MySQL table that is being altered -@param[in] table MySQL table as it is before the ALTER operation -@param[in,out] trx dictionary transaction -@retval true failure -@retval false success */ -static -bool -innobase_add_instant_try( - ha_innobase_inplace_ctx*ctx, - const TABLE* altered_table, - const TABLE* table, - trx_t* trx) +/** Add the newly added column in the sys_column system table. +@param[in] table_id table id +@param[in] pos position of the column +@param[in] field_name field name +@param[in] type data type +@retval true Failure +@retval false Success. */ +static bool innobase_instant_add_col( + table_id_t table_id, + ulint pos, + const char* field_name, + const dtype_t& type, + trx_t* trx) { - DBUG_ASSERT(!ctx->need_rebuild()); - - if (!ctx->is_instant()) return false; - - DBUG_ASSERT(altered_table->s->fields > table->s->fields); - DBUG_ASSERT(ctx->old_table->n_cols == ctx->old_n_cols); - - dict_table_t* user_table = ctx->old_table; - user_table->instant_add_column(*ctx->instant_table); - dict_index_t* index = dict_table_get_first_index(user_table); - /* The table may have been emptied and may have lost its - 'instant-add-ness' during this instant ADD COLUMN. */ - - /* Construct a table row of default values for the stored columns. 
*/ - dtuple_t* row = dtuple_create(ctx->heap, user_table->n_cols); - dict_table_copy_types(row, user_table); - Field** af = altered_table->field; - Field** const end = altered_table->field + altered_table->s->fields; - - for (uint i = 0; af < end; af++) { - if (!(*af)->stored_in_db()) { - continue; - } - - dict_col_t* col = dict_table_get_nth_col(user_table, i); - DBUG_ASSERT(!strcmp((*af)->field_name.str, - dict_table_get_col_name(user_table, i))); - - dfield_t* d = dtuple_get_nth_field(row, i); - - if (col->is_instant()) { - dfield_set_data(d, col->def_val.data, - col->def_val.len); - } else if ((*af)->real_maybe_null()) { - /* Store NULL for nullable 'core' columns. */ - dfield_set_null(d); - } else { - switch ((*af)->type()) { - case MYSQL_TYPE_VARCHAR: - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_LONG_BLOB: - /* Store the empty string for 'core' - variable-length NOT NULL columns. */ - dfield_set_data(d, field_ref_zero, 0); - break; - default: - /* For fixed-length NOT NULL 'core' columns, - get a dummy default value from SQL. Note that - we will preserve the old values of these - columns when updating the metadata - record, to avoid unnecessary updates. */ - ulint len = (*af)->pack_length(); - DBUG_ASSERT(d->type.mtype != DATA_INT - || len <= 8); - row_mysql_store_col_in_innobase_format( - d, d->type.mtype == DATA_INT - ? 
static_cast<byte*>( - mem_heap_alloc(ctx->heap, len)) - : NULL, true, (*af)->ptr, len, - dict_table_is_comp(user_table)); - } - } - - if (i + DATA_N_SYS_COLS < ctx->old_n_cols) { - i++; - continue; - } + return innodb_insert_sys_columns(table_id, pos, field_name, + type.mtype, type.prtype, type.len, 0, + trx); +} - pars_info_t* info = pars_info_create(); - pars_info_add_ull_literal(info, "id", user_table->id); - pars_info_add_int4_literal(info, "pos", i); - pars_info_add_str_literal(info, "name", (*af)->field_name.str); - pars_info_add_int4_literal(info, "mtype", d->type.mtype); - pars_info_add_int4_literal(info, "prtype", d->type.prtype); - pars_info_add_int4_literal(info, "len", d->type.len); +/** Delete metadata from SYS_COLUMNS and SYS_VIRTUAL. +@param[in] id table id +@param[in] pos first SYS_COLUMNS.POS +@param[in,out] trx data dictionary transaction +@retval true Failure +@retval false Success. */ +static bool innobase_instant_drop_cols(table_id_t id, ulint pos, trx_t* trx) +{ + pars_info_t* info = pars_info_create(); + pars_info_add_ull_literal(info, "id", id); + pars_info_add_int4_literal(info, "pos", pos); - dberr_t err = que_eval_sql( + dberr_t err = que_eval_sql( info, - "PROCEDURE ADD_COL () IS\n" + "PROCEDURE DELETE_COL () IS\n" "BEGIN\n" - "INSERT INTO SYS_COLUMNS VALUES" - "(:id,:pos,:name,:mtype,:prtype,:len,0);\n" + "DELETE FROM SYS_COLUMNS WHERE\n" + "TABLE_ID = :id AND POS >= :pos;\n" + "DELETE FROM SYS_VIRTUAL WHERE TABLE_ID = :id;\n" "END;\n", FALSE, trx); - if (err != DB_SUCCESS) { - my_error(ER_INTERNAL_ERROR, MYF(0), - "InnoDB: Insert into SYS_COLUMNS failed"); - return(true); - } - - i++; - } - - if (innodb_update_n_cols(user_table, dict_table_encode_n_col( - unsigned(user_table->n_cols) - - DATA_N_SYS_COLS, - user_table->n_v_cols) - | (user_table->flags & DICT_TF_COMPACT) << 31, - trx)) { - return true; - } - - unsigned i = unsigned(user_table->n_cols) - DATA_N_SYS_COLS; - byte trx_id[DATA_TRX_ID_LEN], roll_ptr[DATA_ROLL_PTR_LEN]; - 
dfield_set_data(dtuple_get_nth_field(row, i++), field_ref_zero, - DATA_ROW_ID_LEN); - dfield_set_data(dtuple_get_nth_field(row, i++), trx_id, sizeof trx_id); - dfield_set_data(dtuple_get_nth_field(row, i),roll_ptr,sizeof roll_ptr); - DBUG_ASSERT(i + 1 == user_table->n_cols); - - trx_write_trx_id(trx_id, trx->id); - /* The DB_ROLL_PTR will be assigned later, when allocating undo log. - Silence a Valgrind warning in dtuple_validate() when - row_ins_clust_index_entry_low() searches for the insert position. */ - memset(roll_ptr, 0, sizeof roll_ptr); - - dtuple_t* entry = row_build_index_entry(row, NULL, index, ctx->heap); - entry->info_bits = REC_INFO_METADATA; - - mtr_t mtr; - mtr.start(); - index->set_modified(mtr); - btr_pcur_t pcur; - btr_pcur_open_at_index_side(true, index, BTR_MODIFY_TREE, &pcur, true, - 0, &mtr); - ut_ad(btr_pcur_is_before_first_on_page(&pcur)); - btr_pcur_move_to_next_on_page(&pcur); - - buf_block_t* block = btr_pcur_get_block(&pcur); - ut_ad(page_is_leaf(block->frame)); - ut_ad(!page_has_prev(block->frame)); - ut_ad(!buf_block_get_page_zip(block)); - const rec_t* rec = btr_pcur_get_rec(&pcur); - que_thr_t* thr = pars_complete_graph_for_exec( - NULL, trx, ctx->heap, NULL); - - dberr_t err; - if (rec_is_metadata(rec, index)) { - ut_ad(page_rec_is_user_rec(rec)); - if (!page_has_next(block->frame) - && page_rec_is_last(rec, block->frame)) { - goto empty_table; - } - /* Extend the record with the instantly added columns. */ - const unsigned n = user_table->n_cols - ctx->old_n_cols; - /* Reserve room for DB_TRX_ID,DB_ROLL_PTR and any - non-updated off-page columns in case they are moved off - page as a result of the update. 
*/ - upd_t* update = upd_create(index->n_fields, ctx->heap); - update->n_fields = n; - update->info_bits = REC_INFO_METADATA; - /* Add the default values for instantly added columns */ - for (unsigned i = 0; i < n; i++) { - upd_field_t* uf = upd_get_nth_field(update, i); - unsigned f = index->n_fields - n + i; - uf->field_no = f; - uf->new_val = entry->fields[f]; - } - ulint* offsets = NULL; - mem_heap_t* offsets_heap = NULL; - big_rec_t* big_rec; - err = btr_cur_pessimistic_update( - BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, - btr_pcur_get_btr_cur(&pcur), - &offsets, &offsets_heap, ctx->heap, - &big_rec, update, UPD_NODE_NO_ORD_CHANGE, - thr, trx->id, &mtr); - if (big_rec) { - if (err == DB_SUCCESS) { - err = btr_store_big_rec_extern_fields( - &pcur, offsets, big_rec, &mtr, - BTR_STORE_UPDATE); - } - - dtuple_big_rec_free(big_rec); - } - if (offsets_heap) { - mem_heap_free(offsets_heap); - } - btr_pcur_close(&pcur); - goto func_exit; - } else if (page_rec_is_supremum(rec)) { -empty_table: - /* The table is empty. */ - ut_ad(page_is_root(block->frame)); - btr_page_empty(block, NULL, index, 0, &mtr); - index->remove_instant(); - err = DB_SUCCESS; - goto func_exit; - } - - /* Convert the table to the instant ADD COLUMN format. 
*/ - ut_ad(user_table->is_instant()); - mtr.commit(); - mtr.start(); - index->set_modified(mtr); - if (page_t* root = btr_root_get(index, &mtr)) { - if (fil_page_get_type(root) != FIL_PAGE_INDEX) { - DBUG_ASSERT(!"wrong page type"); - goto err_exit; - } - - DBUG_ASSERT(!page_is_comp(root) || !page_get_instant(root)); - mlog_write_ulint(root + FIL_PAGE_TYPE, - FIL_PAGE_TYPE_INSTANT, MLOG_2BYTES, - &mtr); - page_set_instant(root, index->n_core_fields, &mtr); - mtr.commit(); - mtr.start(); - index->set_modified(mtr); - err = row_ins_clust_index_entry_low( - BTR_NO_LOCKING_FLAG, BTR_MODIFY_TREE, index, - index->n_uniq, entry, 0, thr, false); - } else { -err_exit: - err = DB_CORRUPTION; - } - -func_exit: - mtr.commit(); - if (err != DB_SUCCESS) { - my_error_innodb(err, table->s->table_name.str, - user_table->flags); + my_error(ER_INTERNAL_ERROR, MYF(0), + "InnoDB: DELETE from SYS_COLUMNS/SYS_VIRTUAL failed"); return true; } @@ -4659,9 +5130,9 @@ innobase_drop_one_virtual_sys_virtual( static bool innobase_drop_virtual_try( - Alter_inplace_info* ha_alter_info, - const dict_table_t* user_table, - trx_t* trx) + const Alter_inplace_info* ha_alter_info, + const dict_table_t* user_table, + trx_t* trx) { ha_innobase_inplace_ctx* ctx; dberr_t err = DB_SUCCESS; @@ -4694,14 +5165,504 @@ innobase_drop_virtual_try( } } + return false; +} + +/** Serialise metadata of dropped or reordered columns. 
+@param[in,out] heap memory heap for allocation +@param[out] field data field with the metadata */ +inline +void dict_table_t::serialise_columns(mem_heap_t* heap, dfield_t* field) const +{ + DBUG_ASSERT(instant); + const dict_index_t& index = *UT_LIST_GET_FIRST(indexes); + unsigned n_fixed = index.first_user_field(); + unsigned num_non_pk_fields = index.n_fields - n_fixed; + + ulint len = 4 + num_non_pk_fields * 2; + + byte* data = static_cast<byte*>(mem_heap_alloc(heap, len)); + + dfield_set_data(field, data, len); + + mach_write_to_4(data, num_non_pk_fields); + + data += 4; + + for (ulint i = n_fixed; i < index.n_fields; i++) { + mach_write_to_2(data, instant->non_pk_col_map[i - n_fixed]); + data += 2; + } +} + +/** Construct the metadata record for instant ALTER TABLE. +@param[in] row dummy or default values for existing columns +@param[in,out] heap memory heap for allocations +@return metadata record */ +inline +dtuple_t* +dict_index_t::instant_metadata(const dtuple_t& row, mem_heap_t* heap) const +{ + ut_ad(is_primary()); + dtuple_t* entry; + + if (!table->instant) { + entry = row_build_index_entry(&row, NULL, this, heap); + entry->info_bits = REC_INFO_METADATA_ADD; + return entry; + } + + entry = dtuple_create(heap, n_fields + 1); + entry->n_fields_cmp = n_uniq; + entry->info_bits = REC_INFO_METADATA_ALTER; + + const dict_field_t* field = fields; + + for (uint i = 0; i <= n_fields; i++, field++) { + dfield_t* dfield = dtuple_get_nth_field(entry, i); + + if (i == first_user_field()) { + table->serialise_columns(heap, dfield); + dfield->type.metadata_blob_init(); + field--; + continue; + } + + ut_ad(!field->col->is_virtual()); + + if (field->col->is_dropped()) { + dict_col_copy_type(field->col, &dfield->type); + if (field->col->is_nullable()) { + dfield_set_null(dfield); + } else { + dfield_set_data(dfield, field_ref_zero, + field->fixed_len); + } + continue; + } + + const dfield_t* s = dtuple_get_nth_field(&row, field->col->ind); + 
ut_ad(dict_col_type_assert_equal(field->col, &s->type)); + *dfield = *s; + + if (dfield_is_null(dfield)) { + continue; + } + + if (dfield_is_ext(dfield)) { + ut_ad(i > first_user_field()); + ut_ad(!field->prefix_len); + ut_ad(dfield->len >= FIELD_REF_SIZE); + dfield_set_len(dfield, dfield->len - FIELD_REF_SIZE); + } + + if (!field->prefix_len) { + continue; + } + + ut_ad(field->col->ord_part); + ut_ad(i < n_uniq); + + ulint len = dtype_get_at_most_n_mbchars( + field->col->prtype, + field->col->mbminlen, field->col->mbmaxlen, + field->prefix_len, dfield->len, + static_cast<char*>(dfield_get_data(dfield))); + dfield_set_len(dfield, len); + } + + return entry; +} + +/** Assign a new id to invalidate old undo log records, so +that purge will be unable to refer to fields that used to be +instantly added to the end of the index. This is only to be +used during ALTER TABLE when the table is empty, before +invoking dict_index_t::clear_instant_alter(). +@param[in,out] trx dictionary transaction +@return error code */ +inline dberr_t dict_table_t::reassign_id(trx_t* trx) +{ + DBUG_ASSERT(instant); + ut_ad(magic_n == DICT_TABLE_MAGIC_N); + ut_ad(!is_temporary()); + + table_id_t new_id; + dict_hdr_get_new_id(&new_id, NULL, NULL); + pars_info_t* pinfo = pars_info_create(); + + pars_info_add_ull_literal(pinfo, "old", id); + pars_info_add_ull_literal(pinfo, "new", new_id); + + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); + + dberr_t err = que_eval_sql( + pinfo, + "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n" + "BEGIN\n" + "UPDATE SYS_TABLES SET ID=:new WHERE ID=:old;\n" + "UPDATE SYS_COLUMNS SET TABLE_ID=:new WHERE TABLE_ID=:old;\n" + "UPDATE SYS_INDEXES SET TABLE_ID=:new WHERE TABLE_ID=:old;\n" + "UPDATE SYS_VIRTUAL SET TABLE_ID=:new WHERE TABLE_ID=:old;\n" + "END;\n" + , FALSE, trx); + if (err == DB_SUCCESS) { + auto fold = ut_fold_ull(id); + HASH_DELETE(dict_table_t, id_hash, 
dict_sys->table_id_hash, + fold, this); + id = new_id; + fold = ut_fold_ull(id); + HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, + fold, this); + } + + return err; +} + +/** Insert or update SYS_COLUMNS and the hidden metadata record +for instant ALTER TABLE. +@param[in] ha_alter_info ALTER TABLE context +@param[in,out] ctx ALTER TABLE context for the current partition +@param[in] altered_table MySQL table that is being altered +@param[in] table MySQL table as it is before the ALTER operation +@param[in,out] trx dictionary transaction +@retval true failure +@retval false success */ +static bool innobase_instant_try( + const Alter_inplace_info* ha_alter_info, + ha_innobase_inplace_ctx* ctx, + const TABLE* altered_table, + const TABLE* table, + trx_t* trx) +{ + DBUG_ASSERT(!ctx->need_rebuild()); + + if (!ctx->is_instant()) return false; + + dict_table_t* user_table = ctx->old_table; + + dict_index_t* index = dict_table_get_first_index(user_table); + mtr_t mtr; + mtr.start(); + /* Prevent purge from calling dict_index_t::clear_instant_alter(), + to protect index->n_core_fields, index->table->instant and others + from changing during ctx->instant_column(). */ + instant_metadata_lock(*index, mtr); + const unsigned n_old_fields = index->n_fields; + const dict_col_t* old_cols = user_table->cols; + DBUG_ASSERT(user_table->n_cols == ctx->old_n_cols); + + ctx->instant_column(); + + DBUG_ASSERT(index->n_fields >= n_old_fields); + /* Release the page latch. Between this and the next + btr_pcur_open_at_index_side(), data fields such as + index->n_core_fields and index->table->instant could change, + but we would handle that in empty_table: below. */ + mtr.commit(); + /* The table may have been emptied and may have lost its + 'instantness' during this ALTER TABLE. */ + + /* Construct a table row of default values for the stored columns. 
*/ + dtuple_t* row = dtuple_create(ctx->heap, user_table->n_cols); + dict_table_copy_types(row, user_table); + Field** af = altered_table->field; + Field** const end = altered_table->field + altered_table->s->fields; + ut_d(List_iterator_fast<Create_field> cf_it( + ha_alter_info->alter_info->create_list)); + if (ctx->first_alter_pos + && innobase_instant_drop_cols(user_table->id, + ctx->first_alter_pos - 1, trx)) { + return true; + } + for (uint i = 0; af < end; af++) { + if (!(*af)->stored_in_db()) { + ut_d(cf_it++); + continue; + } + + const dict_col_t* old = dict_table_t::find(old_cols, + ctx->col_map, + ctx->old_n_cols, i); + DBUG_ASSERT(!old || i >= ctx->old_n_cols - DATA_N_SYS_COLS + || old->ind == i + || (ctx->first_alter_pos + && old->ind >= ctx->first_alter_pos - 1)); + + dfield_t* d = dtuple_get_nth_field(row, i); + const dict_col_t* col = dict_table_get_nth_col(user_table, i); + DBUG_ASSERT(!col->is_virtual()); + DBUG_ASSERT(!col->is_dropped()); + DBUG_ASSERT(col->mtype != DATA_SYS); + DBUG_ASSERT(!strcmp((*af)->field_name.str, + dict_table_get_col_name(user_table, i))); + DBUG_ASSERT(old || col->is_added()); + + if (col->is_added()) { + dfield_set_data(d, col->def_val.data, + col->def_val.len); + } else if ((*af)->real_maybe_null()) { + /* Store NULL for nullable 'core' columns. */ + dfield_set_null(d); + } else { + switch ((*af)->type()) { + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_LONG_BLOB: + /* Store the empty string for 'core' + variable-length NOT NULL columns. */ + dfield_set_data(d, field_ref_zero, 0); + break; + default: + /* For fixed-length NOT NULL 'core' columns, + get a dummy default value from SQL. Note that + we will preserve the old values of these + columns when updating the metadata + record, to avoid unnecessary updates. 
*/ + ulint len = (*af)->pack_length(); + DBUG_ASSERT(d->type.mtype != DATA_INT + || len <= 8); + row_mysql_store_col_in_innobase_format( + d, d->type.mtype == DATA_INT + ? static_cast<byte*>( + mem_heap_alloc(ctx->heap, len)) + : NULL, true, (*af)->ptr, len, + dict_table_is_comp(user_table)); + } + } + + ut_d(const Create_field* new_field = cf_it++); + /* new_field->field would point to an existing column. + If it is NULL, the column was added by this ALTER TABLE. */ + ut_ad(!new_field->field == !old); + + if (old && (!ctx->first_alter_pos + || i < ctx->first_alter_pos - 1)) { + /* The record is already present in SYS_COLUMNS. */ + } else if (innobase_instant_add_col(user_table->id, i, + (*af)->field_name.str, + d->type, trx)) { + return true; + } + + i++; + } + + if (innodb_update_cols(user_table, dict_table_encode_n_col( + unsigned(user_table->n_cols) + - DATA_N_SYS_COLS, + user_table->n_v_cols) + | (user_table->flags & DICT_TF_COMPACT) << 31, + trx)) { + return true; + } + + if (ctx->first_alter_pos) { +add_all_virtual: + for (uint i = 0; i < user_table->n_v_cols; i++) { + if (innobase_add_one_virtual( + user_table, + dict_table_get_v_col_name(user_table, i), + &user_table->v_cols[i], trx)) { + return true; + } + } + } else if (ha_alter_info->handler_flags & ALTER_DROP_VIRTUAL_COLUMN) { + if (innobase_instant_drop_cols(user_table->id, 65536, trx)) { + return true; + } + goto add_all_virtual; + } else if ((ha_alter_info->handler_flags & ALTER_ADD_VIRTUAL_COLUMN) + && innobase_add_virtual_try(ha_alter_info, user_table, + trx)) { + return true; + } + + unsigned i = unsigned(user_table->n_cols) - DATA_N_SYS_COLS; + DBUG_ASSERT(i >= altered_table->s->stored_fields); + DBUG_ASSERT(i <= altered_table->s->stored_fields + 1); + if (i > altered_table->s->fields) { + const dict_col_t& fts_doc_id = user_table->cols[i - 1]; + DBUG_ASSERT(!strcmp(fts_doc_id.name(*user_table), + FTS_DOC_ID_COL_NAME)); + DBUG_ASSERT(!fts_doc_id.is_nullable()); + DBUG_ASSERT(fts_doc_id.len == 
8); + dfield_set_data(dtuple_get_nth_field(row, i - 1), + field_ref_zero, fts_doc_id.len); + } + byte trx_id[DATA_TRX_ID_LEN], roll_ptr[DATA_ROLL_PTR_LEN]; + dfield_set_data(dtuple_get_nth_field(row, i++), field_ref_zero, + DATA_ROW_ID_LEN); + dfield_set_data(dtuple_get_nth_field(row, i++), trx_id, sizeof trx_id); + dfield_set_data(dtuple_get_nth_field(row, i),roll_ptr,sizeof roll_ptr); + DBUG_ASSERT(i + 1 == user_table->n_cols); + + trx_write_trx_id(trx_id, trx->id); + /* The DB_ROLL_PTR will be assigned later, when allocating undo log. + Silence a Valgrind warning in dtuple_validate() when + row_ins_clust_index_entry_low() searches for the insert position. */ + memset(roll_ptr, 0, sizeof roll_ptr); + + dtuple_t* entry = index->instant_metadata(*row, ctx->heap); + mtr.start(); + index->set_modified(mtr); + btr_pcur_t pcur; + btr_pcur_open_at_index_side(true, index, BTR_MODIFY_TREE, &pcur, true, + 0, &mtr); + ut_ad(btr_pcur_is_before_first_on_page(&pcur)); + btr_pcur_move_to_next_on_page(&pcur); + + buf_block_t* block = btr_pcur_get_block(&pcur); + ut_ad(page_is_leaf(block->frame)); + ut_ad(!page_has_prev(block->frame)); + ut_ad(!buf_block_get_page_zip(block)); + const rec_t* rec = btr_pcur_get_rec(&pcur); + que_thr_t* thr = pars_complete_graph_for_exec( + NULL, trx, ctx->heap, NULL); + + dberr_t err; + if (rec_is_metadata(rec, *index)) { + ut_ad(page_rec_is_user_rec(rec)); + if (!page_has_next(block->frame) + && page_rec_is_last(rec, block->frame)) { + goto empty_table; + } + + /* Ensure that the root page is in the correct format. */ + buf_block_t* root = btr_root_block_get(index, RW_X_LATCH, + &mtr); + DBUG_ASSERT(root); + DBUG_ASSERT(!root->page.encrypted); + if (fil_page_get_type(root->frame) != FIL_PAGE_TYPE_INSTANT) { + DBUG_ASSERT(!"wrong page type"); + err = DB_CORRUPTION; + goto func_exit; + } + + btr_set_instant(root, *index, &mtr); + + /* Extend the record with any added columns. 
*/ + uint n = uint(index->n_fields) - n_old_fields; + /* Reserve room for DB_TRX_ID,DB_ROLL_PTR and any + non-updated off-page columns in case they are moved off + page as a result of the update. */ + const unsigned f = user_table->instant != NULL; + upd_t* update = upd_create(index->n_fields + f, ctx->heap); + update->n_fields = n + f; + update->info_bits = f + ? REC_INFO_METADATA_ALTER + : REC_INFO_METADATA_ADD; + if (f) { + upd_field_t* uf = upd_get_nth_field(update, 0); + uf->field_no = index->first_user_field(); + uf->new_val = entry->fields[uf->field_no]; + DBUG_ASSERT(!dfield_is_ext(&uf->new_val)); + DBUG_ASSERT(!dfield_is_null(&uf->new_val)); + } + + /* Add the default values for instantly added columns */ + unsigned j = f; + + for (unsigned k = n_old_fields; k < index->n_fields; k++) { + upd_field_t* uf = upd_get_nth_field(update, j++); + uf->field_no = k + f; + uf->new_val = entry->fields[k + f]; + + ut_ad(j <= n + f); + } + + ut_ad(j == n + f); + + ulint* offsets = NULL; + mem_heap_t* offsets_heap = NULL; + big_rec_t* big_rec; + err = btr_cur_pessimistic_update( + BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, + btr_pcur_get_btr_cur(&pcur), + &offsets, &offsets_heap, ctx->heap, + &big_rec, update, UPD_NODE_NO_ORD_CHANGE, + thr, trx->id, &mtr); + + offsets = rec_get_offsets( + btr_pcur_get_rec(&pcur), index, offsets, + true, ULINT_UNDEFINED, &offsets_heap); + if (big_rec) { + if (err == DB_SUCCESS) { + err = btr_store_big_rec_extern_fields( + &pcur, offsets, big_rec, &mtr, + BTR_STORE_UPDATE); + } + + dtuple_big_rec_free(big_rec); + } + if (offsets_heap) { + mem_heap_free(offsets_heap); + } + btr_pcur_close(&pcur); + goto func_exit; + } else if (page_rec_is_supremum(rec)) { +empty_table: + /* The table is empty. 
*/ + ut_ad(page_is_root(block->frame)); + if (index->table->instant) { + /* Assign a new dict_table_t::id + to invalidate old undo log records in purge, + so that they cannot refer to fields that were + instantly added to the end of the index, + instead of using the canonical positions + that will be replaced below + by index->clear_instant_alter(). */ + err = index->table->reassign_id(trx); + if (err != DB_SUCCESS) { + goto func_exit; + } + } + /* MDEV-17383: free metadata BLOBs! */ + btr_page_empty(block, NULL, index, 0, &mtr); + index->clear_instant_alter(); + err = DB_SUCCESS; + goto func_exit; + } + + /* Convert the table to the instant ALTER TABLE format. */ + ut_ad(user_table->is_instant()); + mtr.commit(); + mtr.start(); + index->set_modified(mtr); + if (buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, &mtr)) { + if (root->page.encrypted + || fil_page_get_type(root->frame) != FIL_PAGE_INDEX) { + DBUG_ASSERT(!"wrong page type"); + goto err_exit; + } + + btr_set_instant(root, *index, &mtr); + mtr.commit(); + mtr.start(); + index->set_modified(mtr); + err = row_ins_clust_index_entry_low( + BTR_NO_LOCKING_FLAG, BTR_MODIFY_TREE, index, + index->n_uniq, entry, 0, thr, false); + } else { +err_exit: + err = DB_CORRUPTION; + } + +func_exit: + mtr.commit(); - ulint n_col = unsigned(user_table->n_cols) - DATA_N_SYS_COLS; - ulint n_v_col = unsigned(user_table->n_v_cols) - - ctx->num_to_drop_vcol; - ulint new_n = dict_table_encode_n_col(n_col, n_v_col) - | ((user_table->flags & DICT_TF_COMPACT) << 31); + if (err != DB_SUCCESS) { + my_error_innodb(err, table->s->table_name.str, + user_table->flags); + return true; + } - return innodb_update_n_cols(user_table, new_n, trx); + return false; } /** Adjust the create index column number from "New table" to @@ -5296,20 +6257,12 @@ new_clustered_failed: == !!new_clustered); } - if (ctx->need_rebuild() && user_table->supports_instant()) { - if (!instant_alter_column_possible(ha_alter_info, old_table)) { - goto 
not_instant_add_column; - } - - for (uint i = uint(ctx->old_table->n_cols) - DATA_N_SYS_COLS; - i--; ) { - if (ctx->col_map[i] != i) { - goto not_instant_add_column; - } - } - - DBUG_ASSERT(ctx->new_table->n_cols > ctx->old_table->n_cols); - + if (ctx->need_rebuild() && instant_alter_column_possible( + *user_table, ha_alter_info, old_table) +#if 1 // MDEV-17459: adjust fts_fetch_doc_from_rec() and friends; remove this + && !innobase_fulltext_exist(altered_table) +#endif + ) { for (uint a = 0; a < ctx->num_to_add_index; a++) { ctx->add_index[a]->table = ctx->new_table; ctx->add_index[a] = dict_index_add_to_cache( @@ -5317,6 +6270,7 @@ new_clustered_failed: &error, add_v); ut_a(error == DB_SUCCESS); } + DBUG_ASSERT(ha_alter_info->key_count /* hidden GEN_CLUST_INDEX in InnoDB */ + dict_index_is_auto_gen_clust( @@ -5328,6 +6282,7 @@ new_clustered_failed: altered_table->key_info) != FTS_EXIST_DOC_ID_INDEX) == ctx->num_to_add_index); + ctx->num_to_add_index = 0; ctx->add_index = NULL; @@ -5358,26 +6313,11 @@ new_clustered_failed: DBUG_ASSERT(!strcmp((*af)->field_name.str, dict_table_get_col_name(ctx->new_table, i))); - DBUG_ASSERT(!col->is_instant()); + DBUG_ASSERT(!col->is_added()); if (new_field->field) { - ut_d(const dict_col_t* old_col - = dict_table_get_nth_col(user_table, i)); - ut_d(const dict_index_t* index - = user_table->indexes.start); - DBUG_SLOW_ASSERT(col->mtype == old_col->mtype); - ut_ad(col->prtype == old_col->prtype - || col->prtype - == (old_col->prtype & ~DATA_VERSIONED)); - DBUG_SLOW_ASSERT(col->mbminlen - == old_col->mbminlen); - DBUG_SLOW_ASSERT(col->mbmaxlen - == old_col->mbmaxlen); - DBUG_SLOW_ASSERT(col->len >= old_col->len); - DBUG_SLOW_ASSERT(old_col->is_instant() - == (dict_col_get_clust_pos( - old_col, index) - >= index->n_core_fields)); + /* This is a pre-existing column, + possibly at a different position. 
*/ } else if ((*af)->is_real_null()) { /* DEFAULT NULL */ col->def_val.len = UNIV_SQL_NULL; @@ -5449,7 +6389,6 @@ new_clustered_failed: } if (ctx->need_rebuild()) { -not_instant_add_column: DBUG_ASSERT(ctx->need_rebuild()); DBUG_ASSERT(!ctx->is_instant()); DBUG_ASSERT(num_fts_index <= 1); @@ -7711,10 +8650,11 @@ err_exit: index = dict_table_get_next_index(index)) { for (ulint i = 0; i < dict_index_get_n_fields(index); i++) { - if (my_strcasecmp( - system_charset_info, - dict_index_get_nth_field(index, i)->name, - from)) { + const dict_field_t& f = index->fields[i]; + DBUG_ASSERT(!f.name == f.col->is_dropped()); + + if (!f.name || my_strcasecmp(system_charset_info, + f.name, from)) { continue; } @@ -9045,23 +9985,36 @@ commit_try_norebuild( } #endif /* MYSQL_RENAME_INDEX */ - if ((ha_alter_info->handler_flags - & ALTER_DROP_VIRTUAL_COLUMN) - && innobase_drop_virtual_try(ha_alter_info, ctx->old_table, trx)) { - DBUG_RETURN(true); - } + if (!ctx->is_instant() && ha_alter_info->handler_flags + & (ALTER_DROP_VIRTUAL_COLUMN | ALTER_ADD_VIRTUAL_COLUMN)) { + if ((ha_alter_info->handler_flags & ALTER_DROP_VIRTUAL_COLUMN) + && innobase_drop_virtual_try(ha_alter_info, ctx->old_table, + trx)) { + DBUG_RETURN(true); + } - if ((ha_alter_info->handler_flags - & ALTER_ADD_VIRTUAL_COLUMN) - && innobase_add_virtual_try(ha_alter_info, ctx->old_table, trx)) { - DBUG_RETURN(true); - } + if ((ha_alter_info->handler_flags & ALTER_ADD_VIRTUAL_COLUMN) + && innobase_add_virtual_try(ha_alter_info, ctx->old_table, + trx)) { + DBUG_RETURN(true); + } - if (innobase_add_instant_try(ctx, altered_table, old_table, trx)) { - DBUG_RETURN(true); + ulint n_col = unsigned(ctx->old_table->n_cols) + - DATA_N_SYS_COLS; + ulint n_v_col = unsigned(ctx->old_table->n_v_cols) + + ctx->num_to_add_vcol - ctx->num_to_drop_vcol; + + if (innodb_update_cols( + ctx->old_table, + dict_table_encode_n_col(n_col, n_v_col) + | unsigned(ctx->old_table->flags & DICT_TF_COMPACT) + << 31, trx)) { + DBUG_RETURN(true); + } } - 
DBUG_RETURN(false); + DBUG_RETURN(innobase_instant_try(ha_alter_info, ctx, altered_table, + old_table, trx)); } /** Commit the changes to the data dictionary cache @@ -9222,6 +10175,42 @@ commit_cache_norebuild( if (!ctx->is_instant()) { innobase_rename_or_enlarge_columns_cache( ha_alter_info, table, ctx->new_table); + } else { + ut_ad(ctx->col_map); + + if (fts_t* fts = ctx->new_table->fts) { + ut_ad(fts->doc_col != ULINT_UNDEFINED); + ut_ad(ctx->new_table->n_cols > DATA_N_SYS_COLS); + const ulint c = ctx->col_map[fts->doc_col]; + ut_ad(c < ulint(ctx->new_table->n_cols) + - DATA_N_SYS_COLS); + ut_d(const dict_col_t& col = ctx->new_table->cols[c]); + ut_ad(!col.is_nullable()); + ut_ad(!col.is_virtual()); + ut_ad(!col.is_added()); + ut_ad(col.prtype & DATA_UNSIGNED); + ut_ad(col.mtype == DATA_INT); + ut_ad(col.len == 8); + ut_ad(col.ord_part); + fts->doc_col = c; + } + + if (ha_alter_info->handler_flags & ALTER_DROP_STORED_COLUMN) { + dict_index_t* index = dict_table_get_first_index( + ctx->new_table); + for (const dict_field_t* f = index->fields, + * const end = f + index->n_fields; + f != end; f++) { + dict_col_t& c = *f->col; + if (c.is_dropped()) { + c.set_dropped(!c.is_nullable(), + DATA_LARGE_MTYPE(c.mtype) + || (!f->fixed_len + && c.len > 255), + f->fixed_len); + } + } + } } if (ha_alter_info->handler_flags & ALTER_COLUMN_UNVERSIONED) { @@ -9927,6 +10916,9 @@ foreign_fail: } } + /* MDEV-17468: Avoid this at least when ctx->is_instant(). + Currently dict_load_column_low() is the only place where + num_base for virtual columns is assigned to nonzero. 
*/ if (ctx0->num_to_drop_vcol || ctx0->num_to_add_vcol) { DBUG_ASSERT(ctx0->old_table->get_ref_count() == 1); @@ -9944,6 +10936,12 @@ foreign_fail: tb_name[strlen(m_prebuilt->table->name.m_name)] = 0; dict_table_close(m_prebuilt->table, true, false); + if (ctx0->is_instant()) { + for (unsigned i = ctx0->old_n_v_cols; i--; ) { + UT_DELETE(ctx0->old_v_cols[i].v_indexes); + } + const_cast<unsigned&>(ctx0->old_n_v_cols) = 0; + } dict_table_remove_from_cache(m_prebuilt->table); m_prebuilt->table = dict_table_open_on_name( tb_name, TRUE, TRUE, DICT_ERR_IGNORE_NONE); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 9ed1efd35ff..f005102514a 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -911,10 +911,7 @@ ibuf_set_free_bits_func( ut_ad(0); break; case FIL_TYPE_TABLESPACE: - /* Avoid logging while fixing up truncate of table. */ - if (!srv_is_tablespace_truncated(block->page.id.space())) { - break; - } + break; /* fall through */ case FIL_TYPE_TEMPORARY: case FIL_TYPE_IMPORT: diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index b99e7e4c522..99d3b16f150 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -358,19 +358,16 @@ btr_node_ptr_get_child_page_no( @param[in] type type of the index @param[in,out] space tablespace where created @param[in] index_id index id -@param[in] index index, or NULL when applying TRUNCATE -log record during recovery -@param[in] btr_redo_create_info used for applying TRUNCATE log -@param[in] mtr mini-transaction handle -record during recovery -@return page number of the created root, FIL_NULL if did not succeed */ +@param[in] index index +@param[in,out] mtr mini-transaction +@return page number of the created root +@retval FIL_NULL if did not succeed */ ulint btr_create( ulint type, fil_space_t* space, index_id_t index_id, dict_index_t* index, - const btr_create_t* btr_redo_create_info, 
mtr_t* mtr); /** Free a persistent index tree if it exists. @@ -421,6 +418,12 @@ void btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false) MY_ATTRIBUTE((nonnull)); +/** Write instant ALTER TABLE metadata to a root page. +@param[in,out] root clustered index root page +@param[in] index clustered index with instant ALTER TABLE +@param[in,out] mtr mini-transaction */ +void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr); + /*************************************************************//** Makes tree one level higher by splitting the root, and inserts the tuple. It is assumed that mtr contains an x-latch on the tree. diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h index f8685d34764..fa59275dbff 100644 --- a/storage/innobase/include/btr0types.h +++ b/storage/innobase/include/btr0types.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -55,35 +56,4 @@ in the index record. */ #define BTR_EXTERN_LOCAL_STORED_MAX_SIZE \ (BTR_EXTERN_FIELD_REF_SIZE * 2) -/** The information is used for creating a new index tree when -applying TRUNCATE log record during recovery */ -struct btr_create_t { - - explicit btr_create_t(const byte* const ptr) - : - format_flags(), - n_fields(), - field_len(), - fields(ptr), - trx_id_pos(ULINT_UNDEFINED) - { - /* Do nothing */ - } - - /** Page format */ - ulint format_flags; - - /** Numbr of index fields */ - ulint n_fields; - - /** The length of the encoded meta-data */ - ulint field_len; - - /** Field meta-data, encoded. */ - const byte* const fields; - - /** Position of trx-id column. 
*/ - ulint trx_id_pos; -}; - #endif diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h index 15c7ac9866a..8505505270b 100644 --- a/storage/innobase/include/data0data.h +++ b/storage/innobase/include/data0data.h @@ -643,6 +643,33 @@ struct dtuple_t { inserted or updated. @param[in] index index possibly with instantly added columns */ void trim(const dict_index_t& index); + + /** + @param info_bits the info_bits of a data tuple + @return whether this is a hidden metadata record + for instant ALTER TABLE (not only ADD COLUMN) */ + static bool is_alter_metadata(ulint info_bits) + { + return UNIV_UNLIKELY(info_bits == REC_INFO_METADATA_ALTER); + } + + /** + @param info_bits the info_bits of a data tuple + @return whether this is a hidden metadata record + for instant ADD COLUMN or ALTER TABLE */ + static bool is_metadata(ulint info_bits) + { + return UNIV_UNLIKELY((info_bits & ~REC_INFO_DELETED_FLAG) + == REC_INFO_METADATA_ADD); + } + + /** @return whether this is a hidden metadata record + for instant ALTER TABLE (not only ADD COLUMN) */ + bool is_alter_metadata() const { return is_alter_metadata(info_bits); } + + /** @return whether this is a hidden metadata record + for instant ADD COLUMN or ALTER TABLE */ + bool is_metadata() const { return is_metadata(info_bits); } }; /** A slot for a field in a big rec vector */ diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h index b999106fee0..3b3ac9d1885 100644 --- a/storage/innobase/include/data0type.h +++ b/storage/innobase/include/data0type.h @@ -554,11 +554,55 @@ struct dtype_t{ { return (prtype & DATA_VERSIONED) == DATA_VERS_END; } + + /** Set the type of the BLOB in the hidden metadata record. 
*/ + void metadata_blob_init() + { + prtype = DATA_NOT_NULL; + mtype = DATA_BLOB; + len = 0; + mbminlen = 0; + mbmaxlen = 0; + } }; /** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available" */ extern const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN]; +/** Info bit denoting the predefined minimum record: this bit is set +if and only if the record is the first user record on a non-leaf +B-tree page that is the leftmost page on its level +(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */ +#define REC_INFO_MIN_REC_FLAG 0x10UL +/** The delete-mark flag in info bits */ +#define REC_INFO_DELETED_FLAG 0x20UL + +/** Record status values for ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED */ +enum rec_comp_status_t { + /** User record (PAGE_LEVEL=0, heap>=PAGE_HEAP_NO_USER_LOW) */ + REC_STATUS_ORDINARY = 0, + /** Node pointer record (PAGE_LEVEL>=0, heap>=PAGE_HEAP_NO_USER_LOW) */ + REC_STATUS_NODE_PTR = 1, + /** The page infimum pseudo-record (heap=PAGE_HEAP_NO_INFIMUM) */ + REC_STATUS_INFIMUM = 2, + /** The page supremum pseudo-record (heap=PAGE_HEAP_NO_SUPREMUM) */ + REC_STATUS_SUPREMUM = 3, + /** Clustered index record that has been inserted or updated + after instant ADD COLUMN (more than dict_index_t::n_core_fields) */ + REC_STATUS_INSTANT = 4 +}; + +/** The dtuple_t::info_bits of the hidden metadata of instant ADD COLUMN. +@see rec_is_metadata() +@see rec_is_alter_metadata() */ +static const byte REC_INFO_METADATA_ADD + = REC_INFO_MIN_REC_FLAG | REC_STATUS_INSTANT; + +/** The dtuple_t::info_bits of the hidden metadata of instant ALTER TABLE. 
+@see rec_is_metadata() */ +static const byte REC_INFO_METADATA_ALTER + = REC_INFO_METADATA_ADD | REC_INFO_DELETED_FLAG; + #include "data0type.ic" #endif diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h index ec8e29d458c..2110018b6d4 100644 --- a/storage/innobase/include/db0err.h +++ b/storage/innobase/include/db0err.h @@ -135,8 +135,6 @@ enum dberr_t { DB_FTS_TOO_MANY_WORDS_IN_PHRASE, /*< Too many words in a phrase */ - DB_TABLESPACE_TRUNCATED, /*!< tablespace was truncated */ - DB_DECRYPTION_FAILED, /* Tablespace encrypted and decrypt operation failed because of missing key management plugin, diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h index d683afcdc7e..d9d4b3d69d5 100644 --- a/storage/innobase/include/dict0boot.h +++ b/storage/innobase/include/dict0boot.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -50,12 +51,8 @@ dict_hdr_get_new_id( (not assigned if NULL) */ index_id_t* index_id, /*!< out: index id (not assigned if NULL) */ - ulint* space_id, /*!< out: space id + ulint* space_id); /*!< out: space id (not assigned if NULL) */ - const dict_table_t* table, /*!< in: table */ - bool disable_redo); /*!< in: if true and table - object is NULL - then disable-redo */ /**********************************************************************//** Writes the current value of the row id counter to the dictionary header file page. */ @@ -124,13 +121,6 @@ dict_is_sys_table( /* The following is a secondary index on SYS_TABLES */ #define DICT_TABLE_IDS_ID 5 -#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. 
start - from this number, except for basic - system tables and their above defined - indexes; ibuf tables and indexes are - assigned as the id the number - DICT_IBUF_ID_MIN plus the space id */ - /* The offset of the dictionary header on the page */ #define DICT_HDR FSEG_PAGE_DATA diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h index 75ef4f09dbb..8c941d494c1 100644 --- a/storage/innobase/include/dict0crea.h +++ b/storage/innobase/include/dict0crea.h @@ -67,14 +67,6 @@ dict_create_table_step( /*===================*/ que_thr_t* thr); /*!< in: query thread */ -/** Assign a new table ID and put it into the table cache and the transaction. -@param[in,out] table Table that needs an ID -@param[in,out] trx Transaction */ -void -dict_table_assign_new_id( - dict_table_t* table, - trx_t* trx); - /***********************************************************//** Creates an index. This is a high-level function used in SQL execution graphs. diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index bf9fcd70f18..3530f6bd4d2 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -31,13 +31,15 @@ Created 1/8/1996 Heikki Tuuri #include "data0data.h" #include "dict0mem.h" #include "fsp0fsp.h" +#include <atomic> #include <deque> -#include "dict0pagecompress.h" extern bool innodb_table_stats_not_found; extern bool innodb_index_stats_not_found; -#include "sync0rw.h" +/** the first table or index ID for other than hard-coded system tables */ +#define DICT_HDR_FIRST_ID 10 + /********************************************************************//** Get the database name length in a table name. 
@return database name length */ @@ -360,21 +362,12 @@ dict_table_add_system_columns( dict_table_t* table, /*!< in/out: table */ mem_heap_t* heap) /*!< in: temporary heap */ MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Removes a table object from the dictionary cache. */ -void -dict_table_remove_from_cache( -/*=========================*/ - dict_table_t* table) /*!< in, own: table */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Removes a table object from the dictionary cache. */ -void -dict_table_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in, own: table */ - ibool lru_evict) /*!< in: TRUE if table being evicted - to make room in the table LRU list */ +/** Evict a table definition from the InnoDB data dictionary cache. +@param[in,out] table cached table definition to be evicted +@param[in] lru whether this is part of least-recently-used eviction +@param[in] keep whether to keep (not free) the object */ +void dict_table_remove_from_cache(dict_table_t* table, bool lru = false, + bool keep = false) MY_ATTRIBUTE((nonnull)); /**********************************************************************//** Renames a table object. @@ -684,65 +677,14 @@ do { \ dict_table_skip_corrupt_index(index); \ } while (0) -/********************************************************************//** -Check whether the index is the clustered index. -@return nonzero for clustered index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_clust( -/*================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); - -/** Check if index is auto-generated clustered index. -@param[in] index index - -@return true if index is auto-generated clustered index. 
*/ -UNIV_INLINE -bool -dict_index_is_auto_gen_clust( - const dict_index_t* index); - -/********************************************************************//** -Check whether the index is unique. -@return nonzero for unique index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_unique( -/*=================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Check whether the index is a Spatial Index. -@return nonzero for Spatial Index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_spatial( -/*==================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); - +#define dict_index_is_clust(index) (index)->is_clust() +#define dict_index_is_auto_gen_clust(index) (index)->is_gen_clust() +#define dict_index_is_unique(index) (index)->is_unique() +#define dict_index_is_spatial(index) (index)->is_spatial() +#define dict_index_is_ibuf(index) (index)->is_ibuf() +#define dict_index_is_sec_or_ibuf(index) !(index)->is_primary() #define dict_index_has_virtual(index) (index)->has_virtual() -/********************************************************************//** -Check whether the index is the insert buffer tree. -@return nonzero for insert buffer, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_ibuf( -/*===============*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Check whether the index is a secondary index or the insert buffer tree. -@return nonzero for insert buffer, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_sec_or_ibuf( -/*======================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); - /** Get all the FTS indexes on a table. 
@param[in] table table @param[out] indexes all FTS indexes on this table @@ -899,15 +841,8 @@ dict_index_get_min_size( /*====================*/ const dict_index_t* index) /*!< in: index */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Check whether the table uses the compact page format. -@return TRUE if table uses the compact page format */ -UNIV_INLINE -bool -dict_table_is_comp( -/*===============*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + +#define dict_table_is_comp(table) (table)->not_redundant() /** Determine if a table uses atomic BLOBs (no locally stored prefix). @param[in] table InnoDB table @@ -1260,16 +1195,6 @@ dict_table_get_nth_col_pos( ulint n, /*!< in: column number */ ulint* prefix_col_pos) /*!< out: col num if prefix */ MY_ATTRIBUTE((nonnull(1), warn_unused_result)); -/********************************************************************//** -Returns the position of a system column in an index. -@return position, ULINT_UNDEFINED if not contained */ -UNIV_INLINE -ulint -dict_index_get_sys_col_pos( -/*=======================*/ - const dict_index_t* index, /*!< in: index */ - ulint type) /*!< in: DATA_ROW_ID, ... */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); /*******************************************************************//** Adds a column to index. */ void @@ -1367,21 +1292,6 @@ dict_index_build_node_ptr( ulint level) /*!< in: level of rec in tree: 0 means leaf level */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Copies an initial segment of a physical record, long enough to specify an -index entry uniquely. 
-@return pointer to the prefix record */ -rec_t* -dict_index_copy_rec_order_prefix( -/*=============================*/ - const dict_index_t* index, /*!< in: index */ - const rec_t* rec, /*!< in: record for which to - copy prefix */ - ulint* n_fields,/*!< out: number of fields copied */ - byte** buf, /*!< in/out: memory buffer for the - copied prefix, or NULL */ - ulint* buf_size)/*!< in/out: buffer size */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Convert a physical record into a search tuple. @param[in] rec index record (not necessarily in an index page) @param[in] index index @@ -1637,8 +1547,10 @@ struct dict_sys_t{ the log records */ hash_table_t* table_hash; /*!< hash table of the tables, based on name */ - hash_table_t* table_id_hash; /*!< hash table of the tables, based - on id */ + /** hash table of persistent table IDs */ + hash_table_t* table_id_hash; + /** hash table of temporary table IDs */ + hash_table_t* temp_id_hash; dict_table_t* sys_tables; /*!< SYS_TABLES table */ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ @@ -1652,6 +1564,52 @@ struct dict_sys_t{ UT_LIST_BASE_NODE_T(dict_table_t) table_non_LRU; /*!< List of tables that can't be evicted from the cache */ + + /** @return a new temporary table ID */ + table_id_t get_temporary_table_id() { + return temp_table_id.fetch_add(1, std::memory_order_relaxed); + } + + /** Look up a temporary table. 
+ @param id temporary table ID + @return temporary table + @retval NULL if the table does not exist + (should only happen during the rollback of CREATE...SELECT) */ + dict_table_t* get_temporary_table(table_id_t id) + { + ut_ad(mutex_own(&mutex)); + dict_table_t* table; + ulint fold = ut_fold_ull(id); + HASH_SEARCH(id_hash, temp_id_hash, fold, dict_table_t*, table, + ut_ad(table->cached), table->id == id); + if (UNIV_LIKELY(table != NULL)) { + DBUG_ASSERT(table->is_temporary()); + DBUG_ASSERT(table->id >= DICT_HDR_FIRST_ID); + table->acquire(); + } + return table; + } + + /** Look up a persistent table. + @param id table ID + @return table + @retval NULL if not cached */ + dict_table_t* get_table(table_id_t id) + { + ut_ad(mutex_own(&mutex)); + dict_table_t* table; + ulint fold = ut_fold_ull(id); + HASH_SEARCH(id_hash, table_id_hash, fold, dict_table_t*, table, + ut_ad(table->cached), table->id == id); + DBUG_ASSERT(!table || !table->is_temporary()); + return table; + } + + dict_sys_t() : temp_table_id(DICT_HDR_FIRST_ID) {} + +private: + /** the sequence of temporary table IDs */ + std::atomic<table_id_t> temp_table_id; }; /** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */ diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 03b842e041d..2ef4a28dae2 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -25,6 +25,7 @@ Created 1/8/1996 Heikki Tuuri ***********************************************************************/ #include "fsp0sysspace.h" +#include "dict0pagecompress.h" /*********************************************************************//** Gets the minimum number of bytes per character. @@ -255,89 +256,6 @@ dict_table_get_next_index( #endif /* UNIV_DEBUG */ /********************************************************************//** -Check whether the index is the clustered index. 
-@return nonzero for clustered index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_clust( -/*================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - return(index->type & DICT_CLUSTERED); -} - -/** Check if index is auto-generated clustered index. -@param[in] index index - -@return true if index is auto-generated clustered index. */ -UNIV_INLINE -bool -dict_index_is_auto_gen_clust( - const dict_index_t* index) -{ - return(index->type == DICT_CLUSTERED); -} - -/********************************************************************//** -Check whether the index is unique. -@return nonzero for unique index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_unique( -/*=================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->type & DICT_UNIQUE); -} - -/********************************************************************//** -Check whether the index is a Spatial Index. -@return nonzero for Spatial Index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_spatial( -/*==================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return ulint(UNIV_EXPECT(index->type & DICT_SPATIAL, 0)); -} - -/********************************************************************//** -Check whether the index is the insert buffer tree. -@return nonzero for insert buffer, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_ibuf( -/*===============*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->type & DICT_IBUF); -} - -/********************************************************************//** -Check whether the index is a secondary index or the insert buffer tree. 
-@return nonzero for insert buffer, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_sec_or_ibuf( -/*======================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return((index->type & (DICT_CLUSTERED | DICT_IBUF)) != DICT_CLUSTERED); -} - -/********************************************************************//** Gets the number of user-defined non-virtual columns in a table in the dictionary cache. @return number of user-defined (e.g., not ROW_ID) non-virtual @@ -484,7 +402,8 @@ dict_table_get_nth_v_col( ut_ad(table); ut_ad(pos < table->n_v_def); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(!table->v_cols[pos].m_col.is_instant()); + ut_ad(!table->v_cols[pos].m_col.is_added()); + ut_ad(!table->v_cols[pos].m_col.is_dropped()); return &table->v_cols[pos]; } @@ -525,19 +444,6 @@ dict_table_get_sys_col_no( return unsigned(table->n_cols) + (sys - DATA_N_SYS_COLS); } -/********************************************************************//** -Check whether the table uses the compact page format. -@return TRUE if table uses the compact page format */ -UNIV_INLINE -bool -dict_table_is_comp( -/*===============*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table); - return (table->flags & DICT_TF_COMPACT) != 0; -} - /************************************************************************ Check if the table has an FTS index. */ UNIV_INLINE @@ -1014,31 +920,6 @@ dict_index_get_nth_field( } #endif /* UNIV_DEBUG */ -/********************************************************************//** -Returns the position of a system column in an index. -@return position, ULINT_UNDEFINED if not contained */ -UNIV_INLINE -ulint -dict_index_get_sys_col_pos( -/*=======================*/ - const dict_index_t* index, /*!< in: index */ - ulint type) /*!< in: DATA_ROW_ID, ... 
*/ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(!dict_index_is_ibuf(index)); - - if (dict_index_is_clust(index)) { - - return(dict_col_get_clust_pos( - dict_table_get_sys_col(index->table, type), - index)); - } - - return(dict_index_get_nth_col_pos( - index, dict_table_get_sys_col_no(index->table, type), NULL)); -} - /*********************************************************************//** Gets the field column. @return field->col, pointer to the table column */ diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 63cf5c8bbec..65ee84832d4 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -47,6 +47,7 @@ Created 1/8/1996 Heikki Tuuri #include "os0once.h" #include "fil0fil.h" #include "fil0crypt.h" +#include <sql_const.h> #include <set> #include <algorithm> #include <iterator> @@ -584,6 +585,10 @@ struct dict_col_t{ this column. Our current max limit is 3072 (REC_VERSION_56_MAX_INDEX_COL_LEN) bytes. */ +private: + /** Special value of ind for a dropped column */ + static const unsigned DROPPED = 1023; +public: /** Detach the column from an index. @param[in] index index to be detached from */ @@ -627,26 +632,55 @@ struct dict_col_t{ } /** @return whether this is an instantly-added column */ - bool is_instant() const + bool is_added() const { DBUG_ASSERT(def_val.len != UNIV_SQL_DEFAULT || !def_val.data); return def_val.len != UNIV_SQL_DEFAULT; } + /** Flag the column instantly dropped */ + void set_dropped() { ind = DROPPED; } + /** Flag the column instantly dropped. + @param[in] not_null whether the column was NOT NULL + @param[in] len2 whether the length exceeds 255 bytes + @param[in] fixed_len the fixed length in bytes, or 0 */ + void set_dropped(bool not_null, bool len2, unsigned fixed) + { + DBUG_ASSERT(!len2 || !fixed); + prtype = not_null + ? 
DATA_NOT_NULL | DATA_BINARY_TYPE + : DATA_BINARY_TYPE; + if (fixed) { + mtype = DATA_FIXBINARY; + len = fixed; + } else { + mtype = DATA_BINARY; + len = len2 ? 65535 : 255; + } + mbminlen = mbmaxlen = 0; + ind = DROPPED; + ord_part = 0; + max_prefix = 0; + } + /** @return whether the column was instantly dropped */ + bool is_dropped() const { return ind == DROPPED; } + /** @return whether the column was instantly dropped + @param[in] index the clustered index */ + inline bool is_dropped(const dict_index_t& index) const; + /** Get the default value of an instantly-added column. @param[out] len value length (in bytes), or UNIV_SQL_NULL @return default value @retval NULL if the default value is SQL NULL (len=UNIV_SQL_NULL) */ const byte* instant_value(ulint* len) const { - DBUG_ASSERT(is_instant()); + DBUG_ASSERT(is_added()); *len = def_val.len; return static_cast<const byte*>(def_val.data); } /** Remove the 'instant ADD' status of the column */ - void remove_instant() + void clear_instant() { - DBUG_ASSERT(is_instant()); def_val.len = UNIV_SQL_DEFAULT; def_val.data = NULL; } @@ -850,7 +884,7 @@ to start with. */ /** Data structure for an index. Most fields will be initialized to 0, NULL or FALSE in dict_mem_index_create(). 
*/ -struct dict_index_t{ +struct dict_index_t { index_id_t id; /*!< id of the index */ mem_heap_t* heap; /*!< memory heap */ id_name_t name; /*!< index name */ @@ -1039,7 +1073,7 @@ struct dict_index_t{ page cannot be read or decrypted */ inline bool is_readable() const; - /** @return whether instant ADD COLUMN is in effect */ + /** @return whether instant ALTER TABLE is in effect */ inline bool is_instant() const; /** @return whether the index is the primary key index @@ -1049,9 +1083,38 @@ struct dict_index_t{ return DICT_CLUSTERED == (type & (DICT_CLUSTERED | DICT_IBUF)); } + /** @return whether this is a generated clustered index */ + bool is_gen_clust() const { return type == DICT_CLUSTERED; } + + /** @return whether this is a clustered index */ + bool is_clust() const { return type & DICT_CLUSTERED; } + + /** @return whether this is a unique index */ + bool is_unique() const { return type & DICT_UNIQUE; } + + /** @return whether this is a spatial index */ + bool is_spatial() const { return UNIV_UNLIKELY(type & DICT_SPATIAL); } + + /** @return whether this is the change buffer */ + bool is_ibuf() const { return UNIV_UNLIKELY(type & DICT_IBUF); } + /** @return whether the index includes virtual columns */ bool has_virtual() const { return type & DICT_VIRTUAL; } + /** @return the position of DB_TRX_ID */ + unsigned db_trx_id() const { + DBUG_ASSERT(is_primary()); + DBUG_ASSERT(n_uniq); + DBUG_ASSERT(n_uniq <= MAX_REF_PARTS); + return n_uniq; + } + /** @return the position of DB_ROLL_PTR */ + unsigned db_roll_ptr() const { return db_trx_id() + 1; } + + /** @return the offset of the metadata BLOB field, + or the first user field after the PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR */ + unsigned first_user_field() const { return db_trx_id() + 2; } + /** @return whether the index is corrupted */ inline bool is_corrupted() const; @@ -1097,24 +1160,20 @@ struct dict_index_t{ return fields[n].col->instant_value(len); } - /** Adjust clustered index metadata for instant ADD 
COLUMN. - @param[in] clustered index definition after instant ADD COLUMN */ - void instant_add_field(const dict_index_t& instant); - - /** Remove the 'instant ADD' status of a clustered index. - Protected by index root page x-latch or table X-lock. */ - void remove_instant() - { - DBUG_ASSERT(is_primary()); - if (!is_instant()) { - return; - } - for (unsigned i = n_core_fields; i < n_fields; i++) { - fields[i].col->remove_instant(); - } - n_core_fields = n_fields; - n_core_null_bytes = UT_BITS_IN_BYTES(unsigned(n_nullable)); - } + /** Adjust index metadata for instant ADD/DROP/reorder COLUMN. + @param[in] clustered index definition after instant ALTER TABLE */ + inline void instant_add_field(const dict_index_t& instant); + /** Remove instant ADD COLUMN metadata. */ + inline void clear_instant_add(); + /** Remove instant ALTER TABLE metadata. */ + inline void clear_instant_alter(); + + /** Construct the metadata record for instant ALTER TABLE. + @param[in] row dummy or default values for existing columns + @param[in,out] heap memory heap for allocations + @return metadata record */ + inline dtuple_t* + instant_metadata(const dtuple_t& row, mem_heap_t* heap) const; /** Check if record in clustered index is historical row. @param[in] rec clustered row @@ -1129,6 +1188,9 @@ struct dict_index_t{ @return true on error */ bool vers_history_row(const rec_t* rec, bool &history_row); + + /** Reconstruct the clustered index fields. */ + inline void reconstruct_fields(); }; /** Detach a column from an index. @@ -1463,6 +1525,17 @@ struct dict_vcol_templ_t { dict_vcol_templ_t() : vtempl(0), mysql_table_query_id(~0ULL) {} }; +/** Instantly dropped or reordered columns */ +struct dict_instant_t +{ + /** Number of dropped columns */ + unsigned n_dropped; + /** Dropped columns */ + dict_col_t* dropped; + /** Mapping the non-pk field to column of the table. */ + uint16_t* non_pk_col_map; +}; + /** These are used when MySQL FRM and InnoDB data dictionary are in inconsistent state. 
*/ typedef enum { @@ -1505,6 +1578,9 @@ struct dict_table_t { return flags2 & DICT_TF2_TEMPORARY; } + /** @return whether the table is not in ROW_FORMAT=REDUNDANT */ + bool not_redundant() const { return flags & DICT_TF_COMPACT; } + /** @return whether this table is readable @retval true normally @retval false if this is a single-table tablespace @@ -1516,35 +1592,98 @@ struct dict_table_t { return(UNIV_LIKELY(!file_unreadable)); } - /** @return whether instant ADD COLUMN is in effect */ + /** @return whether instant ALTER TABLE is in effect */ bool is_instant() const { return(UT_LIST_GET_FIRST(indexes)->is_instant()); } - /** @return whether the table supports instant ADD COLUMN */ + /** @return whether the table supports instant ALTER TABLE */ bool supports_instant() const { return(!(flags & DICT_TF_MASK_ZIP_SSIZE)); } - /** Adjust metadata for instant ADD COLUMN. - @param[in] table table definition after instant ADD COLUMN */ - void instant_add_column(const dict_table_t& table); + /** @return the number of instantly dropped columns */ + unsigned n_dropped() const { return instant ? instant->n_dropped : 0; } + + /** Look up an old column. + @param[in] cols the old columns of the table + @param[in] col_map map from old table columns to altered ones + @param[in] n_cols number of old columns + @param[in] i the number of the new column + @return old column + @retval NULL if column i was added to the table */ + static const dict_col_t* find(const dict_col_t* cols, + const ulint* col_map, ulint n_cols, + ulint i) + { + for (ulint o = n_cols; o--; ) { + if (col_map[o] == i) { + return &cols[o]; + } + } + return NULL; + } - /** Roll back instant_add_column(). - @param[in] old_n_cols original n_cols - @param[in] old_cols original cols - @param[in] old_col_names original col_names */ - void rollback_instant( + /** Serialise metadata of dropped or reordered columns. 
+ @param[in,out] heap memory heap for allocation + @param[out] field data field with the metadata */ + inline void serialise_columns(mem_heap_t* heap, dfield_t* field) const; + + /** Reconstruct dropped or reordered columns. + @param[in] metadata data from serialise_columns() + @param[in] len length of the metadata, in bytes + @return whether parsing the metadata failed */ + bool deserialise_columns(const byte* metadata, ulint len); + + /** Set is_instant() before instant_column(). + @param[in] old previous table definition + @param[in] col_map map from old.cols[] + and old.v_cols[] to this + @param[out] first_alter_pos 0, or + 1 + first changed column position */ + inline void prepare_instant(const dict_table_t& old, + const ulint* col_map, + unsigned& first_alter_pos); + + /** Adjust table metadata for instant ADD/DROP/reorder COLUMN. + @param[in] table table on which prepare_instant() was invoked + @param[in] col_map mapping from cols[] and v_cols[] to table */ + inline void instant_column(const dict_table_t& table, + const ulint* col_map); + + /** Roll back instant_column(). + @param[in] old_n_cols original n_cols + @param[in] old_cols original cols + @param[in] old_col_names original col_names + @param[in] old_instant original instant structure + @param[in] old_fields original fields + @param[in] old_n_fields original number of fields + @param[in] old_n_v_cols original n_v_cols + @param[in] old_v_cols original v_cols + @param[in] old_v_col_names original v_col_names + @param[in] col_map column map */ + inline void rollback_instant( unsigned old_n_cols, dict_col_t* old_cols, - const char* old_col_names); - - /** Trim the instantly added columns when an insert into SYS_COLUMNS - is rolled back during ALTER TABLE or recovery. 
- @param[in] n number of surviving non-system columns */ - void rollback_instant(unsigned n); + const char* old_col_names, + dict_instant_t* old_instant, + dict_field_t* old_fields, + unsigned old_n_fields, + unsigned old_n_v_cols, + dict_v_col_t* old_v_cols, + const char* old_v_col_names, + const ulint* col_map); + + /** Assign a new id to invalidate old undo log records, so + that purge will be unable to refer to fields that used to be + instantly added to the end of the index. This is only to be + used during ALTER TABLE when the table is empty, before + invoking dict_index_t::clear_instant_alter(). + @param[in,out] trx dictionary transaction + @return error code */ + inline dberr_t reassign_id(trx_t* trx); /** Add the table definition to the data dictionary cache */ void add_to_cache(); @@ -1686,6 +1825,9 @@ struct dict_table_t { reason s_cols is a part of dict_table_t */ dict_s_col_list* s_cols; + /** Instantly dropped or reordered columns, or NULL if none */ + dict_instant_t* instant; + /** Column names packed in a character string "name1\0name2\0...nameN\0". Until the string contains n_cols, it will be allocated from a temporary heap. 
The final string will be allocated @@ -1967,12 +2109,15 @@ inline bool dict_index_t::is_readable() const { return table->is_readable(); } inline bool dict_index_t::is_instant() const { ut_ad(n_core_fields > 0); - ut_ad(n_core_fields <= n_fields); + ut_ad(n_core_fields <= n_fields || table->n_dropped()); ut_ad(n_core_fields == n_fields || (type & ~(DICT_UNIQUE | DICT_CORRUPT)) == DICT_CLUSTERED); ut_ad(n_core_fields == n_fields || table->supports_instant()); ut_ad(n_core_fields == n_fields || !table->is_temporary()); - return(n_core_fields != n_fields); + ut_ad(!table->instant || !table->is_temporary()); + + return n_core_fields != n_fields + || (is_primary() && table->instant); } inline bool dict_index_t::is_corrupted() const @@ -1982,6 +2127,72 @@ inline bool dict_index_t::is_corrupted() const || (table && table->corrupted)); } +inline void dict_index_t::clear_instant_add() +{ + DBUG_ASSERT(is_primary()); + DBUG_ASSERT(is_instant()); + DBUG_ASSERT(!table->instant); + for (unsigned i = n_core_fields; i < n_fields; i++) { + fields[i].col->clear_instant(); + } + n_core_fields = n_fields; + n_core_null_bytes = UT_BITS_IN_BYTES(unsigned(n_nullable)); +} + +inline void dict_index_t::clear_instant_alter() +{ + DBUG_ASSERT(is_primary()); + DBUG_ASSERT(n_fields == n_def); + + if (!table->instant) { + if (is_instant()) { + clear_instant_add(); + } + return; + } + +#ifndef DBUG_OFF + for (unsigned i = first_user_field(); i--; ) { + DBUG_ASSERT(!fields[i].col->is_dropped()); + DBUG_ASSERT(!fields[i].col->is_nullable()); + } +#endif + dict_field_t* const begin = &fields[first_user_field()]; + dict_field_t* end = &fields[n_fields]; + + for (dict_field_t* d = begin; d < end; ) { + /* Move fields for dropped columns to the end. 
*/ + if (!d->col->is_dropped()) { + d++; + } else { + if (d->col->is_nullable()) { + n_nullable--; + } + + std::swap(*d, *--end); + } + } + + DBUG_ASSERT(&fields[n_fields - table->n_dropped()] == end); + n_core_fields = n_fields = n_def = end - fields; + n_core_null_bytes = UT_BITS_IN_BYTES(n_nullable); + std::sort(begin, end, [](const dict_field_t& a, const dict_field_t& b) + { return a.col->ind < b.col->ind; }); + table->instant = NULL; +} + +/** @return whether the column was instantly dropped +@param[in] index the clustered index */ +inline bool dict_col_t::is_dropped(const dict_index_t& index) const +{ + DBUG_ASSERT(index.is_primary()); + DBUG_ASSERT(!is_dropped() == !index.table->instant); + DBUG_ASSERT(!is_dropped() || (this >= index.table->instant->dropped + && this < index.table->instant->dropped + + index.table->instant->n_dropped)); + return is_dropped(); +} + /*******************************************************************//** Initialise the table lock list. */ void diff --git a/storage/innobase/include/dict0priv.h b/storage/innobase/include/dict0priv.h index b216a16c181..8eda44bd3f2 100644 --- a/storage/innobase/include/dict0priv.h +++ b/storage/innobase/include/dict0priv.h @@ -45,18 +45,6 @@ dict_table_check_if_in_cache_low( /*=============================*/ const char* table_name); /*!< in: table name */ -/**********************************************************************//** -Returns a table object based on table id. 
-@return table, NULL if does not exist */ -UNIV_INLINE -dict_table_t* -dict_table_open_on_id_low( -/*=====================*/ - table_id_t table_id, /*!< in: table id */ - dict_err_ignore_t ignore_err, /*!< in: errors to ignore - when loading the table */ - ibool open_only_if_in_cache); - #include "dict0priv.ic" #endif /* dict0priv.h */ diff --git a/storage/innobase/include/dict0priv.ic b/storage/innobase/include/dict0priv.ic index fb7af2772fc..6d7fbf07394 100644 --- a/storage/innobase/include/dict0priv.ic +++ b/storage/innobase/include/dict0priv.ic @@ -25,7 +25,6 @@ Created Wed 13 Oct 2010 16:10:14 EST Sunny Bains #include "dict0dict.h" #include "dict0load.h" -#include "dict0priv.h" /**********************************************************************//** Gets a table; loads it to the dictionary cache if necessary. A low-level @@ -64,40 +63,6 @@ dict_table_get_low( } /**********************************************************************//** -Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INLINE -dict_table_t* -dict_table_open_on_id_low( -/*======================*/ - table_id_t table_id, /*!< in: table id */ - dict_err_ignore_t ignore_err, /*!< in: errors to ignore - when loading the table */ - ibool open_only_if_in_cache) -{ - dict_table_t* table; - ulint fold; - - ut_ad(mutex_own(&dict_sys->mutex)); - - /* Look for the table name in the hash table */ - fold = ut_fold_ull(table_id); - - HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, - dict_table_t*, table, ut_ad(table->cached), - table->id == table_id); - if (table == NULL && !open_only_if_in_cache) { - table = dict_load_table_on_id(table_id, ignore_err); - } - - ut_ad(!table || table->cached); - - /* TODO: should get the type information from MySQL */ - - return(table); -} - -/**********************************************************************//** Checks if a table is in the dictionary cache. 
@return table, NULL if not found */ UNIV_INLINE diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 91f50d70e5d..b1b30a53580 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -37,8 +37,6 @@ Created 10/25/1995 Heikki Tuuri extern my_bool srv_use_doublewrite_buf; extern struct buf_dblwr_t* buf_dblwr; class page_id_t; -struct trx_t; -class truncate_t; /** Structure containing encryption specification */ struct fil_space_crypt_t; @@ -385,19 +383,12 @@ typedef byte fil_faddr_t; /*!< 'type' definition in C: an address #define FIL_ADDR_BYTE 4U /* then comes 2-byte byte offset within page*/ #define FIL_ADDR_SIZE 6U /* address size is 6 bytes */ -#ifndef UNIV_INNOCHECKSUM - /** File space address */ struct fil_addr_t { ulint page; /*!< page number within a space */ ulint boffset; /*!< byte offset within the page */ }; -/** The null file address */ -extern const fil_addr_t fil_addr_null; - -#endif /* !UNIV_INNOCHECKSUM */ - /** The byte offsets on a file page for various variables @{ */ #define FIL_PAGE_SPACE_OR_CHKSUM 0 /*!< in < MySQL-4.0.14 space id the page belongs to (== 0) but in later @@ -1081,7 +1072,7 @@ fil_space_extend( @param[in] message message for aio handler if non-sync aio used, else ignored @param[in] ignore_missing_space true=ignore missing space during read -@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED +@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ dberr_t fil_io( diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index 4c4647dba95..9a10375759c 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2017, MariaDB Corporation. 
+Copyright (c) 2016, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -610,17 +610,15 @@ fts_get_doc_id_from_row( want to extract.*/ /** Extract the doc id from the record that belongs to index. -@param[in] table table -@param[in] rec record contains FTS_DOC_ID +@param[in] rec record containing FTS_DOC_ID @param[in] index index of rec -@param[in] heap heap memory +@param[in] offsets rec_get_offsets(rec,index) @return doc id that was extracted from rec */ doc_id_t fts_get_doc_id_from_rec( - dict_table_t* table, - const rec_t* rec, - const dict_index_t* index, - mem_heap_t* heap); + const rec_t* rec, + const dict_index_t* index, + const ulint* offsets); /** Add new fts doc id to the update vector. @param[in] table the table that contains the FTS index. diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h index 092889fc42c..d38b450085c 100644 --- a/storage/innobase/include/fut0lst.h +++ b/storage/innobase/include/fut0lst.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,9 +29,8 @@ Created 11/28/1995 Heikki Tuuri #ifndef UNIV_INNOCHECKSUM -#include "fil0fil.h" -#include "mtr0mtr.h" - +#include "fut0fut.h" +#include "mtr0log.h" /* The C 'types' of base node and list node: these should be used to write self-documenting code. 
Of course, the sizeof macro cannot be @@ -47,6 +47,47 @@ typedef byte flst_node_t; #define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE) #ifndef UNIV_INNOCHECKSUM +/* We define the field offsets of a node for the list */ +#define FLST_PREV 0 /* 6-byte address of the previous list element; + the page part of address is FIL_NULL, if no + previous element */ +#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next + list element; the page part of address + is FIL_NULL, if no next element */ + +/* We define the field offsets of a base node for the list */ +#define FLST_LEN 0 /* 32-bit list length field */ +#define FLST_FIRST 4 /* 6-byte address of the first element + of the list; undefined if empty list */ +#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the + last element of the list; undefined + if empty list */ + +/** Initialize a zero-initialized list base node. +@param[in,out] block file page +@param[in] ofs byte offset of the list base node +@param[in,out] mtr mini-transaction */ +inline void flst_init(buf_block_t* block, uint16_t ofs, mtr_t* mtr) +{ + ut_ad(0 == mach_read_from_2(FLST_LEN + ofs + block->frame)); + ut_ad(0 == mach_read_from_2(FLST_FIRST + FIL_ADDR_BYTE + ofs + + block->frame)); + ut_ad(0 == mach_read_from_2(FLST_LAST + FIL_ADDR_BYTE + ofs + + block->frame)); + compile_time_assert(FIL_NULL == 0xffU * 0x1010101U); + mlog_memset(block, FLST_FIRST + FIL_ADDR_PAGE + ofs, 4, 0xff, mtr); + mlog_memset(block, FLST_LAST + FIL_ADDR_PAGE + ofs, 4, 0xff, mtr); +} + +/** Write a null file address. +@param[in,out] faddr file address to be zeroed out +@param[in,out] mtr mini-transaction */ +inline void flst_zero_addr(fil_faddr_t* faddr, mtr_t* mtr) +{ + mlog_memset(faddr + FIL_ADDR_PAGE, 4, 0xff, mtr); + mlog_write_ulint(faddr + FIL_ADDR_BYTE, 0, MLOG_2BYTES, mtr); +} + /********************************************************************//** Initializes a list base node. 
*/ UNIV_INLINE diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic index 5c9a9ca94c1..3a978b8f75a 100644 --- a/storage/innobase/include/fut0lst.ic +++ b/storage/innobase/include/fut0lst.ic @@ -23,26 +23,8 @@ File-based list utilities Created 11/28/1995 Heikki Tuuri ***********************************************************************/ -#include "fut0fut.h" -#include "mtr0log.h" #include "buf0buf.h" -/* We define the field offsets of a node for the list */ -#define FLST_PREV 0 /* 6-byte address of the previous list element; - the page part of address is FIL_NULL, if no - previous element */ -#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next - list element; the page part of address - is FIL_NULL, if no next element */ - -/* We define the field offsets of a base node for the list */ -#define FLST_LEN 0 /* 32-bit list length field */ -#define FLST_FIRST 4 /* 6-byte address of the first element - of the list; undefined if empty list */ -#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the - last element of the list; undefined - if empty list */ - /********************************************************************//** Writes a file address. */ UNIV_INLINE @@ -101,8 +83,8 @@ flst_init( | MTR_MEMO_PAGE_SX_FIX)); mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr); - flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr); - flst_write_addr(base + FLST_LAST, fil_addr_null, mtr); + flst_zero_addr(base + FLST_FIRST, mtr); + flst_zero_addr(base + FLST_LAST, mtr); } /** Get the length of a list. diff --git a/storage/innobase/include/log0crypt.h b/storage/innobase/include/log0crypt.h index d972ca01491..359896c2fc5 100644 --- a/storage/innobase/include/log0crypt.h +++ b/storage/innobase/include/log0crypt.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (C) 2013, 2015, Google Inc. All Rights Reserved. -Copyright (C) 2014, 2017, MariaDB Corporation. 
All Rights Reserved. +Copyright (C) 2014, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -73,14 +73,23 @@ UNIV_INTERN bool log_crypt_read_checkpoint_buf(const byte* buf); +/** log_crypt() operation code */ +enum log_crypt_t { + /** encrypt a log block without rotating key */ + LOG_ENCRYPT, + /** decrypt a log block */ + LOG_DECRYPT, + /** attempt to rotate the key, and encrypt a log block */ + LOG_ENCRYPT_ROTATE_KEY +}; + /** Encrypt or decrypt log blocks. @param[in,out] buf log blocks to encrypt or decrypt @param[in] lsn log sequence number of the start of the buffer @param[in] size size of the buffer, in bytes -@param[in] decrypt whether to decrypt instead of encrypting */ -UNIV_INTERN -void -log_crypt(byte* buf, lsn_t lsn, ulint size, bool decrypt = false); +@param[in] op whether to decrypt, encrypt, or rotate key and encrypt +@return whether the operation succeeded (encrypt always does) */ +bool log_crypt(byte* buf, lsn_t lsn, ulint size, log_crypt_t op = LOG_ENCRYPT); /** Encrypt or decrypt a temporary file block. @param[in] src block to encrypt or decrypt diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index d213a6c0884..3d28cacbfe0 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -160,19 +160,16 @@ bool log_set_capacity(ulonglong file_size) MY_ATTRIBUTE((warn_unused_result)); -/******************************************************//** -This function is called, e.g., when a transaction wants to commit. It checks -that the log has been written to the log file up to the last log entry written -by the transaction. If there is a flush running, it waits and checks if the -flush flushed enough. If not, starts a new flush. 
*/ -void -log_write_up_to( -/*============*/ - lsn_t lsn, /*!< in: log sequence number up to which - the log should be written, LSN_MAX if not specified */ - bool flush_to_disk); - /*!< in: true if we want the written log - also to be flushed to disk */ +/** Ensure that the log has been written to the log file up to a given +log entry (such as that of a transaction commit). Start a new write, or +wait and check if an already running write is covering the request. +@param[in] lsn log sequence number that should be +included in the redo log file write +@param[in] flush_to_disk whether the written log should also +be flushed to the file system +@param[in] rotate_key whether to rotate the encryption key */ +void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key = false); + /** write to the log file up to the last log entry. @param[in] sync whether we want the written log also to be flushed to disk. */ @@ -414,13 +411,14 @@ extern my_bool innodb_log_checksums; #define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in bytes */ -/* Offsets of a log block trailer from the end of the block */ +#define LOG_BLOCK_KEY 4 /* encryption key version + before LOG_BLOCK_CHECKSUM; + in LOG_HEADER_FORMAT_ENC_10_4 only */ #define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block contents; in InnoDB versions < 3.23.52 this did not contain the checksum but the same value as - .._HDR_NO */ -#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */ + LOG_BLOCK_HDR_NO */ /** Offsets inside the checkpoint pages (redo log format version 1) @{ */ /** Checkpoint number */ @@ -482,11 +480,9 @@ MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT 1 instead of 0. MariaDB 10.3 will use subformat 0 (5.7-style TRUNCATE) or 2 (MDEV-13564 backup-friendly TRUNCATE). */ #define LOG_HEADER_FORMAT_10_3 103 -/** The redo log format identifier corresponding to the current format version. -Stored in LOG_HEADER_FORMAT. 
*/ -#define LOG_HEADER_FORMAT_CURRENT LOG_HEADER_FORMAT_10_3 -/** Future MariaDB 10.4 log format */ #define LOG_HEADER_FORMAT_10_4 104 +/** The MariaDB 10.4.0 log format (only with innodb_encrypt_log=ON) */ +#define LOG_HEADER_FORMAT_ENC_10_4 (104U | 1U << 31) /** Encrypted MariaDB redo log */ #define LOG_HEADER_FORMAT_ENCRYPTED (1U<<31) @@ -556,7 +552,7 @@ struct log_t{ struct files { /** number of files */ ulint n_files; - /** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */ + /** format of the redo log: e.g., LOG_HEADER_FORMAT_10_4 */ uint32_t format; /** redo log subformat: 0 with separately logged TRUNCATE, 2 with fully redo-logged TRUNCATE (1 in MariaDB 10.2) */ @@ -713,11 +709,34 @@ public: /** @return whether the redo log is encrypted */ bool is_encrypted() const { return(log.is_encrypted()); } - bool is_initialised() { return m_initialised; } + bool is_initialised() const { return m_initialised; } /** Complete an asynchronous checkpoint write. */ void complete_checkpoint(); + /** @return the log block header + trailer size */ + unsigned framing_size() const + { + return log.format == LOG_HEADER_FORMAT_ENC_10_4 + ? LOG_BLOCK_HDR_SIZE + LOG_BLOCK_KEY + LOG_BLOCK_CHECKSUM + : LOG_BLOCK_HDR_SIZE + LOG_BLOCK_CHECKSUM; + } + /** @return the log block payload size */ + unsigned payload_size() const + { + return log.format == LOG_HEADER_FORMAT_ENC_10_4 + ? OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - LOG_BLOCK_CHECKSUM - + LOG_BLOCK_KEY + : OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - LOG_BLOCK_CHECKSUM; + } + /** @return the log block trailer offset */ + unsigned trailer_offset() const + { + return log.format == LOG_HEADER_FORMAT_ENC_10_4 + ? OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM - LOG_BLOCK_KEY + : OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM; + } + /** Initialise the redo log subsystem. 
*/ void create(); diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic index 87d55f9e01d..60e6958d592 100644 --- a/storage/innobase/include/log0log.ic +++ b/storage/innobase/include/log0log.ic @@ -215,7 +215,7 @@ log_block_calc_checksum_format_0( sum = 1; sh = 0; - for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) { + for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM; i++) { ulint b = (ulint) block[i]; sum &= 0x7FFFFFFFUL; sum += b; @@ -237,7 +237,7 @@ ulint log_block_calc_checksum_crc32( const byte* block) { - return(ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE)); + return ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM); } /** Calculates the checksum for a log block using the "no-op" algorithm. @@ -338,7 +338,7 @@ log_reserve_and_write_fast( #endif /* UNIV_LOG_LSN_DEBUG */ + log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE; - if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { + if (data_len >= log_sys.trailer_offset()) { /* The string does not fit within the current log block or the log block would become full */ diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 89485b7f31d..5fe00d65f2c 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -143,10 +143,6 @@ corresponding to MLOG_INDEX_LOAD. */ extern void (*log_optimized_ddl_op)(ulint space_id); -/** Report backup-unfriendly TRUNCATE operation (with separate log file), -corresponding to MLOG_TRUNCATE. */ -extern void (*log_truncate)(); - /** Report an operation to create, delete, or rename a file during backup. 
@param[in] space_id tablespace identifier @param[in] flags tablespace flags (NULL if not create) diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index d2de11b3470..4cb7ea4eb64 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -71,6 +72,23 @@ mlog_log_string( byte* ptr, /*!< in: pointer written to */ ulint len, /*!< in: string length */ mtr_t* mtr); /*!< in: mini-transaction handle */ + +/** Initialize a string of bytes. +@param[in,out] b buffer page +@param[in] ofs byte offset from block->frame +@param[in] len length of the data to write +@param[in] val the data byte to write +@param[in,out] mtr mini-transaction */ +void +mlog_memset(buf_block_t* b, ulint ofs, ulint len, byte val, mtr_t* mtr); + +/** Initialize a string of bytes. +@param[in,out] b byte address +@param[in] len length of the data to write +@param[in] val the data byte to write +@param[in,out] mtr mini-transaction */ +void mlog_memset(byte* b, ulint len, byte val, mtr_t* mtr); + /********************************************************//** Writes initial part of a log record consisting of one-byte item type and four-byte space and page numbers. */ @@ -188,7 +206,7 @@ mlog_parse_initial_log_record( ulint* space, /*!< out: space id */ ulint* page_no);/*!< out: page number */ /********************************************************//** -Parses a log record written by mlog_write_ulint or mlog_write_ull. +Parses a log record written by mlog_write_ulint, mlog_write_ull, mlog_memset. 
@return parsed record end, NULL if not a complete record */ byte* mlog_parse_nbytes( diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index eaf838aaa76..39ed707267d 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -216,7 +216,8 @@ enum mlog_id_t { /** initialize a file page */ MLOG_INIT_FILE_PAGE2 = 59, - /** Table is being truncated. (Marked only for file-per-table) */ + /** Table is being truncated. (Was used in 10.2 and 10.3; + not supported for crash-upgrade to 10.4 or later.) */ MLOG_TRUNCATE = 60, /** notify that an index tree is being loaded without writing @@ -227,8 +228,11 @@ enum mlog_id_t { of a ROW_FORMAT=COMPRESSED table */ MLOG_ZIP_WRITE_TRX_ID = 62, + /** initialize a page with a string of identical bytes */ + MLOG_MEMSET = 63, + /** biggest value (used in assertions) */ - MLOG_BIGGEST_TYPE = MLOG_ZIP_WRITE_TRX_ID, + MLOG_BIGGEST_TYPE = MLOG_MEMSET, /** log record for writing/updating crypt data of a tablespace */ diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic index 86e560395f3..1ba85d93fb4 100644 --- a/storage/innobase/include/page0cur.ic +++ b/storage/innobase/include/page0cur.ic @@ -280,6 +280,7 @@ page_cur_tuple_insert( *offsets = rec_get_offsets(rec, index, *offsets, page_is_leaf(cursor->block->frame), ULINT_UNDEFINED, heap); + ut_ad(size == rec_offs_size(*offsets)); if (buf_block_get_page_zip(cursor->block)) { rec = page_cur_insert_rec_zip( diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index 0ff63f8047f..1ec3cf7b6a4 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -1023,13 +1023,6 @@ page_get_direction(const page_t* page) inline uint16_t page_get_instant(const page_t* page); -/** Assign the PAGE_INSTANT field. 
-@param[in,out] page clustered index root page -@param[in] n original number of clustered index fields -@param[in,out] mtr mini-transaction */ -inline -void -page_set_instant(page_t* page, unsigned n, mtr_t* mtr); /**********************************************************//** Create an uncompressed B-tree index page. @@ -1057,10 +1050,6 @@ page_create_zip( ulint level, /*!< in: the B-tree level of the page */ trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */ - const redo_page_compress_t* page_comp_info, - /*!< in: used for applying - TRUNCATE log - record during recovery */ mtr_t* mtr); /*!< in/out: mini-transaction handle */ /**********************************************************//** diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic index 307803367c0..7567853667c 100644 --- a/storage/innobase/include/page0page.ic +++ b/storage/innobase/include/page0page.ic @@ -1098,24 +1098,6 @@ page_get_instant(const page_t* page) #endif /* UNIV_DEBUG */ return(i >> 3); } - -/** Assign the PAGE_INSTANT field. 
-@param[in,out] page clustered index root page -@param[in] n original number of clustered index fields -@param[in,out] mtr mini-transaction */ -inline -void -page_set_instant(page_t* page, unsigned n, mtr_t* mtr) -{ - ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_INSTANT); - ut_ad(n > 0); - ut_ad(n < REC_MAX_N_FIELDS); - uint16_t i = page_header_get_field(page, PAGE_INSTANT); - ut_ad(i <= PAGE_NO_DIRECTION); - i |= n << 3; - mlog_write_ulint(PAGE_HEADER + PAGE_INSTANT + page, i, - MLOG_2BYTES, mtr); -} #endif /* !UNIV_INNOCHECKSUM */ #ifdef UNIV_MATERIALIZE diff --git a/storage/innobase/include/page0size.h b/storage/innobase/include/page0size.h index 981f8743960..08d072822bf 100644 --- a/storage/innobase/include/page0size.h +++ b/storage/innobase/include/page0size.h @@ -34,7 +34,7 @@ Created Nov 14, 2013 Vasil Dimov /** A BLOB field reference full of zero, for use in assertions and tests.Initially, BLOB field references are set to zero, in dtuple_convert_big_rec(). */ -extern const byte field_ref_zero[FIELD_REF_SIZE]; +extern const byte field_ref_zero[UNIV_PAGE_SIZE_MAX]; #define PAGE_SIZE_T_SIZE_BITS 17 diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h index 47d66df2758..0913a50fee2 100644 --- a/storage/innobase/include/page0types.h +++ b/storage/innobase/include/page0types.h @@ -83,18 +83,6 @@ enum page_cur_mode_t { PAGE_CUR_RTREE_GET_FATHER = 14 }; - -/** The information used for compressing a page when applying -TRUNCATE log record during recovery */ -struct redo_page_compress_t { - ulint type; /*!< index type */ - index_id_t index_id; /*!< index id */ - ulint n_fields; /*!< number of index fields */ - ulint field_len; /*!< the length of index field */ - const byte* fields; /*!< index field information */ - ulint trx_id_pos; /*!< position of trx-id column. 
*/ -}; - /** Compressed page descriptor */ struct page_zip_des_t { diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h index a2910a73634..60b6b46d7ab 100644 --- a/storage/innobase/include/page0zip.h +++ b/storage/innobase/include/page0zip.h @@ -2,7 +2,7 @@ Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -163,10 +163,6 @@ page_zip_compress( dict_index_t* index, /*!< in: index of the B-tree node */ ulint level, /*!< in: commpression level */ - const redo_page_compress_t* page_comp_info, - /*!< in: used for applying - TRUNCATE log - record during recovery */ mtr_t* mtr); /*!< in/out: mini-transaction, or NULL */ diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic index b3ebc5dcf51..eefe7c8f0f7 100644 --- a/storage/innobase/include/page0zip.ic +++ b/storage/innobase/include/page0zip.ic @@ -414,7 +414,7 @@ page_zip_parse_compress_no_data( was successful. Crash in this case. */ if (page - && !page_zip_compress(page_zip, page, index, level, NULL, NULL)) { + && !page_zip_compress(page_zip, page, index, level, NULL)) { ut_error; } diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h index 3a541289a61..fdcae3818bc 100644 --- a/storage/innobase/include/rem0rec.h +++ b/storage/innobase/include/rem0rec.h @@ -38,15 +38,6 @@ Created 5/30/1994 Heikki Tuuri #include <ostream> #include <sstream> -/* Info bit denoting the predefined minimum record: this bit is set -if and only if the record is the first user record on a non-leaf -B-tree page that is the leftmost page on its level -(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). 
*/ -#define REC_INFO_MIN_REC_FLAG 0x10UL -/* The deleted flag in info bits */ -#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the - record has been delete marked */ - /* Number of extra bytes in an old-style record, in addition to the data and the offsets */ #define REC_N_OLD_EXTRA_BYTES 6 @@ -54,26 +45,6 @@ in addition to the data and the offsets */ in addition to the data and the offsets */ #define REC_N_NEW_EXTRA_BYTES 5 -/** Record status values for ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED */ -enum rec_comp_status_t { - /** User record (PAGE_LEVEL=0, heap>=PAGE_HEAP_NO_USER_LOW) */ - REC_STATUS_ORDINARY = 0, - /** Node pointer record (PAGE_LEVEL>=0, heap>=PAGE_HEAP_NO_USER_LOW) */ - REC_STATUS_NODE_PTR = 1, - /** The page infimum pseudo-record (heap=PAGE_HEAP_NO_INFIMUM) */ - REC_STATUS_INFIMUM = 2, - /** The page supremum pseudo-record (heap=PAGE_HEAP_NO_SUPREMUM) */ - REC_STATUS_SUPREMUM = 3, - /** Clustered index record that has been inserted or updated - after instant ADD COLUMN (more than dict_index_t::n_core_fields) */ - REC_STATUS_COLUMNS_ADDED = 4 -}; - -/** The dtuple_t::info_bits of the metadata pseudo-record. -@see rec_is_metadata() */ -static const byte REC_INFO_METADATA - = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED; - #define REC_NEW_STATUS 3 /* This is single byte bit-field */ #define REC_NEW_STATUS_MASK 0x7UL #define REC_NEW_STATUS_SHIFT 0 @@ -295,7 +266,7 @@ rec_comp_status_t rec_get_status(const rec_t* rec) { byte bits = rec[-REC_NEW_STATUS] & REC_NEW_STATUS_MASK; - ut_ad(bits <= REC_STATUS_COLUMNS_ADDED); + ut_ad(bits <= REC_STATUS_INSTANT); return static_cast<rec_comp_status_t>(bits); } @@ -306,12 +277,12 @@ inline void rec_set_status(rec_t* rec, byte bits) { - ut_ad(bits <= REC_STATUS_COLUMNS_ADDED); + ut_ad(bits <= REC_STATUS_INSTANT); rec[-REC_NEW_STATUS] = (rec[-REC_NEW_STATUS] & ~REC_NEW_STATUS_MASK) | bits; } -/** Get the length of added field count in a REC_STATUS_COLUMNS_ADDED record. 
+/** Get the length of added field count in a REC_STATUS_INSTANT record. @param[in] n_add_field number of added fields, minus one @return storage size of the field count, in bytes */ inline unsigned rec_get_n_add_field_len(ulint n_add_field) @@ -320,8 +291,8 @@ inline unsigned rec_get_n_add_field_len(ulint n_add_field) return n_add_field < 0x80 ? 1 : 2; } -/** Set the added field count in a REC_STATUS_COLUMNS_ADDED record. -@param[in,out] header variable header of a REC_STATUS_COLUMNS_ADDED record +/** Set the added field count in a REC_STATUS_INSTANT record. +@param[in,out] header variable header of a REC_STATUS_INSTANT record @param[in] n_add number of added fields, minus 1 @return record header before the number of added fields */ inline void rec_set_n_add_field(byte*& header, ulint n_add) @@ -780,20 +751,89 @@ rec_offs_comp(const ulint* offsets) } /** Determine if the record is the metadata pseudo-record -in the clustered index. +in the clustered index for instant ADD COLUMN or ALTER TABLE. +@param[in] rec leaf page record +@param[in] comp 0 if ROW_FORMAT=REDUNDANT, else nonzero +@return whether the record is the metadata pseudo-record */ +inline bool rec_is_metadata(const rec_t* rec, ulint comp) +{ + bool is = !!(rec_get_info_bits(rec, comp) & REC_INFO_MIN_REC_FLAG); + ut_ad(!is || !comp || rec_get_status(rec) == REC_STATUS_INSTANT); + return is; +} + +/** Determine if the record is the metadata pseudo-record +in the clustered index for instant ADD COLUMN or ALTER TABLE. 
@param[in] rec leaf page record @param[in] index index of the record @return whether the record is the metadata pseudo-record */ -inline bool rec_is_metadata(const rec_t* rec, const dict_index_t* index) +inline bool rec_is_metadata(const rec_t* rec, const dict_index_t& index) { - bool is = rec_get_info_bits(rec, dict_table_is_comp(index->table)) - & REC_INFO_MIN_REC_FLAG; - ut_ad(!is || index->is_instant()); - ut_ad(!is || !dict_table_is_comp(index->table) - || rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED); + bool is = rec_is_metadata(rec, dict_table_is_comp(index.table)); + ut_ad(!is || index.is_instant()); return is; } +/** Determine if the record is the metadata pseudo-record +in the clustered index for instant ADD COLUMN (not other ALTER TABLE). +@param[in] rec leaf page record +@param[in] comp 0 if ROW_FORMAT=REDUNDANT, else nonzero +@return whether the record is the metadata pseudo-record */ +inline bool rec_is_add_metadata(const rec_t* rec, ulint comp) +{ + bool is = rec_get_info_bits(rec, comp) == REC_INFO_MIN_REC_FLAG; + ut_ad(!is || !comp || rec_get_status(rec) == REC_STATUS_INSTANT); + return is; +} + +/** Determine if the record is the metadata pseudo-record +in the clustered index for instant ADD COLUMN (not other ALTER TABLE). +@param[in] rec leaf page record +@param[in] index index of the record +@return whether the record is the metadata pseudo-record */ +inline bool rec_is_add_metadata(const rec_t* rec, const dict_index_t& index) +{ + bool is = rec_is_add_metadata(rec, dict_table_is_comp(index.table)); + ut_ad(!is || index.is_instant()); + return is; +} + +/** Determine if the record is the metadata pseudo-record +in the clustered index for instant ALTER TABLE (not plain ADD COLUMN). 
+@param[in] rec leaf page record +@param[in] comp 0 if ROW_FORMAT=REDUNDANT, else nonzero +@return whether the record is the ALTER TABLE metadata pseudo-record */ +inline bool rec_is_alter_metadata(const rec_t* rec, ulint comp) +{ + bool is = !(~rec_get_info_bits(rec, comp) + & (REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG)); + ut_ad(!is || rec_is_metadata(rec, comp)); + return is; +} + +/** Determine if the record is the metadata pseudo-record +in the clustered index for instant ALTER TABLE (not plain ADD COLUMN). +@param[in] rec leaf page record +@param[in] index index of the record +@return whether the record is the ALTER TABLE metadata pseudo-record */ +inline bool rec_is_alter_metadata(const rec_t* rec, const dict_index_t& index) +{ + bool is = rec_is_alter_metadata(rec, dict_table_is_comp(index.table)); + ut_ad(!is || index.is_dummy || index.is_instant()); + return is; +} + +/** Determine if a record is delete-marked (not a metadata pseudo-record). +@param[in] rec record +@param[in] comp nonzero if ROW_FORMAT!=REDUNDANT +@return whether the record is a delete-marked user record */ +inline bool rec_is_delete_marked(const rec_t* rec, ulint comp) +{ + return (rec_get_info_bits(rec, comp) + & (REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG)) + == REC_INFO_DELETED_FLAG; +} + /** Get the nth field from an index. 
@param[in] rec index record @param[in] index index @@ -811,6 +851,7 @@ rec_get_nth_cfield( ulint* len) { ut_ad(rec_offs_validate(rec, index, offsets)); + if (!rec_offs_nth_default(offsets, n)) { return rec_get_nth_field(rec, offsets, n, len); } @@ -957,7 +998,7 @@ rec_copy( @param[in] fields data fields @param[in] n_fields number of data fields @param[out] extra record header size -@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED +@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT @return total size, in bytes */ ulint rec_get_converted_size_temp( @@ -974,7 +1015,7 @@ rec_get_converted_size_temp( @param[in,out] offsets offsets to the fields; in: rec_offs_n_fields(offsets) @param[in] n_core number of core fields (index->n_core_fields) @param[in] def_val default values for non-core fields -@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED */ +@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT */ void rec_init_offsets_temp( const rec_t* rec, @@ -1001,8 +1042,7 @@ rec_init_offsets_temp( @param[in] index clustered or secondary index @param[in] fields data fields @param[in] n_fields number of data fields -@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED -*/ +@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT */ void rec_convert_dtuple_to_temp( rec_t* rec, @@ -1065,21 +1105,20 @@ rec_get_converted_size_comp_prefix( ulint n_fields,/*!< in: number of data fields */ ulint* extra) /*!< out: extra size */ MY_ATTRIBUTE((warn_unused_result, nonnull(1,2))); -/**********************************************************//** -Determines the size of a data tuple in ROW_FORMAT=COMPACT. + +/** Determine the size of a record in ROW_FORMAT=COMPACT. +@param[in] index record descriptor. 
dict_table_is_comp() + is assumed to hold, even if it doesn't +@param[in] tuple logical record +@param[out] extra extra size @return total size */ ulint rec_get_converted_size_comp( -/*========================*/ - const dict_index_t* index, /*!< in: record descriptor; - dict_table_is_comp() is - assumed to hold, even if - it does not */ - rec_comp_status_t status, /*!< in: status bits of the record */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra) /*!< out: extra size */ - MY_ATTRIBUTE((nonnull(1,3))); + const dict_index_t* index, + const dtuple_t* tuple, + ulint* extra) + MY_ATTRIBUTE((nonnull(1,2))); + /**********************************************************//** The following function returns the size of a data tuple when converted to a physical record. diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic index 41794582f37..f86643ddd62 100644 --- a/storage/innobase/include/rem0rec.ic +++ b/storage/innobase/include/rem0rec.ic @@ -67,7 +67,7 @@ most significant bytes and bits are written below less significant. 001=REC_STATUS_NODE_PTR 010=REC_STATUS_INFIMUM 011=REC_STATUS_SUPREMUM - 100=REC_STATUS_COLUMNS_ADDED + 100=REC_STATUS_INSTANT 1xx=reserved 5 bits heap number 4 8 bits heap number @@ -453,7 +453,7 @@ rec_get_n_fields( } switch (rec_get_status(rec)) { - case REC_STATUS_COLUMNS_ADDED: + case REC_STATUS_INSTANT: case REC_STATUS_ORDINARY: return(dict_index_get_n_fields(index)); case REC_STATUS_NODE_PTR: @@ -549,19 +549,6 @@ rec_set_n_owned_new( } } -#ifdef UNIV_DEBUG -/** Check if the info bits are valid. -@param[in] bits info bits to check -@return true if valid */ -inline -bool -rec_info_bits_valid( - ulint bits) -{ - return(0 == (bits & ~(REC_INFO_DELETED_FLAG | REC_INFO_MIN_REC_FLAG))); -} -#endif /* UNIV_DEBUG */ - /******************************************************//** The following function is used to retrieve the info bits of a record. 
@return info bits */ @@ -575,7 +562,6 @@ rec_get_info_bits( const ulint val = rec_get_bit_field_1( rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); - ut_ad(rec_info_bits_valid(val)); return(val); } @@ -588,7 +574,6 @@ rec_set_info_bits_old( rec_t* rec, /*!< in: old-style physical record */ ulint bits) /*!< in: info bits */ { - ut_ad(rec_info_bits_valid(bits)); rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS, REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); } @@ -601,7 +586,6 @@ rec_set_info_bits_new( rec_t* rec, /*!< in/out: new-style physical record */ ulint bits) /*!< in: info bits */ { - ut_ad(rec_info_bits_valid(bits)); rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS, REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); } @@ -894,7 +878,6 @@ rec_get_nth_field_offs( if SQL null; UNIV_SQL_DEFAULT is default value */ { ulint offs; - ulint length; ut_ad(n < rec_offs_n_fields(offsets)); ut_ad(len); @@ -904,7 +887,7 @@ rec_get_nth_field_offs( offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK; } - length = rec_offs_base(offsets)[1 + n]; + ulint length = rec_offs_base(offsets)[1 + n]; if (length & REC_OFFS_SQL_NULL) { length = UNIV_SQL_NULL; @@ -1263,8 +1246,9 @@ rec_offs_data_size( ulint size; ut_ad(rec_offs_validate(NULL, NULL, offsets)); - size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)] - & REC_OFFS_MASK; + + ulint n = rec_offs_n_fields(offsets); + size = rec_offs_base(offsets)[n] & REC_OFFS_MASK; ut_ad(size < srv_page_size); return(size); } @@ -1405,24 +1389,20 @@ rec_get_converted_size( } else if (index->table->id == DICT_INDEXES_ID) { /* The column SYS_INDEXES.MERGE_THRESHOLD was instantly added in MariaDB 10.2.2 (MySQL 5.7). 
*/ + ut_ad(!index->table->is_temporary()); ut_ad(index->n_fields == DICT_NUM_FIELDS__SYS_INDEXES); ut_ad(dtuple->n_fields == DICT_NUM_FIELDS__SYS_INDEXES || dtuple->n_fields == DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD); } else { ut_ad(dtuple->n_fields >= index->n_core_fields); - ut_ad(dtuple->n_fields <= index->n_fields); + ut_ad(dtuple->n_fields <= index->n_fields + || dtuple->is_alter_metadata()); } #endif if (dict_table_is_comp(index->table)) { - return(rec_get_converted_size_comp( - index, - static_cast<rec_comp_status_t>( - dtuple->info_bits - & REC_NEW_STATUS_MASK), - dtuple->fields, - dtuple->n_fields, NULL)); + return rec_get_converted_size_comp(index, dtuple, NULL); } data_size = dtuple_get_data_size(dtuple, 0); diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h index 630a40b0765..932accc46b0 100644 --- a/storage/innobase/include/row0row.h +++ b/storage/innobase/include/row0row.h @@ -74,6 +74,7 @@ row_get_rec_roll_ptr( #define ROW_BUILD_FOR_PURGE 1 /*!< build row for purge. */ #define ROW_BUILD_FOR_UNDO 2 /*!< build row for undo. */ #define ROW_BUILD_FOR_INSERT 3 /*!< build row for insert. */ + /*****************************************************************//** When an insert or purge to a table is performed, this function builds the entry to be inserted into or purged from an index on the table. @@ -227,6 +228,26 @@ row_rec_to_index_entry( mem_heap_t* heap) /*!< in: memory heap from which the memory needed is allocated */ MY_ATTRIBUTE((warn_unused_result)); + +/** Convert a metadata record to a data tuple. 
+@param[in] rec metadata record +@param[in] index clustered index after instant ALTER TABLE +@param[in] offsets rec_get_offsets(rec) +@param[out] n_ext number of externally stored fields +@param[in,out] heap memory heap for allocations +@param[in] info_bits the info_bits after an update +@param[in] pad whether to pad to index->n_fields */ +dtuple_t* +row_metadata_to_tuple( + const rec_t* rec, + const dict_index_t* index, + const ulint* offsets, + ulint* n_ext, + mem_heap_t* heap, + ulint info_bits, + bool pad) + MY_ATTRIBUTE((nonnull,warn_unused_result)); + /*******************************************************************//** Builds from a secondary index record a row reference with which we can search the clustered index record. diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic index e1a3b5f6a1a..c9db1d2a37e 100644 --- a/storage/innobase/include/row0row.ic +++ b/storage/innobase/include/row0row.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -39,16 +39,12 @@ row_get_trx_id_offset( const dict_index_t* index, /*!< in: clustered index */ const ulint* offsets)/*!< in: record offsets */ { - ulint pos; ulint offset; ulint len; - ut_ad(dict_index_is_clust(index)); ut_ad(rec_offs_validate(NULL, index, offsets)); - pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); - - offset = rec_get_nth_field_offs(offsets, pos, &len); + offset = rec_get_nth_field_offs(offsets, index->db_trx_id(), &len); ut_ad(len == DATA_TRX_ID_LEN); diff --git a/storage/innobase/include/row0trunc.h b/storage/innobase/include/row0trunc.h deleted file mode 100644 index a7592f33cf7..00000000000 --- a/storage/innobase/include/row0trunc.h +++ /dev/null @@ -1,416 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0trunc.h -TRUNCATE implementation - -Created 2013-04-25 Krunal Bauskar -*******************************************************/ - -#ifndef row0trunc_h -#define row0trunc_h - -#include "row0mysql.h" -#include "dict0boot.h" -#include "fil0fil.h" -#include "srv0start.h" - -#include <vector> - -/** The information of TRUNCATE log record. -This class handles the recovery stage of TRUNCATE table. */ -class truncate_t { - -public: - /** - Constructor - - @param old_table_id old table id assigned to table before truncate - @param new_table_id new table id that will be assigned to table - after truncate - @param dir_path directory path */ - truncate_t( - table_id_t old_table_id, - table_id_t new_table_id, - const char* dir_path); - - /** - Constructor - - @param log_file_name parse the log file during recovery to populate - information related to table to truncate */ - truncate_t(const char* log_file_name); - - /** - Consturctor - - @param space_id space in which table reisde - @param name table name - @param tablespace_flags tablespace flags use for recreating tablespace - @param log_flags page format flag - @param recv_lsn lsn of redo log record. */ - truncate_t( - ulint space_id, - const char* name, - ulint tablespace_flags, - ulint log_flags, - lsn_t recv_lsn); - - /** Destructor */ - ~truncate_t(); - - /** The index information of MLOG_FILE_TRUNCATE redo record */ - struct index_t { - - /* Default copy constructor and destructor should be OK. */ - - index_t(); - - /** - Set the truncate log values for a compressed table. 
- @return DB_CORRUPTION or error code */ - dberr_t set(const dict_index_t* index); - - typedef std::vector<byte, ut_allocator<byte> > fields_t; - - /** Index id */ - index_id_t m_id; - - /** Index type */ - ulint m_type; - - /** Root Page Number */ - ulint m_root_page_no; - - /** New Root Page Number. - Note: This field is not persisted to TRUNCATE log but used - during truncate table fix-up for updating SYS_XXXX tables. */ - ulint m_new_root_page_no; - - /** Number of index fields */ - ulint m_n_fields; - - /** DATA_TRX_ID column position. */ - ulint m_trx_id_pos; - - /** Compressed table field meta data, encode by - page_zip_fields_encode. Empty for non-compressed tables. - Should be NUL terminated. */ - fields_t m_fields; - }; - - /** - @return the directory path, can be NULL */ - const char* get_dir_path() const - { - return(m_dir_path); - } - - /** - Register index information - - @param index index information logged as part of truncate log. */ - void add(index_t& index) - { - m_indexes.push_back(index); - } - - /** - Add table to truncate post recovery. - - @param ptr table information need to complete truncate of table. */ - static void add(truncate_t* ptr) - { - s_tables.push_back(ptr); - } - - /** - Clear registered index vector */ - void clear() - { - m_indexes.clear(); - } - - /** - @return old table id of the table to truncate */ - table_id_t old_table_id() const - { - return(m_old_table_id); - } - - /** - @return new table id of the table to truncate */ - table_id_t new_table_id() const - { - return(m_new_table_id); - } - - /** - Update root page number in SYS_XXXX tables. - - @param trx transaction object - @param table_id table id for which information needs to - be updated. - @param reserve_dict_mutex if TRUE, acquire/release - dict_sys->mutex around call to pars_sql. 
- @param mark_index_corrupted if true, then mark index corrupted - @return DB_SUCCESS or error code */ - dberr_t update_root_page_no( - trx_t* trx, - table_id_t table_id, - ibool reserve_dict_mutex, - bool mark_index_corrupted) const; - - /** Create an index for a table. - @param[in] table_name table name, for which to create - the index - @param[in,out] space tablespace - @param[in] index_type type of index to truncate - @param[in] index_id id of index to truncate - @param[in] btr_redo_create_info control info for ::btr_create() - @param[in,out] mtr mini-transaction covering the - create index - @return root page no or FIL_NULL on failure */ - inline ulint create_index( - const char* table_name, - fil_space_t* space, - ulint index_type, - index_id_t index_id, - const btr_create_t& btr_redo_create_info, - mtr_t* mtr) const; - - /** Create the indexes for a table - @param[in] table_name table name, for which to create the - indexes - @param[in,out] space tablespace - @param[in] format_flags page format flags - @return DB_SUCCESS or error code. */ - inline dberr_t create_indexes( - const char* table_name, - fil_space_t* space, - ulint format_flags); - - /** Check if index has been modified since TRUNCATE log snapshot - was recorded. - @param[in] space tablespace - @param[in] root_page_no index root page number - @return true if modified else false */ - inline bool is_index_modified_since_logged( - const fil_space_t* space, - ulint root_page_no) const; - - /** Drop indexes for a table. - @param[in,out] space tablespace - @return DB_SUCCESS or error code. */ - void drop_indexes(fil_space_t* space) const; - - /** - Parses log record during recovery - @param start_ptr buffer containing log body to parse - @param end_ptr buffer end - - @return DB_SUCCESS or error code */ - dberr_t parse( - byte* start_ptr, - const byte* end_ptr); - - /** Parse MLOG_TRUNCATE log record from REDO log file during recovery. 
- @param[in,out] start_ptr buffer containing log body to parse - @param[in] end_ptr buffer end - @param[in] space_id tablespace identifier - @return parsed upto or NULL. */ - static byte* parse_redo_entry( - byte* start_ptr, - const byte* end_ptr, - ulint space_id); - - /** - Write a log record for truncating a single-table tablespace. - - @param start_ptr buffer to write log record - @param end_ptr buffer end - @param space_id space id - @param tablename the table name in the usual - databasename/tablename format of InnoDB - @param flags tablespace flags - @param format_flags page format - @param lsn lsn while logging */ - dberr_t write( - byte* start_ptr, - byte* end_ptr, - ulint space_id, - const char* tablename, - ulint flags, - ulint format_flags, - lsn_t lsn) const; - - /** - @return number of indexes parsed from the truncate log record */ - size_t indexes() const; - - /** - Truncate a single-table tablespace. The tablespace must be cached - in the memory cache. - - Note: This is defined in fil0fil.cc because it needs to access some - types that are local to that file. - - @param space_id space id - @param dir_path directory path - @param tablename the table name in the usual - databasename/tablename format of InnoDB - @param flags tablespace flags - @param default_size if true, truncate to default size if tablespace - is being newly re-initialized. - @return DB_SUCCESS or error */ - static dberr_t truncate( - ulint space_id, - const char* dir_path, - const char* tablename, - ulint flags, - bool default_size); - - /** - Fix the table truncate by applying information parsed from TRUNCATE log. - Fix-up includes re-creating table (drop and re-create indexes) - @return error code or DB_SUCCESS */ - static dberr_t fixup_tables_in_system_tablespace(); - - /** - Fix the table truncate by applying information parsed from TRUNCATE log. - Fix-up includes re-creating tablespace. 
- @return error code or DB_SUCCESS */ - static dberr_t fixup_tables_in_non_system_tablespace(); - - /** - Check whether a tablespace was truncated during recovery - @param space_id tablespace id to check - @return true if the tablespace was truncated */ - static bool is_tablespace_truncated(ulint space_id); - - /** Was tablespace truncated (on crash before checkpoint). - If the MLOG_TRUNCATE redo-record is still available then tablespace - was truncated and checkpoint is yet to happen. - @param[in] space_id tablespace id to check. - @return true if tablespace was truncated. */ - static bool was_tablespace_truncated(ulint space_id); - - /** Get the lsn associated with space. - @param[in] space_id tablespace id to check. - @return associated lsn. */ - static lsn_t get_truncated_tablespace_init_lsn(ulint space_id); - -private: - typedef std::vector<index_t, ut_allocator<index_t> > indexes_t; - - /** Space ID of tablespace */ - ulint m_space_id; - - /** ID of table that is being truncated. */ - table_id_t m_old_table_id; - - /** New ID that will be assigned to table on truncation. */ - table_id_t m_new_table_id; - - /** Data dir path of tablespace */ - char* m_dir_path; - - /** Table name */ - char* m_tablename; - - /** Tablespace Flags */ - ulint m_tablespace_flags; - - /** Format flags (log flags; stored in page-no field of header) */ - ulint m_format_flags; - - /** Index meta-data */ - indexes_t m_indexes; - - /** LSN of TRUNCATE log record. */ - lsn_t m_log_lsn; - - /** Log file name. */ - char* m_log_file_name; - - /** Encryption information of the table */ - fil_encryption_t m_encryption; - uint32_t m_key_id; - - /** Vector of tables to truncate. */ - typedef std::vector<truncate_t*, ut_allocator<truncate_t*> > - tables_t; - - /** Information about tables to truncate post recovery */ - static tables_t s_tables; - - /** Information about truncated table - This is case when truncate is complete but checkpoint hasn't. 
*/ - typedef std::map<ulint, lsn_t> truncated_tables_t; - static truncated_tables_t s_truncated_tables; - -public: - /** If true then fix-up of table is active and so while creating - index instead of grabbing information from dict_index_t, grab it - from parsed truncate log record. */ - static bool s_fix_up_active; -}; - -/** -Parse truncate log file. */ -class TruncateLogParser { - -public: - - /** - Scan and Parse truncate log files. - - @param dir_path look for log directory in following path - @return DB_SUCCESS or error code. */ - static dberr_t scan_and_parse( - const char* dir_path); - -private: - typedef std::vector<char*, ut_allocator<char*> > - trunc_log_files_t; - -private: - /** - Scan to find out truncate log file from the given directory path. - - @param dir_path look for log directory in following path. - @param log_files cache to hold truncate log file name found. - @return DB_SUCCESS or error code. */ - static dberr_t scan( - const char* dir_path, - trunc_log_files_t& log_files); - - /** - Parse the log file and populate table to truncate information. - (Add this table to truncate information to central vector that is then - used by truncate fix-up routine to fix-up truncate action of the table.) - - @param log_file_name log file to parse - @return DB_SUCCESS or error code. */ - static dberr_t parse( - const char* log_file_name); -}; - -#endif /* row0trunc_h */ diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h index 5ac2c7c5ee0..6aa7ebaa339 100644 --- a/storage/innobase/include/row0undo.h +++ b/storage/innobase/include/row0undo.h @@ -82,17 +82,20 @@ that index record. 
*/ enum undo_exec { UNDO_NODE_FETCH_NEXT = 1, /*!< we should fetch the next undo log record */ - UNDO_NODE_INSERT, /*!< undo a fresh insert of a - row to a table */ - UNDO_NODE_MODIFY /*!< undo a modify operation - (DELETE or UPDATE) on a row - of a table */ + /** rollback an insert into persistent table */ + UNDO_INSERT_PERSISTENT, + /** rollback an update (or delete) in a persistent table */ + UNDO_UPDATE_PERSISTENT, + /** rollback an insert into temporary table */ + UNDO_INSERT_TEMPORARY, + /** rollback an update (or delete) in a temporary table */ + UNDO_UPDATE_TEMPORARY, }; /** Undo node structure */ struct undo_node_t{ que_common_t common; /*!< node type: QUE_NODE_UNDO */ - enum undo_exec state; /*!< node execution state */ + undo_exec state; /*!< rollback execution state */ trx_t* trx; /*!< trx for which undo is done */ roll_ptr_t roll_ptr;/*!< roll pointer to undo log record */ trx_undo_rec_t* undo_rec;/*!< undo log record */ diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h index 742f897f3ec..f21b3416ef6 100644 --- a/storage/innobase/include/row0upd.h +++ b/storage/innobase/include/row0upd.h @@ -100,19 +100,6 @@ upd_get_field_by_field_no( bool is_virtual) /*!< in: if it is a virtual column */ MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** -Writes into the redo log the values of trx id and roll ptr and enough info -to determine their positions within a clustered index record. 
-@return new pointer to mlog */ -byte* -row_upd_write_sys_vals_to_log( -/*==========================*/ - dict_index_t* index, /*!< in: clustered index */ - trx_id_t trx_id, /*!< in: transaction id */ - roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */ - byte* log_ptr,/*!< pointer to a buffer of size > 20 opened - in mlog */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************************//** Updates the trx id and roll ptr field in a clustered index record when a row is updated or marked deleted. */ UNIV_INLINE @@ -127,18 +114,6 @@ row_upd_rec_sys_fields( const trx_t* trx, /*!< in: transaction */ roll_ptr_t roll_ptr);/*!< in: DB_ROLL_PTR to the undo log */ /*********************************************************************//** -Sets the trx id or roll ptr field of a clustered index entry. */ -void -row_upd_index_entry_sys_field( -/*==========================*/ - dtuple_t* entry, /*!< in/out: index entry, where the memory - buffers for sys fields are already allocated: - the function just copies the new values to - them */ - dict_index_t* index, /*!< in: clustered index */ - ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */ - ib_uint64_t val); /*!< in: value to write */ -/*********************************************************************//** Creates an update node for a query graph. 
@return own: update node */ upd_node_t* @@ -489,6 +464,14 @@ struct upd_t{ return false; } + /** @return whether this is for a hidden metadata record + for instant ALTER TABLE */ + bool is_metadata() const { return dtuple_t::is_metadata(info_bits); } + /** @return whether this is for a hidden metadata record + for instant ALTER TABLE (not only ADD COLUMN) */ + bool is_alter_metadata() const + { return dtuple_t::is_alter_metadata(info_bits); } + #ifdef UNIV_DEBUG bool validate() const { @@ -502,7 +485,6 @@ struct upd_t{ return(true); } #endif // UNIV_DEBUG - }; /** Kinds of update operation */ diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic index 5e43a272388..403c39250cb 100644 --- a/storage/innobase/include/row0upd.ic +++ b/storage/innobase/include/row0upd.ic @@ -167,13 +167,13 @@ row_upd_rec_sys_fields( const trx_t* trx, /*!< in: transaction */ roll_ptr_t roll_ptr)/*!< in: DB_ROLL_PTR to the undo log */ { - ut_ad(dict_index_is_clust(index)); + ut_ad(index->is_primary()); ut_ad(rec_offs_validate(rec, index, offsets)); - if (page_zip) { - ulint pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); + if (UNIV_LIKELY_NULL(page_zip)) { page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets, - pos, trx->id, roll_ptr); + index->db_trx_id(), + trx->id, roll_ptr); } else { ulint offset = index->trx_id_offset; diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index 069ab5cf93a..474634ef5e6 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -177,7 +177,6 @@ enum monitor_id_t { MONITOR_OVLD_INDEX_PAGES_WRITTEN, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN, MONITOR_OVLD_PAGES_READ, - MONITOR_OVLD_PAGES0_READ, MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS, MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS_AVOIDED, MONITOR_OVLD_BYTE_READ, @@ -300,7 +299,6 @@ enum monitor_id_t { MONITOR_TRX_COMMIT_UNDO, MONITOR_TRX_ROLLBACK, MONITOR_TRX_ROLLBACK_SAVEPOINT, - 
MONITOR_TRX_ROLLBACK_ACTIVE, MONITOR_TRX_ACTIVE, MONITOR_RSEG_HISTORY_LEN, MONITOR_NUM_UNDO_SLOT_USED, diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index c6873ca8938..451959a5ab8 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -176,9 +176,6 @@ struct srv_stats_t /** Number of times prefix optimization avoided triggering cluster lookup */ ulint_ctr_64_t n_sec_rec_cluster_reads_avoided; - /** Number of times page 0 is read from tablespace */ - ulint_ctr_64_t page0_read; - /** Number of encryption_get_latest_key_version calls */ ulint_ctr_64_t n_key_requests; @@ -900,23 +897,6 @@ srv_purge_wakeup(); /** Shut down the purge threads. */ void srv_purge_shutdown(); -/** Check if tablespace is being truncated. -(Ignore system-tablespace as we don't re-create the tablespace -and so some of the action that are suppressed by this function -for independent tablespace are not applicable to system-tablespace). -@param space_id space_id to check for truncate action -@return true if being truncated, false if not being - truncated or tablespace is system-tablespace. */ -bool -srv_is_tablespace_truncated(ulint space_id); - -/** Check if tablespace was truncated. -@param[in] space space object to check for truncate action -@return true if tablespace was truncated and we still have an active -MLOG_TRUNCATE REDO log record. */ -bool -srv_was_tablespace_truncated(const fil_space_t* space); - #ifdef UNIV_DEBUG /** Disables master thread. It's used by: SET GLOBAL innodb_master_thread_disabled_debug = 1 (0). 
@@ -971,7 +951,6 @@ struct export_var_t{ ulint innodb_page_size; /*!< srv_page_size */ ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */ ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read*/ - ulint innodb_page0_read; /*!< srv_stats.page0_read */ ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */ ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */ ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */ diff --git a/storage/innobase/include/sync0arr.ic b/storage/innobase/include/sync0arr.ic index cd1d8e27625..d8f24cb4279 100644 --- a/storage/innobase/include/sync0arr.ic +++ b/storage/innobase/include/sync0arr.ic @@ -44,8 +44,7 @@ sync_array_get() return(sync_wait_array[0]); } - return(sync_wait_array[default_indexer_t<>::get_rnd_index() - % sync_array_size]); + return(sync_wait_array[get_rnd_value() % sync_array_size]); } /******************************************************************//** diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index 43d771c646b..6d1649c0a9c 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -135,202 +135,6 @@ private: TrxUndoRsegs::const_iterator m_iter; }; -/* Namespace to hold all the related functions and variables need for truncate -of undo tablespace. */ -namespace undo { - - typedef std::vector<ulint> undo_spaces_t; - typedef std::vector<trx_rseg_t*> rseg_for_trunc_t; - - /** Mark completion of undo truncate action by writing magic number to - the log file and then removing it from the disk. - If we are going to remove it from disk then why write magic number ? - This is to safeguard from unlink (file-system) anomalies that will keep - the link to the file even after unlink action is successfull and - ref-count = 0. - @param[in] space_id id of the undo tablespace to truncate.*/ - void done(ulint space_id); - - /** Check if TRUNCATE_DDL_LOG file exist. 
- @param[in] space_id id of the undo tablespace. - @return true if exist else false. */ - bool is_log_present(ulint space_id); - - /** Track UNDO tablespace mark for truncate. */ - class Truncate { - public: - void create() - { - m_undo_for_trunc = ULINT_UNDEFINED; - m_scan_start = 1; - m_purge_rseg_truncate_frequency = - ulint(srv_purge_rseg_truncate_frequency); - } - - /** Clear the cached rollback segment. Normally done - when purge is about to shutdown. */ - void clear() - { - reset(); - rseg_for_trunc_t temp; - m_rseg_for_trunc.swap(temp); - } - - /** Is tablespace selected for truncate. - @return true if undo tablespace is marked for truncate */ - bool is_marked() const - { - return(!(m_undo_for_trunc == ULINT_UNDEFINED)); - } - - /** Mark the tablespace for truncate. - @param[in] undo_id tablespace for truncate. */ - void mark(ulint undo_id) - { - m_undo_for_trunc = undo_id; - - m_scan_start = (undo_id + 1) - % (srv_undo_tablespaces_active + 1); - if (m_scan_start == 0) { - /* Note: UNDO tablespace ids starts from 1. */ - m_scan_start = 1; - } - - /* We found an UNDO-tablespace to truncate so set the - local purge rseg truncate frequency to 1. This will help - accelerate the purge action and in turn truncate. */ - m_purge_rseg_truncate_frequency = 1; - } - - /** Get the tablespace marked for truncate. - @return tablespace id marked for truncate. */ - ulint get_marked_space_id() const - { - return(m_undo_for_trunc); - } - - /** Add rseg to truncate vector. - @param[in,out] rseg rseg for truncate */ - void add_rseg_to_trunc(trx_rseg_t* rseg) - { - m_rseg_for_trunc.push_back(rseg); - } - - /** Get number of rsegs registered for truncate. - @return return number of rseg that belongs to tablespace mark - for truncate. */ - ulint rsegs_size() const - { - return(m_rseg_for_trunc.size()); - } - - /** Get ith registered rseg. - @param[in] id index of rseg to get. - @return reference to registered rseg. 
*/ - trx_rseg_t* get_ith_rseg(ulint id) - { - ut_ad(id < m_rseg_for_trunc.size()); - return(m_rseg_for_trunc.at(id)); - } - - /** Reset for next rseg truncate. */ - void reset() - { - m_undo_for_trunc = ULINT_UNDEFINED; - m_rseg_for_trunc.clear(); - - /* Sync with global value as we are done with - truncate now. */ - m_purge_rseg_truncate_frequency = static_cast<ulint>( - srv_purge_rseg_truncate_frequency); - } - - /** Get the tablespace id to start scanning from. - @return id of UNDO tablespace to start scanning from. */ - ulint get_scan_start() const - { - return(m_scan_start); - } - - /** Check if the tablespace needs fix-up (based on presence of - DDL truncate log) - @param space_id space id of the undo tablespace to check - @return true if fix up is needed else false */ - bool needs_fix_up(ulint space_id) const - { - return(is_log_present(space_id)); - } - - /** Add undo tablespace to truncate vector. - @param[in] space_id space id of tablespace to - truncate */ - static void add_space_to_trunc_list(ulint space_id) - { - s_spaces_to_truncate.push_back(space_id); - } - - /** Clear the truncate vector. */ - static void clear_trunc_list() - { - s_spaces_to_truncate.clear(); - } - - /** Is tablespace marked for truncate. - @param[in] space_id space id to check - @return true if marked for truncate, else false. */ - static bool is_tablespace_truncated(ulint space_id) - { - return(std::find(s_spaces_to_truncate.begin(), - s_spaces_to_truncate.end(), space_id) - != s_spaces_to_truncate.end()); - } - - /** Was a tablespace truncated at startup - @param[in] space_id space id to check - @return whether space_id was truncated at startup */ - static bool was_tablespace_truncated(ulint space_id) - { - return(std::find(s_fix_up_spaces.begin(), - s_fix_up_spaces.end(), - space_id) - != s_fix_up_spaces.end()); - } - - /** Get local rseg purge truncate frequency - @return rseg purge truncate frequency. 
*/ - ulint get_rseg_truncate_frequency() const - { - return(m_purge_rseg_truncate_frequency); - } - - private: - /** UNDO tablespace is mark for truncate. */ - ulint m_undo_for_trunc; - - /** rseg that resides in UNDO tablespace is marked for - truncate. */ - rseg_for_trunc_t m_rseg_for_trunc; - - /** Start scanning for UNDO tablespace from this space_id. - This is to avoid bias selection of one tablespace always. */ - ulint m_scan_start; - - /** Rollback segment(s) purge frequency. This is local - value maintained along with global value. It is set to global - value on start but when tablespace is marked for truncate it - is updated to 1 and then minimum value among 2 is used by - purge action. */ - ulint m_purge_rseg_truncate_frequency; - - /** List of UNDO tablespace(s) to truncate. */ - static undo_spaces_t s_spaces_to_truncate; - public: - /** Undo tablespaces that were truncated at startup */ - static undo_spaces_t s_fix_up_spaces; - }; /* class Truncate */ - -}; /* namespace undo */ - /** The control structure used in the purge operation */ class purge_sys_t { @@ -412,9 +216,14 @@ public: by the pq_mutex */ PQMutex pq_mutex; /*!< Mutex protecting purge_queue */ - undo::Truncate undo_trunc; /*!< Track UNDO tablespace marked - for truncate. */ - + /** Undo tablespace file truncation (only accessed by the + srv_purge_coordinator_thread) */ + struct { + /** The undo tablespace that is currently being truncated */ + fil_space_t* current; + /** The undo tablespace that was last truncated */ + fil_space_t* last; + } truncate; /** Constructor. diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h index b74b56eae98..93432cde74e 100644 --- a/storage/innobase/include/trx0roll.h +++ b/storage/innobase/include/trx0roll.h @@ -51,16 +51,6 @@ trx_savept_take( /*============*/ trx_t* trx); /*!< in: transaction */ -/** Get the last undo log record of a transaction (for rollback). 
-@param[in,out] trx transaction -@param[out] roll_ptr DB_ROLL_PTR to the undo record -@param[in,out] heap memory heap for allocation -@return undo log record copied to heap -@retval NULL if none left or the roll_limit (savepoint) was reached */ -trx_undo_rec_t* -trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap) - MY_ATTRIBUTE((nonnull, warn_unused_result)); - /** Report progress when rolling back a row of a recovered transaction. */ void trx_roll_report_progress(); /*******************************************************************//** diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 9fb65c19911..2c6b610f930 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -478,6 +478,7 @@ Check transaction state */ ut_ad(!(t)->read_view.is_open()); \ ut_ad((t)->lock.wait_thr == NULL); \ ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0); \ + ut_ad(UT_LIST_GET_LEN((t)->lock.evicted_tables) == 0); \ ut_ad((t)->dict_operation == TRX_DICT_OP_NONE); \ } while(0) @@ -602,6 +603,9 @@ struct trx_lock_t { lock_list table_locks; /*!< All table locks requested by this transaction, including AUTOINC locks */ + /** List of pending trx_t::evict_table() */ + UT_LIST_BASE_NODE_T(dict_table_t) evicted_tables; + bool cancel; /*!< true if the transaction is being rolled back either via deadlock detection or due to lock timeout. The @@ -1108,6 +1112,10 @@ public: return(assign_temp_rseg()); } + /** Evict a table definition due to the rollback of ALTER TABLE. 
+ @param[in] table_id table identifier */ + void evict_table(table_id_t table_id); + bool is_referenced() { diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index 60b0517db0d..f3c52fff7b5 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -79,27 +79,22 @@ trx_undo_trx_id_is_insert( /*======================*/ const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */ MY_ATTRIBUTE((warn_unused_result)); -/*****************************************************************//** -Writes a roll ptr to an index page. In case that the size changes in -some future version, this function should be used instead of -mach_write_... */ -UNIV_INLINE -void -trx_write_roll_ptr( -/*===============*/ - byte* ptr, /*!< in: pointer to memory where - written */ - roll_ptr_t roll_ptr); /*!< in: roll ptr */ -/*****************************************************************//** -Reads a roll ptr from an index page. In case that the roll ptr size -changes in some future version, this function should be used instead of -mach_read_... +/** Write DB_ROLL_PTR. +@param[out] ptr buffer +@param[in] roll_ptr DB_ROLL_PTR value */ +inline void trx_write_roll_ptr(byte* ptr, roll_ptr_t roll_ptr) +{ + compile_time_assert(DATA_ROLL_PTR_LEN == 7); + mach_write_to_7(ptr, roll_ptr); +} +/** Read DB_ROLL_PTR. +@param[in] ptr buffer @return roll ptr */ -UNIV_INLINE -roll_ptr_t -trx_read_roll_ptr( -/*==============*/ - const byte* ptr); /*!< in: pointer to memory from where to read */ +inline roll_ptr_t trx_read_roll_ptr(const byte* ptr) +{ + compile_time_assert(DATA_ROLL_PTR_LEN == 7); + return mach_read_from_7(ptr); +} /** Gets an undo log page and x-latches it. 
@param[in] page_id page id @@ -185,9 +180,7 @@ trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr) @param[in,out] undo undo log @param[in] limit all undo logs after this limit will be discarded @param[in] is_temp whether this is temporary undo log */ -void -trx_undo_truncate_end(trx_undo_t* undo, undo_no_t limit, bool is_temp) - MY_ATTRIBUTE((nonnull)); +void trx_undo_truncate_end(trx_undo_t& undo, undo_no_t limit, bool is_temp); /** Truncate the head of an undo log. NOTE that only whole pages are freed; the header page is not diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic index ac8af61be09..f6106ffddfa 100644 --- a/storage/innobase/include/trx0undo.ic +++ b/storage/innobase/include/trx0undo.ic @@ -103,37 +103,6 @@ trx_undo_trx_id_is_insert( return bool(trx_id[DATA_TRX_ID_LEN] >> 7); } -/*****************************************************************//** -Writes a roll ptr to an index page. In case that the size changes in -some future version, this function should be used instead of -mach_write_... */ -UNIV_INLINE -void -trx_write_roll_ptr( -/*===============*/ - byte* ptr, /*!< in: pointer to memory where - written */ - roll_ptr_t roll_ptr) /*!< in: roll ptr */ -{ - compile_time_assert(DATA_ROLL_PTR_LEN == 7); - mach_write_to_7(ptr, roll_ptr); -} - -/*****************************************************************//** -Reads a roll ptr from an index page. In case that the roll ptr size -changes in some future version, this function should be used instead of -mach_read_... -@return roll ptr */ -UNIV_INLINE -roll_ptr_t -trx_read_roll_ptr( -/*==============*/ - const byte* ptr) /*!< in: pointer to memory from where to read */ -{ - compile_time_assert(DATA_ROLL_PTR_LEN == 7); - return(mach_read_from_7(ptr)); -} - /** Gets an undo log page and x-latches it. 
@param[in] page_id page id @param[in,out] mtr mini-transaction diff --git a/storage/innobase/include/ut0counter.h b/storage/innobase/include/ut0counter.h index ab694386099..653515a3d7d 100644 --- a/storage/innobase/include/ut0counter.h +++ b/storage/innobase/include/ut0counter.h @@ -29,8 +29,8 @@ Created 2012/04/12 by Sunny Bains #define ut0counter_h #include "os0thread.h" +#include <atomic> #include "my_rdtsc.h" -#include "my_atomic.h" /** CPU cache line size */ #ifdef CPU_LEVEL1_DCACHE_LINESIZE @@ -42,120 +42,79 @@ Created 2012/04/12 by Sunny Bains /** Default number of slots to use in ib_counter_t */ #define IB_N_SLOTS 64 -/** Get the offset into the counter array. */ -template <typename Type, int N> -struct generic_indexer_t { - /** @return offset within m_counter */ - static size_t offset(size_t index) UNIV_NOTHROW - { - return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type))); - } -}; +/** Use the result of my_timer_cycles(), which mainly uses RDTSC for cycles +as a random value. See the comments for my_timer_cycles() */ +/** @return result from RDTSC or similar functions. */ +static inline size_t +get_rnd_value() +{ + size_t c = static_cast<size_t>(my_timer_cycles()); + + if (c != 0) { + return c; + } -/** Use the result of my_timer_cycles(), which mainly uses RDTSC for cycles, -to index into the counter array. See the comments for my_timer_cycles() */ -template <typename Type=ulint, int N=1> -struct counter_indexer_t : public generic_indexer_t<Type, N> { - /** @return result from RDTSC or similar functions. */ - static size_t get_rnd_index() UNIV_NOTHROW - { - size_t c = static_cast<size_t>(my_timer_cycles()); - - if (c != 0) { - return(c); - } else { - /* We may go here if my_timer_cycles() returns 0, - so we have to have the plan B for the counter. */ + /* We may go here if my_timer_cycles() returns 0, + so we have to have the plan B for the counter. 
*/ #if !defined(_WIN32) - return(size_t(os_thread_get_curr_id())); + return (size_t)os_thread_get_curr_id(); #else - LARGE_INTEGER cnt; - QueryPerformanceCounter(&cnt); + LARGE_INTEGER cnt; + QueryPerformanceCounter(&cnt); - return(static_cast<size_t>(cnt.QuadPart)); + return static_cast<size_t>(cnt.QuadPart); #endif /* !_WIN32 */ - } - } +} - /** @return a random offset to the array */ - static size_t get_rnd_offset() UNIV_NOTHROW - { - return(generic_indexer_t<Type, N>::offset(get_rnd_index())); - } -}; - -#define default_indexer_t counter_indexer_t - -/** Class for using fuzzy counters. The counter is relaxed atomic +/** Class for using fuzzy counters. The counter is multi-instance relaxed atomic so the results are not guaranteed to be 100% accurate but close enough. Creates an array of counters and separates each element by the CACHE_LINE_SIZE bytes */ -template < - typename Type, - int N = IB_N_SLOTS, - template<typename, int> class Indexer = default_indexer_t> -struct MY_ALIGNED(CACHE_LINE_SIZE) ib_counter_t -{ +template <typename Type, int N = IB_N_SLOTS> +struct ib_counter_t { /** Increment the counter by 1. */ - void inc() UNIV_NOTHROW { add(1); } + void inc() { add(1); } /** Increment the counter by 1. @param[in] index a reasonably thread-unique identifier */ - void inc(size_t index) UNIV_NOTHROW { add(index, 1); } + void inc(size_t index) { add(index, 1); } /** Add to the counter. @param[in] n amount to be added */ - void add(Type n) UNIV_NOTHROW { add(m_policy.get_rnd_offset(), n); } + void add(Type n) { add(get_rnd_value(), n); } /** Add to the counter. 
@param[in] index a reasonably thread-unique identifier @param[in] n amount to be added */ - void add(size_t index, Type n) UNIV_NOTHROW { - size_t i = m_policy.offset(index); - - ut_ad(i < UT_ARR_SIZE(m_counter)); - - if (sizeof(Type) == 8) { - my_atomic_add64_explicit( - reinterpret_cast<int64*>(&m_counter[i]), - static_cast<int64>(n), MY_MEMORY_ORDER_RELAXED); - } else if (sizeof(Type) == 4) { - my_atomic_add32_explicit( - reinterpret_cast<int32*>(&m_counter[i]), - static_cast<int32>(n), MY_MEMORY_ORDER_RELAXED); - } - compile_time_assert(sizeof(Type) == 8 || sizeof(Type) == 4); + void add(size_t index, Type n) { + index = index % N; + + ut_ad(index < UT_ARR_SIZE(m_counter)); + + m_counter[index].value.fetch_add(n, std::memory_order_relaxed); } - /* @return total value - not 100% accurate, since it is relaxed atomic. */ - operator Type() const UNIV_NOTHROW { + /* @return total value - not 100% accurate, since it is relaxed atomic*/ + operator Type() const { Type total = 0; - for (size_t i = 0; i < N; ++i) { - if (sizeof(Type) == 8) { - total += static_cast< - Type>(my_atomic_load64_explicit( - reinterpret_cast<int64*>(const_cast<Type*>( - &m_counter[m_policy.offset(i)])), - MY_MEMORY_ORDER_RELAXED)); - } else if (sizeof(Type) == 4) { - total += static_cast< - Type>(my_atomic_load32_explicit( - reinterpret_cast<int32*>(const_cast<Type*>( - &m_counter[m_policy.offset(i)])), - MY_MEMORY_ORDER_RELAXED)); - } + for (const auto &counter : m_counter) { + total += counter.value.load(std::memory_order_relaxed); } return(total); } private: - /** Indexer into the array */ - Indexer<Type, N>m_policy; - - /** Slot 0 is unused. 
*/ - Type m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))]; + /** Atomic which occupies whole CPU cache line */ + union ib_counter_element_t { + std::atomic<Type> value; + byte padding[CACHE_LINE_SIZE]; + }; + static_assert(sizeof(ib_counter_element_t) == CACHE_LINE_SIZE, ""); + + /** Array of counter elements */ + MY_ALIGNED(CACHE_LINE_SIZE) ib_counter_element_t m_counter[N]; }; #endif /* ut0counter_h */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index c8707955a5d..51d6dedf512 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -333,7 +333,7 @@ lock_report_trx_id_insanity( trx_id_t max_trx_id) /*!< in: trx_sys.get_max_trx_id() */ { ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); ib::error() << "Transaction id " << trx_id @@ -356,7 +356,7 @@ lock_check_trx_id_sanity( const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */ { ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); trx_id_t max_trx_id = trx_sys.get_max_trx_id(); ut_ad(max_trx_id || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN); @@ -385,7 +385,7 @@ lock_clust_rec_cons_read_sees( ut_ad(dict_index_is_clust(index)); ut_ad(page_rec_is_user_rec(rec)); ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); /* Temp-tables are not shared across connections and multiple transactions from different connections cannot simultaneously @@ -424,7 +424,7 @@ lock_sec_rec_cons_read_sees( { ut_ad(page_rec_is_user_rec(rec)); ut_ad(!index->is_primary()); - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); /* NOTE that we might call this function while holding the search system latch. 
*/ @@ -1218,7 +1218,7 @@ lock_sec_rec_some_has_impl( ut_ad(!dict_index_is_clust(index)); ut_ad(page_rec_is_user_rec(rec)); ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); max_trx_id = page_get_max_trx_id(page); @@ -4253,6 +4253,7 @@ lock_check_dict_lock( const lock_t* lock) /*!< in: lock to check */ { if (lock_get_type_low(lock) == LOCK_REC) { + ut_ad(!lock->index->table->is_temporary()); /* Check if the transcation locked a record in a system table in X mode. It should have set @@ -4266,9 +4267,8 @@ lock_check_dict_lock( } else { ut_ad(lock_get_type_low(lock) & LOCK_TABLE); - const dict_table_t* table; - - table = lock->un_member.tab_lock.table; + const dict_table_t* table = lock->un_member.tab_lock.table; + ut_ad(!table->is_temporary()); /* Check if the transcation locked a system table in IX mode. It should have set the dict_op code @@ -5320,7 +5320,7 @@ lock_rec_insert_check_and_lock( trx_t* trx = thr_get_trx(thr); const rec_t* next_rec = page_rec_get_next_const(rec); ulint heap_no = page_rec_get_heap_no(next_rec); - ut_ad(!rec_is_metadata(next_rec, index)); + ut_ad(!rec_is_metadata(next_rec, *index)); lock_mutex_enter(); /* Because this code is invoked for a running transaction by @@ -5448,7 +5448,7 @@ lock_rec_convert_impl_to_expl_for_trx( { ut_ad(trx->is_referenced()); ut_ad(page_rec_is_leaf(rec)); - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx"); @@ -5572,7 +5572,7 @@ lock_rec_convert_impl_to_expl( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); ut_ad(page_rec_is_leaf(rec)); - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); if (dict_index_is_clust(index)) { trx_id_t trx_id; @@ -5649,7 +5649,7 @@ lock_clust_rec_modify_check_and_lock( return(DB_SUCCESS); } - ut_ad(!rec_is_metadata(rec, index)); + 
ut_ad(!rec_is_metadata(rec, *index)); ut_ad(!index->table->is_temporary()); heap_no = rec_offs_comp(offsets) @@ -5705,7 +5705,7 @@ lock_sec_rec_modify_check_and_lock( ut_ad(block->frame == page_align(rec)); ut_ad(mtr->is_named_space(index->table->space)); ut_ad(page_rec_is_leaf(rec)); - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); if (flags & BTR_NO_LOCKING_FLAG) { @@ -5799,7 +5799,7 @@ lock_sec_rec_read_check_and_lock( return(DB_SUCCESS); } - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); heap_no = page_rec_get_heap_no(rec); /* Some transaction may have an implicit x-lock on the record only @@ -5861,7 +5861,7 @@ lock_clust_rec_read_check_and_lock( || gap_mode == LOCK_REC_NOT_GAP); ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(page_rec_is_leaf(rec)); - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); if ((flags & BTR_NO_LOCKING_FLAG) || srv_read_only_mode @@ -6132,10 +6132,8 @@ lock_get_table_id( /*==============*/ const lock_t* lock) /*!< in: lock */ { - dict_table_t* table; - - table = lock_get_table(lock); - + dict_table_t* table = lock_get_table(lock); + ut_ad(!table->is_temporary()); return(table->id); } diff --git a/storage/innobase/log/log0crypt.cc b/storage/innobase/log/log0crypt.cc index dff9661c6eb..7ad39da29ec 100644 --- a/storage/innobase/log/log0crypt.cc +++ b/storage/innobase/log/log0crypt.cc @@ -82,19 +82,62 @@ log_block_get_start_lsn( return start_lsn; } +/** Generate crypt key from crypt msg. 
+@param[in,out] info encryption key +@param[in] upgrade whether to use the key in MariaDB 10.1 format +@return whether the operation was successful */ +static bool init_crypt_key(crypt_info_t* info, bool upgrade = false) +{ + byte mysqld_key[MY_AES_MAX_KEY_LENGTH]; + uint keylen = sizeof mysqld_key; + + compile_time_assert(16 == sizeof info->crypt_key); + + if (uint rc = encryption_key_get(LOG_DEFAULT_ENCRYPTION_KEY, + info->key_version, mysqld_key, + &keylen)) { + ib::error() + << "Obtaining redo log encryption key version " + << info->key_version << " failed (" << rc + << "). Maybe the key or the required encryption " + "key management plugin was not found."; + return false; + } + + if (upgrade) { + while (keylen < sizeof mysqld_key) { + mysqld_key[keylen++] = 0; + } + } + + uint dst_len; + int err= my_aes_crypt(MY_AES_ECB, + ENCRYPTION_FLAG_NOPAD | ENCRYPTION_FLAG_ENCRYPT, + info->crypt_msg.bytes, sizeof info->crypt_msg, + info->crypt_key.bytes, &dst_len, + mysqld_key, keylen, NULL, 0); + + if (err != MY_AES_OK || dst_len != MY_AES_BLOCK_SIZE) { + ib::error() << "Getting redo log crypto key failed: err = " + << err << ", len = " << dst_len; + return false; + } + + return true; +} + /** Encrypt or decrypt log blocks. 
@param[in,out] buf log blocks to encrypt or decrypt @param[in] lsn log sequence number of the start of the buffer @param[in] size size of the buffer, in bytes -@param[in] decrypt whether to decrypt instead of encrypting */ -UNIV_INTERN -void -log_crypt(byte* buf, lsn_t lsn, ulint size, bool decrypt) +@param[in] op whether to decrypt, encrypt, or rotate key and encrypt +@return whether the operation succeeded (encrypt always does) */ +bool log_crypt(byte* buf, lsn_t lsn, ulint size, log_crypt_t op) { ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_ad(ulint(buf) % OS_FILE_LOG_BLOCK_SIZE == 0); ut_a(info.key_version); - uint dst_len; uint32_t aes_ctr_iv[MY_AES_BLOCK_SIZE / sizeof(uint32_t)]; compile_time_assert(sizeof(uint32_t) == 4); @@ -103,7 +146,8 @@ log_crypt(byte* buf, lsn_t lsn, ulint size, bool decrypt) for (const byte* const end = buf + size; buf != end; buf += OS_FILE_LOG_BLOCK_SIZE, lsn += OS_FILE_LOG_BLOCK_SIZE) { - uint32_t dst[(OS_FILE_LOG_BLOCK_SIZE - LOG_CRYPT_HDR_SIZE) + uint32_t dst[(OS_FILE_LOG_BLOCK_SIZE - LOG_CRYPT_HDR_SIZE + - LOG_BLOCK_CHECKSUM) / sizeof(uint32_t)]; /* The log block number is not encrypted. */ @@ -123,64 +167,61 @@ log_crypt(byte* buf, lsn_t lsn, ulint size, bool decrypt) ut_ad(log_block_get_start_lsn(lsn, log_block_get_hdr_no(buf)) == lsn); + byte* key_ver = &buf[OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_KEY + - LOG_BLOCK_CHECKSUM]; + const uint dst_size + = log_sys.log.format == LOG_HEADER_FORMAT_ENC_10_4 + ? 
sizeof dst - LOG_BLOCK_KEY + : sizeof dst; + if (log_sys.log.format == LOG_HEADER_FORMAT_ENC_10_4) { + const uint key_version = info.key_version; + switch (op) { + case LOG_ENCRYPT_ROTATE_KEY: + info.key_version + = encryption_key_get_latest_version( + LOG_DEFAULT_ENCRYPTION_KEY); + if (key_version != info.key_version + && !init_crypt_key(&info)) { + info.key_version = key_version; + } + /* fall through */ + case LOG_ENCRYPT: + mach_write_to_4(key_ver, info.key_version); + break; + case LOG_DECRYPT: + info.key_version = mach_read_from_4(key_ver); + if (key_version != info.key_version + && !init_crypt_key(&info)) { + return false; + } + } +#ifndef DBUG_OFF + if (key_version != info.key_version) { + DBUG_PRINT("ib_log", ("key_version: %x -> %x", + key_version, + info.key_version)); + } +#endif /* !DBUG_OFF */ + } + ut_ad(LOG_CRYPT_HDR_SIZE + dst_size + == log_sys.trailer_offset()); + + uint dst_len; int rc = encryption_crypt( - buf + LOG_CRYPT_HDR_SIZE, sizeof dst, + buf + LOG_CRYPT_HDR_SIZE, dst_size, reinterpret_cast<byte*>(dst), &dst_len, const_cast<byte*>(info.crypt_key.bytes), sizeof info.crypt_key, reinterpret_cast<byte*>(aes_ctr_iv), sizeof aes_ctr_iv, - decrypt + op == LOG_DECRYPT ? ENCRYPTION_FLAG_DECRYPT | ENCRYPTION_FLAG_NOPAD : ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD, LOG_DEFAULT_ENCRYPTION_KEY, info.key_version); - ut_a(rc == MY_AES_OK); - ut_a(dst_len == sizeof dst); - memcpy(buf + LOG_CRYPT_HDR_SIZE, dst, sizeof dst); - } -} - -/** Generate crypt key from crypt msg. 
-@param[in,out] info encryption key -@param[in] upgrade whether to use the key in MariaDB 10.1 format -@return whether the operation was successful */ -static bool init_crypt_key(crypt_info_t* info, bool upgrade = false) -{ - byte mysqld_key[MY_AES_MAX_KEY_LENGTH]; - uint keylen = sizeof mysqld_key; - - compile_time_assert(16 == sizeof info->crypt_key); - - if (uint rc = encryption_key_get(LOG_DEFAULT_ENCRYPTION_KEY, - info->key_version, mysqld_key, - &keylen)) { - ib::error() - << "Obtaining redo log encryption key version " - << info->key_version << " failed (" << rc - << "). Maybe the key or the required encryption " - "key management plugin was not found."; - return false; - } - - if (upgrade) { - while (keylen < sizeof mysqld_key) { - mysqld_key[keylen++] = 0; - } - } - - uint dst_len; - int err= my_aes_crypt(MY_AES_ECB, - ENCRYPTION_FLAG_NOPAD | ENCRYPTION_FLAG_ENCRYPT, - info->crypt_msg.bytes, sizeof info->crypt_msg, - info->crypt_key.bytes, &dst_len, - mysqld_key, keylen, NULL, 0); - - if (err != MY_AES_OK || dst_len != MY_AES_BLOCK_SIZE) { - ib::error() << "Getting redo log crypto key failed: err = " - << err << ", len = " << dst_len; - return false; + ut_a(dst_len == dst_size); + memcpy(buf + LOG_CRYPT_HDR_SIZE, dst, dst_size); } return true; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 3119a110f74..9e05383bfc6 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -258,9 +258,9 @@ log_calculate_actual_len( { ut_ad(log_mutex_own()); + const ulint framing_size = log_sys.framing_size(); /* actual length stored per block */ - const ulint len_per_blk = OS_FILE_LOG_BLOCK_SIZE - - (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE); + const ulint len_per_blk = OS_FILE_LOG_BLOCK_SIZE - framing_size; /* actual data length in last block already written */ ulint extra_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE); @@ -269,8 +269,7 @@ log_calculate_actual_len( extra_len -= LOG_BLOCK_HDR_SIZE; /* total 
extra length for block header and trailer */ - extra_len = ((len + extra_len) / len_per_blk) - * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE); + extra_len = ((len + extra_len) / len_per_blk) * framing_size; return(len + extra_len); } @@ -402,26 +401,24 @@ log_write_low( ulint str_len) /*!< in: string length */ { ulint len; - ulint data_len; - byte* log_block; ut_ad(log_mutex_own()); + const ulint trailer_offset = log_sys.trailer_offset(); part_loop: /* Calculate a part length */ - data_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len; + ulint data_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len; - if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { + if (data_len <= trailer_offset) { /* The string fits within the current log block */ len = str_len; } else { - data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; + data_len = trailer_offset; - len = OS_FILE_LOG_BLOCK_SIZE - - (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) - - LOG_BLOCK_TRL_SIZE; + len = trailer_offset + - log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE; } memcpy(log_sys.buf + log_sys.buf_free, str, len); @@ -429,18 +426,18 @@ part_loop: str_len -= len; str = str + len; - log_block = static_cast<byte*>( + byte* log_block = static_cast<byte*>( ut_align_down(log_sys.buf + log_sys.buf_free, OS_FILE_LOG_BLOCK_SIZE)); log_block_set_data_len(log_block, data_len); - if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { + if (data_len == trailer_offset) { /* This block became full */ log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE); log_block_set_checkpoint_no(log_block, log_sys.next_checkpoint_no); - len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE; + len += log_sys.framing_size(); log_sys.lsn += len; @@ -668,8 +665,7 @@ void log_t::files::create(ulint n_files) this->n_files= n_files; format= srv_encrypt_log - ? LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED - : LOG_HEADER_FORMAT_CURRENT; + ? 
LOG_HEADER_FORMAT_ENC_10_4 : LOG_HEADER_FORMAT_10_4; subformat= 2; file_size= srv_log_file_size; lsn= LOG_START_LSN; @@ -702,8 +698,8 @@ log_file_header_flush( ut_ad(log_write_mutex_own()); ut_ad(!recv_no_log_write); ut_a(nth_file < log_sys.log.n_files); - ut_ad((log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED) - == LOG_HEADER_FORMAT_CURRENT); + ut_ad(log_sys.log.format == LOG_HEADER_FORMAT_10_4 + || log_sys.log.format == LOG_HEADER_FORMAT_ENC_10_4); buf = log_sys.log.file_header_bufs[nth_file]; @@ -940,11 +936,9 @@ wait and check if an already running write is covering the request. @param[in] lsn log sequence number that should be included in the redo log file write @param[in] flush_to_disk whether the written log should also -be flushed to the file system */ -void -log_write_up_to( - lsn_t lsn, - bool flush_to_disk) +be flushed to the file system +@param[in] rotate_key whether to rotate the encryption key */ +void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key) { #ifdef UNIV_DEBUG ulint loop_count = 0; @@ -953,6 +947,7 @@ log_write_up_to( lsn_t write_lsn; ut_ad(!srv_read_only_mode); + ut_ad(!rotate_key || flush_to_disk); if (recv_no_ibuf_operations) { /* Recovery is running and no operations on the log files are @@ -1097,7 +1092,8 @@ loop: if (log_sys.is_encrypted()) { log_crypt(write_buf + area_start, log_sys.write_lsn, - area_end - area_start); + area_end - area_start, + rotate_key ? 
LOG_ENCRYPT_ROTATE_KEY : LOG_ENCRYPT); } /* Do the write to the log files */ @@ -1505,7 +1501,7 @@ log_checkpoint( log_mutex_exit(); - log_write_up_to(flush_lsn, true); + log_write_up_to(flush_lsn, true, true); DBUG_EXECUTE_IF( "using_wa_checkpoint_middle", @@ -2080,13 +2076,9 @@ log_pad_current_log_block(void) /* We retrieve lsn only because otherwise gcc crashed on HP-UX */ lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE); - pad_length = OS_FILE_LOG_BLOCK_SIZE - - (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) - - LOG_BLOCK_TRL_SIZE; - if (pad_length - == (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - - LOG_BLOCK_TRL_SIZE)) { - + pad_length = log_sys.trailer_offset() + - log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE; + if (pad_length == log_sys.payload_size()) { pad_length = 0; } diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index afcd9079480..23dfc4afa0f 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -52,7 +52,6 @@ Created 9/20/1997 Heikki Tuuri #include "trx0undo.h" #include "trx0rec.h" #include "fil0fil.h" -#include "row0trunc.h" #include "buf0rea.h" #include "srv0srv.h" #include "srv0start.h" @@ -201,10 +200,6 @@ corresponding to MLOG_INDEX_LOAD. */ void (*log_optimized_ddl_op)(ulint space_id); -/** Report backup-unfriendly TRUNCATE operation (with separate log file), -corresponding to MLOG_TRUNCATE. */ -void (*log_truncate)(); - /** Report an operation to create, delete, or rename a file during backup. 
@param[in] space_id tablespace identifier @param[in] flags tablespace flags (NULL if not create) @@ -813,21 +808,24 @@ loop: << log_block_get_checkpoint_no(buf) << " expected: " << crc << " found: " << cksum; +fail: end_lsn = *start_lsn; success = false; break; } - if (is_encrypted()) { - log_crypt(buf, *start_lsn, - OS_FILE_LOG_BLOCK_SIZE, true); + if (is_encrypted() + && !log_crypt(buf, *start_lsn, + OS_FILE_LOG_BLOCK_SIZE, + LOG_DECRYPT)) { + goto fail; } } ulint dl = log_block_get_data_len(buf); if (dl < LOG_BLOCK_HDR_SIZE - || (dl > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE - && dl != OS_FILE_LOG_BLOCK_SIZE)) { + || (dl != OS_FILE_LOG_BLOCK_SIZE + && dl > log_sys.trailer_offset())) { recv_sys->found_corrupt_log = true; end_lsn = *start_lsn; break; @@ -1028,54 +1026,6 @@ static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt) return(DB_SUCCESS); } -/** Determine if a redo log from MariaDB 10.4 is clean. -@return error code -@retval DB_SUCCESS if the redo log is clean -@retval DB_CORRUPTION if the redo log is corrupted -@retval DB_ERROR if the redo log is not empty */ -static dberr_t recv_log_recover_10_4() -{ - ut_ad(!log_sys.is_encrypted()); - const lsn_t lsn = log_sys.log.lsn; - log_mutex_enter(); - const lsn_t source_offset = log_sys.log.calc_lsn_offset(lsn); - log_mutex_exit(); - const ulint page_no - = (ulint) (source_offset / univ_page_size.physical()); - byte* buf = log_sys.buf; - - fil_io(IORequestLogRead, true, - page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no), - univ_page_size, - (ulint) ((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1)) - % univ_page_size.physical()), - OS_FILE_LOG_BLOCK_SIZE, buf, NULL); - - if (log_block_calc_checksum(buf) != log_block_get_checksum(buf)) { - return DB_CORRUPTION; - } - - /* On a clean shutdown, the redo log will be logically empty - after the checkpoint lsn. 
*/ - - if (log_block_get_data_len(buf) - != (source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1))) { - return DB_ERROR; - } - - /* Mark the redo log for downgrading. */ - srv_log_file_size = 0; - recv_sys->parse_start_lsn = recv_sys->recovered_lsn - = recv_sys->scanned_lsn - = recv_sys->mlog_checkpoint_lsn = lsn; - log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn - = log_sys.lsn = log_sys.write_lsn - = log_sys.current_flush_lsn = log_sys.flushed_to_disk_lsn - = lsn; - log_sys.next_checkpoint_no = 0; - return DB_SUCCESS; -} - /** Find the latest checkpoint in the log header. @param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 @return error code or DB_SUCCESS */ @@ -1116,11 +1066,10 @@ recv_find_max_checkpoint(ulint* max_field) return(recv_find_max_checkpoint_0(max_field)); case LOG_HEADER_FORMAT_10_2: case LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED: - case LOG_HEADER_FORMAT_CURRENT: - case LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED: + case LOG_HEADER_FORMAT_10_3: + case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED: case LOG_HEADER_FORMAT_10_4: - /* We can only parse the unencrypted LOG_HEADER_FORMAT_10_4. - The encrypted format uses a larger redo log block trailer. */ + case LOG_HEADER_FORMAT_10_4 | LOG_HEADER_FORMAT_ENCRYPTED: break; default: ib::error() << "Unsupported redo log format." @@ -1185,19 +1134,7 @@ recv_find_max_checkpoint(ulint* max_field) return(DB_ERROR); } - if (log_sys.log.format == LOG_HEADER_FORMAT_10_4) { - dberr_t err = recv_log_recover_10_4(); - if (err != DB_SUCCESS) { - ib::error() - << "Downgrade after a crash is not supported." - " The redo log was created with " << creator - << (err == DB_ERROR - ? "." 
: ", and it appears corrupted."); - } - return err; - } - - return DB_SUCCESS; + return(DB_SUCCESS); } /** Try to parse a single log record body and also applies it if @@ -1245,14 +1182,10 @@ recv_parse_or_apply_log_rec_body( } return(ptr + 8); case MLOG_TRUNCATE: - if (log_truncate) { - ut_ad(srv_operation != SRV_OPERATION_NORMAL); - log_truncate(); - recv_sys->found_corrupt_fs = true; - return NULL; - } - return(truncate_t::parse_redo_entry(ptr, end_ptr, space_id)); - + ib::error() << "Cannot crash-upgrade from " + "old-style TRUNCATE TABLE"; + recv_sys->found_corrupt_log = true; + return NULL; default: break; } @@ -1306,6 +1239,7 @@ parse_log: break; #endif /* UNIV_LOG_LSN_DEBUG */ case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES: + case MLOG_MEMSET: #ifdef UNIV_DEBUG if (page && page_type == FIL_PAGE_TYPE_ALLOCATED && end_ptr >= ptr + 2) { @@ -1835,13 +1769,10 @@ recv_recover_page(bool just_read_in, buf_block_t* block) page_t* page; page_zip_des_t* page_zip; recv_addr_t* recv_addr; - recv_t* recv; - byte* buf; lsn_t start_lsn; lsn_t end_lsn; lsn_t page_lsn; lsn_t page_newest_lsn; - ibool modification_to_page; mtr_t mtr; mutex_enter(&(recv_sys->mutex)); @@ -1916,57 +1847,19 @@ recv_recover_page(bool just_read_in, buf_block_t* block) page_lsn = page_newest_lsn; } - modification_to_page = FALSE; start_lsn = end_lsn = 0; - recv = UT_LIST_GET_FIRST(recv_addr->rec_list); fil_space_t* space = fil_space_acquire(block->page.id.space()); - while (recv) { + for (recv_t* recv = UT_LIST_GET_FIRST(recv_addr->rec_list); + recv; recv = UT_LIST_GET_NEXT(rec_list, recv)) { end_lsn = recv->end_lsn; ut_ad(end_lsn <= log_sys.log.scanned_lsn); - if (recv->len > RECV_DATA_BLOCK_SIZE) { - /* We have to copy the record body to a separate - buffer */ - - buf = static_cast<byte*>(ut_malloc_nokey(recv->len)); - - recv_data_copy_to_buf(buf, recv); - } else { - buf = ((byte*)(recv->data)) + sizeof(recv_data_t); - } - - /* If per-table tablespace was truncated and there 
exist REDO - records before truncate that are to be applied as part of - recovery (checkpoint didn't happen since truncate was done) - skip such records using lsn check as they may not stand valid - post truncate. - LSN at start of truncate is recorded and any redo record - with LSN less than recorded LSN is skipped. - Note: We can't skip complete recv_addr as same page may have - valid REDO records post truncate those needs to be applied. */ - - /* Ignore applying the redo logs for tablespace that is - truncated. Post recovery there is fixup action that will - restore the tablespace back to normal state. - Applying redo at this stage can result in error given that - redo will have action recorded on page before tablespace - was re-inited and that would lead to an error while applying - such action. */ - if (recv->start_lsn >= page_lsn - && !srv_is_tablespace_truncated(space->id) - && !(srv_was_tablespace_truncated(space) - && recv->start_lsn - < truncate_t::get_truncated_tablespace_init_lsn( - space->id))) { - - lsn_t end_lsn; - - if (!modification_to_page) { - - modification_to_page = TRUE; + ut_ad(recv->start_lsn); + if (recv->start_lsn >= page_lsn) { + if (!start_lsn) { start_lsn = recv->start_lsn; } @@ -1982,29 +1875,41 @@ recv_recover_page(bool just_read_in, buf_block_t* block) << " len " << recv->len << " page " << block->page.id); + byte* buf; + + if (recv->len > RECV_DATA_BLOCK_SIZE) { + /* We have to copy the record body to + a separate buffer */ + + buf = static_cast<byte*>(ut_malloc_nokey( + recv->len)); + + recv_data_copy_to_buf(buf, recv); + } else { + buf = reinterpret_cast<byte*>(recv->data) + + sizeof *recv->data; + } + recv_parse_or_apply_log_rec_body( recv->type, buf, buf + recv->len, block->page.id.space(), - block->page.id.page_no(), - true, block, &mtr); + block->page.id.page_no(), true, block, &mtr); - end_lsn = recv->start_lsn + recv->len; + lsn_t end_lsn = recv->start_lsn + recv->len; mach_write_to_8(FIL_PAGE_LSN + page, end_lsn); 
mach_write_to_8(srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + page, end_lsn); if (page_zip) { - mach_write_to_8(FIL_PAGE_LSN - + page_zip->data, end_lsn); + mach_write_to_8(FIL_PAGE_LSN + page_zip->data, + end_lsn); } - } - if (recv->len > RECV_DATA_BLOCK_SIZE) { - ut_free(buf); + if (recv->len > RECV_DATA_BLOCK_SIZE) { + ut_free(buf); + } } - - recv = UT_LIST_GET_NEXT(rec_list, recv); } space->release(); @@ -2018,9 +1923,7 @@ recv_recover_page(bool just_read_in, buf_block_t* block) } #endif /* UNIV_ZIP_DEBUG */ - if (modification_to_page) { - ut_a(block); - + if (start_lsn) { log_flush_order_mutex_enter(); buf_flush_recv_note_modification(block, start_lsn, end_lsn); log_flush_order_mutex_exit(); @@ -2131,6 +2034,17 @@ void recv_apply_hashed_log_recs(bool last_batch) ut_d(recv_no_log_write = recv_no_ibuf_operations); if (ulint n = recv_sys->n_addrs) { + if (!log_sys.log.subformat && !srv_force_recovery + && srv_undo_tablespaces_open) { + ib::error() << "Recovery of separately logged" + " TRUNCATE operations is no longer supported." + " Set innodb_force_recovery=1" + " if no *trunc.log files exist"; + recv_sys->found_corrupt_log = true; + mutex_exit(&recv_sys->mutex); + return; + } + const char* msg = last_batch ? "Starting final batch to recover " : "Starting a batch to recover "; @@ -2156,15 +2070,6 @@ void recv_apply_hashed_log_recs(bool last_batch) recv_addr = static_cast<recv_addr_t*>( HASH_GET_NEXT(addr_hash, recv_addr))) { - if (srv_is_tablespace_truncated(recv_addr->space)) { - /* Avoid applying REDO log for the tablespace - that is schedule for TRUNCATE. 
*/ - ut_a(recv_sys->n_addrs); - recv_addr->state = RECV_DISCARDED; - recv_sys->n_addrs--; - continue; - } - if (recv_addr->state == RECV_DISCARDED || !UT_LIST_GET_LEN(recv_addr->rec_list)) { ut_a(recv_sys->n_addrs); @@ -2364,17 +2269,12 @@ recv_calc_lsn_on_data_add( ib_uint64_t len) /*!< in: this many bytes of data is added, log block headers not included */ { - ulint frag_len; - ib_uint64_t lsn_len; - - frag_len = (lsn % OS_FILE_LOG_BLOCK_SIZE) - LOG_BLOCK_HDR_SIZE; - ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - - LOG_BLOCK_TRL_SIZE); - lsn_len = len; - lsn_len += (lsn_len + frag_len) - / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - - LOG_BLOCK_TRL_SIZE) - * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE); + unsigned frag_len = (lsn % OS_FILE_LOG_BLOCK_SIZE) - LOG_BLOCK_HDR_SIZE; + unsigned payload_size = log_sys.payload_size(); + ut_ad(frag_len < payload_size); + lsn_t lsn_len = len; + lsn_len += (lsn_len + frag_len) / payload_size + * (OS_FILE_LOG_BLOCK_SIZE - payload_size); return(lsn + lsn_len); } @@ -2833,11 +2733,7 @@ bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn) start_offset = LOG_BLOCK_HDR_SIZE; } - end_offset = data_len; - - if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { - end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; - } + end_offset = std::min<ulint>(data_len, log_sys.trailer_offset()); ut_ad(start_offset <= end_offset); @@ -3936,6 +3832,9 @@ static const char* get_mlog_string(mlog_id_t type) case MLOG_TRUNCATE: return("MLOG_TRUNCATE"); + case MLOG_MEMSET: + return("MLOG_MEMSET"); + case MLOG_FILE_WRITE_CRYPT_DATA: return("MLOG_FILE_WRITE_CRYPT_DATA"); } diff --git a/storage/innobase/mtr/mtr0log.cc b/storage/innobase/mtr/mtr0log.cc index 6baf1f06bf9..c9a6de8c902 100644 --- a/storage/innobase/mtr/mtr0log.cc +++ b/storage/innobase/mtr/mtr0log.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its 
affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -121,7 +121,7 @@ mlog_parse_initial_log_record( } /********************************************************//** -Parses a log record written by mlog_write_ulint or mlog_write_ull. +Parses a log record written by mlog_write_ulint, mlog_write_ull, mlog_memset. @return parsed record end, NULL if not a complete record or a corrupt record */ byte* mlog_parse_nbytes( @@ -137,29 +137,43 @@ mlog_parse_nbytes( ulint val; ib_uint64_t dval; - ut_a(type <= MLOG_8BYTES); + ut_ad(type <= MLOG_8BYTES || type == MLOG_MEMSET); ut_a(!page || !page_zip || !fil_page_index_page_check(page)); if (end_ptr < ptr + 2) { - - return(NULL); + return NULL; } offset = mach_read_from_2(ptr); ptr += 2; - if (offset >= srv_page_size) { - recv_sys->found_corrupt_log = TRUE; - - return(NULL); + if (UNIV_UNLIKELY(offset >= srv_page_size)) { + goto corrupt; } - if (type == MLOG_8BYTES) { + switch (type) { + case MLOG_MEMSET: + if (end_ptr < ptr + 3) { + return NULL; + } + val = mach_read_from_2(ptr); + ptr += 2; + if (UNIV_UNLIKELY(offset + val > srv_page_size)) { + goto corrupt; + } + if (page) { + memset(page + offset, *ptr, val); + if (page_zip) { + memset(static_cast<page_zip_des_t*>(page_zip) + ->data + offset, *ptr, val); + } + } + return const_cast<byte*>(++ptr); + case MLOG_8BYTES: dval = mach_u64_parse_compressed(&ptr, end_ptr); if (ptr == NULL) { - - return(NULL); + return NULL; } if (page) { @@ -171,14 +185,13 @@ mlog_parse_nbytes( mach_write_to_8(page + offset, dval); } - return(const_cast<byte*>(ptr)); + return const_cast<byte*>(ptr); + default: + val = mach_parse_compressed(&ptr, end_ptr); } - val = mach_parse_compressed(&ptr, end_ptr); - if (ptr == NULL) { - - return(NULL); + return NULL; } switch (type) { @@ 
-221,11 +234,11 @@ mlog_parse_nbytes( break; default: corrupt: - recv_sys->found_corrupt_log = TRUE; + recv_sys->found_corrupt_log = true; ptr = NULL; } - return(const_cast<byte*>(ptr)); + return const_cast<byte*>(ptr); } /********************************************************//** @@ -409,6 +422,72 @@ mlog_parse_string( return(ptr + len); } +/** Initialize a string of bytes. +@param[in,out] b buffer page +@param[in] ofs byte offset from block->frame +@param[in] len length of the data to write +@param[in] val the data byte to write +@param[in,out] mtr mini-transaction */ +void +mlog_memset(buf_block_t* b, ulint ofs, ulint len, byte val, mtr_t* mtr) +{ + ut_ad(len); + ut_ad(ofs <= ulint(srv_page_size)); + ut_ad(ofs + len <= ulint(srv_page_size)); + memset(ofs + b->frame, val, len); + + mtr->set_modified(); + switch (mtr->get_log_mode()) { + case MTR_LOG_NONE: + case MTR_LOG_NO_REDO: + return; + case MTR_LOG_SHORT_INSERTS: + ut_ad(0); + /* fall through */ + case MTR_LOG_ALL: + break; + } + + byte* l = mtr->get_log()->open(11 + 2 + 2 + 1); + l = mlog_write_initial_log_record_low( + MLOG_MEMSET, b->page.id.space(), b->page.id.page_no(), l, mtr); + mach_write_to_2(l, ofs); + mach_write_to_2(l + 2, len); + l[4] = val; + mlog_close(mtr, l + 5); +} + +/** Initialize a string of bytes. 
+@param[in,out] byte byte address +@param[in] len length of the data to write +@param[in] val the data byte to write +@param[in,out] mtr mini-transaction */ +void mlog_memset(byte* b, ulint len, byte val, mtr_t* mtr) +{ + ut_ad(len); + ut_ad(page_offset(b) + len <= ulint(srv_page_size)); + memset(b, val, len); + + mtr->set_modified(); + switch (mtr->get_log_mode()) { + case MTR_LOG_NONE: + case MTR_LOG_NO_REDO: + return; + case MTR_LOG_SHORT_INSERTS: + ut_ad(0); + /* fall through */ + case MTR_LOG_ALL: + break; + } + + byte* l = mtr->get_log()->open(11 + 2 + 2 + 1); + l = mlog_write_initial_log_record_fast(b, MLOG_MEMSET, l, mtr); + mach_write_to_2(l, page_offset(b)); + mach_write_to_2(l + 2, len); + l[4] = val; + mlog_close(mtr, l + 5); +} + /********************************************************//** Opens a buffer for mlog, writes the initial log record and, if needed, the field lengths of an index. diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 92b1aa38a81..3084ba387fb 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -28,10 +28,10 @@ Created 11/26/1995 Heikki Tuuri #include "buf0buf.h" #include "buf0flu.h" +#include "fsp0sysspace.h" #include "page0types.h" #include "mtr0log.h" #include "log0log.h" -#include "row0trunc.h" #include "log0recv.h" @@ -677,8 +677,7 @@ mtr_t::x_lock_space(ulint space_id, const char* file, unsigned line) ut_ad(get_log_mode() != MTR_LOG_NO_REDO || space->purpose == FIL_TYPE_TEMPORARY || space->purpose == FIL_TYPE_IMPORT - || my_atomic_loadlint(&space->redo_skipped_count) > 0 - || srv_is_tablespace_truncated(space->id)); + || my_atomic_loadlint(&space->redo_skipped_count) > 0); } ut_ad(space); diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index b4775d19e1d..25b2d202168 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -735,7 +735,7 @@ up_slot_match: & REC_INFO_MIN_REC_FLAG)) { 
ut_ad(!page_has_prev(page_align(mid_rec))); ut_ad(!page_rec_is_leaf(mid_rec) - || rec_is_metadata(mid_rec, index)); + || rec_is_metadata(mid_rec, *index)); cmp = 1; goto low_rec_match; } @@ -1368,7 +1368,7 @@ use_heap: switch (rec_get_status(current_rec)) { case REC_STATUS_ORDINARY: case REC_STATUS_NODE_PTR: - case REC_STATUS_COLUMNS_ADDED: + case REC_STATUS_INSTANT: case REC_STATUS_INFIMUM: break; case REC_STATUS_SUPREMUM: @@ -1377,7 +1377,7 @@ use_heap: switch (rec_get_status(insert_rec)) { case REC_STATUS_ORDINARY: case REC_STATUS_NODE_PTR: - case REC_STATUS_COLUMNS_ADDED: + case REC_STATUS_INSTANT: break; case REC_STATUS_INFIMUM: case REC_STATUS_SUPREMUM: @@ -1564,7 +1564,7 @@ page_cur_insert_rec_zip( get rid of the modification log. */ page_create_zip(page_cur_get_block(cursor), index, page_header_get_field(page, PAGE_LEVEL), - 0, NULL, mtr); + 0, mtr); ut_ad(!page_header_get_ptr(page, PAGE_FREE)); if (page_zip_available( @@ -1639,7 +1639,7 @@ page_cur_insert_rec_zip( if (!log_compressed) { if (page_zip_compress( page_zip, page, index, - level, NULL, NULL)) { + level, NULL)) { page_cur_insert_rec_write_log( insert_rec, rec_size, cursor->rec, index, mtr); @@ -1785,17 +1785,11 @@ too_small: columns of free_rec, in case it will not be overwritten by insert_rec. 
*/ - ulint trx_id_col; ulint trx_id_offs; ulint len; - trx_id_col = dict_index_get_sys_col_pos(index, - DATA_TRX_ID); - ut_ad(trx_id_col > 0); - ut_ad(trx_id_col != ULINT_UNDEFINED); - - trx_id_offs = rec_get_nth_field_offs(foffsets, - trx_id_col, &len); + trx_id_offs = rec_get_nth_field_offs( + foffsets, index->db_trx_id(), &len); ut_ad(len == DATA_TRX_ID_LEN); if (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + trx_id_offs @@ -1811,7 +1805,7 @@ too_small: ut_ad(free_rec + trx_id_offs + DATA_TRX_ID_LEN == rec_get_nth_field(free_rec, foffsets, - trx_id_col + 1, &len)); + index->db_roll_ptr(), &len)); ut_ad(len == DATA_ROLL_PTR_LEN); } diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 36a4cb46cf7..1b6567ad9a4 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -30,7 +30,6 @@ Created 2/2/1994 Heikki Tuuri #include "page0zip.h" #include "buf0buf.h" #include "btr0btr.h" -#include "row0trunc.h" #include "srv0srv.h" #include "lock0lock.h" #include "fut0lst.h" @@ -454,22 +453,15 @@ page_create_zip( ulint level, /*!< in: the B-tree level of the page */ trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */ - const redo_page_compress_t* page_comp_info, - /*!< in: used for applying - TRUNCATE log - record during recovery */ mtr_t* mtr) /*!< in/out: mini-transaction handle */ { page_t* page; page_zip_des_t* page_zip = buf_block_get_page_zip(block); - bool is_spatial; ut_ad(block); ut_ad(page_zip); - ut_ad(index == NULL || dict_table_is_comp(index->table)); - is_spatial = index ? dict_index_is_spatial(index) - : page_comp_info->type & DICT_SPATIAL; + ut_ad(dict_table_is_comp(index->table)); /* PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC are always 0 for temporary tables. 
*/ @@ -487,22 +479,11 @@ page_create_zip( || !dict_index_is_sec_or_ibuf(index) || index->table->is_temporary()); - page = page_create_low(block, TRUE, is_spatial); + page = page_create_low(block, TRUE, dict_index_is_spatial(index)); mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + page, level); mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + page, max_trx_id); - if (truncate_t::s_fix_up_active) { - /* Compress the index page created when applying - TRUNCATE log during recovery */ - if (!page_zip_compress(page_zip, page, index, page_zip_level, - page_comp_info, NULL)) { - /* The compression of a newly created - page should always succeed. */ - ut_error; - } - - } else if (!page_zip_compress(page_zip, page, index, - page_zip_level, NULL, mtr)) { + if (!page_zip_compress(page_zip, page, index, page_zip_level, mtr)) { /* The compression of a newly created page should always succeed. */ ut_error; @@ -546,7 +527,7 @@ page_create_empty( ut_ad(!index->table->is_temporary()); page_create_zip(block, index, page_header_get_field(page, PAGE_LEVEL), - max_trx_id, NULL, mtr); + max_trx_id, mtr); } else { page_create(block, mtr, page_is_comp(page), dict_index_is_spatial(index)); @@ -721,11 +702,8 @@ page_copy_rec_list_end( if (new_page_zip) { mtr_set_log_mode(mtr, log_mode); - if (!page_zip_compress(new_page_zip, - new_page, - index, - page_zip_level, - NULL, mtr)) { + if (!page_zip_compress(new_page_zip, new_page, index, + page_zip_level, mtr)) { /* Before trying to reorganize the page, store the number of preceding records on the page. 
*/ ulint ret_pos @@ -887,7 +865,7 @@ page_copy_rec_list_start( goto zip_reorganize;); if (!page_zip_compress(new_page_zip, new_page, index, - page_zip_level, NULL, mtr)) { + page_zip_level, mtr)) { ulint ret_pos; #ifndef DBUG_OFF zip_reorganize: @@ -1826,6 +1804,7 @@ page_print_list( count = 0; for (;;) { offsets = rec_get_offsets(cur.rec, index, offsets, + page_rec_is_leaf(cur.rec), ULINT_UNDEFINED, &heap); page_rec_print(cur.rec, offsets); @@ -1848,6 +1827,7 @@ page_print_list( if (count + pr_n >= n_recs) { offsets = rec_get_offsets(cur.rec, index, offsets, + page_rec_is_leaf(cur.rec), ULINT_UNDEFINED, &heap); page_rec_print(cur.rec, offsets); } diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index 4b611baefae..cef492ca511 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -31,12 +31,7 @@ Created June 2005 by Marko Makela /** A BLOB field reference full of zero, for use in assertions and tests. Initially, BLOB field references are set to zero, in dtuple_convert_big_rec(). */ -const byte field_ref_zero[FIELD_REF_SIZE] = { - 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -}; +const byte field_ref_zero[UNIV_PAGE_SIZE_MAX] = { 0, }; #ifndef UNIV_INNOCHECKSUM #include "page0page.h" @@ -46,7 +41,6 @@ const byte field_ref_zero[FIELD_REF_SIZE] = { #include "page0types.h" #include "log0recv.h" #include "row0row.h" -#include "row0trunc.h" #include "zlib.h" #include "buf0buf.h" #include "buf0types.h" @@ -106,11 +100,11 @@ Compare at most sizeof(field_ref_zero) bytes. @param s in: size of the memory block, in bytes */ #define ASSERT_ZERO(b, s) \ ut_ad(!memcmp(b, field_ref_zero, \ - ut_min(static_cast<size_t>(s), sizeof field_ref_zero))); + std::min<size_t>(s, sizeof field_ref_zero))); /** Assert that a BLOB pointer is filled with zero bytes. 
@param b in: BLOB pointer */ #define ASSERT_ZERO_BLOB(b) \ - ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero)) + ut_ad(!memcmp(b, field_ref_zero, FIELD_REF_SIZE)) /* Enable some extra debugging output. This code can be enabled independently of any UNIV_ debugging conditions. */ @@ -1248,17 +1242,11 @@ page_zip_compress( dict_index_t* index, /*!< in: index of the B-tree node */ ulint level, /*!< in: commpression level */ - const redo_page_compress_t* page_comp_info, - /*!< in: used for applying - TRUNCATE log - record during recovery */ mtr_t* mtr) /*!< in/out: mini-transaction, or NULL */ { z_stream c_stream; int err; - ulint n_fields; /* number of index fields - needed */ byte* fields; /*!< index field information */ byte* buf; /*!< compressed payload of the page */ @@ -1273,7 +1261,6 @@ page_zip_compress( ulint n_blobs = 0; byte* storage; /* storage of uncompressed columns */ - index_id_t ind_id; uintmax_t usec = ut_time_us(NULL); #ifdef PAGE_ZIP_COMPRESS_DBG FILE* logfile = NULL; @@ -1288,10 +1275,8 @@ page_zip_compress( ut_a(fil_page_index_page_check(page)); ut_ad(page_simple_validate_new((page_t*) page)); ut_ad(page_zip_simple_validate(page_zip)); - ut_ad(!index - || (index - && dict_table_is_comp(index->table) - && !dict_index_is_ibuf(index))); + ut_ad(dict_table_is_comp(index->table)); + ut_ad(!dict_index_is_ibuf(index)); UNIV_MEM_ASSERT_RW(page, srv_page_size); @@ -1311,18 +1296,10 @@ page_zip_compress( == PAGE_NEW_SUPREMUM); } - if (truncate_t::s_fix_up_active) { - ut_ad(page_comp_info != NULL); - n_fields = page_comp_info->n_fields; - ind_id = page_comp_info->index_id; - } else { - if (page_is_leaf(page)) { - n_fields = dict_index_get_n_fields(index); - } else { - n_fields = dict_index_get_n_unique_in_tree_nonleaf(index); - } - ind_id = index->id; - } + const ulint n_fields = page_is_leaf(page) + ? 
dict_index_get_n_fields(index) + : dict_index_get_n_unique_in_tree_nonleaf(index); + index_id_t ind_id = index->id; /* The dense directory excludes the infimum and supremum records. */ n_dense = ulint(page_dir_get_n_heap(page)) - PAGE_HEAP_NO_USER_LOW; @@ -1433,20 +1410,8 @@ page_zip_compress( /* Dense page directory and uncompressed columns, if any */ if (page_is_leaf(page)) { - if ((index && dict_index_is_clust(index)) - || (page_comp_info - && (page_comp_info->type & DICT_CLUSTERED))) { - - if (index) { - trx_id_col = dict_index_get_sys_col_pos( - index, DATA_TRX_ID); - ut_ad(trx_id_col > 0); - ut_ad(trx_id_col != ULINT_UNDEFINED); - } else if (page_comp_info - && (page_comp_info->type - & DICT_CLUSTERED)) { - trx_id_col = page_comp_info->trx_id_pos; - } + if (dict_index_is_clust(index)) { + trx_id_col = index->db_trx_id(); slot_size = PAGE_ZIP_DIR_SLOT_SIZE + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; @@ -1454,10 +1419,6 @@ page_zip_compress( } else { /* Signal the absence of trx_id in page_zip_fields_encode() */ - if (index) { - ut_ad(dict_index_get_sys_col_pos( - index, DATA_TRX_ID) == ULINT_UNDEFINED); - } trx_id_col = 0; slot_size = PAGE_ZIP_DIR_SLOT_SIZE; } @@ -1471,19 +1432,9 @@ page_zip_compress( goto zlib_error; } - c_stream.avail_out -= static_cast<uInt>(n_dense * slot_size); - if (truncate_t::s_fix_up_active) { - ut_ad(page_comp_info != NULL); - c_stream.avail_in = static_cast<uInt>( - page_comp_info->field_len); - for (ulint i = 0; i < page_comp_info->field_len; i++) { - fields[i] = page_comp_info->fields[i]; - } - } else { - c_stream.avail_in = static_cast<uInt>( - page_zip_fields_encode( - n_fields, index, trx_id_col, fields)); - } + c_stream.avail_out -= uInt(n_dense * slot_size); + c_stream.avail_in = uInt(page_zip_fields_encode(n_fields, index, + trx_id_col, fields)); c_stream.next_in = fields; if (UNIV_LIKELY(!trx_id_col)) { @@ -1637,7 +1588,7 @@ err_exit: mutex_exit(&page_zip_stat_per_index_mutex); } - if (page_is_leaf(page) && 
!truncate_t::s_fix_up_active) { + if (page_is_leaf(page)) { dict_index_zip_success(index); } @@ -2169,6 +2120,10 @@ page_zip_apply_log( rec_get_offsets_reverse(data, index, hs & REC_STATUS_NODE_PTR, offsets); + /* Silence a debug assertion in rec_offs_make_valid(). + This will be overwritten in page_zip_set_extra_bytes(), + called by page_zip_decompress_low(). */ + ut_d(rec[-REC_NEW_INFO_BITS] = 0); rec_offs_make_valid(rec, index, is_leaf, offsets); /* Copy the extra bytes (backwards). */ @@ -3770,29 +3725,25 @@ page_zip_write_rec( ulint len; if (dict_index_is_clust(index)) { - ulint trx_id_col; - - trx_id_col = dict_index_get_sys_col_pos(index, - DATA_TRX_ID); - ut_ad(trx_id_col != ULINT_UNDEFINED); - /* Store separately trx_id, roll_ptr and the BTR_EXTERN_FIELD_REF of each BLOB column. */ if (rec_offs_any_extern(offsets)) { data = page_zip_write_rec_ext( page_zip, page, rec, index, offsets, create, - trx_id_col, heap_no, storage, data); + index->db_trx_id(), heap_no, + storage, data); } else { /* Locate trx_id and roll_ptr. */ const byte* src = rec_get_nth_field(rec, offsets, - trx_id_col, &len); + index->db_trx_id(), + &len); ut_ad(len == DATA_TRX_ID_LEN); ut_ad(src + DATA_TRX_ID_LEN == rec_get_nth_field( rec, offsets, - trx_id_col + 1, &len)); + index->db_roll_ptr(), &len)); ut_ad(len == DATA_ROLL_PTR_LEN); /* Log the preceding fields. */ @@ -3820,8 +3771,6 @@ page_zip_write_rec( } else { /* Leaf page of a secondary index: no externally stored columns */ - ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID) - == ULINT_UNDEFINED); ut_ad(!rec_offs_any_extern(offsets)); /* Log the entire record. */ @@ -4807,9 +4756,7 @@ page_zip_reorganize( /* Restore logging. 
*/ mtr_set_log_mode(mtr, log_mode); - if (!page_zip_compress(page_zip, page, index, - page_zip_level, NULL, mtr)) { - + if (!page_zip_compress(page_zip, page, index, page_zip_level, mtr)) { buf_block_free(temp_block); return(FALSE); } diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc index 495c29e4805..19764318c1a 100644 --- a/storage/innobase/rem/rem0rec.cc +++ b/storage/innobase/rem/rem0rec.cc @@ -169,7 +169,7 @@ rec_get_n_extern_new( ut_ad(!index->table->supports_instant() || index->is_dummy); ut_ad(!index->is_instant()); ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY - || rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED); + || rec_get_status(rec) == REC_STATUS_INSTANT); ut_ad(n == ULINT_UNDEFINED || n <= dict_index_get_n_fields(index)); if (n == ULINT_UNDEFINED) { @@ -231,8 +231,8 @@ rec_get_n_extern_new( return(n_extern); } -/** Get the added field count in a REC_STATUS_COLUMNS_ADDED record. -@param[in,out] header variable header of a REC_STATUS_COLUMNS_ADDED record +/** Get the added field count in a REC_STATUS_INSTANT record. +@param[in,out] header variable header of a REC_STATUS_INSTANT record @return number of added fields */ static inline unsigned rec_get_n_add_field(const byte*& header) { @@ -253,18 +253,18 @@ static inline unsigned rec_get_n_add_field(const byte*& header) enum rec_leaf_format { /** Temporary file record */ REC_LEAF_TEMP, - /** Temporary file record, with added columns - (REC_STATUS_COLUMNS_ADDED) */ - REC_LEAF_TEMP_COLUMNS_ADDED, + /** Temporary file record, with added columns (REC_STATUS_INSTANT) */ + REC_LEAF_TEMP_INSTANT, /** Normal (REC_STATUS_ORDINARY) */ REC_LEAF_ORDINARY, - /** With added columns (REC_STATUS_COLUMNS_ADDED) */ - REC_LEAF_COLUMNS_ADDED + /** With add or drop columns (REC_STATUS_INSTANT) */ + REC_LEAF_INSTANT }; /** Determine the offset to each field in a leaf-page record in ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED. This is a special case of rec_init_offsets() and rec_get_offsets_func(). 
+@tparam mblob whether the record includes a metadata BLOB @param[in] rec leaf-page record @param[in] index the index that the record belongs in @param[in] n_core number of core fields (index->n_core_fields) @@ -272,6 +272,7 @@ This is a special case of rec_init_offsets() and rec_get_offsets_func(). NULL to refer to index->fields[].col->def_val @param[in,out] offsets offsets, with valid rec_offs_n_fields(offsets) @param[in] format record format */ +template<bool mblob = false> static inline void rec_init_offsets_comp_ordinary( @@ -293,12 +294,32 @@ rec_init_offsets_comp_ordinary( ut_ad(n_core > 0); ut_ad(index->n_fields >= n_core); ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable)); - ut_ad(format == REC_LEAF_TEMP || format == REC_LEAF_TEMP_COLUMNS_ADDED + ut_ad(format == REC_LEAF_TEMP || format == REC_LEAF_TEMP_INSTANT || dict_table_is_comp(index->table)); - ut_ad(format != REC_LEAF_TEMP_COLUMNS_ADDED + ut_ad(format != REC_LEAF_TEMP_INSTANT || index->n_fields == rec_offs_n_fields(offsets)); ut_d(ulint n_null= 0); + if (mblob) { + ut_ad(index->is_dummy || index->table->instant); + ut_ad(index->is_dummy || index->is_instant()); + ut_ad(rec_offs_n_fields(offsets) + <= ulint(index->n_fields) + 1); + ut_ad(!def_val); + ut_ad(format == REC_LEAF_INSTANT); + nulls -= REC_N_NEW_EXTRA_BYTES; + n_fields = n_core + 1 + rec_get_n_add_field(nulls); + ut_ad(n_fields <= ulint(index->n_fields) + 1); + const ulint n_nullable = index->get_n_nullable(n_fields - 1); + const ulint n_null_bytes = UT_BITS_IN_BYTES(n_nullable); + ut_d(n_null = n_nullable); + ut_ad(n_null <= index->n_nullable); + ut_ad(n_null_bytes >= index->n_core_null_bytes + || n_core < index->n_core_fields); + lens = --nulls - n_null_bytes; + goto start; + } + switch (format) { case REC_LEAF_TEMP: if (dict_table_is_comp(index->table)) { @@ -312,17 +333,15 @@ rec_init_offsets_comp_ordinary( ordinary: lens = --nulls - index->n_core_null_bytes; - ut_d(n_null = std::min(index->n_core_null_bytes * 8U, - 
index->n_nullable)); + ut_d(n_null = std::min<uint>(index->n_core_null_bytes * 8U, + index->n_nullable)); break; - case REC_LEAF_COLUMNS_ADDED: - /* We would have !index->is_instant() when rolling back - an instant ADD COLUMN operation. */ + case REC_LEAF_INSTANT: nulls -= REC_N_NEW_EXTRA_BYTES; ut_ad(index->is_instant()); /* fall through */ - case REC_LEAF_TEMP_COLUMNS_ADDED: - n_fields = n_core + 1 + rec_get_n_add_field(nulls); + case REC_LEAF_TEMP_INSTANT: + n_fields = n_core + rec_get_n_add_field(nulls) + 1; ut_ad(n_fields <= index->n_fields); const ulint n_nullable = index->get_n_nullable(n_fields); const ulint n_null_bytes = UT_BITS_IN_BYTES(n_nullable); @@ -333,26 +352,34 @@ ordinary: lens = --nulls - n_null_bytes; } -#ifdef UNIV_DEBUG +start: /* We cannot invoke rec_offs_make_valid() if format==REC_LEAF_TEMP. Similarly, rec_offs_validate() will fail in that case, because it invokes rec_get_status(). */ - offsets[2] = (ulint) rec; - offsets[3] = (ulint) index; -#endif /* UNIV_DEBUG */ + ut_d(offsets[2] = ulint(rec)); + ut_d(offsets[3] = ulint(index)); /* read the lengths of fields 0..n_fields */ + ulint len; ulint i = 0; - do { - const dict_field_t* field - = dict_index_get_nth_field(index, i); - const dict_col_t* col - = dict_field_get_col(field); - ulint len; + const dict_field_t* field = index->fields; - /* set default value flag */ - if (i < n_fields) { - } else if (def_val) { + do { + if (mblob) { + if (i == index->first_user_field()) { + offs += FIELD_REF_SIZE; + len = offs | REC_OFFS_EXTERNAL; + any |= REC_OFFS_EXTERNAL; + field--; + continue; + } else if (i >= n_fields) { + len = offs | REC_OFFS_DEFAULT; + any |= REC_OFFS_DEFAULT; + continue; + } + } else if (i < n_fields) { + /* The field is present, and will be covered below. 
*/ + } else if (!mblob && def_val) { const dict_col_t::def_t& d = def_val[i - n_core]; if (!d.data) { len = offs | REC_OFFS_SQL_NULL; @@ -362,21 +389,22 @@ ordinary: any |= REC_OFFS_DEFAULT; } - goto resolved; + continue; } else { - ulint dlen; - if (!index->instant_field_value(i, &dlen)) { + if (!index->instant_field_value(i, &len)) { + ut_ad(len == UNIV_SQL_NULL); len = offs | REC_OFFS_SQL_NULL; - ut_ad(dlen == UNIV_SQL_NULL); } else { len = offs | REC_OFFS_DEFAULT; any |= REC_OFFS_DEFAULT; } - goto resolved; + continue; } - if (!(col->prtype & DATA_NOT_NULL)) { + const dict_col_t* col = field->col; + + if (col->is_nullable()) { /* nullable field => read the null flag */ ut_ad(n_null--); @@ -392,7 +420,7 @@ ordinary: the length to zero and enable the SQL NULL flag in offsets[]. */ len = offs | REC_OFFS_SQL_NULL; - goto resolved; + continue; } null_mask <<= 1; } @@ -423,16 +451,15 @@ ordinary: len = offs; } - goto resolved; + continue; } len = offs += len; } else { len = offs += field->fixed_len; } -resolved: - rec_offs_base(offsets)[i + 1] = len; - } while (++i < rec_offs_n_fields(offsets)); + } while (field++, rec_offs_base(offsets)[++i] = len, + i < rec_offs_n_fields(offsets)); *rec_offs_base(offsets) = ulint(rec - (lens + 1)) | REC_OFFS_COMPACT | any; @@ -451,7 +478,10 @@ rec_offs_make_valid( bool leaf, ulint* offsets) { - ut_ad(rec_offs_n_fields(offsets) + const bool is_alter_metadata = leaf + && rec_is_alter_metadata(rec, *index); + ut_ad(is_alter_metadata + || rec_offs_n_fields(offsets) <= (leaf ? 
dict_index_get_n_fields(index) : dict_index_get_n_unique_in_tree_nonleaf(index) + 1) @@ -469,7 +499,8 @@ rec_offs_make_valid( || n >= rec_offs_n_fields(offsets)); for (; n < rec_offs_n_fields(offsets); n++) { ut_ad(leaf); - ut_ad(rec_offs_base(offsets)[1 + n] & REC_OFFS_DEFAULT); + ut_ad(is_alter_metadata + || rec_offs_base(offsets)[1 + n] & REC_OFFS_DEFAULT); } offsets[2] = ulint(rec); offsets[3] = ulint(index); @@ -509,14 +540,18 @@ rec_offs_validate( } } if (index) { - ulint max_n_fields; ut_ad(ulint(index) == offsets[3]); - max_n_fields = ut_max( + ulint max_n_fields = ut_max( dict_index_get_n_fields(index), dict_index_get_n_unique_in_tree(index) + 1); if (comp && rec) { switch (rec_get_status(rec)) { - case REC_STATUS_COLUMNS_ADDED: + case REC_STATUS_INSTANT: + ut_ad(index->is_instant() || index->is_dummy); + ut_ad(max_n_fields == index->n_fields); + max_n_fields += index->table->instant + || index->is_dummy; + break; case REC_STATUS_ORDINARY: break; case REC_STATUS_NODE_PTR: @@ -530,14 +565,19 @@ rec_offs_validate( default: ut_error; } + } else if (max_n_fields == index->n_fields + && (index->is_dummy + || (index->is_instant() + && index->table->instant))) { + max_n_fields++; } /* index->n_def == 0 for dummy indexes if !comp */ - ut_a(!comp || index->n_def); - ut_a(!index->n_def || i <= max_n_fields); + ut_ad(!comp || index->n_def); + ut_ad(!index->n_def || i <= max_n_fields); } while (i--) { ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK; - ut_a(curr <= last); + ut_ad(curr <= last); last = curr; } return(TRUE); @@ -598,12 +638,12 @@ rec_init_offsets( = dict_index_get_n_unique_in_tree_nonleaf( index); break; - case REC_STATUS_COLUMNS_ADDED: + case REC_STATUS_INSTANT: ut_ad(leaf); rec_init_offsets_comp_ordinary(rec, index, offsets, index->n_core_fields, NULL, - REC_LEAF_COLUMNS_ADDED); + REC_LEAF_INSTANT); return; case REC_STATUS_ORDINARY: ut_ad(leaf); @@ -780,6 +820,7 @@ rec_get_offsets_func( { ulint n; ulint size; + bool alter_metadata = false; 
ut_ad(rec); ut_ad(index); @@ -788,10 +829,12 @@ rec_get_offsets_func( if (dict_table_is_comp(index->table)) { switch (UNIV_EXPECT(rec_get_status(rec), REC_STATUS_ORDINARY)) { - case REC_STATUS_COLUMNS_ADDED: + case REC_STATUS_INSTANT: + alter_metadata = rec_is_alter_metadata(rec, true); + /* fall through */ case REC_STATUS_ORDINARY: ut_ad(leaf); - n = dict_index_get_n_fields(index); + n = dict_index_get_n_fields(index) + alter_metadata; break; case REC_STATUS_NODE_PTR: /* Node pointer records consist of the @@ -835,7 +878,8 @@ rec_get_offsets_func( || dict_index_is_ibuf(index) || n == n_fields /* btr_pcur_restore_position() */ || (n + (index->id == DICT_INDEXES_ID) - >= index->n_core_fields && n <= index->n_fields)); + >= index->n_core_fields && n <= index->n_fields + + unsigned(rec_is_alter_metadata(rec, false)))); if (is_user_rec && leaf && n < index->n_fields) { ut_ad(!index->is_dummy); @@ -865,8 +909,24 @@ rec_get_offsets_func( } rec_offs_set_n_fields(offsets, n); - rec_init_offsets(rec, index, leaf, offsets); - return(offsets); + + if (UNIV_UNLIKELY(alter_metadata) + && dict_table_is_comp(index->table)) { + ut_d(offsets[2] = ulint(rec)); + ut_d(offsets[3] = ulint(index)); + ut_ad(leaf); + ut_ad(index->is_dummy || index->table->instant); + ut_ad(index->is_dummy || index->is_instant()); + ut_ad(rec_offs_n_fields(offsets) + <= ulint(index->n_fields) + 1); + rec_init_offsets_comp_ordinary<true>(rec, index, offsets, + index->n_core_fields, + NULL, + REC_LEAF_INSTANT); + } else { + rec_init_offsets(rec, index, leaf, offsets); + } + return offsets; } /******************************************************//** @@ -1044,36 +1104,45 @@ rec_get_nth_field_offs_old( return(os); } -/**********************************************************//** -Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT. +/** Determine the size of a data tuple prefix in ROW_FORMAT=COMPACT. 
+@tparam mblob whether the record includes a metadata BLOB +@param[in] index record descriptor; dict_table_is_comp() + is assumed to hold, even if it doesn't +@param[in] dfield array of data fields +@param[in] n_fields number of data fields +@param[out] extra extra size +@param[in] status status flags +@param[in] temp whether this is a temporary file record @return total size */ -MY_ATTRIBUTE((warn_unused_result, nonnull(1,2))) +template<bool mblob = false> static inline ulint rec_get_converted_size_comp_prefix_low( -/*===================================*/ - const dict_index_t* index, /*!< in: record descriptor; - dict_table_is_comp() is - assumed to hold, even if - it does not */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra, /*!< out: extra size */ - rec_comp_status_t status, /*!< in: status flags */ - bool temp) /*!< in: whether this is a - temporary file record */ + const dict_index_t* index, + const dfield_t* dfield, + ulint n_fields, + ulint* extra, + rec_comp_status_t status, + bool temp) { ulint extra_size = temp ? 
0 : REC_N_NEW_EXTRA_BYTES; - ulint data_size; - ulint i; ut_ad(n_fields > 0); - ut_ad(n_fields <= dict_index_get_n_fields(index)); + ut_ad(n_fields <= dict_index_get_n_fields(index) + mblob); ut_d(ulint n_null = index->n_nullable); ut_ad(status == REC_STATUS_ORDINARY || status == REC_STATUS_NODE_PTR - || status == REC_STATUS_COLUMNS_ADDED); + || status == REC_STATUS_INSTANT); - if (status == REC_STATUS_COLUMNS_ADDED - && (!temp || n_fields > index->n_core_fields)) { + if (mblob) { + ut_ad(!temp); + ut_ad(index->table->instant); + ut_ad(index->is_instant()); + ut_ad(status == REC_STATUS_INSTANT); + ut_ad(n_fields == ulint(index->n_fields) + 1); + extra_size += UT_BITS_IN_BYTES(index->n_nullable) + + rec_get_n_add_field_len(n_fields - 1 + - index->n_core_fields); + } else if (status == REC_STATUS_INSTANT + && (!temp || n_fields > index->n_core_fields)) { ut_ad(index->is_instant()); ut_ad(UT_BITS_IN_BYTES(n_null) >= index->n_core_null_bytes); extra_size += UT_BITS_IN_BYTES(index->get_n_nullable(n_fields)) @@ -1084,7 +1153,7 @@ rec_get_converted_size_comp_prefix_low( extra_size += index->n_core_null_bytes; } - data_size = 0; + ulint data_size = 0; if (temp && dict_table_is_comp(index->table)) { /* No need to do adjust fixed_len=0. 
We only need to @@ -1092,48 +1161,50 @@ rec_get_converted_size_comp_prefix_low( temp = false; } + const dfield_t* const end = dfield + n_fields; /* read the lengths of fields 0..n */ - for (i = 0; i < n_fields; i++) { - const dict_field_t* field; - ulint len; - ulint fixed_len; - const dict_col_t* col; + for (ulint i = 0; dfield < end; i++, dfield++) { + if (mblob && i == index->first_user_field()) { + data_size += FIELD_REF_SIZE; + ++dfield; + } - field = dict_index_get_nth_field(index, i); - len = dfield_get_len(&fields[i]); - col = dict_field_get_col(field); + ulint len = dfield_get_len(dfield); + const dict_field_t* field = dict_index_get_nth_field(index, i); #ifdef UNIV_DEBUG - dtype_t* type; - - type = dfield_get_type(&fields[i]); if (dict_index_is_spatial(index)) { - if (DATA_GEOMETRY_MTYPE(col->mtype) && i == 0) { - ut_ad(type->prtype & DATA_GIS_MBR); + if (DATA_GEOMETRY_MTYPE(field->col->mtype) && i == 0) { + ut_ad(dfield->type.prtype & DATA_GIS_MBR); } else { - ut_ad(type->mtype == DATA_SYS_CHILD - || dict_col_type_assert_equal(col, type)); + ut_ad(dfield->type.mtype == DATA_SYS_CHILD + || dict_col_type_assert_equal( + field->col, &dfield->type)); } } else { - ut_ad(dict_col_type_assert_equal(col, type)); + ut_ad(field->col->is_dropped() + || dict_col_type_assert_equal(field->col, + &dfield->type)); } #endif /* All NULLable fields must be included in the n_null count. */ - ut_ad((col->prtype & DATA_NOT_NULL) || n_null--); + ut_ad(!field->col->is_nullable() || n_null--); - if (dfield_is_null(&fields[i])) { + if (dfield_is_null(dfield)) { /* No length is stored for NULL fields. 
*/ - ut_ad(!(col->prtype & DATA_NOT_NULL)); + ut_ad(field->col->is_nullable()); continue; } - ut_ad(len <= col->len || DATA_LARGE_MTYPE(col->mtype) - || (col->len == 0 && col->mtype == DATA_VARCHAR)); + ut_ad(len <= field->col->len + || DATA_LARGE_MTYPE(field->col->mtype) + || (field->col->len == 0 + && field->col->mtype == DATA_VARCHAR)); - fixed_len = field->fixed_len; + ulint fixed_len = field->fixed_len; if (temp && fixed_len - && !dict_col_get_fixed_size(col, temp)) { + && !dict_col_get_fixed_size(field->col, temp)) { fixed_len = 0; } /* If the maximum length of a variable-length field @@ -1148,25 +1219,27 @@ rec_get_converted_size_comp_prefix_low( ut_ad(len <= fixed_len); if (dict_index_is_spatial(index)) { - ut_ad(type->mtype == DATA_SYS_CHILD - || !col->mbmaxlen - || len >= col->mbminlen - * fixed_len / col->mbmaxlen); + ut_ad(dfield->type.mtype == DATA_SYS_CHILD + || !field->col->mbmaxlen + || len >= field->col->mbminlen + * fixed_len / field->col->mbmaxlen); } else { - ut_ad(type->mtype != DATA_SYS_CHILD); - ut_ad(!col->mbmaxlen - || len >= col->mbminlen - * fixed_len / col->mbmaxlen); + ut_ad(dfield->type.mtype != DATA_SYS_CHILD); + + ut_ad(field->col->is_dropped() + || !field->col->mbmaxlen + || len >= field->col->mbminlen + * fixed_len / field->col->mbmaxlen); } /* dict_index_add_col() should guarantee this */ ut_ad(!field->prefix_len || fixed_len == field->prefix_len); #endif /* UNIV_DEBUG */ - } else if (dfield_is_ext(&fields[i])) { - ut_ad(DATA_BIG_COL(col)); + } else if (dfield_is_ext(dfield)) { + ut_ad(DATA_BIG_COL(field->col)); extra_size += 2; - } else if (len < 128 || !DATA_BIG_COL(col)) { + } else if (len < 128 || !DATA_BIG_COL(field->col)) { extra_size++; } else { /* For variable-length columns, we look up the @@ -1202,43 +1275,51 @@ rec_get_converted_size_comp_prefix( REC_STATUS_ORDINARY, false)); } -/**********************************************************//** -Determines the size of a data tuple in ROW_FORMAT=COMPACT. 
+/** Determine the size of a record in ROW_FORMAT=COMPACT. +@param[in] index record descriptor. dict_table_is_comp() + is assumed to hold, even if it doesn't +@param[in] tuple logical record +@param[out] extra extra size @return total size */ ulint rec_get_converted_size_comp( -/*========================*/ - const dict_index_t* index, /*!< in: record descriptor; - dict_table_is_comp() is - assumed to hold, even if - it does not */ - rec_comp_status_t status, /*!< in: status bits of the record */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra) /*!< out: extra size */ + const dict_index_t* index, + const dtuple_t* tuple, + ulint* extra) { - ut_ad(n_fields > 0); + ut_ad(tuple->n_fields > 0); + + rec_comp_status_t status = rec_comp_status_t(tuple->info_bits + & REC_NEW_STATUS_MASK); switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) { case REC_STATUS_ORDINARY: - if (n_fields > index->n_core_fields) { + ut_ad(!tuple->is_metadata()); + if (tuple->n_fields > index->n_core_fields) { ut_ad(index->is_instant()); - status = REC_STATUS_COLUMNS_ADDED; + status = REC_STATUS_INSTANT; } /* fall through */ - case REC_STATUS_COLUMNS_ADDED: - ut_ad(n_fields >= index->n_core_fields); - ut_ad(n_fields <= index->n_fields); + case REC_STATUS_INSTANT: + ut_ad(tuple->n_fields >= index->n_core_fields); + if (tuple->is_alter_metadata()) { + return rec_get_converted_size_comp_prefix_low<true>( + index, tuple->fields, tuple->n_fields, + extra, status, false); + } + ut_ad(tuple->n_fields <= index->n_fields); return rec_get_converted_size_comp_prefix_low( - index, fields, n_fields, extra, status, false); + index, tuple->fields, tuple->n_fields, + extra, status, false); case REC_STATUS_NODE_PTR: - n_fields--; - ut_ad(n_fields == dict_index_get_n_unique_in_tree_nonleaf( - index)); - ut_ad(dfield_get_len(&fields[n_fields]) == REC_NODE_PTR_SIZE); + ut_ad(tuple->n_fields - 1 + == 
dict_index_get_n_unique_in_tree_nonleaf(index)); + ut_ad(dfield_get_len(&tuple->fields[tuple->n_fields - 1]) + == REC_NODE_PTR_SIZE); return REC_NODE_PTR_SIZE /* child page number */ + rec_get_converted_size_comp_prefix_low( - index, fields, n_fields, extra, status, false); + index, tuple->fields, tuple->n_fields - 1, + extra, status, false); case REC_STATUS_INFIMUM: case REC_STATUS_SUPREMUM: /* not supported */ @@ -1416,33 +1497,30 @@ rec_convert_dtuple_to_rec_old( } /** Convert a data tuple into a ROW_FORMAT=COMPACT record. +@tparam mblob whether the record includes a metadata BLOB @param[out] rec converted record @param[in] index index -@param[in] fields data fields to convert +@param[in] field data fields to convert @param[in] n_fields number of data fields @param[in] status rec_get_status(rec) @param[in] temp whether to use the format for temporary files in index creation */ +template<bool mblob = false> static inline void rec_convert_dtuple_to_rec_comp( rec_t* rec, const dict_index_t* index, - const dfield_t* fields, + const dfield_t* field, ulint n_fields, rec_comp_status_t status, bool temp) { - const dfield_t* field; - const dtype_t* type; byte* end; byte* nulls = temp ? 
rec - 1 : rec - (REC_N_NEW_EXTRA_BYTES + 1); byte* UNINIT_VAR(lens); - ulint len; - ulint i; ulint UNINIT_VAR(n_node_ptr_field); - ulint fixed_len; ulint null_mask = 1; ut_ad(n_fields > 0); @@ -1451,8 +1529,22 @@ rec_convert_dtuple_to_rec_comp( ut_d(ulint n_null = index->n_nullable); + if (mblob) { + ut_ad(!temp); + ut_ad(index->table->instant); + ut_ad(index->is_instant()); + ut_ad(status == REC_STATUS_INSTANT); + ut_ad(n_fields == ulint(index->n_fields) + 1); + rec_set_n_add_field(nulls, n_fields - 1 + - index->n_core_fields); + rec_set_heap_no_new(rec, PAGE_HEAP_NO_USER_LOW); + rec_set_status(rec, REC_STATUS_INSTANT); + n_node_ptr_field = ULINT_UNDEFINED; + lens = nulls - UT_BITS_IN_BYTES(index->n_nullable); + goto start; + } switch (status) { - case REC_STATUS_COLUMNS_ADDED: + case REC_STATUS_INSTANT: ut_ad(index->is_instant()); ut_ad(n_fields > index->n_core_fields); rec_set_n_add_field(nulls, n_fields - 1 @@ -1462,19 +1554,24 @@ rec_convert_dtuple_to_rec_comp( ut_ad(n_fields <= dict_index_get_n_fields(index)); if (!temp) { rec_set_heap_no_new(rec, PAGE_HEAP_NO_USER_LOW); - rec_set_status(rec, n_fields == index->n_core_fields - ? REC_STATUS_ORDINARY - : REC_STATUS_COLUMNS_ADDED); - } if (dict_table_is_comp(index->table)) { + + rec_set_status( + rec, n_fields == index->n_core_fields + ? REC_STATUS_ORDINARY + : REC_STATUS_INSTANT); + } + + if (dict_table_is_comp(index->table)) { /* No need to do adjust fixed_len=0. We only need to adjust it for ROW_FORMAT=REDUNDANT. */ temp = false; } n_node_ptr_field = ULINT_UNDEFINED; + lens = nulls - (index->is_instant() ? 
UT_BITS_IN_BYTES(index->get_n_nullable( - n_fields)) + n_fields)) : UT_BITS_IN_BYTES( unsigned(index->n_nullable))); break; @@ -1484,8 +1581,8 @@ rec_convert_dtuple_to_rec_comp( rec_set_status(rec, status); ut_ad(n_fields == dict_index_get_n_unique_in_tree_nonleaf(index) + 1); - ut_d(n_null = std::min(index->n_core_null_bytes * 8U, - index->n_nullable)); + ut_d(n_null = std::min<uint>(index->n_core_null_bytes * 8U, + index->n_nullable)); n_node_ptr_field = n_fields - 1; lens = nulls - index->n_core_null_bytes; break; @@ -1495,30 +1592,33 @@ rec_convert_dtuple_to_rec_comp( return; } +start: end = rec; /* clear the SQL-null flags */ memset(lens + 1, 0, ulint(nulls - lens)); + const dfield_t* const fend = field + n_fields; /* Store the data and the offsets */ - - for (i = 0; i < n_fields; i++) { - const dict_field_t* ifield; - dict_col_t* col = NULL; - - field = &fields[i]; - - type = dfield_get_type(field); - len = dfield_get_len(field); - - if (UNIV_UNLIKELY(i == n_node_ptr_field)) { - ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); + for (ulint i = 0; field < fend; i++, field++) { + ulint len = dfield_get_len(field); + + if (mblob) { + if (i == index->first_user_field()) { + ut_ad(len == FIELD_REF_SIZE); + ut_ad(dfield_is_ext(field)); + memcpy(end, dfield_get_data(field), len); + end += len; + len = dfield_get_len(++field); + } + } else if (UNIV_UNLIKELY(i == n_node_ptr_field)) { + ut_ad(field->type.prtype & DATA_NOT_NULL); ut_ad(len == REC_NODE_PTR_SIZE); memcpy(end, dfield_get_data(field), len); end += REC_NODE_PTR_SIZE; break; } - if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { + if (!(field->type.prtype & DATA_NOT_NULL)) { /* nullable field */ ut_ad(n_null--); @@ -1541,11 +1641,12 @@ rec_convert_dtuple_to_rec_comp( /* only nullable fields can be null */ ut_ad(!dfield_is_null(field)); - ifield = dict_index_get_nth_field(index, i); - fixed_len = ifield->fixed_len; - col = ifield->col; + const dict_field_t* ifield + = dict_index_get_nth_field(index, i); + ulint 
fixed_len = ifield->fixed_len; + if (temp && fixed_len - && !dict_col_get_fixed_size(col, temp)) { + && !dict_col_get_fixed_size(ifield->col, temp)) { fixed_len = 0; } @@ -1557,23 +1658,23 @@ rec_convert_dtuple_to_rec_comp( it is 128 or more, or when the field is stored externally. */ if (fixed_len) { ut_ad(len <= fixed_len); - ut_ad(!col->mbmaxlen - || len >= col->mbminlen - * fixed_len / col->mbmaxlen); + ut_ad(!ifield->col->mbmaxlen + || len >= ifield->col->mbminlen + * fixed_len / ifield->col->mbmaxlen); ut_ad(!dfield_is_ext(field)); } else if (dfield_is_ext(field)) { - ut_ad(DATA_BIG_COL(col)); + ut_ad(DATA_BIG_COL(ifield->col)); ut_ad(len <= REC_ANTELOPE_MAX_INDEX_COL_LEN - + BTR_EXTERN_FIELD_REF_SIZE); + + BTR_EXTERN_FIELD_REF_SIZE); *lens-- = (byte) (len >> 8) | 0xc0; *lens-- = (byte) len; } else { - ut_ad(len <= dtype_get_len(type) - || DATA_LARGE_MTYPE(dtype_get_mtype(type)) + ut_ad(len <= field->type.len + || DATA_LARGE_MTYPE(field->type.mtype) || !strcmp(index->name, FTS_INDEX_TABLE_IND_NAME)); if (len < 128 || !DATA_BIG_LEN_MTYPE( - dtype_get_len(type), dtype_get_mtype(type))) { + field->type.len, field->type.mtype)) { *lens-- = (byte) len; } else { @@ -1606,24 +1707,37 @@ rec_convert_dtuple_to_rec_new( ut_ad(!(dtuple->info_bits & ~(REC_NEW_STATUS_MASK | REC_INFO_DELETED_FLAG | REC_INFO_MIN_REC_FLAG))); - rec_comp_status_t status = static_cast<rec_comp_status_t>( - dtuple->info_bits & REC_NEW_STATUS_MASK); - if (status == REC_STATUS_ORDINARY - && dtuple->n_fields > index->n_core_fields) { - ut_ad(index->is_instant()); - status = REC_STATUS_COLUMNS_ADDED; - } ulint extra_size; - rec_get_converted_size_comp( - index, status, dtuple->fields, dtuple->n_fields, &extra_size); - rec_t* rec = buf + extra_size; + if (UNIV_UNLIKELY(dtuple->is_alter_metadata())) { + ut_ad((dtuple->info_bits & REC_NEW_STATUS_MASK) + == REC_STATUS_INSTANT); + rec_get_converted_size_comp_prefix_low<true>( + index, dtuple->fields, dtuple->n_fields, + &extra_size, REC_STATUS_INSTANT, 
false); + buf += extra_size; + rec_convert_dtuple_to_rec_comp<true>( + buf, index, dtuple->fields, dtuple->n_fields, + REC_STATUS_INSTANT, false); + } else { + rec_get_converted_size_comp(index, dtuple, &extra_size); + buf += extra_size; + rec_comp_status_t status = rec_comp_status_t( + dtuple->info_bits & REC_NEW_STATUS_MASK); + if (status == REC_STATUS_ORDINARY + && dtuple->n_fields > index->n_core_fields) { + ut_ad(index->is_instant()); + status = REC_STATUS_INSTANT; + } - rec_convert_dtuple_to_rec_comp( - rec, index, dtuple->fields, dtuple->n_fields, status, false); - rec_set_info_bits_new(rec, dtuple->info_bits & ~REC_NEW_STATUS_MASK); - return(rec); + rec_convert_dtuple_to_rec_comp( + buf, index, dtuple->fields, dtuple->n_fields, + status, false); + } + + rec_set_info_bits_new(buf, dtuple->info_bits & ~REC_NEW_STATUS_MASK); + return buf; } /*********************************************************//** @@ -1662,7 +1776,7 @@ rec_convert_dtuple_to_rec( @param[in] fields data fields @param[in] n_fields number of data fields @param[out] extra record header size -@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED +@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT @return total size, in bytes */ ulint rec_get_converted_size_temp( @@ -1682,7 +1796,7 @@ rec_get_converted_size_temp( @param[in,out] offsets offsets to the fields; in: rec_offs_n_fields(offsets) @param[in] n_core number of core fields (index->n_core_fields) @param[in] def_val default values for non-core fields -@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED */ +@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT */ void rec_init_offsets_temp( const rec_t* rec, @@ -1693,14 +1807,14 @@ rec_init_offsets_temp( rec_comp_status_t status) { ut_ad(status == REC_STATUS_ORDINARY - || status == REC_STATUS_COLUMNS_ADDED); + || status == REC_STATUS_INSTANT); /* The table may have been converted to plain format if it was emptied during an ALTER TABLE operation. 
*/ ut_ad(index->n_core_fields == n_core || !index->is_instant()); ut_ad(index->n_core_fields >= n_core); rec_init_offsets_comp_ordinary(rec, index, offsets, n_core, def_val, - status == REC_STATUS_COLUMNS_ADDED - ? REC_LEAF_TEMP_COLUMNS_ADDED + status == REC_STATUS_INSTANT + ? REC_LEAF_TEMP_INSTANT : REC_LEAF_TEMP); } @@ -1726,7 +1840,7 @@ rec_init_offsets_temp( @param[in] index clustered or secondary index @param[in] fields data fields @param[in] n_fields number of data fields -@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED +@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT */ void rec_convert_dtuple_to_temp( @@ -1896,13 +2010,15 @@ rec_copy_prefix_to_buf( ut_ad(n_fields <= dict_index_get_n_unique_in_tree_nonleaf(index)); break; - case REC_STATUS_COLUMNS_ADDED: + case REC_STATUS_INSTANT: /* We would have !index->is_instant() when rolling back an instant ADD COLUMN operation. */ ut_ad(index->is_instant() || page_rec_is_metadata(rec)); + ut_ad(n_fields <= index->first_user_field()); nulls++; const ulint n_rec = ulint(index->n_core_fields) + 1 - + rec_get_n_add_field(nulls); + + rec_get_n_add_field(nulls) + - rec_is_alter_metadata(rec, true); instant_omit = ulint(&rec[-REC_N_NEW_EXTRA_BYTES] - nulls); ut_ad(instant_omit == 1 || instant_omit == 2); nullf = nulls; @@ -1991,7 +2107,7 @@ rec_copy_prefix_to_buf( /* copy the fixed-size header and the record prefix */ memcpy(b - REC_N_NEW_EXTRA_BYTES, rec - REC_N_NEW_EXTRA_BYTES, prefix_len + REC_N_NEW_EXTRA_BYTES); - ut_ad(rec_get_status(b) == REC_STATUS_COLUMNS_ADDED); + ut_ad(rec_get_status(b) == REC_STATUS_INSTANT); rec_set_status(b, REC_STATUS_ORDINARY); return b; } else { @@ -2518,8 +2634,6 @@ rec_get_trx_id( const rec_t* rec, const dict_index_t* index) { - ulint trx_id_col - = dict_index_get_sys_col_pos(index, DATA_TRX_ID); const byte* trx_id; ulint len; mem_heap_t* heap = NULL; @@ -2527,15 +2641,10 @@ rec_get_trx_id( rec_offs_init(offsets_); ulint* offsets = offsets_; - ut_ad(trx_id_col 
<= MAX_REF_PARTS); - ut_ad(dict_index_is_clust(index)); - ut_ad(trx_id_col > 0); - ut_ad(trx_id_col != ULINT_UNDEFINED); - offsets = rec_get_offsets(rec, index, offsets, true, - trx_id_col + 1, &heap); + index->db_trx_id() + 1, &heap); - trx_id = rec_get_nth_field(rec, offsets, trx_id_col, &len); + trx_id = rec_get_nth_field(rec, offsets, index->db_trx_id(), &len); ut_ad(len == DATA_TRX_ID_LEN); diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 1fdd2ac9b94..8fc71d698df 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1459,7 +1459,7 @@ IndexPurge::open() UNIV_NOTHROW btr_pcur_open_at_index_side( true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, 0, &m_mtr); btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr); - if (rec_is_metadata(btr_pcur_get_rec(&m_pcur), m_index)) { + if (rec_is_metadata(btr_pcur_get_rec(&m_pcur), *m_index)) { ut_ad(btr_pcur_is_on_user_rec(&m_pcur)); /* Skip the metadata pseudo-record. */ } else { @@ -2224,17 +2224,15 @@ row_import_adjust_root_pages_of_secondary_indexes( } /*****************************************************************//** -Ensure that dict_sys->row_id exceeds SELECT MAX(DB_ROW_ID). -@return error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t +Ensure that dict_sys->row_id exceeds SELECT MAX(DB_ROW_ID). */ +MY_ATTRIBUTE((nonnull)) static +void row_import_set_sys_max_row_id( /*==========================*/ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */ const dict_table_t* table) /*!< in: table to import */ { - dberr_t err; const rec_t* rec; mtr_t mtr; btr_pcur_t pcur; @@ -2242,7 +2240,8 @@ row_import_set_sys_max_row_id( dict_index_t* index; index = dict_table_get_first_index(table); - ut_a(dict_index_is_clust(index)); + ut_ad(index->is_primary()); + ut_ad(dict_index_is_auto_gen_clust(index)); mtr_start(&mtr); @@ -2263,57 +2262,17 @@ row_import_set_sys_max_row_id( /* Check for empty table. 
*/ if (page_rec_is_infimum(rec)) { /* The table is empty. */ - err = DB_SUCCESS; - } else if (rec_is_metadata(rec, index)) { + } else if (rec_is_metadata(rec, *index)) { /* The clustered index contains the metadata record only, that is, the table is empty. */ - err = DB_SUCCESS; } else { - ulint len; - const byte* field; - mem_heap_t* heap = NULL; - ulint offsets_[1 + REC_OFFS_HEADER_SIZE]; - ulint* offsets; - - rec_offs_init(offsets_); - - offsets = rec_get_offsets( - rec, index, offsets_, true, ULINT_UNDEFINED, &heap); - - field = rec_get_nth_field( - rec, offsets, - dict_index_get_sys_col_pos(index, DATA_ROW_ID), - &len); - - if (len == DATA_ROW_ID_LEN) { - row_id = mach_read_from_6(field); - err = DB_SUCCESS; - } else { - err = DB_CORRUPTION; - } - - if (heap != NULL) { - mem_heap_free(heap); - } + row_id = mach_read_from_6(rec); } btr_pcur_close(&pcur); mtr_commit(&mtr); - DBUG_EXECUTE_IF("ib_import_set_max_rowid_failure", - err = DB_CORRUPTION;); - - if (err != DB_SUCCESS) { - ib_errf(prebuilt->trx->mysql_thd, - IB_LOG_LEVEL_WARN, - ER_INNODB_INDEX_CORRUPT, - "Index `%s` corruption detected, invalid DB_ROW_ID" - " in index.", index->name()); - - return(err); - - } else if (row_id > 0) { - + if (row_id) { /* Update the system row id if the imported index row id is greater than the max system row id. */ @@ -2326,8 +2285,6 @@ row_import_set_sys_max_row_id( mutex_exit(&dict_sys->mutex); } - - return(DB_SUCCESS); } /*****************************************************************//** @@ -4073,12 +4030,7 @@ row_import_for_mysql( any DB_ROW_ID stored in the table. 
*/ if (prebuilt->clust_index_was_generated) { - - err = row_import_set_sys_max_row_id(prebuilt, table); - - if (err != DB_SUCCESS) { - return(row_import_error(prebuilt, trx, err)); - } + row_import_set_sys_max_row_id(prebuilt, table); } ib::info() << "Phase III - Flush changes to disk"; diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index bf454fbb505..27df0a9249d 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1270,8 +1270,10 @@ row_ins_foreign_check_on_constraint( } if (table->fts) { - doc_id = fts_get_doc_id_from_rec(table, clust_rec, - clust_index, tmp_heap); + doc_id = fts_get_doc_id_from_rec( + clust_rec, clust_index, + rec_get_offsets(clust_rec, clust_index, NULL, true, + ULINT_UNDEFINED, &tmp_heap)); } if (node->is_delete @@ -2591,25 +2593,32 @@ row_ins_clust_index_entry_low( } else { index->set_modified(mtr); - if (mode == BTR_MODIFY_LEAF - && dict_index_is_online_ddl(index)) { - mode = BTR_MODIFY_LEAF_ALREADY_S_LATCHED; - mtr_s_lock(dict_index_get_lock(index), &mtr); - } + if (UNIV_UNLIKELY(entry->is_metadata())) { + ut_ad(index->is_instant()); + ut_ad(!dict_index_is_online_ddl(index)); + ut_ad(mode == BTR_MODIFY_TREE); + } else { + if (mode == BTR_MODIFY_LEAF + && dict_index_is_online_ddl(index)) { + mode = BTR_MODIFY_LEAF_ALREADY_S_LATCHED; + mtr_s_lock(dict_index_get_lock(index), &mtr); + } - if (unsigned ai = index->table->persistent_autoinc) { - /* Prepare to persist the AUTO_INCREMENT value - from the index entry to PAGE_ROOT_AUTO_INC. */ - const dfield_t* dfield = dtuple_get_nth_field( - entry, ai - 1); - auto_inc = dfield_is_null(dfield) - ? 0 - : row_parse_int(static_cast<const byte*>( + if (unsigned ai = index->table->persistent_autoinc) { + /* Prepare to persist the AUTO_INCREMENT value + from the index entry to PAGE_ROOT_AUTO_INC. 
*/ + const dfield_t* dfield = dtuple_get_nth_field( + entry, ai - 1); + if (!dfield_is_null(dfield)) { + auto_inc = row_parse_int( + static_cast<const byte*>( dfield->data), dfield->len, dfield->type.mtype, dfield->type.prtype & DATA_UNSIGNED); + } + } } } @@ -2639,7 +2648,7 @@ row_ins_clust_index_entry_low( #endif /* UNIV_DEBUG */ if (UNIV_UNLIKELY(entry->info_bits != 0)) { - ut_ad(entry->info_bits == REC_INFO_METADATA); + ut_ad(entry->is_metadata()); ut_ad(flags == BTR_NO_LOCKING_FLAG); ut_ad(index->is_instant()); ut_ad(!dict_index_is_online_ddl(index)); @@ -2647,28 +2656,18 @@ row_ins_clust_index_entry_low( const rec_t* rec = btr_cur_get_rec(cursor); - switch (rec_get_info_bits(rec, page_rec_is_comp(rec)) - & (REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG)) { - case REC_INFO_MIN_REC_FLAG: + if (rec_get_info_bits(rec, page_rec_is_comp(rec)) + & REC_INFO_MIN_REC_FLAG) { thr_get_trx(thr)->error_info = index; err = DB_DUPLICATE_KEY; goto err_exit; - case REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG: - /* The metadata record never carries the delete-mark - in MariaDB Server 10.3. - If a table loses its 'instantness', it happens - by the rollback of this first-time insert, or - by a call to btr_page_empty() on the root page - when the table becomes empty. 
*/ - err = DB_CORRUPTION; - goto err_exit; - default: - ut_ad(!row_ins_must_modify_rec(cursor)); - goto do_insert; } + + ut_ad(!row_ins_must_modify_rec(cursor)); + goto do_insert; } - if (rec_is_metadata(btr_cur_get_rec(cursor), index)) { + if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { goto do_insert; } @@ -3454,6 +3453,23 @@ row_ins_index_entry_set_vals( ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(index->table)); row_field = dtuple_get_nth_v_field(row, v_col->v_pos); + } else if (col->is_dropped()) { + ut_ad(index->is_primary()); + + if (!(col->prtype & DATA_NOT_NULL)) { + field->data = NULL; + field->len = UNIV_SQL_NULL; + field->type.prtype = DATA_BINARY_TYPE; + } else { + ut_ad(col->len <= sizeof field_ref_zero); + dfield_set_data(field, field_ref_zero, + col->len); + field->type.prtype = DATA_NOT_NULL; + } + + field->type.mtype = col->len + ? DATA_FIXBINARY : DATA_BINARY; + continue; } else { row_field = dtuple_get_nth_field( row, ind_field->col->ind); @@ -3463,7 +3479,7 @@ row_ins_index_entry_set_vals( /* Check column prefix indexes */ if (ind_field != NULL && ind_field->prefix_len > 0 - && dfield_get_len(row_field) != UNIV_SQL_NULL) { + && len != UNIV_SQL_NULL) { const dict_col_t* col = dict_field_get_col(ind_field); @@ -3517,7 +3533,8 @@ row_ins_index_entry_step( ut_ad(dtuple_check_typed(node->row)); - err = row_ins_index_entry_set_vals(node->index, node->entry, node->row); + err = row_ins_index_entry_set_vals(node->index, node->entry, + node->row); if (err != DB_SUCCESS) { DBUG_RETURN(err); diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 3ad4aff8b24..1f9d4b6b8b4 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -683,9 +683,9 @@ row_log_table_delete( fields of the record. 
*/ heap = mem_heap_create( DATA_TRX_ID_LEN - + DTUPLE_EST_ALLOC(unsigned(new_index->n_uniq) + 2)); - old_pk = tuple = dtuple_create( - heap, unsigned(new_index->n_uniq) + 2); + + DTUPLE_EST_ALLOC(new_index->first_user_field())); + old_pk = tuple = dtuple_create(heap, + new_index->first_user_field()); dict_index_copy_types(tuple, new_index, tuple->n_fields); dtuple_set_n_fields_cmp(tuple, new_index->n_uniq); @@ -850,7 +850,7 @@ row_log_table_low_redundant( const bool is_instant = index->online_log->is_instant(index); rec_comp_status_t status = is_instant - ? REC_STATUS_COLUMNS_ADDED : REC_STATUS_ORDINARY; + ? REC_STATUS_INSTANT : REC_STATUS_ORDINARY; size = rec_get_converted_size_temp( index, tuple->fields, tuple->n_fields, &extra_size, status); @@ -904,7 +904,7 @@ row_log_table_low_redundant( *b++ = static_cast<byte>(extra_size); } - if (status == REC_STATUS_COLUMNS_ADDED) { + if (status == REC_STATUS_INSTANT) { ut_ad(is_instant); if (n_fields <= index->online_log->n_core_fields) { status = REC_STATUS_ORDINARY; @@ -969,7 +969,7 @@ row_log_table_low( ut_ad(!"wrong page type"); } #endif /* UNIV_DEBUG */ - ut_ad(!rec_is_metadata(rec, index)); + ut_ad(!rec_is_metadata(rec, *index)); ut_ad(page_rec_is_leaf(rec)); ut_ad(!page_is_comp(page_align(rec)) == !rec_offs_comp(offsets)); /* old_pk=row_log_table_get_pk() [not needed in INSERT] is a prefix @@ -992,7 +992,7 @@ row_log_table_low( ut_ad(page_is_comp(page_align(rec))); ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY - || rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED); + || rec_get_status(rec) == REC_STATUS_INSTANT); const ulint omit_size = REC_N_NEW_EXTRA_BYTES; @@ -1066,7 +1066,7 @@ row_log_table_low( if (is_instant) { *b++ = fake_extra_size - ? REC_STATUS_COLUMNS_ADDED + ? 
REC_STATUS_INSTANT : rec_get_status(rec); } else { ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY); @@ -1243,19 +1243,16 @@ row_log_table_get_pk( ulint trx_id_offs = index->trx_id_offset; if (!trx_id_offs) { - ulint pos = dict_index_get_sys_col_pos( - index, DATA_TRX_ID); ulint len; - ut_ad(pos > 0); if (!offsets) { offsets = rec_get_offsets( rec, index, NULL, true, - pos + 1, heap); + index->db_trx_id() + 1, heap); } trx_id_offs = rec_get_nth_field_offs( - offsets, pos, &len); + offsets, index->db_trx_id(), &len); ut_ad(len == DATA_TRX_ID_LEN); } @@ -1558,11 +1555,17 @@ row_log_table_apply_convert_mrec( const dict_col_t* col = dict_field_get_col(ind_field); + if (col->is_dropped()) { + /* the column was instantly dropped earlier */ + ut_ad(index->table->instant); + continue; + } + ulint col_no = log->col_map[dict_col_get_no(col)]; if (col_no == ULINT_UNDEFINED) { - /* dropped column */ + /* the column is being dropped now */ continue; } @@ -1918,8 +1921,7 @@ row_log_table_apply_delete( btr_pcur_t pcur; ulint* offsets; - ut_ad(rec_offs_n_fields(moffsets) - == dict_index_get_n_unique(index) + 2); + ut_ad(rec_offs_n_fields(moffsets) == index->first_user_field()); ut_ad(!rec_offs_any_extern(moffsets)); /* Convert the row to a search tuple. */ @@ -2482,8 +2484,7 @@ row_log_table_apply_op( /* The ROW_T_DELETE record was converted by rec_convert_dtuple_to_temp() using new_index. */ ut_ad(!new_index->is_instant()); - rec_offs_set_n_fields(offsets, - unsigned(new_index->n_uniq) + 2); + rec_offs_set_n_fields(offsets, new_index->first_user_field()); rec_init_offsets_temp(mrec, new_index, offsets); next_mrec = mrec + rec_offs_data_size(offsets); if (next_mrec > mrec_end) { @@ -2575,7 +2576,7 @@ row_log_table_apply_op( rec_convert_dtuple_to_temp() using new_index. 
*/ ut_ad(!new_index->is_instant()); rec_offs_set_n_fields(offsets, - unsigned(new_index->n_uniq) + 2); + new_index->first_user_field()); rec_init_offsets_temp(mrec, new_index, offsets); next_mrec = mrec + rec_offs_data_size(offsets); @@ -2585,13 +2586,12 @@ row_log_table_apply_op( /* Copy the PRIMARY KEY fields and DB_TRX_ID, DB_ROLL_PTR from mrec to old_pk. */ - old_pk = dtuple_create( - heap, unsigned(new_index->n_uniq) + 2); + old_pk = dtuple_create(heap, + new_index->first_user_field()); dict_index_copy_types(old_pk, new_index, old_pk->n_fields); - for (ulint i = 0; - i < dict_index_get_n_unique(new_index) + 2; + for (ulint i = 0; i < new_index->first_user_field(); i++) { const void* field; ulint len; @@ -2742,8 +2742,8 @@ row_log_table_apply_ops( dict_index_t* new_index = dict_table_get_first_index( new_table); const ulint i = 1 + REC_OFFS_HEADER_SIZE - + ut_max(dict_index_get_n_fields(index), - dict_index_get_n_unique(new_index) + 2); + + std::max<ulint>(index->n_fields, + new_index->first_user_field()); const ulint new_trx_id_col = dict_col_get_clust_pos( dict_table_get_sys_col(new_table, DATA_TRX_ID), new_index); trx_t* trx = thr_get_trx(thr); @@ -3203,7 +3203,8 @@ row_log_allocate( log->head.total = 0; log->path = path; log->n_core_fields = index->n_core_fields; - ut_ad(!table || log->is_instant(index) == index->is_instant()); + ut_ad(!table || log->is_instant(index) + == (index->n_core_fields < index->n_fields)); log->allow_not_null = allow_not_null; log->old_table = old_table; log->n_rows = 0; diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 15d6ab8e76e..38af0eb1931 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -1869,7 +1869,7 @@ row_merge_read_clustered_index( btr_pcur_open_at_index_side( true, clust_index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); btr_pcur_move_to_next_user_rec(&pcur, &mtr); - if (rec_is_metadata(btr_pcur_get_rec(&pcur), clust_index)) { + if 
(rec_is_metadata(btr_pcur_get_rec(&pcur), *clust_index)) { ut_ad(btr_pcur_is_on_user_rec(&pcur)); /* Skip the metadata pseudo-record. */ } else { diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 757a9ff232a..d91a317a8fd 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -34,7 +34,6 @@ Created 9/17/2000 Heikki Tuuri #include "btr0sea.h" #include "dict0boot.h" #include "dict0crea.h" -#include <sql_const.h> #include "dict0dict.h" #include "dict0load.h" #include "dict0priv.h" @@ -329,6 +328,7 @@ row_mysql_read_geometry( ulint col_len) /*!< in: MySQL format length */ { byte* data; + ut_ad(col_len > 8); *len = mach_read_from_n_little_endian(ref, col_len - 8); @@ -828,7 +828,8 @@ row_create_prebuilt( clust_index = dict_table_get_first_index(table); /* Make sure that search_tuple is long enough for clustered index */ - ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields); + ut_a(2 * unsigned(table->n_cols) >= unsigned(clust_index->n_fields) + - clust_index->table->n_dropped()); ref_len = dict_index_get_n_unique(clust_index); @@ -2841,7 +2842,7 @@ row_mysql_table_id_reassign( dberr_t err; pars_info_t* info = pars_info_create(); - dict_hdr_get_new_id(new_id, NULL, NULL, table, false); + dict_hdr_get_new_id(new_id, NULL, NULL); pars_info_add_ull_literal(info, "old_id", table->id); pars_info_add_ull_literal(info, "new_id", *new_id); diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index 5699c8b2f56..f0652ed3d54 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -127,33 +127,32 @@ row_purge_remove_clust_if_poss_low( purge_node_t* node, /*!< in/out: row purge node */ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ { - dict_index_t* index; - bool success = true; - mtr_t mtr; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint* offsets; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs_init(offsets_); - 
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S) || node->vcol_info.is_used()); - index = dict_table_get_first_index(node->table); + dict_index_t* index = dict_table_get_first_index(node->table); log_free_check(); - mtr_start(&mtr); - index->set_modified(mtr); + + mtr_t mtr; + mtr.start(); if (!row_purge_reposition_pcur(mode, node, &mtr)) { /* The record was already removed. */ - goto func_exit; + mtr.commit(); + return true; } - rec = btr_pcur_get_rec(&node->pcur); + ut_d(const bool was_instant = !!index->table->instant); + index->set_modified(mtr); - offsets = rec_get_offsets( + rec_t* rec = btr_pcur_get_rec(&node->pcur); + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs_init(offsets_); + mem_heap_t* heap = NULL; + ulint* offsets = rec_get_offsets( rec, index, offsets_, true, ULINT_UNDEFINED, &heap); + bool success = true; if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) { /* Someone else has modified the record later: do not remove */ @@ -186,6 +185,10 @@ row_purge_remove_clust_if_poss_low( } } + /* Prove that dict_index_t::clear_instant_alter() was + not called with index->table->instant != NULL. */ + ut_ad(!was_instant || index->table->instant); + func_exit: if (heap) { mem_heap_free(heap); @@ -845,8 +848,9 @@ static void row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr) became purgeable) */ if (node->roll_ptr == row_get_rec_roll_ptr(rec, index, offsets)) { - ut_ad(!rec_get_deleted_flag(rec, - rec_offs_comp(offsets))); + ut_ad(!rec_get_deleted_flag( + rec, rec_offs_comp(offsets)) + || rec_is_alter_metadata(rec, *index)); DBUG_LOG("purge", "reset DB_TRX_ID=" << ib::hex(row_get_rec_trx_id( rec, index, offsets))); diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc index 352407b6ee5..d77cc1e33dc 100644 --- a/storage/innobase/row/row0quiesce.cc +++ b/storage/innobase/row/row0quiesce.cc @@ -70,17 +70,16 @@ row_quiesce_write_index_fields( return(DB_IO_ERROR); } + const char* field_name = field->name ? 
field->name : ""; /* Include the NUL byte in the length. */ - ib_uint32_t len = static_cast<ib_uint32_t>(strlen(field->name) + 1); - ut_a(len > 1); - + ib_uint32_t len = static_cast<ib_uint32_t>(strlen(field_name) + 1); mach_write_to_4(row, len); DBUG_EXECUTE_IF("ib_export_io_write_failure_10", close(fileno(file));); if (fwrite(row, 1, sizeof(len), file) != sizeof(len) - || fwrite(field->name, 1, len, file) != len) { + || fwrite(field_name, 1, len, file) != len) { ib_senderrf( thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index d419fd9998f..3c03f8277ae 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -197,7 +197,7 @@ row_build_index_entry_low( { dtuple_t* entry; ulint entry_len; - ulint i; + ulint i = 0; ulint num_v = 0; entry_len = dict_index_get_n_fields(index); @@ -217,90 +217,87 @@ row_build_index_entry_low( } else { dtuple_set_n_fields_cmp( entry, dict_index_get_n_unique_in_tree(index)); - } + if (dict_index_is_spatial(index)) { + /* Set the MBR field */ + if (!row_build_spatial_index_key( + index, ext, + dtuple_get_nth_field(entry, 0), + dtuple_get_nth_field( + row, + dict_index_get_nth_field(index, i) + ->col->ind), flag, heap)) { + return NULL; + } - for (i = 0; i < entry_len + num_v; i++) { - const dict_field_t* ind_field = NULL; - const dict_col_t* col; - ulint col_no = 0; - dfield_t* dfield; - dfield_t* dfield2; - ulint len; - - if (i >= entry_len) { - /* This is to insert new rows to cluster index */ - ut_ad(dict_index_is_clust(index) - && flag == ROW_BUILD_FOR_INSERT); - dfield = dtuple_get_nth_v_field(entry, i - entry_len); - col = &dict_table_get_nth_v_col( - index->table, i - entry_len)->m_col; + i = 1; + } + } - } else { - ind_field = dict_index_get_nth_field(index, i); - col = ind_field->col; - col_no = dict_col_get_no(col); - dfield = dtuple_get_nth_field(entry, i); + for (; i < entry_len; i++) { + const dict_field_t& f = 
index->fields[i]; + dfield_t* dfield = dtuple_get_nth_field(entry, i); + + if (f.col->is_dropped()) { + ut_ad(index->is_primary()); + ut_ad(index->is_instant()); + ut_ad(!f.col->is_virtual()); + dict_col_copy_type(f.col, &dfield->type); + if (f.col->is_nullable()) { + dfield_set_null(dfield); + } else { + dfield_set_data(dfield, field_ref_zero, + f.fixed_len); + } + continue; } - compile_time_assert(DATA_MISSING == 0); + const dfield_t* dfield2; - if (col->is_virtual()) { - const dict_v_col_t* v_col - = reinterpret_cast<const dict_v_col_t*>(col); + if (f.col->is_virtual()) { + const dict_v_col_t* v_col + = reinterpret_cast<const dict_v_col_t*>(f.col); ut_ad(v_col->v_pos < dtuple_get_n_v_fields(row)); dfield2 = dtuple_get_nth_v_field(row, v_col->v_pos); ut_ad(dfield_is_null(dfield2) || dfield_get_len(dfield2) == 0 || dfield2->data); + ut_ad(!dfield_is_ext(dfield2)); + if (UNIV_UNLIKELY(dfield2->type.mtype + == DATA_MISSING)) { + ut_ad(flag == ROW_BUILD_FOR_PURGE); + return(NULL); + } } else { - dfield2 = dtuple_get_nth_field(row, col_no); - ut_ad(dfield_get_type(dfield2)->mtype == DATA_MISSING - || (!(dfield_get_type(dfield2)->prtype - & DATA_VIRTUAL))); - } - - if (UNIV_UNLIKELY(dfield_get_type(dfield2)->mtype - == DATA_MISSING)) { - /* The field has not been initialized in the row. - This should be from trx_undo_rec_get_partial_row(). */ - return(NULL); - } - -#ifdef UNIV_DEBUG - if (dfield_get_type(dfield2)->prtype & DATA_VIRTUAL - && dict_index_is_clust(index)) { - ut_ad(flag == ROW_BUILD_FOR_INSERT); - } -#endif /* UNIV_DEBUG */ - - /* Special handle spatial index, set the first field - which is for store MBR. */ - if (dict_index_is_spatial(index) && i == 0) { - if (!row_build_spatial_index_key( - index, ext, dfield, dfield2, flag, heap)) { - return NULL; + dfield2 = dtuple_get_nth_field(row, f.col->ind); + if (UNIV_UNLIKELY(dfield2->type.mtype + == DATA_MISSING)) { + /* The field has not been initialized in + the row. 
This should be from + trx_undo_rec_get_partial_row(). */ + return(NULL); } - continue; + ut_ad(!(dfield2->type.prtype & DATA_VIRTUAL)); } - len = dfield_get_len(dfield2); + compile_time_assert(DATA_MISSING == 0); - dfield_copy(dfield, dfield2); + *dfield = *dfield2; if (dfield_is_null(dfield)) { continue; } - if ((!ind_field || ind_field->prefix_len == 0) + ulint len = dfield_get_len(dfield); + + if (f.prefix_len == 0 && (!dfield_is_ext(dfield) || dict_index_is_clust(index))) { /* The dfield_copy() above suffices for columns that are stored in-page, or for clustered index record columns that are not - part of a column prefix in the PRIMARY KEY, - or for virtaul columns in cluster index record. */ + part of a column prefix in the PRIMARY KEY. */ continue; } @@ -311,11 +308,11 @@ row_build_index_entry_low( index record with an off-page column is when it is a column prefix index. If atomic_blobs, also fully indexed long columns may be stored off-page. */ - ut_ad(col->ord_part); + ut_ad(f.col->ord_part); - if (ext && !col->is_virtual()) { + if (ext && !f.col->is_virtual()) { /* See if the column is stored externally. */ - const byte* buf = row_ext_lookup(ext, col_no, + const byte* buf = row_ext_lookup(ext, f.col->ind, &len); if (UNIV_LIKELY_NULL(buf)) { if (UNIV_UNLIKELY(buf == field_ref_zero)) { @@ -324,7 +321,7 @@ row_build_index_entry_low( dfield_set_data(dfield, buf, len); } - if (ind_field->prefix_len == 0) { + if (f.prefix_len == 0) { /* If ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED, we can have a secondary index on an entire column @@ -351,16 +348,33 @@ row_build_index_entry_low( } /* If a column prefix index, take only the prefix. 
*/ - if (ind_field->prefix_len) { + if (f.prefix_len) { len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminlen, col->mbmaxlen, - ind_field->prefix_len, len, + f.col->prtype, + f.col->mbminlen, f.col->mbmaxlen, + f.prefix_len, len, static_cast<char*>(dfield_get_data(dfield))); dfield_set_len(dfield, len); } } - return(entry); + for (i = num_v; i--; ) { + ut_ad(index->is_primary()); + ut_ad(flag == ROW_BUILD_FOR_INSERT); + dfield_t* dfield = dtuple_get_nth_v_field(entry, i); + const dict_v_col_t* v_col = dict_table_get_nth_v_col( + index->table, i); + ut_ad(!v_col->m_col.is_dropped()); + ut_ad(v_col->v_pos < dtuple_get_n_v_fields(row)); + const dfield_t* dfield2 = dtuple_get_nth_v_field( + row, v_col->v_pos); + ut_ad(dfield_is_null(dfield2) || + dfield_get_len(dfield2) == 0 || dfield2->data); + ut_ad(dfield2->type.mtype != DATA_MISSING); + *dfield = *dfield2; + } + + return entry; } /** An inverse function to row_build_index_entry. Builds a row from a @@ -497,11 +511,23 @@ row_build_low( j = 0; + const dict_field_t* ind_field = index->fields; + for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) { - const dict_field_t* ind_field - = dict_index_get_nth_field(index, i); + if (i == index->first_user_field() + && rec_is_alter_metadata(rec, *index)) { + ut_ad(rec_offs_nth_extern(offsets, i)); + ut_d(ulint len); + ut_d(rec_get_nth_field_offs(offsets, i, &len)); + ut_ad(len == FIELD_REF_SIZE); + continue; + } + + ut_ad(ind_field < &index->fields[index->n_fields]); - if (ind_field->prefix_len) { + const dict_col_t* col = dict_field_get_col(ind_field); + + if ((ind_field++)->prefix_len) { /* Column prefixes can only occur in key fields, which cannot be stored externally. 
For a column prefix, there should also be the full @@ -511,10 +537,11 @@ row_build_low( continue; } - const dict_col_t* col - = dict_field_get_col(ind_field); - ulint col_no - = dict_col_get_no(col); + if (col->is_dropped()) { + continue; + } + + ulint col_no = dict_col_get_no(col); if (col_map) { col_no = col_map[col_no]; @@ -526,6 +553,7 @@ row_build_low( } dfield_t* dfield = dtuple_get_nth_field(row, col_no); + const void* field = rec_get_nth_field( copy, offsets, i, &len); if (len == UNIV_SQL_DEFAULT) { @@ -669,15 +697,19 @@ row_build_w_add_vcol( } /** Convert an index record to a data tuple. -@tparam def whether the index->instant_field_value() needs to be accessed -@param[in] rec index record -@param[in] index index -@param[in] offsets rec_get_offsets(rec, index) -@param[out] n_ext number of externally stored columns -@param[in,out] heap memory heap for allocations +@tparam metadata whether the index->instant_field_value() needs to be accessed +@tparam mblob 1 if rec_is_alter_metadata(); +2 if we want converted metadata corresponding to info_bits +@param[in] rec index record +@param[in] index index +@param[in] offsets rec_get_offsets(rec, index) +@param[out] n_ext number of externally stored columns +@param[in,out] heap memory heap for allocations +@param[in] info_bits (only used if mblob=2) +@param[in] pad (only used if mblob=2) @return index entry built; does not set info_bits, and the data fields in the entry will point directly to rec */ -template<bool def> +template<bool metadata, int mblob = 0> static inline dtuple_t* row_rec_to_index_entry_impl( @@ -685,44 +717,66 @@ row_rec_to_index_entry_impl( const dict_index_t* index, const ulint* offsets, ulint* n_ext, - mem_heap_t* heap) + mem_heap_t* heap, + ulint info_bits = 0, + bool pad = false) { - dtuple_t* entry; - dfield_t* dfield; - ulint i; - const byte* field; - ulint len; - ulint rec_len; - ut_ad(rec != NULL); ut_ad(heap != NULL); ut_ad(index != NULL); - ut_ad(def || !rec_offs_any_default(offsets)); - 
+ ut_ad(!mblob || index->is_primary()); + ut_ad(!mblob || !index->table->is_temporary()); + ut_ad(!mblob || !dict_index_is_spatial(index)); + compile_time_assert(!mblob || metadata); + compile_time_assert(mblob <= 2); /* Because this function may be invoked by row0merge.cc on a record whose header is in different format, the check rec_offs_validate(rec, index, offsets) must be avoided here. */ ut_ad(n_ext); *n_ext = 0; - rec_len = rec_offs_n_fields(offsets); - - entry = dtuple_create(heap, rec_len); + const bool got = mblob == 2 && rec_is_alter_metadata(rec, *index); + ulint rec_len = rec_offs_n_fields(offsets); + if (mblob == 2) { + ut_ad(info_bits == REC_INFO_METADATA_ALTER + || info_bits == REC_INFO_METADATA_ADD); + ut_ad(rec_len <= ulint(index->n_fields + got)); + if (pad) { + rec_len = ulint(index->n_fields) + + (info_bits == REC_INFO_METADATA_ALTER); + } else if (!got && info_bits == REC_INFO_METADATA_ALTER) { + rec_len++; + } + } else { + ut_ad(info_bits == 0); + ut_ad(!pad); + } + dtuple_t* entry = dtuple_create(heap, rec_len); + dfield_t* dfield = entry->fields; dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique_in_tree(index)); - ut_ad(rec_len == dict_index_get_n_fields(index) + ut_ad(mblob == 2 + || rec_len == dict_index_get_n_fields(index) + uint(mblob == 1) /* a record for older SYS_INDEXES table (missing merge_threshold column) is acceptable. */ - || (index->table->id == DICT_INDEXES_ID + || (!index->table->is_temporary() + && index->table->id == DICT_INDEXES_ID && rec_len == dict_index_get_n_fields(index) - 1)); - dict_index_copy_types(entry, index, rec_len); - - for (i = 0; i < rec_len; i++) { + ulint i; + for (i = 0; i < (mblob ? 
index->first_user_field() : rec_len); + i++, dfield++) { + dict_col_copy_type(dict_index_get_nth_col(index, i), + &dfield->type); + if (!mblob + && dict_index_is_spatial(index) + && DATA_GEOMETRY_MTYPE(dfield->type.mtype)) { + dfield->type.prtype |= DATA_GIS_MBR; + } - dfield = dtuple_get_nth_field(entry, i); - field = def + ulint len; + const byte* field = metadata ? rec_get_nth_cfield(rec, index, offsets, i, &len) : rec_get_nth_field(rec, offsets, i, &len); @@ -730,12 +784,80 @@ row_rec_to_index_entry_impl( if (rec_offs_nth_extern(offsets, i)) { dfield_set_ext(dfield); - (*n_ext)++; + ++*n_ext; + } + } + + if (mblob) { + ulint len; + const byte* field; + ulint j = i; + + if (mblob == 2) { + const bool want = info_bits == REC_INFO_METADATA_ALTER; + if (got == want) { + if (got) { + goto copy_metadata; + } + } else { + if (want) { + /* Allocate a placeholder for + adding metadata in an update. */ + len = FIELD_REF_SIZE; + field = static_cast<byte*>( + mem_heap_zalloc(heap, len)); + /* In reality there is one fewer + field present in the record. */ + rec_len--; + goto init_metadata; + } + + /* Skip the undesired metadata blob + (for example, when rolling back an + instant ALTER TABLE). 
*/ + i++; + } + goto copy_user_fields; + } +copy_metadata: + ut_ad(rec_offs_nth_extern(offsets, i)); + field = rec_get_nth_field(rec, offsets, i++, &len); +init_metadata: + dfield->type.metadata_blob_init(); + ut_ad(len == FIELD_REF_SIZE); + dfield_set_data(dfield, field, len); + dfield_set_ext(dfield++); + ++*n_ext; +copy_user_fields: + for (; i < rec_len; i++, dfield++) { + dict_col_copy_type(dict_index_get_nth_col(index, j++), + &dfield->type); + if (mblob == 2 && pad + && i >= rec_offs_n_fields(offsets)) { + field = index->instant_field_value(j - 1, + &len); + dfield_set_data(dfield, field, len); + continue; + } + + field = rec_get_nth_field(rec, offsets, i, &len); + dfield_set_data(dfield, field, len); + + if (rec_offs_nth_extern(offsets, i)) { + dfield_set_ext(dfield); + ++*n_ext; + } } } + if (mblob == 2) { + ulint n_fields = ulint(dfield - entry->fields); + ut_ad(entry->n_fields >= n_fields); + entry->n_fields = n_fields; + } + ut_ad(dfield == entry->fields + entry->n_fields); ut_ad(dtuple_check_typed(entry)); - return(entry); + return entry; } /** Convert an index record to a data tuple. @@ -771,25 +893,26 @@ row_rec_to_index_entry( mem_heap_t* heap) /*!< in: memory heap from which the memory needed is allocated */ { - dtuple_t* entry; - byte* buf; - const rec_t* copy_rec; - ut_ad(rec != NULL); ut_ad(heap != NULL); ut_ad(index != NULL); ut_ad(rec_offs_validate(rec, index, offsets)); /* Take a copy of rec to heap */ - buf = static_cast<byte*>( - mem_heap_alloc(heap, rec_offs_size(offsets))); - - copy_rec = rec_copy(buf, rec, offsets); + const rec_t* copy_rec = rec_copy( + static_cast<byte*>(mem_heap_alloc(heap, + rec_offs_size(offsets))), + rec, offsets); rec_offs_make_valid(copy_rec, index, true, const_cast<ulint*>(offsets)); - entry = row_rec_to_index_entry_impl<true>( - copy_rec, index, offsets, n_ext, heap); + + dtuple_t* entry = rec_is_alter_metadata(copy_rec, *index) + ? 
row_rec_to_index_entry_impl<true,1>( + copy_rec, index, offsets, n_ext, heap) + : row_rec_to_index_entry_impl<true>( + copy_rec, index, offsets, n_ext, heap); + rec_offs_make_valid(rec, index, true, const_cast<ulint*>(offsets)); @@ -799,6 +922,51 @@ row_rec_to_index_entry( return(entry); } +/** Convert a metadata record to a data tuple. +@param[in] rec metadata record +@param[in] index clustered index after instant ALTER TABLE +@param[in] offsets rec_get_offsets(rec) +@param[out] n_ext number of externally stored fields +@param[in,out] heap memory heap for allocations +@param[in] info_bits the info_bits after an update +@param[in] pad whether to pad to index->n_fields */ +dtuple_t* +row_metadata_to_tuple( + const rec_t* rec, + const dict_index_t* index, + const ulint* offsets, + ulint* n_ext, + mem_heap_t* heap, + ulint info_bits, + bool pad) +{ + ut_ad(info_bits == REC_INFO_METADATA_ALTER + || info_bits == REC_INFO_METADATA_ADD); + ut_ad(rec_is_metadata(rec, *index)); + ut_ad(rec_offs_validate(rec, index, offsets)); + + const rec_t* copy_rec = rec_copy( + static_cast<byte*>(mem_heap_alloc(heap, + rec_offs_size(offsets))), + rec, offsets); + + rec_offs_make_valid(copy_rec, index, true, + const_cast<ulint*>(offsets)); + + dtuple_t* entry = info_bits == REC_INFO_METADATA_ALTER + || rec_is_alter_metadata(copy_rec, *index) + ? row_rec_to_index_entry_impl<true,2>( + copy_rec, index, offsets, n_ext, heap, info_bits, pad) + : row_rec_to_index_entry_impl<true>( + copy_rec, index, offsets, n_ext, heap); + + rec_offs_make_valid(rec, index, true, + const_cast<ulint*>(offsets)); + + dtuple_set_info_bits(entry, info_bits); + return entry; +} + /*******************************************************************//** Builds from a secondary index record a row reference with which we can search the clustered index record. 
@@ -1033,7 +1201,7 @@ row_search_on_row_ref( index = dict_table_get_first_index(table); if (UNIV_UNLIKELY(ref->info_bits != 0)) { - ut_ad(ref->info_bits == REC_INFO_METADATA); + ut_ad(ref->is_metadata()); ut_ad(ref->n_fields <= index->n_uniq); btr_pcur_open_at_index_side(true, index, mode, pcur, true, 0, mtr); diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 58b063a3b05..5e7894d3ca7 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -1487,7 +1487,7 @@ row_sel_try_search_shortcut( const rec_t* rec = btr_pcur_get_rec(&(plan->pcur)); - if (!page_rec_is_user_rec(rec) || rec_is_metadata(rec, index)) { + if (!page_rec_is_user_rec(rec) || rec_is_metadata(rec, *index)) { retry: rw_lock_s_unlock(ahi_latch); return(SEL_RETRY); @@ -1787,7 +1787,7 @@ skip_lock: goto next_rec; } - if (rec_is_metadata(rec, index)) { + if (rec_is_metadata(rec, *index)) { /* Skip the metadata pseudo-record. */ cost_counter++; goto next_rec; @@ -2693,44 +2693,6 @@ row_sel_convert_mysql_key_to_innobase( } /**************************************************************//** -Stores the row id to the prebuilt struct. 
*/ -static -void -row_sel_store_row_id_to_prebuilt( -/*=============================*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt */ - const rec_t* index_rec, /*!< in: record */ - const dict_index_t* index, /*!< in: index of the record */ - const ulint* offsets) /*!< in: rec_get_offsets - (index_rec, index) */ -{ - const byte* data; - ulint len; - - ut_ad(rec_offs_validate(index_rec, index, offsets)); - - data = rec_get_nth_field( - index_rec, offsets, - dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len); - - if (UNIV_UNLIKELY(len != DATA_ROW_ID_LEN)) { - - ib::error() << "Row id field is wrong length " << len << " in" - " index " << index->name - << " of table " << index->table->name - << ", Field number " - << dict_index_get_sys_col_pos(index, DATA_ROW_ID) - << ", record:"; - - rec_print_new(stderr, index_rec, offsets); - putc('\n', stderr); - ut_error; - } - - ut_memcpy(prebuilt->row_id, data, len); -} - -/**************************************************************//** Stores a non-SQL-NULL field in the MySQL format. The counterpart of this function is row_mysql_store_col_in_innobase_format() in row0mysql.cc. 
*/ void @@ -3212,7 +3174,7 @@ row_sel_store_mysql_rec( if (dict_index_is_clust(index) || prebuilt->fts_doc_id_in_read_set) { prebuilt->fts_doc_id = fts_get_doc_id_from_rec( - prebuilt->table, rec, index, NULL); + rec, index, offsets); } } @@ -3562,7 +3524,7 @@ sel_restore_position_for_mysql( next: if (btr_pcur_move_to_next(pcur, mtr) && rec_is_metadata(btr_pcur_get_rec(pcur), - pcur->btr_cur.index)) { + *pcur->btr_cur.index)) { btr_pcur_move_to_next(pcur, mtr); } @@ -3578,7 +3540,7 @@ next: prev: if (btr_pcur_is_on_user_rec(pcur) && !moves_up && !rec_is_metadata(btr_pcur_get_rec(pcur), - pcur->btr_cur.index)) { + *pcur->btr_cur.index)) { btr_pcur_move_to_prev(pcur, mtr); } return true; @@ -3855,7 +3817,7 @@ row_sel_try_search_shortcut_for_mysql( BTR_SEARCH_LEAF, pcur, ahi_latch, mtr); rec = btr_pcur_get_rec(pcur); - if (!page_rec_is_user_rec(rec) || rec_is_metadata(rec, index)) { + if (!page_rec_is_user_rec(rec) || rec_is_metadata(rec, *index)) { retry: rw_lock_s_unlock(ahi_latch); return(SEL_RETRY); @@ -5492,11 +5454,19 @@ use_covering_index: } } - if (prebuilt->clust_index_was_generated) { - row_sel_store_row_id_to_prebuilt( - prebuilt, result_rec, - result_rec == rec ? 
index : clust_index, - offsets); + if (!prebuilt->clust_index_was_generated) { + } else if (result_rec != rec || index->is_primary()) { + memcpy(prebuilt->row_id, result_rec, DATA_ROW_ID_LEN); + } else { + ulint len; + const byte* data = rec_get_nth_field( + result_rec, offsets, index->n_fields - 1, + &len); + ut_ad(dict_index_get_nth_col(index, + index->n_fields - 1) + ->prtype == (DATA_ROW_ID | DATA_NOT_NULL)); + ut_ad(len == DATA_ROW_ID_LEN); + memcpy(prebuilt->row_id, data, DATA_ROW_ID_LEN); } } diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc deleted file mode 100644 index ce98717b3c9..00000000000 --- a/storage/innobase/row/row0trunc.cc +++ /dev/null @@ -1,1966 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2018, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0trunc.cc -TRUNCATE implementation - -Created 2013-04-12 Sunny Bains -*******************************************************/ - -#include "row0trunc.h" -#include "btr0sea.h" -#include "pars0pars.h" -#include "btr0pcur.h" -#include "dict0crea.h" -#include "dict0stats.h" -#include "dict0stats_bg.h" -#include "lock0lock.h" -#include "fts0fts.h" -#include "ibuf0ibuf.h" -#include "os0file.h" -#include "que0que.h" -#include "trx0undo.h" - -/* FIXME: For temporary tables, use a simple approach of btr_free() -and btr_create() of each index tree. */ - -/* FIXME: For persistent tables, remove this code in MDEV-11655 -and use a combination of the transactional DDL log to make atomic the -low-level operations ha_innobase::delete_table(), ha_innobase::create(). */ - -bool truncate_t::s_fix_up_active = false; -truncate_t::tables_t truncate_t::s_tables; -truncate_t::truncated_tables_t truncate_t::s_truncated_tables; - -/** -Iterator over the the raw records in an index, doesn't support MVCC. */ -class IndexIterator { - -public: - /** - Iterate over an indexes records - @param index index to iterate over */ - explicit IndexIterator(dict_index_t* index) - : - m_index(index) - { - /* Do nothing */ - } - - /** - Search for key. Position the cursor on a record GE key. - @return DB_SUCCESS or error code. 
*/ - dberr_t search(dtuple_t& key, bool noredo) - { - mtr_start(&m_mtr); - - if (noredo) { - mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO); - } - - btr_pcur_open_on_user_rec( - m_index, - &key, - PAGE_CUR_GE, - BTR_MODIFY_LEAF, - &m_pcur, &m_mtr); - - return(DB_SUCCESS); - } - - /** - Iterate over all the records - @return DB_SUCCESS or error code */ - template <typename Callback> - dberr_t for_each(Callback& callback) - { - dberr_t err = DB_SUCCESS; - - for (;;) { - - if (!btr_pcur_is_on_user_rec(&m_pcur) - || !callback.match(&m_pcur)) { - - /* The end of of the index has been reached. */ - err = DB_END_OF_INDEX; - break; - } - - rec_t* rec = btr_pcur_get_rec(&m_pcur); - - if (!rec_get_deleted_flag(rec, FALSE)) { - - err = callback(&m_mtr, &m_pcur); - - if (err != DB_SUCCESS) { - break; - } - } - - btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr); - } - - btr_pcur_close(&m_pcur); - mtr_commit(&m_mtr); - - return(err == DB_END_OF_INDEX ? DB_SUCCESS : err); - } - -private: - // Disable copying - IndexIterator(const IndexIterator&); - IndexIterator& operator=(const IndexIterator&); - -private: - mtr_t m_mtr; - btr_pcur_t m_pcur; - dict_index_t* m_index; -}; - -/** SysIndex table iterator, iterate over records for a table. */ -class SysIndexIterator { - -public: - /** - Iterate over all the records that match the table id. - @return DB_SUCCESS or error code */ - template <typename Callback> - dberr_t for_each(Callback& callback) const - { - dict_index_t* sys_index; - byte buf[DTUPLE_EST_ALLOC(1)]; - dtuple_t* tuple = - dtuple_create_from_mem(buf, sizeof(buf), 1, 0); - dfield_t* dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data( - dfield, - callback.table_id(), - sizeof(*callback.table_id())); - - sys_index = dict_table_get_first_index(dict_sys->sys_indexes); - - dict_index_copy_types(tuple, sys_index, 1); - - IndexIterator iterator(sys_index); - - /* Search on the table id and position the cursor - on GE table_id. 
*/ - iterator.search(*tuple, callback.get_logging_status()); - - return(iterator.for_each(callback)); - } -}; - -/** Generic callback abstract class. */ -class Callback -{ - -public: - /** - Constructor - @param table_id id of the table being operated. - @param noredo if true turn off logging. */ - Callback(table_id_t table_id, bool noredo) - : - m_id(), - m_noredo(noredo) - { - /* Convert to storage byte order. */ - mach_write_to_8(&m_id, table_id); - } - - /** - Destructor */ - virtual ~Callback() - { - /* Do nothing */ - } - - /** - @param pcur persistent cursor used for iteration - @return true if the table id column matches. */ - bool match(btr_pcur_t* pcur) const - { - ulint len; - const byte* field; - rec_t* rec = btr_pcur_get_rec(pcur); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len); - - ut_ad(len == 8); - - return(memcmp(&m_id, field, len) == 0); - } - - /** - @return pointer to table id storage format buffer */ - const table_id_t* table_id() const - { - return(&m_id); - } - - /** - @return return if logging needs to be turned off. */ - bool get_logging_status() const - { - return(m_noredo); - } - -protected: - // Disably copying - Callback(const Callback&); - Callback& operator=(const Callback&); - -protected: - /** Table id in storage format */ - table_id_t m_id; - - /** Turn off logging. */ - const bool m_noredo; -}; - -/** -Scan to find out truncate log file from the given directory path. - -@param dir_path look for log directory in following path. -@param log_files cache to hold truncate log file name found. -@return DB_SUCCESS or error code. */ -dberr_t -TruncateLogParser::scan( - const char* dir_path, - trunc_log_files_t& log_files) -{ - os_file_dir_t dir; - os_file_stat_t fileinfo; - dberr_t err = DB_SUCCESS; - const ulint dir_len = strlen(dir_path); - - /* Scan and look out for the truncate log files. 
*/ - dir = os_file_opendir(dir_path, true); - if (dir == NULL) { - return(DB_IO_ERROR); - } - - while (fil_file_readdir_next_file( - &err, dir_path, dir, &fileinfo) == 0) { - - ulint nm_len = strlen(fileinfo.name); - - if (fileinfo.type == OS_FILE_TYPE_FILE - && nm_len > sizeof "ib_trunc.log" - && (0 == strncmp(fileinfo.name + nm_len - - ((sizeof "trunc.log") - 1), - "trunc.log", (sizeof "trunc.log") - 1)) - && (0 == strncmp(fileinfo.name, "ib_", 3))) { - - if (fileinfo.size == 0) { - /* Truncate log not written. Remove the file. */ - os_file_delete( - innodb_log_file_key, fileinfo.name); - continue; - } - - /* Construct file name by appending directory path */ - ulint sz = dir_len + 22 + 22 + sizeof "ib_trunc.log"; - char* log_file_name = UT_NEW_ARRAY_NOKEY(char, sz); - if (log_file_name == NULL) { - err = DB_OUT_OF_MEMORY; - break; - } - memset(log_file_name, 0, sz); - - strncpy(log_file_name, dir_path, dir_len); - ulint log_file_name_len = strlen(log_file_name); - if (log_file_name[log_file_name_len - 1] - != OS_PATH_SEPARATOR) { - - log_file_name[log_file_name_len] - = OS_PATH_SEPARATOR; - log_file_name_len = strlen(log_file_name); - } - strcat(log_file_name, fileinfo.name); - log_files.push_back(log_file_name); - } - } - - os_file_closedir(dir); - - return(err); -} - -/** -Parse the log file and populate table to truncate information. -(Add this table to truncate information to central vector that is then - used by truncate fix-up routine to fix-up truncate action of the table.) - -@param log_file_name log file to parse -@return DB_SUCCESS or error code. */ -dberr_t -TruncateLogParser::parse( - const char* log_file_name) -{ - dberr_t err = DB_SUCCESS; - truncate_t* truncate = NULL; - - /* Open the file and read magic-number to findout if truncate action - was completed. 
*/ - bool ret; - os_file_t handle = os_file_create_simple( - innodb_log_file_key, log_file_name, - OS_FILE_OPEN, OS_FILE_READ_ONLY, srv_read_only_mode, &ret); - if (!ret) { - ib::error() << "Error opening truncate log file: " - << log_file_name; - return(DB_IO_ERROR); - } - - ulint sz = srv_page_size; - void* buf = ut_zalloc_nokey(sz + srv_page_size); - if (buf == 0) { - os_file_close(handle); - return(DB_OUT_OF_MEMORY); - } - - IORequest request(IORequest::READ); - - /* Align the memory for file i/o if we might have O_DIRECT set*/ - byte* log_buf = static_cast<byte*>(ut_align(buf, srv_page_size)); - - do { - err = os_file_read(request, handle, log_buf, 0, sz); - - if (err != DB_SUCCESS) { - os_file_close(handle); - break; - } - - if (mach_read_from_4(log_buf) == 32743712) { - - /* Truncate action completed. Avoid parsing the file. */ - os_file_close(handle); - - os_file_delete(innodb_log_file_key, log_file_name); - break; - } - - if (truncate == NULL) { - truncate = UT_NEW_NOKEY(truncate_t(log_file_name)); - if (truncate == NULL) { - os_file_close(handle); - err = DB_OUT_OF_MEMORY; - break; - } - } - - err = truncate->parse(log_buf + 4, log_buf + sz - 4); - - if (err != DB_SUCCESS) { - - ut_ad(err == DB_FAIL); - - ut_free(buf); - buf = 0; - - sz *= 2; - - buf = ut_zalloc_nokey(sz + srv_page_size); - - if (buf == 0) { - os_file_close(handle); - err = DB_OUT_OF_MEMORY; - UT_DELETE(truncate); - truncate = NULL; - break; - } - - log_buf = static_cast<byte*>( - ut_align(buf, srv_page_size)); - } - } while (err != DB_SUCCESS); - - ut_free(buf); - - if (err == DB_SUCCESS && truncate != NULL) { - truncate_t::add(truncate); - os_file_close(handle); - } - - return(err); -} - -/** -Scan and Parse truncate log files. - -@param dir_path look for log directory in following path -@return DB_SUCCESS or error code. 
*/ -dberr_t -TruncateLogParser::scan_and_parse( - const char* dir_path) -{ - dberr_t err; - trunc_log_files_t log_files; - - /* Scan and trace all the truncate log files. */ - err = TruncateLogParser::scan(dir_path, log_files); - - /* Parse truncate lof files if scan was successful. */ - if (err == DB_SUCCESS) { - - for (ulint i = 0; - i < log_files.size() && err == DB_SUCCESS; - i++) { - err = TruncateLogParser::parse(log_files[i]); - } - } - - trunc_log_files_t::const_iterator end = log_files.end(); - for (trunc_log_files_t::const_iterator it = log_files.begin(); - it != end; - ++it) { - if (*it != NULL) { - UT_DELETE_ARRAY(*it); - } - } - log_files.clear(); - - return(err); -} - -/** Callback to drop indexes during TRUNCATE */ -class DropIndex : public Callback { - -public: - /** - Constructor - - @param[in,out] table Table to truncate - @param[in] noredo whether to disable redo logging */ - DropIndex(dict_table_t* table, bool noredo) - : - Callback(table->id, noredo), - m_table(table) - { - /* No op */ - } - - /** - @param mtr mini-transaction covering the read - @param pcur persistent cursor used for reading - @return DB_SUCCESS or error code */ - dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const; - -private: - /** Table to be truncated */ - dict_table_t* m_table; -}; - -/** Callback to create the indexes during TRUNCATE */ -class CreateIndex : public Callback { - -public: - /** - Constructor - - @param[in,out] table Table to truncate - @param[in] noredo whether to disable redo logging */ - CreateIndex(dict_table_t* table, bool noredo) - : - Callback(table->id, noredo), - m_table(table) - { - /* No op */ - } - - /** - Create the new index and update the root page number in the - SysIndex table. 
- - @param mtr mini-transaction covering the read - @param pcur persistent cursor used for reading - @return DB_SUCCESS or error code */ - dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const; - -private: - // Disably copying - CreateIndex(const CreateIndex&); - CreateIndex& operator=(const CreateIndex&); - -private: - /** Table to be truncated */ - dict_table_t* m_table; -}; - -/** Check for presence of table-id in SYS_XXXX tables. */ -class TableLocator : public Callback { - -public: - /** - Constructor - @param table_id table_id to look for */ - explicit TableLocator(table_id_t table_id) - : - Callback(table_id, false), - m_table_found() - { - /* No op */ - } - - /** - @return true if table is found */ - bool is_table_found() const - { - return(m_table_found); - } - - /** - Look for table-id in SYS_XXXX tables without loading the table. - - @param pcur persistent cursor used for reading - @return DB_SUCCESS */ - dberr_t operator()(mtr_t*, btr_pcur_t*) - { - m_table_found = true; - return(DB_SUCCESS); - } - -private: - /** Set to true if table is present */ - bool m_table_found; -}; - -/** -Drop an index in the table. 
- -@param mtr mini-transaction covering the read -@param pcur persistent cursor used for reading -@return DB_SUCCESS or error code */ -dberr_t -DropIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const -{ - rec_t* rec = btr_pcur_get_rec(pcur); - - bool freed = dict_drop_index_tree(rec, pcur, mtr); - -#ifdef UNIV_DEBUG - { - ulint len; - const byte* field; - ulint index_type; - - field = rec_get_nth_field_old( - btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE, - &len); - ut_ad(len == 4); - - index_type = mach_read_from_4(field); - - if (index_type & DICT_CLUSTERED) { - /* Clustered index */ - DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_clust_index", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - DBUG_SUICIDE();); - } else if (index_type & DICT_UNIQUE) { - /* Unique index */ - DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_uniq_index", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - DBUG_SUICIDE();); - } else if (index_type == 0) { - /* Secondary index */ - DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_sec_index", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - DBUG_SUICIDE();); - } - } -#endif /* UNIV_DEBUG */ - - DBUG_EXECUTE_IF("ib_err_trunc_drop_index", return DB_ERROR;); - - if (freed) { - - /* We will need to commit and restart the - mini-transaction in order to avoid deadlocks. - The dict_drop_index_tree() call has freed - a page in this mini-transaction, and the rest - of this loop could latch another index page.*/ - const mtr_log_t log_mode = mtr->get_log_mode(); - mtr_commit(mtr); - - mtr_start(mtr); - mtr->set_log_mode(log_mode); - - btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); - } else { - if (!m_table->space) { - return DB_ERROR; - } - } - - return(DB_SUCCESS); -} - -/** -Create the new index and update the root page number in the -SysIndex table. 
- -@param mtr mini-transaction covering the read -@param pcur persistent cursor used for reading -@return DB_SUCCESS or error code */ -dberr_t -CreateIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const -{ - ulint root_page_no; - - root_page_no = dict_recreate_index_tree(m_table, pcur, mtr); - -#ifdef UNIV_DEBUG - { - ulint len; - const byte* field; - ulint index_type; - - field = rec_get_nth_field_old( - btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE, - &len); - ut_ad(len == 4); - - index_type = mach_read_from_4(field); - - if (index_type & DICT_CLUSTERED) { - /* Clustered index */ - DBUG_EXECUTE_IF( - "ib_trunc_crash_on_create_of_clust_index", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - DBUG_SUICIDE();); - } else if (index_type & DICT_UNIQUE) { - /* Unique index */ - DBUG_EXECUTE_IF( - "ib_trunc_crash_on_create_of_uniq_index", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - DBUG_SUICIDE();); - } else if (index_type == 0) { - /* Secondary index */ - DBUG_EXECUTE_IF( - "ib_trunc_crash_on_create_of_sec_index", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - DBUG_SUICIDE();); - } - } -#endif /* UNIV_DEBUG */ - - DBUG_EXECUTE_IF("ib_err_trunc_create_index", return DB_ERROR;); - - if (root_page_no != FIL_NULL) { - - rec_t* rec = btr_pcur_get_rec(pcur); - - page_rec_write_field( - rec, DICT_FLD__SYS_INDEXES__PAGE_NO, - root_page_no, mtr); - - /* We will need to commit and restart the - mini-transaction in order to avoid deadlocks. - The dict_create_index_tree() call has allocated - a page in this mini-transaction, and the rest of - this loop could latch another index page. */ - mtr_commit(mtr); - - mtr_start(mtr); - - btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); - - } else { - if (!m_table->space) { - return(DB_ERROR); - } - } - - return(DB_SUCCESS); -} - -/** -Update system table to reflect new table id. 
-@param old_table_id old table id -@param new_table_id new table id -@param reserve_dict_mutex if TRUE, acquire/release - dict_sys->mutex around call to pars_sql. -@param trx transaction -@return error code or DB_SUCCESS */ -static MY_ATTRIBUTE((warn_unused_result)) -dberr_t -row_truncate_update_table_id( - table_id_t old_table_id, - table_id_t new_table_id, - ibool reserve_dict_mutex, - trx_t* trx) -{ - pars_info_t* info = NULL; - dberr_t err = DB_SUCCESS; - - /* Scan the SYS_XXXX table and update to reflect new table-id. */ - info = pars_info_create(); - pars_info_add_ull_literal(info, "old_id", old_table_id); - pars_info_add_ull_literal(info, "new_id", new_table_id); - - err = que_eval_sql( - info, - "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLES" - " SET ID = :new_id\n" - " WHERE ID = :old_id;\n" - "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = :old_id;\n" - "UPDATE SYS_INDEXES" - " SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = :old_id;\n" - "UPDATE SYS_VIRTUAL" - " SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = :old_id;\n" - "END;\n", reserve_dict_mutex, trx); - - return(err); -} - -/** -Get the table id to truncate. -@param truncate_t old/new table id of table to truncate -@return table_id_t table_id to use in SYS_XXXX table update. */ -static MY_ATTRIBUTE((warn_unused_result)) -table_id_t -row_truncate_get_trunc_table_id( - const truncate_t& truncate) -{ - TableLocator tableLocator(truncate.old_table_id()); - - SysIndexIterator().for_each(tableLocator); - - return(tableLocator.is_table_found() ? - truncate.old_table_id(): truncate.new_table_id()); -} - -/** -Update system table to reflect new table id and root page number. -@param truncate_t old/new table id of table to truncate - and updated root_page_no of indexes. -@param new_table_id new table id -@param reserve_dict_mutex if TRUE, acquire/release - dict_sys->mutex around call to pars_sql. -@param mark_index_corrupted if true, then mark index corrupted. 
-@return error code or DB_SUCCESS */ -static MY_ATTRIBUTE((warn_unused_result)) -dberr_t -row_truncate_update_sys_tables_during_fix_up( - const truncate_t& truncate, - table_id_t new_table_id, - ibool reserve_dict_mutex, - bool mark_index_corrupted) -{ - trx_t* trx = trx_create(); - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - table_id_t table_id = row_truncate_get_trunc_table_id(truncate); - - /* Step-1: Update the root-page-no */ - - dberr_t err; - - err = truncate.update_root_page_no( - trx, table_id, reserve_dict_mutex, mark_index_corrupted); - - if (err != DB_SUCCESS) { - return(err); - } - - /* Step-2: Update table-id. */ - - err = row_truncate_update_table_id( - table_id, new_table_id, reserve_dict_mutex, trx); - - if (err == DB_SUCCESS) { - dict_mutex_enter_for_mysql(); - - /* Remove the table with old table_id from cache. */ - dict_table_t* old_table = dict_table_open_on_id( - table_id, true, DICT_TABLE_OP_NORMAL); - - if (old_table != NULL) { - dict_table_close(old_table, true, false); - dict_table_remove_from_cache(old_table); - } - - /* Open table with new table_id and set table as - corrupted if it has FTS index. */ - - dict_table_t* table = dict_table_open_on_id( - new_table_id, true, DICT_TABLE_OP_NORMAL); - ut_ad(table->id == new_table_id); - - bool has_internal_doc_id = - dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET( - table, DICT_TF2_FTS_HAS_DOC_ID); - - if (has_internal_doc_id) { - trx->dict_operation_lock_mode = RW_X_LATCH; - fts_check_corrupt(table, trx); - trx->dict_operation_lock_mode = 0; - } - - dict_table_close(table, true, false); - dict_mutex_exit_for_mysql(); - } - - trx_commit_for_mysql(trx); - trx_free(trx); - - return(err); -} - -/********************************************************//** -Recreates table indexes by applying -TRUNCATE log record during recovery. 
-@return DB_SUCCESS or error code */ -static -dberr_t -fil_recreate_table( -/*===============*/ - ulint format_flags, /*!< in: page format */ - const char* name, /*!< in: table name */ - truncate_t& truncate) /*!< in: The information of - TRUNCATE log record */ -{ - ut_ad(!truncate_t::s_fix_up_active); - truncate_t::s_fix_up_active = true; - - /* Step-1: Scan for active indexes from REDO logs and drop - all the indexes using low level function that take root_page_no - and space-id. */ - truncate.drop_indexes(fil_system.sys_space); - - /* Step-2: Scan for active indexes and re-create them. */ - dberr_t err = truncate.create_indexes( - name, fil_system.sys_space, format_flags); - if (err != DB_SUCCESS) { - ib::info() << "Recovery failed for TRUNCATE TABLE '" - << name << "' within the system tablespace"; - } - - truncate_t::s_fix_up_active = false; - - return(err); -} - -/********************************************************//** -Recreates the tablespace and table indexes by applying -TRUNCATE log record during recovery. -@return DB_SUCCESS or error code */ -static -dberr_t -fil_recreate_tablespace( -/*====================*/ - ulint space_id, /*!< in: space id */ - ulint format_flags, /*!< in: page format */ - ulint flags, /*!< in: tablespace flags */ - const char* name, /*!< in: table name */ - truncate_t& truncate, /*!< in: The information of - TRUNCATE log record */ - lsn_t recv_lsn) /*!< in: the end LSN of - the log record */ -{ - dberr_t err = DB_SUCCESS; - mtr_t mtr; - - ut_ad(!truncate_t::s_fix_up_active); - truncate_t::s_fix_up_active = true; - - /* Step-1: Invalidate buffer pool pages belonging to the tablespace - to re-create. */ - buf_LRU_flush_or_remove_pages(space_id, NULL); - - /* Remove all insert buffer entries for the tablespace */ - ibuf_delete_for_discarded_space(space_id); - - /* Step-2: truncate tablespace (reset the size back to original or - default size) of tablespace. 
*/ - err = truncate.truncate( - space_id, truncate.get_dir_path(), name, flags, true); - - if (err != DB_SUCCESS) { - - ib::info() << "Cannot access .ibd file for table '" - << name << "' with tablespace " << space_id - << " while truncating"; - return(DB_ERROR); - } - - fil_space_t* space = fil_space_acquire(space_id); - if (!space) { - ib::info() << "Missing .ibd file for table '" << name - << "' with tablespace " << space_id; - return(DB_ERROR); - } - - const page_size_t page_size(space->flags); - - /* Step-3: Initialize Header. */ - if (page_size.is_compressed()) { - byte* buf; - page_t* page; - - buf = static_cast<byte*>( - ut_zalloc_nokey(3U << srv_page_size_shift)); - - /* Align the memory for file i/o */ - page = static_cast<byte*>(ut_align(buf, srv_page_size)); - - flags |= FSP_FLAGS_PAGE_SSIZE(); - - fsp_header_init_fields(page, space_id, flags); - - mach_write_to_4( - page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); - - page_zip_des_t page_zip; - page_zip_set_size(&page_zip, page_size.physical()); - page_zip.data = page + srv_page_size; - -#ifdef UNIV_DEBUG - page_zip.m_start = -#endif /* UNIV_DEBUG */ - page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0; - buf_flush_init_for_writing(NULL, page, &page_zip, 0); - - err = fil_io(IORequestWrite, true, page_id_t(space_id, 0), - page_size, 0, page_size.physical(), page_zip.data, - NULL); - - ut_free(buf); - - if (err != DB_SUCCESS) { - ib::info() << "Failed to clean header of the" - " table '" << name << "' with tablespace " - << space_id; - goto func_exit; - } - } - - mtr_start(&mtr); - /* Don't log the operation while fixing up table truncate operation - as crash at this level can still be sustained with recovery restarting - from last checkpoint. */ - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - - /* Initialize the first extent descriptor page and - the second bitmap page for the new tablespace. 
*/ - fsp_header_init(space, FIL_IBD_FILE_INITIAL_SIZE, &mtr); - mtr_commit(&mtr); - - /* Step-4: Re-Create Indexes to newly re-created tablespace. - This operation will restore tablespace back to what it was - when it was created during CREATE TABLE. */ - err = truncate.create_indexes(name, space, format_flags); - if (err != DB_SUCCESS) { - goto func_exit; - } - - /* Step-5: Write new created pages into ibd file handle and - flush it to disk for the tablespace, in case i/o-handler thread - deletes the bitmap page from buffer. */ - mtr_start(&mtr); - - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - - for (ulint page_no = 0; - page_no < UT_LIST_GET_FIRST(space->chain)->size; ++page_no) { - - const page_id_t cur_page_id(space_id, page_no); - - buf_block_t* block = buf_page_get(cur_page_id, page_size, - RW_X_LATCH, &mtr); - - byte* page = buf_block_get_frame(block); - - if (!FSP_FLAGS_GET_ZIP_SSIZE(flags)) { - ut_ad(!page_size.is_compressed()); - - buf_flush_init_for_writing( - block, page, NULL, recv_lsn); - - err = fil_io(IORequestWrite, true, cur_page_id, - page_size, 0, srv_page_size, page, NULL); - } else { - ut_ad(page_size.is_compressed()); - - /* We don't want to rewrite empty pages. 
*/ - - if (fil_page_get_type(page) != 0) { - page_zip_des_t* page_zip = - buf_block_get_page_zip(block); - - buf_flush_init_for_writing( - block, page, page_zip, recv_lsn); - - err = fil_io(IORequestWrite, true, - cur_page_id, - page_size, 0, - page_size.physical(), - page_zip->data, NULL); - } else { -#ifdef UNIV_DEBUG - const byte* data = block->page.zip.data; - - /* Make sure that the page is really empty */ - for (ulint i = 0; - i < page_size.physical(); - ++i) { - - ut_a(data[i] == 0); - } -#endif /* UNIV_DEBUG */ - } - } - - if (err != DB_SUCCESS) { - ib::info() << "Cannot write page " << page_no - << " into a .ibd file for table '" - << name << "' with tablespace " << space_id; - } - } - - mtr_commit(&mtr); - - truncate_t::s_fix_up_active = false; -func_exit: - space->release(); - return(err); -} - -/** -Fix the table truncate by applying information parsed from TRUNCATE log. -Fix-up includes re-creating table (drop and re-create indexes) -@return error code or DB_SUCCESS */ -dberr_t -truncate_t::fixup_tables_in_system_tablespace() -{ - dberr_t err = DB_SUCCESS; - - /* Using the info cached during REDO log scan phase fix the - table truncate. */ - - for (tables_t::iterator it = s_tables.begin(); - it != s_tables.end();) { - - if ((*it)->m_space_id == TRX_SYS_SPACE) { - /* Step-1: Drop and re-create indexes. */ - ib::info() << "Completing truncate for table with " - "id (" << (*it)->m_old_table_id << ") " - "residing in the system tablespace."; - - err = fil_recreate_table( - (*it)->m_format_flags, - (*it)->m_tablename, - **it); - - /* Step-2: Update the SYS_XXXX tables to reflect - this new table_id and root_page_no. */ - table_id_t new_id; - - dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true); - - err = row_truncate_update_sys_tables_during_fix_up( - **it, new_id, TRUE, - (err == DB_SUCCESS) ? 
false : true); - - if (err != DB_SUCCESS) { - break; - } - - os_file_delete( - innodb_log_file_key, (*it)->m_log_file_name); - UT_DELETE(*it); - it = s_tables.erase(it); - } else { - ++it; - } - } - - /* Also clear the map used to track tablespace truncated. */ - s_truncated_tables.clear(); - - return(err); -} - -/** -Fix the table truncate by applying information parsed from TRUNCATE log. -Fix-up includes re-creating tablespace. -@return error code or DB_SUCCESS */ -dberr_t -truncate_t::fixup_tables_in_non_system_tablespace() -{ - dberr_t err = DB_SUCCESS; - - /* Using the info cached during REDO log scan phase fix the - table truncate. */ - tables_t::iterator end = s_tables.end(); - - for (tables_t::iterator it = s_tables.begin(); it != end; ++it) { - - /* All tables in the system tablespace have already been - done and erased from this list. */ - ut_a((*it)->m_space_id != TRX_SYS_SPACE); - - /* Drop tablespace, drop indexes and re-create indexes. */ - - ib::info() << "Completing truncate for table with " - "id (" << (*it)->m_old_table_id << ") " - "residing in file-per-table tablespace with " - "id (" << (*it)->m_space_id << ")"; - - fil_space_t* space = fil_space_get((*it)->m_space_id); - - if (!space) { - /* Create the database directory for name, - if it does not exist yet */ - fil_create_directory_for_tablename( - (*it)->m_tablename); - - space = fil_ibd_create((*it)->m_space_id, - (*it)->m_tablename, - (*it)->m_dir_path, - (*it)->m_tablespace_flags, - FIL_IBD_FILE_INITIAL_SIZE, - (*it)->m_encryption, - (*it)->m_key_id, &err); - if (!space) { - /* If checkpoint is not yet done - and table is dropped and then we might - still have REDO entries for this table - which are INVALID. Ignore them. 
*/ - ib::warn() << "Failed to create" - " tablespace for " - << (*it)->m_space_id - << " space-id"; - err = DB_ERROR; - break; - } - } - - err = fil_recreate_tablespace( - (*it)->m_space_id, - (*it)->m_format_flags, - (*it)->m_tablespace_flags, - (*it)->m_tablename, - **it, log_get_lsn()); - - /* Step-2: Update the SYS_XXXX tables to reflect new - table-id and root_page_no. */ - table_id_t new_id; - - dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true); - - err = row_truncate_update_sys_tables_during_fix_up( - **it, new_id, TRUE, (err == DB_SUCCESS) ? false : true); - - if (err != DB_SUCCESS) { - break; - } - } - - if (err == DB_SUCCESS && s_tables.size() > 0) { - - log_make_checkpoint_at(LSN_MAX, TRUE); - } - - for (ulint i = 0; i < s_tables.size(); ++i) { - os_file_delete( - innodb_log_file_key, s_tables[i]->m_log_file_name); - UT_DELETE(s_tables[i]); - } - - s_tables.clear(); - - return(err); -} - -/** -Constructor - -@param old_table_id old table id assigned to table before truncate -@param new_table_id new table id that will be assigned to table - after truncate -@param dir_path directory path */ - -truncate_t::truncate_t( - table_id_t old_table_id, - table_id_t new_table_id, - const char* dir_path) - : - m_space_id(), - m_old_table_id(old_table_id), - m_new_table_id(new_table_id), - m_dir_path(), - m_tablename(), - m_tablespace_flags(), - m_format_flags(), - m_indexes(), - m_log_lsn(), - m_log_file_name(), - /* JAN: TODO: Encryption */ - m_encryption(FIL_ENCRYPTION_DEFAULT), - m_key_id(FIL_DEFAULT_ENCRYPTION_KEY) -{ - if (dir_path != NULL) { - m_dir_path = mem_strdup(dir_path); - } -} - -/** -Consturctor - -@param log_file_name parse the log file during recovery to populate - information related to table to truncate */ -truncate_t::truncate_t( - const char* log_file_name) - : - m_space_id(), - m_old_table_id(), - m_new_table_id(), - m_dir_path(), - m_tablename(), - m_tablespace_flags(), - m_format_flags(), - m_indexes(), - m_log_lsn(), - m_log_file_name(), - 
/* JAN: TODO: Encryption */ - m_encryption(FIL_ENCRYPTION_DEFAULT), - m_key_id(FIL_DEFAULT_ENCRYPTION_KEY) - -{ - m_log_file_name = mem_strdup(log_file_name); - if (m_log_file_name == NULL) { - ib::fatal() << "Failed creating truncate_t; out of memory"; - } -} - -/** Constructor */ - -truncate_t::index_t::index_t() - : - m_id(), - m_type(), - m_root_page_no(FIL_NULL), - m_new_root_page_no(FIL_NULL), - m_n_fields(), - m_trx_id_pos(ULINT_UNDEFINED), - m_fields() -{ - /* Do nothing */ -} - -/** Destructor */ - -truncate_t::~truncate_t() -{ - if (m_dir_path != NULL) { - ut_free(m_dir_path); - m_dir_path = NULL; - } - - if (m_tablename != NULL) { - ut_free(m_tablename); - m_tablename = NULL; - } - - if (m_log_file_name != NULL) { - ut_free(m_log_file_name); - m_log_file_name = NULL; - } - - m_indexes.clear(); -} - -/** -@return number of indexes parsed from the log record */ - -size_t -truncate_t::indexes() const -{ - return(m_indexes.size()); -} - -/** -Update root page number in SYS_XXXX tables. - -@param trx transaction object -@param table_id table id for which information needs to - be updated. -@param reserve_dict_mutex if TRUE, acquire/release - dict_sys->mutex around call to pars_sql. -@param mark_index_corrupted if true, then mark index corrupted. -@return DB_SUCCESS or error code */ - -dberr_t -truncate_t::update_root_page_no( - trx_t* trx, - table_id_t table_id, - ibool reserve_dict_mutex, - bool mark_index_corrupted) const -{ - indexes_t::const_iterator end = m_indexes.end(); - - dberr_t err = DB_SUCCESS; - - for (indexes_t::const_iterator it = m_indexes.begin(); - it != end; - ++it) { - - pars_info_t* info = pars_info_create(); - - pars_info_add_int4_literal( - info, "page_no", it->m_new_root_page_no); - - pars_info_add_ull_literal(info, "table_id", table_id); - - pars_info_add_ull_literal( - info, "index_id", - (mark_index_corrupted ? 
IB_ID_MAX : it->m_id)); - - err = que_eval_sql( - info, - "PROCEDURE RENUMBER_IDX_PAGE_NO_PROC () IS\n" - "BEGIN\n" - "UPDATE SYS_INDEXES" - " SET PAGE_NO = :page_no\n" - " WHERE TABLE_ID = :table_id" - " AND ID = :index_id;\n" - "END;\n", reserve_dict_mutex, trx); - - if (err != DB_SUCCESS) { - break; - } - } - - return(err); -} - -/** -Check whether a tablespace was truncated during recovery -@param space_id tablespace id to check -@return true if the tablespace was truncated */ - -bool -truncate_t::is_tablespace_truncated(ulint space_id) -{ - tables_t::iterator end = s_tables.end(); - - for (tables_t::iterator it = s_tables.begin(); it != end; ++it) { - - if ((*it)->m_space_id == space_id) { - - return(true); - } - } - - return(false); -} - -/** Was tablespace truncated (on crash before checkpoint). -If the MLOG_TRUNCATE redo-record is still available then tablespace -was truncated and checkpoint is yet to happen. -@param[in] space_id tablespace id to check. -@return true if tablespace is was truncated. */ -bool -truncate_t::was_tablespace_truncated(ulint space_id) -{ - return(s_truncated_tables.find(space_id) != s_truncated_tables.end()); -} - -/** Get the lsn associated with space. -@param[in] space_id tablespace id to check. -@return associated lsn. */ -lsn_t -truncate_t::get_truncated_tablespace_init_lsn(ulint space_id) -{ - ut_ad(was_tablespace_truncated(space_id)); - - return(s_truncated_tables.find(space_id)->second); -} - -/** -Parses log record during recovery -@param start_ptr buffer containing log body to parse -@param end_ptr buffer end - -@return DB_SUCCESS or error code */ - -dberr_t -truncate_t::parse( - byte* start_ptr, - const byte* end_ptr) -{ - /* Parse lsn, space-id, format-flags and tablespace-flags. 
*/ - if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) { - return(DB_FAIL); - } - - m_log_lsn = mach_read_from_8(start_ptr); - start_ptr += 8; - - m_space_id = mach_read_from_4(start_ptr); - start_ptr += 4; - - m_format_flags = mach_read_from_4(start_ptr); - start_ptr += 4; - - m_tablespace_flags = mach_read_from_4(start_ptr); - start_ptr += 4; - - /* Parse table-name. */ - if (end_ptr < start_ptr + (2)) { - return(DB_FAIL); - } - - ulint n_tablename_len = mach_read_from_2(start_ptr); - start_ptr += 2; - - if (n_tablename_len > 0) { - if (end_ptr < start_ptr + n_tablename_len) { - return(DB_FAIL); - } - m_tablename = mem_strdup(reinterpret_cast<char*>(start_ptr)); - ut_ad(m_tablename[n_tablename_len - 1] == 0); - start_ptr += n_tablename_len; - } - - - /* Parse and read old/new table-id, number of indexes */ - if (end_ptr < start_ptr + (8 + 8 + 2 + 2)) { - return(DB_FAIL); - } - - ut_ad(m_indexes.empty()); - - m_old_table_id = mach_read_from_8(start_ptr); - start_ptr += 8; - - m_new_table_id = mach_read_from_8(start_ptr); - start_ptr += 8; - - ulint n_indexes = mach_read_from_2(start_ptr); - start_ptr += 2; - - /* Parse the remote directory from TRUNCATE log record */ - { - ulint n_tabledirpath_len = mach_read_from_2(start_ptr); - start_ptr += 2; - - if (end_ptr < start_ptr + n_tabledirpath_len) { - return(DB_FAIL); - } - - if (n_tabledirpath_len > 0) { - - m_dir_path = mem_strdup(reinterpret_cast<char*>(start_ptr)); - ut_ad(m_dir_path[n_tabledirpath_len - 1] == 0); - start_ptr += n_tabledirpath_len; - } - } - - /* Parse index ids and types from TRUNCATE log record */ - for (ulint i = 0; i < n_indexes; ++i) { - index_t index; - - if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) { - return(DB_FAIL); - } - - index.m_id = mach_read_from_8(start_ptr); - start_ptr += 8; - - index.m_type = mach_read_from_4(start_ptr); - start_ptr += 4; - - index.m_root_page_no = mach_read_from_4(start_ptr); - start_ptr += 4; - - index.m_trx_id_pos = mach_read_from_4(start_ptr); - start_ptr += 4; - - 
if (!(index.m_type & DICT_FTS)) { - m_indexes.push_back(index); - } - } - - ut_ad(!m_indexes.empty()); - - if (FSP_FLAGS_GET_ZIP_SSIZE(m_tablespace_flags)) { - - /* Parse the number of index fields from TRUNCATE log record */ - for (ulint i = 0; i < m_indexes.size(); ++i) { - - if (end_ptr < start_ptr + (2 + 2)) { - return(DB_FAIL); - } - - m_indexes[i].m_n_fields = mach_read_from_2(start_ptr); - start_ptr += 2; - - ulint len = mach_read_from_2(start_ptr); - start_ptr += 2; - - if (end_ptr < start_ptr + len) { - return(DB_FAIL); - } - - index_t& index = m_indexes[i]; - - /* Should be NUL terminated. */ - ut_ad((start_ptr)[len - 1] == 0); - - index_t::fields_t::iterator end; - - end = index.m_fields.end(); - - index.m_fields.insert( - end, start_ptr, &(start_ptr)[len]); - - start_ptr += len; - } - } - - return(DB_SUCCESS); -} - -/** Parse log record from REDO log file during recovery. -@param[in,out] start_ptr buffer containing log body to parse -@param[in] end_ptr buffer end -@param[in] space_id tablespace identifier -@return parsed upto or NULL. */ -byte* -truncate_t::parse_redo_entry( - byte* start_ptr, - const byte* end_ptr, - ulint space_id) -{ - lsn_t lsn; - - /* Parse space-id, lsn */ - if (end_ptr < (start_ptr + 8)) { - return(NULL); - } - - lsn = mach_read_from_8(start_ptr); - start_ptr += 8; - - /* Tablespace can't exist in both state. - (scheduled-for-truncate, was-truncated). */ - if (!is_tablespace_truncated(space_id)) { - - truncated_tables_t::iterator it = - s_truncated_tables.find(space_id); - - if (it == s_truncated_tables.end()) { - s_truncated_tables.insert( - std::pair<ulint, lsn_t>(space_id, lsn)); - } else { - it->second = lsn; - } - } - - return(start_ptr); -} - -/** -Set the truncate log values for a compressed table. 
-@param index index from which recreate infoormation needs to be extracted -@return DB_SUCCESS or error code */ - -dberr_t -truncate_t::index_t::set( - const dict_index_t* index) -{ - /* Get trx-id column position (set only for clustered index) */ - if (dict_index_is_clust(index)) { - m_trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); - ut_ad(m_trx_id_pos > 0); - ut_ad(m_trx_id_pos != ULINT_UNDEFINED); - } else { - m_trx_id_pos = 0; - } - - /* Original logic set this field differently if page is not leaf. - For truncate case this being first page to get created it is - always a leaf page and so we don't need that condition here. */ - m_n_fields = dict_index_get_n_fields(index); - - /* See requirements of page_zip_fields_encode for size. */ - ulint encoded_buf_size = (m_n_fields + 1) * 2; - byte* encoded_buf = UT_NEW_ARRAY_NOKEY(byte, encoded_buf_size); - - if (encoded_buf == NULL) { - return(DB_OUT_OF_MEMORY); - } - - ulint len = page_zip_fields_encode( - m_n_fields, index, m_trx_id_pos, encoded_buf); - ut_a(len <= encoded_buf_size); - - /* Append the encoded fields data. */ - m_fields.insert(m_fields.end(), &encoded_buf[0], &encoded_buf[len]); - - /* NUL terminate the encoded data */ - m_fields.push_back(0); - - UT_DELETE_ARRAY(encoded_buf); - - return(DB_SUCCESS); -} - -/** Create an index for a table. 
-@param[in] table_name table name, for which to create -the index -@param[in] space tablespace -@param[in] page_size page size of the .ibd file -@param[in] index_type type of index to truncate -@param[in] index_id id of index to truncate -@param[in] btr_redo_create_info control info for ::btr_create() -@param[in,out] mtr mini-transaction covering the -create index -@return root page no or FIL_NULL on failure */ -inline ulint -truncate_t::create_index( - const char* table_name, - fil_space_t* space, - ulint index_type, - index_id_t index_id, - const btr_create_t& btr_redo_create_info, - mtr_t* mtr) const -{ - ulint root_page_no = btr_create( - index_type, space, index_id, - NULL, &btr_redo_create_info, mtr); - - if (root_page_no == FIL_NULL) { - - ib::info() << "innodb_force_recovery was set to " - << srv_force_recovery << ". Continuing crash recovery" - " even though we failed to create index " << index_id - << " for compressed table '" << table_name << "' with" - " file " << space->chain.start->name; - } - - return(root_page_no); -} - -/** Check if index has been modified since TRUNCATE log snapshot -was recorded. -@param[in] space tablespace -@param[in] root_page_no index root page number -@return true if modified else false */ -inline -bool -truncate_t::is_index_modified_since_logged( - const fil_space_t* space, - ulint root_page_no) const -{ - dberr_t err; - mtr_t mtr; - - mtr_start(&mtr); - - /* Root page could be in free state if truncate crashed after drop_index - and page was not allocated for any other object. 
*/ - buf_block_t* block= buf_page_get_gen( - page_id_t(space->id, root_page_no), page_size_t(space->flags), - RW_X_LATCH, NULL, - BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, &mtr, &err); - if (!block) return true; - - page_t* root = buf_block_get_frame(block); - -#ifdef UNIV_DEBUG - /* If the root page has been freed as part of truncate drop_index action - and not yet allocated for any object still the pagelsn > snapshot lsn */ - if (block->page.file_page_was_freed) { - ut_ad(mach_read_from_8(root + FIL_PAGE_LSN) > m_log_lsn); - } -#endif /* UNIV_DEBUG */ - - lsn_t page_lsn = mach_read_from_8(root + FIL_PAGE_LSN); - - mtr_commit(&mtr); - - if (page_lsn > m_log_lsn) { - return(true); - } - - return(false); -} - -/** Drop indexes for a table. -@param[in,out] space tablespace */ -void truncate_t::drop_indexes(fil_space_t* space) const -{ - mtr_t mtr; - - indexes_t::const_iterator end = m_indexes.end(); - const page_size_t page_size(space->flags); - - for (indexes_t::const_iterator it = m_indexes.begin(); - it != end; - ++it) { - - ulint root_page_no = it->m_root_page_no; - - if (is_index_modified_since_logged(space, root_page_no)) { - /* Page has been modified since TRUNCATE log snapshot - was recorded so not safe to drop the index. */ - continue; - } - - mtr_start(&mtr); - - if (space->id != TRX_SYS_SPACE) { - /* Do not log changes for single-table - tablespaces, we are in recovery mode. */ - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - } - - if (root_page_no != FIL_NULL) { - const page_id_t root_page_id(space->id, root_page_no); - - btr_free_if_exists( - root_page_id, page_size, it->m_id, &mtr); - } - - /* If tree is already freed then we might return immediately - in which case we need to release the lock we have acquired - on root_page. 
*/ - mtr_commit(&mtr); - } -} - - -/** Create the indexes for a table -@param[in] table_name table name, for which to create the indexes -@param[in,out] space tablespace -@param[in] format_flags page format flags -@return DB_SUCCESS or error code. */ -inline dberr_t -truncate_t::create_indexes( - const char* table_name, - fil_space_t* space, - ulint format_flags) -{ - mtr_t mtr; - - mtr_start(&mtr); - - if (space->id != TRX_SYS_SPACE) { - /* Do not log changes for single-table tablespaces, we - are in recovery mode. */ - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - } - - /* Create all new index trees with table format, index ids, index - types, number of index fields and index field information taken - out from the TRUNCATE log record. */ - - ulint root_page_no = FIL_NULL; - indexes_t::iterator end = m_indexes.end(); - for (indexes_t::iterator it = m_indexes.begin(); - it != end; - ++it) { - - btr_create_t btr_redo_create_info( - FSP_FLAGS_GET_ZIP_SSIZE(space->flags) - ? &it->m_fields[0] : NULL); - - btr_redo_create_info.format_flags = format_flags; - - if (FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) { - - btr_redo_create_info.n_fields = it->m_n_fields; - /* Skip the NUL appended field */ - btr_redo_create_info.field_len = - it->m_fields.size() - 1; - btr_redo_create_info.trx_id_pos = it->m_trx_id_pos; - } - - root_page_no = create_index( - table_name, space, it->m_type, it->m_id, - btr_redo_create_info, &mtr); - - if (root_page_no == FIL_NULL) { - break; - } - - it->m_new_root_page_no = root_page_no; - } - - mtr_commit(&mtr); - - return(root_page_no == FIL_NULL ? DB_ERROR : DB_SUCCESS); -} - -/** -Write a TRUNCATE log record for fixing up table if truncate crashes. 
-@param start_ptr buffer to write log record -@param end_ptr buffer end -@param space_id space id -@param tablename the table name in the usual databasename/tablename - format of InnoDB -@param flags tablespace flags -@param format_flags page format -@param lsn lsn while logging -@return DB_SUCCESS or error code */ - -dberr_t -truncate_t::write( - byte* start_ptr, - byte* end_ptr, - ulint space_id, - const char* tablename, - ulint flags, - ulint format_flags, - lsn_t lsn) const -{ - if (end_ptr < start_ptr) { - return(DB_FAIL); - } - - /* LSN, Type, Space-ID, format-flag (also know as log_flag. - Stored in page_no field), tablespace flags */ - if (end_ptr < (start_ptr + (8 + 4 + 4 + 4))) { - return(DB_FAIL); - } - - mach_write_to_8(start_ptr, lsn); - start_ptr += 8; - - mach_write_to_4(start_ptr, space_id); - start_ptr += 4; - - mach_write_to_4(start_ptr, format_flags); - start_ptr += 4; - - mach_write_to_4(start_ptr, flags); - start_ptr += 4; - - /* Name of the table. */ - /* Include the NUL in the log record. */ - ulint len = strlen(tablename) + 1; - if (end_ptr < (start_ptr + (len + 2))) { - return(DB_FAIL); - } - - mach_write_to_2(start_ptr, len); - start_ptr += 2; - - memcpy(start_ptr, tablename, len - 1); - start_ptr += len; - - DBUG_EXECUTE_IF("ib_trunc_crash_while_writing_redo_log", - DBUG_SUICIDE();); - - /* Old/New Table-ID, Number of Indexes and Tablespace dir-path-name. */ - /* Write the remote directory of the table into mtr log */ - len = m_dir_path != NULL ? strlen(m_dir_path) + 1 : 0; - if (end_ptr < (start_ptr + (len + 8 + 8 + 2 + 2))) { - return(DB_FAIL); - } - - /* Write out old-table-id. */ - mach_write_to_8(start_ptr, m_old_table_id); - start_ptr += 8; - - /* Write out new-table-id. */ - mach_write_to_8(start_ptr, m_new_table_id); - start_ptr += 8; - - /* Write out the number of indexes. */ - mach_write_to_2(start_ptr, m_indexes.size()); - start_ptr += 2; - - /* Write the length (NUL included) of the .ibd path. 
*/ - mach_write_to_2(start_ptr, len); - start_ptr += 2; - - if (m_dir_path != NULL) { - memcpy(start_ptr, m_dir_path, len - 1); - start_ptr += len; - } - - /* Indexes information (id, type) */ - /* Write index ids, type, root-page-no into mtr log */ - for (ulint i = 0; i < m_indexes.size(); ++i) { - - if (end_ptr < (start_ptr + (8 + 4 + 4 + 4))) { - return(DB_FAIL); - } - - mach_write_to_8(start_ptr, m_indexes[i].m_id); - start_ptr += 8; - - mach_write_to_4(start_ptr, m_indexes[i].m_type); - start_ptr += 4; - - mach_write_to_4(start_ptr, m_indexes[i].m_root_page_no); - start_ptr += 4; - - mach_write_to_4(start_ptr, m_indexes[i].m_trx_id_pos); - start_ptr += 4; - } - - /* If tablespace compressed then field info of each index. */ - if (FSP_FLAGS_GET_ZIP_SSIZE(flags)) { - - for (ulint i = 0; i < m_indexes.size(); ++i) { - - ulint len = m_indexes[i].m_fields.size(); - if (end_ptr < (start_ptr + (len + 2 + 2))) { - return(DB_FAIL); - } - - mach_write_to_2( - start_ptr, m_indexes[i].m_n_fields); - start_ptr += 2; - - mach_write_to_2(start_ptr, len); - start_ptr += 2; - - const byte* ptr = &m_indexes[i].m_fields[0]; - memcpy(start_ptr, ptr, len - 1); - start_ptr += len; - } - } - - return(DB_SUCCESS); -} diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index b2679c87dae..9a42333133d 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -80,8 +80,19 @@ row_undo_ins_remove_clust_rec( if (index->table->is_temporary()) { ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); mtr.set_log_mode(MTR_LOG_NO_REDO); + ut_ad(!dict_index_is_online_ddl(index)); + ut_ad(index->table->id >= DICT_HDR_FIRST_ID); + online = false; } else { index->set_modified(mtr); + online = dict_index_is_online_ddl(index); + if (online) { + ut_ad(node->trx->dict_operation_lock_mode + != RW_X_LATCH); + ut_ad(node->table->id != DICT_INDEXES_ID); + ut_ad(node->table->id != DICT_COLUMNS_ID); + mtr_s_lock(dict_index_get_lock(index), &mtr); + } } /* This 
is similar to row_undo_mod_clust(). The DDL thread may @@ -90,14 +101,6 @@ row_undo_ins_remove_clust_rec( purged. However, we can log the removal out of sync with the B-tree modification. */ - online = dict_index_is_online_ddl(index); - if (online) { - ut_ad(node->trx->dict_operation_lock_mode - != RW_X_LATCH); - ut_ad(node->table->id != DICT_INDEXES_ID); - mtr_s_lock(dict_index_get_lock(index), &mtr); - } - success = btr_pcur_restore_position( online ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED @@ -119,72 +122,47 @@ row_undo_ins_remove_clust_rec( rec, index, NULL, true, ULINT_UNDEFINED, &heap); row_log_table_delete(rec, index, offsets, NULL); mem_heap_free(heap); - } - - switch (node->table->id) { - case DICT_INDEXES_ID: - ut_ad(!online); - ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - - dict_drop_index_tree( - btr_pcur_get_rec(&node->pcur), &(node->pcur), &mtr); + } else { + switch (node->table->id) { + case DICT_INDEXES_ID: + ut_ad(!online); + ut_ad(node->trx->dict_operation_lock_mode + == RW_X_LATCH); + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - mtr.commit(); + dict_drop_index_tree(btr_pcur_get_rec(&node->pcur), + &node->pcur, &mtr); + mtr.commit(); - mtr.start(); - - success = btr_pcur_restore_position( - BTR_MODIFY_LEAF, &node->pcur, &mtr); - ut_a(success); - break; - case DICT_COLUMNS_ID: - /* This is rolling back an INSERT into SYS_COLUMNS. - If it was part of an instant ADD COLUMN operation, we - must modify the table definition. At this point, any - corresponding operation to the metadata record will have - been rolled back. 
*/ - ut_ad(!online); - ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - const rec_t* rec = btr_pcur_get_rec(&node->pcur); - if (rec_get_n_fields_old(rec) - != DICT_NUM_FIELDS__SYS_COLUMNS) { + mtr.start(); + success = btr_pcur_restore_position( + BTR_MODIFY_LEAF, &node->pcur, &mtr); + ut_a(success); break; + case DICT_COLUMNS_ID: + /* This is rolling back an INSERT into SYS_COLUMNS. + If it was part of an instant ALTER TABLE operation, we + must evict the table definition, so that it can be + reloaded after the dictionary operation has been + completed. At this point, any corresponding operation + to the metadata record will have been rolled back. */ + ut_ad(!online); + ut_ad(node->trx->dict_operation_lock_mode + == RW_X_LATCH); + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); + const rec_t* rec = btr_pcur_get_rec(&node->pcur); + if (rec_get_n_fields_old(rec) + != DICT_NUM_FIELDS__SYS_COLUMNS) { + break; + } + ulint len; + const byte* data = rec_get_nth_field_old( + rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len); + if (len != 8) { + break; + } + node->trx->evict_table(mach_read_from_8(data)); } - ulint len; - const byte* data = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len); - if (len != 8) { - break; - } - const table_id_t table_id = mach_read_from_8(data); - data = rec_get_nth_field_old(rec, DICT_FLD__SYS_COLUMNS__POS, - &len); - if (len != 4) { - break; - } - const unsigned pos = mach_read_from_4(data); - if (pos == 0 || pos >= (1U << 16)) { - break; - } - dict_table_t* table = dict_table_open_on_id( - table_id, true, DICT_TABLE_OP_OPEN_ONLY_IF_CACHED); - if (!table) { - break; - } - - dict_index_t* index = dict_table_get_first_index(table); - - if (index && index->is_instant() - && DATA_N_SYS_COLS + 1 + pos == table->n_cols) { - /* This is the rollback of an instant ADD COLUMN. - Remove the column from the dictionary cache, - but keep the system columns. 
*/ - table->rollback_instant(pos); - } - - dict_table_close(table, true, false); } if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) { @@ -388,14 +366,10 @@ retry: return(err); } -/***********************************************************//** -Parses the row reference and other info in a fresh insert undo record. */ -static -void -row_undo_ins_parse_undo_rec( -/*========================*/ - undo_node_t* node, /*!< in/out: row undo node */ - ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */ +/** Parse an insert undo record. +@param[in,out] node row rollback state +@param[in] dict_locked whether the data dictionary cache is locked */ +static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) { dict_index_t* clust_index; byte* ptr; @@ -404,18 +378,28 @@ row_undo_ins_parse_undo_rec( ulint dummy; bool dummy_extern; - ut_ad(node); + ut_ad(node->state == UNDO_INSERT_PERSISTENT + || node->state == UNDO_INSERT_TEMPORARY); + ut_ad(node->trx->in_rollback); + ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr)); ptr = trx_undo_rec_get_pars(node->undo_rec, &node->rec_type, &dummy, &dummy_extern, &undo_no, &table_id); node->update = NULL; - node->table = dict_table_open_on_id( - table_id, dict_locked, DICT_TABLE_OP_NORMAL); + if (node->state == UNDO_INSERT_PERSISTENT) { + node->table = dict_table_open_on_id(table_id, dict_locked, + DICT_TABLE_OP_NORMAL); + } else if (!dict_locked) { + mutex_enter(&dict_sys->mutex); + node->table = dict_sys->get_temporary_table(table_id); + mutex_exit(&dict_sys->mutex); + } else { + node->table = dict_sys->get_temporary_table(table_id); + } - /* Skip the UNDO if we can't find the table or the .ibd file. */ - if (UNIV_UNLIKELY(node->table == NULL)) { - return; + if (!node->table) { + return false; } switch (node->rec_type) { @@ -454,6 +438,7 @@ close_table: connection, instead of doing this rollback. 
*/ dict_table_close(node->table, dict_locked, FALSE); node->table = NULL; + return false; } else { ut_ad(!node->table->skip_alter_undo); clust_index = dict_table_get_first_index(node->table); @@ -485,6 +470,8 @@ close_table: goto close_table; } } + + return true; } /***************************************************************//** @@ -561,18 +548,10 @@ row_undo_ins( que_thr_t* thr) /*!< in: query thread */ { dberr_t err; - ibool dict_locked; + bool dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH; - ut_ad(node->state == UNDO_NODE_INSERT); - ut_ad(node->trx->in_rollback); - ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr)); - - dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH; - - row_undo_ins_parse_undo_rec(node, dict_locked); - - if (node->table == NULL) { - return(DB_SUCCESS); + if (!row_undo_ins_parse_undo_rec(node, dict_locked)) { + return DB_SUCCESS; } /* Iterate over all the indexes and undo the insert.*/ @@ -595,26 +574,19 @@ row_undo_ins( break; } - /* fall through */ - case TRX_UNDO_INSERT_METADATA: log_free_check(); if (node->table->id == DICT_INDEXES_ID) { - ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - + ut_ad(!node->table->is_temporary()); if (!dict_locked) { mutex_enter(&dict_sys->mutex); } - } - - // FIXME: We need to update the dict_index_t::space and - // page number fields too. 
- err = row_undo_ins_remove_clust_rec(node); - - if (node->table->id == DICT_INDEXES_ID - && !dict_locked) { - - mutex_exit(&dict_sys->mutex); + err = row_undo_ins_remove_clust_rec(node); + if (!dict_locked) { + mutex_exit(&dict_sys->mutex); + } + } else { + err = row_undo_ins_remove_clust_rec(node); } if (err == DB_SUCCESS && node->table->stat_initialized) { @@ -634,6 +606,12 @@ row_undo_ins( node->table, node->trx->mysql_thd); } } + break; + + case TRX_UNDO_INSERT_METADATA: + log_free_check(); + ut_ad(!node->table->is_temporary()); + err = row_undo_ins_remove_clust_rec(node); } dict_table_close(node->table, dict_locked, FALSE); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 41079450159..fbbe6d4eef9 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -111,6 +111,9 @@ row_undo_mod_clust_low( ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur)) == thr_get_trx(thr)->id); + ut_ad(node->ref != &trx_undo_metadata + || node->update->info_bits == REC_INFO_METADATA_ADD + || node->update->info_bits == REC_INFO_METADATA_ALTER); if (mode != BTR_MODIFY_LEAF && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) { @@ -131,6 +134,7 @@ row_undo_mod_clust_low( btr_cur, offsets, offsets_heap, node->update, node->cmpl_info, thr, thr_get_trx(thr)->id, mtr); + ut_ad(err != DB_SUCCESS || node->ref != &trx_undo_metadata); } else { big_rec_t* dummy_big_rec; @@ -143,6 +147,52 @@ row_undo_mod_clust_low( node->cmpl_info, thr, thr_get_trx(thr)->id, mtr); ut_a(!dummy_big_rec); + + static const byte + INFIMUM[8] = {'i','n','f','i','m','u','m',0}, + SUPREMUM[8] = {'s','u','p','r','e','m','u','m'}; + + if (err == DB_SUCCESS + && node->ref == &trx_undo_metadata + && btr_cur_get_index(btr_cur)->table->instant + && node->update->info_bits == REC_INFO_METADATA_ADD) { + if (page_t* root = btr_root_get( + btr_cur_get_index(btr_cur), mtr)) { + byte* infimum; + byte *supremum; + if (page_is_comp(root)) { 
+ infimum = PAGE_NEW_INFIMUM + root; + supremum = PAGE_NEW_SUPREMUM + root; + } else { + infimum = PAGE_OLD_INFIMUM + root; + supremum = PAGE_OLD_SUPREMUM + root; + } + + ut_ad(!memcmp(infimum, INFIMUM, 8) + == !memcmp(supremum, SUPREMUM, 8)); + + if (memcmp(infimum, INFIMUM, 8)) { + mlog_write_string(infimum, INFIMUM, + 8, mtr); + mlog_write_string(supremum, SUPREMUM, + 8, mtr); + } + } + } + } + + if (err == DB_SUCCESS + && btr_cur_get_index(btr_cur)->table->id == DICT_COLUMNS_ID) { + /* This is rolling back an UPDATE or DELETE on SYS_COLUMNS. + If it was part of an instant ALTER TABLE operation, we + must evict the table definition, so that it can be + reloaded after the dictionary operation has been + completed. At this point, any corresponding operation + to the metadata record will have been rolled back. */ + const dfield_t& table_id = *dtuple_get_nth_field(node->row, 0); + ut_ad(dfield_get_len(&table_id) == 8); + node->trx->evict_table(mach_read_from_8(static_cast<byte*>( + table_id.data))); } return(err); @@ -399,22 +449,36 @@ row_undo_mod_clust( goto mtr_commit_exit; } + ulint trx_id_offset = index->trx_id_offset; ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1; - ut_ad(index->n_uniq <= MAX_REF_PARTS); - /* Reserve enough offsets for the PRIMARY KEY and 2 columns - so that we can access DB_TRX_ID, DB_ROLL_PTR. 
*/ - ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2]; - rec_offs_init(offsets_); - offsets = rec_get_offsets( - rec, index, offsets_, true, trx_id_pos + 2, &heap); - ulint len; - ulint trx_id_offset = rec_get_nth_field_offs( - offsets, trx_id_pos, &len); - ut_ad(len == DATA_TRX_ID_LEN); + if (trx_id_offset) { + } else if (rec_is_metadata(rec, *index)) { + ut_ad(!buf_block_get_page_zip(btr_pcur_get_block( + &node->pcur))); + for (unsigned i = index->first_user_field(); i--; ) { + trx_id_offset += index->fields[i].fixed_len; + } + } else { + ut_ad(index->n_uniq <= MAX_REF_PARTS); + /* Reserve enough offsets for the PRIMARY KEY and + 2 columns so that we can access + DB_TRX_ID, DB_ROLL_PTR. */ + ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + + 2]; + rec_offs_init(offsets_); + offsets = rec_get_offsets( + rec, index, offsets_, true, trx_id_pos + 2, + &heap); + ulint len; + trx_id_offset = rec_get_nth_field_offs( + offsets, trx_id_pos, &len); + ut_ad(len == DATA_TRX_ID_LEN); + } if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) { ut_ad(!rec_get_deleted_flag( - rec, dict_table_is_comp(node->table))); + rec, dict_table_is_comp(node->table)) + || rec_is_alter_metadata(rec, *index)); index->set_modified(mtr); if (page_zip_des_t* page_zip = buf_block_get_page_zip( btr_pcur_get_block(&node->pcur))) { @@ -436,8 +500,6 @@ mtr_commit_exit: btr_pcur_commit_specify_mtr(pcur, &mtr); func_exit: - node->state = UNDO_NODE_FETCH_NEXT; - if (offsets_heap) { mem_heap_free(offsets_heap); } @@ -1139,14 +1201,10 @@ row_undo_mod_upd_exist_sec( return(err); } -/***********************************************************//** -Parses the row reference and other info in a modify undo log record. */ -static MY_ATTRIBUTE((nonnull)) -void -row_undo_mod_parse_undo_rec( -/*========================*/ - undo_node_t* node, /*!< in: row undo node */ - ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */ +/** Parse an update undo record. 
+@param[in,out] node row rollback state +@param[in] dict_locked whether the data dictionary cache is locked */ +static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked) { dict_index_t* clust_index; byte* ptr; @@ -1159,19 +1217,28 @@ row_undo_mod_parse_undo_rec( ulint cmpl_info; bool dummy_extern; + ut_ad(node->state == UNDO_UPDATE_PERSISTENT + || node->state == UNDO_UPDATE_TEMPORARY); + ut_ad(node->trx->in_rollback); + ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); + ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info, &dummy_extern, &undo_no, &table_id); node->rec_type = type; - node->table = dict_table_open_on_id( - table_id, dict_locked, DICT_TABLE_OP_NORMAL); - - /* TODO: other fixes associated with DROP TABLE + rollback in the - same table by another user */ + if (node->state == UNDO_UPDATE_PERSISTENT) { + node->table = dict_table_open_on_id(table_id, dict_locked, + DICT_TABLE_OP_NORMAL); + } else if (!dict_locked) { + mutex_enter(&dict_sys->mutex); + node->table = dict_sys->get_temporary_table(table_id); + mutex_exit(&dict_sys->mutex); + } else { + node->table = dict_sys->get_temporary_table(table_id); + } - if (node->table == NULL) { - /* Table was dropped */ - return; + if (!node->table) { + return false; } ut_ad(!node->table->skip_alter_undo); @@ -1189,7 +1256,7 @@ close_table: connection, instead of doing this rollback. */ dict_table_close(node->table, dict_locked, FALSE); node->table = NULL; - return; + return false; } clust_index = dict_table_get_first_index(node->table); @@ -1208,16 +1275,21 @@ close_table: ut_ad(!node->ref->info_bits); if (node->update->info_bits & REC_INFO_MIN_REC_FLAG) { - /* This must be an undo log record for a subsequent - instant ALTER TABLE, extending the metadata record. 
*/ - ut_ad(clust_index->is_instant()); - if (node->update->info_bits != REC_INFO_MIN_REC_FLAG) { + if ((node->update->info_bits & ~REC_INFO_DELETED_FLAG) + != REC_INFO_MIN_REC_FLAG) { ut_ad(!"wrong info_bits in undo log record"); goto close_table; } - node->update->info_bits = REC_INFO_METADATA; - const_cast<dtuple_t*>(node->ref)->info_bits - = REC_INFO_METADATA; + /* This must be an undo log record for a subsequent + instant ALTER TABLE, extending the metadata record. */ + ut_ad(clust_index->is_instant()); + ut_ad(clust_index->table->instant + || !(node->update->info_bits & REC_INFO_DELETED_FLAG)); + node->ref = &trx_undo_metadata; + node->update->info_bits = (node->update->info_bits + & REC_INFO_DELETED_FLAG) + ? REC_INFO_METADATA_ALTER + : REC_INFO_METADATA_ADD; } if (!row_undo_search_clust_to_pcur(node)) { @@ -1255,6 +1327,8 @@ close_table: (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) ? NULL : ptr); } + + return true; } /***********************************************************//** @@ -1267,34 +1341,19 @@ row_undo_mod( que_thr_t* thr) /*!< in: query thread */ { dberr_t err; - ibool dict_locked; - - ut_ad(node != NULL); - ut_ad(thr != NULL); - ut_ad(node->state == UNDO_NODE_MODIFY); - ut_ad(node->trx->in_rollback); - ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); - - dict_locked = thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH; - ut_ad(thr_get_trx(thr) == node->trx); + const bool dict_locked = node->trx->dict_operation_lock_mode + == RW_X_LATCH; - row_undo_mod_parse_undo_rec(node, dict_locked); - - if (node->table == NULL) { - /* It is already undone, or will be undone by another query - thread, or table was dropped */ - - node->state = UNDO_NODE_FETCH_NEXT; - - return(DB_SUCCESS); + if (!row_undo_mod_parse_undo_rec(node, dict_locked)) { + return DB_SUCCESS; } node->index = dict_table_get_first_index(node->table); ut_ad(dict_index_is_clust(node->index)); if (node->ref->info_bits) { - ut_ad(node->ref->info_bits == REC_INFO_METADATA); + 
ut_ad(node->ref->is_metadata()); goto rollback_clust; } diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index 9b88f950917..2b8187171f9 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -217,7 +217,8 @@ row_undo_search_clust_to_pcur( log, first mark them DATA_MISSING. So we will know if the value gets updated */ if (node->table->n_v_cols - && node->state != UNDO_NODE_INSERT + && (node->state == UNDO_UPDATE_PERSISTENT + || node->state == UNDO_UPDATE_TEMPORARY) && !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { for (ulint i = 0; i < dict_table_get_n_v_cols(node->table); i++) { @@ -227,13 +228,15 @@ row_undo_search_clust_to_pcur( } if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) { - ut_ad(node->row->info_bits == REC_INFO_MIN_REC_FLAG + ut_ad((node->row->info_bits & ~REC_INFO_DELETED_FLAG) + == REC_INFO_MIN_REC_FLAG || node->row->info_bits == 0); node->undo_row = dtuple_copy(node->row, node->heap); row_upd_replace(node->undo_row, &node->undo_ext, clust_index, node->update, node->heap); } else { - ut_ad((node->row->info_bits == REC_INFO_MIN_REC_FLAG) + ut_ad(((node->row->info_bits & ~REC_INFO_DELETED_FLAG) + == REC_INFO_MIN_REC_FLAG) == (node->rec_type == TRX_UNDO_INSERT_METADATA)); node->undo_row = NULL; node->undo_ext = NULL; @@ -251,6 +254,149 @@ func_exit: return(found); } +/** Try to truncate the undo logs. +@param[in,out] trx transaction */ +static void row_undo_try_truncate(trx_t* trx) +{ + if (trx_undo_t* undo = trx->rsegs.m_redo.undo) { + ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); + trx_undo_truncate_end(*undo, trx->undo_no, false); + } + + if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { + ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg); + trx_undo_truncate_end(*undo, trx->undo_no, true); + } +} + +/** Get the latest undo log record for rollback. 
+@param[in,out] node rollback context +@return whether an undo log record was fetched */ +static bool row_undo_rec_get(undo_node_t* node) +{ + trx_t* trx = node->trx; + + if (trx->pages_undone) { + trx->pages_undone = 0; + row_undo_try_truncate(trx); + } + + trx_undo_t* undo = NULL; + trx_undo_t* insert = trx->rsegs.m_redo.old_insert; + trx_undo_t* update = trx->rsegs.m_redo.undo; + trx_undo_t* temp = trx->rsegs.m_noredo.undo; + const undo_no_t limit = trx->roll_limit; + + ut_ad(!insert || !update || insert->empty() || update->empty() + || insert->top_undo_no != update->top_undo_no); + ut_ad(!insert || !temp || insert->empty() || temp->empty() + || insert->top_undo_no != temp->top_undo_no); + ut_ad(!update || !temp || update->empty() || temp->empty() + || update->top_undo_no != temp->top_undo_no); + + if (UNIV_LIKELY_NULL(insert) + && !insert->empty() && limit <= insert->top_undo_no) { + undo = insert; + } + + if (update && !update->empty() && update->top_undo_no >= limit) { + if (!undo) { + undo = update; + } else if (undo->top_undo_no < update->top_undo_no) { + undo = update; + } + } + + if (temp && !temp->empty() && temp->top_undo_no >= limit) { + if (!undo) { + undo = temp; + } else if (undo->top_undo_no < temp->top_undo_no) { + undo = temp; + } + } + + if (undo == NULL) { + row_undo_try_truncate(trx); + /* Mark any ROLLBACK TO SAVEPOINT completed, so that + if the transaction object is committed and reused + later, we will default to a full ROLLBACK. 
*/ + trx->roll_limit = 0; + trx->in_rollback = false; + return false; + } + + ut_ad(!undo->empty()); + ut_ad(limit <= undo->top_undo_no); + + node->roll_ptr = trx_undo_build_roll_ptr( + false, undo->rseg->id, undo->top_page_no, undo->top_offset); + + mtr_t mtr; + mtr.start(); + + page_t* undo_page = trx_undo_page_get_s_latched( + page_id_t(undo->rseg->space->id, undo->top_page_no), &mtr); + + ulint offset = undo->top_offset; + + trx_undo_rec_t* prev_rec = trx_undo_get_prev_rec( + undo_page + offset, undo->hdr_page_no, undo->hdr_offset, + true, &mtr); + + if (prev_rec == NULL) { + undo->top_undo_no = IB_ID_MAX; + ut_ad(undo->empty()); + } else { + page_t* prev_rec_page = page_align(prev_rec); + + if (prev_rec_page != undo_page) { + + trx->pages_undone++; + } + + undo->top_page_no = page_get_page_no(prev_rec_page); + undo->top_offset = ulint(prev_rec - prev_rec_page); + undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); + ut_ad(!undo->empty()); + } + + { + const trx_undo_rec_t* undo_rec = undo_page + offset; + node->undo_rec = trx_undo_rec_copy(undo_rec, node->heap); + } + + mtr.commit(); + + switch (trx_undo_rec_get_type(node->undo_rec)) { + case TRX_UNDO_INSERT_METADATA: + /* This record type was introduced in MDEV-11369 + instant ADD COLUMN, which was implemented after + MDEV-12288 removed the insert_undo log. There is no + instant ADD COLUMN for temporary tables. Therefore, + this record can only be present in the main undo log. */ + ut_ad(undo == update); + /* fall through */ + case TRX_UNDO_RENAME_TABLE: + ut_ad(undo == insert || undo == update); + /* fall through */ + case TRX_UNDO_INSERT_REC: + ut_ad(undo == insert || undo == update || undo == temp); + node->roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS; + node->state = undo == temp + ? UNDO_INSERT_TEMPORARY : UNDO_INSERT_PERSISTENT; + break; + default: + ut_ad(undo == update || undo == temp); + node->state = undo == temp + ? 
UNDO_UPDATE_TEMPORARY : UNDO_UPDATE_PERSISTENT; + break; + } + + trx->undo_no = node->undo_no = trx_undo_rec_get_undo_no( + node->undo_rec); + return true; +} + /***********************************************************//** Fetches an undo log record and does the undo for the recorded operation. If none left, or a partial rollback completed, returns control to the @@ -263,23 +409,12 @@ row_undo( undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { - trx_t* trx = node->trx; - ut_ad(trx->in_rollback); - - if (node->state == UNDO_NODE_FETCH_NEXT) { - - node->undo_rec = trx_roll_pop_top_rec_of_trx( - trx, &node->roll_ptr, node->heap); - - if (!node->undo_rec) { - /* Rollback completed for this query thread */ - thr->run_node = que_node_get_parent(node); - return(DB_SUCCESS); - } + ut_ad(node->trx->in_rollback); - node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec); - node->state = trx_undo_roll_ptr_is_insert(node->roll_ptr) - ? UNDO_NODE_INSERT : UNDO_NODE_MODIFY; + if (node->state == UNDO_NODE_FETCH_NEXT && !row_undo_rec_get(node)) { + /* Rollback completed for this query thread */ + thr->run_node = que_node_get_parent(node); + return DB_SUCCESS; } /* Prevent DROP TABLE etc. while we are rolling back this row. @@ -287,31 +422,33 @@ row_undo( then we already have dict_operation_lock locked in x-mode. Do not try to lock again, because that would cause a hang. 
*/ + trx_t* trx = node->trx; const bool locked_data_dict = (trx->dict_operation_lock_mode == 0); if (locked_data_dict) { - row_mysql_freeze_data_dictionary(trx); } dberr_t err; - if (node->state == UNDO_NODE_INSERT) { - + switch (node->state) { + case UNDO_INSERT_PERSISTENT: + case UNDO_INSERT_TEMPORARY: err = row_undo_ins(node, thr); - - node->state = UNDO_NODE_FETCH_NEXT; - } else { - ut_ad(node->state == UNDO_NODE_MODIFY); + break; + case UNDO_UPDATE_PERSISTENT: + case UNDO_UPDATE_TEMPORARY: err = row_undo_mod(node, thr); + break; + case UNDO_NODE_FETCH_NEXT: + ut_ad(!"wrong state"); } if (locked_data_dict) { - row_mysql_unfreeze_data_dictionary(trx); } - /* Do some cleanup */ + node->state = UNDO_NODE_FETCH_NEXT; btr_pcur_close(&(node->pcur)); mem_heap_empty(node->heap); diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index a60694c6613..b4f2b91881a 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -495,39 +495,6 @@ row_upd_rec_sys_fields_in_recovery( } } -/*********************************************************************//** -Sets the trx id or roll ptr field of a clustered index entry. 
*/ -void -row_upd_index_entry_sys_field( -/*==========================*/ - dtuple_t* entry, /*!< in/out: index entry, where the memory - buffers for sys fields are already allocated: - the function just copies the new values to - them */ - dict_index_t* index, /*!< in: clustered index */ - ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */ - ib_uint64_t val) /*!< in: value to write */ -{ - dfield_t* dfield; - byte* field; - ulint pos; - - ut_ad(dict_index_is_clust(index)); - - pos = dict_index_get_sys_col_pos(index, type); - - dfield = dtuple_get_nth_field(entry, pos); - field = static_cast<byte*>(dfield_get_data(dfield)); - - if (type == DATA_TRX_ID) { - ut_ad(val > 0); - trx_write_trx_id(field, val); - } else { - ut_ad(type == DATA_ROLL_PTR); - trx_write_roll_ptr(field, val); - } -} - /***********************************************************//** Returns TRUE if row update changes size of some field in index or if some field to be updated is stored externally in rec or update. @@ -680,7 +647,7 @@ row_upd_rec_in_place( switch (rec_get_status(rec)) { case REC_STATUS_ORDINARY: break; - case REC_STATUS_COLUMNS_ADDED: + case REC_STATUS_INSTANT: ut_ad(index->is_instant()); break; case REC_STATUS_NODE_PTR: @@ -731,35 +698,6 @@ row_upd_rec_in_place( } /*********************************************************************//** -Writes into the redo log the values of trx id and roll ptr and enough info -to determine their positions within a clustered index record. 
-@return new pointer to mlog */ -byte* -row_upd_write_sys_vals_to_log( -/*==========================*/ - dict_index_t* index, /*!< in: clustered index */ - trx_id_t trx_id, /*!< in: transaction id */ - roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */ - byte* log_ptr,/*!< pointer to a buffer of size > 20 opened - in mlog */ - mtr_t* mtr MY_ATTRIBUTE((unused))) /*!< in: mtr */ -{ - ut_ad(dict_index_is_clust(index)); - ut_ad(mtr); - - log_ptr += mach_write_compressed(log_ptr, - dict_index_get_sys_col_pos( - index, DATA_TRX_ID)); - - trx_write_roll_ptr(log_ptr, roll_ptr); - log_ptr += DATA_ROLL_PTR_LEN; - - log_ptr += mach_u64_write_compressed(log_ptr, trx_id); - - return(log_ptr); -} - -/*********************************************************************//** Parses the log data of system field values. @return log data end or NULL */ byte* @@ -1053,7 +991,6 @@ row_upd_build_difference_binary( ulint len; upd_t* update; ulint n_diff; - ulint trx_id_pos; ulint i; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint n_fld = dtuple_get_n_fields(entry); @@ -1068,10 +1005,6 @@ row_upd_build_difference_binary( n_diff = 0; - trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); - ut_ad(dict_index_get_sys_col_pos(index, DATA_ROLL_PTR) - == trx_id_pos + 1); - if (!offsets) { offsets = rec_get_offsets(rec, index, offsets_, true, ULINT_UNDEFINED, &heap); @@ -1086,16 +1019,9 @@ row_upd_build_difference_binary( /* NOTE: we compare the fields as binary strings! 
(No collation) */ - if (no_sys) { - /* TRX_ID */ - if (i == trx_id_pos) { - continue; - } - - /* DB_ROLL_PTR */ - if (i == trx_id_pos + 1) { - continue; - } + if (no_sys && (i == index->db_trx_id() + || i == index->db_roll_ptr())) { + continue; } if (!dfield_is_ext(dfield) @@ -1254,7 +1180,7 @@ row_upd_index_replace_new_col_val( len = dfield_get_len(dfield); data = static_cast<const byte*>(dfield_get_data(dfield)); - if (field->prefix_len > 0) { + if (field && field->prefix_len > 0) { ibool fetch_ext = dfield_is_ext(dfield) && len < (ulint) field->prefix_len + BTR_EXTERN_FIELD_REF_SIZE; @@ -1320,6 +1246,57 @@ row_upd_index_replace_new_col_val( } } +/** Apply an update vector to an metadata entry. +@param[in,out] entry clustered index metadata record to be updated +@param[in] index index of the entry +@param[in] update update vector built for the entry +@param[in,out] heap memory heap for copying off-page columns */ +static +void +row_upd_index_replace_metadata( + dtuple_t* entry, + const dict_index_t* index, + const upd_t* update, + mem_heap_t* heap) +{ + ut_ad(!index->table->skip_alter_undo); + ut_ad(update->is_alter_metadata()); + ut_ad(entry->info_bits == update->info_bits); + ut_ad(entry->n_fields == ulint(index->n_fields) + 1); + const page_size_t& page_size = dict_table_page_size(index->table); + const ulint first = index->first_user_field(); + ut_d(bool found_mblob = false); + + for (ulint i = upd_get_n_fields(update); i--; ) { + const upd_field_t* uf = upd_get_nth_field(update, i); + ut_ad(!upd_fld_is_virtual_col(uf)); + ut_ad(uf->field_no >= first - 2); + ulint f = uf->field_no; + dfield_t* dfield = dtuple_get_nth_field(entry, f); + + if (f == first) { + ut_d(found_mblob = true); + ut_ad(!dfield_is_null(&uf->new_val)); + ut_ad(dfield_is_ext(dfield)); + ut_ad(dfield_get_len(dfield) == FIELD_REF_SIZE); + ut_ad(!dfield_is_null(dfield)); + dfield_set_data(dfield, uf->new_val.data, + uf->new_val.len); + if (dfield_is_ext(&uf->new_val)) { + 
dfield_set_ext(dfield); + } + continue; + } + + f -= f > first; + const dict_field_t* field = dict_index_get_nth_field(index, f); + row_upd_index_replace_new_col_val(dfield, field, field->col, + uf, heap, page_size); + } + + ut_ad(found_mblob); +} + /** Apply an update vector to an index entry. @param[in,out] entry index entry to be updated; the clustered index record must be covered by a lock or a page latch to prevent @@ -1335,6 +1312,12 @@ row_upd_index_replace_new_col_vals_index_pos( mem_heap_t* heap) { ut_ad(!index->table->skip_alter_undo); + ut_ad(!entry->is_metadata() || entry->info_bits == update->info_bits); + + if (UNIV_UNLIKELY(entry->is_alter_metadata())) { + row_upd_index_replace_metadata(entry, index, update, heap); + return; + } const page_size_t& page_size = dict_table_page_size(index->table); @@ -2560,10 +2543,10 @@ row_upd_sec_step( } #ifdef UNIV_DEBUG -# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ - row_upd_clust_rec_by_insert_inherit_func(rec,offsets,entry,update) +# define row_upd_clust_rec_by_insert_inherit(rec,index,offsets,entry,update) \ + row_upd_clust_rec_by_insert_inherit_func(rec,index,offsets,entry,update) #else /* UNIV_DEBUG */ -# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ +# define row_upd_clust_rec_by_insert_inherit(rec,index,offsets,entry,update) \ row_upd_clust_rec_by_insert_inherit_func(rec,entry,update) #endif /* UNIV_DEBUG */ /*******************************************************************//** @@ -2578,6 +2561,7 @@ row_upd_clust_rec_by_insert_inherit_func( /*=====================================*/ const rec_t* rec, /*!< in: old record, or NULL */ #ifdef UNIV_DEBUG + dict_index_t* index, /*!< in: index, or NULL */ const ulint* offsets,/*!< in: rec_get_offsets(rec), or NULL */ #endif /* UNIV_DEBUG */ dtuple_t* entry, /*!< in/out: updated entry to be @@ -2588,6 +2572,8 @@ row_upd_clust_rec_by_insert_inherit_func( ulint i; ut_ad(!rec == !offsets); + ut_ad(!rec == !index); + 
ut_ad(!rec || rec_offs_validate(rec, index, offsets)); ut_ad(!rec || rec_offs_any_extern(offsets)); for (i = 0; i < dtuple_get_n_fields(entry); i++) { @@ -2598,6 +2584,9 @@ row_upd_clust_rec_by_insert_inherit_func( ut_ad(!offsets || !rec_offs_nth_extern(offsets, i) == !dfield_is_ext(dfield) + || (!dict_index_get_nth_field(index, i)->name + && !dfield_is_ext(dfield) + && (dfield_is_null(dfield) || dfield->len == 0)) || upd_get_field_by_field_no(update, i, false)); if (!dfield_is_ext(dfield) || upd_get_field_by_field_no(update, i, false)) { @@ -2696,7 +2685,11 @@ row_upd_clust_rec_by_insert( if (index->is_instant()) entry->trim(*index); ut_ad(dtuple_get_info_bits(entry) == 0); - row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); + { + dfield_t* t = dtuple_get_nth_field(entry, index->db_trx_id()); + ut_ad(t->len == DATA_TRX_ID_LEN); + trx_write_trx_id(static_cast<byte*>(t->data), trx->id); + } switch (node->state) { default: @@ -2705,7 +2698,7 @@ row_upd_clust_rec_by_insert( /* A lock wait occurred in row_ins_clust_index_entry() in the previous invocation of this function. */ row_upd_clust_rec_by_insert_inherit( - NULL, NULL, entry, node->update); + NULL, NULL, NULL, entry, node->update); break; case UPD_NODE_UPDATE_CLUSTERED: /* This is the first invocation of the function where @@ -2746,7 +2739,8 @@ err_exit: if (rec_offs_any_extern(offsets)) { if (row_upd_clust_rec_by_insert_inherit( - rec, offsets, entry, node->update)) { + rec, index, offsets, + entry, node->update)) { /* The blobs are disowned here, expecting the insert down below to inherit them. 
But if the insert fails, then this disown will be undone diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 85d9f0522aa..8fa71689caf 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -298,12 +298,6 @@ static monitor_info_t innodb_counter_info[] = MONITOR_EXISTING | MONITOR_DEFAULT_ON), MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_READ}, - {"buffer_pages0_read", "buffer", - "Number of page 0 read (innodb_pages0_read)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES0_READ}, - {"buffer_index_sec_rec_cluster_reads", "buffer", "Number of secondary record reads triggered cluster read", static_cast<monitor_type_t>( @@ -802,11 +796,6 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_SAVEPOINT}, - {"trx_rollback_active", "transaction", - "Number of resurrected active transactions rolled back", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_ACTIVE}, - {"trx_active_transactions", "transaction", "Number of active transactions", MONITOR_NONE, @@ -1745,11 +1734,6 @@ srv_mon_process_existing_counter( value = stat.n_pages_read; break; - /* innodb_pages0_read */ - case MONITOR_OVLD_PAGES0_READ: - value = srv_stats.page0_read; - break; - /* Number of times secondary index lookup triggered cluster lookup */ case MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS: value = srv_stats.n_sec_rec_cluster_reads; diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index f3cab013437..8db656cd561 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -59,7 +59,6 @@ Created 10/8/1995 Heikki Tuuri #include "pars0pars.h" #include "que0que.h" #include "row0mysql.h" -#include "row0trunc.h" #include "row0log.h" #include "srv0mon.h" #include "srv0srv.h" @@ -1537,7 +1536,6 @@ srv_export_innodb_status(void) export_vars.innodb_pages_created = 
stat.n_pages_created; export_vars.innodb_pages_read = stat.n_pages_read; - export_vars.innodb_page0_read = srv_stats.page0_read; export_vars.innodb_pages_written = stat.n_pages_written; @@ -2585,16 +2583,10 @@ srv_do_purge(ulint* n_total_purged) break; } - ulint undo_trunc_freq = - purge_sys.undo_trunc.get_rseg_truncate_frequency(); - - ulint rseg_truncate_frequency = ut_min( - static_cast<ulint>(srv_purge_rseg_truncate_frequency), - undo_trunc_freq); - n_pages_purged = trx_purge( n_use_threads, - (++count % rseg_truncate_frequency) == 0); + !(++count % srv_purge_rseg_truncate_frequency) + || purge_sys.truncate.current); *n_total_purged += n_pages_purged; } while (n_pages_purged > 0 && !purge_sys.paused() @@ -2729,11 +2721,6 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( /* Note that we are shutting down. */ rw_lock_x_lock(&purge_sys.latch); purge_sys.coordinator_shutdown(); - - /* If there are any pending undo-tablespace truncate then clear - it off as we plan to shutdown the purge thread. */ - purge_sys.undo_trunc.clear(); - /* Ensure that the wait in purge_sys_t::stop() will terminate. */ os_event_set(purge_sys.event); @@ -2840,38 +2827,3 @@ void srv_purge_shutdown() srv_purge_wakeup(); } while (srv_sys.sys_threads[SRV_PURGE_SLOT].in_use); } - -/** Check if tablespace is being truncated. -(Ignore system-tablespace as we don't re-create the tablespace -and so some of the action that are suppressed by this function -for independent tablespace are not applicable to system-tablespace). -@param space_id space_id to check for truncate action -@return true if being truncated, false if not being - truncated or tablespace is system-tablespace. */ -bool -srv_is_tablespace_truncated(ulint space_id) -{ - if (is_system_tablespace(space_id)) { - return(false); - } - - return(truncate_t::is_tablespace_truncated(space_id) - || undo::Truncate::is_tablespace_truncated(space_id)); - -} - -/** Check if tablespace was truncated. 
-@param[in] space space object to check for truncate action -@return true if tablespace was truncated and we still have an active -MLOG_TRUNCATE REDO log record. */ -bool -srv_was_tablespace_truncated(const fil_space_t* space) -{ - if (space == NULL) { - ut_ad(0); - return(false); - } - - return (!is_system_tablespace(space->id) - && truncate_t::was_tablespace_truncated(space->id)); -} diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index a2c9828bfee..fa38741fb5d 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -74,7 +74,6 @@ Created 2/16/1996 Heikki Tuuri #include "srv0start.h" #include "srv0srv.h" #include "btr0defragment.h" -#include "row0trunc.h" #include "mysql/service_wsrep.h" /* wsrep_recovery */ #include "trx0rseg.h" #include "os0proc.h" @@ -97,7 +96,6 @@ Created 2/16/1996 Heikki Tuuri #include "row0upd.h" #include "row0row.h" #include "row0mysql.h" -#include "row0trunc.h" #include "btr0pcur.h" #include "os0event.h" #include "zlib.h" @@ -761,8 +759,6 @@ srv_check_undo_redo_logs_exists() return(DB_SUCCESS); } -undo::undo_spaces_t undo::Truncate::s_fix_up_spaces; - /** Open the configured number of dedicated undo tablespaces. 
@param[in] create_new_db whether the database is being initialized @return DB_SUCCESS or error code */ @@ -844,46 +840,8 @@ srv_undo_tablespaces_init(bool create_new_db) prev_space_id = srv_undo_space_id_start - 1; break; case SRV_OPERATION_NORMAL: - if (create_new_db) { - break; - } - /* fall through */ case SRV_OPERATION_RESTORE: case SRV_OPERATION_RESTORE_EXPORT: - ut_ad(!create_new_db); - - /* Check if any of the UNDO tablespace needs fix-up because - server crashed while truncate was active on UNDO tablespace.*/ - for (i = 0; i < n_undo_tablespaces; ++i) { - - undo::Truncate undo_trunc; - - if (undo_trunc.needs_fix_up(undo_tablespace_ids[i])) { - - char name[OS_FILE_MAX_PATH]; - - snprintf(name, sizeof(name), - "%s%cundo%03zu", - srv_undo_dir, OS_PATH_SEPARATOR, - undo_tablespace_ids[i]); - - os_file_delete(innodb_data_file_key, name); - - err = srv_undo_tablespace_create( - name, - SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); - - if (err != DB_SUCCESS) { - ib::error() << "Could not fix-up undo " - " tablespace truncate '" - << name << "'."; - return(err); - } - - undo::Truncate::s_fix_up_spaces.push_back( - undo_tablespace_ids[i]); - } - } break; } @@ -988,64 +946,6 @@ srv_undo_tablespaces_init(bool create_new_db) } } - if (!undo::Truncate::s_fix_up_spaces.empty()) { - - /* Step-1: Initialize the tablespace header and rsegs header. */ - mtr_t mtr; - - mtr_start(&mtr); - /* Turn off REDO logging. We are in server start mode and fixing - UNDO tablespace even before REDO log is read. Let's say we - do REDO logging here then this REDO log record will be applied - as part of the current recovery process. We surely don't need - that as this is fix-up action parallel to REDO logging. 
*/ - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - buf_block_t* sys_header = trx_sysf_get(&mtr); - if (!sys_header) { - mtr.commit(); - return DB_CORRUPTION; - } - - for (undo::undo_spaces_t::const_iterator it - = undo::Truncate::s_fix_up_spaces.begin(); - it != undo::Truncate::s_fix_up_spaces.end(); - ++it) { - - undo::Truncate::add_space_to_trunc_list(*it); - - fil_space_t* space = fil_space_get(*it); - - fsp_header_init(space, - SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, - &mtr); - - for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) { - if (trx_sysf_rseg_get_space(sys_header, i) - == *it) { - trx_rseg_header_create( - space, i, sys_header, &mtr); - } - } - - undo::Truncate::clear_trunc_list(); - } - mtr_commit(&mtr); - - /* Step-2: Flush the dirty pages from the buffer pool. */ - for (undo::undo_spaces_t::const_iterator it - = undo::Truncate::s_fix_up_spaces.begin(); - it != undo::Truncate::s_fix_up_spaces.end(); - ++it) { - FlushObserver dummy(fil_system.sys_space, NULL, NULL); - buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, &dummy); - FlushObserver dummy2(fil_space_get(*it), NULL, NULL); - buf_LRU_flush_or_remove_pages(*it, &dummy2); - - /* Remove the truncate redo log file. */ - undo::done(*it); - } - } - return(DB_SUCCESS); } @@ -1273,9 +1173,7 @@ srv_prepare_to_delete_redo_log_files( ulint pending_io = 0; ulint count = 0; - if ((log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED) - != LOG_HEADER_FORMAT_CURRENT - || log_sys.log.subformat != 2) { + if (log_sys.log.subformat != 2) { srv_log_file_size = 0; } @@ -1294,12 +1192,10 @@ srv_prepare_to_delete_redo_log_files( { ib::info info; - if (srv_log_file_size == 0) { - info << ((log_sys.log.format - & ~LOG_HEADER_FORMAT_ENCRYPTED) - != LOG_HEADER_FORMAT_10_4 - ? 
"Upgrading redo log: " - : "Downgrading redo log: "); + if (srv_log_file_size == 0 + || (log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED) + != LOG_HEADER_FORMAT_10_4) { + info << "Upgrading redo log: "; } else if (n_files != srv_n_log_files || srv_log_file_size != srv_log_file_size_requested) { @@ -1888,7 +1784,7 @@ files_checked: ulint ibuf_root = btr_create( DICT_CLUSTERED | DICT_IBUF, fil_system.sys_space, - DICT_IBUF_ID_MIN, dict_ind_redundant, NULL, &mtr); + DICT_IBUF_ID_MIN, dict_ind_redundant, &mtr); mtr_commit(&mtr); @@ -1927,22 +1823,6 @@ files_checked: return(srv_init_abort(err)); } } else { - /* Invalidate the buffer pool to ensure that we reread - the page that we read above, during recovery. - Note that this is not as heavy weight as it seems. At - this point there will be only ONE page in the buf_LRU - and there must be no page in the buf_flush list. */ - buf_pool_invalidate(); - - /* Scan and locate truncate log files. Parsed located files - and add table to truncate information to central vector for - truncate fix-up action post recovery. */ - err = TruncateLogParser::scan_and_parse(srv_log_group_home_dir); - if (err != DB_SUCCESS) { - - return(srv_init_abort(DB_ERROR)); - } - /* We always try to do a recovery, even if the database had been shut down normally: this is the normal startup path */ @@ -2122,9 +2002,8 @@ files_checked: && srv_n_log_files_found == srv_n_log_files && log_sys.log.format == (srv_encrypt_log - ? LOG_HEADER_FORMAT_CURRENT - | LOG_HEADER_FORMAT_ENCRYPTED - : LOG_HEADER_FORMAT_CURRENT) + ? LOG_HEADER_FORMAT_ENC_10_4 + : LOG_HEADER_FORMAT_10_4) && log_sys.log.subformat == 2) { /* No need to add or remove encryption, upgrade, downgrade, or resize. */ @@ -2222,14 +2101,6 @@ files_checked: trx_rollback_recovered(false); } - /* Fix-up truncate of tables in the system tablespace - if server crashed while truncate was active. The non- - system tables are done after tablespace discovery. 
Do - this now because this procedure assumes that no pages - have changed since redo recovery. Tablespace discovery - can do updates to pages in the system tablespace.*/ - err = truncate_t::fixup_tables_in_system_tablespace(); - if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { /* Open or Create SYS_TABLESPACES and SYS_DATAFILES so that tablespace names and other metadata can be @@ -2267,10 +2138,6 @@ files_checked: dict_check_tablespaces_and_store_max_id(validate); } - /* Fix-up truncate of table if server crashed while truncate - was active. */ - err = truncate_t::fixup_tables_in_non_system_tablespace(); - if (err != DB_SUCCESS) { return(srv_init_abort(err)); } diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index a4fa12708ac..b3c2d078551 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -174,7 +174,8 @@ void purge_sys_t::create() hdr_offset= 0; rw_lock_create(trx_purge_latch_key, &latch, SYNC_PURGE_LATCH); mutex_create(LATCH_ID_PURGE_SYS_PQ, &pq_mutex); - undo_trunc.create(); + truncate.current= NULL; + truncate.last= NULL; } /** Close the purge subsystem on shutdown. */ @@ -510,309 +511,22 @@ func_exit: goto loop; } -/** UNDO log truncate logger. Needed to track state of truncate during crash. -An auxiliary redo log file undo_<space_id>_trunc.log will created while the -truncate of the UNDO is in progress. This file is required during recovery -to complete the truncate. */ - -namespace undo { - /** Magic Number to indicate truncate action is complete. */ - static const ib_uint32_t s_magic = 76845412; - - /** Populate log file name based on space_id - @param[in] space_id id of the undo tablespace. 
- @return DB_SUCCESS or error code */ - static dberr_t populate_log_file_name( - ulint space_id, - char*& log_file_name) - { - static const char s_log_prefix[] = "undo_"; - static const char s_log_ext[] = "trunc.log"; - - ulint log_file_name_sz = strlen(srv_log_group_home_dir) - + (22 - 1 /* NUL */ - + sizeof s_log_prefix + sizeof s_log_ext); - - log_file_name = new (std::nothrow) char[log_file_name_sz]; - if (log_file_name == 0) { - return(DB_OUT_OF_MEMORY); - } - - memset(log_file_name, 0, log_file_name_sz); - - strcpy(log_file_name, srv_log_group_home_dir); - ulint log_file_name_len = strlen(log_file_name); - - if (log_file_name[log_file_name_len - 1] - != OS_PATH_SEPARATOR) { - - log_file_name[log_file_name_len] - = OS_PATH_SEPARATOR; - log_file_name_len = strlen(log_file_name); - } - - snprintf(log_file_name + log_file_name_len, - log_file_name_sz - log_file_name_len, - "%s" ULINTPF "_%s", s_log_prefix, - space_id, s_log_ext); - - return(DB_SUCCESS); - } - - /** Mark completion of undo truncate action by writing magic number to - the log file and then removing it from the disk. - If we are going to remove it from disk then why write magic number ? - This is to safeguard from unlink (file-system) anomalies that will keep - the link to the file even after unlink action is successfull and - ref-count = 0. - @param[in] space_id id of the undo tablespace to truncate.*/ - void done( - ulint space_id) - { - dberr_t err; - char* log_file_name; - - /* Step-1: Create the log file name using the pre-decided - prefix/suffix and table id of undo tablepsace to truncate. */ - err = populate_log_file_name(space_id, log_file_name); - if (err != DB_SUCCESS) { - return; - } - - /* Step-2: Open log file and write magic number to - indicate done phase. 
*/ - bool ret; - os_file_t handle = - os_file_create_simple_no_error_handling( - innodb_log_file_key, log_file_name, - OS_FILE_OPEN, OS_FILE_READ_WRITE, - srv_read_only_mode, &ret); - - if (!ret) { - os_file_delete(innodb_log_file_key, log_file_name); - delete[] log_file_name; - return; - } - - ulint sz = srv_page_size; - void* buf = ut_zalloc_nokey(sz + srv_page_size); - if (buf == NULL) { - os_file_close(handle); - os_file_delete(innodb_log_file_key, log_file_name); - delete[] log_file_name; - return; - } - - byte* log_buf = static_cast<byte*>( - ut_align(buf, srv_page_size)); - - mach_write_to_4(log_buf, undo::s_magic); - - IORequest request(IORequest::WRITE); - - err = os_file_write( - request, log_file_name, handle, log_buf, 0, sz); - - ut_ad(err == DB_SUCCESS); - - os_file_flush(handle); - os_file_close(handle); - - ut_free(buf); - os_file_delete(innodb_log_file_key, log_file_name); - delete[] log_file_name; - } - - /** Check if TRUNCATE_DDL_LOG file exist. - @param[in] space_id id of the undo tablespace. - @return true if exist else false. */ - bool is_log_present( - ulint space_id) - { - dberr_t err; - char* log_file_name; - - /* Step-1: Populate log file name. */ - err = populate_log_file_name(space_id, log_file_name); - if (err != DB_SUCCESS) { - return(false); - } - - /* Step-2: Check for existence of the file. */ - bool exist; - os_file_type_t type; - os_file_status(log_file_name, &exist, &type); - - /* Step-3: If file exists, check it for presence of magic - number. If found, then delete the file and report file - doesn't exist as presence of magic number suggest that - truncate action was complete. 
*/ - - if (exist) { - bool ret; - os_file_t handle = - os_file_create_simple_no_error_handling( - innodb_log_file_key, log_file_name, - OS_FILE_OPEN, OS_FILE_READ_WRITE, - srv_read_only_mode, &ret); - if (!ret) { - os_file_delete(innodb_log_file_key, - log_file_name); - delete[] log_file_name; - return(false); - } - - ulint sz = srv_page_size; - void* buf = ut_zalloc_nokey(sz + srv_page_size); - if (buf == NULL) { - os_file_close(handle); - os_file_delete(innodb_log_file_key, - log_file_name); - delete[] log_file_name; - return(false); - } - - byte* log_buf = static_cast<byte*>( - ut_align(buf, srv_page_size)); - - IORequest request(IORequest::READ); - - dberr_t err; - - err = os_file_read(request, handle, log_buf, 0, sz); - - os_file_close(handle); - - if (err != DB_SUCCESS) { - - ib::info() - << "Unable to read '" - << log_file_name << "' : " - << ut_strerr(err); - - os_file_delete( - innodb_log_file_key, log_file_name); - - ut_free(buf); - - delete[] log_file_name; - - return(false); - } - - ulint magic_no = mach_read_from_4(log_buf); - - ut_free(buf); - - if (magic_no == undo::s_magic) { - /* Found magic number. */ - os_file_delete(innodb_log_file_key, - log_file_name); - delete[] log_file_name; - return(false); - } - } - - delete[] log_file_name; - - return(exist); - } -}; - -/** Iterate over all the UNDO tablespaces and check if any of the UNDO -tablespace qualifies for TRUNCATE (size > threshold). -@param[in,out] undo_trunc undo truncate tracker */ -static -void -trx_purge_mark_undo_for_truncate( - undo::Truncate* undo_trunc) -{ - /* Step-1: If UNDO Tablespace - - already marked for truncate (OR) - - truncate disabled - return immediately else search for qualifying tablespace. */ - if (undo_trunc->is_marked() || !srv_undo_log_truncate) { - return; - } - - /* Step-2: Validation/Qualification checks - a. At-least 2 UNDO tablespaces so even if one UNDO tablespace - is being truncated server can continue to operate. - b. 
At-least 2 persistent UNDO logs (besides the default rseg-0) - b. At-least 1 UNDO tablespace size > threshold. */ - if (srv_undo_tablespaces_active < 2 || srv_undo_logs < 3) { - return; - } - - /* Avoid bias selection and so start the scan from immediate next - of last selected UNDO tablespace for truncate. */ - ulint space_id = undo_trunc->get_scan_start(); - - for (ulint i = 1; i <= srv_undo_tablespaces_active; i++) { - - if (fil_space_get_size(space_id) - > (srv_max_undo_log_size >> srv_page_size_shift)) { - /* Tablespace qualifies for truncate. */ - undo_trunc->mark(space_id); - undo::Truncate::add_space_to_trunc_list(space_id); - break; - } - - space_id = ((space_id + 1) % (srv_undo_tablespaces_active + 1)); - if (space_id == 0) { - /* Note: UNDO tablespace ids starts from 1. */ - ++space_id; - } - } - - /* Couldn't make any selection. */ - if (!undo_trunc->is_marked()) { - return; - } - - DBUG_LOG("undo", - "marking for truncate UNDO tablespace " - << undo_trunc->get_marked_space_id()); - - /* Step-3: Iterate over all the rsegs of selected UNDO tablespace - and mark them temporarily unavailable for allocation.*/ - for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { - if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) { - ut_ad(rseg->is_persistent()); - if (rseg->space->id - == undo_trunc->get_marked_space_id()) { - - /* Once set this rseg will not be allocated - to new booting transaction but we will wait - for existing active transaction to finish. */ - rseg->skip_allocation = true; - undo_trunc->add_rseg_to_trunc(rseg); - } - } - } -} - -undo::undo_spaces_t undo::Truncate::s_spaces_to_truncate; - /** Cleanse purge queue to remove the rseg that reside in undo-tablespace marked for truncate. 
-@param[in,out] undo_trunc undo truncate tracker */ -static -void -trx_purge_cleanse_purge_queue( - undo::Truncate* undo_trunc) +@param[in] space undo tablespace being truncated */ +static void trx_purge_cleanse_purge_queue(const fil_space_t& space) { - mutex_enter(&purge_sys.pq_mutex); typedef std::vector<TrxUndoRsegs> purge_elem_list_t; purge_elem_list_t purge_elem_list; + mutex_enter(&purge_sys.pq_mutex); + /* Remove rseg instances that are in the purge queue before we start truncate of corresponding UNDO truncate. */ while (!purge_sys.purge_queue.empty()) { purge_elem_list.push_back(purge_sys.purge_queue.top()); purge_sys.purge_queue.pop(); } - ut_ad(purge_sys.purge_queue.empty()); for (purge_elem_list_t::iterator it = purge_elem_list.begin(); it != purge_elem_list.end(); @@ -821,9 +535,7 @@ trx_purge_cleanse_purge_queue( for (TrxUndoRsegs::iterator it2 = it->begin(); it2 != it->end(); ++it2) { - - if ((*it2)->space->id - == undo_trunc->get_marked_space_id()) { + if ((*it2)->space == &space) { it->erase(it2); break; } @@ -833,280 +545,285 @@ trx_purge_cleanse_purge_queue( purge_sys.purge_queue.push(*it); } } + mutex_exit(&purge_sys.pq_mutex); } -/** Iterate over selected UNDO tablespace and check if all the rsegs -that resides in the tablespace are free. -@param[in] limit truncate_limit -@param[in,out] undo_trunc undo truncate tracker */ -static -void -trx_purge_initiate_truncate( - const purge_sys_t::iterator& limit, - undo::Truncate* undo_trunc) +/** +Removes unnecessary history data from rollback segments. NOTE that when this +function is called, the caller must not have any latches on undo log pages! +*/ +static void trx_purge_truncate_history() { - /* Step-1: Early check to findout if any of the the UNDO tablespace - is marked for truncate. */ - if (!undo_trunc->is_marked()) { - /* No tablespace marked for truncate yet. */ - return; - } - - /* Step-2: Scan over each rseg and ensure that it doesn't hold any - active undo records. 
*/ - bool all_free = true; - - for (ulint i = 0; i < undo_trunc->rsegs_size() && all_free; ++i) { - - trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i); + ut_ad(purge_sys.head <= purge_sys.tail); + purge_sys_t::iterator& head = purge_sys.head.commit + ? purge_sys.head : purge_sys.tail; - mutex_enter(&rseg->mutex); + if (head.trx_no() >= purge_sys.view.low_limit_no()) { + /* This is sometimes necessary. TODO: find out why. */ + head.reset_trx_no(purge_sys.view.low_limit_no()); + head.undo_no = 0; + } - if (rseg->trx_ref_count > 0) { - /* This rseg is still being held by an active - transaction. */ - all_free = false; - mutex_exit(&rseg->mutex); - continue; + for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { + if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) { + ut_ad(rseg->id == i); + trx_purge_truncate_rseg_history(*rseg, head); } + } - ut_ad(rseg->trx_ref_count == 0); - ut_ad(rseg->skip_allocation); - - ulint size_of_rsegs = rseg->curr_size; - - if (size_of_rsegs == 1) { - mutex_exit(&rseg->mutex); - continue; - } else { - - /* There could be cached undo segment. Check if records - in these segments can be purged. Normal purge history - will not touch these cached segment. */ - ulint cached_undo_size = 0; + if (srv_undo_tablespaces_active < 2) { + return; + } - for (trx_undo_t* undo = - UT_LIST_GET_FIRST(rseg->undo_cached); - undo != NULL && all_free; - undo = UT_LIST_GET_NEXT(undo_list, undo)) { + while (srv_undo_log_truncate && srv_undo_logs >= 3) { + if (!purge_sys.truncate.current) { + const ulint threshold = ulint(srv_max_undo_log_size + >> srv_page_size_shift); + for (ulint i = purge_sys.truncate.last + ? 
purge_sys.truncate.last->id + - srv_undo_space_id_start + : 0, j = i;; ) { + ulint space_id = srv_undo_space_id_start + i; + ut_ad(srv_is_undo_tablespace(space_id)); + + if (fil_space_get_size(space_id) + > threshold) { + purge_sys.truncate.current + = fil_space_get(space_id); + break; + } - if (limit.trx_no() < undo->trx_id) { - all_free = false; - } else { - cached_undo_size += undo->size; + ++i; + i %= srv_undo_tablespaces_active; + if (i == j) { + break; } } + } - ut_ad(size_of_rsegs >= (cached_undo_size + 1)); + if (!purge_sys.truncate.current) { + return; + } - if (size_of_rsegs > (cached_undo_size + 1)) { - /* There are pages besides cached pages that - still hold active data. */ - all_free = false; + const fil_space_t& space = *purge_sys.truncate.current; + /* Undo tablespace always are a single file. */ + ut_a(UT_LIST_GET_LEN(space.chain) == 1); + fil_node_t* file = UT_LIST_GET_FIRST(space.chain); + /* The undo tablespace files are never closed. */ + ut_ad(file->is_open()); + + DBUG_LOG("undo", "marking for truncate: " << file->name); + + for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { + if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) { + ut_ad(rseg->is_persistent()); + if (rseg->space == &space) { + /* Once set, this rseg will + not be allocated to subsequent + transactions, but we will wait + for existing active + transactions to finish. */ + rseg->skip_allocation = true; + } } } - mutex_exit(&rseg->mutex); - } - - if (!all_free) { - /* rseg still holds active data.*/ - return; - } - - - /* Step-3: Start the actual truncate. - a. Remove rseg instance if added to purge queue before we - initiate truncate. - b. 
Execute actual truncate */ - - const ulint space_id = undo_trunc->get_marked_space_id(); - - ib::info() << "Truncating UNDO tablespace " << space_id; - - trx_purge_cleanse_purge_queue(undo_trunc); - - ut_a(srv_is_undo_tablespace(space_id)); - - fil_space_t* space = fil_space_get(space_id); + for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { + trx_rseg_t* rseg = trx_sys.rseg_array[i]; + if (!rseg || rseg->space != &space) { + continue; + } + mutex_enter(&rseg->mutex); + ut_ad(rseg->skip_allocation); + if (rseg->trx_ref_count) { +not_free: + mutex_exit(&rseg->mutex); + return; + } - if (!space) { -not_found: - ib::error() << "Failed to find UNDO tablespace " << space_id; - return; - } + if (rseg->curr_size != 1) { + /* Check if all segments are + cached and safe to remove. */ + ulint cached = 0; + + for (trx_undo_t* undo = UT_LIST_GET_FIRST( + rseg->undo_cached); + undo; + undo = UT_LIST_GET_NEXT(undo_list, + undo)) { + if (head.trx_no() < undo->trx_id) { + goto not_free; + } else { + cached += undo->size; + } + } - /* Flush all to-be-discarded pages of the tablespace. - - During truncation, we do not want any writes to the - to-be-discarded area, because we must set the space->size - early in order to have deterministic page allocation. - - If a log checkpoint was completed at LSN earlier than our - mini-transaction commit and the server was killed, then - discarding the to-be-trimmed pages without flushing would - break crash recovery. So, we cannot avoid the write. */ - { - FlushObserver observer( - space, - UT_LIST_GET_FIRST(purge_sys.query->thrs)->graph->trx, - NULL); - buf_LRU_flush_or_remove_pages(space_id, &observer); - } + ut_ad(rseg->curr_size > cached); - log_free_check(); + if (rseg->curr_size > cached + 1) { + goto not_free; + } + } - /* Adjust the tablespace metadata. 
*/ - space = fil_truncate_prepare(space_id); + mutex_exit(&rseg->mutex); + } - if (!space) { - goto not_found; - } + ib::info() << "Truncating " << file->name; + trx_purge_cleanse_purge_queue(space); + + /* Flush all to-be-discarded pages of the tablespace. + + During truncation, we do not want any writes to the + to-be-discarded area, because we must set the space.size + early in order to have deterministic page allocation. + + If a log checkpoint was completed at LSN earlier than our + mini-transaction commit and the server was killed, then + discarding the to-be-trimmed pages without flushing would + break crash recovery. So, we cannot avoid the write. */ + { + FlushObserver observer( + purge_sys.truncate.current, + UT_LIST_GET_FIRST(purge_sys.query->thrs) + ->graph->trx, + NULL); + buf_LRU_flush_or_remove_pages(space.id, &observer); + } - /* Undo tablespace always are a single file. */ - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - fil_node_t* file = UT_LIST_GET_FIRST(space->chain); - /* The undo tablespace files are never closed. */ - ut_ad(file->is_open()); + log_free_check(); - /* Re-initialize tablespace, in a single mini-transaction. */ - mtr_t mtr; - const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES; - mtr.start(); - mtr_x_lock(&space->latch, &mtr); - fil_truncate_log(space, size, &mtr); - fsp_header_init(space, size, &mtr); - mutex_enter(&fil_system.mutex); - space->size = file->size = size; - mutex_exit(&fil_system.mutex); + /* Adjust the tablespace metadata. */ + if (!fil_truncate_prepare(space.id)) { + ib::error() << "Failed to find UNDO tablespace " + << file->name; + return; + } - buf_block_t* sys_header = trx_sysf_get(&mtr); + /* Re-initialize tablespace, in a single mini-transaction. 
*/ + mtr_t mtr; + const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES; + mtr.start(); + mtr_x_lock(&purge_sys.truncate.current->latch, &mtr); + fil_truncate_log(purge_sys.truncate.current, size, &mtr); + fsp_header_init(purge_sys.truncate.current, size, &mtr); + mutex_enter(&fil_system.mutex); + purge_sys.truncate.current->size = file->size = size; + mutex_exit(&fil_system.mutex); + + buf_block_t* sys_header = trx_sysf_get(&mtr); + + for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { + trx_rseg_t* rseg = trx_sys.rseg_array[i]; + if (!rseg || rseg->space != &space) { + continue; + } - for (ulint i = 0; i < undo_trunc->rsegs_size(); ++i) { - trx_rsegf_t* rseg_header; + ut_ad(rseg->is_persistent()); + ut_d(const ulint old_page = rseg->page_no); - trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i); + rseg->page_no = trx_rseg_header_create( + purge_sys.truncate.current, + rseg->id, sys_header, &mtr); + ut_ad(old_page == rseg->page_no); - rseg->page_no = trx_rseg_header_create( - space, rseg->id, sys_header, &mtr); + trx_rsegf_t* rseg_header = trx_rsegf_get_new( + space.id, rseg->page_no, &mtr); - rseg_header = trx_rsegf_get_new( - space_id, rseg->page_no, &mtr); + /* Before re-initialization ensure that we + free the existing structure. There can't be + any active transactions. */ + ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0); + ut_a(UT_LIST_GET_LEN(rseg->old_insert_list) == 0); - /* Before re-initialization ensure that we free the existing - structure. There can't be any active transactions. 
*/ - ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0); - ut_a(UT_LIST_GET_LEN(rseg->old_insert_list) == 0); + trx_undo_t* next_undo; - trx_undo_t* next_undo; + for (trx_undo_t* undo = UT_LIST_GET_FIRST( + rseg->undo_cached); + undo; undo = next_undo) { - for (trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached); - undo != NULL; - undo = next_undo) { + next_undo = UT_LIST_GET_NEXT(undo_list, undo); + UT_LIST_REMOVE(rseg->undo_cached, undo); + MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); + ut_free(undo); + } - next_undo = UT_LIST_GET_NEXT(undo_list, undo); - UT_LIST_REMOVE(rseg->undo_cached, undo); - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); - ut_free(undo); + UT_LIST_INIT(rseg->undo_list, + &trx_undo_t::undo_list); + UT_LIST_INIT(rseg->undo_cached, + &trx_undo_t::undo_list); + UT_LIST_INIT(rseg->old_insert_list, + &trx_undo_t::undo_list); + + /* These were written by trx_rseg_header_create(). */ + ut_ad(!mach_read_from_4(rseg_header + + TRX_RSEG_FORMAT)); + ut_ad(!mach_read_from_4(rseg_header + + TRX_RSEG_HISTORY_SIZE)); + + /* Initialize the undo log lists according to + the rseg header */ + rseg->curr_size = 1; + rseg->trx_ref_count = 0; + rseg->last_page_no = FIL_NULL; + rseg->last_offset = 0; + rseg->last_commit = 0; + rseg->needs_purge = false; } - UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list); - UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list); - UT_LIST_INIT(rseg->old_insert_list, &trx_undo_t::undo_list); - - /* These were written by trx_rseg_header_create(). */ - ut_ad(!mach_read_from_4(rseg_header + TRX_RSEG_FORMAT)); - ut_ad(!mach_read_from_4(rseg_header + TRX_RSEG_HISTORY_SIZE)); - - /* Initialize the undo log lists according to the rseg header */ - rseg->curr_size = 1; - rseg->trx_ref_count = 0; - rseg->last_page_no = FIL_NULL; - rseg->last_offset = 0; - rseg->last_commit = 0; - rseg->needs_purge = false; - } - - mtr.commit(); - /* Write-ahead the redo log record. */ - log_write_up_to(mtr.commit_lsn(), true); - - /* Trim the file size. 
*/ - os_file_truncate(file->name, file->handle, - os_offset_t(size) << srv_page_size_shift, true); - - /* This is only executed by the srv_purge_coordinator_thread. */ - export_vars.innodb_undo_truncations++; - - /* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */ - - mutex_enter(&fil_system.mutex); - ut_ad(space->stop_new_ops); - ut_ad(space->is_being_truncated); - space->stop_new_ops = false; - space->is_being_truncated = false; - mutex_exit(&fil_system.mutex); - - if (purge_sys.rseg != NULL - && purge_sys.rseg->last_page_no == FIL_NULL) { - /* If purge_sys.rseg is pointing to rseg that was recently - truncated then move to next rseg element. - Note: Ideally purge_sys.rseg should be NULL because purge - should complete processing of all the records but there is - purge_batch_size that can force the purge loop to exit before - all the records are purged and in this case purge_sys.rseg - could point to a valid rseg waiting for next purge cycle. */ - purge_sys.next_stored = false; - purge_sys.rseg = NULL; - } - - DBUG_EXECUTE_IF("ib_undo_trunc", - ib::info() << "ib_undo_trunc"; - log_write_up_to(LSN_MAX, true); - DBUG_SUICIDE();); - - /* Completed truncate. Now it is safe to re-use the tablespace. */ - for (ulint i = 0; i < undo_trunc->rsegs_size(); ++i) { - trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i); - rseg->skip_allocation = false; - } - - ib::info() << "Truncated UNDO tablespace " << space_id; - - undo_trunc->reset(); - undo::Truncate::clear_trunc_list(); -} - -/** -Removes unnecessary history data from rollback segments. NOTE that when this -function is called, the caller must not have any latches on undo log pages! -*/ -static void trx_purge_truncate_history() -{ - ut_ad(purge_sys.head <= purge_sys.tail); - purge_sys_t::iterator& head = purge_sys.head.commit - ? purge_sys.head : purge_sys.tail; + mtr.commit(); + /* Write-ahead the redo log record. */ + log_write_up_to(mtr.commit_lsn(), true); + + /* Trim the file size. 
*/ + os_file_truncate(file->name, file->handle, + os_offset_t(size) << srv_page_size_shift, + true); + + /* This is only executed by srv_purge_coordinator_thread. */ + export_vars.innodb_undo_truncations++; + + /* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */ + mutex_enter(&fil_system.mutex); + ut_ad(&space == purge_sys.truncate.current); + ut_ad(space.stop_new_ops); + ut_ad(space.is_being_truncated); + purge_sys.truncate.current->stop_new_ops = false; + purge_sys.truncate.current->is_being_truncated = false; + mutex_exit(&fil_system.mutex); + + if (purge_sys.rseg != NULL + && purge_sys.rseg->last_page_no == FIL_NULL) { + /* If purge_sys.rseg is pointing to rseg that + was recently truncated then move to next rseg + element. Note: Ideally purge_sys.rseg should + be NULL because purge should complete + processing of all the records but there is + purge_batch_size that can force the purge loop + to exit before all the records are purged and + in this case purge_sys.rseg could point to a + valid rseg waiting for next purge cycle. */ + purge_sys.next_stored = false; + purge_sys.rseg = NULL; + } - if (head.trx_no() >= purge_sys.view.low_limit_no()) { - /* This is sometimes necessary. TODO: find out why. */ - head.reset_trx_no(purge_sys.view.low_limit_no()); - head.undo_no = 0; - } + DBUG_EXECUTE_IF("ib_undo_trunc", + ib::info() << "ib_undo_trunc"; + log_write_up_to(LSN_MAX, true); + DBUG_SUICIDE();); - for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { - if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) { - ut_ad(rseg->id == i); - trx_purge_truncate_rseg_history(*rseg, head); + for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { + if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) { + ut_ad(rseg->is_persistent()); + if (rseg->space == &space) { + rseg->skip_allocation = false; + } + } } - } - /* UNDO tablespace truncate. We will try to truncate as much as we - can (greedy approach). This will ensure when the server is idle we - try and truncate all the UNDO tablespaces. 
*/ - for (ulint i = srv_undo_tablespaces_active; i--; ) { - trx_purge_mark_undo_for_truncate(&purge_sys.undo_trunc); - trx_purge_initiate_truncate(head, &purge_sys.undo_trunc); + ib::info() << "Truncated " << file->name; + purge_sys.truncate.last = purge_sys.truncate.current; + purge_sys.truncate.current = NULL; } } diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index 56b8db7ec37..73af66d1893 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -39,9 +39,12 @@ Created 3/26/1996 Heikki Tuuri #include "row0row.h" #include "row0mysql.h" -/** The search tuple corresponding to TRX_UNDO_INSERT_METADATA */ +/** The search tuple corresponding to TRX_UNDO_INSERT_METADATA. */ const dtuple_t trx_undo_metadata = { - REC_INFO_METADATA, 0, 0, + /* This also works for REC_INFO_METADATA_ALTER, because the + delete-mark (REC_INFO_DELETED_FLAG) is ignored when searching. */ + REC_INFO_METADATA_ADD, + 0, 0, NULL, 0, NULL, UT_LIST_NODE_T(dtuple_t)() #ifdef UNIV_DEBUG @@ -505,7 +508,7 @@ trx_undo_page_report_insert( /* Store then the fields required to uniquely determine the record to be inserted in the clustered index */ if (UNIV_UNLIKELY(clust_entry->info_bits != 0)) { - ut_ad(clust_entry->info_bits == REC_INFO_METADATA); + ut_ad(clust_entry->is_metadata()); ut_ad(index->is_instant()); ut_ad(undo_block->frame[first_free + 2] == TRX_UNDO_INSERT_REC); @@ -919,9 +922,9 @@ trx_undo_page_report_modify( /* Store first some general parameters to the undo log */ if (!update) { - ut_ad(!rec_get_deleted_flag(rec, dict_table_is_comp(table))); + ut_ad(!rec_is_delete_marked(rec, dict_table_is_comp(table))); type_cmpl = TRX_UNDO_DEL_MARK_REC; - } else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) { + } else if (rec_is_delete_marked(rec, dict_table_is_comp(table))) { /* In delete-marked records, DB_TRX_ID must always refer to an existing update_undo log record. 
*/ ut_ad(row_get_rec_trx_id(rec, index, offsets)); @@ -950,9 +953,7 @@ trx_undo_page_report_modify( *ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table)); /* Store the values of the system columns */ - field = rec_get_nth_field(rec, offsets, - dict_index_get_sys_col_pos( - index, DATA_TRX_ID), &flen); + field = rec_get_nth_field(rec, offsets, index->db_trx_id(), &flen); ut_ad(flen == DATA_TRX_ID_LEN); trx_id = trx_read_trx_id(field); @@ -966,9 +967,7 @@ trx_undo_page_report_modify( } ptr += mach_u64_write_compressed(ptr, trx_id); - field = rec_get_nth_field(rec, offsets, - dict_index_get_sys_col_pos( - index, DATA_ROLL_PTR), &flen); + field = rec_get_nth_field(rec, offsets, index->db_roll_ptr(), &flen); ut_ad(flen == DATA_ROLL_PTR_LEN); ut_ad(memcmp(field, field_ref_zero, DATA_ROLL_PTR_LEN)); @@ -1035,20 +1034,35 @@ trx_undo_page_report_modify( } } + i = 0; + + if (UNIV_UNLIKELY(update->is_alter_metadata())) { + ut_ad(update->n_fields >= 1); + ut_ad(!upd_fld_is_virtual_col(&update->fields[0])); + ut_ad(update->fields[0].field_no + == index->first_user_field()); + ut_ad(!dfield_is_ext(&update->fields[0].new_val)); + ut_ad(!dfield_is_null(&update->fields[0].new_val)); + /* The instant ADD COLUMN metadata record does not + contain the BLOB. Do not write anything for it. 
*/ + i = !rec_is_alter_metadata(rec, *index); + n_updated -= i; + } + ptr += mach_write_compressed(ptr, n_updated); - for (i = 0; i < upd_get_n_fields(update); i++) { + for (; i < upd_get_n_fields(update); i++) { + if (trx_undo_left(undo_block, ptr) < 5) { + return 0; + } + upd_field_t* fld = upd_get_nth_field(update, i); bool is_virtual = upd_fld_is_virtual_col(fld); ulint max_v_log_len = 0; - ulint pos = fld->field_no; - - /* Write field number to undo log */ - if (trx_undo_left(undo_block, ptr) < 5) { - return(0); - } + ulint pos = fld->field_no; + const dict_col_t* col = NULL; if (is_virtual) { /* Skip the non-indexed column, during @@ -1061,13 +1075,13 @@ trx_undo_page_report_modify( /* add REC_MAX_N_FIELDS to mark this is a virtual col */ - pos += REC_MAX_N_FIELDS; - } + ptr += mach_write_compressed( + ptr, pos + REC_MAX_N_FIELDS); - ptr += mach_write_compressed(ptr, pos); + if (trx_undo_left(undo_block, ptr) < 15) { + return 0; + } - /* Save the old value of field */ - if (is_virtual) { ut_ad(fld->field_no < table->n_v_def); ptr = trx_undo_log_v_idx(undo_block, table, @@ -1092,28 +1106,78 @@ trx_undo_page_report_modify( flen = ut_min( flen, max_v_log_len); } + + goto store_len; + } + + if (UNIV_UNLIKELY(update->is_metadata())) { + ut_ad(pos >= index->first_user_field()); + ut_ad(rec_is_metadata(rec, *index)); + + if (rec_is_alter_metadata(rec, *index)) { + ut_ad(update->is_alter_metadata()); + + field = rec_offs_n_fields(offsets) + > pos + && !rec_offs_nth_default( + offsets, pos) + ? rec_get_nth_field( + rec, offsets, + pos, &flen) + : index->instant_field_value( + pos - 1, &flen); + + if (pos == index->first_user_field()) { + ut_ad(rec_offs_nth_extern( + offsets, pos)); + ut_ad(flen == FIELD_REF_SIZE); + goto write_field; + } + col = dict_index_get_nth_col(index, + pos - 1); + } else if (!update->is_alter_metadata()) { + goto get_field; + } else { + /* We are converting an ADD COLUMN + metadata record to an ALTER TABLE + metadata record, with BLOB. 
Subtract + the missing metadata BLOB field. */ + ut_ad(pos > index->first_user_field()); + --pos; + goto get_field; + } } else { +get_field: + col = dict_index_get_nth_col(index, pos); field = rec_get_nth_cfield( rec, index, offsets, pos, &flen); } +write_field: + /* Write field number to undo log */ + ptr += mach_write_compressed(ptr, pos); if (trx_undo_left(undo_block, ptr) < 15) { - return(0); + return 0; } - if (!is_virtual && rec_offs_nth_extern(offsets, pos)) { - const dict_col_t* col - = dict_index_get_nth_col(index, pos); - ulint prefix_len - = dict_max_field_len_store_undo( - table, col); + if (rec_offs_n_fields(offsets) > pos + && rec_offs_nth_extern(offsets, pos)) { + ut_ad(col || pos == index->first_user_field()); + ut_ad(col || update->is_alter_metadata()); + ut_ad(col + || rec_is_alter_metadata(rec, *index)); + ulint prefix_len = col + ? dict_max_field_len_store_undo( + table, col) + : 0; ut_ad(prefix_len + BTR_EXTERN_FIELD_REF_SIZE <= sizeof ext_buf); ptr = trx_undo_page_report_modify_ext( ptr, - col->ord_part + col + && col->ord_part && !ignore_prefix && flen < REC_ANTELOPE_MAX_INDEX_COL_LEN ? 
ext_buf : NULL, prefix_len, @@ -1122,6 +1186,7 @@ trx_undo_page_report_modify( *type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN; } else { +store_len: ptr += mach_write_compressed(ptr, flen); } @@ -1508,9 +1573,7 @@ trx_undo_update_rec_get_update( mach_write_to_6(buf, trx_id); - upd_field_set_field_no(upd_field, - dict_index_get_sys_col_pos(index, DATA_TRX_ID), - index); + upd_field_set_field_no(upd_field, index->db_trx_id(), index); dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN); upd_field = upd_get_nth_field(update, n_fields + 1); @@ -1519,9 +1582,7 @@ trx_undo_update_rec_get_update( trx_write_roll_ptr(buf, roll_ptr); - upd_field_set_field_no( - upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR), - index); + upd_field_set_field_no(upd_field, index->db_roll_ptr(), index); dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN); /* Store then the updated ordinary columns to the update vector */ @@ -1530,14 +1591,12 @@ trx_undo_update_rec_get_update( const byte* field; ulint len; - ulint field_no; ulint orig_len; - bool is_virtual; upd_field = upd_get_nth_field(update, i); - field_no = mach_read_next_compressed(&ptr); + ulint field_no = mach_read_next_compressed(&ptr); - is_virtual = (field_no >= REC_MAX_N_FIELDS); + const bool is_virtual = (field_no >= REC_MAX_N_FIELDS); if (is_virtual) { /* If new version, we need to check index list to figure @@ -1560,15 +1619,33 @@ trx_undo_update_rec_get_update( } upd_field_set_v_field_no(upd_field, field_no, index); - } else if (field_no < index->n_fields) { - upd_field_set_field_no(upd_field, field_no, index); - } else if (update->info_bits == REC_INFO_MIN_REC_FLAG + } else if (UNIV_UNLIKELY((update->info_bits + & ~REC_INFO_DELETED_FLAG) + == REC_INFO_MIN_REC_FLAG) && index->is_instant()) { - /* This must be a rollback of a subsequent - instant ADD COLUMN operation. This will be - detected and handled by btr_cur_trim(). 
*/ + const ulint uf = index->first_user_field(); + ut_ad(field_no >= uf); + + if (update->info_bits != REC_INFO_MIN_REC_FLAG) { + if (field_no == uf) { + upd_field->new_val.type + .metadata_blob_init(); + } else { + ut_ad(field_no > uf); + dict_col_copy_type( + dict_index_get_nth_col( + index, field_no - 1), + &upd_field->new_val.type); + } + } else { + dict_col_copy_type( + dict_index_get_nth_col(index, + field_no), + &upd_field->new_val.type); + } upd_field->field_no = field_no; - upd_field->orig_len = 0; + } else if (field_no < index->n_fields) { + upd_field_set_field_no(upd_field, field_no, index); } else { ib::error() << "Trying to access update undo rec" " field " << field_no @@ -1601,6 +1678,12 @@ trx_undo_update_rec_get_update( dfield_set_ext(&upd_field->new_val); } + ut_ad(update->info_bits != (REC_INFO_DELETED_FLAG + | REC_INFO_MIN_REC_FLAG) + || field_no != index->first_user_field() + || (upd_field->new_val.ext + && upd_field->new_val.len == FIELD_REF_SIZE)); + if (is_virtual) { upd_field->old_v_val = static_cast<dfield_t*>( mem_heap_alloc( @@ -1701,8 +1784,11 @@ trx_undo_rec_get_partial_row( if (uf->old_v_val) { continue; } - ulint c = dict_index_get_nth_col(index, uf->field_no)->ind; - *dtuple_get_nth_field(*row, c) = uf->new_val; + const dict_col_t& c = *dict_index_get_nth_col(index, + uf->field_no); + if (!c.is_dropped()) { + *dtuple_get_nth_field(*row, c.ind) = uf->new_val; + } } end_ptr = ptr + mach_read_from_2(ptr); @@ -1713,7 +1799,6 @@ trx_undo_rec_get_partial_row( const byte* field; ulint field_no; const dict_col_t* col; - ulint col_no; ulint len; ulint orig_len; bool is_virtual; @@ -1741,15 +1826,18 @@ trx_undo_rec_get_partial_row( dict_v_col_t* vcol = dict_table_get_nth_v_col( index->table, field_no); col = &vcol->m_col; - col_no = dict_col_get_no(col); dfield = dtuple_get_nth_v_field(*row, vcol->v_pos); dict_col_copy_type( &vcol->m_col, dfield_get_type(dfield)); } else { col = dict_index_get_nth_col(index, field_no); - col_no = 
dict_col_get_no(col); - dfield = dtuple_get_nth_field(*row, col_no); + + if (col->is_dropped()) { + continue; + } + + dfield = dtuple_get_nth_field(*row, col->ind); ut_ad(dfield->type.mtype == DATA_MISSING || dict_col_type_assert_equal(col, &dfield->type)); @@ -1757,9 +1845,7 @@ trx_undo_rec_get_partial_row( || dfield->len == len || (len != UNIV_SQL_NULL && len >= UNIV_EXTERN_STORAGE_FIELD)); - dict_col_copy_type( - dict_table_get_nth_col(index->table, col_no), - dfield_get_type(dfield)); + dict_col_copy_type(col, dfield_get_type(dfield)); } dfield_set_data(dfield, field, len); diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 9e992d2f145..4b0684d1735 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -44,10 +44,6 @@ Created 3/26/1996 Heikki Tuuri #include "trx0trx.h" #include "trx0undo.h" -/** This many pages must be undone before a truncate is tried within -rollback */ -static const ulint TRX_ROLL_TRUNC_THRESHOLD = 1; - /** true if trx_rollback_all_recovered() thread is active */ bool trx_rollback_is_active; @@ -874,175 +870,6 @@ DECLARE_THREAD(trx_rollback_all_recovered)(void*) OS_THREAD_DUMMY_RETURN; } -/** Try to truncate the undo logs. 
-@param[in,out] trx transaction */ -static -void -trx_roll_try_truncate(trx_t* trx) -{ - trx->pages_undone = 0; - - undo_no_t undo_no = trx->undo_no; - - if (trx_undo_t* undo = trx->rsegs.m_redo.undo) { - ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); - mutex_enter(&undo->rseg->mutex); - trx_undo_truncate_end(undo, undo_no, false); - mutex_exit(&undo->rseg->mutex); - } - - if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { - ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg); - mutex_enter(&undo->rseg->mutex); - trx_undo_truncate_end(undo, undo_no, true); - mutex_exit(&undo->rseg->mutex); - } - -#ifdef WITH_WSREP_OUT - if (wsrep_on(trx->mysql_thd)) { - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } -#endif /* WITH_WSREP */ -} - -/***********************************************************************//** -Pops the topmost undo log record in a single undo log and updates the info -about the topmost record in the undo log memory struct. -@return undo log record, the page s-latched */ -static -trx_undo_rec_t* -trx_roll_pop_top_rec( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - trx_undo_t* undo, /*!< in: undo log */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* undo_page = trx_undo_page_get_s_latched( - page_id_t(undo->rseg->space->id, undo->top_page_no), mtr); - - ulint offset = undo->top_offset; - - trx_undo_rec_t* prev_rec = trx_undo_get_prev_rec( - undo_page + offset, undo->hdr_page_no, undo->hdr_offset, - true, mtr); - - if (prev_rec == NULL) { - undo->top_undo_no = IB_ID_MAX; - ut_ad(undo->empty()); - } else { - page_t* prev_rec_page = page_align(prev_rec); - - if (prev_rec_page != undo_page) { - - trx->pages_undone++; - } - - undo->top_page_no = page_get_page_no(prev_rec_page); - undo->top_offset = ulint(prev_rec - prev_rec_page); - undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); - ut_ad(!undo->empty()); - } - - return(undo_page + offset); -} - -/** Get the last undo log record of a transaction (for rollback). 
-@param[in,out] trx transaction -@param[out] roll_ptr DB_ROLL_PTR to the undo record -@param[in,out] heap memory heap for allocation -@return undo log record copied to heap -@retval NULL if none left or the roll_limit (savepoint) was reached */ -trx_undo_rec_t* -trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap) -{ - if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) { - trx_roll_try_truncate(trx); - } - - trx_undo_t* undo = NULL; - trx_undo_t* insert = trx->rsegs.m_redo.old_insert; - trx_undo_t* update = trx->rsegs.m_redo.undo; - trx_undo_t* temp = trx->rsegs.m_noredo.undo; - const undo_no_t limit = trx->roll_limit; - - ut_ad(!insert || !update || insert->empty() || update->empty() - || insert->top_undo_no != update->top_undo_no); - ut_ad(!insert || !temp || insert->empty() || temp->empty() - || insert->top_undo_no != temp->top_undo_no); - ut_ad(!update || !temp || update->empty() || temp->empty() - || update->top_undo_no != temp->top_undo_no); - - if (UNIV_LIKELY_NULL(insert) - && !insert->empty() && limit <= insert->top_undo_no) { - undo = insert; - } - - if (update && !update->empty() && update->top_undo_no >= limit) { - if (!undo) { - undo = update; - } else if (undo->top_undo_no < update->top_undo_no) { - undo = update; - } - } - - if (temp && !temp->empty() && temp->top_undo_no >= limit) { - if (!undo) { - undo = temp; - } else if (undo->top_undo_no < temp->top_undo_no) { - undo = temp; - } - } - - if (undo == NULL) { - trx_roll_try_truncate(trx); - /* Mark any ROLLBACK TO SAVEPOINT completed, so that - if the transaction object is committed and reused - later, we will default to a full ROLLBACK. 
*/ - trx->roll_limit = 0; - trx->in_rollback = false; - return(NULL); - } - - ut_ad(!undo->empty()); - ut_ad(limit <= undo->top_undo_no); - - *roll_ptr = trx_undo_build_roll_ptr( - false, undo->rseg->id, undo->top_page_no, undo->top_offset); - - mtr_t mtr; - mtr.start(); - - trx_undo_rec_t* undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr); - const undo_no_t undo_no = trx_undo_rec_get_undo_no(undo_rec); - switch (trx_undo_rec_get_type(undo_rec)) { - case TRX_UNDO_INSERT_METADATA: - /* This record type was introduced in MDEV-11369 - instant ADD COLUMN, which was implemented after - MDEV-12288 removed the insert_undo log. There is no - instant ADD COLUMN for temporary tables. Therefore, - this record can only be present in the main undo log. */ - ut_ad(undo == update); - /* fall through */ - case TRX_UNDO_RENAME_TABLE: - ut_ad(undo == insert || undo == update); - /* fall through */ - case TRX_UNDO_INSERT_REC: - ut_ad(undo == insert || undo == update || undo == temp); - *roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS; - break; - default: - ut_ad(undo == update || undo == temp); - break; - } - - trx->undo_no = undo_no; - - trx_undo_rec_t* undo_rec_copy = trx_undo_rec_copy(undo_rec, heap); - mtr.commit(); - - return(undo_rec_copy); -} - /****************************************************************//** Builds an undo 'query' graph for a transaction. The actual rollback is performed by executing this query graph like a query subprocedure call. diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc index 4a3102d225f..ef871be1552 100644 --- a/storage/innobase/trx/trx0rseg.cc +++ b/storage/innobase/trx/trx0rseg.cc @@ -252,12 +252,10 @@ void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr) mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr); /* Clear also possible garbage at the end of the page. Old InnoDB versions did not initialize unused parts of pages. 
*/ - byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8; - ulint len = srv_page_size - - (FIL_PAGE_DATA_END - + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8); - memset(b, 0, len); - mlog_log_string(b, len, mtr); + mlog_memset(TRX_RSEG_MAX_TRX_ID + 8 + rseg_header, + srv_page_size + - (FIL_PAGE_DATA_END + + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8), 0, mtr); } /** Create a rollback segment header. @@ -273,8 +271,6 @@ trx_rseg_header_create( buf_block_t* sys_header, mtr_t* mtr) { - ulint page_no; - trx_rsegf_t* rsegf; buf_block_t* block; ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_X_LOCK)); @@ -291,25 +287,17 @@ trx_rseg_header_create( buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW); - page_no = block->page.id.page_no(); - - /* Get the rollback segment file page */ - rsegf = trx_rsegf_get_new(space->id, page_no, mtr); - - mlog_write_ulint(rsegf + TRX_RSEG_FORMAT, 0, MLOG_4BYTES, mtr); + ut_ad(0 == mach_read_from_4(TRX_RSEG_FORMAT + TRX_RSEG + + block->frame)); + ut_ad(0 == mach_read_from_4(TRX_RSEG_HISTORY_SIZE + TRX_RSEG + + block->frame)); /* Initialize the history list */ - - mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr); - flst_init(rsegf + TRX_RSEG_HISTORY, mtr); + flst_init(block, TRX_RSEG_HISTORY + TRX_RSEG, mtr); /* Reset the undo log slots */ - for (ulint i = 0; i < TRX_RSEG_N_SLOTS; i++) { - /* FIXME: This is generating a lot of redo log. - Why not just let it remain zero-initialized, - and adjust trx_rsegf_undo_find_free() and friends? 
*/ - trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr); - } + mlog_memset(block, TRX_RSEG_UNDO_SLOTS + TRX_RSEG, + TRX_RSEG_N_SLOTS * 4, 0xff, mtr); if (sys_header) { /* Add the rollback segment info to the free slot in @@ -324,10 +312,10 @@ trx_rseg_header_create( + TRX_SYS_RSEG_PAGE_NO + rseg_id * TRX_SYS_RSEG_SLOT_SIZE + sys_header->frame, - page_no, MLOG_4BYTES, mtr); + block->page.id.page_no(), MLOG_4BYTES, mtr); } - return(page_no); + return block->page.id.page_no(); } /** Free a rollback segment in memory. */ diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index b46805eef4b..63966d280b2 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -190,10 +190,9 @@ trx_sysf_create( ut_a(ptr <= page + (srv_page_size - FIL_PAGE_DATA_END)); /* Initialize all of the page. This part used to be uninitialized. */ - memset(ptr, 0, srv_page_size - FIL_PAGE_DATA_END + size_t(page - ptr)); - - mlog_log_string(TRX_SYS + page, srv_page_size - FIL_PAGE_DATA_END - - TRX_SYS, mtr); + mlog_memset(block, ptr - page, + srv_page_size - FIL_PAGE_DATA_END + size_t(page - ptr), + 0, mtr); /* Create the first rollback segment in the SYSTEM tablespace */ slot_no = trx_sys_rseg_find_free(block); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 07654be12ee..a0ceb44b8d1 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -210,6 +210,9 @@ struct TrxFactory { lock_trx_lock_list_init(&trx->lock.trx_locks); + UT_LIST_INIT(trx->lock.evicted_tables, + &dict_table_t::table_LRU); + UT_LIST_INIT( trx->trx_savepoints, &trx_named_savept_t::trx_savepoints); @@ -234,6 +237,7 @@ struct TrxFactory { } ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); + ut_ad(UT_LIST_GET_LEN(trx->lock.evicted_tables) == 0); UT_DELETE(trx->xid); ut_free(trx->detailed_error); @@ -386,6 +390,7 @@ trx_t *trx_create() ut_ad(trx->lock.n_rec_locks == 0); ut_ad(trx->lock.table_cached == 0); 
ut_ad(trx->lock.rec_cached == 0); + ut_ad(UT_LIST_GET_LEN(trx->lock.evicted_tables) == 0); #ifdef WITH_WSREP trx->wsrep_event = NULL; @@ -1244,6 +1249,37 @@ trx_update_mod_tables_timestamp( trx->mod_tables.clear(); } +/** Evict a table definition due to the rollback of ALTER TABLE. +@param[in] table_id table identifier */ +void trx_t::evict_table(table_id_t table_id) +{ + ut_ad(in_rollback); + + dict_table_t* table = dict_table_open_on_id( + table_id, true, DICT_TABLE_OP_OPEN_ONLY_IF_CACHED); + if (!table) { + return; + } + + if (!table->release()) { + /* This must be a DDL operation that is being rolled + back in an active connection. */ + ut_a(table->get_ref_count() == 1); + ut_ad(!is_recovered); + ut_ad(mysql_thd); + return; + } + + /* This table should only be locked by this transaction, if at all. */ + ut_ad(UT_LIST_GET_LEN(table->locks) <= 1); + const bool locked = UT_LIST_GET_LEN(table->locks); + ut_ad(!locked || UT_LIST_GET_FIRST(table->locks)->trx == this); + dict_table_remove_from_cache(table, true, locked); + if (locked) { + UT_LIST_ADD_FIRST(lock.evicted_tables, table); + } +} + /****************************************************************//** Commits a transaction in memory. 
*/ static @@ -1309,9 +1345,16 @@ trx_commit_in_memory( trx_update_mod_tables_timestamp(trx); MONITOR_INC(MONITOR_TRX_RW_COMMIT); } + + while (dict_table_t* table = UT_LIST_GET_FIRST( + trx->lock.evicted_tables)) { + UT_LIST_REMOVE(trx->lock.evicted_tables, table); + dict_mem_table_free(table); + } } ut_ad(!trx->rsegs.m_redo.undo); + ut_ad(UT_LIST_GET_LEN(trx->lock.evicted_tables) == 0); if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) { mutex_enter(&rseg->mutex); diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 61ba65ebc19..e084b0b67bf 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -888,54 +888,55 @@ trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr) @param[in,out] undo undo log @param[in] limit all undo logs after this limit will be discarded @param[in] is_temp whether this is temporary undo log */ -void -trx_undo_truncate_end(trx_undo_t* undo, undo_no_t limit, bool is_temp) +void trx_undo_truncate_end(trx_undo_t& undo, undo_no_t limit, bool is_temp) { - ut_ad(mutex_own(&undo->rseg->mutex)); - ut_ad(is_temp == !undo->rseg->is_persistent()); + mtr_t mtr; + ut_ad(is_temp == !undo.rseg->is_persistent()); for (;;) { - mtr_t mtr; mtr.start(); if (is_temp) { mtr.set_log_mode(MTR_LOG_NO_REDO); } trx_undo_rec_t* trunc_here = NULL; + mutex_enter(&undo.rseg->mutex); page_t* undo_page = trx_undo_page_get( - page_id_t(undo->rseg->space->id, undo->last_page_no), + page_id_t(undo.rseg->space->id, undo.last_page_no), &mtr); trx_undo_rec_t* rec = trx_undo_page_get_last_rec( - undo_page, undo->hdr_page_no, undo->hdr_offset); + undo_page, undo.hdr_page_no, undo.hdr_offset); while (rec) { - if (trx_undo_rec_get_undo_no(rec) >= limit) { - /* Truncate at least this record off, maybe - more */ - trunc_here = rec; - } else { - goto function_exit; + if (trx_undo_rec_get_undo_no(rec) < limit) { + goto func_exit; } + /* Truncate at least this record off, maybe more */ + trunc_here = rec; rec = 
trx_undo_page_get_prev_rec(rec, - undo->hdr_page_no, - undo->hdr_offset); + undo.hdr_page_no, + undo.hdr_offset); } - if (undo->last_page_no == undo->hdr_page_no) { -function_exit: - if (trunc_here) { - mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE, - ulint(trunc_here - undo_page), - MLOG_2BYTES, &mtr); - } - + if (undo.last_page_no != undo.hdr_page_no) { + trx_undo_free_last_page(&undo, &mtr); + mutex_exit(&undo.rseg->mutex); mtr.commit(); - return; + continue; + } + +func_exit: + mutex_exit(&undo.rseg->mutex); + + if (trunc_here) { + mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE, + ulint(trunc_here - undo_page), + MLOG_2BYTES, &mtr); } - trx_undo_free_last_page(undo, &mtr); mtr.commit(); + return; } } diff --git a/storage/innobase/ut/ut0new.cc b/storage/innobase/ut/ut0new.cc index 14f2748218c..cdba09974ad 100644 --- a/storage/innobase/ut/ut0new.cc +++ b/storage/innobase/ut/ut0new.cc @@ -148,7 +148,6 @@ ut_new_boot() "row0merge", "row0mysql", "row0sel", - "row0trunc", "srv0conc", "srv0srv", "srv0start", diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc index 39fb037aa28..adda0b960c9 100644 --- a/storage/innobase/ut/ut0ut.cc +++ b/storage/innobase/ut/ut0ut.cc @@ -589,8 +589,6 @@ ut_strerr( return("Tablespace already exists"); case DB_TABLESPACE_DELETED: return("Tablespace deleted or being deleted"); - case DB_TABLESPACE_TRUNCATED: - return("Tablespace was truncated"); case DB_TABLESPACE_NOT_FOUND: return("Tablespace not found"); case DB_LOCK_TABLE_FULL: diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt index b8e3350ca76..77dfb201413 100644 --- a/storage/maria/CMakeLists.txt +++ b/storage/maria/CMakeLists.txt @@ -56,7 +56,8 @@ IF(APPLE) ENDIF() MYSQL_ADD_PLUGIN(aria ${ARIA_SOURCES} - STORAGE_ENGINE STATIC_ONLY DEFAULT + STORAGE_ENGINE + MANDATORY RECOMPILE_FOR_EMBEDDED) IF(NOT WITH_ARIA_STORAGE_ENGINE) diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 
10ec7ad4d67..04b684ed132 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -387,6 +387,10 @@ static void init_aria_psi_keys(void) #define init_aria_psi_keys() /* no-op */ #endif /* HAVE_PSI_INTERFACE */ +const char *MA_CHECK_INFO= "info"; +const char *MA_CHECK_WARNING= "warning"; +const char *MA_CHECK_ERROR= "error"; + /***************************************************************************** ** MARIA tables *****************************************************************************/ @@ -399,6 +403,20 @@ static handler *maria_create_handler(handlerton *hton, } +static void _ma_check_print(HA_CHECK *param, const char* msg_type, + const char *msgbuf) +{ + if (msg_type == MA_CHECK_INFO) + sql_print_information("%s.%s: %s", param->db_name, param->table_name, + msgbuf); + else if (msg_type == MA_CHECK_WARNING) + sql_print_warning("%s.%s: %s", param->db_name, param->table_name, + msgbuf); + else + sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); +} + + // collect errors printed by maria_check routines static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type, @@ -420,16 +438,21 @@ static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type, if (!thd->vio_ok()) { - sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); + _ma_check_print(param, msg_type, msgbuf); return; } if (param->testflag & (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR)) { - my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME)); + myf flag= 0; + if (msg_type == MA_CHECK_INFO) + flag= ME_NOTE; + else if (msg_type == MA_CHECK_WARNING) + flag= ME_WARNING; + my_message(ER_NOT_KEYFILE, msgbuf, MYF(flag)); if (thd->variables.log_warnings > 2) - sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); + _ma_check_print(param, msg_type, msgbuf); return; } length= (uint) (strxmov(name, param->db_name, ".", param->table_name, @@ -451,7 +474,7 @@ static void _ma_check_print_msg(HA_CHECK *param, const 
char *msg_type, sql_print_error("Failed on my_net_write, writing to stderr instead: %s.%s: %s\n", param->db_name, param->table_name, msgbuf); else if (thd->variables.log_warnings > 2) - sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); + _ma_check_print(param, msg_type, msgbuf); return; } @@ -879,7 +902,7 @@ void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...) if (param->testflag & T_SUPPRESS_ERR_HANDLING) DBUG_VOID_RETURN; va_start(args, fmt); - _ma_check_print_msg(param, "error", fmt, args); + _ma_check_print_msg(param, MA_CHECK_ERROR, fmt, args); va_end(args); DBUG_VOID_RETURN; } @@ -890,7 +913,7 @@ void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...) va_list args; DBUG_ENTER("_ma_check_print_info"); va_start(args, fmt); - _ma_check_print_msg(param, "info", fmt, args); + _ma_check_print_msg(param, MA_CHECK_INFO, fmt, args); va_end(args); DBUG_VOID_RETURN; } @@ -903,7 +926,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) 
param->warning_printed= 1; param->out_flag |= O_DATA_LOST; va_start(args, fmt); - _ma_check_print_msg(param, "warning", fmt, args); + _ma_check_print_msg(param, MA_CHECK_WARNING, fmt, args); va_end(args); DBUG_VOID_RETURN; } @@ -1006,6 +1029,8 @@ handler *ha_maria::clone(const char *name, MEM_ROOT *mem_root) new_handler->file->state= file->state; /* maria_create_trn_for_mysql() is never called for clone() tables */ new_handler->file->trn= file->trn; + DBUG_ASSERT(new_handler->file->trn_prev == 0 && + new_handler->file->trn_next == 0); } return new_handler; } @@ -1271,6 +1296,8 @@ int ha_maria::close(void) MARIA_HA *tmp= file; if (!tmp) return 0; + DBUG_ASSERT(file->trn == 0 || file->trn == &dummy_transaction_object); + DBUG_ASSERT(file->trn_next == 0 && file->trn_prev == 0); file= 0; return maria_close(tmp); } @@ -1386,6 +1413,16 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) mysql_mutex_unlock(&share->intern_lock); info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE | HA_STATUS_CONST); + + /* + Write a 'table is ok' message to error log if table is ok and + we have written to error log that table was getting checked + */ + if (!error && !(table->db_stat & HA_READ_ONLY) && + !maria_is_crashed(file) && thd->error_printed_to_log && + (param->warning_printed || param->error_printed || + param->note_printed)) + _ma_check_print_info(param, "Table is fixed"); } } else if (!maria_is_crashed(file) && !thd->killed) @@ -1396,7 +1433,10 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) /* Reset trn, that may have been set by repair */ if (old_trn && old_trn != file->trn) + { + DBUG_ASSERT(old_trn->used_instances == 0); _ma_set_trn_for_table(file, old_trn); + } thd_proc_info(thd, old_proc_info); thd_progress_end(thd); return error ? 
HA_ADMIN_CORRUPT : HA_ADMIN_OK; @@ -2615,14 +2655,20 @@ int ha_maria::extra(enum ha_extra_function operation) operation == HA_EXTRA_PREPARE_FOR_FORCED_CLOSE)) { THD *thd= table->in_use; - TRN *trn= THD_TRN; - _ma_set_tmp_trn_for_table(file, trn); + file->trn= THD_TRN; } DBUG_ASSERT(file->s->base.born_transactional || file->trn == 0 || file->trn == &dummy_transaction_object); tmp= maria_extra(file, operation, 0); - file->trn= old_trn; // Reset trn if was used + /* + Restore trn if it was changed above. + Note that table could be removed from trn->used_tables and + trn->used_instances if trn was set and some of the above operations + was used. This is ok as the table should not be part of any transaction + after this and thus doesn't need to be part of any of the above lists. + */ + file->trn= old_trn; return tmp; } @@ -2858,9 +2904,12 @@ static void reset_thd_trn(THD *thd, MARIA_HA *first_table) { DBUG_ENTER("reset_thd_trn"); THD_TRN= NULL; - for (MARIA_HA *table= first_table; table ; - table= table->trn_next) + MARIA_HA *next; + for (MARIA_HA *table= first_table; table ; table= next) + { + next= table->trn_next; _ma_reset_trn_for_table(table); + } DBUG_VOID_RETURN; } @@ -2907,9 +2956,11 @@ int ha_maria::implicit_commit(THD *thd, bool new_trn) DBUG_RETURN(0); } + /* Prepare to move used_instances and locked tables to new TRN object */ locked_tables= trnman_has_locked_tables(trn); + trnman_reset_locked_tables(trn, 0); + relink_trn_used_instances(&used_tables, trn); - used_tables= (MARIA_HA*) trn->used_instances; error= 0; if (unlikely(ma_commit(trn))) error= 1; @@ -3334,6 +3385,8 @@ static int maria_commit(handlerton *hton __attribute__ ((unused)), { TRN *trn= THD_TRN; DBUG_ENTER("maria_commit"); + + DBUG_ASSERT(trnman_has_locked_tables(trn) == 0); trnman_reset_locked_tables(trn, 0); trnman_set_flags(trn, trnman_get_flags(trn) & ~TRN_STATE_INFO_LOGGED); @@ -3351,9 +3404,12 @@ static int maria_rollback(handlerton *hton __attribute__ ((unused)), { TRN *trn= THD_TRN; 
DBUG_ENTER("maria_rollback"); + + DBUG_ASSERT(trnman_has_locked_tables(trn) == 0); trnman_reset_locked_tables(trn, 0); /* statement or transaction ? */ - if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && !all) + if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + !all) { trnman_rollback_statement(trn); DBUG_RETURN(0); // end of statement @@ -3509,7 +3565,7 @@ static int mark_recovery_start(const char* log_dir) int res; DBUG_ENTER("mark_recovery_start"); if (!(maria_recover_options & HA_RECOVER_ANY)) - ma_message_no_user(ME_JUST_WARNING, "Please consider using option" + ma_message_no_user(ME_WARNING, "Please consider using option" " --aria-recover-options[=...] to automatically check and" " repair tables when logs are removed by option" " --aria-force-start-after-recovery-failures=#"); @@ -3527,7 +3583,7 @@ static int mark_recovery_start(const char* log_dir) " recovery from logs", (res ? "failed to remove some" : "removed all"), recovery_failures); - ma_message_no_user((res ? 0 : ME_JUST_WARNING), msg); + ma_message_no_user((res ? 0 : ME_WARNING), msg); } else res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno, @@ -3988,8 +4044,8 @@ maria_declare_plugin(aria) MYSQL_STORAGE_ENGINE_PLUGIN, &maria_storage_engine, "Aria", - "Monty Program Ab", - "Crash-safe tables with MyISAM heritage", + "MariaDB Corporation Ab", + "Crash-safe tables with MyISAM heritage. 
Used for internal temporary tables and privilege tables", PLUGIN_LICENSE_GPL, ha_maria_init, /* Plugin Init */ NULL, /* Plugin Deinit */ diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index eeb922dd3f5..29a31a4596f 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -2600,7 +2600,8 @@ static my_bool free_full_page_range(MARIA_HA *info, pgcache_page_no_t page, @param record Record we should write @param row Statistics about record (calculated by calc_record_size()) - @param map_blocks On which pages the record should be stored + @param bitmap_blocks On which pages the record should be stored + @param head_block_is_read 1 if head block existed. 0 if new block. @param row_pos Position on head page where to put head part of record @param undo_lsn <> LSN_ERROR if we are executing an UNDO diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h index df877ad2bbc..2ad044d5686 100644 --- a/storage/maria/ma_checkpoint.h +++ b/storage/maria/ma_checkpoint.h @@ -84,8 +84,8 @@ static inline LSN lsn_read_non_atomic_32(const volatile LSN *x) prints a message from a task not connected to any user (checkpoint and recovery for example). 
- @param level 0 if error, ME_JUST_WARNING if warning, - ME_JUST_INFO if info + @param level 0 if error, ME_WARNING if warning, + ME_NOTE if info @param sentence text to write */ #define ma_message_no_user(level, sentence) \ diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 6ca6d06c97f..3c536d2c46f 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -531,7 +531,7 @@ int ma_control_file_write_and_force(LSN last_checkpoint_lsn_arg, "Control file must be from a newer version; zero-ing out %u" " unknown bytes in control file at offset %u", zeroed, cf_changeable_size + cf_create_time_size); - ma_message_no_user(ME_JUST_WARNING, msg); + ma_message_no_user(ME_WARNING, msg); } else { diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 503d2420c41..24aa892d212 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -827,6 +827,11 @@ int maria_create(const char *name, enum data_file_type datafile_type, */ share.state.skip_redo_lsn= share.state.is_of_horizon= share.state.create_rename_lsn= LSN_MAX; + /* + We have to mark the table as not movable as the table will contain the + maria_uuid and create_rename_lsn + */ + share.state.changed|= STATE_NOT_MOVABLE; } if (datafile_type == DYNAMIC_RECORD) @@ -1446,6 +1451,7 @@ int _ma_update_state_lsns_sub(MARIA_SHARE *share, LSN lsn, TrID create_trid, uchar buf[LSN_STORE_SIZE * 3], *ptr; uchar trid_buff[8]; File file= share->kfile.file; + DBUG_ENTER("_ma_update_state_lsns_sub"); DBUG_ASSERT(file >= 0); if (lsn == LSN_IMPOSSIBLE) @@ -1464,7 +1470,7 @@ int _ma_update_state_lsns_sub(MARIA_SHARE *share, LSN lsn, TrID create_trid, 0].length, sizeof(log_array)/sizeof(log_array[0]), log_array, NULL, NULL))) - return res; + DBUG_RETURN(res); } for (ptr= buf; ptr < (buf + sizeof(buf)); ptr+= LSN_STORE_SIZE) @@ -1497,13 +1503,13 @@ int _ma_update_state_lsns_sub(MARIA_SHARE *share, LSN lsn, TrID create_trid, } else lsn_store(buf, 
share->state.create_rename_lsn); - return (my_pwrite(file, buf, sizeof(buf), - sizeof(share->state.header) + - MARIA_FILE_CREATE_RENAME_LSN_OFFSET, MYF(MY_NABP)) || - my_pwrite(file, trid_buff, sizeof(trid_buff), - sizeof(share->state.header) + - MARIA_FILE_CREATE_TRID_OFFSET, MYF(MY_NABP)) || - (do_sync && mysql_file_sync(file, MYF(0)))); + DBUG_RETURN(my_pwrite(file, buf, sizeof(buf), + sizeof(share->state.header) + + MARIA_FILE_CREATE_RENAME_LSN_OFFSET, MYF(MY_NABP)) || + my_pwrite(file, trid_buff, sizeof(trid_buff), + sizeof(share->state.header) + + MARIA_FILE_CREATE_TRID_OFFSET, MYF(MY_NABP)) || + (do_sync && mysql_file_sync(file, MYF(0)))); } #if defined(_MSC_VER) && (_MSC_VER == 1310) #pragma optimize("",on) diff --git a/storage/maria/ma_crypt.c b/storage/maria/ma_crypt.c index 42895dcdfa4..a007c14ba29 100644 --- a/storage/maria/ma_crypt.c +++ b/storage/maria/ma_crypt.c @@ -156,7 +156,7 @@ ma_crypt_read(MARIA_SHARE* share, uchar *buff) { my_printf_error(HA_ERR_UNSUPPORTED, "Unsupported crypt scheme! type: %d iv_length: %d\n", - MYF(ME_FATALERROR|ME_NOREFRESH), + MYF(ME_FATAL|ME_ERROR_LOG), type, iv_length); return 0; } @@ -464,7 +464,7 @@ static int ma_encrypt(MARIA_SHARE *share, MARIA_CRYPT_DATA *crypt_data, my_errno= HA_ERR_DECRYPTION_FAILED; my_printf_error(HA_ERR_DECRYPTION_FAILED, "Unknown key id %u. 
Can't continue!", - MYF(ME_FATALERROR|ME_NOREFRESH), + MYF(ME_FATAL|ME_ERROR_LOG), crypt_data->scheme.key_id); return 1; } @@ -481,7 +481,7 @@ static int ma_encrypt(MARIA_SHARE *share, MARIA_CRYPT_DATA *crypt_data, my_errno= HA_ERR_DECRYPTION_FAILED; my_printf_error(HA_ERR_DECRYPTION_FAILED, "failed to encrypt '%s' rc: %d dstlen: %u size: %u\n", - MYF(ME_FATALERROR|ME_NOREFRESH), + MYF(ME_FATAL|ME_ERROR_LOG), share->open_file_name.str, rc, dstlen, size); return 1; } @@ -508,7 +508,7 @@ static int ma_decrypt(MARIA_SHARE *share, MARIA_CRYPT_DATA *crypt_data, my_errno= HA_ERR_DECRYPTION_FAILED; my_printf_error(HA_ERR_DECRYPTION_FAILED, "failed to decrypt '%s' rc: %d dstlen: %u size: %u\n", - MYF(ME_FATALERROR|ME_NOREFRESH), + MYF(ME_FATAL|ME_ERROR_LOG), share->open_file_name.str, rc, dstlen, size); return 1; } diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c index da44da123d2..22b9c86f21d 100644 --- a/storage/maria/ma_info.c +++ b/storage/maria/ma_info.c @@ -148,6 +148,6 @@ void _ma_report_error(int errcode, const LEX_STRING *name) } } - my_error(errcode, MYF(ME_NOREFRESH), file_name); + my_error(errcode, MYF(ME_ERROR_LOG), file_name); DBUG_VOID_RETURN; } diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c index 8af3c41a3a1..7244d95b184 100644 --- a/storage/maria/ma_init.c +++ b/storage/maria/ma_init.c @@ -148,7 +148,7 @@ my_bool maria_upgrade() my_message(HA_ERR_INITIALIZATION, "Found old style Maria log files; " "Converting them to Aria names", - MYF(ME_JUST_INFO)); + MYF(ME_NOTE)); for (i= 0; i < dir->number_of_files; i++) { diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index 4723c04e3cf..54f072ede5c 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -454,7 +454,7 @@ int _ma_mark_file_changed_now(register MARIA_SHARE *share) } /* Set uuid of file if not yet set (zerofilled file) */ if (share->base.born_transactional && - !(share->state.changed & STATE_NOT_MOVABLE)) + !(share->state.org_changed 
& STATE_NOT_MOVABLE)) { /* Lock table to current installation */ if (_ma_set_uuid(share, 0) || @@ -464,6 +464,7 @@ int _ma_mark_file_changed_now(register MARIA_SHARE *share) TRUE, TRUE))) goto err; share->state.changed|= STATE_NOT_MOVABLE; + share->state.org_changed|= STATE_NOT_MOVABLE; } } error= 0; diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 13f7a64e786..cd065d5eb1c 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -453,6 +453,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->state.state_length=base_pos; /* For newly opened tables we reset the error-has-been-printed flag */ share->state.changed&= ~STATE_CRASHED_PRINTED; + share->state.org_changed= share->state.changed; if (!(open_flags & HA_OPEN_FOR_REPAIR) && ((share->state.changed & STATE_CRASHED_FLAGS) || @@ -473,13 +474,13 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) /* A transactional table is not usable on this system if: - share->state.create_trid > trnman_get_max_trid() - - Critical as trid as stored releativel to create_trid. + - Critical as trid as stored releative to create_trid. - uuid is different STATE_NOT_MOVABLE is reset when a table is zerofilled (has no LSN's and no trids) - We can ignore testing uuid if STATE_NOT_MOVABLE is set, as in this + We can ignore testing uuid if STATE_NOT_MOVABLE is not set, as in this case the uuid will be set in _ma_mark_file_changed(). */ if (share->base.born_transactional && @@ -800,17 +801,27 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->state.is_of_horizon) > 0) || !LSN_VALID(share->state.skip_redo_lsn) || (cmp_translog_addr(share->state.create_rename_lsn, - share->state.skip_redo_lsn) > 0)) && - !(open_flags & HA_OPEN_FOR_REPAIR)) + share->state.skip_redo_lsn) > 0))) { - /* - If in Recovery, it will not work. If LSN is invalid and not - LSN_NEEDS_NEW_STATE_LSNS, header must be corrupted. - In both cases, must repair. 
- */ - my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ? - HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE); - goto err; + if (!(open_flags & HA_OPEN_FOR_REPAIR)) + { + /* + If in Recovery, it will not work. If LSN is invalid and not + LSN_NEEDS_NEW_STATE_LSNS, header must be corrupted. + In both cases, must repair. + */ + my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ? + HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE); + goto err; + } + else + { + /* + Open in repair mode. Ensure that we mark the table crashed, so + that we run auto_repair on it + */ + maria_mark_crashed_share(share); + } } else if (!(open_flags & HA_OPEN_FOR_REPAIR)) { diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index d1cb2d584b3..ae52b9191de 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -184,7 +184,7 @@ void maria_recover_error_handler_hook(uint error, const char *str, static void print_preamble() { - ma_message_no_user(ME_JUST_INFO, "starting recovery"); + ma_message_no_user(ME_NOTE, "starting recovery"); } @@ -523,7 +523,7 @@ end: } if (!error) { - ma_message_no_user(ME_JUST_INFO, "recovery done"); + ma_message_no_user(ME_NOTE, "recovery done"); maria_recovery_changed_data= 1; } } @@ -1363,6 +1363,7 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) silently pass in the "info == NULL" test below. */ tprint(tracef, ", record is corrupted"); + eprint(tracef, "\n***WARNING: %s may be corrupted", name ? name : "NULL"); info= NULL; recovery_warnings++; goto end; @@ -1375,7 +1376,11 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) " or its header is so corrupted that we cannot open it;" " we skip it"); if (my_errno != ENOENT) + { recovery_found_crashed_tables++; + eprint(tracef, "\n***WARNING: %s could not be opened: Error: %d", + name ? 
name : "NULL", (int) my_errno); + } error= 0; goto end; } @@ -1404,6 +1409,7 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) not transactional table */ tprint(tracef, ", is not transactional. Ignoring open request"); + eprint(tracef, "\n***WARNING: '%s' may be crashed", name); error= -1; recovery_warnings++; goto end; @@ -1414,6 +1420,7 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) " LOGREC_FILE_ID's LSN " LSN_FMT ", ignoring open request", LSN_IN_PARTS(share->state.create_rename_lsn), LSN_IN_PARTS(lsn_of_file_id)); + eprint(tracef, "\n***WARNING: '%s' may be crashed", name); recovery_warnings++; error= -1; goto end; @@ -1445,6 +1452,8 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) (kfile_len == MY_FILEPOS_ERROR)) { tprint(tracef, ", length unknown\n"); + eprint(tracef, "\n***WARNING: Can't read length of file '%s'", + share->open_file_name.str); recovery_warnings++; goto end; } @@ -3568,7 +3577,12 @@ void _ma_tmp_disable_logging_for_table(MARIA_HA *info, should be now. info->trn may be NULL in maria_chk. 
*/ if (info->trn == NULL) + { info->trn= &dummy_transaction_object; + info->trn_next= 0; + info->trn_prev= 0; + } + DBUG_ASSERT(info->trn->rec_lsn == LSN_IMPOSSIBLE); share->page_type= PAGECACHE_PLAIN_PAGE; /* Functions below will pick up now_transactional and change callbacks */ diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c index 024b72fff2e..ccf48b80f7c 100644 --- a/storage/maria/ma_sort.c +++ b/storage/maria/ma_sort.c @@ -30,13 +30,11 @@ /* static variables */ #undef MIN_SORT_MEMORY -#undef MYF_RW #undef DISK_BUFFER_SIZE #define MERGEBUFF 15 #define MERGEBUFF2 31 #define MIN_SORT_MEMORY (4096-MALLOC_OVERHEAD) -#define MYF_RW MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL) #define DISK_BUFFER_SIZE (IO_SIZE*128) /* How many keys we can keep in memory */ diff --git a/storage/maria/ma_state.c b/storage/maria/ma_state.c index 23cb625fc58..c658b9e667c 100644 --- a/storage/maria/ma_state.c +++ b/storage/maria/ma_state.c @@ -30,6 +30,7 @@ #include "maria_def.h" #include "trnman.h" +#include "ma_trnman.h" #include "ma_blockrec.h" /** @@ -571,7 +572,6 @@ void _ma_remove_table_from_trnman(MARIA_HA *info) MARIA_SHARE *share= info->s; TRN *trn= info->trn; MARIA_USED_TABLES *tables, **prev; - MARIA_HA *handler, **prev_file; DBUG_ENTER("_ma_remove_table_from_trnman"); DBUG_PRINT("enter", ("trn: %p used_tables: %p share: %p in_trans: %d", trn, trn->used_tables, share, share->in_trans)); @@ -603,26 +603,9 @@ void _ma_remove_table_from_trnman(MARIA_HA *info) DBUG_PRINT("warning", ("share: %p where not in used_tables_list", share)); } - /* unlink table from used_instances */ - for (prev_file= (MARIA_HA**) &trn->used_instances; - (handler= *prev_file); - prev_file= &handler->trn_next) - { - if (handler == info) - { - *prev_file= info->trn_next; - break; - } - } - if (handler != 0) - { - /* - This can only happens in case of rename of intermediate table as - part of alter table - */ - DBUG_PRINT("warning", ("table: %p where not in used_instances", info)); - } - info->trn= 
0; /* Not part of trans anymore */ + /* Reset trn and remove table from used_instances */ + _ma_reset_trn_for_table(info); + DBUG_VOID_RETURN; } diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c index 24c48c67210..b6442c2be91 100644 --- a/storage/maria/ma_test2.c +++ b/storage/maria/ma_test2.c @@ -1086,6 +1086,11 @@ static void get_options(int argc, char **argv) fprintf(stderr,"record count must be >= 10 (if testflag > 2)\n"); exit(1); } + if (recant <= 1) + { + fprintf(stderr,"record count must be >= 2\n"); + exit(1); + } break; case 'e': /* maria_block_length */ case 'E': diff --git a/storage/maria/ma_trnman.h b/storage/maria/ma_trnman.h index 9bfd1f0d047..5b6d0e9f60d 100644 --- a/storage/maria/ma_trnman.h +++ b/storage/maria/ma_trnman.h @@ -18,7 +18,7 @@ /** Sets table's trn and prints debug information - Links table into used_instances if new_trn is not 0 + Links table into new_trn->used_instances @param tbl MARIA_HA of table @param newtrn what to put into tbl->trn @@ -34,7 +34,10 @@ static inline void _ma_set_trn_for_table(MARIA_HA *tbl, TRN *newtrn) tbl->trn= newtrn; /* Link into used list */ + if (newtrn->used_instances) + ((MARIA_HA*) newtrn->used_instances)->trn_prev= &tbl->trn_next; tbl->trn_next= (MARIA_HA*) newtrn->used_instances; + tbl->trn_prev= (MARIA_HA**) &newtrn->used_instances; newtrn->used_instances= tbl; } @@ -49,6 +52,8 @@ static inline void _ma_set_tmp_trn_for_table(MARIA_HA *tbl, TRN *newtrn) DBUG_PRINT("info",("table: %p trn: %p -> %p", tbl, tbl->trn, newtrn)); tbl->trn= newtrn; + tbl->trn_prev= 0; + tbl->trn_next= 0; /* To avoid assert in ha_maria::close() */ } @@ -59,7 +64,36 @@ static inline void _ma_set_tmp_trn_for_table(MARIA_HA *tbl, TRN *newtrn) static inline void _ma_reset_trn_for_table(MARIA_HA *tbl) { DBUG_PRINT("info",("table: %p trn: %p -> NULL", tbl, tbl->trn)); + + /* The following is only false if tbl->trn == &dummy_transaction_object */ + if (tbl->trn_prev) + { + if (tbl->trn_next) + tbl->trn_next->trn_prev= 
tbl->trn_prev; + *tbl->trn_prev= tbl->trn_next; + tbl->trn_prev= 0; + tbl->trn_next= 0; + } tbl->trn= 0; } + +/* + Take over the used_instances link from a trn object + Reset the link in the trn object +*/ + +static inline void relink_trn_used_instances(MARIA_HA **used_tables, TRN *trn) +{ + if (likely(*used_tables= (MARIA_HA*) trn->used_instances)) + { + /* Check that first back link is correct */ + DBUG_ASSERT((*used_tables)->trn_prev == (MARIA_HA **)&trn->used_instances); + + /* Fix back link to point to new base for the list */ + (*used_tables)->trn_prev= used_tables; + trn->used_instances= 0; + } +} + #endif /* _ma_trnman_h */ diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index dd2e2949856..d5f2b1e80fd 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -177,6 +177,7 @@ typedef struct st_maria_state_info uint sortkey; /* sorted by this key (not used) */ uint open_count; uint changed; /* Changed since maria_chk */ + uint org_changed; /* Changed since open */ /** Birthday of the table: no record in the log before this LSN should ever be applied to the table. Updated when created, renamed, explicitly @@ -604,7 +605,7 @@ struct st_maria_handler { MARIA_SHARE *s; /* Shared between open:s */ struct st_ma_transaction *trn; /* Pointer to active transaction */ - struct st_maria_handler *trn_next; + struct st_maria_handler *trn_next,**trn_prev; MARIA_STATUS_INFO *state, state_save; MARIA_STATUS_INFO *state_start; /* State at start of transaction */ MARIA_USED_TABLES *used_tables; diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index 551732d8ba3..9a9c78f66d4 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -261,7 +261,7 @@ static void usage(void) puts("This software comes with ABSOLUTELY NO WARRANTY. 
This is free software,"); puts("and you are welcome to modify and redistribute it under the GPL license\n"); - puts("Display and apply log records from a Aria transaction log"); + puts("Display or apply log records from a Aria transaction log"); puts("found in the current directory (for now)"); #ifndef IDENTICAL_PAGES_AFTER_RECOVERY puts("\nNote: Aria is compiled without -DIDENTICAL_PAGES_AFTER_RECOVERY\n" @@ -269,8 +269,8 @@ static void usage(void) "files created during normal execution. This should be ok, except for\n" "test scripts that tries to compare files before and after recovery."); #endif - printf("\nUsage: %s OPTIONS\n", my_progname_short); - puts("You need to use one of -d or -a"); + printf("\nUsage: %s OPTIONS [-d | -a] -h `aria_log_directory`\n", + my_progname_short); my_print_help(my_long_options); print_defaults("my", load_default_groups); my_print_variables(my_long_options); diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 5b3c9f0287a..3c5ce831f95 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -413,6 +413,7 @@ my_bool trnman_end_trn(TRN *trn, my_bool commit) /* if a rollback, all UNDO records should have been executed */ DBUG_ASSERT(commit || trn->undo_lsn == 0); DBUG_ASSERT(trn != &dummy_transaction_object); + DBUG_ASSERT(trn->locked_tables == 0 && trn->used_instances == 0); DBUG_PRINT("info", ("mysql_mutex_lock LOCK_trn_list")); mysql_mutex_lock(&LOCK_trn_list); @@ -529,6 +530,8 @@ static void trnman_free_trn(TRN *trn) */ union { TRN *trn; void *v; } tmp; + DBUG_ASSERT(trn != &dummy_transaction_object); + mysql_mutex_lock(&trn->state_lock); trn->short_id= 0; mysql_mutex_unlock(&trn->state_lock); diff --git a/storage/maria/unittest/ma_test_all-t b/storage/maria/unittest/ma_test_all-t index 18b26a7bd45..8858649fb5d 100755 --- a/storage/maria/unittest/ma_test_all-t +++ b/storage/maria/unittest/ma_test_all-t @@ -749,9 +749,10 @@ Options --help Show this help and exit. 
--abort-on-error Abort at once in case of error. --number-of-tests Print the total number of tests and exit. ---run-tests=... Test number(s) that should be run. You can give just - one number or a range. For example 45..89. To run a specific - test alone, for example test 215, use --run-tests=215..215 +--run-tests=... Test range that should be run. You can give just + one number, to start tests from this test, or a range. + For example 45..89. To run a specific test alone, + for example test 215, use --run-tests=215..215 Use this option with caution, because some of the tests might depend on previous ones. --start-from=... Alias for --run-tests diff --git a/storage/maria/unittest/ma_test_recovery.expected b/storage/maria/unittest/ma_test_recovery.expected index 6aaff86e6cf..38e8e4d8e93 100644 --- a/storage/maria/unittest/ma_test_recovery.expected +++ b/storage/maria/unittest/ma_test_recovery.expected @@ -70,7 +70,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -79,7 +79,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -88,7 +88,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -99,7 +99,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -108,7 +108,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -117,7 +117,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -167,7 +167,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -176,7 +176,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -185,7 +185,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -196,7 +196,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -205,7 +205,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -214,7 +214,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -264,7 +264,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -273,7 +273,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -282,7 +282,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -293,7 +293,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -302,7 +302,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -311,7 +311,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -361,7 +361,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -370,7 +370,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -379,7 +379,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -390,7 +390,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -399,7 +399,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -408,7 +408,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -458,7 +458,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -467,7 +467,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -476,7 +476,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -487,7 +487,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -496,7 +496,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -505,7 +505,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -555,7 +555,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -564,7 +564,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -573,7 +573,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -584,7 +584,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -593,7 +593,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -602,7 +602,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -652,7 +652,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -661,7 +661,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -670,7 +670,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -681,7 +681,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -690,7 +690,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -699,7 +699,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -749,7 +749,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -758,7 +758,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -767,7 +767,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -778,7 +778,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -787,7 +787,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -796,7 +796,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -846,7 +846,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -855,7 +855,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -864,7 +864,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -875,7 +875,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -884,7 +884,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -893,7 +893,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -943,7 +943,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -952,7 +952,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -961,7 +961,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -972,7 +972,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -981,7 +981,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -990,7 +990,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1040,7 +1040,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1049,7 +1049,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1058,7 +1058,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1069,7 +1069,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1078,7 +1078,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1087,7 +1087,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1137,7 +1137,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1146,7 +1146,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1155,7 +1155,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1166,7 +1166,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1175,7 +1175,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1184,7 +1184,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1234,7 +1234,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1243,7 +1243,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1252,7 +1252,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1263,7 +1263,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1272,7 +1272,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1281,7 +1281,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1331,7 +1331,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1340,7 +1340,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1349,7 +1349,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1360,7 +1360,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1369,7 +1369,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1378,7 +1378,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1428,7 +1428,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1437,7 +1437,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1446,7 +1446,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1457,7 +1457,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1466,7 +1466,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1475,7 +1475,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1525,7 +1525,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1534,7 +1534,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1543,7 +1543,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1554,7 +1554,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1563,7 +1563,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= @@ -1572,7 +1572,7 @@ applying log Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 7c7 -< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable +< Status: checked,analyzed,optimized keys,sorted index pages,zerofilled --- > Status: changed ========DIFF END======= diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp index a80776fe5f5..7a41442d4ba 100644 --- a/storage/mroonga/ha_mroonga.cpp +++ b/storage/mroonga/ha_mroonga.cpp @@ -193,7 +193,7 @@ static mysql_mutex_t *mrn_LOCK_open; #if MYSQL_VERSION_ID >= 50706 && !defined(MRN_MARIADB_P) # define MRN_LEX_GET_TABLE_LIST(lex) (lex)->select_lex->table_list.first #else -# define MRN_LEX_GET_TABLE_LIST(lex) (lex)->select_lex.table_list.first +# define MRN_LEX_GET_TABLE_LIST(lex) (lex)->first_select_lex()->table_list.first #endif #if MYSQL_VERSION_ID >= 50706 && !defined(MRN_MARIADB_P) @@ -10576,7 +10576,7 @@ int ha_mroonga::generic_store_bulk_time(Field *field, grn_obj *buf) bool truncated = false; Field_time *time_field = (Field_time *)field; MYSQL_TIME mysql_time; - time_field->get_time(&mysql_time); + time_field->get_date(&mysql_time, Time::Options(current_thd)); mrn::TimeConverter time_converter; long long int time = time_converter.mysql_time_to_grn_time(&mysql_time, &truncated); @@ -10596,7 +10596,7 @@ int ha_mroonga::generic_store_bulk_datetime(Field *field, grn_obj *buf) bool truncated = 
false; Field_datetime *datetime_field = (Field_datetime *)field; MYSQL_TIME mysql_time; - datetime_field->get_time(&mysql_time); + datetime_field->get_date(&mysql_time, Time::Options(current_thd)); mrn::TimeConverter time_converter; long long int time = time_converter.mysql_time_to_grn_time(&mysql_time, &truncated); @@ -10657,7 +10657,7 @@ int ha_mroonga::generic_store_bulk_datetime2(Field *field, grn_obj *buf) bool truncated = false; Field_datetimef *datetimef_field = (Field_datetimef *)field; MYSQL_TIME mysql_time; - datetimef_field->get_time(&mysql_time); + datetimef_field->get_date(&mysql_time, Time::Options(current_thd)); mrn::TimeConverter time_converter; long long int time = time_converter.mysql_time_to_grn_time(&mysql_time, &truncated); @@ -10682,7 +10682,7 @@ int ha_mroonga::generic_store_bulk_time2(Field *field, grn_obj *buf) int error = 0; bool truncated = false; MYSQL_TIME mysql_time; - field->get_time(&mysql_time); + field->get_date(&mysql_time, Time::Options(current_thd)); mrn::TimeConverter time_converter; long long int time = time_converter.mysql_time_to_grn_time(&mysql_time, &truncated); @@ -10707,7 +10707,7 @@ int ha_mroonga::generic_store_bulk_new_date(Field *field, grn_obj *buf) bool truncated = false; Field_newdate *newdate_field = (Field_newdate *)field; MYSQL_TIME mysql_date; - newdate_field->get_time(&mysql_date); + newdate_field->get_date(&mysql_date, Time::Options(current_thd)); mrn::TimeConverter time_converter; long long int time = time_converter.mysql_time_to_grn_time(&mysql_date, &truncated); @@ -11617,14 +11617,14 @@ int ha_mroonga::storage_encode_key_timestamp(Field *field, const uchar *key, } else { Field_timestamp_hires *timestamp_hires_field = (Field_timestamp_hires *)field; - uint fuzzy_date = 0; uchar *ptr_backup = field->ptr; uchar *null_ptr_backup = field->null_ptr; TABLE *table_backup = field->table; field->ptr = (uchar *)key; field->null_ptr = (uchar *)(key - 1); field->table = table; - 
timestamp_hires_field->get_date(&mysql_time, fuzzy_date); + Temporal::Options opt(TIME_CONV_NONE, current_thd); + timestamp_hires_field->get_date(&mysql_time, opt); field->ptr = ptr_backup; field->null_ptr = null_ptr_backup; field->table = table_backup; @@ -11675,12 +11675,12 @@ int ha_mroonga::storage_encode_key_time(Field *field, const uchar *key, mysql_time.time_type = MYSQL_TIMESTAMP_TIME; } else { Field_time_hires *time_hires_field = (Field_time_hires *)field; - uint fuzzy_date = 0; uchar *ptr_backup = field->ptr; uchar *null_ptr_backup = field->null_ptr; field->ptr = (uchar *)key; field->null_ptr = (uchar *)(key - 1); - time_hires_field->get_date(&mysql_time, fuzzy_date); + Temporal::Options opt(TIME_CONV_NONE, current_thd); + time_hires_field->get_date(&mysql_time, opt); field->ptr = ptr_backup; field->null_ptr = null_ptr_backup; } @@ -11749,12 +11749,12 @@ int ha_mroonga::storage_encode_key_datetime(Field *field, const uchar *key, if (field->decimals() > 0) { Field_datetime_hires *datetime_hires_field = (Field_datetime_hires *)field; MYSQL_TIME mysql_time; - uint fuzzy_date = 0; uchar *ptr_backup = field->ptr; uchar *null_ptr_backup = field->null_ptr; field->ptr = (uchar *)key; field->null_ptr = (uchar *)(key - 1); - datetime_hires_field->get_date(&mysql_time, fuzzy_date); + Temporal::Options opt(TIME_CONV_NONE, current_thd); + datetime_hires_field->get_date(&mysql_time, opt); field->ptr = ptr_backup; field->null_ptr = null_ptr_backup; mrn::TimeConverter time_converter; diff --git a/storage/mroonga/lib/mrn_condition_converter.cpp b/storage/mroonga/lib/mrn_condition_converter.cpp index 579292a7f89..68ffa073f4f 100644 --- a/storage/mroonga/lib/mrn_condition_converter.cpp +++ b/storage/mroonga/lib/mrn_condition_converter.cpp @@ -179,17 +179,17 @@ namespace mrn { NormalizedType normalized_type = normalize_field_type(field_type); switch (normalized_type) { case STRING_TYPE: - if (value_item->type() == Item::STRING_ITEM && + if 
(value_item->is_of_type(Item::CONST_ITEM, STRING_RESULT) && func_type == Item_func::EQ_FUNC) { convertable = have_index(field_item, GRN_OP_EQUAL); } break; case INT_TYPE: if (field_type == MYSQL_TYPE_ENUM) { - convertable = (value_item->type() == Item::STRING_ITEM || - value_item->type() == Item::INT_ITEM); + convertable = value_item->is_of_type(Item::CONST_ITEM, STRING_RESULT) || + value_item->is_of_type(Item::CONST_ITEM, INT_RESULT); } else { - convertable = value_item->type() == Item::INT_ITEM; + convertable = value_item->is_of_type(Item::CONST_ITEM, INT_RESULT); } break; case TIME_TYPE: @@ -215,14 +215,14 @@ namespace mrn { NormalizedType normalized_type = normalize_field_type(field_type); switch (normalized_type) { case STRING_TYPE: - if (min_item->type() == Item::STRING_ITEM && - max_item->type() == Item::STRING_ITEM) { + if (min_item->is_of_type(Item::CONST_ITEM, STRING_RESULT) && + max_item->is_of_type(Item::CONST_ITEM, STRING_RESULT)) { convertable = have_index(field_item, GRN_OP_LESS); } break; case INT_TYPE: - if (min_item->type() == Item::INT_ITEM && - max_item->type() == Item::INT_ITEM) { + if (min_item->is_of_type(Item::CONST_ITEM, INT_RESULT) && + max_item->is_of_type(Item::CONST_ITEM, INT_RESULT)) { convertable = have_index(field_item, GRN_OP_LESS); } break; @@ -258,8 +258,11 @@ namespace mrn { Item *real_value_item = value_item->real_item(); switch (field_item->field->type()) { case MYSQL_TYPE_TIME: - error = real_value_item->get_time(mysql_time); + { + THD *thd= current_thd; + error= real_value_item->get_date(thd, mysql_time, Time::Options(thd)); break; + } case MYSQL_TYPE_YEAR: mysql_time->year = static_cast<int>(value_item->val_int()); mysql_time->month = 1; @@ -273,9 +276,13 @@ namespace mrn { error = false; break; default: - error = real_value_item->get_date(mysql_time, TIME_FUZZY_DATE); + { + THD *thd= current_thd; + Datetime::Options opt(TIME_FUZZY_DATES, thd); + error = real_value_item->get_date(thd, mysql_time, opt); break; } + } 
DBUG_RETURN(error); } @@ -587,7 +594,7 @@ namespace mrn { case INT_TYPE: grn_obj_reinit(ctx_, &value_, GRN_DB_INT64, 0); if (field_type == MYSQL_TYPE_ENUM) { - if (const_item->type() == Item::STRING_ITEM) { + if (const_item->is_of_type(Item::CONST_ITEM, STRING_RESULT)) { String *string; string = const_item->val_str(NULL); Field_enum *enum_field = static_cast<Field_enum *>(field_item->field); diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index d9b34c86095..9ab7d156251 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -96,6 +96,10 @@ static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG, "and NULLS_IGNORED", NULL, NULL, MI_STATS_METHOD_NULLS_NOT_EQUAL, &myisam_stats_method_typelib); +const char *MI_CHECK_INFO= "info"; +const char *MI_CHECK_WARNING= "warning"; +const char *MI_CHECK_ERROR= "error"; + #ifndef DBUG_OFF /** Causes the thread to wait in a spin lock for a query kill signal. @@ -130,6 +134,20 @@ static handler *myisam_create_handler(handlerton *hton, return new (mem_root) ha_myisam(hton, table); } + +static void mi_check_print(HA_CHECK *param, const char* msg_type, + const char *msgbuf) +{ + if (msg_type == MI_CHECK_INFO) + sql_print_information("%s.%s: %s", param->db_name, param->table_name, + msgbuf); + else if (msg_type == MI_CHECK_WARNING) + sql_print_warning("%s.%s: %s", param->db_name, param->table_name, + msgbuf); + else + sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); +} + // collect errors printed by mi_check routines static void mi_check_print_msg(HA_CHECK *param, const char* msg_type, @@ -151,16 +169,21 @@ static void mi_check_print_msg(HA_CHECK *param, const char* msg_type, if (!thd->vio_ok()) { - sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); + mi_check_print(param, msg_type, msgbuf); return; } if (param->testflag & (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR)) { - my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME)); + myf flag= 
0; + if (msg_type == MI_CHECK_INFO) + flag= ME_NOTE; + else if (msg_type == MI_CHECK_WARNING) + flag= ME_WARNING; + my_message(ER_NOT_KEYFILE, msgbuf, MYF(flag)); if (thd->variables.log_warnings > 2 && ! thd->log_all_errors) - sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); + mi_check_print(param, msg_type, msgbuf); return; } length=(uint) (strxmov(name, param->db_name,".",param->table_name,NullS) - @@ -185,7 +208,7 @@ static void mi_check_print_msg(HA_CHECK *param, const char* msg_type, sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n", msgbuf); else if (thd->variables.log_warnings > 2) - sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); + mi_check_print(param, msg_type, msgbuf); if (param->need_print_msg_lock) mysql_mutex_unlock(¶m->print_msg_mutex); @@ -592,7 +615,7 @@ void mi_check_print_error(HA_CHECK *param, const char *fmt,...) return; va_list args; va_start(args, fmt); - mi_check_print_msg(param, "error", fmt, args); + mi_check_print_msg(param, MI_CHECK_ERROR, fmt, args); va_end(args); } @@ -600,7 +623,7 @@ void mi_check_print_info(HA_CHECK *param, const char *fmt,...) { va_list args; va_start(args, fmt); - mi_check_print_msg(param, "info", fmt, args); + mi_check_print_msg(param, MI_CHECK_INFO, fmt, args); param->note_printed= 1; va_end(args); } @@ -611,7 +634,7 @@ void mi_check_print_warning(HA_CHECK *param, const char *fmt,...) 
param->out_flag|= O_DATA_LOST; va_list args; va_start(args, fmt); - mi_check_print_msg(param, "warning", fmt, args); + mi_check_print_msg(param, MI_CHECK_WARNING, fmt, args); va_end(args); } @@ -946,7 +969,6 @@ void ha_myisam::setup_vcols_for_repair(HA_CHECK *param) } param->fix_record= compute_vcols; table->use_all_columns(); - table->vcol_set= &table->s->all_set; } void ha_myisam::restore_vcos_after_repair() @@ -1037,6 +1059,15 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) mysql_mutex_unlock(&share->intern_lock); info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE | HA_STATUS_CONST); + /* + Write a 'table is ok' message to error log if table is ok and + we have written to error log that table was getting checked + */ + if (!error && !(table->db_stat & HA_READ_ONLY) && + !mi_is_crashed(file) && thd->error_printed_to_log && + (param->warning_printed || param->error_printed || + param->note_printed)) + mi_check_print_info(param, "Table is fixed"); } } else if (!mi_is_crashed(file) && !thd->killed) @@ -1811,7 +1842,7 @@ bool ha_myisam::check_and_repair(THD *thd) sql_print_information("Making backup of index file %s with extension '%s'", file->s->index_file_name, buff); mi_make_backup_of_index(file, check_opt.start_time, - MYF(MY_WME | ME_JUST_WARNING)); + MYF(MY_WME | ME_WARNING)); } check_opt.flags= (((myisam_recover_options & diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index 5f9b5414174..f6929438ac0 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -4782,7 +4782,7 @@ static int replace_data_file(HA_CHECK *param, MI_INFO *info, File new_file) my_create_backup_name(buff, "", param->backup_time); my_printf_error(ER_GET_ERRMSG, "Making backup of data file %s with extension '%s'", - MYF(ME_JUST_INFO | ME_NOREFRESH), share->data_file_name, + MYF(ME_NOTE | ME_ERROR_LOG), share->data_file_name, buff); } diff --git a/storage/myisam/mi_info.c b/storage/myisam/mi_info.c index 3b9288eeb83..33ff6abb32d 100644 
--- a/storage/myisam/mi_info.c +++ b/storage/myisam/mi_info.c @@ -127,7 +127,7 @@ void mi_report_error(int errcode, const char *file_name) if ((lgt= strlen(file_name)) > 64) file_name+= lgt - 64; - my_error(errcode, MYF(ME_NOREFRESH), file_name); + my_error(errcode, MYF(ME_ERROR_LOG), file_name); DBUG_VOID_RETURN; } diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c index 4ea1602bec3..f69dd0196b3 100644 --- a/storage/myisam/myisampack.c +++ b/storage/myisam/myisampack.c @@ -2148,7 +2148,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) */ if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2))) { - my_error(EE_OUTOFMEMORY, MYF(ME_BELL+ME_FATALERROR), + my_error(EE_OUTOFMEMORY, MYF(ME_BELL+ME_FATAL), sizeof(uint)*length*2); return 0; } diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c index 533f2cd2aa6..aef3c9b42a8 100644 --- a/storage/myisam/sort.c +++ b/storage/myisam/sort.c @@ -28,12 +28,10 @@ /* static variables */ -#undef MYF_RW #undef DISK_BUFFER_SIZE #define MERGEBUFF 15 #define MERGEBUFF2 31 -#define MYF_RW MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL) #define DISK_BUFFER_SIZE (IO_SIZE*128) /* How many keys we can keep in memory */ diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 98ce1180d48..38b33a41565 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -7290,7 +7290,7 @@ int ha_rocksdb::create_cfs( tbl_def_arg->full_tablename().c_str(), table_arg->key_info[i].key_part[part].field->field_name.str); - my_error(ER_INTERNAL_ERROR, MYF(ME_JUST_WARNING), buf); + my_error(ER_INTERNAL_ERROR, MYF(ME_WARNING), buf); } } } @@ -7805,14 +7805,14 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, // The `rocksdb_datadir` setting should be used to configure RocksDB data // directory. 
print_error(HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED, - MYF(ME_JUST_WARNING)); + MYF(ME_WARNING)); DBUG_RETURN(HA_WRONG_CREATE_OPTION); } if (create_info->index_file_name) { // Similar check for INDEX DIRECTORY as well. print_error(HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED, - MYF(ME_JUST_WARNING)); + MYF(ME_WARNING)); DBUG_RETURN(HA_WRONG_CREATE_OPTION); } diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result index a14ffdec2e3..6bd6cea97de 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result @@ -92,5 +92,7 @@ disconnect con2; disconnect con1; disconnect con0; SELECT * FROM t1 ORDER BY pk INTO OUTFILE <output_file>; +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead All pk values matched their expected values DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result index 1f4d1a641a2..c4a1c5f4668 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result @@ -20,6 +20,8 @@ END IF; SET id1_cond = id1_cond + 1; END WHILE; END// +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' 
instead "Skipping bloom filter" SET session rocksdb_skip_bloom_filter_on_read=1; CALL select_test(); diff --git a/storage/rocksdb/mysql-test/rocksdb/r/misc.result b/storage/rocksdb/mysql-test/rocksdb/r/misc.result index 6087928b80f..f19f7f82ab7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/misc.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/misc.result @@ -56,8 +56,6 @@ help_relation help_keyword_id NULL NULL help_relation help_topic_id NULL NULL help_topic help_topic_id NULL NULL help_topic name NULL NULL -host Db NULL NULL -host Host NULL NULL index_stats db_name NULL NULL index_stats index_name NULL NULL index_stats prefix_arity NULL NULL diff --git a/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result b/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result index 3a631d2925b..6ea13872033 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result @@ -1,63 +1,123 @@ Warnings: Note 1051 Unknown table 'test.ti_nk' +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. 
Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' 
instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' 
instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead skip_merge_sort true +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' 
instead skip_merge_sort true DROP TABLE ti_nk; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/select.result b/storage/rocksdb/mysql-test/rocksdb/r/select.result index 22a6ca9bc87..2bb2bd636dc 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/select.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/select.result @@ -114,6 +114,8 @@ SELECT t1.a, t2.b FROM t2, t1 WHERE t1.a = t2.a ORDER BY t2.b, t1.a INTO OUTFILE '<DATADIR>/select.out' CHARACTER SET utf8 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY ''''; +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead 200,'bar' 200,'bar' 100,'foobar' @@ -125,8 +127,12 @@ INTO DUMPFILE '<DATADIR>/select.dump'; ERROR 42000: Result consisted of more than one row SELECT t1.*, t2.* FROM t1, t2 ORDER BY t2.b, t1.a, t2.a, t1.b, t1.pk, t2.pk LIMIT 1 INTO DUMPFILE '<DATADIR>/select.dump'; +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead 1z2200bar3 SELECT MIN(a), MAX(a) FROM t1 INTO @min, @max; +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' 
instead SELECT @min, @max; @min @max 1 200 diff --git a/storage/sphinx/ha_sphinx.cc b/storage/sphinx/ha_sphinx.cc index 90409213843..86332a741c6 100644 --- a/storage/sphinx/ha_sphinx.cc +++ b/storage/sphinx/ha_sphinx.cc @@ -2746,7 +2746,9 @@ const Item * ha_sphinx::cond_push ( const Item *cond ) if ( !m_pShare->m_bSphinxQL ) { // on non-QL tables, intercept query=value condition for SELECT - if (!( args[0]->type()==Item::FIELD_ITEM && args[1]->type()==Item::STRING_ITEM )) + if (!( args[0]->type()==Item::FIELD_ITEM && + args[1]->is_of_type(Item::CONST_ITEM, + STRING_RESULT))) break; Item_field * pField = (Item_field *) args[0]; @@ -2762,7 +2764,9 @@ const Item * ha_sphinx::cond_push ( const Item *cond ) } else { - if (!( args[0]->type()==Item::FIELD_ITEM && args[1]->type()==Item::INT_ITEM )) + if (!( args[0]->type()==Item::FIELD_ITEM && + args[1]->is_of_type(Item::CONST_ITEM, + INT_RESULT))) break; // on QL tables, intercept id=value condition for DELETE diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc index bd302f155d2..92b7d2c2ac1 100644 --- a/storage/spider/ha_spider.cc +++ b/storage/spider/ha_spider.cc @@ -9392,8 +9392,8 @@ ulonglong ha_spider::table_flags() const HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_PARTIAL_COLUMN_READ | -#ifdef HA_CMP_REF_IS_EXPENSIVE - HA_CMP_REF_IS_EXPENSIVE | +#ifdef HA_SLOW_CMP_REF + HA_SLOW_CMP_REF | #endif #ifdef SPIDER_ENGINE_CONDITION_PUSHDOWN_IS_ALWAYS_ON HA_CAN_TABLE_CONDITION_PUSHDOWN | @@ -13526,7 +13526,7 @@ void ha_spider::check_pre_call( ) || ( (skip_parallel_search & 2) && - select_lex && select_lex->sql_cache == SELECT_LEX::SQL_NO_CACHE // for mysqldump + thd->lex->sql_cache == LEX::SQL_NO_CACHE // for mysqldump ) ) { use_pre_call = FALSE; diff --git a/storage/spider/mysql-test/spider/include/init_spider.inc b/storage/spider/mysql-test/spider/include/init_spider.inc index c4d171d418e..1da1ec970b5 100644 --- a/storage/spider/mysql-test/spider/include/init_spider.inc +++ 
b/storage/spider/mysql-test/spider/include/init_spider.inc @@ -103,10 +103,17 @@ if (!$VERSION_COMPILE_OS_WIN) ); } +let $SERVER_NAME= + `SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(version(), '-', 2), '-', -1)`; +let $SERVER_MAJOR_VERSION= + `SELECT SUBSTRING_INDEX(version(), '.', 1)`; +let $SERVER_MINOR_VERSION= + `SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(version(), '.', 2), '.', -1)`; let $PLUGIN_VERSION= `SELECT SUBSTRING_INDEX(plugin_version, '.', 1) FROM information_schema.plugins WHERE plugin_name = 'SPIDER'`; + if (`SELECT IF($PLUGIN_VERSION = 1, 1, 0)`) { DROP TABLE IF EXISTS mysql.spider_xa; @@ -245,7 +252,16 @@ if (`SELECT IF($PLUGIN_VERSION = 2, 1, 0)`) } if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) { + let $ENGINE_NAME= + `SELECT IF (STRCMP('$SERVER_NAME', 'MariaDB') = 0, + IF ($SERVER_MAJOR_VERSION = 10, + IF ($SERVER_MINOR_VERSION < 4, 'MyISAM', + 'Aria transactional=1'), + IF ($SERVER_MAJOR_VERSION < 10, 'MyISAM', + 'Aria transactional=1')), + 'MyISAM')`; DROP TABLE IF EXISTS mysql.spider_xa; + eval CREATE TABLE mysql.spider_xa( format_id int not null default 0, gtrid_length int not null default 0, @@ -254,8 +270,9 @@ if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) status char(8) not null default '', PRIMARY KEY (data, format_id, gtrid_length), KEY idx1 (status) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; DROP TABLE IF EXISTS mysql.spider_xa_member; + eval CREATE TABLE mysql.spider_xa_member( format_id int not null default 0, gtrid_length int not null default 0, @@ -276,8 +293,9 @@ if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) default_file text default null, default_group char(64) default null, KEY idx1 (data, format_id, gtrid_length, host) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; DROP TABLE IF EXISTS mysql.spider_xa_failed_log; + eval CREATE TABLE mysql.spider_xa_failed_log( format_id int not null default 0, 
gtrid_length int not null default 0, @@ -301,8 +319,9 @@ if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) status char(8) not null default '', failed_time timestamp not null default current_timestamp, key idx1 (data, format_id, gtrid_length, host) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; DROP TABLE IF EXISTS mysql.spider_tables; + eval CREATE TABLE mysql.spider_tables( db_name char(64) not null default '', table_name char(199) not null default '', @@ -332,8 +351,9 @@ if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) PRIMARY KEY (db_name, table_name, link_id), KEY idx1 (priority), UNIQUE KEY uidx1 (db_name, table_name, static_link_id) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; DROP TABLE IF EXISTS mysql.spider_link_mon_servers; + eval CREATE TABLE mysql.spider_link_mon_servers( db_name char(64) not null default '', table_name char(199) not null default '', @@ -355,15 +375,17 @@ if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) default_file text default null, default_group char(64) default null, PRIMARY KEY (db_name, table_name, link_id, sid) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; DROP TABLE IF EXISTS mysql.spider_link_failed_log; + eval CREATE TABLE mysql.spider_link_failed_log( db_name char(64) not null default '', table_name char(199) not null default '', link_id char(64) not null default '', failed_time timestamp not null default current_timestamp - ) ENGINE=MYISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; DROP TABLE IF EXISTS mysql.spider_table_position_for_recovery; + eval CREATE TABLE mysql.spider_table_position_for_recovery( db_name char(64) not null default '', table_name char(199) not null default '', @@ -373,8 +395,9 @@ if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) position text, gtid 
text, primary key (db_name, table_name, failed_link_id, source_link_id) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; DROP TABLE IF EXISTS mysql.spider_table_sts; + eval CREATE TABLE mysql.spider_table_sts( db_name char(64) not null default '', table_name char(199) not null default '', @@ -387,15 +410,16 @@ if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) create_time datetime not null default '0000-00-00 00:00:00', update_time datetime not null default '0000-00-00 00:00:00', primary key (db_name, table_name) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; DROP TABLE IF EXISTS mysql.spider_table_crd; + eval CREATE TABLE mysql.spider_table_crd( db_name char(64) not null default '', table_name char(199) not null default '', key_seq int unsigned not null default 0, cardinality bigint not null default 0, primary key (db_name, table_name, key_seq) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; } SET spider_internal_sql_log_off= 0; diff --git a/storage/spider/mysql-test/spider/r/show_system_tables.result b/storage/spider/mysql-test/spider/r/show_system_tables.result new file mode 100644 index 00000000000..67411862e00 --- /dev/null +++ b/storage/spider/mysql-test/spider/r/show_system_tables.result @@ -0,0 +1,37 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 +child3_1 +child3_2 +child3_3 + +Show system tables on the Spider node +connection master_1; +SELECT table_name, engine FROM information_schema.tables +WHERE table_schema = 'mysql' AND table_name like '%spider_%'; +table_name engine +spider_link_failed_log Aria +spider_link_mon_servers Aria +spider_table_crd Aria +spider_table_position_for_recovery Aria +spider_table_sts Aria +spider_tables Aria +spider_xa Aria +spider_xa_failed_log Aria +spider_xa_member Aria + +deinit +for master_1 
+for child2 +child2_1 +child2_2 +child2_3 +for child3 +child3_1 +child3_2 +child3_3 + +end of test diff --git a/storage/spider/mysql-test/spider/t/show_system_tables.test b/storage/spider/mysql-test/spider/t/show_system_tables.test new file mode 100644 index 00000000000..ae8259b01bc --- /dev/null +++ b/storage/spider/mysql-test/spider/t/show_system_tables.test @@ -0,0 +1,26 @@ +--disable_warnings +--disable_query_log +--disable_result_log +--source test_init.inc +--enable_result_log +--enable_query_log +--enable_warnings + +--echo +--echo Show system tables on the Spider node +--connection master_1 +--sorted_result +SELECT table_name, engine FROM information_schema.tables + WHERE table_schema = 'mysql' AND table_name like '%spider_%'; + +--echo +--echo deinit +--disable_warnings +--disable_query_log +--disable_result_log +--source test_deinit.inc +--enable_result_log +--enable_query_log +--enable_warnings +--echo +--echo end of test diff --git a/storage/spider/scripts/install_spider.sql b/storage/spider/scripts/install_spider.sql index c5a86caa219..9b2e5c480ab 100644 --- a/storage/spider/scripts/install_spider.sql +++ b/storage/spider/scripts/install_spider.sql @@ -400,6 +400,71 @@ begin alter table mysql.spider_table_crd modify table_name char(199) not null default ''; end if; + + -- Fix for MariaDB 10.4: Crash-Safe system tables + select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES + where TABLE_SCHEMA = 'mysql' + AND TABLE_NAME = 'spider_link_failed_log'; + if @engine_name != 'Aria' then + alter table mysql.spider_link_failed_log + engine=Aria transactional=1; + end if; + select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES + where TABLE_SCHEMA = 'mysql' + AND TABLE_NAME = 'spider_link_mon_servers'; + if @engine_name != 'Aria' then + alter table mysql.spider_link_mon_servers + engine=Aria transactional=1; + end if; + select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES + where TABLE_SCHEMA = 'mysql' + AND TABLE_NAME = 
'spider_table_crd'; + if @engine_name != 'Aria' then + alter table mysql.spider_table_crd + engine=Aria transactional=1; + end if; + select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES + where TABLE_SCHEMA = 'mysql' + AND TABLE_NAME = 'spider_table_position_for_recovery'; + if @engine_name != 'Aria' then + alter table mysql.spider_table_position_for_recovery + engine=Aria transactional=1; + end if; + select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES + where TABLE_SCHEMA = 'mysql' + AND TABLE_NAME = 'spider_table_sts'; + if @engine_name != 'Aria' then + alter table mysql.spider_table_sts + engine=Aria transactional=1; + end if; + select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES + where TABLE_SCHEMA = 'mysql' + AND TABLE_NAME = 'spider_tables'; + if @engine_name != 'Aria' then + alter table mysql.spider_tables + engine=Aria transactional=1; + end if; + select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES + where TABLE_SCHEMA = 'mysql' + AND TABLE_NAME = 'spider_xa'; + if @engine_name != 'Aria' then + alter table mysql.spider_xa + engine=Aria transactional=1; + end if; + select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES + where TABLE_SCHEMA = 'mysql' + AND TABLE_NAME = 'spider_xa_failed_log'; + if @engine_name != 'Aria' then + alter table mysql.spider_xa_failed_log + engine=Aria transactional=1; + end if; + select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES + where TABLE_SCHEMA = 'mysql' + AND TABLE_NAME = 'spider_xa_member'; + if @engine_name != 'Aria' then + alter table mysql.spider_xa_member + engine=Aria transactional=1; + end if; end;// delimiter ; call mysql.spider_fix_system_tables; diff --git a/storage/spider/spd_db_conn.cc b/storage/spider/spd_db_conn.cc index ac5701a8274..7ac1b050d76 100644 --- a/storage/spider/spd_db_conn.cc +++ b/storage/spider/spd_db_conn.cc @@ -8560,16 +8560,6 @@ int spider_db_print_item_type(Item *item, Field *field, ha_spider *spider, case Item::ROW_ITEM: 
DBUG_RETURN(spider_db_open_item_row((Item_row *) item, spider, str, alias, alias_length, dbton_id, use_fields, fields)); - case Item::STRING_ITEM: - DBUG_RETURN(spider_db_open_item_string(item, field, spider, str, - alias, alias_length, dbton_id, - use_fields, fields)); - case Item::INT_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: - DBUG_RETURN(spider_db_open_item_int(item, field, spider, str, - alias, alias_length, dbton_id, - use_fields, fields)); case Item::CACHE_ITEM: DBUG_RETURN(spider_db_open_item_cache((Item_cache *) item, field, spider, str, alias, alias_length, @@ -8586,6 +8576,26 @@ int spider_db_print_item_type(Item *item, Field *field, ha_spider *spider, case Item::EXPR_CACHE_ITEM: #endif DBUG_RETURN(ER_SPIDER_COND_SKIP_NUM); + case Item::CONST_ITEM: + { + switch (item->cmp_type()) + { + case STRING_RESULT: + case TIME_RESULT: + DBUG_RETURN(spider_db_open_item_string(item, field, spider, str, + alias, alias_length, dbton_id, + use_fields, fields)); + case INT_RESULT: + case REAL_RESULT: + case DECIMAL_RESULT: + DBUG_RETURN(spider_db_open_item_int(item, field, spider, str, + alias, alias_length, dbton_id, + use_fields, fields)); + default: + DBUG_ASSERT(FALSE); + /* Fall through */ + } + } default: THD *thd = spider->trx->thd; SPIDER_SHARE *share = spider->share; @@ -9102,7 +9112,6 @@ int spider_db_open_item_int(Item *item, Field *field, ha_spider *spider, { THD *thd = NULL; TABLE *table; - bool print_quoted_string; my_bitmap_map *saved_map; Time_zone *saved_time_zone; char tmp_buf[MAX_FIELD_WIDTH]; @@ -9131,31 +9140,17 @@ int spider_db_open_item_int(Item *item, Field *field, ha_spider *spider, item->save_in_field(field, FALSE); saved_time_zone = thd->variables.time_zone; thd->variables.time_zone = UTC; - print_quoted_string = TRUE; - } - else - { -#ifdef SPIDER_ITEM_HAS_CMP_TYPE - DBUG_PRINT("info", ("spider cmp_type=%u", item->cmp_type())); - if (item->cmp_type() == TIME_RESULT) - print_quoted_string = TRUE; - else -#endif - 
print_quoted_string = FALSE; } - if (print_quoted_string) + if (thd) { - if (thd) - { - /* Retrieve the stored value converted to UTC */ - tmp_str2 = field->val_str(&str_value); + /* Retrieve the stored value converted to UTC */ + tmp_str2 = field->val_str(&str_value); - if (!tmp_str2) - { - error_num = HA_ERR_OUT_OF_MEM; - goto error; - } + if (!tmp_str2) + { + error_num = HA_ERR_OUT_OF_MEM; + goto error; } if (str->reserve(SPIDER_SQL_VALUE_QUOTE_LEN * 2 + tmp_str2->length())) diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc index ada9aa20821..3af7c424c3d 100644 --- a/storage/spider/spd_db_mysql.cc +++ b/storage/spider/spd_db_mysql.cc @@ -618,6 +618,15 @@ SPIDER_DB_ROW *spider_db_mysql_result::fetch_row_from_tmp_table( DBUG_RETURN((SPIDER_DB_ROW *) &row); } + +static my_bool str_to_datetime(const char *str, size_t length, + MYSQL_TIME *l_time, + ulonglong flags, MYSQL_TIME_STATUS *status) +{ + return str_to_datetime_or_date(str, length, l_time, flags, status); +} + + int spider_db_mysql_result::fetch_table_status( int mode, ha_rows &records, @@ -7276,16 +7285,16 @@ int spider_mysql_handler::append_select( if (result_list->lock_type != F_WRLCK && spider->lock_mode < 1) { /* no lock */ - st_select_lex *select_lex = &spider->trx->thd->lex->select_lex; + LEX *lex = spider->trx->thd->lex; if ( - select_lex->sql_cache == SELECT_LEX::SQL_CACHE && + lex->sql_cache == LEX::SQL_CACHE && (spider->share->query_cache_sync & 1) ) { if (str->reserve(SPIDER_SQL_SQL_CACHE_LEN)) DBUG_RETURN(HA_ERR_OUT_OF_MEM); str->q_append(SPIDER_SQL_SQL_CACHE_STR, SPIDER_SQL_SQL_CACHE_LEN); } else if ( - select_lex->sql_cache == SELECT_LEX::SQL_NO_CACHE && + lex->sql_cache == LEX::SQL_NO_CACHE && (spider->share->query_cache_sync & 2) ) { if (str->reserve(SPIDER_SQL_SQL_NO_CACHE_LEN)) diff --git a/storage/spider/spd_sys_table.cc b/storage/spider/spd_sys_table.cc index ed25e4fcf32..e64b5173b9a 100644 --- a/storage/spider/spd_sys_table.cc +++ 
b/storage/spider/spd_sys_table.cc @@ -2386,7 +2386,7 @@ void spider_get_sys_table_sts_info( *index_file_length = (ulonglong) table->field[4]->val_int(); *records = (ha_rows) table->field[5]->val_int(); *mean_rec_length = (ulong) table->field[6]->val_int(); - table->field[7]->get_date(&mysql_time, 0); + table->field[7]->get_date(&mysql_time, date_mode_t(0)); #ifdef MARIADB_BASE_VERSION *check_time = (time_t) my_system_gmt_sec(&mysql_time, &not_used_long, &not_used_uint); @@ -2394,7 +2394,7 @@ void spider_get_sys_table_sts_info( *check_time = (time_t) my_system_gmt_sec(&mysql_time, &not_used_long, &not_used_my_bool); #endif - table->field[8]->get_date(&mysql_time, 0); + table->field[8]->get_date(&mysql_time, date_mode_t(0)); #ifdef MARIADB_BASE_VERSION *create_time = (time_t) my_system_gmt_sec(&mysql_time, &not_used_long, &not_used_uint); @@ -2402,7 +2402,7 @@ void spider_get_sys_table_sts_info( *create_time = (time_t) my_system_gmt_sec(&mysql_time, &not_used_long, &not_used_my_bool); #endif - table->field[9]->get_date(&mysql_time, 0); + table->field[9]->get_date(&mysql_time, date_mode_t(0)); #ifdef MARIADB_BASE_VERSION *update_time = (time_t) my_system_gmt_sec(&mysql_time, &not_used_long, &not_used_uint); diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc index 1940dd5aad9..2e3705f04ee 100644 --- a/storage/spider/spd_table.cc +++ b/storage/spider/spd_table.cc @@ -9048,8 +9048,14 @@ int spider_set_direct_limit_offset( ) DBUG_RETURN(FALSE); + /* + TODO: following comment is wrong or the check is wrong (correct + check for derived table will be something like select_lex->linkage, + if they need only top level it is better to check nested level and do + not loose UNIONS & Co + */ // must not be derived table - if (&thd->lex->select_lex != select_lex) + if (thd->lex->first_select_lex() != select_lex) DBUG_RETURN(FALSE); spider->direct_select_offset = offset_limit; diff --git a/storage/tokudb/PerconaFT/portability/toku_crash.cc b/storage/tokudb/PerconaFT/portability/toku_crash.cc 
index 0af85342a99..297cc29d9ca 100644 --- a/storage/tokudb/PerconaFT/portability/toku_crash.cc +++ b/storage/tokudb/PerconaFT/portability/toku_crash.cc @@ -70,7 +70,7 @@ run_gdb(pid_t parent_pid, const char *gdb_path) { "-ex", "thread apply all bt", "-ex", "thread apply all bt full", exe_buf, pid_buf, - NULL); + (char*) NULL); } static void diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug28430.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug28430.result index c7450a1b9c0..aea5bab7cf3 100644 --- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug28430.result +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug28430.result @@ -51,6 +51,10 @@ DELETE FROM test.regular_tbl WHERE id = del_count; SET del_count = del_count - 2; END WHILE; END| +Warnings: +Level Warning +Code 1287 +Message '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead CREATE PROCEDURE test.proc_bykey() BEGIN DECLARE ins_count INT DEFAULT 1000; @@ -72,6 +76,10 @@ DELETE FROM test.bykey_tbl WHERE id = del_count; SET del_count = del_count - 2; END WHILE; END| +Warnings: +Level Warning +Code 1287 +Message '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead CREATE PROCEDURE test.proc_byrange() BEGIN DECLARE ins_count INT DEFAULT 1000; @@ -93,6 +101,10 @@ DELETE FROM test.byrange_tbl WHERE id = del_count; SET del_count = del_count - 2; END WHILE; END| +Warnings: +Level Warning +Code 1287 +Message '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' 
instead CALL test.proc_norm(); SELECT count(*) as "Master regular" FROM test.regular_tbl; Master regular 500 diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug30888.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug30888.result index f3ffc908504..912b05216da 100644 --- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug30888.result +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug30888.result @@ -26,6 +26,10 @@ DELETE FROM test.regular_tbl WHERE id = del_count; SET del_count = del_count - 2; END WHILE; END| +Warnings: +Level Warning +Code 1287 +Message '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead CALL test.proc_norm(); connection slave; connection master; diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_dml.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_dml.result index 65057791b48..8ada947f217 100644 --- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_dml.result +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_dml.result @@ -485,60 +485,60 @@ SET TRANSACTION ISOLATION LEVEL SERIALIZABLE; CREATE USER 'user_test_rpl'@'localhost' IDENTIFIED BY PASSWORD '*1111111111111111111111111111111111111111'; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin authentication_string select_priv -localhost user_test_rpl *1111111111111111111111111111111111111111 N +localhost user_test_rpl mysql_native_password *1111111111111111111111111111111111111111 N connection slave; USE test_rpl; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin authentication_string select_priv -localhost user_test_rpl *1111111111111111111111111111111111111111 N +localhost user_test_rpl mysql_native_password *1111111111111111111111111111111111111111 N 
connection master; ******************** GRANT ******************** GRANT SELECT ON *.* TO 'user_test_rpl'@'localhost'; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin authentication_string select_priv -localhost user_test_rpl *1111111111111111111111111111111111111111 Y +localhost user_test_rpl mysql_native_password *1111111111111111111111111111111111111111 Y connection slave; USE test_rpl; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin authentication_string select_priv -localhost user_test_rpl *1111111111111111111111111111111111111111 Y +localhost user_test_rpl mysql_native_password *1111111111111111111111111111111111111111 Y connection master; ******************** REVOKE ******************** REVOKE SELECT ON *.* FROM 'user_test_rpl'@'localhost'; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin authentication_string select_priv -localhost user_test_rpl *1111111111111111111111111111111111111111 N +localhost user_test_rpl mysql_native_password *1111111111111111111111111111111111111111 N connection slave; USE test_rpl; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin authentication_string select_priv -localhost user_test_rpl *1111111111111111111111111111111111111111 N +localhost user_test_rpl mysql_native_password *1111111111111111111111111111111111111111 N connection master; ******************** SET PASSWORD ******************** SET PASSWORD FOR 'user_test_rpl'@'localhost' = '*0000000000000000000000000000000000000000'; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin 
authentication_string select_priv -localhost user_test_rpl *0000000000000000000000000000000000000000 mysql_native_password *0000000000000000000000000000000000000000 N +localhost user_test_rpl mysql_native_password *0000000000000000000000000000000000000000 N connection slave; USE test_rpl; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin authentication_string select_priv -localhost user_test_rpl *0000000000000000000000000000000000000000 mysql_native_password *0000000000000000000000000000000000000000 N +localhost user_test_rpl mysql_native_password *0000000000000000000000000000000000000000 N connection master; ******************** RENAME USER ******************** RENAME USER 'user_test_rpl'@'localhost' TO 'user_test_rpl_2'@'localhost'; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin authentication_string select_priv -localhost user_test_rpl_2 *0000000000000000000000000000000000000000 mysql_native_password *0000000000000000000000000000000000000000 N +localhost user_test_rpl_2 mysql_native_password *0000000000000000000000000000000000000000 N connection slave; USE test_rpl; SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%'; host user password plugin authentication_string select_priv -localhost user_test_rpl_2 *0000000000000000000000000000000000000000 mysql_native_password *0000000000000000000000000000000000000000 N +localhost user_test_rpl_2 mysql_native_password *0000000000000000000000000000000000000000 N connection master; ******************** DROP USER ******************** @@ -679,7 +679,6 @@ DROP TRIGGER tr1; ******************** EVENTS ******************** -GRANT EVENT ON *.* TO 'root'@'localhost'; INSERT INTO t1 VALUES(1, 'test1'); CREATE EVENT e1 ON SCHEDULE EVERY '1' SECOND COMMENT 
'e_second_comment' DO DELETE FROM t1; SHOW EVENTS; @@ -1101,8 +1100,6 @@ master-bin.000001 # Query # # use `test_rpl`; DELETE FROM t2 master-bin.000001 # Xid # # COMMIT /* XID */ master-bin.000001 # Gtid # # GTID #-#-# master-bin.000001 # Query # # use `test_rpl`; DROP TRIGGER tr1 -master-bin.000001 # Gtid # # GTID #-#-# -master-bin.000001 # Query # # use `test_rpl`; GRANT EVENT ON *.* TO 'root'@'localhost' master-bin.000001 # Gtid # # BEGIN GTID #-#-# master-bin.000001 # Query # # use `test_rpl`; INSERT INTO t1 VALUES(1, 'test1') master-bin.000001 # Xid # # COMMIT /* XID */ diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_read_only_ft.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_read_only_ft.result index 593f177569f..e0b6b615bb6 100644 --- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_read_only_ft.result +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_read_only_ft.result @@ -24,6 +24,10 @@ unix_timestamp()-@tstart <= 10 connection slave; connection master; include/diff_tables.inc [master:test.t, slave:test.t] +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' 
instead connection master; drop table if exists t; connection slave; diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result index a80e1664663..2a0ee4fa3e1 100644 --- a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result +++ b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result @@ -1,7 +1,7 @@ drop table if exists t; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=innodb; insert into t 
values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); explain select x,id from t force index (x) where x=0 and id=0; diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result index 96d681407fe..4580cc96404 100644 --- a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result +++ b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result @@ -1,7 +1,7 @@ drop table if exists t; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on create table t (id int not null, x int not null, y int not null, primary 
key(id), key(x)) engine=tokudb; insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); explain select x,id from t force index (x) where x=0 and id=0; diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result index 43737c7753e..fb998e3a6ad 100644 --- a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result +++ b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result @@ -1,7 +1,7 @@ drop table if exists t; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on create table t (a int not 
null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=innodb; insert into t values (0,0,0,0),(0,1,0,1); explain select c,a,b from t where c=0 and a=0 and b=1; diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result index 1dcb1ee1b8b..9d9fb4ca079 100644 --- a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result +++ b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result @@ -1,7 +1,7 @@ drop table if exists t; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on 
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=tokudb; insert into t values (0,0,0,0),(0,1,0,1); explain select c,a,b from t where c=0 and a=0 and b=1; diff --git a/storage/tokudb/mysql-test/tokudb/r/type_datetime.result b/storage/tokudb/mysql-test/tokudb/r/type_datetime.result index 80f886683e7..698bad10390 100644 --- a/storage/tokudb/mysql-test/tokudb/r/type_datetime.result +++ b/storage/tokudb/mysql-test/tokudb/r/type_datetime.result @@ -363,7 +363,7 @@ greatest(cast('01-01-01' as date), '01-01-02') + 0 20010102 select least(cast('01-01-01' as datetime), '01-01-02') + 0; least(cast('01-01-01' as datetime), '01-01-02') + 0 -20010101000000.000000 +20010101000000 select cast(least(cast('01-01-01' as datetime), '01-01-02') as signed); cast(least(cast('01-01-01' as datetime), '01-01-02') as signed) 20010101000000 @@ -401,7 +401,7 @@ if(@bug28261 = f1, '', @bug28261:= f1) 2001-01-01 2002-02-02 Warnings: -Warning 1292 Incorrect datetime value: '' +Warning 1292 Truncated incorrect datetime value: '' select if(@bug28261 = f1, '', @bug28261:= f1) from t1; if(@bug28261 = f1, '', @bug28261:= f1) 2001-01-01 @@ -425,11 +425,11 @@ f1 2001-01-01 00:00:00 2002-02-02 00:00:00 Warnings: -Warning 
1292 Incorrect datetime value: '2002010' +Warning 1292 Truncated incorrect datetime value: '2002010' select * from t1 where f1 between 20020101 and 2007010100000; f1 Warnings: -Warning 1292 Incorrect datetime value: '2007010100000' +Warning 1292 Truncated incorrect datetime value: '2007010100000' drop table t1; # # Bug#27216: functions with parameters of different date types may @@ -500,7 +500,7 @@ f1 45:44:44 15:44:44 Warnings: -Warning 1292 Incorrect datetime value: '1' +Warning 1292 Truncated incorrect datetime value: '1' drop table t1; create table t1 (a tinyint); insert into t1 values (), (), (); diff --git a/storage/tokudb/mysql-test/tokudb/r/type_varchar.result b/storage/tokudb/mysql-test/tokudb/r/type_varchar.result index bf98e12ce16..881a4cd66ac 100644 --- a/storage/tokudb/mysql-test/tokudb/r/type_varchar.result +++ b/storage/tokudb/mysql-test/tokudb/r/type_varchar.result @@ -13,7 +13,7 @@ t1 CREATE TABLE `t1` ( show create table vchar; Table Create Table vchar CREATE TABLE `vchar` ( - `v` varchar(30) DEFAULT NULL, + `v` varchar(30)/*old*/ DEFAULT NULL, `c` char(3) DEFAULT NULL, `e` enum('abc','def','ghi') DEFAULT NULL, `t` text DEFAULT NULL diff --git a/storage/tokudb/tokudb_dir_cmd.cc b/storage/tokudb/tokudb_dir_cmd.cc index 5431cbab7aa..d0da92eab27 100644 --- a/storage/tokudb/tokudb_dir_cmd.cc +++ b/storage/tokudb/tokudb_dir_cmd.cc @@ -50,11 +50,11 @@ static int MDL_and_TDC(THD *thd, table_arg.str = const_cast<char *>(table); table_arg.length = strlen(table); Table_ident table_ident(thd, &db_arg, &table_arg, true);; - thd->lex->select_lex.add_table_to_list( + thd->lex->first_select_lex()->add_table_to_list( thd, &table_ident, NULL, 1, TL_UNLOCK, MDL_EXCLUSIVE, 0, 0, 0); /* The lock will be released at the end of mysq_execute_command() */ error = lock_table_names(thd, - thd->lex->select_lex.table_list.first, + thd->lex->first_select_lex()->table_list.first, NULL, thd->variables.lock_wait_timeout, 0); diff --git a/storage/tokudb/tokudb_sysvars.cc 
b/storage/tokudb/tokudb_sysvars.cc index 1e841f3b959..c561fb65dc8 100644 --- a/storage/tokudb/tokudb_sysvars.cc +++ b/storage/tokudb/tokudb_sysvars.cc @@ -892,6 +892,7 @@ static MYSQL_THDVAR_ULONGLONG( 1); #endif // defined(TOKU_INCLUDE_RFR) && TOKU_INCLUDE_RFR +#if defined(TOKU_INCLUDE_UPSERT) static MYSQL_THDVAR_BOOL( enable_fast_update, PLUGIN_VAR_THDLOCAL, @@ -900,13 +901,14 @@ static MYSQL_THDVAR_BOOL( NULL, false); -static MYSQL_THDVAR_BOOL( + static MYSQL_THDVAR_BOOL( enable_fast_upsert, PLUGIN_VAR_THDLOCAL, "disable slow upsert", NULL, NULL, false); +#endif #if TOKU_INCLUDE_XA static MYSQL_THDVAR_BOOL( |