diff options
author | Sergei Golubchik <sergii@pisem.net> | 2011-11-22 18:04:38 +0100 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2011-11-22 18:04:38 +0100 |
commit | d2755a2c9c109ddb4e2e0c9feda89431a6c4fd50 (patch) | |
tree | c6e4678908c750d7f558e98cedc349aa1d350892 /storage | |
parent | af32b02c06f32a89dc9f52e556bc5dd3bf49c19e (diff) | |
parent | 42221abaed700f6dc5d280b462755851780e8487 (diff) | |
download | mariadb-git-d2755a2c9c109ddb4e2e0c9feda89431a6c4fd50.tar.gz |
5.3->5.5 merge
Diffstat (limited to 'storage')
46 files changed, 1112 insertions, 676 deletions
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 50b7bf9f5d1..0b58afc23c9 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -156,10 +156,16 @@ static MYSQL_SYSVAR_ULONG(block_size, maria_block_size, static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval, PLUGIN_VAR_RQCMDARG, - "Interval between automatic checkpoints, in seconds; 0 means" + "Interval between tries to do an automatic checkpoints. In seconds; 0 means" " 'no automatic checkpoints' which makes sense only for testing.", NULL, update_checkpoint_interval, 30, 0, UINT_MAX, 1); +static MYSQL_SYSVAR_ULONG(checkpoint_log_activity, maria_checkpoint_min_log_activity, + PLUGIN_VAR_RQCMDARG, + "Number of bytes that the transaction log has to grow between checkpoints before a new " + "checkpoint is written to the log.", + NULL, NULL, 1024*1024, 0, UINT_MAX, 1); + static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures, force_start_after_recovery_failures, /* @@ -397,7 +403,7 @@ static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type, if (!thd->vio_ok()) { - sql_print_error(fmt, args); + sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); return; } @@ -405,6 +411,8 @@ static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type, (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR)) { my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME)); + if (thd->variables.log_warnings > 2) + sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); return; } length= (uint) (strxmov(name, param->db_name, ".", param->table_name, @@ -423,8 +431,11 @@ static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type, protocol->store(msg_type, system_charset_info); protocol->store(msgbuf, msg_length, system_charset_info); if (protocol->write()) - sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n", - msgbuf); + sql_print_error("Failed on my_net_write, writing to stderr instead: %s.%s: 
%s\n", + param->db_name, param->table_name, msgbuf); + else if (thd->variables.log_warnings > 2) + sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); + return; } @@ -1018,8 +1029,7 @@ double ha_maria::scan_time() uint ha_maria::max_supported_key_length() const { - uint tmp= (maria_max_key_length() - 8 - HA_MAX_KEY_SEG*3); - return min(HA_MAX_KEY_BUFF, tmp); + return maria_max_key_length(); } @@ -2151,13 +2161,17 @@ bool ha_maria::check_and_repair(THD *thd) if (crashed) { + bool save_log_all_errors; sql_print_warning("Recovering table: '%s'", table->s->path.str); + save_log_all_errors= thd->log_all_errors; + thd->log_all_errors|= (thd->variables.log_warnings > 2); check_opt.flags= ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) | (maria_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) | T_AUTO_REPAIR); if (repair(thd, &check_opt)) error= 1; + thd->log_all_errors= save_log_all_errors; } thd->set_query(query_backup); DBUG_RETURN(error); @@ -2480,9 +2494,6 @@ int ha_maria::extra(enum ha_extra_function operation) int ha_maria::reset(void) { - pushed_idx_cond= NULL; - pushed_idx_cond_keyno= MAX_KEY; - in_range_check_pushed_down= FALSE; ma_set_index_cond_func(file, NULL, 0); ds_mrr.dsmrr_close(); if (file->trn) @@ -2542,6 +2553,14 @@ void ha_maria::drop_table(const char *name) } +void ha_maria::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) +{ + handler::change_table_ptr(table_arg, share); + if (file) + file->external_ref= table_arg; +} + + int ha_maria::external_lock(THD *thd, int lock_type) { DBUG_ENTER("ha_maria::external_lock"); @@ -2641,7 +2660,7 @@ int ha_maria::external_lock(THD *thd, int lock_type) changes to commit (rollback shouldn't be tested). */ DBUG_ASSERT(!thd->stmt_da->is_sent || - thd->killed == THD::KILL_CONNECTION); + thd->killed == KILL_CONNECTION); /* autocommit ? 
rollback a transaction */ #ifdef MARIA_CANNOT_ROLLBACK if (ma_commit(trn)) @@ -3484,7 +3503,7 @@ my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name, *engine_data= 0; if (file->s->now_transactional && file->s->have_versioning) - return (file->trn->trid >= file->s->state.last_change_trn); + DBUG_RETURN(file->trn->trid >= file->s->state.last_change_trn); /* If a concurrent INSERT has happened just before the currently processed @@ -3519,6 +3538,7 @@ my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name, struct st_mysql_sys_var* system_variables[]= { MYSQL_SYSVAR(block_size), MYSQL_SYSVAR(checkpoint_interval), + MYSQL_SYSVAR(checkpoint_log_activity), MYSQL_SYSVAR(force_start_after_recovery_failures), MYSQL_SYSVAR(group_commit), MYSQL_SYSVAR(group_commit_interval), @@ -3552,6 +3572,7 @@ static void update_checkpoint_interval(MYSQL_THD thd, ma_checkpoint_init(*(ulong *)var_ptr= (ulong)(*(long *)save)); } + /** @brief Updates group commit mode */ @@ -3729,9 +3750,6 @@ Item *ha_maria::idx_cond_push(uint keyno_arg, Item* idx_cond_arg) return NULL; } - - - struct st_mysql_storage_engine maria_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index 39c23c8d1b6..dba0aeaa20e 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -78,6 +78,7 @@ public: { return max_supported_key_length(); } enum row_type get_row_type() const; uint checksum() const; + void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share); virtual double scan_time(); int open(const char *name, int mode, uint test_if_locked); diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index 483a7dd5c12..76049a54238 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -286,7 +286,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file, */ { pgcache_page_no_t last_bitmap_page; - ulong blocks, bytes; + pgcache_page_no_t blocks, bytes; last_bitmap_page= 
*last_page - *last_page % bitmap->pages_covered; blocks= *last_page - last_bitmap_page; @@ -1415,7 +1415,7 @@ static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap, uchar *page_end= data + bitmap->total_size; uchar *best_data= 0; uint min_size; - uint best_area_size, UNINIT_VAR(best_prefix_area_size), best_suffix_area_size; + uint best_area_size, UNINIT_VAR(best_prefix_area_size); uint page, size; ulonglong UNINIT_VAR(best_prefix_bits); DBUG_ENTER("allocate_full_pages"); @@ -1491,7 +1491,6 @@ static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap, best_area_size= area_size; best_prefix_bits= prefix_bits; best_prefix_area_size= prefix_area_size; - best_suffix_area_size= suffix_area_size; /* Prefer to put data in biggest possible area */ if (area_size <= pages_needed) diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 2d2a770ba82..371325594af 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -5004,7 +5004,8 @@ static my_bool read_row_extent_info(MARIA_HA *info, uchar *buff, MARIA_EXTENT_CURSOR extent; MARIA_RECORD_POS *tail_pos; uchar *data, *end_of_data; - uint flag, row_extents, row_extents_size, field_lengths; + uint flag, row_extents, row_extents_size; + uint field_lengths __attribute__ ((unused)); uchar *extents, *end; DBUG_ENTER("read_row_extent_info"); @@ -5039,6 +5040,10 @@ static my_bool read_row_extent_info(MARIA_HA *info, uchar *buff, } info->cur_row.extents_count= row_extents; + /* + field_lengths looks unused but get_key_length will + increment data, which is required as data it's used later. 
+ */ if (share->base.max_field_lengths) get_key_length(field_lengths, data); @@ -5327,7 +5332,6 @@ int _ma_scan_block_record(MARIA_HA *info, uchar *record, my_bool skip_deleted __attribute__ ((unused))) { uint block_size; - my_off_t filepos; MARIA_SHARE *share= info->s; DBUG_ENTER("_ma_scan_block_record"); @@ -5457,7 +5461,6 @@ restart_bitmap_scan: /* Read next bitmap */ info->scan.bitmap_page+= share->bitmap.pages_covered; - filepos= (my_off_t) info->scan.bitmap_page * block_size; if (unlikely(info->scan.bitmap_page >= info->scan.max_page)) { DBUG_PRINT("info", ("Found end of file")); @@ -6737,11 +6740,11 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info, uint i; uint res; uint page_range; - pgcache_page_no_t page, start_page; + pgcache_page_no_t page; uchar *buff; uint data_on_page= data_size; - start_page= page= page_korr(header); + page= page_korr(header); header+= PAGE_STORE_SIZE; page_range= pagerange_korr(header); header+= PAGERANGE_STORE_SIZE; diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 7f686c28532..4d60b4cafe5 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -2343,7 +2343,7 @@ static int initialize_variables_for_repair(HA_CHECK *param, return 1; } - /* Allow us to restore state and check how state changed */ + /* Make a copy to allow us to restore state and check how state changed */ memcpy(org_share, share, sizeof(*share)); /* Repair code relies on share->state.state so we have to update it here */ @@ -2363,6 +2363,14 @@ static int initialize_variables_for_repair(HA_CHECK *param, param->testflag&= ~T_QUICK; param->org_key_map= share->state.key_map; + /* + Clear check variables set by repair. This is needed to allow one to run + several repair's in a row with same param + */ + param->retry_repair= 0; + param->warning_printed= 0; + param->error_printed= 0; + sort_param->sort_info= sort_info; sort_param->fix_datafile= ! 
rep_quick; sort_param->calc_checksum= test(param->testflag & T_CALC_CHECKSUM); @@ -5660,7 +5668,8 @@ static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo, MARIA_KEY key; key.keyinfo= keyinfo; key.data= (uchar*) key_data; - key.data_length= _ma_keylength(keyinfo, key_data); + key.data_length= (_ma_keylength(keyinfo, key_data) - + keyinfo->share->rec_reflength); return _ma_row_pos_from_key(&key); } /* get_record_for_key */ @@ -5673,7 +5682,7 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, my_off_t prev_block) { uint a_length,t_length,nod_flag; - my_off_t filepos,key_file_length; + my_off_t filepos; uchar *anc_buff,*lastkey; MARIA_KEY_PARAM s_temp; MARIA_KEYDEF *keyinfo=sort_param->keyinfo; @@ -5741,7 +5750,6 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, _ma_store_page_used(share, anc_buff, key_block->last_length); bzero(anc_buff+key_block->last_length, keyinfo->block_length- key_block->last_length); - key_file_length=share->state.state.key_file_length; if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR) DBUG_RETURN(1); _ma_fast_unlock_key_del(info); @@ -5852,7 +5860,7 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) { uint nod_flag,length; - my_off_t filepos,key_file_length; + my_off_t filepos; SORT_KEY_BLOCKS *key_block; MARIA_SORT_INFO *sort_info= sort_param->sort_info; myf myf_rw=sort_info->param->myf_rw; @@ -5869,7 +5877,6 @@ int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) length= _ma_get_page_used(info->s, key_block->buff); if (nod_flag) _ma_kpointer(info,key_block->end_pos,filepos); - key_file_length= info->s->state.state.key_file_length; bzero(key_block->buff+length, keyinfo->block_length-length); if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR) diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index dd188501f4e..addb014969a 100644 --- a/storage/maria/ma_checkpoint.c +++ 
b/storage/maria/ma_checkpoint.c @@ -534,8 +534,9 @@ filter_flush_file_evenly(enum pagecache_page_type type, risk could be that while a checkpoint happens no LRD flushing happens. */ -static uint maria_checkpoint_min_activity= 2*1024*1024; - +static ulong maria_checkpoint_min_cache_activity= 10*1024*1024; +/* Set in ha_maria.cc */ +ulong maria_checkpoint_min_log_activity= 1*1024*1024; pthread_handler_t ma_checkpoint_background(void *arg) { @@ -576,53 +577,62 @@ pthread_handler_t ma_checkpoint_background(void *arg) switch (sleeps % interval) { case 0: + { /* If checkpoints are disabled, wait 1 second and try again */ if (maria_checkpoint_disabled) { sleep_time= 1; break; } - /* - With background flushing evenly distributed over the time - between two checkpoints, we should have only little flushing to do - in the checkpoint. - */ - /* - No checkpoint if little work of interest for recovery was done - since last checkpoint. Such work includes log writing (lengthens - recovery, checkpoint would shorten it), page flushing (checkpoint - would decrease the amount of read pages in recovery). - In case of one short statement per minute (very low load), we don't - want to checkpoint every minute, hence the positive - maria_checkpoint_min_activity. - */ - - if (((translog_get_horizon() - log_horizon_at_last_checkpoint) + - (maria_pagecache->global_cache_write - - pagecache_flushes_at_last_checkpoint) * - maria_pagecache->block_size) < maria_checkpoint_min_activity) { - /* don't take checkpoint, so don't know what to flush */ - pages_to_flush_before_next_checkpoint= 0; - sleep_time= interval; - break; + TRANSLOG_ADDRESS horizon= translog_get_horizon(); + + /* + With background flushing evenly distributed over the time + between two checkpoints, we should have only little flushing to do + in the checkpoint. + */ + /* + No checkpoint if little work of interest for recovery was done + since last checkpoint. 
Such work includes log writing (lengthens + recovery, checkpoint would shorten it), page flushing (checkpoint + would decrease the amount of read pages in recovery). + In case of one short statement per minute (very low load), we don't + want to checkpoint every minute, hence the positive + maria_checkpoint_min_activity. + */ + if ((ulonglong) (horizon - log_horizon_at_last_checkpoint) <= + maria_checkpoint_min_log_activity && + ((ulonglong) (maria_pagecache->global_cache_write - + pagecache_flushes_at_last_checkpoint) * + maria_pagecache->block_size) <= + maria_checkpoint_min_cache_activity) + { + /* + Not enough has happend since last checkpoint. + Sleep for a while and try again later + */ + sleep_time= interval; + break; + } + sleep_time= 1; + ma_checkpoint_execute(CHECKPOINT_MEDIUM, TRUE); + /* + Snapshot this kind of "state" of the engine. Note that the value + below is possibly greater than last_checkpoint_lsn. + */ + log_horizon_at_last_checkpoint= translog_get_horizon(); + pagecache_flushes_at_last_checkpoint= + maria_pagecache->global_cache_write; + /* + If the checkpoint above succeeded it has set d|kfiles and + d|kfiles_end. If is has failed, it has set + pages_to_flush_before_next_checkpoint to 0 so we will skip flushing + and sleep until the next checkpoint. + */ } - sleep_time= 1; - ma_checkpoint_execute(CHECKPOINT_MEDIUM, TRUE); - /* - Snapshot this kind of "state" of the engine. Note that the value below - is possibly greater than last_checkpoint_lsn. - */ - log_horizon_at_last_checkpoint= translog_get_horizon(); - pagecache_flushes_at_last_checkpoint= - maria_pagecache->global_cache_write; - /* - If the checkpoint above succeeded it has set d|kfiles and - d|kfiles_end. If is has failed, it has set - pages_to_flush_before_next_checkpoint to 0 so we will skip flushing - and sleep until the next checkpoint. 
- */ break; + } case 1: /* set up parameters for background page flushing */ filter_param.up_to_lsn= last_checkpoint_lsn; diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 5be819f341b..2173d2cac8f 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -621,7 +621,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, to be able to put at least 2 keys on an index block for the key algorithms to work). */ - if (length > maria_max_key_length()) + if (length > _ma_max_key_length()) { my_errno=HA_WRONG_CREATE_OPTION; goto err_no_lock; diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index 70ecac318cc..c1c0a8e9729 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -1734,14 +1734,19 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, my_bool skip_deleted_blocks) { - int block_of_record, info_read; + int block_of_record; +#ifdef MARIA_EXTERNAL_LOCKING + int info_read; +#endif uint left_len,b_type; uchar *UNINIT_VAR(to); MARIA_BLOCK_INFO block_info; MARIA_SHARE *share= info->s; DBUG_ENTER("_ma_read_rnd_dynamic_record"); +#ifdef MARIA_EXTERNAL_LOCKING info_read=0; +#endif if (info->lock_type == F_UNLCK) { @@ -1750,8 +1755,10 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, info->tmp_lock_type=F_RDLCK; #endif } +#ifdef MARIA_EXTERNAL_LOCKING else info_read=1; /* memory-keyinfoblock is ok */ +#endif block_of_record= 0; /* First block of record is numbered as zero. 
*/ block_info.second_read= 0; diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c index 3e47f09c1fa..99748c65ebf 100644 --- a/storage/maria/ma_ft_update.c +++ b/storage/maria/ma_ft_update.c @@ -363,7 +363,7 @@ my_bool _ma_ft_convert_to_ft2(MARIA_HA *info, MARIA_KEY *key) for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength) { tmp_key.data= key_ptr; - if (_ma_ck_real_write_btree(info, key, &root, SEARCH_SAME)) + if (_ma_ck_real_write_btree(info, &tmp_key, &root, SEARCH_SAME)) DBUG_RETURN(1); } diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c index 361ee41f2c8..fdd9ba7d2c6 100644 --- a/storage/maria/ma_info.c +++ b/storage/maria/ma_info.c @@ -28,6 +28,12 @@ MARIA_RECORD_POS maria_position(MARIA_HA *info) } +uint maria_max_key_length() +{ + uint tmp= (_ma_max_key_length() - 8 - HA_MAX_KEY_SEG*3); + return min(HA_MAX_KEY_BUFF, tmp); +} + /* Get information about the table */ /* if flag == 2 one get current info (no sync from database */ diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c index 920f5a08013..9c794501022 100644 --- a/storage/maria/ma_key_recover.c +++ b/storage/maria/ma_key_recover.c @@ -944,7 +944,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, uchar *buff; const uchar *header_end= header + head_length; uint page_offset= 0, org_page_length; - uint nod_flag, page_length, keypage_header, keynr; + uint page_length, keypage_header, keynr; uint max_page_size= share->max_index_block_size; int result; MARIA_PAGE page; @@ -972,7 +972,6 @@ uint _ma_apply_redo_index(MARIA_HA *info, keynr= _ma_get_keynr(share, buff); _ma_page_setup(&page, info, share->keyinfo + keynr, page_pos, buff); - nod_flag= page.node; org_page_length= page_length= page.size; keypage_header= share->keypage_header; diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index f437bf1399f..b3e4acb2995 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -2612,14 +2612,12 @@ 
static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) i < buffer->size; i+= TRANSLOG_PAGE_SIZE, pg++) { - TRANSLOG_ADDRESS addr= (buffer->offset + i); - TRANSLOG_VALIDATOR_DATA data; + TRANSLOG_ADDRESS addr __attribute__((unused))= (buffer->offset + i); DBUG_PRINT("info", ("send log form %lu till %lu address: (%lu,0x%lx) " "page #: %lu buffer size: %lu buffer: 0x%lx", (ulong) i, (ulong) (i + TRANSLOG_PAGE_SIZE), LSN_IN_PARTS(addr), (ulong) pg, (ulong) buffer->size, (ulong) buffer)); - data.addr= &addr; DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE); DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size); if (translog_status != TRANSLOG_OK && translog_status != TRANSLOG_SHUTDOWN) @@ -6568,16 +6566,12 @@ my_bool translog_scanner_init(LSN lsn, TRANSLOG_SCANNER_DATA *scanner, my_bool use_direct) { - TRANSLOG_VALIDATOR_DATA data; DBUG_ENTER("translog_scanner_init"); DBUG_PRINT("enter", ("Scanner: 0x%lx LSN: (%lu,0x%lx)", (ulong) scanner, LSN_IN_PARTS(lsn))); DBUG_ASSERT(translog_status == TRANSLOG_OK || translog_status == TRANSLOG_READONLY); - data.addr= &scanner->page_addr; - data.was_recovered= 0; - scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; scanner->fixed_horizon= fixed_horizon; diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index d545ed76592..82e6e30839b 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -469,7 +469,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) } key_parts+=fulltext_keys*FT_SEGS; - if (share->base.max_key_length > maria_max_key_length() || + if (share->base.max_key_length > _ma_max_key_length() || keys > MARIA_MAX_KEY || key_parts > MARIA_MAX_KEY * HA_MAX_KEY_SEG) { DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts)); @@ -1861,7 +1861,7 @@ int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, const char *org_name, } info->dfile.file= 
share->bitmap.file.file= - mysql_file_open(key_file_dfile, share->data_file_name.str, + mysql_file_open(key_file_dfile, data_name, share->mode | O_SHARE, MYF(MY_WME)); return info->dfile.file >= 0 ? 0 : 1; } diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index b0ad778a4f2..893e393a79a 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -3216,9 +3216,11 @@ static LSN parse_checkpoint_record(LSN lsn) tprint(tracef, "Loading data from checkpoint record at LSN (%lu,0x%lx)\n", LSN_IN_PARTS(lsn)); - if ((len= translog_read_record_header(lsn, &rec)) == RECHEADER_READ_ERROR) + if ((len= translog_read_record_header(lsn, &rec)) == RECHEADER_READ_ERROR || + rec.type != LOGREC_CHECKPOINT) { - tprint(tracef, "Cannot find checkpoint record where it should be\n"); + eprint(tracef, "Cannot find checkpoint record at LSN (%lu,0x%lx)", + LSN_IN_PARTS(lsn)); return LSN_ERROR; } diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c index 6f32a60c073..ea90b60ce12 100644 --- a/storage/maria/ma_rt_split.c +++ b/storage/maria/ma_rt_split.c @@ -380,7 +380,6 @@ int maria_rtree_split_page(const MARIA_KEY *key, MARIA_PAGE *page, SplitStruct *stop; double *coord_buf; double *next_coord; - double *old_coord; int n_dim; uchar *source_cur, *cur1, *cur2; uchar *new_page_buff, *log_internal_copy, *log_internal_copy_ptr, @@ -426,7 +425,6 @@ int maria_rtree_split_page(const MARIA_KEY *key, MARIA_PAGE *page, maria_rtree_d_mbr(keyinfo->seg, key->data, key_data_length, cur->coords); cur->key= key->data; - old_coord= next_coord; if (split_maria_rtree_node(task, max_keys + 1, page->size + full_length + 2, diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index cd3294e8975..25c09e8de33 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -738,7 +738,7 @@ struct st_maria_handler { length=mi_uint2korr((key)+1)+3; } \ } -#define maria_max_key_length() ((maria_block_size - MAX_KEYPAGE_HEADER_SIZE)/3 - 
MARIA_INDEX_OVERHEAD_SIZE) +#define _ma_max_key_length() ((maria_block_size - MAX_KEYPAGE_HEADER_SIZE)/3 - MARIA_INDEX_OVERHEAD_SIZE) #define get_pack_length(length) ((length) >= 255 ? 3 : 1) #define _ma_have_versioning(info) ((info)->row_flag & ROW_FLAG_TRANSID) @@ -817,6 +817,7 @@ extern uchar maria_zero_string[]; extern my_bool maria_inited, maria_in_ha_maria, maria_recovery_changed_data; extern my_bool maria_recovery_verbose, maria_checkpoint_disabled; extern my_bool maria_assert_if_crashed_table; +extern ulong maria_checkpoint_min_log_activity; extern HASH maria_stored_state; extern int (*maria_create_trn_hook)(MARIA_HA *); extern my_bool (*ma_killed)(MARIA_HA *); diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c index 4480dabbcad..481b77a2cc6 100644 --- a/storage/maria/maria_pack.c +++ b/storage/maria/maria_pack.c @@ -3051,7 +3051,6 @@ static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf) { int error; MARIA_HA *isam_info; - my_off_t filepos; if (!info->current) { @@ -3076,7 +3075,6 @@ static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf) return(HA_ERR_END_OF_FILE); info->current++; isam_info= *info->current; - filepos=isam_info->s->pack.header_length; maria_reset(isam_info); maria_extra(isam_info,HA_EXTRA_CACHE, 0); if ((error= maria_scan_init(isam_info))) diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 0f02976dbb4..0734e12cbe4 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -179,6 +179,7 @@ int trnman_init(TrID initial_trid) trnman_allocated_transactions= 0; /* This is needed for recovery and repair */ dummy_transaction_object.min_read_from= ~(TrID) 0; + dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID; pool= 0; global_trid_generator= initial_trid; diff --git a/storage/maria/unittest/CMakeLists.txt b/storage/maria/unittest/CMakeLists.txt index fd3e3f909ce..ba7c8d324f0 100644 --- a/storage/maria/unittest/CMakeLists.txt +++ b/storage/maria/unittest/CMakeLists.txt @@ -13,7 +13,6 @@ # along 
with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib ${CMAKE_SOURCE_DIR}/unittest/mytap) LINK_LIBRARIES(aria myisam mytap mysys dbug strings ${ZLIB_LIBRARY}) diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index 25020e76c7f..8fde949189b 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -146,14 +146,16 @@ static void mi_check_print_msg(HA_CHECK *param, const char* msg_type, if (!thd->vio_ok()) { - sql_print_error("%s", msgbuf); + sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); return; } if (param->testflag & (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR)) { - my_message(ER_NOT_KEYFILE,msgbuf,MYF(MY_WME)); + my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME)); + if (thd->variables.log_warnings > 2 && ! thd->log_all_errors) + sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); return; } length=(uint) (strxmov(name, param->db_name,".",param->table_name,NullS) - @@ -178,7 +180,7 @@ static void mi_check_print_msg(HA_CHECK *param, const char* msg_type, sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n", msgbuf); else if (thd->variables.log_warnings > 2) - sql_print_error("%s", msgbuf); + sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf); if (param->need_print_msg_lock) mysql_mutex_unlock(¶m->print_msg_mutex); @@ -579,6 +581,7 @@ void mi_check_print_info(HA_CHECK *param, const char *fmt,...) 
va_list args; va_start(args, fmt); mi_check_print_msg(param, "info", fmt, args); + param->note_printed= 1; va_end(args); } @@ -640,7 +643,6 @@ my_bool mi_killed_in_mariadb(MI_INFO *info) } - ha_myisam::ha_myisam(handlerton *hton, TABLE_SHARE *table_arg) :handler(hton, table_arg), file(0), int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | @@ -1581,7 +1583,10 @@ bool ha_myisam::check_and_repair(THD *thd) if ((marked_crashed= mi_is_crashed(file)) || check(thd, &check_opt)) { + bool save_log_all_errors; sql_print_warning("Recovering table: '%s'",table->s->path.str); + save_log_all_errors= thd->log_all_errors; + thd->log_all_errors|= (thd->variables.log_warnings > 2); if (myisam_recover_options & HA_RECOVER_FULL_BACKUP) { char buff[MY_BACKUP_NAME_EXTRA_LENGTH+1]; @@ -1599,6 +1604,7 @@ bool ha_myisam::check_and_repair(THD *thd) T_AUTO_REPAIR); if (repair(thd, &check_opt)) error=1; + thd->log_all_errors= save_log_all_errors; } thd->set_query(query_backup); DBUG_RETURN(error); @@ -1858,9 +1864,6 @@ int ha_myisam::extra(enum ha_extra_function operation) int ha_myisam::reset(void) { - pushed_idx_cond= NULL; - pushed_idx_cond_keyno= MAX_KEY; - in_range_check_pushed_down= FALSE; mi_set_index_cond_func(file, NULL, 0); ds_mrr.dsmrr_close(); return mi_reset(file); @@ -1892,6 +1895,13 @@ int ha_myisam::delete_table(const char *name) return mi_delete_table(name); } +void ha_myisam::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) +{ + handler::change_table_ptr(table_arg, share); + if (file) + file->external_ref= table_arg; +} + int ha_myisam::external_lock(THD *thd, int lock_type) { diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index 0de25c6d355..c3c236ad4fe 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -76,7 +76,7 @@ class ha_myisam: public handler uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; } uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; } uint 
checksum() const; - + void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share); int open(const char *name, int mode, uint test_if_locked); int close(void); int write_row(uchar * buf); diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index 10c0567e508..05fd9af9079 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -938,7 +938,7 @@ static uint isam_key_length(MI_INFO *info, register MI_KEYDEF *keyinfo) int chk_data_link(HA_CHECK *param, MI_INFO *info, my_bool extend) { int error,got_error,flag; - uint key,UNINIT_VAR(left_length),b_type,field; + uint key, UNINIT_VAR(left_length), b_type; ha_rows records,del_blocks; my_off_t used,empty,pos,splits,UNINIT_VAR(start_recpos), del_length,link_used,start_block; @@ -946,7 +946,6 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info, my_bool extend) char llbuff[22],llbuff2[22],llbuff3[22]; ha_checksum intern_record_checksum; ha_checksum key_checksum[HA_MAX_POSSIBLE_KEY]; - my_bool static_row_size; MI_KEYDEF *keyinfo; MI_BLOCK_INFO block_info; DBUG_ENTER("chk_data_link"); @@ -970,21 +969,6 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info, my_bool extend) got_error=error=0; empty=info->s->pack.header_length; - /* Check how to calculate checksum of rows */ - static_row_size=1; - if (info->s->data_file_type == COMPRESSED_RECORD) - { - for (field=0 ; field < info->s->base.fields ; field++) - { - if (info->s->rec[field].base_type == FIELD_BLOB || - info->s->rec[field].base_type == FIELD_VARCHAR) - { - static_row_size=0; - break; - } - } - } - pos=my_b_tell(¶m->read_cache); bzero((char*) key_checksum, info->s->base.keys * sizeof(key_checksum[0])); while (pos < info->state->data_file_length) @@ -1343,7 +1327,7 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info, my_bool extend) } if (param->testflag & T_INFO) { - if (param->warning_printed || param->error_printed) + if (param->warning_printed || param->error_printed || param->note_printed) puts(""); if (used != 0 && ! 
param->error_printed) { @@ -1536,6 +1520,8 @@ int mi_repair(HA_CHECK *param, register MI_INFO *info, got_error=1; new_file= -1; sort_param.sort_info=&sort_info; + param->retry_repair= 0; + param->warning_printed= param->error_printed= param->note_printed= 0; if (!(param->testflag & T_SILENT)) { @@ -1678,7 +1664,7 @@ int mi_repair(HA_CHECK *param, register MI_INFO *info, if (rep_quick && del+sort_info.dupp != info->state->del) { mi_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); - mi_check_print_error(param,"Run recovery again without -q"); + mi_check_print_error(param,"Run recovery again without --quick"); got_error=1; param->retry_repair=1; param->testflag|=T_RETRY_WITHOUT_QUICK; @@ -1907,7 +1893,7 @@ int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file, { if (flush_key_blocks(key_cache, file, dirty_part_map, FLUSH_RELEASE)) { - mi_check_print_error(param,"%d when trying to write bufferts",my_errno); + mi_check_print_error(param,"%d when trying to write buffers",my_errno); return(1); } if (!param->using_global_keycache) @@ -2216,7 +2202,7 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, MYISAM_SHARE *share=info->s; HA_KEYSEG *keyseg; ulong *rec_per_key_part; - char llbuff[22]; + char llbuff[22], llbuff2[22]; MI_SORT_INFO sort_info; ulonglong UNINIT_VAR(key_map); DBUG_ENTER("mi_repair_by_sort"); @@ -2232,12 +2218,15 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, printf("Data records: %s\n", llstr(start_records,llbuff)); } param->testflag|=T_REP; /* for easy checking */ + param->retry_repair= 0; + param->warning_printed= param->error_printed= param->note_printed= 0; if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; bzero((char*)&sort_info,sizeof(sort_info)); bzero((char *)&sort_param, sizeof(sort_param)); + if (!(sort_info.key_block= alloc_key_blocks(param, (uint) param->sort_key_blocks, @@ -2249,7 
+2238,7 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, init_io_cache(&info->rec_cache,info->dfile, (uint) param->write_buffer_length, WRITE_CACHE,new_header_length,1, - MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))) + MYF((param->myf_rw & MY_WAIT_IF_FULL) | MY_WME)))) goto err; sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; info->opt_flag|=WRITE_CACHE_USED; @@ -2419,7 +2408,10 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, (my_bool) (!(param->testflag & T_VERBOSE)), (uint) param->sort_buffer_length)) { - param->retry_repair=1; + param->retry_repair= 1; + if (! param->error_printed) + mi_check_print_error(param, "Couldn't fix table with create_index_by_sort(). Error: %d", + my_errno); goto err; } /* No need to calculate checksum again. */ @@ -2448,7 +2440,10 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, /* Don't repair if we loosed more than one row */ if (info->state->records+1 < start_records) { - info->state->records=start_records; + mi_check_print_error(param, + "Couldn't fix table as SAFE_REPAIR was requested and we would loose too many rows. 
%s -> %s", + llstr(start_records, llbuff), llstr(info->state->records, llbuff2)); + info->state->records= start_records; goto err; } } @@ -2478,7 +2473,7 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, if (rep_quick && del+sort_info.dupp != info->state->del) { mi_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); - mi_check_print_error(param,"Run recovery again without -q"); + mi_check_print_error(param,"Run recovery again without --quick"); got_error=1; param->retry_repair=1; param->testflag|=T_RETRY_WITHOUT_QUICK; @@ -2550,6 +2545,8 @@ err: param->retry_repair= 0; /* Safety */ } mi_mark_crashed_on_repair(info); + if (killed_ptr(param)) + param->retry_repair= 0; /* No use to retry repair */ } else if (key_map == share->state.key_map) share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS; @@ -2645,6 +2642,9 @@ int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info, printf("Data records: %s\n", llstr(start_records,llbuff)); } param->testflag|=T_REP; /* for easy checking */ + param->retry_repair= 0; + param->warning_printed= 0; + param->error_printed= 0; if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; @@ -3077,6 +3077,8 @@ err: param->retry_repair= 0; /* Safety */ } mi_mark_crashed_on_repair(info); + if (killed_ptr(param)) + param->retry_repair= 0; } else if (key_map == share->state.key_map) share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS; @@ -3111,7 +3113,13 @@ static int sort_key_read(MI_SORT_PARAM *sort_param, void *key) DBUG_ENTER("sort_key_read"); if ((error=sort_get_next_record(sort_param))) + { + DBUG_ASSERT(error < 0 || + sort_info->param->error_printed || + sort_info->param->warning_printed || + sort_info->param->note_printed); DBUG_RETURN(error); + } if (info->state->records == sort_info->max_records) { mi_check_print_error(sort_info->param, @@ -3228,7 +3236,12 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) 
DBUG_ENTER("sort_get_next_record"); if (killed_ptr(param)) + { + mi_check_print_error(param, "Repair killed by user with cause: %d", + (int) killed_ptr(param)); + param->retry_repair= 0; DBUG_RETURN(1); + } switch (share->data_file_type) { case STATIC_RECORD: @@ -3314,6 +3327,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) } if (searching && ! sort_param->fix_datafile) { + mi_check_print_info(param, + "Datafile is corrupted; Restart repair with option to copy datafile"); param->error_printed=1; param->retry_repair=1; param->testflag|=T_RETRY_WITHOUT_QUICK; @@ -3375,6 +3390,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) } if (error) { + DBUG_ASSERT(param->note_printed); if (found_record) goto try_next; searching=1; @@ -3415,7 +3431,11 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) share->state.split++; } if (found_record) + { + mi_check_print_info(param, + "Found row block followed by deleted block"); goto try_next; + } if (searching) { pos+=MI_DYN_ALIGN_SIZE; @@ -3449,6 +3469,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) mi_check_print_error(param,"Not enough memory for blob at %s (need %lu)", llstr(sort_param->start_recpos,llbuff), (ulong) block_info.rec_len); + DBUG_ASSERT(param->error_printed); DBUG_RETURN(1); } else @@ -3530,8 +3551,6 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) if (_mi_rec_unpack(info,sort_param->record,sort_param->rec_buff, sort_param->find_length) != MY_FILE_ERROR) { - if (sort_param->read_cache.error < 0) - DBUG_RETURN(1); if (sort_param->calc_checksum) info->checksum= (*info->s->calc_check_checksum)(info, sort_param->record); @@ -3557,6 +3576,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) sort_param->key+1, llstr(sort_param->start_recpos,llbuff)); try_next: + DBUG_ASSERT(param->error_printed || param->note_printed); pos=(sort_param->start_recpos+=MI_DYN_ALIGN_SIZE); searching=1; } @@ -3628,6 +3648,7 @@ static int 
sort_get_next_record(MI_SORT_PARAM *sort_param) DBUG_ASSERT(0); /* Impossible */ break; } + DBUG_ASSERT(0); /* Impossible */ DBUG_RETURN(1); /* Impossible */ } @@ -3686,7 +3707,7 @@ int sort_write_record(MI_SORT_PARAM *sort_param) if (sort_info->buff_length < reclength) { if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength, - MYF(MY_FREE_ON_ERROR | + MYF(MY_FREE_ON_ERROR | MY_WME | MY_ALLOW_ZERO_PTR)))) DBUG_RETURN(1); sort_info->buff_length=reclength; diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c index 5906c9dc70a..9270ee4fbb8 100644 --- a/storage/myisam/mi_test2.c +++ b/storage/myisam/mi_test2.c @@ -836,6 +836,7 @@ end: puts("Locking used"); if (use_blob) puts("blobs used"); + bzero(&stats, sizeof(stats)); get_key_cache_statistics(dflt_key_cache, 0, &stats); printf("key cache status: \n\ blocks used:%10lu\n\ diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c index 72be3d2f810..6f27b1bc0d2 100644 --- a/storage/myisam/myisamchk.c +++ b/storage/myisam/myisamchk.c @@ -1719,6 +1719,7 @@ void mi_check_print_info(HA_CHECK *param __attribute__((unused)), { va_list args; + param->note_printed=1; va_start(args,fmt); (void) vfprintf(stdout, fmt, args); (void) fputc('\n',stdout); diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c index cc05121f164..da65ddb7be6 100644 --- a/storage/myisam/sort.c +++ b/storage/myisam/sort.c @@ -151,6 +151,7 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, { mi_check_print_error(info->sort_info->param, "myisam_sort_buffer_size is too small"); + my_errno= ENOMEM; goto err; } } @@ -175,7 +176,8 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, if (memavl < MIN_SORT_BUFFER) { mi_check_print_error(info->sort_info->param,"MyISAM sort buffer too small"); /* purecov: tested */ - goto err; /* purecov: tested */ + my_errno= ENOMEM; /* purecov: tested */ + goto err; /* purecov: tested */ } (*info->lock_in_memory)(info->sort_info->param);/* Everything is 
allocated */ diff --git a/storage/ndb/plug.in b/storage/ndb/plug.in index 04e618923e4..349c0660a1c 100644 --- a/storage/ndb/plug.in +++ b/storage/ndb/plug.in @@ -2,7 +2,6 @@ sinclude(storage/ndb/ndb_configure.m4) MYSQL_STORAGE_ENGINE(ndbcluster, ndbcluster, [Cluster Storage Engine], [High Availability Clustered tables],) -MYSQL_PLUGIN_DIRECTORY(ndbcluster,[storage/ndb]) MYSQL_PLUGIN_STATIC(ndbcluster, [[\$(ndbcluster_libs) \$(ndbcluster_system_libs) \$(NDB_SCI_LIBS)]]) MYSQL_PLUGIN_ACTIONS(ndbcluster,[MYSQL_SETUP_NDBCLUSTER]) MYSQL_PLUGIN_DEPENDS(ndbcluster, [partition]) diff --git a/storage/pbxt/src/database_xt.cc b/storage/pbxt/src/database_xt.cc index 8d1b4e46da9..635cb63224a 100644 --- a/storage/pbxt/src/database_xt.cc +++ b/storage/pbxt/src/database_xt.cc @@ -87,7 +87,7 @@ xtPublic void xt_lock_installation(XTThreadPtr self, char *installation_path) char file_path[PATH_MAX]; char buffer[101]; size_t red_size; - llong pid; + llong pid __attribute__ ((unused)); xtBool cd = pbxt_crash_debug; xt_strcpy(PATH_MAX, file_path, installation_path); diff --git a/storage/pbxt/src/ha_pbxt.cc b/storage/pbxt/src/ha_pbxt.cc index 1aec6426c8a..f574dbd362a 100644 --- a/storage/pbxt/src/ha_pbxt.cc +++ b/storage/pbxt/src/ha_pbxt.cc @@ -1596,7 +1596,7 @@ static int pbxt_rollback(handlerton *hton, THD *thd, bool all) if (!all) self->st_stat_trans = FALSE; } - return 0; + return err; } #ifdef DRIZZLED @@ -2904,11 +2904,9 @@ int ha_pbxt::update_row(const byte * old_data, byte * new_data) * insert into t1 (val) values (1); */ if (table->found_next_number_field && new_data == table->record[0]) { - MX_LONGLONG_T nr; my_bitmap_map *old_map; old_map = mx_tmp_use_all_columns(table, table->read_set); - nr = table->found_next_number_field->val_int(); ha_set_auto_increment(pb_open_tab, table->found_next_number_field); mx_tmp_restore_column_map(table, old_map); } diff --git a/storage/pbxt/src/memory_xt.cc b/storage/pbxt/src/memory_xt.cc index b2f6c248b3c..d498336e814 100644 --- 
a/storage/pbxt/src/memory_xt.cc +++ b/storage/pbxt/src/memory_xt.cc @@ -558,7 +558,7 @@ static size_t mm_checkmem(XTThreadPtr self, MissingMemoryPtr mm_ptr, void *p, xt unsigned char *ptr = (unsigned char *) p - MEM_DEBUG_HDR_SIZE; MemoryDebugPtr debug_ptr = (MemoryDebugPtr) ptr; size_t size = debug_ptr->size; - long a_value; /* Added to simplfy debugging. */ + long a_value __attribute__ ((unused)); /* Added to simplfy debugging. */ if (!ASSERT(p)) return(0); diff --git a/storage/pbxt/src/myxt_xt.cc b/storage/pbxt/src/myxt_xt.cc index 7fd94aeaef7..f04138bd10f 100644 --- a/storage/pbxt/src/myxt_xt.cc +++ b/storage/pbxt/src/myxt_xt.cc @@ -836,7 +836,10 @@ xtPublic xtBool myxt_create_row_from_key(XTOpenTablePtr XT_UNUSED(ot), XTIndexPt { byte *record = (byte *) dest_buff; register byte *key; - byte *pos,*key_end; + byte *pos; +#ifdef CHECK_KEYS + byte *key_end; +#endif register XTIndexSegRec *keyseg = ind->mi_seg; /* GOTCHA: When selecting from multiple @@ -849,7 +852,9 @@ xtPublic xtBool myxt_create_row_from_key(XTOpenTablePtr XT_UNUSED(ot), XTIndexPt memset(dest_buff, 0xFF, table->s->null_bytes); */ key = (byte *) b_value; +#ifdef CHECK_KEYS key_end = key + key_len; +#endif for (u_int i=0; i<ind->mi_seg_count; i++, keyseg++) { if (keyseg->null_bit) { if (!*key++) @@ -1027,7 +1032,8 @@ xtPublic u_int myxt_get_key_length(XTIndexPtr ind, xtWord1 *key_buf) register XTIndexSegRec *keyseg = ind->mi_seg; register uchar *key_data = (uchar *) key_buf; uint seg_len; - uint pack_len; + uint pack_len + __attribute__ ((unused)); for (u_int i=0; i<ind->mi_seg_count; i++, keyseg++) { /* Handle NULL part */ @@ -1512,7 +1518,7 @@ xtPublic u_int myxt_key_seg_length(XTIndexSegRec *keyseg, u_int key_offset, xtWo xtPublic xtWord4 myxt_store_row_length(XTOpenTablePtr ot, char *rec_buff) { TABLE *table = ot->ot_table->tab_dic.dic_my_table; - char *sdata; + char *sdata __attribute__ ((unused)); xtWord4 dlen; xtWord4 item_size; xtWord4 row_size = 0; @@ -1892,7 +1898,8 @@ xtPublic void 
myxt_print_key(XTIndexPtr ind, xtWord1 *key_value) register XTIndexSegRec *keyseg = ind->mi_seg; register uchar *b = (uchar *) key_value; uint b_length; - uint pack_len; + uint pack_len + __attribute__ ((unused)); for (u_int i = 0; i < ind->mi_seg_count; i++, keyseg++) { if (i!=0) diff --git a/storage/pbxt/src/restart_xt.cc b/storage/pbxt/src/restart_xt.cc index 162ec90d754..54454ac92c5 100644 --- a/storage/pbxt/src/restart_xt.cc +++ b/storage/pbxt/src/restart_xt.cc @@ -94,17 +94,21 @@ void xt_print_log_record(xtLogID log, xtLogOffset offset, XTXactLogBufferDPtr re xtBool xn_set = FALSE; xtXactID xn_id = 0; char buffer[200]; +#ifdef TRACE_RECORD_DATA XTTabRecExtDPtr rec_buf; - XTTabRecExtDPtr ext_rec; XTTabRecFixDPtr fix_rec; u_int rec_len; +#endif + XTTabRecExtDPtr ext_rec; xtLogID log_id = 0; xtLogOffset log_offset = 0; +#ifdef TRACE_RECORD_DATA rec_buf = NULL; - ext_rec = NULL; fix_rec = NULL; rec_len = 0; +#endif + ext_rec = NULL; switch (record->xl.xl_status_1) { case XT_LOG_ENT_REC_MODIFIED: case XT_LOG_ENT_UPDATE: @@ -118,10 +122,12 @@ void xt_print_log_record(xtLogID log, xtLogOffset offset, XTXactLogBufferDPtr re rec_id = XT_GET_DISK_4(record->xu.xu_rec_id_4); xn_id = XT_GET_DISK_4(record->xu.xu_xact_id_4); row_id = XT_GET_DISK_4(record->xu.xu_row_id_4); - rec_len = XT_GET_DISK_2(record->xu.xu_size_2); xn_set = TRUE; type="rec"; +#ifdef TRACE_RECORD_DATA + rec_len = XT_GET_DISK_2(record->xu.xu_size_2); rec_buf = (XTTabRecExtDPtr) &record->xu.xu_rec_type_1; +#endif ext_rec = (XTTabRecExtDPtr) &record->xu.xu_rec_type_1; if (XT_REC_IS_EXT_DLOG(ext_rec->tr_rec_type_1)) { log_id = XT_GET_DISK_2(ext_rec->re_log_id_2); @@ -129,7 +135,9 @@ void xt_print_log_record(xtLogID log, xtLogOffset offset, XTXactLogBufferDPtr re } else { ext_rec = NULL; +#ifdef TRACE_RECORD_DATA fix_rec = (XTTabRecFixDPtr) &record->xu.xu_rec_type_1; +#endif } break; case XT_LOG_ENT_UPDATE_FL: @@ -143,10 +151,12 @@ void xt_print_log_record(xtLogID log, xtLogOffset offset, XTXactLogBufferDPtr 
re rec_id = XT_GET_DISK_4(record->xf.xf_rec_id_4); xn_id = XT_GET_DISK_4(record->xf.xf_xact_id_4); row_id = XT_GET_DISK_4(record->xf.xf_row_id_4); - rec_len = XT_GET_DISK_2(record->xf.xf_size_2); xn_set = TRUE; type="rec"; +#ifdef TRACE_RECORD_DATA + rec_len = XT_GET_DISK_2(record->xf.xf_size_2); rec_buf = (XTTabRecExtDPtr) &record->xf.xf_rec_type_1; +#endif ext_rec = (XTTabRecExtDPtr) &record->xf.xf_rec_type_1; if (XT_REC_IS_EXT_DLOG(ext_rec->tr_rec_type_1)) { log_id = XT_GET_DISK_2(ext_rec->re_log_id_2); @@ -154,7 +164,9 @@ void xt_print_log_record(xtLogID log, xtLogOffset offset, XTXactLogBufferDPtr re } else { ext_rec = NULL; +#ifdef TRACE_RECORD_DATA fix_rec = (XTTabRecFixDPtr) &record->xf.xf_rec_type_1; +#endif } break; case XT_LOG_ENT_REC_FREED: @@ -173,10 +185,12 @@ void xt_print_log_record(xtLogID log, xtLogOffset offset, XTXactLogBufferDPtr re rec_id = XT_GET_DISK_4(record->rb.rb_rec_id_4); xn_id = XT_GET_DISK_4(record->rb.rb_xact_id_4); row_id = XT_GET_DISK_4(record->rb.rb_row_id_4); - rec_len = XT_GET_DISK_2(record->rb.rb_size_2); xn_set = TRUE; type="rec"; +#ifdef TRACE_RECORD_DATA + rec_len = XT_GET_DISK_2(record->rb.rb_size_2); rec_buf = (XTTabRecExtDPtr) &record->rb.rb_rec_type_1; +#endif ext_rec = (XTTabRecExtDPtr) &record->rb.rb_rec_type_1; if (XT_REC_IS_EXT_DLOG(record->rb.rb_rec_type_1)) { log_id = XT_GET_DISK_2(ext_rec->re_log_id_2); @@ -184,7 +198,9 @@ void xt_print_log_record(xtLogID log, xtLogOffset offset, XTXactLogBufferDPtr re } else { ext_rec = NULL; +#ifdef TRACE_RECORD_DATA fix_rec = (XTTabRecFixDPtr) &record->rb.rb_rec_type_1; +#endif } break; case XT_LOG_ENT_REC_MOVED: @@ -967,7 +983,8 @@ static void xres_apply_change(XTThreadPtr self, XTOpenTablePtr ot, XTXactLogBuff xtLogID data_log_id = 0; xtLogOffset data_log_offset = 0; u_int cols_required = 0; - xtBool record_loaded; + xtBool record_loaded + __attribute__ ((unused)); size_t rec_size; rec_id = XT_GET_DISK_4(record->rb.rb_rec_id_4); diff --git a/storage/pbxt/src/tabcache_xt.cc 
b/storage/pbxt/src/tabcache_xt.cc index 92958f2da49..c5374b400c3 100644 --- a/storage/pbxt/src/tabcache_xt.cc +++ b/storage/pbxt/src/tabcache_xt.cc @@ -393,7 +393,8 @@ void XTTabCache::xt_tc_release_page(XT_ROW_REC_FILE_PTR XT_UNUSED(file), XTTabCa TAB_CAC_WRITE_LOCK(&seg->tcs_lock, thread->t_id); #ifdef DEBUG - XTTabCachePagePtr lpage, ppage; + XTTabCachePagePtr lpage; + XTTabCachePagePtr ppage __attribute__ ((unused)); ppage = NULL; lpage = seg->tcs_hash_table[page->tcp_hash_idx]; @@ -1202,7 +1203,7 @@ static void tabc_fr_main(XTThreadPtr self) static void *tabc_fr_run_thread(XTThreadPtr self) { int count; - void *mysql_thread; + void *mysql_thread __attribute__ ((unused)); myxt_wait_pbxt_plugin_slot_assigned(self); diff --git a/storage/pbxt/src/table_xt.cc b/storage/pbxt/src/table_xt.cc index 443fc3ee193..7c243d26f9b 100644 --- a/storage/pbxt/src/table_xt.cc +++ b/storage/pbxt/src/table_xt.cc @@ -1976,8 +1976,10 @@ xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot) xtLogOffset log_offset; #endif xtRecordID rec_id; +#ifdef DUMP_CHECK_TABLE xtRecordID prev_rec_id; xtXactID xn_id; +#endif xtRowID row_id; u_llong free_rec_count = 0, free_count2 = 0; u_llong delete_rec_count = 0; @@ -2104,8 +2106,10 @@ xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot) else printf(" "); #endif +#ifdef DUMP_CHECK_TABLE prev_rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4); xn_id = XT_GET_DISK_4(rec_buf->tr_xact_id_4); +#endif row_id = XT_GET_DISK_4(rec_buf->tr_row_id_4); switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) { case XT_TAB_STATUS_FREED: diff --git a/storage/pbxt/src/thread_xt.cc b/storage/pbxt/src/thread_xt.cc index 1addf487153..9e3261bc8e0 100644 --- a/storage/pbxt/src/thread_xt.cc +++ b/storage/pbxt/src/thread_xt.cc @@ -488,8 +488,8 @@ static void thr_free_resources(XTThreadPtr self, XTResourcePtr top) xtPublic void xt_bug(XTThreadPtr XT_UNUSED(self)) { - static int *bug_ptr = NULL; - + static int *bug_ptr __attribute__ ((unused)); + 
bug_ptr = NULL; } @@ -1178,7 +1178,7 @@ xtPublic XTThreadPtr xt_init_threading(u_int max_threads) #ifdef XT_TRACK_CONNECTIONS if (xt_thr_maximum_threads > XT_TRACK_MAX_CONNS) { xt_log_error(XT_NS_CONTEXT, XT_LOG_FATAL, XT_ERR_TOO_MANY_THREADS, 0, - "XT_TRACK_CONNECTIONS is enabled and xt_thr_maximum_threads > XT_TRACK_MAX_CONNS"); + "XT_TRACK_CONNECTIONS (debugging aid) is enabled and xt_thr_maximum_threads > XT_TRACK_MAX_CONNS. To continue restart with a smaller value for --max-connections"); goto failed; } #endif diff --git a/storage/pbxt/src/xactlog_xt.cc b/storage/pbxt/src/xactlog_xt.cc index addc14ff5d8..69059046067 100644 --- a/storage/pbxt/src/xactlog_xt.cc +++ b/storage/pbxt/src/xactlog_xt.cc @@ -2117,7 +2117,7 @@ xtBool XTDatabaseLog::xlog_seq_next(XTXactSeqReadPtr seq, XTXactLogBufferDPtr *r size_t rec_offset; size_t max_rec_len; size_t size; - u_int check_size = 1; + u_int check_size __attribute__ ((unused))= 1; /* Go to the next record (xseq_record_len must be initialized * to 0 for this to work. 
@@ -2629,7 +2629,7 @@ static void *xlog_wr_run_thread(XTThreadPtr self) { XTDatabaseHPtr db = (XTDatabaseHPtr) self->t_data; int count; - void *mysql_thread; + void *mysql_thread __attribute__ ((unused)); mysql_thread = myxt_create_thread(); diff --git a/storage/sphinx/ha_sphinx.cc b/storage/sphinx/ha_sphinx.cc index ce94b675af4..8abff2003be 100644 --- a/storage/sphinx/ha_sphinx.cc +++ b/storage/sphinx/ha_sphinx.cc @@ -705,7 +705,7 @@ static int sphinx_done_func ( void * ) pthread_mutex_destroy ( &sphinx_mutex ); } - SPH_RET(0); + SPH_RET(error); } diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index f6ef44e5b5c..20876e73da0 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -3598,7 +3598,7 @@ static void btr_record_not_null_field_in_rec( /*=============================*/ - rec_t* rec, /*!< in: physical record */ + rec_t* rec __attribute__ ((unused)),/*!< in: physical record */ ulint n_unique, /*!< in: dict_index_get_n_unique(index), number of columns uniquely determine an index entry */ @@ -3618,9 +3618,8 @@ btr_record_not_null_field_in_rec( for (i = 0; i < n_unique; i++) { ulint rec_len; - byte* field; - field = rec_get_nth_field(rec, offsets, i, &rec_len); + rec_get_nth_field_offs(offsets, i, &rec_len); if (rec_len != UNIV_SQL_NULL) { n_not_null[i]++; diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 1ad540f47ab..e1b29fa02b0 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -484,10 +484,12 @@ Looks for column n in an index. 
ULINT_UNDEFINED if not contained */ UNIV_INTERN ulint -dict_index_get_nth_col_pos( -/*=======================*/ - const dict_index_t* index, /*!< in: index */ - ulint n) /*!< in: column number */ +dict_index_get_nth_col_or_prefix_pos( +/*=================================*/ + const dict_index_t* index, /*!< in: index */ + ulint n, /*!< in: column number */ + ibool inc_prefix) /*!< in: TRUE=consider + column prefixes too */ { const dict_field_t* field; const dict_col_t* col; @@ -509,7 +511,8 @@ dict_index_get_nth_col_pos( for (pos = 0; pos < n_fields; pos++) { field = dict_index_get_nth_field(index, pos); - if (col == field->col && field->prefix_len == 0) { + if (col == field->col + && (inc_prefix || field->prefix_len == 0)) { return(pos); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 7cf212d28ae..e0a4f77ddd6 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -122,11 +122,6 @@ static mysql_cond_t commit_cond; static mysql_mutex_t commit_cond_m; static bool innodb_inited = 0; -C_MODE_START -static xtradb_icp_result_t index_cond_func_innodb(void *arg); -C_MODE_END - - #define INSIDE_HA_INNOBASE_CC @@ -2305,14 +2300,24 @@ trx_is_strict( /**************************************************************//** Resets some fields of a prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ -static void -reset_template( -/*===========*/ - row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */ +inline +ha_innobase::reset_template(void) +/*=============================*/ { + ut_ad(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); + ut_ad(prebuilt->magic_n2 == prebuilt->magic_n); + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; + /* Reset index condition pushdown state. 
*/ + if (prebuilt->idx_cond) { + prebuilt->idx_cond = NULL; + prebuilt->idx_cond_n_cols = 0; + /* Invalidate prebuilt->mysql_template + in ha_innobase::write_row(). */ + prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE; + } } /*****************************************************************//** @@ -2367,7 +2372,7 @@ ha_innobase::init_table_handle_for_HANDLER(void) we???? */ prebuilt->used_in_HANDLER = TRUE; - reset_template(prebuilt); + reset_template(); } /*********************************************************************//** @@ -4525,8 +4530,8 @@ static inline uint get_field_offset( /*=============*/ - TABLE* table, /*!< in: MySQL table object */ - Field* field) /*!< in: MySQL field object */ + const TABLE* table, /*!< in: MySQL table object */ + const Field* field) /*!< in: MySQL field object */ { return((uint) (field->ptr - table->record[0])); } @@ -5088,44 +5093,170 @@ ha_innobase::store_key_val_for_row( } /**************************************************************//** +Determines if a field is needed in a prebuilt struct 'template'. 
+@return field to use, or NULL if the field is not needed */ +static +const Field* +build_template_needs_field( +/*=======================*/ + ibool index_contains, /*!< in: + dict_index_contains_col_or_prefix( + index, i) */ + ibool read_just_key, /*!< in: TRUE when MySQL calls + ha_innobase::extra with the + argument HA_EXTRA_KEYREAD; it is enough + to read just columns defined in + the index (i.e., no read of the + clustered index record necessary) */ + ibool fetch_all_in_key, + /*!< in: true=fetch all fields in + the index */ + ibool fetch_primary_key_cols, + /*!< in: true=fetch the + primary key columns */ + dict_index_t* index, /*!< in: InnoDB index to use */ + const TABLE* table, /*!< in: MySQL table object */ + ulint i, /*!< in: field index in InnoDB table */ + ulint sql_idx) /*!< in: field index in SQL table */ +{ + const Field* field = table->field[sql_idx]; + + ut_ad(index_contains == dict_index_contains_col_or_prefix(index, i)); + + if (!index_contains) { + if (read_just_key) { + /* If this is a 'key read', we do not need + columns that are not in the key */ + + return(NULL); + } + } else if (fetch_all_in_key) { + /* This field is needed in the query */ + + return(field); + } + + if (bitmap_is_set(table->read_set, sql_idx) + || bitmap_is_set(table->write_set, sql_idx)) { + /* This field is needed in the query */ + + return(field); + } + + if (fetch_primary_key_cols + && dict_table_col_in_clustered_key(index->table, i)) { + /* This field is needed in the query */ + + return(field); + } + + /* This field is not needed in the query, skip it */ + + return(NULL); +} + +/**************************************************************//** +Adds a field is to a prebuilt struct 'template'. 
+@return the field template */ +static +mysql_row_templ_t* +build_template_field( +/*=================*/ + row_prebuilt_t* prebuilt, /*!< in/out: template */ + dict_index_t* clust_index, /*!< in: InnoDB clustered index */ + dict_index_t* index, /*!< in: InnoDB index to use */ + TABLE* table, /*!< in: MySQL table object */ + const Field* field, /*!< in: field in MySQL table */ + ulint i) /*!< in: field index in InnoDB table */ +{ + mysql_row_templ_t* templ; + const dict_col_t* col; + + //ut_ad(field == table->field[i]); + ut_ad(clust_index->table == index->table); + + col = dict_table_get_nth_col(index->table, i); + + templ = prebuilt->mysql_template + prebuilt->n_template++; + UNIV_MEM_INVALID(templ, sizeof *templ); + templ->col_no = i; + templ->clust_rec_field_no = dict_col_get_clust_pos(col, clust_index); + ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED); + + if (dict_index_is_clust(index)) { + templ->rec_field_no = templ->clust_rec_field_no; + } else { + templ->rec_field_no = dict_index_get_nth_col_pos(index, i); + } + + if (field->null_ptr) { + templ->mysql_null_byte_offset = + (ulint) ((char*) field->null_ptr + - (char*) table->record[0]); + + templ->mysql_null_bit_mask = (ulint) field->null_bit; + } else { + templ->mysql_null_bit_mask = 0; + } + + templ->mysql_col_offset = (ulint) get_field_offset(table, field); + + templ->mysql_col_len = (ulint) field->pack_length(); + templ->type = col->mtype; + templ->mysql_type = (ulint)field->type(); + + if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { + templ->mysql_length_bytes = (ulint) + (((Field_varstring*)field)->length_bytes); + } + + templ->charset = dtype_get_charset_coll(col->prtype); + templ->mbminlen = DATA_MBMINLEN(col->mbminmaxlen); + templ->mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen); + templ->is_unsigned = col->prtype & DATA_UNSIGNED; + + if (!dict_index_is_clust(index) + && templ->rec_field_no == ULINT_UNDEFINED) { + prebuilt->need_to_access_clustered = TRUE; + } + + if (prebuilt->mysql_prefix_len < 
templ->mysql_col_offset + + templ->mysql_col_len) { + prebuilt->mysql_prefix_len = templ->mysql_col_offset + + templ->mysql_col_len; + } + + if (templ->type == DATA_BLOB) { + prebuilt->templ_contains_blob = TRUE; + } + + return(templ); +} + +/**************************************************************//** Builds a 'template' to the prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ -static +UNIV_INTERN void -build_template( -/*===========*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ - THD* thd, /*!< in: current user thread, used - only if templ_type is - ROW_MYSQL_REC_FIELDS */ - TABLE* table, /* in: MySQL table */ - ha_innobase* file, /* in: ha_innobase handler */ - uint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or - ROW_MYSQL_REC_FIELDS */ +ha_innobase::build_template( +/*========================*/ + bool whole_row) /*!< in: true=ROW_MYSQL_WHOLE_ROW, + false=ROW_MYSQL_REC_FIELDS */ { dict_index_t* index; dict_index_t* clust_index; - mysql_row_templ_t* templ; - Field* field; - ulint n_fields, n_stored_fields; - ulint n_requested_fields = 0; + ulint n_stored_fields; ibool fetch_all_in_key = FALSE; ibool fetch_primary_key_cols = FALSE; - ulint sql_idx, innodb_idx=0; - /* byte offset of the end of last requested column */ - ulint mysql_prefix_len = 0; - ibool do_idx_cond_push= FALSE; - ibool need_second_pass= FALSE; - + ulint i, sql_idx; + if (prebuilt->select_lock_type == LOCK_X) { /* We always retrieve the whole clustered index record if we use exclusive row level locks, for example, if the read is done in an UPDATE statement. 
*/ - templ_type = ROW_MYSQL_WHOLE_ROW; - } - - if (templ_type == ROW_MYSQL_REC_FIELDS) { + whole_row = true; + } else if (!whole_row) { if (prebuilt->hint_need_to_fetch_extra_cols == ROW_RETRIEVE_ALL_COLS) { @@ -5142,7 +5273,7 @@ build_template( fetch_all_in_key = TRUE; } else { - templ_type = ROW_MYSQL_WHOLE_ROW; + whole_row = true; } } else if (prebuilt->hint_need_to_fetch_extra_cols == ROW_RETRIEVE_PRIMARY_KEY) { @@ -5159,21 +5290,13 @@ build_template( clust_index = dict_table_get_first_index(prebuilt->table); - if (templ_type == ROW_MYSQL_REC_FIELDS) { - index = prebuilt->index; - } else { - index = clust_index; - } + index = whole_row ? clust_index : prebuilt->index; - if (index == clust_index) { - prebuilt->need_to_access_clustered = TRUE; - } else { - prebuilt->need_to_access_clustered = FALSE; - /* Below we check column by column if we need to access - the clustered index */ - } + prebuilt->need_to_access_clustered = (index == clust_index); + + /* Below we check column by column if we need to access + the clustered index. */ - n_fields = (ulint)table->s->fields; /* number of columns */ n_stored_fields= (ulint)table->s->stored_fields; /* number of stored columns */ if (!prebuilt->mysql_template) { @@ -5181,160 +5304,206 @@ build_template( mem_alloc(n_stored_fields * sizeof(mysql_row_templ_t)); } - prebuilt->template_type = templ_type; + prebuilt->template_type = whole_row + ? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS; prebuilt->null_bitmap_len = table->s->null_bytes; + /* Prepare to build prebuilt->mysql_template[]. */ prebuilt->templ_contains_blob = FALSE; + prebuilt->mysql_prefix_len = 0; + prebuilt->n_template = 0; + prebuilt->idx_cond_n_cols = 0; + + /* Note that in InnoDB, i is the column number in the table. + MySQL calls columns 'fields'. */ + + if (active_index != MAX_KEY && active_index == pushed_idx_cond_keyno) { + /* Push down an index condition or an end_range check. 
*/ + for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + + while (!table->field[sql_idx]->stored_in_db) { + sql_idx++; + } + + const ibool index_contains + = dict_index_contains_col_or_prefix(index, i); + + /* Test if an end_range or an index condition + refers to the field. Note that "index" and + "index_contains" may refer to the clustered index. + Index condition pushdown is relative to prebuilt->index + (the index that is being looked up first). */ + + /* When join_read_always_key() invokes this + code via handler::ha_index_init() and + ha_innobase::index_init(), end_range is not + yet initialized. Because of that, we must + always check for index_contains, instead of + the subset + field->part_of_key.is_set(active_index) + which would be acceptable if end_range==NULL. */ + if (index == prebuilt->index + ? index_contains + : dict_index_contains_col_or_prefix( + prebuilt->index, i)) { + /* Needed in ICP */ + const Field* field; + mysql_row_templ_t* templ; + + if (whole_row) { + field = table->field[sql_idx]; + } else { + field = build_template_needs_field( + index_contains, + prebuilt->read_just_key, + fetch_all_in_key, + fetch_primary_key_cols, + index, table, i, sql_idx); + if (!field) { + continue; + } + } - - /* - Setup index condition pushdown (note: we don't need to check if - this is a scan on primary key as that is checked in idx_cond_push) - */ - if (file->active_index == file->pushed_idx_cond_keyno && - file->active_index != MAX_KEY && - templ_type == ROW_MYSQL_REC_FIELDS) - do_idx_cond_push= need_second_pass= TRUE; - - /* Note that in InnoDB, i is the column number. MySQL calls columns - 'fields'. 
*/ - for (sql_idx = 0; sql_idx < n_fields; sql_idx++) { - const dict_col_t* col = &index->table->cols[innodb_idx]; - templ = prebuilt->mysql_template + n_requested_fields; - field = table->field[sql_idx]; - if (!field->stored_in_db) - goto skip_field; - - if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) { - /* Decide which columns we should fetch - and which we can skip. */ - register const ibool index_contains_field = - dict_index_contains_col_or_prefix(index, innodb_idx); - register const ibool index_covers_field = - field->part_of_key.is_set(file->active_index); - - if (!index_contains_field && prebuilt->read_just_key) { - /* If this is a 'key read', we do not need - columns that are not in the key */ - - goto skip_field; - } - - if (index_contains_field && fetch_all_in_key) { - /* This field is needed in the query */ - - goto include_field; - } - - if (bitmap_is_set(table->read_set, sql_idx) || - bitmap_is_set(table->write_set, sql_idx)) { - /* This field is needed in the query */ - - goto include_field; - } + templ = build_template_field( + prebuilt, clust_index, index, + table, field, i); + prebuilt->idx_cond_n_cols++; + ut_ad(prebuilt->idx_cond_n_cols + == prebuilt->n_template); + + if (index == prebuilt->index) { + templ->icp_rec_field_no + = templ->rec_field_no; + } else { + templ->icp_rec_field_no + = dict_index_get_nth_col_pos( + prebuilt->index, i); + } - if (fetch_primary_key_cols - && dict_table_col_in_clustered_key( - index->table, innodb_idx)) { - /* This field is needed in the query */ + if (dict_index_is_clust(prebuilt->index)) { + ut_ad(templ->icp_rec_field_no + != ULINT_UNDEFINED); + /* If the primary key includes + a column prefix, use it in + index condition pushdown, + because the condition is + evaluated before fetching any + off-page (externally stored) + columns. */ + if (templ->icp_rec_field_no + < prebuilt->index->n_uniq) { + /* This is a key column; + all set. 
*/ + continue; + } + } else if (templ->icp_rec_field_no + != ULINT_UNDEFINED) { + continue; + } - goto include_field; + /* This is a column prefix index. + The column prefix can be used in + an end_range comparison. */ + + templ->icp_rec_field_no + = dict_index_get_nth_col_or_prefix_pos( + prebuilt->index, i, TRUE); + ut_ad(templ->icp_rec_field_no + != ULINT_UNDEFINED); + + /* Index condition pushdown can be used on + all columns of a secondary index, and on + the PRIMARY KEY columns. */ + /* TODO: enable this assertion + (but first ensure that end_range is + valid here and use an accurate condition + for end_range) + ut_ad(!dict_index_is_clust(prebuilt->index) + || templ->rec_field_no + < prebuilt->index->n_uniq); + */ } - - /* This field is not needed in the query, skip it */ - - goto skip_field; -include_field: - if (do_idx_cond_push && - ((need_second_pass && !index_covers_field) || - (!need_second_pass && index_covers_field))) - goto skip_field; } - n_requested_fields++; - templ->col_no = innodb_idx; - templ->clust_rec_field_no = dict_col_get_clust_pos( - col, clust_index); - ut_ad(templ->clust_rec_field_no != ULINT_UNDEFINED); + ut_ad(prebuilt->idx_cond_n_cols > 0); + ut_ad(prebuilt->idx_cond_n_cols == prebuilt->n_template); + + /* Include the fields that are not needed in index condition + pushdown. */ + for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + + while (!table->field[sql_idx]->stored_in_db) { + sql_idx++; + } + + const ibool index_contains + = dict_index_contains_col_or_prefix(index, i); + + if (index == prebuilt->index + ? 
!index_contains + : !dict_index_contains_col_or_prefix( + prebuilt->index, i)) { + /* Not needed in ICP */ + const Field* field; + + if (whole_row) { + field = table->field[sql_idx]; + } else { + field = build_template_needs_field( + index_contains, + prebuilt->read_just_key, + fetch_all_in_key, + fetch_primary_key_cols, + index, table, i, sql_idx); + if (!field) { + continue; + } + } - if (index == clust_index) { - templ->rec_field_no = templ->clust_rec_field_no; - } else { - templ->rec_field_no = dict_index_get_nth_col_pos( - index, innodb_idx); - if (templ->rec_field_no == ULINT_UNDEFINED) { - prebuilt->need_to_access_clustered = TRUE; + build_template_field(prebuilt, + clust_index, index, + table, field, i); } } - if (field->null_ptr) { - templ->mysql_null_byte_offset = - (ulint) ((char*) field->null_ptr - - (char*) table->record[0]); - - templ->mysql_null_bit_mask = (ulint) field->null_bit; - } else { - templ->mysql_null_bit_mask = 0; - } + prebuilt->idx_cond = this; + } else { + /* No index condition pushdown */ + prebuilt->idx_cond = NULL; - templ->mysql_col_offset = (ulint) - get_field_offset(table, field); + for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + const Field* field; - templ->mysql_col_len = (ulint) field->pack_length(); - if (mysql_prefix_len < templ->mysql_col_offset - + templ->mysql_col_len) { - mysql_prefix_len = templ->mysql_col_offset - + templ->mysql_col_len; - } - templ->type = col->mtype; - templ->mysql_type = (ulint)field->type(); + while (!table->field[sql_idx]->stored_in_db) { + sql_idx++; + } - if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { - templ->mysql_length_bytes = (ulint) - (((Field_varstring*)field)->length_bytes); - } + if (whole_row) { + field = table->field[sql_idx]; + } else { + field = build_template_needs_field( + dict_index_contains_col_or_prefix( + index, i), + prebuilt->read_just_key, + fetch_all_in_key, + fetch_primary_key_cols, + index, table, i, sql_idx); + if (!field) { + continue; + } + } - 
templ->charset = dtype_get_charset_coll(col->prtype); - templ->mbminlen = dict_col_get_mbminlen(col); - templ->mbmaxlen = dict_col_get_mbmaxlen(col); - templ->is_unsigned = col->prtype & DATA_UNSIGNED; - if (templ->type == DATA_BLOB) { - prebuilt->templ_contains_blob = TRUE; - } -skip_field: - if (need_second_pass && (sql_idx+1 == n_fields)) - { - prebuilt->n_index_fields= n_requested_fields; - need_second_pass= FALSE; - sql_idx= (~(ulint)0); /* to start from 0 */ - innodb_idx= (~(ulint)0); /* to start from 0 */ ///psergey-merge-merge-last-change + build_template_field(prebuilt, clust_index, index, + table, field, i); } - if (field->stored_in_db) { - innodb_idx++; - } - } - - prebuilt->n_template = n_requested_fields; - prebuilt->mysql_prefix_len = mysql_prefix_len; - - if (do_idx_cond_push) - { - prebuilt->idx_cond_func= index_cond_func_innodb; - prebuilt->idx_cond_func_arg= file; - } - else - { - prebuilt->idx_cond_func= NULL; - prebuilt->n_index_fields= n_requested_fields; } if (index != clust_index && prebuilt->need_to_access_clustered) { /* Change rec_field_no's to correspond to the clustered index record */ - for (ulint i = do_idx_cond_push? 
prebuilt->n_index_fields : 0; - i < n_requested_fields; i++) { - templ = prebuilt->mysql_template + i; - + for (i = 0; i < prebuilt->n_template; i++) { + mysql_row_templ_t* templ + = &prebuilt->mysql_template[i]; templ->rec_field_no = templ->clust_rec_field_no; } } @@ -5597,7 +5766,7 @@ no_commit: /* Build the template used in converting quickly between the two database formats */ - build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW); + build_template(true); } innodb_srv_conc_enter_innodb(prebuilt->trx); @@ -6298,8 +6467,7 @@ ha_innobase::index_read( necessarily prebuilt->index, but can also be the clustered index */ if (prebuilt->sql_stat_start) { - build_template(prebuilt, user_thd, table, this, - ROW_MYSQL_REC_FIELDS); + build_template(false); } if (key_ptr) { @@ -6514,7 +6682,7 @@ ha_innobase::change_active_index( the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary copying. Starting from MySQL-4.1 we use a more efficient flag here. */ - build_template(prebuilt, user_thd, table, this, ROW_MYSQL_REC_FIELDS); + build_template(false); DBUG_RETURN(0); } @@ -7439,7 +7607,7 @@ ha_innobase::create( if (srv_file_per_table && !mysqld_embedded - && (!create_info->options & HA_LEX_CREATE_TMP_TABLE)) { + && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) { if ((name[1] == ':') || (name[0] == '\\' && name[1] == '\\')) { @@ -8908,7 +9076,7 @@ ha_innobase::check( /* Build the template; we will use a dummy template in index scans done in checking */ - build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW); + build_template(true); } if (prebuilt->table->ibd_file_missing) { @@ -9463,12 +9631,7 @@ ha_innobase::extra( } break; case HA_EXTRA_RESET_STATE: - reset_template(prebuilt); - /* Reset index condition pushdown state */ - pushed_idx_cond= FALSE; - pushed_idx_cond_keyno= MAX_KEY; - prebuilt->idx_cond_func= NULL; - in_range_check_pushed_down= FALSE; + reset_template(); break; case HA_EXTRA_NO_KEYREAD: prebuilt->read_just_key = 0; @@ -9514,14 
+9677,8 @@ ha_innobase::reset() row_mysql_prebuilt_free_blob_heap(prebuilt); } - reset_template(prebuilt); - - /* Reset index condition pushdown state */ - pushed_idx_cond_keyno= MAX_KEY; - pushed_idx_cond= NULL; - in_range_check_pushed_down= FALSE; + reset_template(); ds_mrr.dsmrr_close(); - prebuilt->idx_cond_func= NULL; /* TODO: This should really be reset in reset_template() but for now it's safer to do it explicitly here. */ @@ -9571,7 +9728,7 @@ ha_innobase::start_stmt( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); + reset_template(); if (!prebuilt->mysql_has_locked) { /* This handle is for a temporary table created inside @@ -9679,7 +9836,7 @@ ha_innobase::external_lock( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); + reset_template(); if (lock_type == F_WRLCK) { @@ -9855,7 +10012,7 @@ ha_innobase::transactional_table_lock( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); + reset_template(); if (lock_type == F_WRLCK) { prebuilt->select_lock_type = LOCK_X; @@ -12637,39 +12794,47 @@ bool ha_innobase::is_thd_killed() * Index Condition Pushdown interface implementation */ -C_MODE_START - -/* - Index condition check function to be called from within Innobase. - See note on ICP_RESULT for return values description. 
-*/ - -static xtradb_icp_result_t index_cond_func_innodb(void *arg) +/*************************************************************//** +InnoDB index push-down condition check +@return ICP_NO_MATCH, ICP_MATCH, ICP_OUT_OF_RANGE, or ICP_ABORTED_BY_USER */ +extern "C" UNIV_INTERN +enum icp_result +innobase_index_cond( +/*================*/ + void* file) /*!< in/out: pointer to ha_innobase */ { - ha_innobase *h= (ha_innobase*)arg; + ha_innobase *h= (ha_innobase*) file; + if (h->is_thd_killed()) - return XTRADB_ICP_ABORTED_BY_USER; + return ICP_ABORTED_BY_USER; if (h->end_range) { if (h->compare_key2(h->end_range) > 0) - return XTRADB_ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */ + return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */ } - return h->pushed_idx_cond->val_int()? XTRADB_ICP_MATCH : XTRADB_ICP_NO_MATCH; + return h->pushed_idx_cond->val_int()? ICP_MATCH : ICP_NO_MATCH; } -C_MODE_END - - -Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg) -{ - if (keyno_arg != primary_key && prebuilt->select_lock_type != LOCK_X) - { - pushed_idx_cond_keyno= keyno_arg; - pushed_idx_cond= idx_cond_arg; - in_range_check_pushed_down= TRUE; - return NULL; /* Table handler will check the entire condition */ - } - return idx_cond_arg; /* Table handler will not make any checks */ +/** Attempt to push down an index condition. 
+* @param[in] keyno MySQL key number +* @param[in] idx_cond Index condition to be checked +* @return NULL, because the entire condition is pushed down and checked by the table handler +*/ +UNIV_INTERN +class Item* +ha_innobase::idx_cond_push( + uint keyno, + class Item* idx_cond) +{ + DBUG_ENTER("ha_innobase::idx_cond_push"); + DBUG_ASSERT(keyno != MAX_KEY); + DBUG_ASSERT(idx_cond != NULL); + + pushed_idx_cond = idx_cond; + pushed_idx_cond_keyno = keyno; + in_range_check_pushed_down = TRUE; + /* Table handler will check the entire condition */ + DBUG_RETURN(NULL); } diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index f368d08f954..628ba536eb9 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -227,27 +227,81 @@ class ha_innobase: public handler bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); bool check_if_supported_virtual_columns(void) { return TRUE; } +private: + /** Builds a 'template' to the prebuilt struct. + + The template is used in fast retrieval of just those column + values MySQL needs in its processing. + @param whole_row true if access is needed to a whole row, + false if accessing individual fields is enough */ + void build_template(bool whole_row); + /** Resets a query execution 'template'. 
+ @see build_template() */ + inline void reset_template(); + public: - /** - * Multi Range Read interface - */ - int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf); - int multi_range_read_next(range_id_t *range_info); - ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, - void *seq_init_param, - uint n_ranges, uint *bufsz, + /** @name Multi Range Read interface @{ */ + /** Initialize multi range read @see DsMrr_impl::dsmrr_init + * @param seq + * @param seq_init_param + * @param n_ranges + * @param mode + * @param buf + */ + int multi_range_read_init(RANGE_SEQ_IF* seq, + void* seq_init_param, + uint n_ranges, uint mode, + HANDLER_BUFFER *buf); + /** Process next multi range read @see DsMrr_impl::dsmrr_next + * @param range_info + */ + int multi_range_read_next(range_id_t *range_info); + /** Initialize multi range read and get information. + * @see ha_myisam::multi_range_read_info_const + * @see DsMrr_impl::dsmrr_info_const + * @param keyno + * @param seq + * @param seq_init_param + * @param n_ranges + * @param bufsz + * @param flags + * @param cost + */ + ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, + uint n_ranges, uint *bufsz, + uint *flags, COST_VECT *cost); + /** Initialize multi range read and get information. 
+ * @see DsMrr_impl::dsmrr_info + * @param keyno + * @param n_ranges + * @param keys + * @param key_parts + * @param bufsz + * @param flags + * @param cost + */ + ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, + uint key_parts, uint *bufsz, uint *flags, COST_VECT *cost); - ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint key_parts, uint *bufsz, - uint *flags, COST_VECT *cost); - int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size); - DsMrr_impl ds_mrr; + int multi_range_read_explain_info(uint mrr_mode, + char *str, size_t size); + + /** Attempt to push down an index condition. + * @param[in] keyno MySQL key number + * @param[in] idx_cond Index condition to be checked + * @return NULL (the table handler will check the entire condition) + */ + class Item* idx_cond_push(uint keyno, class Item* idx_cond); + + /* A helper function for innobase_index_cond(): */ + bool is_thd_killed(); - Item *idx_cond_push(uint keyno, Item* idx_cond); +private: + /** The multi range read session object */ + DsMrr_impl ds_mrr; - /* An helper function for index_cond_func_innodb: */ - bool is_thd_killed(); + /* @} */ }; /* Some accessor functions which the InnoDB plugin needs, but which diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index d99177e0330..28d4413b568 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -884,13 +884,25 @@ dict_index_get_nth_col_no( Looks for column n in an index. @return position in internal representation of the index; ULINT_UNDEFINED if not contained */ -UNIV_INTERN +UNIV_INLINE ulint dict_index_get_nth_col_pos( /*=======================*/ const dict_index_t* index, /*!< in: index */ ulint n); /*!< in: column number */ /********************************************************************//** +Looks for column n in an index. 
+@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INTERN +ulint +dict_index_get_nth_col_or_prefix_pos( +/*=================================*/ + const dict_index_t* index, /*!< in: index */ + ulint n, /*!< in: column number */ + ibool inc_prefix); /*!< in: TRUE=consider + column prefixes too */ +/********************************************************************//** Returns TRUE if the index contains a column or a prefix of that column. @return TRUE if contains the column or its prefix */ UNIV_INTERN diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index b03f5117295..4ec82220ffb 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -694,6 +694,20 @@ dict_index_get_nth_col_no( return(dict_col_get_no(dict_index_get_nth_col(index, pos))); } +/********************************************************************//** +Looks for column n in an index. +@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INLINE +ulint +dict_index_get_nth_col_pos( +/*=======================*/ + const dict_index_t* index, /*!< in: index */ + ulint n) /*!< in: column number */ +{ + return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE)); +} + #ifndef UNIV_HOTBACKUP /********************************************************************//** Returns the minimum data size of an index record. 
diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index 02e7712df58..61ec644796b 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -256,6 +256,15 @@ innobase_get_at_most_n_mbchars( ulint data_len, /*!< in: length of the string in bytes */ const char* str); /*!< in: character string */ +/*************************************************************//** +InnoDB index push-down condition check +@return ICP_NO_MATCH, ICP_MATCH, ICP_OUT_OF_RANGE, or ICP_ABORTED_BY_USER */ +UNIV_INTERN +enum icp_result +innobase_index_cond( +/*================*/ + void* file) /*!< in/out: pointer to ha_innobase */ + __attribute__((nonnull, warn_unused_result)); /******************************************************************//** Returns true if the thread supports XA, global value of innodb_supports_xa if thd is NULL. diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index a75aea1d046..69e59ae120f 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -558,6 +558,10 @@ struct mysql_row_templ_struct { Innobase record in the clustered index; not defined if template_type is ROW_MYSQL_WHOLE_ROW */ + ulint icp_rec_field_no; /*!< field number of the column in an + Innobase record in the current index; + not defined unless + index condition pushdown is used */ ulint mysql_col_offset; /*!< offset of the column in the MySQL row format */ ulint mysql_col_len; /*!< length of the column in the MySQL @@ -596,16 +600,6 @@ struct mysql_row_templ_struct { #define ROW_PREBUILT_ALLOCATED 78540783 #define ROW_PREBUILT_FREED 26423527 - -typedef enum xtradb_icp_result { - XTRADB_ICP_ERROR=-1, - XTRADB_ICP_NO_MATCH=0, - XTRADB_ICP_MATCH=1, - XTRADB_ICP_OUT_OF_RANGE=2, - XTRADB_ICP_ABORTED_BY_USER=3, -} xtradb_icp_result_t; - -typedef xtradb_icp_result_t (*idx_cond_func_t)(void *param); /** A struct for (sometimes lazily) prebuilt structures in an Innobase table 
handle used within MySQL; these are used to save CPU time. */ @@ -803,16 +797,15 @@ struct row_prebuilt_struct { store it here so that we can return it to MySQL */ /*----------------------*/ + void* idx_cond; /*!< In ICP, pointer to a ha_innobase, + passed to innobase_index_cond(). + NULL if index condition pushdown is + not used. */ + ulint idx_cond_n_cols;/*!< Number of leading fields in mysql_template needed for ICP. + 0 if and only if idx_cond == NULL. */ + /*----------------------*/ ulint magic_n2; /*!< this should be the same as magic_n */ - /*----------------------*/ - idx_cond_func_t idx_cond_func; /* Index Condition Pushdown function, - or NULL if there is none set */ - void* idx_cond_func_arg;/* ICP function argument */ - ulint n_index_fields; /* Number of fields at the start of - mysql_template. Valid only when using - ICP. */ - /*----------------------*/ }; #define ROW_PREBUILT_FETCH_MAGIC_N 465765687 diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index ee8ee34ea49..8fe8bab5d9a 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -3915,7 +3915,7 @@ os_aio_simulated_wake_handler_thread( { os_aio_array_t* array; os_aio_slot_t* slot; - ulint segment; + ulint segment __attribute__ ((unused)); ulint n; ulint i; @@ -4655,7 +4655,7 @@ os_aio_simulated_handle( ulint* space_id) { os_aio_array_t* array; - ulint segment; + ulint segment __attribute__ ((unused)); os_aio_slot_t* slot; os_aio_slot_t* slot2; os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE]; diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 1d91b1f4b53..ef5e30ea16d 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -58,6 +58,8 @@ Created 12/19/1997 Heikki Tuuri #include "buf0lru.h" #include "ha_prototypes.h" +#include "my_compare.h" /* enum icp_result */ + /* Maximum number of rows to prefetch; MySQL interface has another parameter */ #define SEL_MAX_N_PREFETCH 16 @@ -2667,144 +2669,96 @@ 
row_sel_field_store_in_mysql_format( } /**************************************************************//** -Convert a row in the Innobase format to a row in the MySQL format. -Note that the template in prebuilt may advise us to copy only a few -columns to mysql_rec, other columns are left blank. All columns may not -be needed in the query. -@return TRUE on success, FALSE if not all columns could be retrieved */ +Convert a field in the Innobase format to a field in the MySQL format. */ static __attribute__((warn_unused_result)) ibool -row_sel_store_mysql_rec( -/*====================*/ - byte* mysql_rec, /*!< out: row in the MySQL format */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: Innobase record in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch */ - ibool rec_clust, /*!< in: TRUE if rec is in the - clustered index instead of - prebuilt->index */ - const ulint* offsets, /*!< in: array returned by - rec_get_offsets(rec) */ - ulint start_field_no, /* in: start from this field */ - ulint end_field_no) /* in: end at this field */ +row_sel_store_mysql_field( +/*======================*/ + byte* mysql_rec, /*!< out: record in the + MySQL format */ + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ + const rec_t* rec, /*!< in: InnoDB record; + must be protected by + a page latch */ + const ulint* offsets, /*!< in: array returned by + rec_get_offsets() */ + ulint field_no, /*!< in: templ->rec_field_no or + templ->clust_rec_field_no */ + const mysql_row_templ_t*templ) /*!< in: row template */ { - mem_heap_t* extern_field_heap = NULL; - mem_heap_t* heap; - ulint i; + const byte* data; + ulint len; - ut_ad(prebuilt->mysql_template); ut_ad(prebuilt->default_rec); + ut_ad(templ); + ut_ad(templ >= prebuilt->mysql_template); + ut_ad(templ < &prebuilt->mysql_template[prebuilt->n_template]); + ut_ad(field_no == templ->clust_rec_field_no + || field_no == 
templ->rec_field_no + || field_no == templ->icp_rec_field_no); ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - - if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { - mem_heap_free(prebuilt->blob_heap); - prebuilt->blob_heap = NULL; - } - for (i = start_field_no; i < end_field_no /* prebuilt->n_template */ ; i++) { + if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { - const mysql_row_templ_t*templ = prebuilt->mysql_template + i; - const byte* data; - ulint len; - ulint field_no; - - field_no = rec_clust - ? templ->clust_rec_field_no : templ->rec_field_no; + mem_heap_t* heap; + /* Copy an externally stored field to a temporary heap */ - if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { + ut_a(!prebuilt->trx->has_search_latch); + ut_ad(field_no == templ->clust_rec_field_no); - /* Copy an externally stored field to the temporary - heap */ + if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { + if (prebuilt->blob_heap == NULL) { + prebuilt->blob_heap = mem_heap_create( + UNIV_PAGE_SIZE); + } - ut_a(!prebuilt->trx->has_search_latch); + heap = prebuilt->blob_heap; + } else { + heap = mem_heap_create(UNIV_PAGE_SIZE); + } - if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { - if (prebuilt->blob_heap == NULL) { - prebuilt->blob_heap = mem_heap_create( - UNIV_PAGE_SIZE); - } + /* NOTE: if we are retrieving a big BLOB, we may + already run out of memory in the next call, which + causes an assert */ - heap = prebuilt->blob_heap; - } else { - extern_field_heap - = mem_heap_create(UNIV_PAGE_SIZE); + data = btr_rec_copy_externally_stored_field( + rec, offsets, + dict_table_zip_size(prebuilt->table), + field_no, &len, heap); - heap = extern_field_heap; - } + if (UNIV_UNLIKELY(!data)) { + /* The externally stored field was not written + yet. This record should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED transactions. 
*/ - /* NOTE: if we are retrieving a big BLOB, we may - already run out of memory in the next call, which - causes an assert */ - - data = btr_rec_copy_externally_stored_field( - rec, offsets, - dict_table_zip_size(prebuilt->table), - field_no, &len, heap); - - if (UNIV_UNLIKELY(!data)) { - /* The externally stored field - was not written yet. This - record should only be seen by - recv_recovery_rollback_active() - or any TRX_ISO_READ_UNCOMMITTED - transactions. */ - - if (extern_field_heap) { - mem_heap_free(extern_field_heap); - } - - return(FALSE); + if (heap != prebuilt->blob_heap) { + mem_heap_free(heap); } - ut_a(len != UNIV_SQL_NULL); - } else { - /* Field is stored in the row. */ - - data = rec_get_nth_field(rec, offsets, field_no, &len); - - if (UNIV_UNLIKELY(templ->type == DATA_BLOB) - && len != UNIV_SQL_NULL) { + ut_a(prebuilt->trx->isolation_level + == TRX_ISO_READ_UNCOMMITTED); + return(FALSE); + } - /* It is a BLOB field locally stored in the - InnoDB record: we MUST copy its contents to - prebuilt->blob_heap here because later code - assumes all BLOB values have been copied to a - safe place. */ + ut_a(len != UNIV_SQL_NULL); - if (prebuilt->blob_heap == NULL) { - prebuilt->blob_heap = mem_heap_create( - UNIV_PAGE_SIZE); - } + row_sel_field_store_in_mysql_format( + mysql_rec + templ->mysql_col_offset, + templ, data, len); - data = memcpy(mem_heap_alloc( - prebuilt->blob_heap, len), - data, len); - } + if (heap != prebuilt->blob_heap) { + mem_heap_free(heap); } + } else { + /* Field is stored in the row. 
*/ - if (len != UNIV_SQL_NULL) { - row_sel_field_store_in_mysql_format( - mysql_rec + templ->mysql_col_offset, - templ, data, len); - - /* Cleanup */ - if (extern_field_heap) { - mem_heap_free(extern_field_heap); - extern_field_heap = NULL; - } + data = rec_get_nth_field(rec, offsets, field_no, &len); - if (templ->mysql_null_bit_mask) { - /* It is a nullable column with a non-NULL - value */ - mysql_rec[templ->mysql_null_byte_offset] - &= ~(byte) templ->mysql_null_bit_mask; - } - } else { + if (len == UNIV_SQL_NULL) { /* MySQL assumes that the field for an SQL NULL value is set to the default value. */ + ut_ad(templ->mysql_null_bit_mask); UNIV_MEM_ASSERT_RW(prebuilt->default_rec + templ->mysql_col_offset, @@ -2815,6 +2769,85 @@ row_sel_store_mysql_rec( (const byte*) prebuilt->default_rec + templ->mysql_col_offset, templ->mysql_col_len); + return(TRUE); + } + + if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { + + /* It is a BLOB field locally stored in the + InnoDB record: we MUST copy its contents to + prebuilt->blob_heap here because + row_sel_field_store_in_mysql_format() stores a + pointer to the data, and the data passed to us + will be invalid as soon as the + mini-transaction is committed and the page + latch on the clustered index page is + released. */ + + if (prebuilt->blob_heap == NULL) { + prebuilt->blob_heap = mem_heap_create( + UNIV_PAGE_SIZE); + } + + data = mem_heap_dup(prebuilt->blob_heap, data, len); + } + + row_sel_field_store_in_mysql_format( + mysql_rec + templ->mysql_col_offset, + templ, data, len); + } + + ut_ad(len != UNIV_SQL_NULL); + + if (templ->mysql_null_bit_mask) { + /* It is a nullable column with a non-NULL + value */ + mysql_rec[templ->mysql_null_byte_offset] + &= ~(byte) templ->mysql_null_bit_mask; + } + + return(TRUE); +} + +/**************************************************************//** +Convert a row in the Innobase format to a row in the MySQL format. 
+Note that the template in prebuilt may advise us to copy only a few +columns to mysql_rec, other columns are left blank. All columns may not +be needed in the query. +@return TRUE on success, FALSE if not all columns could be retrieved */ +static __attribute__((warn_unused_result)) +ibool +row_sel_store_mysql_rec( +/*====================*/ + byte* mysql_rec, /*!< out: row in the MySQL format */ + row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ + const rec_t* rec, /*!< in: Innobase record in the index + which was described in prebuilt's + template, or in the clustered index; + must be protected by a page latch */ + ibool rec_clust, /*!< in: TRUE if rec is in the + clustered index instead of + prebuilt->index */ + const ulint* offsets) /*!< in: array returned by + rec_get_offsets(rec) */ +{ + ulint i; + + if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { + mem_heap_free(prebuilt->blob_heap); + prebuilt->blob_heap = NULL; + } + + for (i = 0; i < prebuilt->n_template; i++) { + const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; + + if (!row_sel_store_mysql_field(mysql_rec, prebuilt, + rec, offsets, + rec_clust + ? 
templ->clust_rec_field_no + : templ->rec_field_no, + templ)) { + return(FALSE); } } @@ -3185,31 +3218,19 @@ UNIV_INLINE __attribute__((warn_unused_result)) ibool row_sel_push_cache_row_for_mysql( /*=============================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: record to push, in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch */ - ibool rec_clust, /*!< in: TRUE if rec is in the - clustered index instead of - prebuilt->index */ - const ulint* offsets, /*!< in: rec_get_offsets(rec) */ - ulint start_field_no, /* in: start from this field */ - byte* remainder_buf) /* in: if start_field_no !=0, - where to take prev fields */ + byte* mysql_rec, /*!< in/out: MySQL record */ + row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */ { - byte* buf; - ulint i; - ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); ut_a(!prebuilt->templ_contains_blob); - if (prebuilt->fetch_cache[0] == NULL) { + if (UNIV_UNLIKELY(prebuilt->fetch_cache[0] == NULL)) { + ulint i; /* Allocate memory for the fetch cache */ + ut_ad(prebuilt->n_fetch_cached == 0); for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) { + byte* buf; /* A user has reported memory corruption in these buffers in Linux. 
Put magic numbers there to help @@ -3229,46 +3250,14 @@ row_sel_push_cache_row_for_mysql( UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached], prebuilt->mysql_row_len); - if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( - prebuilt->fetch_cache[ - prebuilt->n_fetch_cached], - prebuilt, - rec, - rec_clust, - offsets, - start_field_no, - prebuilt->n_template))) { - return(FALSE); - } - - if (start_field_no) { - - for (i=0; i < start_field_no; i++) { - register ulint offs; - mysql_row_templ_t* templ; - register byte * null_byte; - - templ = prebuilt->mysql_template + i; - - if (templ->mysql_null_bit_mask) { - offs = templ->mysql_null_byte_offset; - - null_byte= prebuilt->fetch_cache[ - prebuilt->n_fetch_cached]+offs; - (*null_byte)&= ~templ->mysql_null_bit_mask; - (*null_byte)|= (*(remainder_buf + offs) & - templ->mysql_null_bit_mask); - } + memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached], + mysql_rec, prebuilt->mysql_row_len); - offs = templ->mysql_col_offset; - memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached] - + offs, - remainder_buf + offs, - templ->mysql_col_len); - } + if (++prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE) { + return(FALSE); } - prebuilt->n_fetch_cached++; + row_sel_pop_cached_row_for_mysql(mysql_rec, prebuilt); return(TRUE); } @@ -3346,6 +3335,81 @@ row_sel_try_search_shortcut_for_mysql( return(SEL_FOUND); } +/*********************************************************************//** +Check a pushed-down index condition. 
+@return ICP_NO_MATCH, ICP_MATCH, ICP_OUT_OF_RANGE, or ICP_ABORTED_BY_USER */ +static +enum icp_result +row_search_idx_cond_check( +/*======================*/ + byte* mysql_rec, /*!< out: record + in MySQL format (invalid unless + prebuilt->idx_cond!=NULL and + we return ICP_MATCH) */ + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct + for the table handle */ + const rec_t* rec, /*!< in: InnoDB record */ + const ulint* offsets) /*!< in: rec_get_offsets() */ +{ + enum icp_result result; + ulint i; + + ut_ad(rec_offs_validate(rec, prebuilt->index, offsets)); + + if (!prebuilt->idx_cond) { + return(ICP_MATCH); + } + + /* Convert to MySQL format those fields that are needed for + evaluating the index condition. */ + + if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { + mem_heap_empty(prebuilt->blob_heap); + } + + for (i = 0; i < prebuilt->idx_cond_n_cols; i++) { + const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; + + if (!row_sel_store_mysql_field(mysql_rec, prebuilt, + rec, offsets, + templ->icp_rec_field_no, + templ)) { + return(ICP_NO_MATCH); + } + } + + /* We assume that the index conditions on + case-insensitive columns are case-insensitive. The + case of such columns may be wrong in a secondary + index, if the case of the column has been updated in + the past, or a record has been deleted and a record + inserted in a different case. */ + result = innobase_index_cond(prebuilt->idx_cond); + switch (result) { + case ICP_MATCH: + /* Convert the remaining fields to MySQL format. + If this is a secondary index record, we must defer + this until we have fetched the clustered index record. 
*/ + if (!prebuilt->need_to_access_clustered + || dict_index_is_clust(prebuilt->index)) { + if (!row_sel_store_mysql_rec(mysql_rec, prebuilt, + rec, + FALSE, offsets)) { + ut_ad(dict_index_is_clust(prebuilt->index)); + result = ICP_NO_MATCH; + } + } + /* fall through */ + case ICP_NO_MATCH: + case ICP_OUT_OF_RANGE: + case ICP_ABORTED_BY_USER: + return(result); + default: ; + } + + ut_error; +} + /********************************************************************//** Searches for rows in the database. This is used in the interface to MySQL. This function opens a cursor, and also implements fetch next @@ -3410,10 +3474,8 @@ row_search_for_mysql( mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ibool some_fields_in_buffer; ibool table_lock_waited = FALSE; - ibool problematic_use = FALSE; - ibool get_clust_rec = 0; + ibool problematic_use = FALSE; rec_offs_init(offsets_); @@ -3715,10 +3777,24 @@ row_search_for_mysql( mtr_commit(&mtr). */ ut_ad(!rec_get_deleted_flag(rec, comp)); + if (prebuilt->idx_cond) { + switch (row_search_idx_cond_check( + buf, prebuilt, + rec, offsets)) { + case ICP_NO_MATCH: + case ICP_OUT_OF_RANGE: + case ICP_ABORTED_BY_USER: + goto shortcut_mismatch; + case ICP_MATCH: + goto shortcut_match; + default: ; + } + ut_error; + } + if (!row_sel_store_mysql_rec(buf, prebuilt, rec, FALSE, - offsets, 0, - prebuilt->n_template)) { + offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such @@ -3729,13 +3805,12 @@ row_search_for_mysql( rolling back a recovered transaction. Rollback happens at a lower level, not here. */ - ut_a(trx->isolation_level - == TRX_ISO_READ_UNCOMMITTED); /* Proceed as in case SEL_RETRY. 
*/ break; } + shortcut_match: mtr_commit(&mtr); /* ut_print_name(stderr, index->name); @@ -3747,6 +3822,7 @@ row_search_for_mysql( goto release_search_latch_if_needed; case SEL_EXHAUSTED: + shortcut_mismatch: mtr_commit(&mtr); /* ut_print_name(stderr, index->name); @@ -3849,8 +3925,9 @@ retry_check: if (!prebuilt->sql_stat_start) { /* No need to set an intention lock or assign a read view */ - if (trx->read_view == NULL - && prebuilt->select_lock_type == LOCK_NONE) { + if (UNIV_UNLIKELY + (trx->read_view == NULL + && prebuilt->select_lock_type == LOCK_NONE)) { fputs("InnoDB: Error: MySQL is trying to" " perform a consistent read\n" @@ -4321,6 +4398,16 @@ no_gap_lock: if (UNIV_LIKELY(trx->wait_lock != NULL)) { lock_cancel_waiting_and_release( trx->wait_lock); + mutex_exit(&kernel_mutex); + + if (old_vers == NULL) { + /* The row was not yet committed */ + + goto next_rec; + } + + did_semi_consistent_read = TRUE; + rec = old_vers; } else { mutex_exit(&kernel_mutex); @@ -4331,19 +4418,7 @@ no_gap_lock: offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - err = DB_SUCCESS; - break; } - mutex_exit(&kernel_mutex); - - if (old_vers == NULL) { - /* The row was not yet committed */ - - goto next_rec; - } - - did_semi_consistent_read = TRUE; - rec = old_vers; break; default: @@ -4402,8 +4477,27 @@ no_gap_lock: if (!lock_sec_rec_cons_read_sees( rec, trx->read_view)) { - get_clust_rec = TRUE; - goto idx_cond_check; + /* We should look at the clustered index. + However, as this is a non-locking read, + we can skip the clustered index lookup if + the condition does not match the secondary + index entry. 
*/ + switch (row_search_idx_cond_check( + buf, prebuilt, rec, offsets)) { + case ICP_NO_MATCH: + goto next_rec; + case ICP_OUT_OF_RANGE: + err = DB_RECORD_NOT_FOUND; + goto idx_cond_failed; + case ICP_ABORTED_BY_USER: + err = DB_SEARCH_ABORTED_BY_USER; + goto idx_cond_failed; + case ICP_MATCH: + goto requires_clust_rec; + default: ; + } + + ut_error; } } } @@ -4448,38 +4542,31 @@ no_gap_lock: goto next_rec; } - -idx_cond_check: - if (prebuilt->idx_cond_func) { - int res; - ibool ib_res; - ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - ib_res= row_sel_store_mysql_rec(buf, prebuilt, rec, FALSE, - offsets, 0, prebuilt->n_index_fields); - /* - The above call will fail and return FALSE when requested to - store an "externally stored column" (afaiu, a blob). Index - Condition Pushdown is not supported for indexes with blob - columns, so we should never get this error. - */ - ut_ad(ib_res); - res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg); - if (res == XTRADB_ICP_NO_MATCH) - goto next_rec; - else if (res != XTRADB_ICP_MATCH) { - err= (res == XTRADB_ICP_ABORTED_BY_USER ? - DB_SEARCH_ABORTED_BY_USER : - DB_RECORD_NOT_FOUND); - goto idx_cond_failed; - } - /* res == XTRADB_ICP_MATCH */ - } + /* Check if the record matches the index condition. */ + switch (row_search_idx_cond_check(buf, prebuilt, rec, offsets)) { + case ICP_NO_MATCH: + if (did_semi_consistent_read) { + row_unlock_for_mysql(prebuilt, TRUE); + } + goto next_rec; + case ICP_ABORTED_BY_USER: + err = DB_SEARCH_ABORTED_BY_USER; + goto idx_cond_failed; + case ICP_OUT_OF_RANGE: + err = DB_RECORD_NOT_FOUND; + goto idx_cond_failed; + case ICP_MATCH: + break; + default: + ut_error; + } /* Get the clustered index record if needed, if we did not do the search using the clustered index. 
*/ - if (get_clust_rec || (index != clust_index - && prebuilt->need_to_access_clustered)) { + if (index != clust_index && prebuilt->need_to_access_clustered) { + +requires_clust_rec: + ut_ad(index != clust_index); /* We use a 'goto' to the preceding label if a consistent read of a secondary index record requires us to look up old @@ -4543,6 +4630,19 @@ idx_cond_check: result_rec = clust_rec; ut_ad(rec_offs_validate(result_rec, clust_index, offsets)); + + if (prebuilt->idx_cond) { + /* Convert the remaining fields to + MySQL format. We were unable to do + this in row_search_idx_cond_check(), + because the condition is on the + secondary index and the requested + column is in the clustered index. */ + if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec, + TRUE, offsets)) { + goto next_rec; + } + } } else { result_rec = rec; } @@ -4576,15 +4676,10 @@ idx_cond_check: are BLOBs in the fields to be fetched. In HANDLER we do not cache rows because there the cursor is a scrollable cursor. */ - some_fields_in_buffer = (index != clust_index - && prebuilt->idx_cond_func); - - if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec, - result_rec != rec, - offsets, - some_fields_in_buffer? - prebuilt->n_index_fields : 0, - buf)) { + + if (!prebuilt->idx_cond + && !row_sel_store_mysql_rec(buf, prebuilt, result_rec, + result_rec != rec, offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such records do not exist. Such records may only be @@ -4592,14 +4687,10 @@ idx_cond_check: level or when rolling back a recovered transaction. Rollback happens at a lower level, not here. 
*/ - ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED); - } else if (prebuilt->n_fetch_cached - == MYSQL_FETCH_CACHE_SIZE) { - - goto got_row; + goto next_rec; + } else if (row_sel_push_cache_row_for_mysql(buf, prebuilt)) { + goto next_rec; } - - goto next_rec; } else { if (UNIV_UNLIKELY (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) { @@ -4620,16 +4711,11 @@ idx_cond_check: rec_offs_size(offsets)); mach_write_to_4(buf, rec_offs_extra_size(offsets) + 4); - } else { - /* Returning a row to MySQL */ - - if (!row_sel_store_mysql_rec(buf, prebuilt, - result_rec, - result_rec != rec, - offsets, - prebuilt->idx_cond_func? - prebuilt->n_index_fields: 0, - prebuilt->n_template)) { + } else if (!prebuilt->idx_cond) { + /* The record was not yet converted to MySQL format. */ + if (!row_sel_store_mysql_rec( + buf, prebuilt, + result_rec, result_rec != rec, offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such records do @@ -4638,8 +4724,6 @@ idx_cond_check: isolation level or when rolling back a recovered transaction. Rollback happens at a lower level, not here. */ - ut_a(trx->isolation_level - == TRX_ISO_READ_UNCOMMITTED); goto next_rec; } } @@ -4657,7 +4741,6 @@ idx_cond_check: /* From this point on, 'offsets' are invalid. */ -got_row: /* We have an optimization to save CPU time: if this is a consistent read on a unique condition on the clustered index, then we do not store the pcur position, because any fetch next or prev will anyway @@ -4681,7 +4764,6 @@ idx_cond_failed: next_rec: /* Reset the old and new "did semi-consistent read" flags. 
*/ - get_clust_rec = FALSE; if (UNIV_UNLIKELY(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT)) { prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; @@ -4692,6 +4774,7 @@ next_rec: /*-------------------------------------------------------------*/ /* PHASE 5: Move the cursor to the next index record */ + /*TODO: with ICP, do this when switching pages, every N pages */ if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) { /* We must commit mtr if we are moving to the next non-clustered index record, because we could break the |