diff options
author | unknown <istruewing@chilla.local> | 2006-10-11 22:28:06 +0200 |
---|---|---|
committer | unknown <istruewing@chilla.local> | 2006-10-11 22:28:06 +0200 |
commit | 403cc15508e9129db0196e8b98b72a4ff1a4db03 (patch) | |
tree | 91edd5965a34465b41215daf0115af0615c4c1aa | |
parent | 6abdffe4f6c55ce53efa73167b15be1e33769673 (diff) | |
parent | 679b5848f3af7e1833385ef20d099dfde277b8c0 (diff) | |
download | mariadb-git-403cc15508e9129db0196e8b98b72a4ff1a4db03.tar.gz |
Merge bk-internal.mysql.com:/home/bk/mysql-4.1-engines
into chilla.local:/home/mydev/mysql-4.1-bug8283-one
include/my_sys.h:
Auto merged
-rw-r--r-- | include/my_sys.h | 22 | ||||
-rw-r--r-- | include/myisam.h | 2 | ||||
-rw-r--r-- | myisam/mi_check.c | 306 | ||||
-rw-r--r-- | myisam/mi_open.c | 5 | ||||
-rw-r--r-- | myisam/mi_packrec.c | 76 | ||||
-rw-r--r-- | myisam/myisamdef.h | 30 | ||||
-rw-r--r-- | myisam/sort.c | 126 | ||||
-rw-r--r-- | mysql-test/r/myisam.result | 126 | ||||
-rw-r--r-- | mysql-test/t/myisam.test | 91 | ||||
-rw-r--r-- | mysys/mf_iocache.c | 582 |
10 files changed, 1132 insertions, 234 deletions
diff --git a/include/my_sys.h b/include/my_sys.h index 46e09e8ddf4..4c9a7a7964c 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -325,12 +325,18 @@ typedef int (*IO_CACHE_CALLBACK)(struct st_io_cache*); #ifdef THREAD typedef struct st_io_cache_share { - /* to sync on reads into buffer */ - pthread_mutex_t mutex; - pthread_cond_t cond; - int count, total; - /* actual IO_CACHE that filled the buffer */ - struct st_io_cache *active; + pthread_mutex_t mutex; /* To sync on reads into buffer. */ + pthread_cond_t cond; /* To wait for signals. */ + pthread_cond_t cond_writer; /* For a synchronized writer. */ + /* Offset in file corresponding to the first byte of buffer. */ + my_off_t pos_in_file; + /* If a synchronized write cache is the source of the data. */ + struct st_io_cache *source_cache; + byte *buffer; /* The read buffer. */ + byte *read_end; /* Behind last valid byte of buffer. */ + int running_threads; /* threads not in lock. */ + int total_threads; /* threads sharing the cache. */ + int error; /* Last error. */ #ifdef NOT_YET_IMPLEMENTED /* whether the structure should be free'd */ my_bool alloced; @@ -672,8 +678,8 @@ extern void setup_io_cache(IO_CACHE* info); extern int _my_b_read(IO_CACHE *info,byte *Buffer,uint Count); #ifdef THREAD extern int _my_b_read_r(IO_CACHE *info,byte *Buffer,uint Count); -extern void init_io_cache_share(IO_CACHE *info, - IO_CACHE_SHARE *s, uint num_threads); +extern void init_io_cache_share(IO_CACHE *read_cache, IO_CACHE_SHARE *cshare, + IO_CACHE *write_cache, uint num_threads); extern void remove_io_thread(IO_CACHE *info); #endif extern int _my_b_seq_read(IO_CACHE *info,byte *Buffer,uint Count); diff --git a/include/myisam.h b/include/myisam.h index c2d3d99a414..0a808070748 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -345,7 +345,7 @@ typedef struct st_mi_check_param uint testflag, key_cache_block_size; uint8 language; my_bool using_global_keycache, opt_lock_memory, opt_follow_links; - my_bool retry_repair, force_sort, calc_checksum; + my_bool retry_repair, force_sort; char temp_filename[FN_REFLEN],*isam_file_name; MY_TMPDIR *tmpdir; int tmpfile_createflag; diff --git a/myisam/mi_check.c b/myisam/mi_check.c index 301becf9c62..b07c9904247 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -16,6 +16,31 @@ /* Describe, check and repair of MyISAM tables */ +/* + About checksum calculation. + + There are two types of checksums. Table checksum and row checksum. + + Row checksum is an additional byte at the end of dynamic length + records. It must be calculated if the table is configured for them. + Otherwise they must not be used. The variable + MYISAM_SHARE::calc_checksum determines if row checksums are used. + MI_INFO::checksum is used as temporary storage during row handling. + For parallel repair we must assure that only one thread can use this + variable. There is no problem on the write side as this is done by one + thread only. But when checking a record after read this could go + wrong. But since all threads read through a common read buffer, it is + sufficient if only one thread checks it. + + Table checksum is an eight byte value in the header of the index file. + It can be calculated even if row checksums are not used. The variable + MI_CHECK::glob_crc is calculated over all records. + MI_SORT_PARAM::calc_checksum determines if this should be done. This + variable is not part of MI_CHECK because it must be set per thread for + parallel repair. The global glob_crc must be changed by one thread + only. And it is sufficient to calculate the checksum once only. +*/ + #include "ftdefs.h" #include <m_ctype.h> #include <stdarg.h> @@ -41,8 +66,7 @@ static int chk_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, ha_checksum *key_checksum, uint level); static uint isam_key_length(MI_INFO *info,MI_KEYDEF *keyinfo); static ha_checksum calc_checksum(ha_rows count); -static int writekeys(MI_CHECK *param, MI_INFO *info,byte *buff, - my_off_t filepos); +static int writekeys(MI_SORT_PARAM *sort_param); static int sort_one_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, my_off_t pagepos, File new_file); static int sort_key_read(MI_SORT_PARAM *sort_param,void *key); @@ -1101,7 +1125,8 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) goto err; start_recpos=pos; splits++; - VOID(_mi_pack_get_block_info(info,&block_info, -1, start_recpos)); + VOID(_mi_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, -1, start_recpos)); pos=block_info.filepos+block_info.rec_len; if (block_info.rec_len < (uint) info->s->min_pack_length || block_info.rec_len > (uint) info->s->max_pack_length) @@ -1115,7 +1140,8 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) if (_mi_read_cache(¶m->read_cache,(byte*) info->rec_buff, block_info.filepos, block_info.rec_len, READING_NEXT)) goto err; - if (_mi_pack_rec_unpack(info,record,info->rec_buff,block_info.rec_len)) + if (_mi_pack_rec_unpack(info, &info->bit_buff, record, + info->rec_buff, block_info.rec_len)) { mi_check_print_error(param,"Found wrong record at %s", llstr(start_recpos,llbuff)); @@ -1399,7 +1425,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info, info->state->empty=0; param->glob_crc=0; if (param->testflag & T_CALC_CHECKSUM) - param->calc_checksum=1; + sort_param.calc_checksum= 1; info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); for (i=0 ; i < info->s->base.keys ; i++) @@ -1423,7 +1449,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info, lock_memory(param); /* Everything is alloced */ while (!(error=sort_get_next_record(&sort_param))) { - if (writekeys(param,info,(byte*)sort_param.record,sort_param.filepos)) + if (writekeys(&sort_param)) { if (my_errno != HA_ERR_FOUND_DUPP_KEY) goto err; @@ -1568,11 +1594,13 @@ err: /* Uppate keyfile when doing repair */ -static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff, - my_off_t filepos) +static int writekeys(MI_SORT_PARAM *sort_param) { register uint i; - uchar *key; + uchar *key; + MI_INFO *info= sort_param->sort_info->info; + byte *buff= sort_param->record; + my_off_t filepos= sort_param->filepos; DBUG_ENTER("writekeys"); key=info->lastkey+info->s->base.max_key_length; @@ -1626,8 +1654,8 @@ static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff, } } /* Remove checksum that was added to glob_crc in sort_get_next_record */ - if (param->calc_checksum) - param->glob_crc-= info->checksum; + if (sort_param->calc_checksum) + sort_param->sort_info->param->glob_crc-= info->checksum; DBUG_PRINT("error",("errno: %d",my_errno)); DBUG_RETURN(-1); } /* writekeys */ @@ -2133,7 +2161,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, del=info->state->del; param->glob_crc=0; if (param->testflag & T_CALC_CHECKSUM) - param->calc_checksum=1; + sort_param.calc_checksum= 1; rec_per_key_part= param->rec_per_key_part; for (sort_param.key=0 ; sort_param.key < share->base.keys ; @@ -2195,7 +2223,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, param->retry_repair=1; goto err; } - param->calc_checksum=0; /* No need to calc glob_crc */ + /* No need to calculate checksum again. */ + sort_param.calc_checksum= 0; /* Set for next loop */ sort_info.max_records= (ha_rows) info->state->records; @@ -2358,6 +2387,28 @@ err: Each key is handled by a separate thread. TODO: make a number of threads a parameter + In parallel repair we use one thread per index. There are two modes: + + Quick + + Only the indexes are rebuilt. All threads share a read buffer. + Every thread that needs fresh data in the buffer enters the shared + cache lock. The last thread joining the lock reads the buffer from + the data file and wakes all other threads. + + Non-quick + + The data file is rebuilt and all indexes are rebuilt to point to + the new record positions. One thread is the master thread. It + reads from the old data file and writes to the new data file. It + also creates one of the indexes. The other threads read from a + buffer which is filled by the master. If they need fresh data, + they enter the shared cache lock. If the masters write buffer is + full, it flushes it to the new data file and enters the shared + cache lock too. When all threads joined in the lock, the master + copies its write buffer to the read buffer for the other threads + and wakes them. + RESULT 0 ok <>0 Error @@ -2380,6 +2431,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, ulong *rec_per_key_part; HA_KEYSEG *keyseg; char llbuff[22]; + IO_CACHE new_data_cache; /* For non-quick repair. */ IO_CACHE_SHARE io_share; SORT_INFO sort_info; ulonglong key_map=share->state.key_map; @@ -2401,19 +2453,55 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; + /* + Quick repair (not touching data file, rebuilding indexes): + { + Read cache is (MI_CHECK *param)->read_cache using info->dfile. + } + + Non-quick repair (rebuilding data file and indexes): + { + Master thread: + + Read cache is (MI_CHECK *param)->read_cache using info->dfile. + Write cache is (MI_INFO *info)->rec_cache using new_file. + + Slave threads: + + Read cache is new_data_cache synced to master rec_cache. + + The final assignment of the filedescriptor for rec_cache is done + after the cache creation. + + Don't check file size on new_data_cache, as the resulting file size + is not known yet. + + As rec_cache and new_data_cache are synced, write_buffer_length is + used for the read cache 'new_data_cache'. Both start at the same + position 'new_header_length'. + } + */ + DBUG_PRINT("info", ("is quick repair: %d", rep_quick)); bzero((char*)&sort_info,sizeof(sort_info)); + /* Initialize pthread structures before goto err. */ + pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&sort_info.cond, 0); + if (!(sort_info.key_block= - alloc_key_blocks(param, - (uint) param->sort_key_blocks, - share->base.max_key_block_length)) - || init_io_cache(¶m->read_cache,info->dfile, - (uint) param->read_buffer_length, - READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || - (! rep_quick && - init_io_cache(&info->rec_cache,info->dfile, - (uint) param->write_buffer_length, - WRITE_CACHE,new_header_length,1, - MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))) + alloc_key_blocks(param, (uint) param->sort_key_blocks, + share->base.max_key_block_length)) || + init_io_cache(¶m->read_cache, info->dfile, + (uint) param->read_buffer_length, + READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) || + (!rep_quick && + (init_io_cache(&info->rec_cache, info->dfile, + (uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) || + init_io_cache(&new_data_cache, -1, + (uint) param->write_buffer_length, + READ_CACHE, new_header_length, 1, + MYF(MY_WME | MY_DONT_CHECK_FILESIZE))))) goto err; sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; info->opt_flag|=WRITE_CACHE_USED; @@ -2504,8 +2592,6 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, del=info->state->del; param->glob_crc=0; - if (param->testflag & T_CALC_CHECKSUM) - param->calc_checksum=1; if (!(sort_param=(MI_SORT_PARAM *) my_malloc((uint) share->base.keys * @@ -2555,6 +2641,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, sort_param[i].sort_info=&sort_info; sort_param[i].master=0; sort_param[i].fix_datafile=0; + sort_param[i].calc_checksum= 0; sort_param[i].filepos=new_header_length; sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length; @@ -2591,19 +2678,45 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, sort_info.total_keys=i; sort_param[0].master= 1; sort_param[0].fix_datafile= (my_bool)(! rep_quick); + sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM); sort_info.got_error=0; - pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); - pthread_cond_init(&sort_info.cond, 0); pthread_mutex_lock(&sort_info.mutex); - init_io_cache_share(¶m->read_cache, &io_share, i); + /* + Initialize the I/O cache share for use with the read caches and, in + case of non-quick repair, the write cache. When all threads join on + the cache lock, the writer copies the write cache contents to the + read caches. + */ + if (i > 1) + { + if (rep_quick) + init_io_cache_share(¶m->read_cache, &io_share, NULL, i); + else + init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i); + } + else + io_share.total_threads= 0; /* share not used */ + (void) pthread_attr_init(&thr_attr); (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); for (i=0 ; i < sort_info.total_keys ; i++) { - sort_param[i].read_cache=param->read_cache; + /* + Copy the properly initialized IO_CACHE structure so that every + thread has its own copy. In quick mode param->read_cache is shared + for use by all threads. In non-quick mode all threads but the + first copy the shared new_data_cache, which is synchronized to the + write cache of the first thread. The first thread copies + param->read_cache, which is not shared. + */ + sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache : + new_data_cache); + DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx", + i, (long) &sort_param[i].read_cache)); + /* two approaches: the same amount of memory for each thread or the memory for the same number of keys for each thread... @@ -2621,7 +2734,10 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, (void *) (sort_param+i))) { mi_check_print_error(param,"Cannot start a repair thread"); - remove_io_thread(¶m->read_cache); + /* Cleanup: Detach from the share. Avoid others to be blocked. */ + if (io_share.total_threads) + remove_io_thread(&sort_param[i].read_cache); + DBUG_PRINT("error", ("Cannot start a repair thread")); sort_info.got_error=1; } else @@ -2643,6 +2759,11 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, if (sort_param[0].fix_datafile) { + /* + Append some nuls to the end of a memory mapped file. Destroy the + write cache. The master thread did already detach from the share + by remove_io_thread() in sort.c:thr_find_all_keys(). + */ if (write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache)) goto err; if (param->testflag & T_SAFE_REPAIR) @@ -2658,8 +2779,14 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, sort_param->filepos; /* Only whole records */ share->state.version=(ulong) time((time_t*) 0); + + /* + Exchange the data file descriptor of the table, so that we use the + new file from now on. + */ my_close(info->dfile,MYF(0)); info->dfile=new_file; + share->data_file_type=sort_info.new_data_file_type; share->pack.header_length=(ulong) new_header_length; } @@ -2714,7 +2841,20 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, err: got_error|= flush_blocks(param, share->key_cache, share->kfile); + /* + Destroy the write cache. The master thread did already detach from + the share by remove_io_thread() or it was not yet started (if the + error happend before creating the thread). + */ VOID(end_io_cache(&info->rec_cache)); + /* + Destroy the new data cache in case of non-quick repair. All slave + threads did either detach from the share by remove_io_thread() + already or they were not yet started (if the error happend before + creating the threads). + */ + if (!rep_quick) + VOID(end_io_cache(&new_data_cache)); if (!got_error) { /* Replace the actual file with the temporary file */ @@ -2845,12 +2985,41 @@ static int sort_ft_key_read(MI_SORT_PARAM *sort_param, void *key) } /* sort_ft_key_read */ - /* Read next record from file using parameters in sort_info */ - /* Return -1 if end of file, 0 if ok and > 0 if error */ +/* + Read next record from file using parameters in sort_info. + + SYNOPSIS + sort_get_next_record() + sort_param Information about and for the sort process + + NOTE + + Dynamic Records With Non-Quick Parallel Repair + + For non-quick parallel repair we use a synchronized read/write + cache. This means that one thread is the master who fixes the data + file by reading each record from the old data file and writing it + to the new data file. By doing this the records in the new data + file are written contiguously. Whenever the write buffer is full, + it is copied to the read buffer. The slaves read from the read + buffer, which is not associated with a file. Thus read_cache.file + is -1. When using _mi_read_cache(), the slaves must always set + flag to READING_NEXT so that the function never tries to read from + file. This is safe because the records are contiguous. There is no + need to read outside the cache. This condition is evaluated in the + variable 'parallel_flag' for quick reference. read_cache.file must + be >= 0 in every other case. + + RETURN + -1 end of file + 0 ok + > 0 error +*/ static int sort_get_next_record(MI_SORT_PARAM *sort_param) { int searching; + int parallel_flag; uint found_record,b_type,left_length; my_off_t pos; byte *to; @@ -2888,7 +3057,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength); if (*sort_param->record) { - if (param->calc_checksum) + if (sort_param->calc_checksum) param->glob_crc+= (info->checksum= mi_static_checksum(info,sort_param->record)); DBUG_RETURN(0); @@ -2903,6 +3072,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) LINT_INIT(to); pos=sort_param->pos; searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); + parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0; for (;;) { found_record=block_info.second_read= 0; @@ -2933,7 +3103,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) (byte*) block_info.header,pos, MI_BLOCK_INFO_HEADER_LENGTH, (! found_record ? READING_NEXT : 0) | - READING_HEADER)) + parallel_flag | READING_HEADER)) { if (found_record) { @@ -3110,9 +3280,31 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) llstr(sort_param->start_recpos,llbuff)); goto try_next; } - if (_mi_read_cache(&sort_param->read_cache,to,block_info.filepos, - block_info.data_len, - (found_record == 1 ? READING_NEXT : 0))) + /* + Copy information that is already read. Avoid accessing data + below the cache start. This could happen if the header + streched over the end of the previous buffer contents. + */ + { + uint header_len= (uint) (block_info.filepos - pos); + uint prefetch_len= (MI_BLOCK_INFO_HEADER_LENGTH - header_len); + + if (prefetch_len > block_info.data_len) + prefetch_len= block_info.data_len; + if (prefetch_len) + { + memcpy(to, block_info.header + header_len, prefetch_len); + block_info.filepos+= prefetch_len; + block_info.data_len-= prefetch_len; + left_length-= prefetch_len; + to+= prefetch_len; + } + } + if (block_info.data_len && + _mi_read_cache(&sort_param->read_cache,to,block_info.filepos, + block_info.data_len, + (found_record == 1 ? READING_NEXT : 0) | + parallel_flag)) { mi_check_print_info(param, "Read error for block at: %s (error: %d); Skipped", @@ -3142,13 +3334,14 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) { if (sort_param->read_cache.error < 0) DBUG_RETURN(1); - if (info->s->calc_checksum) - info->checksum=mi_checksum(info,sort_param->record); + if (sort_param->calc_checksum) + info->checksum= mi_checksum(info, sort_param->record); if ((param->testflag & (T_EXTEND | T_REP)) || searching) { if (_mi_rec_check(info, sort_param->record, sort_param->rec_buff, sort_param->find_length, (param->testflag & T_QUICK) && + sort_param->calc_checksum && test(info->s->calc_checksum))) { mi_check_print_info(param,"Found wrong packed record at %s", @@ -3156,7 +3349,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) goto try_next; } } - if (param->calc_checksum) + if (sort_param->calc_checksum) param->glob_crc+= info->checksum; DBUG_RETURN(0); } @@ -3183,7 +3376,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) DBUG_RETURN(1); /* Something wrong with data */ } sort_param->start_recpos=sort_param->pos; - if (_mi_pack_get_block_info(info,&block_info,-1,sort_param->pos)) + if (_mi_pack_get_block_info(info, &sort_param->bit_buff, &block_info, + &sort_param->rec_buff, -1, sort_param->pos)) DBUG_RETURN(-1); if (!block_info.rec_len && sort_param->pos + MEMMAP_EXTRA_MARGIN == @@ -3207,15 +3401,14 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) llstr(sort_param->pos,llbuff)); continue; } - if (_mi_pack_rec_unpack(info,sort_param->record,sort_param->rec_buff, - block_info.rec_len)) + if (_mi_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record, + sort_param->rec_buff, block_info.rec_len)) { if (! searching) mi_check_print_info(param,"Found wrong record at %s", llstr(sort_param->pos,llbuff)); continue; } - info->checksum=mi_checksum(info,sort_param->record); if (!sort_param->fix_datafile) { sort_param->filepos=sort_param->pos; @@ -3225,8 +3418,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) sort_param->max_pos=(sort_param->pos=block_info.filepos+ block_info.rec_len); info->packed_length=block_info.rec_len; - if (param->calc_checksum) - param->glob_crc+= info->checksum; + if (sort_param->calc_checksum) + param->glob_crc+= (info->checksum= + mi_checksum(info, sort_param->record)); DBUG_RETURN(0); } } @@ -3234,7 +3428,20 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) } - /* Write record to new file */ +/* + Write record to new file. + + SYNOPSIS + sort_write_record() + sort_param Sort parameters. + + NOTE + This is only called by a master thread if parallel repair is used. + + RETURN + 0 OK + 1 Error +*/ int sort_write_record(MI_SORT_PARAM *sort_param) { @@ -3283,6 +3490,7 @@ int sort_write_record(MI_SORT_PARAM *sort_param) } from=sort_info->buff+ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER); } + /* We can use info->checksum here as only one thread calls this. */ info->checksum=mi_checksum(info,sort_param->record); reclength=_mi_rec_pack(info,from,sort_param->record); flag=0; @@ -3692,7 +3900,7 @@ static int sort_delete_record(MI_SORT_PARAM *sort_param) DBUG_RETURN(1); } } - if (param->calc_checksum) + if (sort_param->calc_checksum) param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record); } error=flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info); diff --git a/myisam/mi_open.c b/myisam/mi_open.c index a5b303f86d4..4f298397615 100644 --- a/myisam/mi_open.c +++ b/myisam/mi_open.c @@ -201,7 +201,10 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) ((open_flags & HA_OPEN_ABORT_IF_CRASHED) && (my_disable_locking && share->state.open_count)))) { - DBUG_PRINT("error",("Table is marked as crashed")); + DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u " + "changed: %u open_count: %u !locking: %d", + open_flags, share->state.changed, + share->state.open_count, my_disable_locking)); my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ? HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE); goto err; diff --git a/myisam/mi_packrec.c b/myisam/mi_packrec.c index 0edb3ac1d5d..feb8d33b015 100644 --- a/myisam/mi_packrec.c +++ b/myisam/mi_packrec.c @@ -101,7 +101,8 @@ static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count); static void fill_buffer(MI_BIT_BUFF *bit_buff); static uint max_bit(uint value); #ifdef HAVE_MMAP -static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info, +static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, + MI_BLOCK_INFO *info, byte **rec_buff_p, uchar *header); #endif @@ -436,13 +437,15 @@ int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf) DBUG_RETURN(-1); /* _search() didn't find record */ file=info->dfile; - if (_mi_pack_get_block_info(info, &block_info, file, filepos)) + if (_mi_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, file, filepos)) goto err; if (my_read(file,(byte*) info->rec_buff + block_info.offset , block_info.rec_len - block_info.offset, MYF(MY_NABP))) goto panic; info->update|= HA_STATE_AKTIV; - DBUG_RETURN(_mi_pack_rec_unpack(info,buf,info->rec_buff,block_info.rec_len)); + DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); panic: my_errno=HA_ERR_WRONG_IN_RECORD; err: @@ -451,8 +454,8 @@ err: -int _mi_pack_rec_unpack(register MI_INFO *info, register byte *to, byte *from, - ulong reclength) +int _mi_pack_rec_unpack(register MI_INFO *info, MI_BIT_BUFF *bit_buff, + register byte *to, byte *from, ulong reclength) { byte *end_field; reg3 MI_COLUMNDEF *end; @@ -460,18 +463,18 @@ int _mi_pack_rec_unpack(register MI_INFO *info, register byte *to, byte *from, MYISAM_SHARE *share=info->s; DBUG_ENTER("_mi_pack_rec_unpack"); - init_bit_buffer(&info->bit_buff, (uchar*) from,reclength); + init_bit_buffer(bit_buff, (uchar*) from, reclength); for (current_field=share->rec, end=current_field+share->base.fields ; current_field < end ; current_field++,to=end_field) { end_field=to+current_field->length; - (*current_field->unpack)(current_field,&info->bit_buff,(uchar*) to, + (*current_field->unpack)(current_field, bit_buff, (uchar*) to, (uchar*) end_field); } - if (! info->bit_buff.error && - info->bit_buff.pos - info->bit_buff.bits/8 == info->bit_buff.end) + if (!bit_buff->error && + bit_buff->pos - bit_buff->bits / 8 == bit_buff->end) DBUG_RETURN(0); info->update&= ~HA_STATE_AKTIV; DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD); @@ -985,13 +988,16 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf, if (info->opt_flag & READ_CACHE_USED) { - if (_mi_read_cache(&info->rec_cache,(byte*) block_info.header,filepos, - share->pack.ref_length, skip_deleted_blocks)) + if (_mi_read_cache(&info->rec_cache, (byte*) block_info.header, + filepos, share->pack.ref_length, + skip_deleted_blocks ? READING_NEXT : 0)) goto err; - b_type=_mi_pack_get_block_info(info,&block_info,-1, filepos); + b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, -1, filepos); } else - b_type=_mi_pack_get_block_info(info,&block_info,info->dfile,filepos); + b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, info->dfile, filepos); if (b_type) goto err; /* Error code is already set */ #ifndef DBUG_OFF @@ -1004,9 +1010,9 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf, if (info->opt_flag & READ_CACHE_USED) { - if (_mi_read_cache(&info->rec_cache,(byte*) info->rec_buff, - block_info.filepos, block_info.rec_len, - skip_deleted_blocks)) + if (_mi_read_cache(&info->rec_cache, (byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, + skip_deleted_blocks ? READING_NEXT : 0)) goto err; } else @@ -1021,8 +1027,8 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf, info->nextpos=block_info.filepos+block_info.rec_len; info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; - DBUG_RETURN (_mi_pack_rec_unpack(info,buf,info->rec_buff, - block_info.rec_len)); + DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); err: DBUG_RETURN(my_errno); } @@ -1030,8 +1036,9 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf, /* Read and process header from a huff-record-file */ -uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BLOCK_INFO *info, File file, - my_off_t filepos) +uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, + MI_BLOCK_INFO *info, byte **rec_buff_p, + File file, my_off_t filepos) { uchar *header=info->header; uint head_length,ref_length; @@ -1056,17 +1063,17 @@ uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BLOCK_INFO *info, File file, head_length+= read_pack_length((uint) myisam->s->pack.version, header + head_length, &info->blob_len); if (!(mi_alloc_rec_buff(myisam,info->rec_len + info->blob_len, - &myisam->rec_buff))) + rec_buff_p))) return BLOCK_FATAL_ERROR; /* not enough memory */ - myisam->bit_buff.blob_pos=(uchar*) myisam->rec_buff+info->rec_len; - myisam->bit_buff.blob_end= myisam->bit_buff.blob_pos+info->blob_len; + bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len; + bit_buff->blob_end= bit_buff->blob_pos + info->blob_len; myisam->blob_length=info->blob_len; } info->filepos=filepos+head_length; if (file > 0) { info->offset=min(info->rec_len, ref_length - head_length); - memcpy(myisam->rec_buff, header+head_length, info->offset); + memcpy(*rec_buff_p, header + head_length, info->offset); } return 0; } @@ -1206,7 +1213,8 @@ void _mi_unmap_file(MI_INFO *info) } -static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info, +static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, + MI_BLOCK_INFO *info, byte **rec_buff_p, uchar *header) { header+= read_pack_length((uint) myisam->s->pack.version, header, @@ -1217,10 +1225,10 @@ static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info, &info->blob_len); /* mi_alloc_rec_buff sets my_errno on error */ if (!(mi_alloc_rec_buff(myisam, info->blob_len, - &myisam->rec_buff))) + rec_buff_p))) return 0; /* not enough memory */ - myisam->bit_buff.blob_pos=(uchar*) myisam->rec_buff; - myisam->bit_buff.blob_end= (uchar*) myisam->rec_buff + info->blob_len; + bit_buff->blob_pos= (uchar*) *rec_buff_p; + bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len; } return header; } @@ -1236,11 +1244,13 @@ static int _mi_read_mempack_record(MI_INFO *info, my_off_t filepos, byte *buf) if (filepos == HA_OFFSET_ERROR) DBUG_RETURN(-1); /* _search() didn't find record */ - if (!(pos= (byte*) _mi_mempack_get_block_info(info,&block_info, + if (!(pos= (byte*) _mi_mempack_get_block_info(info, &info->bit_buff, + &block_info, &info->rec_buff, (uchar*) share->file_map+ filepos))) DBUG_RETURN(-1); - DBUG_RETURN(_mi_pack_rec_unpack(info, buf, pos, block_info.rec_len)); + DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); } @@ -1260,7 +1270,8 @@ static int _mi_read_rnd_mempack_record(MI_INFO *info, byte *buf, my_errno=HA_ERR_END_OF_FILE; goto err; } - if (!(pos= (byte*) _mi_mempack_get_block_info(info,&block_info, + if (!(pos= (byte*) _mi_mempack_get_block_info(info, &info->bit_buff, + &block_info, &info->rec_buff, (uchar*) (start=share->file_map+ filepos)))) @@ -1277,7 +1288,8 @@ static int _mi_read_rnd_mempack_record(MI_INFO *info, byte *buf, info->nextpos=filepos+(uint) (pos-start)+block_info.rec_len; info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; - DBUG_RETURN (_mi_pack_rec_unpack(info,buf,pos, block_info.rec_len)); + DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); err: DBUG_RETURN(my_errno); } diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h index a766d59d72a..6365f0e1b0c 100644 --- a/myisam/myisamdef.h +++ b/myisam/myisamdef.h @@ -75,7 +75,7 @@ typedef struct st_mi_state_info ulong sec_index_changed; /* Updated when new sec_index */ ulong sec_index_used; /* which extra index are in use */ ulonglong key_map; /* Which keys are in use */ - ha_checksum checksum; + ha_checksum checksum; /* Table checksum */ ulong version; /* timestamp of create */ time_t create_time; /* Time when created database */ time_t recover_time; /* Time for last recover */ @@ -176,6 +176,7 @@ typedef struct st_mi_isam_share { /* Shared between opens */ int (*delete_record)(struct st_myisam_info*); int (*read_rnd)(struct st_myisam_info*, byte*, my_off_t, my_bool); int (*compare_record)(struct st_myisam_info*, const byte *); + /* Function to use for a row checksum. */ ha_checksum (*calc_checksum)(struct st_myisam_info*, const byte *); int (*compare_unique)(struct st_myisam_info*, MI_UNIQUEDEF *, const byte *record, my_off_t pos); @@ -249,7 +250,7 @@ struct st_myisam_info { my_off_t last_keypage; /* Last key page read */ my_off_t last_search_keypage; /* Last keypage when searching */ my_off_t dupp_key_pos; - ha_checksum checksum; + ha_checksum checksum; /* Temp storage for row checksum */ /* QQ: the folloing two xxx_length fields should be removed, as they are not compatible with parallel repair */ ulong packed_length,blob_length; /* Length of found, packed record */ @@ -297,8 +298,9 @@ typedef struct st_mi_sort_param pthread_t thr; IO_CACHE read_cache, tempfile, tempfile_for_exceptions; DYNAMIC_ARRAY buffpek; - - /* + MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ + + /* The next two are used to collect statistics, see update_key_parts for description. */ @@ -309,6 +311,7 @@ typedef struct st_mi_sort_param uint key, key_length,real_key_length,sortbuff_size; uint maxbuffers, keys, find_length, sort_keys_length; my_bool fix_datafile, master; + my_bool calc_checksum; /* calculate table checksum */ MI_KEYDEF *keyinfo; HA_KEYSEG *seg; SORT_INFO *sort_info; @@ -361,8 +364,15 @@ typedef struct st_mi_sort_param #define mi_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\ mi_int2store(x,boh); } #define mi_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0) -#define mi_mark_crashed(x) (x)->s->state.changed|=STATE_CRASHED -#define mi_mark_crashed_on_repair(x) { (x)->s->state.changed|=STATE_CRASHED|STATE_CRASHED_ON_REPAIR ; (x)->update|= HA_STATE_CHANGED; } +#define mi_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \ + DBUG_PRINT("error", ("Marked table crashed")); \ + }while(0) +#define mi_mark_crashed_on_repair(x) do{(x)->s->state.changed|= \ + STATE_CRASHED|STATE_CRASHED_ON_REPAIR; \ + (x)->update|= HA_STATE_CHANGED; \ + DBUG_PRINT("error", \ + ("Marked table crashed")); \ + }while(0) #define mi_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED) #define mi_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR) @@ -600,8 +610,8 @@ extern void _mi_print_key(FILE *stream,HA_KEYSEG *keyseg,const uchar *key, extern my_bool _mi_read_pack_info(MI_INFO *info,pbool fix_keys); extern int _mi_read_pack_record(MI_INFO *info,my_off_t filepos,byte *buf); extern int _mi_read_rnd_pack_record(MI_INFO*, byte *,my_off_t, my_bool); -extern int _mi_pack_rec_unpack(MI_INFO *info,byte *to,byte *from, - ulong reclength); +extern int _mi_pack_rec_unpack(MI_INFO *info, MI_BIT_BUFF *bit_buff, + byte *to, byte *from, ulong reclength); extern ulonglong mi_safe_mul(ulonglong a,ulonglong b); extern int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, const byte *oldrec, const byte *newrec, my_off_t pos); @@ -666,7 +676,9 @@ extern "C" { extern uint _mi_get_block_info(MI_BLOCK_INFO *,File, my_off_t); extern uint _mi_rec_pack(MI_INFO *info,byte *to,const byte *from); -extern uint _mi_pack_get_block_info(MI_INFO *, MI_BLOCK_INFO *, File, my_off_t); +extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, + MI_BLOCK_INFO *info, byte **rec_buff_p, + File file, my_off_t filepos); extern void _my_store_blob_length(byte *pos,uint pack_length,uint length); extern void _myisam_log(enum myisam_log_commands command,MI_INFO *info, const byte *buffert,uint length); diff --git a/myisam/sort.c b/myisam/sort.c index 1a3dc147cd9..727840709da 100644 --- a/myisam/sort.c +++ b/myisam/sort.c @@ -309,7 +309,7 @@ static ha_rows NEAR_F find_all_keys(MI_SORT_PARAM *info, uint keys, pthread_handler_decl(thr_find_all_keys,arg) { - MI_SORT_PARAM *info= (MI_SORT_PARAM*) arg; + MI_SORT_PARAM *sort_param= (MI_SORT_PARAM*) arg; int error; uint memavl,old_memavl,keys,sort_length; uint idx, maxbuffer; @@ -321,32 +321,34 @@ pthread_handler_decl(thr_find_all_keys,arg) if (my_thread_init()) goto err; - if (info->sort_info->got_error) + DBUG_ENTER("thr_find_all_keys"); + DBUG_PRINT("enter", ("master: %d", sort_param->master)); + if (sort_param->sort_info->got_error) goto err; - if (info->keyinfo->flag && HA_VAR_LENGTH_KEY) + if (sort_param->keyinfo->flag && HA_VAR_LENGTH_KEY) { - info->write_keys=write_keys_varlen; - info->read_to_buffer=read_to_buffer_varlen; - info->write_key=write_merge_key_varlen; + sort_param->write_keys= write_keys_varlen; + sort_param->read_to_buffer= read_to_buffer_varlen; + sort_param->write_key= write_merge_key_varlen; } else { - info->write_keys=write_keys; - info->read_to_buffer=read_to_buffer; - info->write_key=write_merge_key; + sort_param->write_keys= write_keys; + sort_param->read_to_buffer= read_to_buffer; + sort_param->write_key= write_merge_key; } - my_b_clear(&info->tempfile); - my_b_clear(&info->tempfile_for_exceptions); - bzero((char*) &info->buffpek,sizeof(info->buffpek)); - bzero((char*) &info->unique, sizeof(info->unique)); + my_b_clear(&sort_param->tempfile); + my_b_clear(&sort_param->tempfile_for_exceptions); + bzero((char*) &sort_param->buffpek, sizeof(sort_param->buffpek)); + bzero((char*) &sort_param->unique, sizeof(sort_param->unique)); sort_keys= (uchar **) NULL; - memavl=max(info->sortbuff_size, MIN_SORT_MEMORY); - idx= info->sort_info->max_records; - sort_length= info->key_length; - maxbuffer= 1; + memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY); + idx= sort_param->sort_info->max_records; + sort_length= sort_param->key_length; + maxbuffer= 1; while (memavl >= MIN_SORT_MEMORY) { @@ -363,18 +365,19 @@ pthread_handler_decl(thr_find_all_keys,arg) (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/ (sort_length+sizeof(char*))) <= 1) { - mi_check_print_error(info->sort_info->param, + mi_check_print_error(sort_param->sort_info->param, "sort_buffer_size is to small"); goto err; } } while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr); } - if ((sort_keys=(uchar **)my_malloc(keys*(sort_length+sizeof(char*))+ - ((info->keyinfo->flag & HA_FULLTEXT) ? - HA_FT_MAXBYTELEN : 0), MYF(0)))) + if ((sort_keys= (uchar**) + my_malloc(keys*(sort_length+sizeof(char*))+ + ((sort_param->keyinfo->flag & HA_FULLTEXT) ? + HA_FT_MAXBYTELEN : 0), MYF(0)))) { - if (my_init_dynamic_array(&info->buffpek, sizeof(BUFFPEK), + if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK), maxbuffer, maxbuffer/2)) my_free((gptr) sort_keys,MYF(0)); else @@ -386,69 +389,87 @@ pthread_handler_decl(thr_find_all_keys,arg) } if (memavl < MIN_SORT_MEMORY) { - mi_check_print_error(info->sort_info->param,"Sort buffer to small"); /* purecov: tested */ + mi_check_print_error(sort_param->sort_info->param, "Sort buffer too small"); goto err; /* purecov: tested */ } - if (info->sort_info->param->testflag & T_VERBOSE) - printf("Key %d - Allocating buffer for %d keys\n",info->key+1,keys); - info->sort_keys=sort_keys; + if (sort_param->sort_info->param->testflag & T_VERBOSE) + printf("Key %d - Allocating buffer for %d keys\n", + sort_param->key + 1, keys); + sort_param->sort_keys= sort_keys; idx=error=0; sort_keys[0]=(uchar*) (sort_keys+keys); - while (!(error=info->sort_info->got_error) && - !(error=(*info->key_read)(info,sort_keys[idx]))) + DBUG_PRINT("info", ("reading keys")); + while (!(error= sort_param->sort_info->got_error) && + !(error= (*sort_param->key_read)(sort_param, sort_keys[idx]))) { - if (info->real_key_length > info->key_length) + if (sort_param->real_key_length > sort_param->key_length) { - if (write_key(info,sort_keys[idx], &info->tempfile_for_exceptions)) + if (write_key(sort_param, sort_keys[idx], + &sort_param->tempfile_for_exceptions)) goto err; continue; } if (++idx == keys) { - if (info->write_keys(info,sort_keys,idx-1, - (BUFFPEK *)alloc_dynamic(&info->buffpek), - &info->tempfile)) + if (sort_param->write_keys(sort_param, sort_keys, idx - 1, + (BUFFPEK*) alloc_dynamic(&sort_param->buffpek), + &sort_param->tempfile)) goto err; sort_keys[0]=(uchar*) (sort_keys+keys); - memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); + memcpy(sort_keys[0], sort_keys[idx - 1], (size_t) sort_param->key_length); idx=1; } - sort_keys[idx]=sort_keys[idx-1]+info->key_length; + sort_keys[idx]= sort_keys[idx - 1] + sort_param->key_length; } if (error > 0) goto err; - if (info->buffpek.elements) + if (sort_param->buffpek.elements) { - if (info->write_keys(info,sort_keys, idx, - (BUFFPEK *) alloc_dynamic(&info->buffpek), &info->tempfile)) + if (sort_param->write_keys(sort_param, sort_keys, idx, + (BUFFPEK*) alloc_dynamic(&sort_param->buffpek), + &sort_param->tempfile)) goto err; - info->keys=(info->buffpek.elements-1)*(keys-1)+idx; + sort_param->keys= (sort_param->buffpek.elements - 1) * (keys - 1) + idx; } else - info->keys=idx; + sort_param->keys= idx; - info->sort_keys_length=keys; + sort_param->sort_keys_length= keys; goto ok; err: - info->sort_info->got_error=1; /* no need to protect this with a mutex */ + DBUG_PRINT("error", ("got some error")); + sort_param->sort_info->got_error= 1; /* no need to protect with a mutex */ if (sort_keys) my_free((gptr) sort_keys,MYF(0)); - info->sort_keys=0; - delete_dynamic(& info->buffpek); - close_cached_file(&info->tempfile); - close_cached_file(&info->tempfile_for_exceptions); + sort_param->sort_keys= 0; + delete_dynamic(& sort_param->buffpek); + close_cached_file(&sort_param->tempfile); + close_cached_file(&sort_param->tempfile_for_exceptions); ok: - remove_io_thread(&info->read_cache); - pthread_mutex_lock(&info->sort_info->mutex); - info->sort_info->threads_running--; - pthread_cond_signal(&info->sort_info->cond); - pthread_mutex_unlock(&info->sort_info->mutex); + /* + Detach from the share if the writer is involved. Avoid others to + be blocked. This includes a flush of the write buffer. This will + also indicate EOF to the readers. + */ + if (sort_param->sort_info->info->rec_cache.share) + remove_io_thread(&sort_param->sort_info->info->rec_cache); + + /* Readers detach from the share if any. Avoid others to be blocked. */ + if (sort_param->read_cache.share) + remove_io_thread(&sort_param->read_cache); + + pthread_mutex_lock(&sort_param->sort_info->mutex); + if (!--sort_param->sort_info->threads_running) + pthread_cond_signal(&sort_param->sort_info->cond); + pthread_mutex_unlock(&sort_param->sort_info->mutex); + + DBUG_PRINT("exit", ("======== ending thread ========")); my_thread_end(); return NULL; } @@ -466,6 +487,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) MYISAM_SHARE *share=info->s; MI_SORT_PARAM *sinfo; byte *mergebuf=0; + DBUG_ENTER("thr_write_keys"); LINT_INIT(length); for (i= 0, sinfo= sort_param ; @@ -602,7 +624,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) } } my_free((gptr) mergebuf,MYF(MY_ALLOW_ZERO_PTR)); - return got_error; + DBUG_RETURN(got_error); } #endif /* THREAD */ diff --git a/mysql-test/r/myisam.result b/mysql-test/r/myisam.result index 05f72b22ed1..b34c127595f 100644 --- a/mysql-test/r/myisam.result +++ b/mysql-test/r/myisam.result @@ -797,6 +797,132 @@ a b xxxxxxxxx bbbbbb xxxxxxxxx bbbbbb DROP TABLE t1; +SET @@myisam_repair_threads=2; +SHOW VARIABLES LIKE 'myisam_repair%'; +Variable_name Value +myisam_repair_threads 2 +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MyISAM 9 Dynamic 2 # # # # 140 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MyISAM 9 Dynamic 2 # # # # 0 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MyISAM 9 Dynamic 2 # # # # 140 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +REPAIR TABLE t1 QUICK; +Table Op Msg_type Msg_text +test.t1 repair status OK +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MyISAM 9 Dynamic 2 # # # # 140 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +SET @@myisam_repair_threads=1; +SHOW VARIABLES LIKE 'myisam_repair%'; +Variable_name Value +myisam_repair_threads 1 show create table t1; Table Create Table t1 CREATE TEMPORARY TABLE `t1` ( diff --git a/mysql-test/t/myisam.test b/mysql-test/t/myisam.test index a62a6487882..745e3a2e377 100644 --- a/mysql-test/t/myisam.test +++ b/mysql-test/t/myisam.test @@ -763,6 +763,97 @@ SELECT * FROM t1; DROP TABLE t1; # +# Bug#8283 - OPTIMIZE TABLE causes data loss +# +SET @@myisam_repair_threads=2; +SHOW VARIABLES LIKE 'myisam_repair%'; +# +# Test OPTIMIZE. This creates a new data file. +CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +OPTIMIZE TABLE t1; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +# Test REPAIR QUICK. This retains the old data file. +CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +REPAIR TABLE t1 QUICK; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +SET @@myisam_repair_threads=1; +SHOW VARIABLES LIKE 'myisam_repair%'; # Bug#8706 - temporary table with data directory option fails # connect (session1,localhost,root,,); diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index 0007784c2b2..58d4756b702 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -70,7 +70,6 @@ static void my_aiowait(my_aio_result *result); #define IO_ROUND_UP(X) (((X)+IO_SIZE-1) & ~(IO_SIZE-1)) #define IO_ROUND_DN(X) ( (X) & ~(IO_SIZE-1)) - /* Setup internal pointers inside IO_CACHE @@ -500,65 +499,366 @@ int _my_b_read(register IO_CACHE *info, byte *Buffer, uint Count) DBUG_RETURN(0); } + #ifdef THREAD -/* Prepare IO_CACHE for shared use */ -void init_io_cache_share(IO_CACHE *info, IO_CACHE_SHARE *s, uint num_threads) +/* + Prepare IO_CACHE for shared use. + + SYNOPSIS + init_io_cache_share() + read_cache A read cache. This will be copied for + every thread after setup. + cshare The share. + write_cache If non-NULL a write cache that is to be + synchronized with the read caches. + num_threads Number of threads sharing the cache + including the write thread if any. + + DESCRIPTION + + The shared cache is used so: One IO_CACHE is initialized with + init_io_cache(). This includes the allocation of a buffer. Then a + share is allocated and init_io_cache_share() is called with the io + cache and the share. Then the io cache is copied for each thread. So + every thread has its own copy of IO_CACHE. But the allocated buffer + is shared because cache->buffer is the same for all caches. + + One thread reads data from the file into the buffer. All threads + read from the buffer, but every thread maintains its own set of + pointers into the buffer. When all threads have used up the buffer + contents, one of the threads reads the next block of data into the + buffer. To accomplish this, each thread enters the cache lock before + accessing the buffer. They wait in lock_io_cache() until all threads + joined the lock. The last thread entering the lock is in charge of + reading from file to buffer. It wakes all threads when done. + + Synchronizing a write cache to the read caches works so: Whenever + the write buffer needs a flush, the write thread enters the lock and + waits for all other threads to enter the lock too. They do this when + they have used up the read buffer. When all threads are in the lock, + the write thread copies the write buffer to the read buffer and + wakes all threads. + + share->running_threads is the number of threads not being in the + cache lock. When entering lock_io_cache() the number is decreased. + When the thread that fills the buffer enters unlock_io_cache() the + number is reset to the number of threads. The condition + running_threads == 0 means that all threads are in the lock. Bumping + up the number to the full count is non-intuitive. But increasing the + number by one for each thread that leaves the lock could lead to a + solo run of one thread. The last thread to join a lock reads from + file to buffer, wakes the other threads, processes the data in the + cache and enters the lock again. If no other thread left the lock + meanwhile, it would think it's the last one again and read the next + block... + + The share has copies of 'error', 'buffer', 'read_end', and + 'pos_in_file' from the thread that filled the buffer. We may not be + able to access this information directly from its cache because the + thread may be removed from the share before the variables could be + copied by all other threads. Or, if a write buffer is synchronized, + it would change its 'pos_in_file' after waking the other threads, + possibly before they could copy its value. + + However, the 'buffer' variable in the share is for a synchronized + write cache. It needs to know where to put the data. Otherwise it + would need access to the read cache of one of the threads that is + not yet removed from the share. + + RETURN + void +*/ + +void init_io_cache_share(IO_CACHE *read_cache, IO_CACHE_SHARE *cshare, + IO_CACHE *write_cache, uint num_threads) { - DBUG_ASSERT(info->type == READ_CACHE); - pthread_mutex_init(&s->mutex, MY_MUTEX_INIT_FAST); - pthread_cond_init (&s->cond, 0); - s->total=s->count=num_threads-1; - s->active=0; - info->share=s; - info->read_function=_my_b_read_r; - info->current_pos= info->current_end= 0; + DBUG_ENTER("init_io_cache_share"); + DBUG_PRINT("io_cache_share", ("read_cache: 0x%lx share: 0x%lx " + "write_cache: 0x%lx threads: %u", + read_cache, cshare, write_cache, num_threads)); + + DBUG_ASSERT(num_threads > 1); + DBUG_ASSERT(read_cache->type == READ_CACHE); + DBUG_ASSERT(!write_cache || (write_cache->type == WRITE_CACHE)); + + pthread_mutex_init(&cshare->mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&cshare->cond, 0); + pthread_cond_init(&cshare->cond_writer, 0); + + cshare->running_threads= num_threads; + cshare->total_threads= num_threads; + cshare->error= 0; /* Initialize. */ + cshare->buffer= read_cache->buffer; + cshare->read_end= NULL; /* See function comment of lock_io_cache(). */ + cshare->pos_in_file= 0; /* See function comment of lock_io_cache(). */ + cshare->source_cache= write_cache; /* Can be NULL. */ + + read_cache->share= cshare; + read_cache->read_function= _my_b_read_r; + read_cache->current_pos= NULL; + read_cache->current_end= NULL; + + if (write_cache) + write_cache->share= cshare; + + DBUG_VOID_RETURN; } + /* - Remove a thread from shared access to IO_CACHE - Every thread should do that on exit for not - to deadlock other threads + Remove a thread from shared access to IO_CACHE. + + SYNOPSIS + remove_io_thread() + cache The IO_CACHE to be removed from the share. + + NOTE + + Every thread must do that on exit for not to deadlock other threads. + + The last thread destroys the pthread resources. + + A writer flushes its cache first. + + RETURN + void */ -void remove_io_thread(IO_CACHE *info) + +void remove_io_thread(IO_CACHE *cache) { - IO_CACHE_SHARE *s=info->share; + IO_CACHE_SHARE *cshare= cache->share; + uint total; + DBUG_ENTER("remove_io_thread"); + + /* If the writer goes, it needs to flush the write cache. */ + if (cache == cshare->source_cache) + flush_io_cache(cache); - pthread_mutex_lock(&s->mutex); - s->total--; - if (! s->count--) - pthread_cond_signal(&s->cond); - pthread_mutex_unlock(&s->mutex); + pthread_mutex_lock(&cshare->mutex); + DBUG_PRINT("io_cache_share", ("%s: 0x%lx", + (cache == cshare->source_cache) ? + "writer" : "reader", cache)); + + /* Remove from share. */ + total= --cshare->total_threads; + DBUG_PRINT("io_cache_share", ("remaining threads: %u", total)); + + /* Detach from share. */ + cache->share= NULL; + + /* If the writer goes, let the readers know. */ + if (cache == cshare->source_cache) + { + DBUG_PRINT("io_cache_share", ("writer leaves")); + cshare->source_cache= NULL; + } + + /* If all threads are waiting for me to join the lock, wake them. */ + if (!--cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("the last running thread leaves, wake all")); + pthread_cond_signal(&cshare->cond_writer); + pthread_cond_broadcast(&cshare->cond); + } + + pthread_mutex_unlock(&cshare->mutex); + + if (!total) + { + DBUG_PRINT("io_cache_share", ("last thread removed, destroy share")); + pthread_cond_destroy (&cshare->cond_writer); + pthread_cond_destroy (&cshare->cond); + pthread_mutex_destroy(&cshare->mutex); + } + + DBUG_VOID_RETURN; } -static int lock_io_cache(IO_CACHE *info, my_off_t pos) -{ - int total; - IO_CACHE_SHARE *s=info->share; - pthread_mutex_lock(&s->mutex); - if (!s->count) +/* + Lock IO cache and wait for all other threads to join. + + SYNOPSIS + lock_io_cache() + cache The cache of the thread entering the lock. + pos File position of the block to read. + Unused for the write thread. + + DESCRIPTION + + Wait for all threads to finish with the current buffer. We want + all threads to proceed in concert. The last thread to join + lock_io_cache() will read the block from file and all threads start + to use it. Then they will join again for reading the next block. + + The waiting threads detect a fresh buffer by comparing + cshare->pos_in_file with the position they want to process next. + Since the first block may start at position 0, we take + cshare->read_end as an additional condition. This variable is + initialized to NULL and will be set after a block of data is written + to the buffer. + + RETURN + 1 OK, lock in place, go ahead and read. + 0 OK, unlocked, another thread did the read. +*/ + +static int lock_io_cache(IO_CACHE *cache, my_off_t pos) +{ + IO_CACHE_SHARE *cshare= cache->share; + DBUG_ENTER("lock_io_cache"); + + /* Enter the lock. */ + pthread_mutex_lock(&cshare->mutex); + cshare->running_threads--; + DBUG_PRINT("io_cache_share", ("%s: 0x%lx pos: %lu running: %u", + (cache == cshare->source_cache) ? + "writer" : "reader", cache, (ulong) pos, + cshare->running_threads)); + + if (cshare->source_cache) { - s->count=s->total; - return 1; + /* A write cache is synchronized to the read caches. */ + + if (cache == cshare->source_cache) + { + /* The writer waits until all readers are here. */ + while (cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("writer waits in lock")); + pthread_cond_wait(&cshare->cond_writer, &cshare->mutex); + } + DBUG_PRINT("io_cache_share", ("writer awoke, going to copy")); + + /* Stay locked. Leave the lock later by unlock_io_cache(). */ + DBUG_RETURN(1); + } + + /* The last thread wakes the writer. */ + if (!cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("waking writer")); + pthread_cond_signal(&cshare->cond_writer); + } + + /* + Readers wait until the data is copied from the writer. Another + reason to stop waiting is the removal of the write thread. If this + happens, we leave the lock with old data in the buffer. + */ + while ((!cshare->read_end || (cshare->pos_in_file < pos)) && + cshare->source_cache) + { + DBUG_PRINT("io_cache_share", ("reader waits in lock")); + pthread_cond_wait(&cshare->cond, &cshare->mutex); + } + + /* + If the writer was removed from the share while this thread was + asleep, we need to simulate an EOF condition. The writer cannot + reset the share variables as they might still be in use by readers + of the last block. When we awake here then because the last + joining thread signalled us. If the writer is not the last, it + will not signal. So it is safe to clear the buffer here. + */ + if (!cshare->read_end || (cshare->pos_in_file < pos)) + { + DBUG_PRINT("io_cache_share", ("reader found writer removed. EOF")); + cshare->read_end= cshare->buffer; /* Empty buffer. */ + cshare->error= 0; /* EOF is not an error. */ + } } + else + { + /* + There are read caches only. The last thread arriving in + lock_io_cache() continues with a locked cache and reads the block. + */ + if (!cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("last thread joined, going to read")); + /* Stay locked. Leave the lock later by unlock_io_cache(). */ + DBUG_RETURN(1); + } - total=s->total; - s->count--; - while (!s->active || s->active->pos_in_file < pos) - pthread_cond_wait(&s->cond, &s->mutex); + /* + All other threads wait until the requested block is read by the + last thread arriving. Another reason to stop waiting is the + removal of a thread. If this leads to all threads being in the + lock, we have to continue also. The first of the awaken threads + will then do the read. + */ + while ((!cshare->read_end || (cshare->pos_in_file < pos)) && + cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("reader waits in lock")); + pthread_cond_wait(&cshare->cond, &cshare->mutex); + } - if (s->total < total && - (!s->active || s->active->pos_in_file < pos)) - return 1; + /* If the block is not yet read, continue with a locked cache and read. */ + if (!cshare->read_end || (cshare->pos_in_file < pos)) + { + DBUG_PRINT("io_cache_share", ("reader awoke, going to read")); + /* Stay locked. Leave the lock later by unlock_io_cache(). */ + DBUG_RETURN(1); + } - pthread_mutex_unlock(&s->mutex); - return 0; + /* Another thread did read the block already. */ + } + DBUG_PRINT("io_cache_share", ("reader awoke, going to process %u bytes", + cshare->read_end ? (uint) + (cshare->read_end - cshare->buffer) : 0)); + + /* + Leave the lock. Do not call unlock_io_cache() later. The thread that + filled the buffer did this and marked all threads as running. + */ + pthread_mutex_unlock(&cshare->mutex); + DBUG_RETURN(0); } -static void unlock_io_cache(IO_CACHE *info) + +/* + Unlock IO cache. + + SYNOPSIS + unlock_io_cache() + cache The cache of the thread leaving the lock. + + NOTE + This is called by the thread that filled the buffer. It marks all + threads as running and awakes them. This must not be done by any + other thread. + + Do not signal cond_writer. Either there is no writer or the writer + is the only one who can call this function. + + The reason for resetting running_threads to total_threads before + waking all other threads is that it could be possible that this + thread is so fast with processing the buffer that it enters the lock + before even one other thread has left it. If every awoken thread + would increase running_threads by one, this thread could think that + he is again the last to join and would not wait for the other + threads to process the data. + + RETURN + void +*/ + +static void unlock_io_cache(IO_CACHE *cache) { - pthread_cond_broadcast(&info->share->cond); - pthread_mutex_unlock(&info->share->mutex); + IO_CACHE_SHARE *cshare= cache->share; + DBUG_ENTER("unlock_io_cache"); + DBUG_PRINT("io_cache_share", ("%s: 0x%lx pos: %lu running: %u", + (cache == cshare->source_cache) ? + "writer" : "reader", + cache, (ulong) cshare->pos_in_file, + cshare->total_threads)); + + cshare->running_threads= cshare->total_threads; + pthread_cond_broadcast(&cshare->cond); + pthread_mutex_unlock(&cshare->mutex); + DBUG_VOID_RETURN; } @@ -567,7 +867,7 @@ static void unlock_io_cache(IO_CACHE *info) SYNOPSIS _my_b_read_r() - info IO_CACHE pointer + cache IO_CACHE pointer Buffer Buffer to retrieve count bytes from file Count Number of bytes to read into Buffer @@ -579,7 +879,7 @@ static void unlock_io_cache(IO_CACHE *info) It works as follows: when a thread tries to read from a file (that is, after using all the data from the (shared) buffer), it just - hangs on lock_io_cache(), wating for other threads. When the very + hangs on lock_io_cache(), waiting for other threads. When the very last thread attempts a read, lock_io_cache() returns 1, the thread does actual IO and unlock_io_cache(), which signals all the waiting threads that data is in the buffer. @@ -599,16 +899,17 @@ static void unlock_io_cache(IO_CACHE *info) 1 Error: can't read requested characters */ -int _my_b_read_r(register IO_CACHE *info, byte *Buffer, uint Count) +int _my_b_read_r(register IO_CACHE *cache, byte *Buffer, uint Count) { my_off_t pos_in_file; uint length, diff_length, left_length; + IO_CACHE_SHARE *cshare= cache->share; DBUG_ENTER("_my_b_read_r"); - if ((left_length= (uint) (info->read_end - info->read_pos))) + if ((left_length= (uint) (cache->read_end - cache->read_pos))) { DBUG_ASSERT(Count >= left_length); /* User is not using my_b_read() */ - memcpy(Buffer, info->read_pos, (size_t) (left_length)); + memcpy(Buffer, cache->read_pos, (size_t) (left_length)); Buffer+= left_length; Count-= left_length; } @@ -616,55 +917,133 @@ int _my_b_read_r(register IO_CACHE *info, byte *Buffer, uint Count) { int cnt, len; - pos_in_file= info->pos_in_file + (info->read_end - info->buffer); + pos_in_file= cache->pos_in_file + (cache->read_end - cache->buffer); diff_length= (uint) (pos_in_file & (IO_SIZE-1)); length=IO_ROUND_UP(Count+diff_length)-diff_length; - length=(length <= info->read_length) ? - length + IO_ROUND_DN(info->read_length - length) : - length - IO_ROUND_UP(length - info->read_length) ; - if (info->type != READ_FIFO && - (length > (info->end_of_file - pos_in_file))) - length= (uint) (info->end_of_file - pos_in_file); + length= ((length <= cache->read_length) ? + length + IO_ROUND_DN(cache->read_length - length) : + length - IO_ROUND_UP(length - cache->read_length)); + if (cache->type != READ_FIFO && + (length > (cache->end_of_file - pos_in_file))) + length= (uint) (cache->end_of_file - pos_in_file); if (length == 0) { - info->error= (int) left_length; + cache->error= (int) left_length; DBUG_RETURN(1); } - if (lock_io_cache(info, pos_in_file)) + if (lock_io_cache(cache, pos_in_file)) { - info->share->active=info; - if (info->seek_not_done) /* File touched, do seek */ - VOID(my_seek(info->file,pos_in_file,MY_SEEK_SET,MYF(0))); - len=(int)my_read(info->file,info->buffer, length, info->myflags); - info->read_end=info->buffer + (len == -1 ? 0 : len); - info->error=(len == (int)length ? 0 : len); - info->pos_in_file=pos_in_file; - unlock_io_cache(info); + /* With a synchronized write/read cache we won't come here... */ + DBUG_ASSERT(!cshare->source_cache); + /* + ... unless the writer has gone before this thread entered the + lock. Simulate EOF in this case. It can be distinguished by + cache->file. + */ + if (cache->file < 0) + len= 0; + else + { + if (cache->seek_not_done) /* File touched, do seek */ + VOID(my_seek(cache->file, pos_in_file, MY_SEEK_SET, MYF(0))); + len= (int) my_read(cache->file, cache->buffer, length, cache->myflags); + } + DBUG_PRINT("io_cache_share", ("read %d bytes", len)); + + cache->read_end= cache->buffer + (len == -1 ? 0 : len); + cache->error= (len == (int)length ? 0 : len); + cache->pos_in_file= pos_in_file; + + /* Copy important values to the share. */ + cshare->error= cache->error; + cshare->read_end= cache->read_end; + cshare->pos_in_file= pos_in_file; + + /* Mark all threads as running and wake them. */ + unlock_io_cache(cache); } else { - info->error= info->share->active->error; - info->read_end= info->share->active->read_end; - info->pos_in_file= info->share->active->pos_in_file; - len= (info->error == -1 ? -1 : info->read_end-info->buffer); + /* + With a synchronized write/read cache readers always come here. + Copy important values from the share. + */ + cache->error= cshare->error; + cache->read_end= cshare->read_end; + cache->pos_in_file= cshare->pos_in_file; + + len= ((cache->error == -1) ? -1 : cache->read_end - cache->buffer); } - info->read_pos=info->buffer; - info->seek_not_done=0; + cache->read_pos= cache->buffer; + cache->seek_not_done= 0; if (len <= 0) { - info->error= (int) left_length; + DBUG_PRINT("io_cache_share", ("reader error. len %d left %u", + len, left_length)); + cache->error= (int) left_length; DBUG_RETURN(1); } cnt= ((uint) len > Count) ? (int) Count : len; - memcpy(Buffer, info->read_pos, (size_t) cnt); + memcpy(Buffer, cache->read_pos, (size_t) cnt); Count -= cnt; Buffer+= cnt; left_length+= cnt; - info->read_pos+= cnt; + cache->read_pos+= cnt; } DBUG_RETURN(0); } -#endif + + +/* + Copy data from write cache to read cache. + + SYNOPSIS + copy_to_read_buffer() + write_cache The write cache. + write_buffer The source of data, mostly the cache buffer. + write_length The number of bytes to copy. + + NOTE + The write thread will wait for all read threads to join the cache + lock. Then it copies the data over and wakes the read threads. + + RETURN + void +*/ + +static void copy_to_read_buffer(IO_CACHE *write_cache, + const byte *write_buffer, uint write_length) +{ + IO_CACHE_SHARE *cshare= write_cache->share; + + DBUG_ASSERT(cshare->source_cache == write_cache); + /* + write_length is usually less or equal to buffer_length. + It can be bigger if _my_b_write() is called with a big length. + */ + while (write_length) + { + uint copy_length= min(write_length, write_cache->buffer_length); + int __attribute__((unused)) rc; + + rc= lock_io_cache(write_cache, write_cache->pos_in_file); + /* The writing thread does always have the lock when it awakes. */ + DBUG_ASSERT(rc); + + memcpy(cshare->buffer, write_buffer, copy_length); + + cshare->error= 0; + cshare->read_end= cshare->buffer + copy_length; + cshare->pos_in_file= write_cache->pos_in_file; + + /* Mark all threads as running and wake them. */ + unlock_io_cache(write_cache); + + write_buffer+= copy_length; + write_length-= copy_length; + } +} +#endif /*THREAD*/ /* @@ -1016,6 +1395,7 @@ int _my_b_write(register IO_CACHE *info, const byte *Buffer, uint Count) Buffer+=rest_length; Count-=rest_length; info->write_pos+=rest_length; + if (flush_io_cache(info)) return 1; if (Count >= IO_SIZE) @@ -1028,6 +1408,23 @@ int _my_b_write(register IO_CACHE *info, const byte *Buffer, uint Count) } if (my_write(info->file,Buffer,(uint) length,info->myflags | MY_NABP)) return info->error= -1; + +#ifdef THREAD + /* + In case of a shared I/O cache with a writer we normally do direct + write cache to read cache copy. Simulate this here by direct + caller buffer to read cache copy. Do it after the write so that + the cache readers actions on the flushed part can go in parallel + with the write of the extra stuff. copy_to_read_buffer() + synchronizes writer and readers so that after this call the + readers can act on the extra stuff while the writer can go ahead + and prepare the next output. copy_to_read_buffer() relies on + info->pos_in_file. + */ + if (info->share) + copy_to_read_buffer(info, Buffer, length); +#endif + Count-=length; Buffer+=length; info->pos_in_file+=length; @@ -1048,6 +1445,14 @@ int my_b_append(register IO_CACHE *info, const byte *Buffer, uint Count) { uint rest_length,length; +#ifdef THREAD + /* + Assert that we cannot come here with a shared cache. If we do one + day, we might need to add a call to copy_to_read_buffer(). + */ + DBUG_ASSERT(!info->share); +#endif + lock_append_buffer(info); rest_length=(uint) (info->write_end - info->write_pos); if (Count <= rest_length) @@ -1108,6 +1513,14 @@ int my_block_write(register IO_CACHE *info, const byte *Buffer, uint Count, uint length; int error=0; +#ifdef THREAD + /* + Assert that we cannot come here with a shared cache. If we do one + day, we might need to add a call to copy_to_read_buffer(). + */ + DBUG_ASSERT(!info->share); +#endif + if (pos < info->pos_in_file) { /* Of no overlap, write everything without buffering */ @@ -1184,6 +1597,17 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock) if ((length=(uint) (info->write_pos - info->write_buffer))) { +#ifdef THREAD + /* + In case of a shared I/O cache with a writer we do direct write + cache to read cache copy. Do it before the write here so that + the readers can work in parallel with the write. + copy_to_read_buffer() relies on info->pos_in_file. + */ + if (info->share) + copy_to_read_buffer(info, info->write_buffer, length); +#endif + pos_in_file=info->pos_in_file; /* If we have append cache, we always open the file with @@ -1262,16 +1686,10 @@ int end_io_cache(IO_CACHE *info) #ifdef THREAD /* - if IO_CACHE is shared between several threads, only one - thread needs to call end_io_cache() - just as init_io_cache() - should be called only once and then memcopy'ed + Every thread must call remove_io_thread(). The last one destroys + the share elements. */ - if (info->share) - { - pthread_cond_destroy (&info->share->cond); - pthread_mutex_destroy(&info->share->mutex); - info->share=0; - } + DBUG_ASSERT(!info->share || !info->share->total_threads); #endif if ((pre_close=info->pre_close)) |