summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <istruewing@chilla.local>2006-10-11 22:28:06 +0200
committerunknown <istruewing@chilla.local>2006-10-11 22:28:06 +0200
commit403cc15508e9129db0196e8b98b72a4ff1a4db03 (patch)
tree91edd5965a34465b41215daf0115af0615c4c1aa
parent6abdffe4f6c55ce53efa73167b15be1e33769673 (diff)
parent679b5848f3af7e1833385ef20d099dfde277b8c0 (diff)
downloadmariadb-git-403cc15508e9129db0196e8b98b72a4ff1a4db03.tar.gz
Merge bk-internal.mysql.com:/home/bk/mysql-4.1-engines
into chilla.local:/home/mydev/mysql-4.1-bug8283-one include/my_sys.h: Auto merged
-rw-r--r--include/my_sys.h22
-rw-r--r--include/myisam.h2
-rw-r--r--myisam/mi_check.c306
-rw-r--r--myisam/mi_open.c5
-rw-r--r--myisam/mi_packrec.c76
-rw-r--r--myisam/myisamdef.h30
-rw-r--r--myisam/sort.c126
-rw-r--r--mysql-test/r/myisam.result126
-rw-r--r--mysql-test/t/myisam.test91
-rw-r--r--mysys/mf_iocache.c582
10 files changed, 1132 insertions, 234 deletions
diff --git a/include/my_sys.h b/include/my_sys.h
index 46e09e8ddf4..4c9a7a7964c 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -325,12 +325,18 @@ typedef int (*IO_CACHE_CALLBACK)(struct st_io_cache*);
#ifdef THREAD
typedef struct st_io_cache_share
{
- /* to sync on reads into buffer */
- pthread_mutex_t mutex;
- pthread_cond_t cond;
- int count, total;
- /* actual IO_CACHE that filled the buffer */
- struct st_io_cache *active;
+ pthread_mutex_t mutex; /* To sync on reads into buffer. */
+ pthread_cond_t cond; /* To wait for signals. */
+ pthread_cond_t cond_writer; /* For a synchronized writer. */
+ /* Offset in file corresponding to the first byte of buffer. */
+ my_off_t pos_in_file;
+ /* If a synchronized write cache is the source of the data. */
+ struct st_io_cache *source_cache;
+ byte *buffer; /* The read buffer. */
+ byte *read_end; /* Behind last valid byte of buffer. */
+ int running_threads; /* threads not in lock. */
+ int total_threads; /* threads sharing the cache. */
+ int error; /* Last error. */
#ifdef NOT_YET_IMPLEMENTED
/* whether the structure should be free'd */
my_bool alloced;
@@ -672,8 +678,8 @@ extern void setup_io_cache(IO_CACHE* info);
extern int _my_b_read(IO_CACHE *info,byte *Buffer,uint Count);
#ifdef THREAD
extern int _my_b_read_r(IO_CACHE *info,byte *Buffer,uint Count);
-extern void init_io_cache_share(IO_CACHE *info,
- IO_CACHE_SHARE *s, uint num_threads);
+extern void init_io_cache_share(IO_CACHE *read_cache, IO_CACHE_SHARE *cshare,
+ IO_CACHE *write_cache, uint num_threads);
extern void remove_io_thread(IO_CACHE *info);
#endif
extern int _my_b_seq_read(IO_CACHE *info,byte *Buffer,uint Count);
diff --git a/include/myisam.h b/include/myisam.h
index c2d3d99a414..0a808070748 100644
--- a/include/myisam.h
+++ b/include/myisam.h
@@ -345,7 +345,7 @@ typedef struct st_mi_check_param
uint testflag, key_cache_block_size;
uint8 language;
my_bool using_global_keycache, opt_lock_memory, opt_follow_links;
- my_bool retry_repair, force_sort, calc_checksum;
+ my_bool retry_repair, force_sort;
char temp_filename[FN_REFLEN],*isam_file_name;
MY_TMPDIR *tmpdir;
int tmpfile_createflag;
diff --git a/myisam/mi_check.c b/myisam/mi_check.c
index 301becf9c62..b07c9904247 100644
--- a/myisam/mi_check.c
+++ b/myisam/mi_check.c
@@ -16,6 +16,31 @@
/* Describe, check and repair of MyISAM tables */
+/*
+ About checksum calculation.
+
+ There are two types of checksums. Table checksum and row checksum.
+
+ Row checksum is an additional byte at the end of dynamic length
+ records. It must be calculated if the table is configured for them.
+ Otherwise they must not be used. The variable
+ MYISAM_SHARE::calc_checksum determines if row checksums are used.
+ MI_INFO::checksum is used as temporary storage during row handling.
+ For parallel repair we must assure that only one thread can use this
+ variable. There is no problem on the write side as this is done by one
+ thread only. But when checking a record after read this could go
+ wrong. But since all threads read through a common read buffer, it is
+ sufficient if only one thread checks it.
+
+ Table checksum is an eight byte value in the header of the index file.
+ It can be calculated even if row checksums are not used. The variable
+ MI_CHECK::glob_crc is calculated over all records.
+ MI_SORT_PARAM::calc_checksum determines if this should be done. This
+ variable is not part of MI_CHECK because it must be set per thread for
+ parallel repair. The global glob_crc must be changed by one thread
+ only. And it is sufficient to calculate the checksum once only.
+*/
+
#include "ftdefs.h"
#include <m_ctype.h>
#include <stdarg.h>
@@ -41,8 +66,7 @@ static int chk_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
ha_checksum *key_checksum, uint level);
static uint isam_key_length(MI_INFO *info,MI_KEYDEF *keyinfo);
static ha_checksum calc_checksum(ha_rows count);
-static int writekeys(MI_CHECK *param, MI_INFO *info,byte *buff,
- my_off_t filepos);
+static int writekeys(MI_SORT_PARAM *sort_param);
static int sort_one_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
my_off_t pagepos, File new_file);
static int sort_key_read(MI_SORT_PARAM *sort_param,void *key);
@@ -1101,7 +1125,8 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
goto err;
start_recpos=pos;
splits++;
- VOID(_mi_pack_get_block_info(info,&block_info, -1, start_recpos));
+ VOID(_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, -1, start_recpos));
pos=block_info.filepos+block_info.rec_len;
if (block_info.rec_len < (uint) info->s->min_pack_length ||
block_info.rec_len > (uint) info->s->max_pack_length)
@@ -1115,7 +1140,8 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
if (_mi_read_cache(&param->read_cache,(byte*) info->rec_buff,
block_info.filepos, block_info.rec_len, READING_NEXT))
goto err;
- if (_mi_pack_rec_unpack(info,record,info->rec_buff,block_info.rec_len))
+ if (_mi_pack_rec_unpack(info, &info->bit_buff, record,
+ info->rec_buff, block_info.rec_len))
{
mi_check_print_error(param,"Found wrong record at %s",
llstr(start_recpos,llbuff));
@@ -1399,7 +1425,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info,
info->state->empty=0;
param->glob_crc=0;
if (param->testflag & T_CALC_CHECKSUM)
- param->calc_checksum=1;
+ sort_param.calc_checksum= 1;
info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
for (i=0 ; i < info->s->base.keys ; i++)
@@ -1423,7 +1449,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info,
lock_memory(param); /* Everything is alloced */
while (!(error=sort_get_next_record(&sort_param)))
{
- if (writekeys(param,info,(byte*)sort_param.record,sort_param.filepos))
+ if (writekeys(&sort_param))
{
if (my_errno != HA_ERR_FOUND_DUPP_KEY)
goto err;
@@ -1568,11 +1594,13 @@ err:
/* Uppate keyfile when doing repair */
-static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff,
- my_off_t filepos)
+static int writekeys(MI_SORT_PARAM *sort_param)
{
register uint i;
- uchar *key;
+ uchar *key;
+ MI_INFO *info= sort_param->sort_info->info;
+ byte *buff= sort_param->record;
+ my_off_t filepos= sort_param->filepos;
DBUG_ENTER("writekeys");
key=info->lastkey+info->s->base.max_key_length;
@@ -1626,8 +1654,8 @@ static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff,
}
}
/* Remove checksum that was added to glob_crc in sort_get_next_record */
- if (param->calc_checksum)
- param->glob_crc-= info->checksum;
+ if (sort_param->calc_checksum)
+ sort_param->sort_info->param->glob_crc-= info->checksum;
DBUG_PRINT("error",("errno: %d",my_errno));
DBUG_RETURN(-1);
} /* writekeys */
@@ -2133,7 +2161,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
del=info->state->del;
param->glob_crc=0;
if (param->testflag & T_CALC_CHECKSUM)
- param->calc_checksum=1;
+ sort_param.calc_checksum= 1;
rec_per_key_part= param->rec_per_key_part;
for (sort_param.key=0 ; sort_param.key < share->base.keys ;
@@ -2195,7 +2223,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
param->retry_repair=1;
goto err;
}
- param->calc_checksum=0; /* No need to calc glob_crc */
+ /* No need to calculate checksum again. */
+ sort_param.calc_checksum= 0;
/* Set for next loop */
sort_info.max_records= (ha_rows) info->state->records;
@@ -2358,6 +2387,28 @@ err:
Each key is handled by a separate thread.
TODO: make a number of threads a parameter
+ In parallel repair we use one thread per index. There are two modes:
+
+ Quick
+
+ Only the indexes are rebuilt. All threads share a read buffer.
+ Every thread that needs fresh data in the buffer enters the shared
+ cache lock. The last thread joining the lock reads the buffer from
+ the data file and wakes all other threads.
+
+ Non-quick
+
+ The data file is rebuilt and all indexes are rebuilt to point to
+ the new record positions. One thread is the master thread. It
+ reads from the old data file and writes to the new data file. It
+ also creates one of the indexes. The other threads read from a
+ buffer which is filled by the master. If they need fresh data,
+ they enter the shared cache lock. If the masters write buffer is
+ full, it flushes it to the new data file and enters the shared
+ cache lock too. When all threads joined in the lock, the master
+ copies its write buffer to the read buffer for the other threads
+ and wakes them.
+
RESULT
0 ok
<>0 Error
@@ -2380,6 +2431,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
ulong *rec_per_key_part;
HA_KEYSEG *keyseg;
char llbuff[22];
+ IO_CACHE new_data_cache; /* For non-quick repair. */
IO_CACHE_SHARE io_share;
SORT_INFO sort_info;
ulonglong key_map=share->state.key_map;
@@ -2401,19 +2453,55 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
param->testflag|=T_CALC_CHECKSUM;
+ /*
+ Quick repair (not touching data file, rebuilding indexes):
+ {
+ Read cache is (MI_CHECK *param)->read_cache using info->dfile.
+ }
+
+ Non-quick repair (rebuilding data file and indexes):
+ {
+ Master thread:
+
+ Read cache is (MI_CHECK *param)->read_cache using info->dfile.
+ Write cache is (MI_INFO *info)->rec_cache using new_file.
+
+ Slave threads:
+
+ Read cache is new_data_cache synced to master rec_cache.
+
+ The final assignment of the filedescriptor for rec_cache is done
+ after the cache creation.
+
+ Don't check file size on new_data_cache, as the resulting file size
+ is not known yet.
+
+ As rec_cache and new_data_cache are synced, write_buffer_length is
+ used for the read cache 'new_data_cache'. Both start at the same
+ position 'new_header_length'.
+ }
+ */
+ DBUG_PRINT("info", ("is quick repair: %d", rep_quick));
bzero((char*)&sort_info,sizeof(sort_info));
+ /* Initialize pthread structures before goto err. */
+ pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&sort_info.cond, 0);
+
if (!(sort_info.key_block=
- alloc_key_blocks(param,
- (uint) param->sort_key_blocks,
- share->base.max_key_block_length))
- || init_io_cache(&param->read_cache,info->dfile,
- (uint) param->read_buffer_length,
- READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) ||
- (! rep_quick &&
- init_io_cache(&info->rec_cache,info->dfile,
- (uint) param->write_buffer_length,
- WRITE_CACHE,new_header_length,1,
- MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw)))
+ alloc_key_blocks(param, (uint) param->sort_key_blocks,
+ share->base.max_key_block_length)) ||
+ init_io_cache(&param->read_cache, info->dfile,
+ (uint) param->read_buffer_length,
+ READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) ||
+ (!rep_quick &&
+ (init_io_cache(&info->rec_cache, info->dfile,
+ (uint) param->write_buffer_length,
+ WRITE_CACHE, new_header_length, 1,
+ MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) ||
+ init_io_cache(&new_data_cache, -1,
+ (uint) param->write_buffer_length,
+ READ_CACHE, new_header_length, 1,
+ MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))))
goto err;
sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
info->opt_flag|=WRITE_CACHE_USED;
@@ -2504,8 +2592,6 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
del=info->state->del;
param->glob_crc=0;
- if (param->testflag & T_CALC_CHECKSUM)
- param->calc_checksum=1;
if (!(sort_param=(MI_SORT_PARAM *)
my_malloc((uint) share->base.keys *
@@ -2555,6 +2641,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
sort_param[i].sort_info=&sort_info;
sort_param[i].master=0;
sort_param[i].fix_datafile=0;
+ sort_param[i].calc_checksum= 0;
sort_param[i].filepos=new_header_length;
sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;
@@ -2591,19 +2678,45 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
sort_info.total_keys=i;
sort_param[0].master= 1;
sort_param[0].fix_datafile= (my_bool)(! rep_quick);
+ sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM);
sort_info.got_error=0;
- pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST);
- pthread_cond_init(&sort_info.cond, 0);
pthread_mutex_lock(&sort_info.mutex);
- init_io_cache_share(&param->read_cache, &io_share, i);
+ /*
+ Initialize the I/O cache share for use with the read caches and, in
+ case of non-quick repair, the write cache. When all threads join on
+ the cache lock, the writer copies the write cache contents to the
+ read caches.
+ */
+ if (i > 1)
+ {
+ if (rep_quick)
+ init_io_cache_share(&param->read_cache, &io_share, NULL, i);
+ else
+ init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
+ }
+ else
+ io_share.total_threads= 0; /* share not used */
+
(void) pthread_attr_init(&thr_attr);
(void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
for (i=0 ; i < sort_info.total_keys ; i++)
{
- sort_param[i].read_cache=param->read_cache;
+ /*
+ Copy the properly initialized IO_CACHE structure so that every
+ thread has its own copy. In quick mode param->read_cache is shared
+ for use by all threads. In non-quick mode all threads but the
+ first copy the shared new_data_cache, which is synchronized to the
+ write cache of the first thread. The first thread copies
+ param->read_cache, which is not shared.
+ */
+ sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
+ new_data_cache);
+ DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx",
+ i, (long) &sort_param[i].read_cache));
+
/*
two approaches: the same amount of memory for each thread
or the memory for the same number of keys for each thread...
@@ -2621,7 +2734,10 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
(void *) (sort_param+i)))
{
mi_check_print_error(param,"Cannot start a repair thread");
- remove_io_thread(&param->read_cache);
+ /* Cleanup: Detach from the share. Avoid others to be blocked. */
+ if (io_share.total_threads)
+ remove_io_thread(&sort_param[i].read_cache);
+ DBUG_PRINT("error", ("Cannot start a repair thread"));
sort_info.got_error=1;
}
else
@@ -2643,6 +2759,11 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
if (sort_param[0].fix_datafile)
{
+ /*
+ Append some nuls to the end of a memory mapped file. Destroy the
+ write cache. The master thread did already detach from the share
+ by remove_io_thread() in sort.c:thr_find_all_keys().
+ */
if (write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache))
goto err;
if (param->testflag & T_SAFE_REPAIR)
@@ -2658,8 +2779,14 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
sort_param->filepos;
/* Only whole records */
share->state.version=(ulong) time((time_t*) 0);
+
+ /*
+ Exchange the data file descriptor of the table, so that we use the
+ new file from now on.
+ */
my_close(info->dfile,MYF(0));
info->dfile=new_file;
+
share->data_file_type=sort_info.new_data_file_type;
share->pack.header_length=(ulong) new_header_length;
}
@@ -2714,7 +2841,20 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
err:
got_error|= flush_blocks(param, share->key_cache, share->kfile);
+ /*
+ Destroy the write cache. The master thread did already detach from
+ the share by remove_io_thread() or it was not yet started (if the
+ error happend before creating the thread).
+ */
VOID(end_io_cache(&info->rec_cache));
+ /*
+ Destroy the new data cache in case of non-quick repair. All slave
+ threads did either detach from the share by remove_io_thread()
+ already or they were not yet started (if the error happend before
+ creating the threads).
+ */
+ if (!rep_quick)
+ VOID(end_io_cache(&new_data_cache));
if (!got_error)
{
/* Replace the actual file with the temporary file */
@@ -2845,12 +2985,41 @@ static int sort_ft_key_read(MI_SORT_PARAM *sort_param, void *key)
} /* sort_ft_key_read */
- /* Read next record from file using parameters in sort_info */
- /* Return -1 if end of file, 0 if ok and > 0 if error */
+/*
+ Read next record from file using parameters in sort_info.
+
+ SYNOPSIS
+ sort_get_next_record()
+ sort_param Information about and for the sort process
+
+ NOTE
+
+ Dynamic Records With Non-Quick Parallel Repair
+
+ For non-quick parallel repair we use a synchronized read/write
+ cache. This means that one thread is the master who fixes the data
+ file by reading each record from the old data file and writing it
+ to the new data file. By doing this the records in the new data
+ file are written contiguously. Whenever the write buffer is full,
+ it is copied to the read buffer. The slaves read from the read
+ buffer, which is not associated with a file. Thus read_cache.file
+ is -1. When using _mi_read_cache(), the slaves must always set
+ flag to READING_NEXT so that the function never tries to read from
+ file. This is safe because the records are contiguous. There is no
+ need to read outside the cache. This condition is evaluated in the
+ variable 'parallel_flag' for quick reference. read_cache.file must
+ be >= 0 in every other case.
+
+ RETURN
+ -1 end of file
+ 0 ok
+ > 0 error
+*/
static int sort_get_next_record(MI_SORT_PARAM *sort_param)
{
int searching;
+ int parallel_flag;
uint found_record,b_type,left_length;
my_off_t pos;
byte *to;
@@ -2888,7 +3057,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
if (*sort_param->record)
{
- if (param->calc_checksum)
+ if (sort_param->calc_checksum)
param->glob_crc+= (info->checksum=
mi_static_checksum(info,sort_param->record));
DBUG_RETURN(0);
@@ -2903,6 +3072,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
LINT_INIT(to);
pos=sort_param->pos;
searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
+ parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
for (;;)
{
found_record=block_info.second_read= 0;
@@ -2933,7 +3103,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
(byte*) block_info.header,pos,
MI_BLOCK_INFO_HEADER_LENGTH,
(! found_record ? READING_NEXT : 0) |
- READING_HEADER))
+ parallel_flag | READING_HEADER))
{
if (found_record)
{
@@ -3110,9 +3280,31 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
llstr(sort_param->start_recpos,llbuff));
goto try_next;
}
- if (_mi_read_cache(&sort_param->read_cache,to,block_info.filepos,
- block_info.data_len,
- (found_record == 1 ? READING_NEXT : 0)))
+ /*
+ Copy information that is already read. Avoid accessing data
+ below the cache start. This could happen if the header
+ streched over the end of the previous buffer contents.
+ */
+ {
+ uint header_len= (uint) (block_info.filepos - pos);
+ uint prefetch_len= (MI_BLOCK_INFO_HEADER_LENGTH - header_len);
+
+ if (prefetch_len > block_info.data_len)
+ prefetch_len= block_info.data_len;
+ if (prefetch_len)
+ {
+ memcpy(to, block_info.header + header_len, prefetch_len);
+ block_info.filepos+= prefetch_len;
+ block_info.data_len-= prefetch_len;
+ left_length-= prefetch_len;
+ to+= prefetch_len;
+ }
+ }
+ if (block_info.data_len &&
+ _mi_read_cache(&sort_param->read_cache,to,block_info.filepos,
+ block_info.data_len,
+ (found_record == 1 ? READING_NEXT : 0) |
+ parallel_flag))
{
mi_check_print_info(param,
"Read error for block at: %s (error: %d); Skipped",
@@ -3142,13 +3334,14 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
{
if (sort_param->read_cache.error < 0)
DBUG_RETURN(1);
- if (info->s->calc_checksum)
- info->checksum=mi_checksum(info,sort_param->record);
+ if (sort_param->calc_checksum)
+ info->checksum= mi_checksum(info, sort_param->record);
if ((param->testflag & (T_EXTEND | T_REP)) || searching)
{
if (_mi_rec_check(info, sort_param->record, sort_param->rec_buff,
sort_param->find_length,
(param->testflag & T_QUICK) &&
+ sort_param->calc_checksum &&
test(info->s->calc_checksum)))
{
mi_check_print_info(param,"Found wrong packed record at %s",
@@ -3156,7 +3349,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
goto try_next;
}
}
- if (param->calc_checksum)
+ if (sort_param->calc_checksum)
param->glob_crc+= info->checksum;
DBUG_RETURN(0);
}
@@ -3183,7 +3376,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
DBUG_RETURN(1); /* Something wrong with data */
}
sort_param->start_recpos=sort_param->pos;
- if (_mi_pack_get_block_info(info,&block_info,-1,sort_param->pos))
+ if (_mi_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
+ &sort_param->rec_buff, -1, sort_param->pos))
DBUG_RETURN(-1);
if (!block_info.rec_len &&
sort_param->pos + MEMMAP_EXTRA_MARGIN ==
@@ -3207,15 +3401,14 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
llstr(sort_param->pos,llbuff));
continue;
}
- if (_mi_pack_rec_unpack(info,sort_param->record,sort_param->rec_buff,
- block_info.rec_len))
+ if (_mi_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
+ sort_param->rec_buff, block_info.rec_len))
{
if (! searching)
mi_check_print_info(param,"Found wrong record at %s",
llstr(sort_param->pos,llbuff));
continue;
}
- info->checksum=mi_checksum(info,sort_param->record);
if (!sort_param->fix_datafile)
{
sort_param->filepos=sort_param->pos;
@@ -3225,8 +3418,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
sort_param->max_pos=(sort_param->pos=block_info.filepos+
block_info.rec_len);
info->packed_length=block_info.rec_len;
- if (param->calc_checksum)
- param->glob_crc+= info->checksum;
+ if (sort_param->calc_checksum)
+ param->glob_crc+= (info->checksum=
+ mi_checksum(info, sort_param->record));
DBUG_RETURN(0);
}
}
@@ -3234,7 +3428,20 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
}
- /* Write record to new file */
+/*
+ Write record to new file.
+
+ SYNOPSIS
+ sort_write_record()
+ sort_param Sort parameters.
+
+ NOTE
+ This is only called by a master thread if parallel repair is used.
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
int sort_write_record(MI_SORT_PARAM *sort_param)
{
@@ -3283,6 +3490,7 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
}
from=sort_info->buff+ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER);
}
+ /* We can use info->checksum here as only one thread calls this. */
info->checksum=mi_checksum(info,sort_param->record);
reclength=_mi_rec_pack(info,from,sort_param->record);
flag=0;
@@ -3692,7 +3900,7 @@ static int sort_delete_record(MI_SORT_PARAM *sort_param)
DBUG_RETURN(1);
}
}
- if (param->calc_checksum)
+ if (sort_param->calc_checksum)
param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record);
}
error=flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info);
diff --git a/myisam/mi_open.c b/myisam/mi_open.c
index a5b303f86d4..4f298397615 100644
--- a/myisam/mi_open.c
+++ b/myisam/mi_open.c
@@ -201,7 +201,10 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
((open_flags & HA_OPEN_ABORT_IF_CRASHED) &&
(my_disable_locking && share->state.open_count))))
{
- DBUG_PRINT("error",("Table is marked as crashed"));
+ DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u "
+ "changed: %u open_count: %u !locking: %d",
+ open_flags, share->state.changed,
+ share->state.open_count, my_disable_locking));
my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
goto err;
diff --git a/myisam/mi_packrec.c b/myisam/mi_packrec.c
index 0edb3ac1d5d..feb8d33b015 100644
--- a/myisam/mi_packrec.c
+++ b/myisam/mi_packrec.c
@@ -101,7 +101,8 @@ static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count);
static void fill_buffer(MI_BIT_BUFF *bit_buff);
static uint max_bit(uint value);
#ifdef HAVE_MMAP
-static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info,
+static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
+ MI_BLOCK_INFO *info, byte **rec_buff_p,
uchar *header);
#endif
@@ -436,13 +437,15 @@ int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf)
DBUG_RETURN(-1); /* _search() didn't find record */
file=info->dfile;
- if (_mi_pack_get_block_info(info, &block_info, file, filepos))
+ if (_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, file, filepos))
goto err;
if (my_read(file,(byte*) info->rec_buff + block_info.offset ,
block_info.rec_len - block_info.offset, MYF(MY_NABP)))
goto panic;
info->update|= HA_STATE_AKTIV;
- DBUG_RETURN(_mi_pack_rec_unpack(info,buf,info->rec_buff,block_info.rec_len));
+ DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf,
+ info->rec_buff, block_info.rec_len));
panic:
my_errno=HA_ERR_WRONG_IN_RECORD;
err:
@@ -451,8 +454,8 @@ err:
-int _mi_pack_rec_unpack(register MI_INFO *info, register byte *to, byte *from,
- ulong reclength)
+int _mi_pack_rec_unpack(register MI_INFO *info, MI_BIT_BUFF *bit_buff,
+ register byte *to, byte *from, ulong reclength)
{
byte *end_field;
reg3 MI_COLUMNDEF *end;
@@ -460,18 +463,18 @@ int _mi_pack_rec_unpack(register MI_INFO *info, register byte *to, byte *from,
MYISAM_SHARE *share=info->s;
DBUG_ENTER("_mi_pack_rec_unpack");
- init_bit_buffer(&info->bit_buff, (uchar*) from,reclength);
+ init_bit_buffer(bit_buff, (uchar*) from, reclength);
for (current_field=share->rec, end=current_field+share->base.fields ;
current_field < end ;
current_field++,to=end_field)
{
end_field=to+current_field->length;
- (*current_field->unpack)(current_field,&info->bit_buff,(uchar*) to,
+ (*current_field->unpack)(current_field, bit_buff, (uchar*) to,
(uchar*) end_field);
}
- if (! info->bit_buff.error &&
- info->bit_buff.pos - info->bit_buff.bits/8 == info->bit_buff.end)
+ if (!bit_buff->error &&
+ bit_buff->pos - bit_buff->bits / 8 == bit_buff->end)
DBUG_RETURN(0);
info->update&= ~HA_STATE_AKTIV;
DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD);
@@ -985,13 +988,16 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf,
if (info->opt_flag & READ_CACHE_USED)
{
- if (_mi_read_cache(&info->rec_cache,(byte*) block_info.header,filepos,
- share->pack.ref_length, skip_deleted_blocks))
+ if (_mi_read_cache(&info->rec_cache, (byte*) block_info.header,
+ filepos, share->pack.ref_length,
+ skip_deleted_blocks ? READING_NEXT : 0))
goto err;
- b_type=_mi_pack_get_block_info(info,&block_info,-1, filepos);
+ b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, -1, filepos);
}
else
- b_type=_mi_pack_get_block_info(info,&block_info,info->dfile,filepos);
+ b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, info->dfile, filepos);
if (b_type)
goto err; /* Error code is already set */
#ifndef DBUG_OFF
@@ -1004,9 +1010,9 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf,
if (info->opt_flag & READ_CACHE_USED)
{
- if (_mi_read_cache(&info->rec_cache,(byte*) info->rec_buff,
- block_info.filepos, block_info.rec_len,
- skip_deleted_blocks))
+ if (_mi_read_cache(&info->rec_cache, (byte*) info->rec_buff,
+ block_info.filepos, block_info.rec_len,
+ skip_deleted_blocks ? READING_NEXT : 0))
goto err;
}
else
@@ -1021,8 +1027,8 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf,
info->nextpos=block_info.filepos+block_info.rec_len;
info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
- DBUG_RETURN (_mi_pack_rec_unpack(info,buf,info->rec_buff,
- block_info.rec_len));
+ DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf,
+ info->rec_buff, block_info.rec_len));
err:
DBUG_RETURN(my_errno);
}
@@ -1030,8 +1036,9 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf,
/* Read and process header from a huff-record-file */
-uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BLOCK_INFO *info, File file,
- my_off_t filepos)
+uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
+ MI_BLOCK_INFO *info, byte **rec_buff_p,
+ File file, my_off_t filepos)
{
uchar *header=info->header;
uint head_length,ref_length;
@@ -1056,17 +1063,17 @@ uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BLOCK_INFO *info, File file,
head_length+= read_pack_length((uint) myisam->s->pack.version,
header + head_length, &info->blob_len);
if (!(mi_alloc_rec_buff(myisam,info->rec_len + info->blob_len,
- &myisam->rec_buff)))
+ rec_buff_p)))
return BLOCK_FATAL_ERROR; /* not enough memory */
- myisam->bit_buff.blob_pos=(uchar*) myisam->rec_buff+info->rec_len;
- myisam->bit_buff.blob_end= myisam->bit_buff.blob_pos+info->blob_len;
+ bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len;
+ bit_buff->blob_end= bit_buff->blob_pos + info->blob_len;
myisam->blob_length=info->blob_len;
}
info->filepos=filepos+head_length;
if (file > 0)
{
info->offset=min(info->rec_len, ref_length - head_length);
- memcpy(myisam->rec_buff, header+head_length, info->offset);
+ memcpy(*rec_buff_p, header + head_length, info->offset);
}
return 0;
}
@@ -1206,7 +1213,8 @@ void _mi_unmap_file(MI_INFO *info)
}
-static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info,
+static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
+ MI_BLOCK_INFO *info, byte **rec_buff_p,
uchar *header)
{
header+= read_pack_length((uint) myisam->s->pack.version, header,
@@ -1217,10 +1225,10 @@ static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info,
&info->blob_len);
/* mi_alloc_rec_buff sets my_errno on error */
if (!(mi_alloc_rec_buff(myisam, info->blob_len,
- &myisam->rec_buff)))
+ rec_buff_p)))
return 0; /* not enough memory */
- myisam->bit_buff.blob_pos=(uchar*) myisam->rec_buff;
- myisam->bit_buff.blob_end= (uchar*) myisam->rec_buff + info->blob_len;
+ bit_buff->blob_pos= (uchar*) *rec_buff_p;
+ bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len;
}
return header;
}
@@ -1236,11 +1244,13 @@ static int _mi_read_mempack_record(MI_INFO *info, my_off_t filepos, byte *buf)
if (filepos == HA_OFFSET_ERROR)
DBUG_RETURN(-1); /* _search() didn't find record */
- if (!(pos= (byte*) _mi_mempack_get_block_info(info,&block_info,
+ if (!(pos= (byte*) _mi_mempack_get_block_info(info, &info->bit_buff,
+ &block_info, &info->rec_buff,
(uchar*) share->file_map+
filepos)))
DBUG_RETURN(-1);
- DBUG_RETURN(_mi_pack_rec_unpack(info, buf, pos, block_info.rec_len));
+ DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf,
+ pos, block_info.rec_len));
}
@@ -1260,7 +1270,8 @@ static int _mi_read_rnd_mempack_record(MI_INFO *info, byte *buf,
my_errno=HA_ERR_END_OF_FILE;
goto err;
}
- if (!(pos= (byte*) _mi_mempack_get_block_info(info,&block_info,
+ if (!(pos= (byte*) _mi_mempack_get_block_info(info, &info->bit_buff,
+ &block_info, &info->rec_buff,
(uchar*)
(start=share->file_map+
filepos))))
@@ -1277,7 +1288,8 @@ static int _mi_read_rnd_mempack_record(MI_INFO *info, byte *buf,
info->nextpos=filepos+(uint) (pos-start)+block_info.rec_len;
info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
- DBUG_RETURN (_mi_pack_rec_unpack(info,buf,pos, block_info.rec_len));
+ DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf,
+ pos, block_info.rec_len));
err:
DBUG_RETURN(my_errno);
}
diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h
index a766d59d72a..6365f0e1b0c 100644
--- a/myisam/myisamdef.h
+++ b/myisam/myisamdef.h
@@ -75,7 +75,7 @@ typedef struct st_mi_state_info
ulong sec_index_changed; /* Updated when new sec_index */
ulong sec_index_used; /* which extra index are in use */
ulonglong key_map; /* Which keys are in use */
- ha_checksum checksum;
+ ha_checksum checksum; /* Table checksum */
ulong version; /* timestamp of create */
time_t create_time; /* Time when created database */
time_t recover_time; /* Time for last recover */
@@ -176,6 +176,7 @@ typedef struct st_mi_isam_share { /* Shared between opens */
int (*delete_record)(struct st_myisam_info*);
int (*read_rnd)(struct st_myisam_info*, byte*, my_off_t, my_bool);
int (*compare_record)(struct st_myisam_info*, const byte *);
+ /* Function to use for a row checksum. */
ha_checksum (*calc_checksum)(struct st_myisam_info*, const byte *);
int (*compare_unique)(struct st_myisam_info*, MI_UNIQUEDEF *,
const byte *record, my_off_t pos);
@@ -249,7 +250,7 @@ struct st_myisam_info {
my_off_t last_keypage; /* Last key page read */
my_off_t last_search_keypage; /* Last keypage when searching */
my_off_t dupp_key_pos;
- ha_checksum checksum;
+ ha_checksum checksum; /* Temp storage for row checksum */
/* QQ: the folloing two xxx_length fields should be removed,
as they are not compatible with parallel repair */
ulong packed_length,blob_length; /* Length of found, packed record */
@@ -297,8 +298,9 @@ typedef struct st_mi_sort_param
pthread_t thr;
IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
DYNAMIC_ARRAY buffpek;
-
- /*
+ MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */
+
+ /*
The next two are used to collect statistics, see update_key_parts for
description.
*/
@@ -309,6 +311,7 @@ typedef struct st_mi_sort_param
uint key, key_length,real_key_length,sortbuff_size;
uint maxbuffers, keys, find_length, sort_keys_length;
my_bool fix_datafile, master;
+ my_bool calc_checksum; /* calculate table checksum */
MI_KEYDEF *keyinfo;
HA_KEYSEG *seg;
SORT_INFO *sort_info;
@@ -361,8 +364,15 @@ typedef struct st_mi_sort_param
#define mi_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\
mi_int2store(x,boh); }
#define mi_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0)
-#define mi_mark_crashed(x) (x)->s->state.changed|=STATE_CRASHED
-#define mi_mark_crashed_on_repair(x) { (x)->s->state.changed|=STATE_CRASHED|STATE_CRASHED_ON_REPAIR ; (x)->update|= HA_STATE_CHANGED; }
+#define mi_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \
+ DBUG_PRINT("error", ("Marked table crashed")); \
+ }while(0)
+#define mi_mark_crashed_on_repair(x) do{(x)->s->state.changed|= \
+ STATE_CRASHED|STATE_CRASHED_ON_REPAIR; \
+ (x)->update|= HA_STATE_CHANGED; \
+ DBUG_PRINT("error", \
+ ("Marked table crashed")); \
+ }while(0)
#define mi_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED)
#define mi_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR)
@@ -600,8 +610,8 @@ extern void _mi_print_key(FILE *stream,HA_KEYSEG *keyseg,const uchar *key,
extern my_bool _mi_read_pack_info(MI_INFO *info,pbool fix_keys);
extern int _mi_read_pack_record(MI_INFO *info,my_off_t filepos,byte *buf);
extern int _mi_read_rnd_pack_record(MI_INFO*, byte *,my_off_t, my_bool);
-extern int _mi_pack_rec_unpack(MI_INFO *info,byte *to,byte *from,
- ulong reclength);
+extern int _mi_pack_rec_unpack(MI_INFO *info, MI_BIT_BUFF *bit_buff,
+ byte *to, byte *from, ulong reclength);
extern ulonglong mi_safe_mul(ulonglong a,ulonglong b);
extern int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf,
const byte *oldrec, const byte *newrec, my_off_t pos);
@@ -666,7 +676,9 @@ extern "C" {
extern uint _mi_get_block_info(MI_BLOCK_INFO *,File, my_off_t);
extern uint _mi_rec_pack(MI_INFO *info,byte *to,const byte *from);
-extern uint _mi_pack_get_block_info(MI_INFO *, MI_BLOCK_INFO *, File, my_off_t);
+extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
+ MI_BLOCK_INFO *info, byte **rec_buff_p,
+ File file, my_off_t filepos);
extern void _my_store_blob_length(byte *pos,uint pack_length,uint length);
extern void _myisam_log(enum myisam_log_commands command,MI_INFO *info,
const byte *buffert,uint length);
diff --git a/myisam/sort.c b/myisam/sort.c
index 1a3dc147cd9..727840709da 100644
--- a/myisam/sort.c
+++ b/myisam/sort.c
@@ -309,7 +309,7 @@ static ha_rows NEAR_F find_all_keys(MI_SORT_PARAM *info, uint keys,
pthread_handler_decl(thr_find_all_keys,arg)
{
- MI_SORT_PARAM *info= (MI_SORT_PARAM*) arg;
+ MI_SORT_PARAM *sort_param= (MI_SORT_PARAM*) arg;
int error;
uint memavl,old_memavl,keys,sort_length;
uint idx, maxbuffer;
@@ -321,32 +321,34 @@ pthread_handler_decl(thr_find_all_keys,arg)
if (my_thread_init())
goto err;
- if (info->sort_info->got_error)
+ DBUG_ENTER("thr_find_all_keys");
+ DBUG_PRINT("enter", ("master: %d", sort_param->master));
+ if (sort_param->sort_info->got_error)
goto err;
- if (info->keyinfo->flag && HA_VAR_LENGTH_KEY)
+ if (sort_param->keyinfo->flag && HA_VAR_LENGTH_KEY)
{
- info->write_keys=write_keys_varlen;
- info->read_to_buffer=read_to_buffer_varlen;
- info->write_key=write_merge_key_varlen;
+ sort_param->write_keys= write_keys_varlen;
+ sort_param->read_to_buffer= read_to_buffer_varlen;
+ sort_param->write_key= write_merge_key_varlen;
}
else
{
- info->write_keys=write_keys;
- info->read_to_buffer=read_to_buffer;
- info->write_key=write_merge_key;
+ sort_param->write_keys= write_keys;
+ sort_param->read_to_buffer= read_to_buffer;
+ sort_param->write_key= write_merge_key;
}
- my_b_clear(&info->tempfile);
- my_b_clear(&info->tempfile_for_exceptions);
- bzero((char*) &info->buffpek,sizeof(info->buffpek));
- bzero((char*) &info->unique, sizeof(info->unique));
+ my_b_clear(&sort_param->tempfile);
+ my_b_clear(&sort_param->tempfile_for_exceptions);
+ bzero((char*) &sort_param->buffpek, sizeof(sort_param->buffpek));
+ bzero((char*) &sort_param->unique, sizeof(sort_param->unique));
sort_keys= (uchar **) NULL;
- memavl=max(info->sortbuff_size, MIN_SORT_MEMORY);
- idx= info->sort_info->max_records;
- sort_length= info->key_length;
- maxbuffer= 1;
+ memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY);
+ idx= sort_param->sort_info->max_records;
+ sort_length= sort_param->key_length;
+ maxbuffer= 1;
while (memavl >= MIN_SORT_MEMORY)
{
@@ -363,18 +365,19 @@ pthread_handler_decl(thr_find_all_keys,arg)
(keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/
(sort_length+sizeof(char*))) <= 1)
{
- mi_check_print_error(info->sort_info->param,
+ mi_check_print_error(sort_param->sort_info->param,
"sort_buffer_size is to small");
goto err;
}
}
while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr);
}
- if ((sort_keys=(uchar **)my_malloc(keys*(sort_length+sizeof(char*))+
- ((info->keyinfo->flag & HA_FULLTEXT) ?
- HA_FT_MAXBYTELEN : 0), MYF(0))))
+ if ((sort_keys= (uchar**)
+ my_malloc(keys*(sort_length+sizeof(char*))+
+ ((sort_param->keyinfo->flag & HA_FULLTEXT) ?
+ HA_FT_MAXBYTELEN : 0), MYF(0))))
{
- if (my_init_dynamic_array(&info->buffpek, sizeof(BUFFPEK),
+ if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK),
maxbuffer, maxbuffer/2))
my_free((gptr) sort_keys,MYF(0));
else
@@ -386,69 +389,87 @@ pthread_handler_decl(thr_find_all_keys,arg)
}
if (memavl < MIN_SORT_MEMORY)
{
- mi_check_print_error(info->sort_info->param,"Sort buffer to small"); /* purecov: tested */
+ mi_check_print_error(sort_param->sort_info->param, "Sort buffer too small");
goto err; /* purecov: tested */
}
- if (info->sort_info->param->testflag & T_VERBOSE)
- printf("Key %d - Allocating buffer for %d keys\n",info->key+1,keys);
- info->sort_keys=sort_keys;
+ if (sort_param->sort_info->param->testflag & T_VERBOSE)
+ printf("Key %d - Allocating buffer for %d keys\n",
+ sort_param->key + 1, keys);
+ sort_param->sort_keys= sort_keys;
idx=error=0;
sort_keys[0]=(uchar*) (sort_keys+keys);
- while (!(error=info->sort_info->got_error) &&
- !(error=(*info->key_read)(info,sort_keys[idx])))
+ DBUG_PRINT("info", ("reading keys"));
+ while (!(error= sort_param->sort_info->got_error) &&
+ !(error= (*sort_param->key_read)(sort_param, sort_keys[idx])))
{
- if (info->real_key_length > info->key_length)
+ if (sort_param->real_key_length > sort_param->key_length)
{
- if (write_key(info,sort_keys[idx], &info->tempfile_for_exceptions))
+ if (write_key(sort_param, sort_keys[idx],
+ &sort_param->tempfile_for_exceptions))
goto err;
continue;
}
if (++idx == keys)
{
- if (info->write_keys(info,sort_keys,idx-1,
- (BUFFPEK *)alloc_dynamic(&info->buffpek),
- &info->tempfile))
+ if (sort_param->write_keys(sort_param, sort_keys, idx - 1,
+ (BUFFPEK*) alloc_dynamic(&sort_param->buffpek),
+ &sort_param->tempfile))
goto err;
sort_keys[0]=(uchar*) (sort_keys+keys);
- memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length);
+ memcpy(sort_keys[0], sort_keys[idx - 1], (size_t) sort_param->key_length);
idx=1;
}
- sort_keys[idx]=sort_keys[idx-1]+info->key_length;
+ sort_keys[idx]= sort_keys[idx - 1] + sort_param->key_length;
}
if (error > 0)
goto err;
- if (info->buffpek.elements)
+ if (sort_param->buffpek.elements)
{
- if (info->write_keys(info,sort_keys, idx,
- (BUFFPEK *) alloc_dynamic(&info->buffpek), &info->tempfile))
+ if (sort_param->write_keys(sort_param, sort_keys, idx,
+ (BUFFPEK*) alloc_dynamic(&sort_param->buffpek),
+ &sort_param->tempfile))
goto err;
- info->keys=(info->buffpek.elements-1)*(keys-1)+idx;
+ sort_param->keys= (sort_param->buffpek.elements - 1) * (keys - 1) + idx;
}
else
- info->keys=idx;
+ sort_param->keys= idx;
- info->sort_keys_length=keys;
+ sort_param->sort_keys_length= keys;
goto ok;
err:
- info->sort_info->got_error=1; /* no need to protect this with a mutex */
+ DBUG_PRINT("error", ("got some error"));
+ sort_param->sort_info->got_error= 1; /* no need to protect with a mutex */
if (sort_keys)
my_free((gptr) sort_keys,MYF(0));
- info->sort_keys=0;
- delete_dynamic(& info->buffpek);
- close_cached_file(&info->tempfile);
- close_cached_file(&info->tempfile_for_exceptions);
+ sort_param->sort_keys= 0;
+ delete_dynamic(& sort_param->buffpek);
+ close_cached_file(&sort_param->tempfile);
+ close_cached_file(&sort_param->tempfile_for_exceptions);
ok:
- remove_io_thread(&info->read_cache);
- pthread_mutex_lock(&info->sort_info->mutex);
- info->sort_info->threads_running--;
- pthread_cond_signal(&info->sort_info->cond);
- pthread_mutex_unlock(&info->sort_info->mutex);
+ /*
+ Detach from the share if the writer is involved. Avoid others to
+ be blocked. This includes a flush of the write buffer. This will
+ also indicate EOF to the readers.
+ */
+ if (sort_param->sort_info->info->rec_cache.share)
+ remove_io_thread(&sort_param->sort_info->info->rec_cache);
+
+ /* Readers detach from the share if any. Avoid others to be blocked. */
+ if (sort_param->read_cache.share)
+ remove_io_thread(&sort_param->read_cache);
+
+ pthread_mutex_lock(&sort_param->sort_info->mutex);
+ if (!--sort_param->sort_info->threads_running)
+ pthread_cond_signal(&sort_param->sort_info->cond);
+ pthread_mutex_unlock(&sort_param->sort_info->mutex);
+
+ DBUG_PRINT("exit", ("======== ending thread ========"));
my_thread_end();
return NULL;
}
@@ -466,6 +487,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
MYISAM_SHARE *share=info->s;
MI_SORT_PARAM *sinfo;
byte *mergebuf=0;
+ DBUG_ENTER("thr_write_keys");
LINT_INIT(length);
for (i= 0, sinfo= sort_param ;
@@ -602,7 +624,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
}
}
my_free((gptr) mergebuf,MYF(MY_ALLOW_ZERO_PTR));
- return got_error;
+ DBUG_RETURN(got_error);
}
#endif /* THREAD */
diff --git a/mysql-test/r/myisam.result b/mysql-test/r/myisam.result
index 05f72b22ed1..b34c127595f 100644
--- a/mysql-test/r/myisam.result
+++ b/mysql-test/r/myisam.result
@@ -797,6 +797,132 @@ a b
xxxxxxxxx bbbbbb
xxxxxxxxx bbbbbb
DROP TABLE t1;
+SET @@myisam_repair_threads=2;
+SHOW VARIABLES LIKE 'myisam_repair%';
+Variable_name Value
+myisam_repair_threads 2
+CREATE TABLE t1 (
+`_id` int(11) NOT NULL default '0',
+`url` text,
+`email` text,
+`description` text,
+`loverlap` int(11) default NULL,
+`roverlap` int(11) default NULL,
+`lneighbor_id` int(11) default NULL,
+`rneighbor_id` int(11) default NULL,
+`length_` int(11) default NULL,
+`sequence` mediumtext,
+`name` text,
+`_obj_class` text NOT NULL,
+PRIMARY KEY (`_id`),
+UNIQUE KEY `sequence_name_index` (`name`(50)),
+KEY (`length_`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+INSERT INTO t1 VALUES
+(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+SELECT _id FROM t1;
+_id
+1
+2
+3
+4
+5
+6
+7
+8
+9
+DELETE FROM t1 WHERE _id < 8;
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MyISAM 9 Dynamic 2 # # # # 140 # # # # # #
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MyISAM 9 Dynamic 2 # # # # 0 # # # # # #
+SELECT _id FROM t1;
+_id
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+`_id` int(11) NOT NULL default '0',
+`url` text,
+`email` text,
+`description` text,
+`loverlap` int(11) default NULL,
+`roverlap` int(11) default NULL,
+`lneighbor_id` int(11) default NULL,
+`rneighbor_id` int(11) default NULL,
+`length_` int(11) default NULL,
+`sequence` mediumtext,
+`name` text,
+`_obj_class` text NOT NULL,
+PRIMARY KEY (`_id`),
+UNIQUE KEY `sequence_name_index` (`name`(50)),
+KEY (`length_`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+INSERT INTO t1 VALUES
+(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+SELECT _id FROM t1;
+_id
+1
+2
+3
+4
+5
+6
+7
+8
+9
+DELETE FROM t1 WHERE _id < 8;
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MyISAM 9 Dynamic 2 # # # # 140 # # # # # #
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+REPAIR TABLE t1 QUICK;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MyISAM 9 Dynamic 2 # # # # 140 # # # # # #
+SELECT _id FROM t1;
+_id
+8
+9
+DROP TABLE t1;
+SET @@myisam_repair_threads=1;
+SHOW VARIABLES LIKE 'myisam_repair%';
+Variable_name Value
+myisam_repair_threads 1
show create table t1;
Table Create Table
t1 CREATE TEMPORARY TABLE `t1` (
diff --git a/mysql-test/t/myisam.test b/mysql-test/t/myisam.test
index a62a6487882..745e3a2e377 100644
--- a/mysql-test/t/myisam.test
+++ b/mysql-test/t/myisam.test
@@ -763,6 +763,97 @@ SELECT * FROM t1;
DROP TABLE t1;
#
+# Bug#8283 - OPTIMIZE TABLE causes data loss
+#
+SET @@myisam_repair_threads=2;
+SHOW VARIABLES LIKE 'myisam_repair%';
+#
+# Test OPTIMIZE. This creates a new data file.
+CREATE TABLE t1 (
+ `_id` int(11) NOT NULL default '0',
+ `url` text,
+ `email` text,
+ `description` text,
+ `loverlap` int(11) default NULL,
+ `roverlap` int(11) default NULL,
+ `lneighbor_id` int(11) default NULL,
+ `rneighbor_id` int(11) default NULL,
+ `length_` int(11) default NULL,
+ `sequence` mediumtext,
+ `name` text,
+ `_obj_class` text NOT NULL,
+ PRIMARY KEY (`_id`),
+ UNIQUE KEY `sequence_name_index` (`name`(50)),
+ KEY (`length_`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+#
+INSERT INTO t1 VALUES
+ (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+ (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+ (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+ (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+ (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+ (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+ (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+ (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+ (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+#
+SELECT _id FROM t1;
+DELETE FROM t1 WHERE _id < 8;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+CHECK TABLE t1 EXTENDED;
+OPTIMIZE TABLE t1;
+CHECK TABLE t1 EXTENDED;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+SELECT _id FROM t1;
+DROP TABLE t1;
+#
+# Test REPAIR QUICK. This retains the old data file.
+CREATE TABLE t1 (
+ `_id` int(11) NOT NULL default '0',
+ `url` text,
+ `email` text,
+ `description` text,
+ `loverlap` int(11) default NULL,
+ `roverlap` int(11) default NULL,
+ `lneighbor_id` int(11) default NULL,
+ `rneighbor_id` int(11) default NULL,
+ `length_` int(11) default NULL,
+ `sequence` mediumtext,
+ `name` text,
+ `_obj_class` text NOT NULL,
+ PRIMARY KEY (`_id`),
+ UNIQUE KEY `sequence_name_index` (`name`(50)),
+ KEY (`length_`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+#
+INSERT INTO t1 VALUES
+ (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+ (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+ (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+ (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+ (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+ (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+ (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+ (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+ (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+#
+SELECT _id FROM t1;
+DELETE FROM t1 WHERE _id < 8;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+CHECK TABLE t1 EXTENDED;
+REPAIR TABLE t1 QUICK;
+CHECK TABLE t1 EXTENDED;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+SELECT _id FROM t1;
+DROP TABLE t1;
+#
+SET @@myisam_repair_threads=1;
+SHOW VARIABLES LIKE 'myisam_repair%';
# Bug#8706 - temporary table with data directory option fails
#
connect (session1,localhost,root,,);
diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c
index 0007784c2b2..58d4756b702 100644
--- a/mysys/mf_iocache.c
+++ b/mysys/mf_iocache.c
@@ -70,7 +70,6 @@ static void my_aiowait(my_aio_result *result);
#define IO_ROUND_UP(X) (((X)+IO_SIZE-1) & ~(IO_SIZE-1))
#define IO_ROUND_DN(X) ( (X) & ~(IO_SIZE-1))
-
/*
Setup internal pointers inside IO_CACHE
@@ -500,65 +499,366 @@ int _my_b_read(register IO_CACHE *info, byte *Buffer, uint Count)
DBUG_RETURN(0);
}
+
#ifdef THREAD
-/* Prepare IO_CACHE for shared use */
-void init_io_cache_share(IO_CACHE *info, IO_CACHE_SHARE *s, uint num_threads)
+/*
+ Prepare IO_CACHE for shared use.
+
+ SYNOPSIS
+ init_io_cache_share()
+ read_cache A read cache. This will be copied for
+ every thread after setup.
+ cshare The share.
+ write_cache If non-NULL a write cache that is to be
+ synchronized with the read caches.
+ num_threads Number of threads sharing the cache
+ including the write thread if any.
+
+ DESCRIPTION
+
+ The shared cache is used so: One IO_CACHE is initialized with
+ init_io_cache(). This includes the allocation of a buffer. Then a
+ share is allocated and init_io_cache_share() is called with the io
+ cache and the share. Then the io cache is copied for each thread. So
+ every thread has its own copy of IO_CACHE. But the allocated buffer
+ is shared because cache->buffer is the same for all caches.
+
+ One thread reads data from the file into the buffer. All threads
+ read from the buffer, but every thread maintains its own set of
+ pointers into the buffer. When all threads have used up the buffer
+ contents, one of the threads reads the next block of data into the
+ buffer. To accomplish this, each thread enters the cache lock before
+ accessing the buffer. They wait in lock_io_cache() until all threads
+ joined the lock. The last thread entering the lock is in charge of
+ reading from file to buffer. It wakes all threads when done.
+
+ Synchronizing a write cache to the read caches works so: Whenever
+ the write buffer needs a flush, the write thread enters the lock and
+ waits for all other threads to enter the lock too. They do this when
+ they have used up the read buffer. When all threads are in the lock,
+ the write thread copies the write buffer to the read buffer and
+ wakes all threads.
+
+ share->running_threads is the number of threads not being in the
+ cache lock. When entering lock_io_cache() the number is decreased.
+ When the thread that fills the buffer enters unlock_io_cache() the
+ number is reset to the number of threads. The condition
+ running_threads == 0 means that all threads are in the lock. Bumping
+ up the number to the full count is non-intuitive. But increasing the
+ number by one for each thread that leaves the lock could lead to a
+ solo run of one thread. The last thread to join a lock reads from
+ file to buffer, wakes the other threads, processes the data in the
+ cache and enters the lock again. If no other thread left the lock
+ meanwhile, it would think it's the last one again and read the next
+ block...
+
+ The share has copies of 'error', 'buffer', 'read_end', and
+ 'pos_in_file' from the thread that filled the buffer. We may not be
+ able to access this information directly from its cache because the
+ thread may be removed from the share before the variables could be
+ copied by all other threads. Or, if a write buffer is synchronized,
+ it would change its 'pos_in_file' after waking the other threads,
+ possibly before they could copy its value.
+
+ However, the 'buffer' variable in the share is for a synchronized
+ write cache. It needs to know where to put the data. Otherwise it
+ would need access to the read cache of one of the threads that is
+ not yet removed from the share.
+
+ RETURN
+ void
+*/
+
+void init_io_cache_share(IO_CACHE *read_cache, IO_CACHE_SHARE *cshare,
+ IO_CACHE *write_cache, uint num_threads)
{
- DBUG_ASSERT(info->type == READ_CACHE);
- pthread_mutex_init(&s->mutex, MY_MUTEX_INIT_FAST);
- pthread_cond_init (&s->cond, 0);
- s->total=s->count=num_threads-1;
- s->active=0;
- info->share=s;
- info->read_function=_my_b_read_r;
- info->current_pos= info->current_end= 0;
+ DBUG_ENTER("init_io_cache_share");
+ DBUG_PRINT("io_cache_share", ("read_cache: 0x%lx share: 0x%lx "
+ "write_cache: 0x%lx threads: %u",
+ read_cache, cshare, write_cache, num_threads));
+
+ DBUG_ASSERT(num_threads > 1);
+ DBUG_ASSERT(read_cache->type == READ_CACHE);
+ DBUG_ASSERT(!write_cache || (write_cache->type == WRITE_CACHE));
+
+ pthread_mutex_init(&cshare->mutex, MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&cshare->cond, 0);
+ pthread_cond_init(&cshare->cond_writer, 0);
+
+ cshare->running_threads= num_threads;
+ cshare->total_threads= num_threads;
+ cshare->error= 0; /* Initialize. */
+ cshare->buffer= read_cache->buffer;
+ cshare->read_end= NULL; /* See function comment of lock_io_cache(). */
+ cshare->pos_in_file= 0; /* See function comment of lock_io_cache(). */
+ cshare->source_cache= write_cache; /* Can be NULL. */
+
+ read_cache->share= cshare;
+ read_cache->read_function= _my_b_read_r;
+ read_cache->current_pos= NULL;
+ read_cache->current_end= NULL;
+
+ if (write_cache)
+ write_cache->share= cshare;
+
+ DBUG_VOID_RETURN;
}
+
/*
- Remove a thread from shared access to IO_CACHE
- Every thread should do that on exit for not
- to deadlock other threads
+ Remove a thread from shared access to IO_CACHE.
+
+ SYNOPSIS
+ remove_io_thread()
+ cache The IO_CACHE to be removed from the share.
+
+ NOTE
+
+ Every thread must do that on exit for not to deadlock other threads.
+
+ The last thread destroys the pthread resources.
+
+ A writer flushes its cache first.
+
+ RETURN
+ void
*/
-void remove_io_thread(IO_CACHE *info)
+
+void remove_io_thread(IO_CACHE *cache)
{
- IO_CACHE_SHARE *s=info->share;
+ IO_CACHE_SHARE *cshare= cache->share;
+ uint total;
+ DBUG_ENTER("remove_io_thread");
+
+ /* If the writer goes, it needs to flush the write cache. */
+ if (cache == cshare->source_cache)
+ flush_io_cache(cache);
- pthread_mutex_lock(&s->mutex);
- s->total--;
- if (! s->count--)
- pthread_cond_signal(&s->cond);
- pthread_mutex_unlock(&s->mutex);
+ pthread_mutex_lock(&cshare->mutex);
+ DBUG_PRINT("io_cache_share", ("%s: 0x%lx",
+ (cache == cshare->source_cache) ?
+ "writer" : "reader", cache));
+
+ /* Remove from share. */
+ total= --cshare->total_threads;
+ DBUG_PRINT("io_cache_share", ("remaining threads: %u", total));
+
+ /* Detach from share. */
+ cache->share= NULL;
+
+ /* If the writer goes, let the readers know. */
+ if (cache == cshare->source_cache)
+ {
+ DBUG_PRINT("io_cache_share", ("writer leaves"));
+ cshare->source_cache= NULL;
+ }
+
+ /* If all threads are waiting for me to join the lock, wake them. */
+ if (!--cshare->running_threads)
+ {
+ DBUG_PRINT("io_cache_share", ("the last running thread leaves, wake all"));
+ pthread_cond_signal(&cshare->cond_writer);
+ pthread_cond_broadcast(&cshare->cond);
+ }
+
+ pthread_mutex_unlock(&cshare->mutex);
+
+ if (!total)
+ {
+ DBUG_PRINT("io_cache_share", ("last thread removed, destroy share"));
+ pthread_cond_destroy (&cshare->cond_writer);
+ pthread_cond_destroy (&cshare->cond);
+ pthread_mutex_destroy(&cshare->mutex);
+ }
+
+ DBUG_VOID_RETURN;
}
-static int lock_io_cache(IO_CACHE *info, my_off_t pos)
-{
- int total;
- IO_CACHE_SHARE *s=info->share;
- pthread_mutex_lock(&s->mutex);
- if (!s->count)
+/*
+ Lock IO cache and wait for all other threads to join.
+
+ SYNOPSIS
+ lock_io_cache()
+ cache The cache of the thread entering the lock.
+ pos File position of the block to read.
+ Unused for the write thread.
+
+ DESCRIPTION
+
+ Wait for all threads to finish with the current buffer. We want
+ all threads to proceed in concert. The last thread to join
+ lock_io_cache() will read the block from file and all threads start
+ to use it. Then they will join again for reading the next block.
+
+ The waiting threads detect a fresh buffer by comparing
+ cshare->pos_in_file with the position they want to process next.
+ Since the first block may start at position 0, we take
+ cshare->read_end as an additional condition. This variable is
+ initialized to NULL and will be set after a block of data is written
+ to the buffer.
+
+ RETURN
+ 1 OK, lock in place, go ahead and read.
+ 0 OK, unlocked, another thread did the read.
+*/
+
+static int lock_io_cache(IO_CACHE *cache, my_off_t pos)
+{
+ IO_CACHE_SHARE *cshare= cache->share;
+ DBUG_ENTER("lock_io_cache");
+
+ /* Enter the lock. */
+ pthread_mutex_lock(&cshare->mutex);
+ cshare->running_threads--;
+ DBUG_PRINT("io_cache_share", ("%s: 0x%lx pos: %lu running: %u",
+ (cache == cshare->source_cache) ?
+ "writer" : "reader", cache, (ulong) pos,
+ cshare->running_threads));
+
+ if (cshare->source_cache)
{
- s->count=s->total;
- return 1;
+ /* A write cache is synchronized to the read caches. */
+
+ if (cache == cshare->source_cache)
+ {
+ /* The writer waits until all readers are here. */
+ while (cshare->running_threads)
+ {
+ DBUG_PRINT("io_cache_share", ("writer waits in lock"));
+ pthread_cond_wait(&cshare->cond_writer, &cshare->mutex);
+ }
+ DBUG_PRINT("io_cache_share", ("writer awoke, going to copy"));
+
+ /* Stay locked. Leave the lock later by unlock_io_cache(). */
+ DBUG_RETURN(1);
+ }
+
+ /* The last thread wakes the writer. */
+ if (!cshare->running_threads)
+ {
+ DBUG_PRINT("io_cache_share", ("waking writer"));
+ pthread_cond_signal(&cshare->cond_writer);
+ }
+
+ /*
+ Readers wait until the data is copied from the writer. Another
+ reason to stop waiting is the removal of the write thread. If this
+ happens, we leave the lock with old data in the buffer.
+ */
+ while ((!cshare->read_end || (cshare->pos_in_file < pos)) &&
+ cshare->source_cache)
+ {
+ DBUG_PRINT("io_cache_share", ("reader waits in lock"));
+ pthread_cond_wait(&cshare->cond, &cshare->mutex);
+ }
+
+ /*
+ If the writer was removed from the share while this thread was
+ asleep, we need to simulate an EOF condition. The writer cannot
+ reset the share variables as they might still be in use by readers
+ of the last block. When we awake here then because the last
+ joining thread signalled us. If the writer is not the last, it
+ will not signal. So it is safe to clear the buffer here.
+ */
+ if (!cshare->read_end || (cshare->pos_in_file < pos))
+ {
+ DBUG_PRINT("io_cache_share", ("reader found writer removed. EOF"));
+ cshare->read_end= cshare->buffer; /* Empty buffer. */
+ cshare->error= 0; /* EOF is not an error. */
+ }
}
+ else
+ {
+ /*
+ There are read caches only. The last thread arriving in
+ lock_io_cache() continues with a locked cache and reads the block.
+ */
+ if (!cshare->running_threads)
+ {
+ DBUG_PRINT("io_cache_share", ("last thread joined, going to read"));
+ /* Stay locked. Leave the lock later by unlock_io_cache(). */
+ DBUG_RETURN(1);
+ }
- total=s->total;
- s->count--;
- while (!s->active || s->active->pos_in_file < pos)
- pthread_cond_wait(&s->cond, &s->mutex);
+ /*
+ All other threads wait until the requested block is read by the
+ last thread arriving. Another reason to stop waiting is the
+ removal of a thread. If this leads to all threads being in the
+ lock, we have to continue also. The first of the awaken threads
+ will then do the read.
+ */
+ while ((!cshare->read_end || (cshare->pos_in_file < pos)) &&
+ cshare->running_threads)
+ {
+ DBUG_PRINT("io_cache_share", ("reader waits in lock"));
+ pthread_cond_wait(&cshare->cond, &cshare->mutex);
+ }
- if (s->total < total &&
- (!s->active || s->active->pos_in_file < pos))
- return 1;
+ /* If the block is not yet read, continue with a locked cache and read. */
+ if (!cshare->read_end || (cshare->pos_in_file < pos))
+ {
+ DBUG_PRINT("io_cache_share", ("reader awoke, going to read"));
+ /* Stay locked. Leave the lock later by unlock_io_cache(). */
+ DBUG_RETURN(1);
+ }
- pthread_mutex_unlock(&s->mutex);
- return 0;
+ /* Another thread did read the block already. */
+ }
+ DBUG_PRINT("io_cache_share", ("reader awoke, going to process %u bytes",
+ cshare->read_end ? (uint)
+ (cshare->read_end - cshare->buffer) : 0));
+
+ /*
+ Leave the lock. Do not call unlock_io_cache() later. The thread that
+ filled the buffer did this and marked all threads as running.
+ */
+ pthread_mutex_unlock(&cshare->mutex);
+ DBUG_RETURN(0);
}
-static void unlock_io_cache(IO_CACHE *info)
+
+/*
+ Unlock IO cache.
+
+ SYNOPSIS
+ unlock_io_cache()
+ cache The cache of the thread leaving the lock.
+
+ NOTE
+ This is called by the thread that filled the buffer. It marks all
+ threads as running and awakes them. This must not be done by any
+ other thread.
+
+ Do not signal cond_writer. Either there is no writer or the writer
+ is the only one who can call this function.
+
+ The reason for resetting running_threads to total_threads before
+ waking all other threads is that it could be possible that this
+ thread is so fast with processing the buffer that it enters the lock
+ before even one other thread has left it. If every awoken thread
+ would increase running_threads by one, this thread could think that
+ he is again the last to join and would not wait for the other
+ threads to process the data.
+
+ RETURN
+ void
+*/
+
+static void unlock_io_cache(IO_CACHE *cache)
{
- pthread_cond_broadcast(&info->share->cond);
- pthread_mutex_unlock(&info->share->mutex);
+ IO_CACHE_SHARE *cshare= cache->share;
+ DBUG_ENTER("unlock_io_cache");
+ DBUG_PRINT("io_cache_share", ("%s: 0x%lx pos: %lu running: %u",
+ (cache == cshare->source_cache) ?
+ "writer" : "reader",
+ cache, (ulong) cshare->pos_in_file,
+ cshare->total_threads));
+
+ cshare->running_threads= cshare->total_threads;
+ pthread_cond_broadcast(&cshare->cond);
+ pthread_mutex_unlock(&cshare->mutex);
+ DBUG_VOID_RETURN;
}
@@ -567,7 +867,7 @@ static void unlock_io_cache(IO_CACHE *info)
SYNOPSIS
_my_b_read_r()
- info IO_CACHE pointer
+ cache IO_CACHE pointer
Buffer Buffer to retrieve count bytes from file
Count Number of bytes to read into Buffer
@@ -579,7 +879,7 @@ static void unlock_io_cache(IO_CACHE *info)
It works as follows: when a thread tries to read from a file (that
is, after using all the data from the (shared) buffer), it just
- hangs on lock_io_cache(), wating for other threads. When the very
+ hangs on lock_io_cache(), waiting for other threads. When the very
last thread attempts a read, lock_io_cache() returns 1, the thread
does actual IO and unlock_io_cache(), which signals all the waiting
threads that data is in the buffer.
@@ -599,16 +899,17 @@ static void unlock_io_cache(IO_CACHE *info)
1 Error: can't read requested characters
*/
-int _my_b_read_r(register IO_CACHE *info, byte *Buffer, uint Count)
+int _my_b_read_r(register IO_CACHE *cache, byte *Buffer, uint Count)
{
my_off_t pos_in_file;
uint length, diff_length, left_length;
+ IO_CACHE_SHARE *cshare= cache->share;
DBUG_ENTER("_my_b_read_r");
- if ((left_length= (uint) (info->read_end - info->read_pos)))
+ if ((left_length= (uint) (cache->read_end - cache->read_pos)))
{
DBUG_ASSERT(Count >= left_length); /* User is not using my_b_read() */
- memcpy(Buffer, info->read_pos, (size_t) (left_length));
+ memcpy(Buffer, cache->read_pos, (size_t) (left_length));
Buffer+= left_length;
Count-= left_length;
}
@@ -616,55 +917,133 @@ int _my_b_read_r(register IO_CACHE *info, byte *Buffer, uint Count)
{
int cnt, len;
- pos_in_file= info->pos_in_file + (info->read_end - info->buffer);
+ pos_in_file= cache->pos_in_file + (cache->read_end - cache->buffer);
diff_length= (uint) (pos_in_file & (IO_SIZE-1));
length=IO_ROUND_UP(Count+diff_length)-diff_length;
- length=(length <= info->read_length) ?
- length + IO_ROUND_DN(info->read_length - length) :
- length - IO_ROUND_UP(length - info->read_length) ;
- if (info->type != READ_FIFO &&
- (length > (info->end_of_file - pos_in_file)))
- length= (uint) (info->end_of_file - pos_in_file);
+ length= ((length <= cache->read_length) ?
+ length + IO_ROUND_DN(cache->read_length - length) :
+ length - IO_ROUND_UP(length - cache->read_length));
+ if (cache->type != READ_FIFO &&
+ (length > (cache->end_of_file - pos_in_file)))
+ length= (uint) (cache->end_of_file - pos_in_file);
if (length == 0)
{
- info->error= (int) left_length;
+ cache->error= (int) left_length;
DBUG_RETURN(1);
}
- if (lock_io_cache(info, pos_in_file))
+ if (lock_io_cache(cache, pos_in_file))
{
- info->share->active=info;
- if (info->seek_not_done) /* File touched, do seek */
- VOID(my_seek(info->file,pos_in_file,MY_SEEK_SET,MYF(0)));
- len=(int)my_read(info->file,info->buffer, length, info->myflags);
- info->read_end=info->buffer + (len == -1 ? 0 : len);
- info->error=(len == (int)length ? 0 : len);
- info->pos_in_file=pos_in_file;
- unlock_io_cache(info);
+ /* With a synchronized write/read cache we won't come here... */
+ DBUG_ASSERT(!cshare->source_cache);
+ /*
+ ... unless the writer has gone before this thread entered the
+ lock. Simulate EOF in this case. It can be distinguished by
+ cache->file.
+ */
+ if (cache->file < 0)
+ len= 0;
+ else
+ {
+ if (cache->seek_not_done) /* File touched, do seek */
+ VOID(my_seek(cache->file, pos_in_file, MY_SEEK_SET, MYF(0)));
+ len= (int) my_read(cache->file, cache->buffer, length, cache->myflags);
+ }
+ DBUG_PRINT("io_cache_share", ("read %d bytes", len));
+
+ cache->read_end= cache->buffer + (len == -1 ? 0 : len);
+ cache->error= (len == (int)length ? 0 : len);
+ cache->pos_in_file= pos_in_file;
+
+ /* Copy important values to the share. */
+ cshare->error= cache->error;
+ cshare->read_end= cache->read_end;
+ cshare->pos_in_file= pos_in_file;
+
+ /* Mark all threads as running and wake them. */
+ unlock_io_cache(cache);
}
else
{
- info->error= info->share->active->error;
- info->read_end= info->share->active->read_end;
- info->pos_in_file= info->share->active->pos_in_file;
- len= (info->error == -1 ? -1 : info->read_end-info->buffer);
+ /*
+ With a synchronized write/read cache readers always come here.
+ Copy important values from the share.
+ */
+ cache->error= cshare->error;
+ cache->read_end= cshare->read_end;
+ cache->pos_in_file= cshare->pos_in_file;
+
+ len= ((cache->error == -1) ? -1 : cache->read_end - cache->buffer);
}
- info->read_pos=info->buffer;
- info->seek_not_done=0;
+ cache->read_pos= cache->buffer;
+ cache->seek_not_done= 0;
if (len <= 0)
{
- info->error= (int) left_length;
+ DBUG_PRINT("io_cache_share", ("reader error. len %d left %u",
+ len, left_length));
+ cache->error= (int) left_length;
DBUG_RETURN(1);
}
cnt= ((uint) len > Count) ? (int) Count : len;
- memcpy(Buffer, info->read_pos, (size_t) cnt);
+ memcpy(Buffer, cache->read_pos, (size_t) cnt);
Count -= cnt;
Buffer+= cnt;
left_length+= cnt;
- info->read_pos+= cnt;
+ cache->read_pos+= cnt;
}
DBUG_RETURN(0);
}
-#endif
+
+
+/*
+ Copy data from write cache to read cache.
+
+ SYNOPSIS
+ copy_to_read_buffer()
+ write_cache The write cache.
+ write_buffer The source of data, mostly the cache buffer.
+ write_length The number of bytes to copy.
+
+ NOTE
+ The write thread will wait for all read threads to join the cache
+ lock. Then it copies the data over and wakes the read threads.
+
+ RETURN
+ void
+*/
+
+static void copy_to_read_buffer(IO_CACHE *write_cache,
+ const byte *write_buffer, uint write_length)
+{
+ IO_CACHE_SHARE *cshare= write_cache->share;
+
+ DBUG_ASSERT(cshare->source_cache == write_cache);
+ /*
+ write_length is usually less or equal to buffer_length.
+ It can be bigger if _my_b_write() is called with a big length.
+ */
+ while (write_length)
+ {
+ uint copy_length= min(write_length, write_cache->buffer_length);
+ int __attribute__((unused)) rc;
+
+ rc= lock_io_cache(write_cache, write_cache->pos_in_file);
+ /* The writing thread does always have the lock when it awakes. */
+ DBUG_ASSERT(rc);
+
+ memcpy(cshare->buffer, write_buffer, copy_length);
+
+ cshare->error= 0;
+ cshare->read_end= cshare->buffer + copy_length;
+ cshare->pos_in_file= write_cache->pos_in_file;
+
+ /* Mark all threads as running and wake them. */
+ unlock_io_cache(write_cache);
+
+ write_buffer+= copy_length;
+ write_length-= copy_length;
+ }
+}
+#endif /*THREAD*/
/*
@@ -1016,6 +1395,7 @@ int _my_b_write(register IO_CACHE *info, const byte *Buffer, uint Count)
Buffer+=rest_length;
Count-=rest_length;
info->write_pos+=rest_length;
+
if (flush_io_cache(info))
return 1;
if (Count >= IO_SIZE)
@@ -1028,6 +1408,23 @@ int _my_b_write(register IO_CACHE *info, const byte *Buffer, uint Count)
}
if (my_write(info->file,Buffer,(uint) length,info->myflags | MY_NABP))
return info->error= -1;
+
+#ifdef THREAD
+ /*
+ In case of a shared I/O cache with a writer we normally do direct
+ write cache to read cache copy. Simulate this here by direct
+ caller buffer to read cache copy. Do it after the write so that
+ the cache readers actions on the flushed part can go in parallel
+ with the write of the extra stuff. copy_to_read_buffer()
+ synchronizes writer and readers so that after this call the
+ readers can act on the extra stuff while the writer can go ahead
+ and prepare the next output. copy_to_read_buffer() relies on
+ info->pos_in_file.
+ */
+ if (info->share)
+ copy_to_read_buffer(info, Buffer, length);
+#endif
+
Count-=length;
Buffer+=length;
info->pos_in_file+=length;
@@ -1048,6 +1445,14 @@ int my_b_append(register IO_CACHE *info, const byte *Buffer, uint Count)
{
uint rest_length,length;
+#ifdef THREAD
+ /*
+ Assert that we cannot come here with a shared cache. If we do one
+ day, we might need to add a call to copy_to_read_buffer().
+ */
+ DBUG_ASSERT(!info->share);
+#endif
+
lock_append_buffer(info);
rest_length=(uint) (info->write_end - info->write_pos);
if (Count <= rest_length)
@@ -1108,6 +1513,14 @@ int my_block_write(register IO_CACHE *info, const byte *Buffer, uint Count,
uint length;
int error=0;
+#ifdef THREAD
+ /*
+ Assert that we cannot come here with a shared cache. If we do one
+ day, we might need to add a call to copy_to_read_buffer().
+ */
+ DBUG_ASSERT(!info->share);
+#endif
+
if (pos < info->pos_in_file)
{
/* Of no overlap, write everything without buffering */
@@ -1184,6 +1597,17 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock)
if ((length=(uint) (info->write_pos - info->write_buffer)))
{
+#ifdef THREAD
+ /*
+ In case of a shared I/O cache with a writer we do direct write
+ cache to read cache copy. Do it before the write here so that
+ the readers can work in parallel with the write.
+ copy_to_read_buffer() relies on info->pos_in_file.
+ */
+ if (info->share)
+ copy_to_read_buffer(info, info->write_buffer, length);
+#endif
+
pos_in_file=info->pos_in_file;
/*
If we have append cache, we always open the file with
@@ -1262,16 +1686,10 @@ int end_io_cache(IO_CACHE *info)
#ifdef THREAD
/*
- if IO_CACHE is shared between several threads, only one
- thread needs to call end_io_cache() - just as init_io_cache()
- should be called only once and then memcopy'ed
+ Every thread must call remove_io_thread(). The last one destroys
+ the share elements.
*/
- if (info->share)
- {
- pthread_cond_destroy (&info->share->cond);
- pthread_mutex_destroy(&info->share->mutex);
- info->share=0;
- }
+ DBUG_ASSERT(!info->share || !info->share->total_threads);
#endif
if ((pre_close=info->pre_close))