summaryrefslogtreecommitdiff
path: root/myisam
diff options
context:
space:
mode:
Diffstat (limited to 'myisam')
-rw-r--r--myisam/Makefile.am3
-rw-r--r--myisam/ft_boolean_search.c92
-rw-r--r--myisam/ft_parser.c39
-rw-r--r--myisam/ft_static.c36
-rw-r--r--myisam/ft_stopwords.c2
-rw-r--r--myisam/ft_test1.c85
-rw-r--r--myisam/ft_update.c35
-rw-r--r--myisam/ftdefs.h3
-rw-r--r--myisam/mi_check.c38
-rw-r--r--myisam/mi_checksum.c8
-rw-r--r--myisam/mi_create.c101
-rw-r--r--myisam/mi_dbug.c16
-rw-r--r--myisam/mi_delete.c49
-rw-r--r--myisam/mi_dynrec.c87
-rw-r--r--myisam/mi_extra.c11
-rw-r--r--myisam/mi_info.c33
-rw-r--r--myisam/mi_key.c81
-rw-r--r--myisam/mi_keycache.c1
-rw-r--r--myisam/mi_locking.c51
-rw-r--r--myisam/mi_open.c68
-rw-r--r--myisam/mi_packrec.c53
-rw-r--r--myisam/mi_page.c2
-rw-r--r--myisam/mi_preload.c2
-rw-r--r--myisam/mi_range.c19
-rw-r--r--myisam/mi_rkey.c1
-rw-r--r--myisam/mi_rsame.c2
-rw-r--r--myisam/mi_rsamepos.c2
-rw-r--r--myisam/mi_search.c35
-rw-r--r--myisam/mi_static.c7
-rw-r--r--myisam/mi_statrec.c3
-rw-r--r--myisam/mi_test1.c80
-rw-r--r--myisam/mi_test3.c1
-rw-r--r--myisam/mi_test_all.res73
-rw-r--r--myisam/mi_unique.c95
-rw-r--r--myisam/mi_update.c11
-rw-r--r--myisam/mi_write.c32
-rw-r--r--myisam/myisamchk.c27
-rw-r--r--myisam/myisamdef.h12
-rw-r--r--myisam/myisamlog.c2
-rw-r--r--myisam/myisampack.c1310
-rw-r--r--myisam/rt_split.c19
-rw-r--r--myisam/sort.c5
42 files changed, 2052 insertions, 580 deletions
diff --git a/myisam/Makefile.am b/myisam/Makefile.am
index 6f304c8143d..e4327070997 100644
--- a/myisam/Makefile.am
+++ b/myisam/Makefile.am
@@ -17,8 +17,7 @@
EXTRA_DIST = mi_test_all.sh mi_test_all.res
pkgdata_DATA = mi_test_all mi_test_all.res
-INCLUDES = @MT_INCLUDES@ \
- -I$(top_builddir)/include -I$(top_srcdir)/include
+INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include
LDADD = @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \
$(top_builddir)/mysys/libmysys.a \
$(top_builddir)/dbug/libdbug.a \
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c
index f1ff8f6d886..c18984c7584 100644
--- a/myisam/ft_boolean_search.c
+++ b/myisam/ft_boolean_search.c
@@ -68,7 +68,7 @@ struct st_ftb_expr
my_off_t docid[2];
float weight;
float cur_weight;
- byte *quot, *qend;
+ LIST *phrase; /* phrase words */
uint yesses; /* number of "yes" words matched */
uint nos; /* number of "no" words matched */
uint ythresh; /* number of "yes" words in expr */
@@ -132,20 +132,22 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
}
static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
- FTB_EXPR *up, uint depth)
+ FTB_EXPR *up, uint depth, byte *up_quot)
{
byte res;
FTB_PARAM param;
FT_WORD w;
FTB_WORD *ftbw;
FTB_EXPR *ftbe;
+ FT_WORD *phrase_word;
+ LIST *phrase_list;
uint extra=HA_FT_WLEN+ftb->info->s->rec_reflength; /* just a shortcut */
if (ftb->state != UNINITIALIZED)
return;
param.prev=' ';
- param.quot=up->quot;
+ param.quot= up_quot;
while ((res=ft_get_word(ftb->charset,start,end,&w,&param)))
{
int r=param.plusminus;
@@ -172,6 +174,14 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
if (param.yesno > 0) up->ythresh++;
queue_insert(& ftb->queue, (byte *)ftbw);
ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC);
+ case 4: /* not indexed word (stopword or too short/long) */
+ if (! up_quot) break;
+ phrase_word= (FT_WORD *)alloc_root(&ftb->mem_root, sizeof(FT_WORD));
+ phrase_list= (LIST *)alloc_root(&ftb->mem_root, sizeof(LIST));
+ phrase_word->pos= w.pos;
+ phrase_word->len= w.len;
+ phrase_list->data= (void *)phrase_word;
+ up->phrase= list_add(up->phrase, phrase_list);
break;
case 2: /* left bracket */
ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR));
@@ -182,13 +192,14 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
ftbe->up=up;
ftbe->ythresh=ftbe->yweaks=0;
ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
- if ((ftbe->quot=param.quot)) ftb->with_scan|=2;
+ ftbe->phrase= NULL;
+ if (param.quot) ftb->with_scan|=2;
if (param.yesno > 0) up->ythresh++;
- _ftb_parse_query(ftb, start, end, ftbe, depth+1);
+ _ftb_parse_query(ftb, start, end, ftbe, depth+1, param.quot);
param.quot=0;
break;
case 3: /* right bracket */
- if (up->quot) up->qend=param.quot;
+ if (up_quot) up->phrase= list_reverse(up->phrase);
return;
}
}
@@ -212,6 +223,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
byte *lastkey_buf=ftbw->word+ftbw->off;
LINT_INIT(off);
+ LINT_INIT(off);
if (ftbw->flags & FTB_FLAG_TRUNC)
lastkey_buf+=ftbw->len;
@@ -410,12 +422,12 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
ftbe->weight=1;
ftbe->flags=FTB_FLAG_YES;
ftbe->nos=1;
- ftbe->quot=0;
ftbe->up=0;
ftbe->ythresh=ftbe->yweaks=0;
ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
+ ftbe->phrase= NULL;
ftb->root=ftbe;
- _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0);
+ _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0, NULL);
ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
sizeof(FTB_WORD *)*ftb->queue.elements);
memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
@@ -431,29 +443,45 @@ err:
}
-/* returns 1 if str0 ~= /\bstr1\b/ */
-static int _ftb_strstr(const byte *s0, const byte *e0,
- const byte *s1, const byte *e1,
- CHARSET_INFO *cs)
+/*
+ Checks if given buffer matches phrase list.
+
+ SYNOPSIS
+ _ftb_check_phrase()
+ s0 start of buffer
+ e0 end of buffer
+ phrase broken into list phrase
+ cs charset info
+
+ RETURN VALUE
+ 1 is returned if phrase found, 0 else.
+*/
+
+static int _ftb_check_phrase(const byte *s0, const byte *e0,
+ LIST *phrase, CHARSET_INFO *cs)
{
- const byte *p0= s0;
- my_bool s_after= true_word_char(cs, s1[0]);
- my_bool e_before= true_word_char(cs, e1[-1]);
- uint p0_len;
- my_match_t m[2];
+ FT_WORD h_word;
+ const byte *h_start= s0;
+ DBUG_ENTER("_ftb_strstr");
+ DBUG_ASSERT(phrase);
- while (p0 < e0)
+ while (ft_simple_get_word(cs, (byte **)&h_start, e0, &h_word, FALSE))
{
- if (cs->coll->instr(cs, p0, e0 - p0, s1, e1 - s1, m, 2) != 2)
- return(0);
- if ((!s_after || p0 + m[1].beg == s0 || !true_word_char(cs, p0[m[1].beg-1])) &&
- (!e_before || p0 + m[1].end == e0 || !true_word_char(cs, p0[m[1].end])))
- return(1);
- p0+= m[1].beg;
- p0+= (p0_len= my_mbcharlen(cs, *(uchar *)p0)) ? p0_len : 1;
+ FT_WORD *n_word;
+ LIST *phrase_element= phrase;
+ const byte *h_start1= h_start;
+ for (;;)
+ {
+ n_word= (FT_WORD *)phrase_element->data;
+ if (my_strnncoll(cs, h_word.pos, h_word.len, n_word->pos, n_word->len))
+ break;
+ if (! (phrase_element= phrase_element->next))
+ DBUG_RETURN(1);
+ if (! ft_simple_get_word(cs, (byte **)&h_start1, e0, &h_word, FALSE))
+ DBUG_RETURN(0);
+ }
}
-
- return(0);
+ DBUG_RETURN(0);
}
@@ -484,7 +512,7 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_
{
yn=ftbe->flags;
weight=ftbe->cur_weight*ftbe->weight;
- if (mode && ftbe->quot)
+ if (mode && ftbe->phrase)
{
int not_found=1;
@@ -493,8 +521,8 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_
{
if (!ftsi.pos)
continue;
- not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len,
- ftbe->quot, ftbe->qend, ftb->charset);
+ not_found = ! _ftb_check_phrase(ftsi.pos, ftsi.pos+ftsi.len,
+ ftbe->phrase, ftb->charset);
}
if (not_found) break;
} /* ftbe->quot */
@@ -642,8 +670,8 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
continue;
end=ftsi.pos+ftsi.len;
- while (ft_simple_get_word(ftb->charset,
- (byte **) &ftsi.pos, (byte *) end, &word))
+ while (ft_simple_get_word(ftb->charset, (byte **) &ftsi.pos,
+ (byte *) end, &word, TRUE))
{
int a, b, c;
for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2)
diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c
index 0b1e68b0d70..2fad2363ae2 100644
--- a/myisam/ft_parser.c
+++ b/myisam/ft_parser.c
@@ -93,12 +93,14 @@ my_bool ft_boolean_check_syntax_string(const byte *str)
return 0;
}
-/* returns:
- * 0 - eof
- * 1 - word found
- * 2 - left bracket
- * 3 - right bracket
- */
+/*
+ RETURN VALUE
+ 0 - eof
+ 1 - word found
+ 2 - left bracket
+ 3 - right bracket
+ 4 - stopword found
+*/
byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
FT_WORD *word, FTB_PARAM *param)
{
@@ -161,6 +163,11 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
*start=doc;
return 1;
}
+ else if (length) /* make sure length > 0 (if start contains spaces only) */
+ {
+ *start= doc;
+ return 4;
+ }
}
if (param->quot)
{
@@ -170,18 +177,19 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
return 0;
}
-byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end,
- FT_WORD *word)
+byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
+ FT_WORD *word, my_bool skip_stopwords)
{
byte *doc= *start;
uint mwc, length, mbl;
DBUG_ENTER("ft_simple_get_word");
- while (doc<end)
+ do
{
- for (;doc<end;doc++)
+ for (;; doc++)
{
- if (true_word_char(cs,*doc)) break;
+ if (doc >= end) DBUG_RETURN(0);
+ if (true_word_char(cs, *doc)) break;
}
mwc= length= 0;
@@ -193,13 +201,14 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end,
word->len= (uint)(doc-word->pos) - mwc;
- if (length >= ft_min_word_len && length < ft_max_word_len &&
- !is_stopword(word->pos, word->len))
+ if (skip_stopwords == FALSE ||
+ (length >= ft_min_word_len && length < ft_max_word_len &&
+ !is_stopword(word->pos, word->len)))
{
*start= doc;
DBUG_RETURN(1);
}
- }
+ } while (doc < end);
DBUG_RETURN(0);
}
@@ -217,7 +226,7 @@ int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc)
FT_WORD w;
DBUG_ENTER("ft_parse");
- while (ft_simple_get_word(wtree->custom_arg, &doc,end,&w))
+ while (ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE))
{
if (with_alloc)
{
diff --git a/myisam/ft_static.c b/myisam/ft_static.c
index 994a94d0c49..e221950f445 100644
--- a/myisam/ft_static.c
+++ b/myisam/ft_static.c
@@ -25,23 +25,25 @@ char ft_boolean_syntax[]="+ -><()~*:\"\"&|";
const HA_KEYSEG ft_keysegs[FT_SEGS]={
{
- HA_KEYTYPE_VARTEXT, /* type */
- 63, /* language (will be overwritten) */
- 0, 0, 0, /* null_bit, bit_start, bit_end */
- HA_VAR_LENGTH | HA_PACK_KEY, /* flag */
- HA_FT_MAXBYTELEN, /* length */
- HA_FT_WLEN, /* start */
- 0, /* null_pos */
- NULL /* charset */
- },
- {
-/*
- Note, this (and the last HA_KEYTYPE_END) segment should NOT
- be packed in any way, otherwise w_search() won't be able to
- update key entry 'in vivo'
-*/
- HA_FT_WTYPE, 63, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 0, 0, NULL
- }
+ 0, /* charset */
+ HA_FT_WLEN, /* start */
+ 0, /* null_pos */
+ 0, /* Bit pos */
+ HA_VAR_LENGTH_PART | HA_PACK_KEY, /* flag */
+ HA_FT_MAXBYTELEN, /* length */
+ HA_KEYTYPE_VARTEXT2, /* type */
+ 63, /* language (will be overwritten) */
+ 0, /* null_bit */
+ 2, 0, 0 /* bit_start, bit_end, bit_length */
+},
+{
+ /*
+ Note, this (and the last HA_KEYTYPE_END) segment should NOT
+ be packed in any way, otherwise w_search() won't be able to
+ update key entry 'in vivo'
+ */
+ 0, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, HA_FT_WTYPE, 63, 0, 0, 0, 0
+}
};
const struct _ft_vft _ft_vft_nlq = {
diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c
index a4bce6ad4e8..ab51afb0e82 100644
--- a/myisam/ft_stopwords.c
+++ b/myisam/ft_stopwords.c
@@ -81,7 +81,7 @@ int ft_init_stopwords()
goto err0;
len=my_read(fd, buffer, len, MYF(MY_WME));
end=start+len;
- while (ft_simple_get_word(default_charset_info, &start, end, &w))
+ while (ft_simple_get_word(default_charset_info, &start, end, &w, TRUE))
{
if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0))))
goto err1;
diff --git a/myisam/ft_test1.c b/myisam/ft_test1.c
index f4884f8ca39..14be9aa1e8c 100644
--- a/myisam/ft_test1.c
+++ b/myisam/ft_test1.c
@@ -79,24 +79,24 @@ static int run_test(const char *filename)
recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + mi_portable_sizeof_char_ptr :
extra_length);
if (extra_field == FIELD_VARCHAR)
- recinfo[0].length+=2;
+ recinfo[0].length+= HA_VARCHAR_PACKLENGTH(extra_length);
recinfo[1].type=key_field;
recinfo[1].length= (key_field == FIELD_BLOB ? 4+mi_portable_sizeof_char_ptr :
key_length);
if (key_field == FIELD_VARCHAR)
- recinfo[1].length+=2;
+ recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);
/* Define a key over the first column */
keyinfo[0].seg=keyseg;
keyinfo[0].keysegs=1;
keyinfo[0].seg[0].type= key_type;
- keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB)?HA_BLOB_PART:
- (key_field == FIELD_VARCHAR)?HA_VAR_LENGTH:0;
+ keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB) ? HA_BLOB_PART:
+ (key_field == FIELD_VARCHAR) ? HA_VAR_LENGTH_PART:0;
keyinfo[0].seg[0].start=recinfo[0].length;
keyinfo[0].seg[0].length=key_length;
keyinfo[0].seg[0].null_bit= 0;
keyinfo[0].seg[0].null_pos=0;
- keyinfo[0].seg[0].language=MY_CHARSET_CURRENT;
+ keyinfo[0].seg[0].language= default_charset_info->number;
keyinfo[0].flag = (no_fulltext?HA_PACK_KEY:HA_FULLTEXT);
if (!silent)
@@ -155,33 +155,42 @@ static int run_test(const char *filename)
if (!silent)
printf("- Reading rows with key\n");
for (i=0 ; i < NQUERIES ; i++)
- { FT_DOCLIST *result;
+ {
+ FT_DOCLIST *result;
result=ft_nlq_init_search(file,0,(char*) query[i],strlen(query[i]),1);
- if(!result) {
+ if(!result)
+ {
printf("Query %d: `%s' failed with errno %3d\n",i,query[i],my_errno);
continue;
}
printf("Query %d: `%s'. Found: %d. Top five documents:\n",
- i,query[i],result->ndocs);
- for(j=0;j<5;j++) { double w; int err;
- err=ft_nlq_read_next(result, read_record);
- if(err==HA_ERR_END_OF_FILE) {
- printf("No more matches!\n");
- break;
- } else if (err) {
- printf("ft_read_next %d failed with errno %3d\n",j,my_errno);
- break;
- }
- w=ft_nlq_get_relevance(result);
- if(key_field == FIELD_VARCHAR) {
- uint l;
- char *p;
- p=recinfo[0].length+read_record;
- l=uint2korr(p);
- printf("%10.7f: %.*s\n",w,(int) l,p+2);
- } else
- printf("%10.7f: %.*s\n",w,recinfo[1].length,
- recinfo[0].length+read_record);
+ i,query[i],result->ndocs);
+ for (j=0;j<5;j++)
+ {
+ double w; int err;
+ err= ft_nlq_read_next(result, read_record);
+ if (err==HA_ERR_END_OF_FILE)
+ {
+ printf("No more matches!\n");
+ break;
+ }
+ else if (err)
+ {
+ printf("ft_read_next %d failed with errno %3d\n",j,my_errno);
+ break;
+ }
+ w=ft_nlq_get_relevance(result);
+ if (key_field == FIELD_VARCHAR)
+ {
+ uint l;
+ char *p;
+ p=recinfo[0].length+read_record;
+ l=uint2korr(p);
+ printf("%10.7f: %.*s\n",w,(int) l,p+2);
+ }
+ else
+ printf("%10.7f: %.*s\n",w,recinfo[1].length,
+ recinfo[0].length+read_record);
}
ft_nlq_close_search(result);
}
@@ -215,9 +224,14 @@ void create_record(char *pos, int n)
else if (recinfo[0].type == FIELD_VARCHAR)
{
uint tmp;
- strnmov(pos+2,data[n].f0,keyinfo[0].seg[0].length);
- tmp=strlen(pos+2);
- int2store(pos,tmp);
+ /* -1 is here because pack_length is stored in seg->length */
+ uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1);
+ strnmov(pos+pack_length,data[n].f0,keyinfo[0].seg[0].length);
+ tmp=strlen(pos+pack_length);
+ if (pack_length == 1)
+ *pos= (char) tmp;
+ else
+ int2store(pos,tmp);
pos+=recinfo[0].length;
}
else
@@ -239,9 +253,14 @@ void create_record(char *pos, int n)
else if (recinfo[1].type == FIELD_VARCHAR)
{
uint tmp;
- strnmov(pos+2,data[n].f2,keyinfo[0].seg[0].length);
- tmp=strlen(pos+2);
- int2store(pos,tmp);
+ /* -1 is here because pack_length is stored in seg->length */
+ uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1);
+ strnmov(pos+pack_length,data[n].f2,keyinfo[0].seg[0].length);
+ tmp=strlen(pos+1);
+ if (pack_length == 1)
+ *pos= (char) tmp;
+ else
+ int2store(pos,tmp);
pos+=recinfo[1].length;
}
else
diff --git a/myisam/ft_update.c b/myisam/ft_update.c
index beccc062270..b8cd925bf4f 100644
--- a/myisam/ft_update.c
+++ b/myisam/ft_update.c
@@ -58,29 +58,27 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
DBUG_ENTER("_mi_ft_segiterator");
if (!ftsi->num)
- {
DBUG_RETURN(0);
- }
- else
- ftsi->num--;
+
+ ftsi->num--;
if (!ftsi->seg)
- {
DBUG_RETURN(1);
- }
- else
- ftsi->seg--;
+
+ ftsi->seg--;
if (ftsi->seg->null_bit &&
(ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit))
{
- ftsi->pos=0;
- DBUG_RETURN(1);
+ ftsi->pos=0;
+ DBUG_RETURN(1);
}
ftsi->pos= ftsi->rec+ftsi->seg->start;
- if (ftsi->seg->flag & HA_VAR_LENGTH)
+ if (ftsi->seg->flag & HA_VAR_LENGTH_PART)
{
- ftsi->len=uint2korr(ftsi->pos);
- ftsi->pos+=2; /* Skip VARCHAR length */
+ uint pack_length= (ftsi->seg->bit_start);
+ ftsi->len= (pack_length == 1 ? (uint) *(uchar*) ftsi->pos :
+ uint2korr(ftsi->pos));
+ ftsi->pos+= pack_length; /* Skip VARCHAR length */
DBUG_RETURN(1);
}
if (ftsi->seg->flag & HA_BLOB_PART)
@@ -296,9 +294,11 @@ uint _ft_make_key(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wptr,
DBUG_RETURN(_mi_make_key(info,keynr,(uchar*) keybuf,buf,filepos));
}
+
/*
convert key value to ft2
*/
+
uint _mi_ft_convert_to_ft2(MI_INFO *info, uint keynr, uchar *key)
{
my_off_t root;
@@ -316,9 +316,12 @@ uint _mi_ft_convert_to_ft2(MI_INFO *info, uint keynr, uchar *key)
get_key_full_length_rdonly(key_length, key);
while (_mi_ck_delete(info, keynr, key, key_length) == 0)
- /* nothing to do here.
- _mi_ck_delete() will populate info->ft1_to_ft2 with deleted keys
- */;
+ {
+ /*
+ nothing to do here.
+ _mi_ck_delete() will populate info->ft1_to_ft2 with deleted keys
+ */
+ }
/* creating pageful of keys */
mi_putint(info->buff,length+2,0);
diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h
index ddb9fbfead2..91c679a1e58 100644
--- a/myisam/ftdefs.h
+++ b/myisam/ftdefs.h
@@ -112,7 +112,8 @@ int is_stopword(char *word, uint len);
uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t);
byte ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *, FTB_PARAM *);
-byte ft_simple_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *);
+byte ft_simple_get_word(CHARSET_INFO *, byte **, const byte *,
+ FT_WORD *, my_bool);
typedef struct _st_ft_seg_iterator {
uint num, len;
diff --git a/myisam/mi_check.c b/myisam/mi_check.c
index 0c85b5234a1..1f453278eea 100644
--- a/myisam/mi_check.c
+++ b/myisam/mi_check.c
@@ -282,7 +282,8 @@ int chk_size(MI_CHECK *param, register MI_INFO *info)
size=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0));
if ((skr=(my_off_t) info->state->key_file_length) != size)
{
- if (skr > size)
+ /* Don't give error if file generated by myisampack */
+ if (skr > size && mi_is_any_key_active(info->s->state.key_map))
{
error=1;
mi_check_print_error(param,
@@ -379,7 +380,7 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
{
param->key_crc[key]=0;
- if (!(((ulonglong) 1 << key) & share->state.key_map))
+ if (! mi_is_key_active(share->state.key_map, key))
{
/* Remember old statistics for key */
memcpy((char*) rec_per_key_part,
@@ -507,7 +508,7 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
(int) ((my_off_t2double(key_totlength) -
my_off_t2double(all_keydata))*100.0/
my_off_t2double(key_totlength)));
- else if (all_totaldata != 0L && share->state.key_map)
+ else if (all_totaldata != 0L && mi_is_any_key_active(share->state.key_map))
puts("");
}
if (param->key_file_blocks != info->state->key_file_length &&
@@ -1035,7 +1036,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
for (key=0,keyinfo= info->s->keyinfo; key < info->s->base.keys;
key++,keyinfo++)
{
- if ((((ulonglong) 1 << key) & info->s->state.key_map))
+ if (mi_is_key_active(info->s->state.key_map, key))
{
if(!(keyinfo->flag & HA_FULLTEXT))
{
@@ -1303,8 +1304,8 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info,
*/
if (param->testflag & T_CREATE_MISSING_KEYS)
- share->state.key_map= ((((ulonglong) 1L << share->base.keys)-1) &
- param->keys_in_use);
+ mi_copy_keys_active(share->state.key_map, share->base.keys,
+ param->keys_in_use);
info->state->key_file_length=share->base.keystart;
@@ -1466,7 +1467,7 @@ static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff,
key=info->lastkey+info->s->base.max_key_length;
for (i=0 ; i < info->s->base.keys ; i++)
{
- if (((ulonglong) 1 << i) & info->s->state.key_map)
+ if (mi_is_key_active(info->s->state.key_map, i))
{
if (info->s->keyinfo[i].flag & HA_FULLTEXT )
{
@@ -1497,7 +1498,7 @@ static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff,
info->errkey=(int) i; /* This key was found */
while ( i-- > 0 )
{
- if (((ulonglong) 1 << i) & info->s->state.key_map)
+ if (mi_is_key_active(info->s->state.key_map, i))
{
if (info->s->keyinfo[i].flag & HA_FULLTEXT)
{
@@ -1534,7 +1535,7 @@ int movepoint(register MI_INFO *info, byte *record, my_off_t oldpos,
key=info->lastkey+info->s->base.max_key_length;
for (i=0 ; i < info->s->base.keys; i++)
{
- if (i != prot_key && (((ulonglong) 1 << i) & info->s->state.key_map))
+ if (i != prot_key && mi_is_key_active(info->s->state.key_map, i))
{
key_length=_mi_make_key(info,i,key,record,oldpos);
if (info->s->keyinfo[i].flag & HA_NOSAME)
@@ -1633,7 +1634,7 @@ int mi_sort_index(MI_CHECK *param, register MI_INFO *info, my_string name)
for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
key++,keyinfo++)
{
- if (!(((ulonglong) 1 << key) & share->state.key_map))
+ if (! mi_is_key_active(info->s->state.key_map, key))
continue;
if (share->state.key_root[key] != HA_OFFSET_ERROR)
@@ -2028,7 +2029,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
sort_param.read_cache=param->read_cache;
sort_param.keyinfo=share->keyinfo+sort_param.key;
sort_param.seg=sort_param.keyinfo->seg;
- if (!(((ulonglong) 1 << sort_param.key) & key_map))
+ if (! mi_is_key_active(key_map, sort_param.key))
{
/* Remember old statistics for key */
memcpy((char*) rec_per_key_part,
@@ -2049,7 +2050,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
sort_param.key_length+=keyseg[i].length;
if (keyseg[i].flag & HA_SPACE_PACK)
sort_param.key_length+=get_pack_length(keyseg[i].length);
- if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH))
+ if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
sort_param.key_length+=2 + test(keyseg[i].length >= 127);
if (keyseg[i].flag & HA_NULL_PART)
sort_param.key_length++;
@@ -2089,7 +2090,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
if (param->testflag & T_STATISTICS)
update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
(ulonglong) info->state->records);
- share->state.key_map|=(ulonglong) 1 << sort_param.key;
+ mi_set_key_active(share->state.key_map, sort_param.key);
if (sort_param.fix_datafile)
{
@@ -2410,7 +2411,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
sort_param[i].key=key;
sort_param[i].keyinfo=share->keyinfo+key;
sort_param[i].seg=sort_param[i].keyinfo->seg;
- if (!(((ulonglong) 1 << key) & key_map))
+ if (! mi_is_key_active(key_map, key))
{
/* Remember old statistics for key */
memcpy((char*) rec_per_key_part,
@@ -2458,7 +2459,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
sort_param[i].key_length+=keyseg->length;
if (keyseg->flag & HA_SPACE_PACK)
sort_param[i].key_length+=get_pack_length(keyseg->length);
- if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH))
+ if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
sort_param[i].key_length+=2 + test(keyseg->length >= 127);
if (keyseg->flag & HA_NULL_PART)
sort_param[i].key_length++;
@@ -3925,8 +3926,7 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
{
byte *record;
if (!info->s->base.auto_key ||
- !(((ulonglong) 1 << (info->s->base.auto_key-1)
- & info->s->state.key_map)))
+ ! mi_is_key_active(info->s->state.key_map, info->s->base.auto_key - 1))
{
if (!(param->testflag & T_VERY_SILENT))
mi_check_print_info(param,
@@ -4079,7 +4079,7 @@ void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows)
if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) &&
! mi_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1)
{
- share->state.key_map&= ~ ((ulonglong) 1 << i);
+ mi_clear_key_active(share->state.key_map, i);
info->update|= HA_STATE_CHANGED;
}
}
@@ -4103,7 +4103,7 @@ my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows,
mi_repair_by_sort only works if we have at least one key. If we don't
have any keys, we should use the normal repair.
*/
- if (!key_map)
+ if (! mi_is_any_key_active(key_map))
return FALSE; /* Can't use sort */
for (i=0 ; i < share->base.keys ; i++,key++)
{
diff --git a/myisam/mi_checksum.c b/myisam/mi_checksum.c
index 95338434211..33a51068fb0 100644
--- a/myisam/mi_checksum.c
+++ b/myisam/mi_checksum.c
@@ -40,8 +40,12 @@ ha_checksum mi_checksum(MI_INFO *info, const byte *buf)
}
case FIELD_VARCHAR:
{
- length=uint2korr(buf);
- pos=buf+2;
+ uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length-1);
+ if (pack_length == 1)
+ length= (ulong) *(uchar*) buf;
+ else
+ length= uint2korr(buf);
+ pos= buf+pack_length;
break;
}
default:
diff --git a/myisam/mi_create.c b/myisam/mi_create.c
index 41c965c7c80..6d4106afda5 100644
--- a/myisam/mi_create.c
+++ b/myisam/mi_create.c
@@ -43,7 +43,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
myf create_flag;
uint fields,length,max_key_length,packed,pointer,real_length_diff,
key_length,info_length,key_segs,options,min_key_length_skip,
- base_pos,varchar_count,long_varchar_count,varchar_length,
+ base_pos,long_varchar_count,varchar_length,
max_key_block_length,unique_key_parts,fulltext_keys,offset;
ulong reclength, real_reclength,min_pack_length;
char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr;
@@ -99,7 +99,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
/* Start by checking fields and field-types used */
- reclength=varchar_count=varchar_length=long_varchar_count=packed=
+ reclength=varchar_length=long_varchar_count=packed=
min_pack_length=pack_reclength=0;
for (rec=recinfo, fields=0 ;
fields != columns ;
@@ -130,14 +130,15 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
}
else if (type == FIELD_VARCHAR)
{
- varchar_count++;
- varchar_length+=rec->length-2;
+ varchar_length+= rec->length-1; /* Used for min_pack_length */
packed--;
- pack_reclength+=1;
- if (test(rec->length > 257))
- { /* May be packed on 3 bytes */
+ pack_reclength++;
+ min_pack_length++;
+ /* We must test for 257 as length includes pack-length */
+ if (test(rec->length >= 257))
+ {
long_varchar_count++;
- pack_reclength+=2;
+ pack_reclength+= 2; /* May be packed on 3 bytes */
}
}
else if (type != FIELD_SKIP_ZERO)
@@ -169,12 +170,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
/* We can't use checksum with static length rows */
if (!(options & HA_OPTION_PACK_RECORD))
options&= ~HA_OPTION_CHECKSUM;
- if (options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))
- min_pack_length+=varchar_count; /* Min length to pack */
- else
- {
- min_pack_length+=varchar_length+2*varchar_count;
- }
+ if (!(options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
+ min_pack_length+= varchar_length;
if (flags & HA_CREATE_TMP_TABLE)
{
options|= HA_OPTION_TMP_TABLE;
@@ -222,7 +219,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
reclength=pointer+1; /* reserve place for delete link */
}
else
- reclength+=long_varchar_count; /* We need space for this! */
+ reclength+= long_varchar_count; /* We need space for varchar! */
max_key_length=0; tot_length=0 ; key_segs=0;
fulltext_keys=0;
@@ -265,7 +262,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
j++, keyseg++)
{
if (keyseg->type != HA_KEYTYPE_BINARY &&
- keyseg->type != HA_KEYTYPE_VARBINARY)
+ keyseg->type != HA_KEYTYPE_VARBINARY1 &&
+ keyseg->type != HA_KEYTYPE_VARBINARY2)
{
my_errno=HA_WRONG_CREATE_OPTION;
goto err;
@@ -280,8 +278,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
goto err;
#endif /*HAVE_SPATIAL*/
}
- else
- if (keydef->flag & HA_FULLTEXT)
+ else if (keydef->flag & HA_FULLTEXT)
{
keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY;
options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
@@ -290,11 +287,22 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
j++, keyseg++)
{
if (keyseg->type != HA_KEYTYPE_TEXT &&
- keyseg->type != HA_KEYTYPE_VARTEXT)
+ keyseg->type != HA_KEYTYPE_VARTEXT1 &&
+ keyseg->type != HA_KEYTYPE_VARTEXT2)
{
my_errno=HA_WRONG_CREATE_OPTION;
goto err;
}
+ if (!(keyseg->flag & HA_BLOB_PART) &&
+ (keyseg->type == HA_KEYTYPE_VARTEXT1 ||
+ keyseg->type == HA_KEYTYPE_VARTEXT2))
+ {
+ /* Make a flag that this is a VARCHAR */
+ keyseg->flag|= HA_VAR_LENGTH_PART;
+ /* Store in bit_start number of bytes used to pack the length */
+ keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1)?
+ 1 : 2);
+ }
}
fulltext_keys++;
@@ -315,7 +323,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
/* Only use HA_PACK_KEY when first segment is a variable length key */
if (!(keydef->seg[0].flag & (HA_SPACE_PACK | HA_BLOB_PART |
- HA_VAR_LENGTH)))
+ HA_VAR_LENGTH_PART)))
{
/* pack relative to previous key */
keydef->flag&= ~HA_PACK_KEY;
@@ -349,12 +357,27 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
case HA_KEYTYPE_UINT24:
case HA_KEYTYPE_INT8:
keyseg->flag|= HA_SWAP_KEY;
- /* fall through */
+ break;
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY1:
+ case HA_KEYTYPE_VARBINARY2:
+ if (!(keyseg->flag & HA_BLOB_PART))
+ {
+ /* Make a flag that this is a VARCHAR */
+ keyseg->flag|= HA_VAR_LENGTH_PART;
+ /* Store in bit_start number of bytes used to pack the length */
+ keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 ||
+ keyseg->type == HA_KEYTYPE_VARBINARY1) ?
+ 1 : 2);
+ }
+ break;
default:
break;
}
if (keyseg->flag & HA_SPACE_PACK)
{
+ DBUG_ASSERT(!(keyseg->flag & HA_VAR_LENGTH_PART));
keydef->flag |= HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY;
options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
length++; /* At least one length byte */
@@ -365,8 +388,10 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
length+=2;
}
}
- if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART))
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
{
+ DBUG_ASSERT(!test_all_bits(keyseg->flag,
+ (HA_VAR_LENGTH_PART | HA_BLOB_PART)));
keydef->flag|=HA_VAR_LENGTH_KEY;
length++; /* At least one length byte */
options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
@@ -485,7 +510,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
mi_int2store(share.state.header.key_parts,key_segs);
mi_int2store(share.state.header.unique_key_parts,unique_key_parts);
- share.state.key_map = ((ulonglong) 1 << keys)-1;
+ mi_set_all_keys_active(share.state.key_map, keys);
share.base.keystart = share.state.state.key_file_length=
MY_ALIGN(info_length, myisam_block_size);
share.base.max_key_block_length=max_key_block_length;
@@ -621,10 +646,12 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
{
HA_KEYSEG sseg;
sseg.type=SPTYPE;
- sseg.language= 7;
+ sseg.language= 7; /* Binary */
sseg.null_bit=0;
sseg.bit_start=0;
sseg.bit_end=0;
+ sseg.bit_length= 0;
+ sseg.bit_pos= 0;
sseg.length=SPLEN;
sseg.null_pos=0;
sseg.start=j*SPLEN;
@@ -642,7 +669,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
{
tmp_keydef.keysegs=1;
tmp_keydef.flag= HA_UNIQUE_CHECK;
- tmp_keydef.block_length= myisam_block_size;
+ tmp_keydef.block_length= (uint16)myisam_block_size;
tmp_keydef.keylength= MI_UNIQUE_HASH_LENGTH + pointer;
tmp_keydef.minlength=tmp_keydef.maxlength=tmp_keydef.keylength;
tmp_keyseg.type= MI_UNIQUE_HASH_TYPE;
@@ -657,11 +684,31 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
/* Save unique definition */
for (i=0 ; i < share.state.header.uniques ; i++)
{
+ HA_KEYSEG *keyseg_end;
+ keyseg= uniquedefs[i].seg;
if (mi_uniquedef_write(file, &uniquedefs[i]))
goto err;
- for (j=0 ; j < uniquedefs[i].keysegs ; j++)
+ for (keyseg= uniquedefs[i].seg, keyseg_end= keyseg+ uniquedefs[i].keysegs;
+ keyseg < keyseg_end;
+ keyseg++)
{
- if (mi_keyseg_write(file, &uniquedefs[i].seg[j]))
+ switch (keyseg->type) {
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY1:
+ case HA_KEYTYPE_VARBINARY2:
+ if (!(keyseg->flag & HA_BLOB_PART))
+ {
+ keyseg->flag|= HA_VAR_LENGTH_PART;
+ keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 ||
+ keyseg->type == HA_KEYTYPE_VARBINARY1) ?
+ 1 : 2);
+ }
+ break;
+ default:
+ break;
+ }
+ if (mi_keyseg_write(file, keyseg))
goto err;
}
}
diff --git a/myisam/mi_dbug.c b/myisam/mi_dbug.c
index 34105c490e4..ddc8a403a33 100644
--- a/myisam/mi_dbug.c
+++ b/myisam/mi_dbug.c
@@ -131,9 +131,21 @@ void _mi_print_key(FILE *stream, register HA_KEYSEG *keyseg,
key=end;
break;
}
+ case HA_KEYTYPE_BIT:
+ {
+ uint i;
+ fputs("0x",stream);
+ for (i=0 ; i < keyseg->length ; i++)
+ fprintf(stream, "%02x", (uint) *key++);
+ key= end;
+ break;
+ }
+
#endif
- case HA_KEYTYPE_VARTEXT: /* VARCHAR and TEXT */
- case HA_KEYTYPE_VARBINARY: /* VARBINARY and BLOB */
+ case HA_KEYTYPE_VARTEXT1: /* VARCHAR and TEXT */
+ case HA_KEYTYPE_VARTEXT2: /* VARCHAR and TEXT */
+ case HA_KEYTYPE_VARBINARY1: /* VARBINARY and BLOB */
+ case HA_KEYTYPE_VARBINARY2: /* VARBINARY and BLOB */
{
uint tmp_length;
get_key_length(tmp_length,key);
diff --git a/myisam/mi_delete.c b/myisam/mi_delete.c
index b964cb35dd8..60a07254e82 100644
--- a/myisam/mi_delete.c
+++ b/myisam/mi_delete.c
@@ -45,6 +45,12 @@ int mi_delete(MI_INFO *info,const byte *record)
/* Test if record is in datafile */
+ DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_usage",
+ mi_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno= HA_ERR_CRASHED););
+ DBUG_EXECUTE_IF("my_error_test_undefined_error",
+ mi_print_error(info->s, INT_MAX);
+ DBUG_RETURN(my_errno= INT_MAX););
if (!(info->update & HA_STATE_AKTIV))
{
DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No database read */
@@ -68,7 +74,7 @@ int mi_delete(MI_INFO *info,const byte *record)
old_key=info->lastkey2;
for (i=0 ; i < share->base.keys ; i++ )
{
- if (((ulonglong) 1 << i) & info->s->state.key_map)
+ if (mi_is_key_active(info->s->state.key_map, i))
{
info->s->keyinfo[i].version++;
if (info->s->keyinfo[i].flag & HA_FULLTEXT )
@@ -109,13 +115,19 @@ err:
mi_sizestore(lastpos,info->lastpos);
myisam_log_command(MI_LOG_DELETE,info,(byte*) lastpos, sizeof(lastpos),0);
if (save_errno != HA_ERR_RECORD_CHANGED)
+ {
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info); /* mark table crashed */
+ }
VOID(_mi_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
info->update|=HA_STATE_WRITTEN; /* Buffer changed */
allow_break(); /* Allow SIGHUP & SIGINT */
my_errno=save_errno;
if (save_errno == HA_ERR_KEY_NOT_FOUND)
+ {
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
+ }
DBUG_RETURN(my_errno);
} /* mi_delete */
@@ -142,6 +154,7 @@ static int _mi_ck_real_delete(register MI_INFO *info, MI_KEYDEF *keyinfo,
if ((old_root=*root) == HA_OFFSET_ERROR)
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
DBUG_RETURN(my_errno=HA_ERR_CRASHED);
}
if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
@@ -253,7 +266,12 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
my_off_t root;
uchar *kpos=keypos;
- tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey);
+ if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey)))
+ {
+ mi_print_error(info->s, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ DBUG_RETURN(-1);
+ }
root=_mi_dpos(info,nod_flag,kpos);
if (subkeys == -1)
{
@@ -302,6 +320,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
if (!nod_flag)
{
DBUG_PRINT("error",("Didn't find key"));
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED; /* This should newer happend */
goto err;
}
@@ -313,13 +332,10 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{ /* Found key */
uint tmp;
length=mi_getint(anc_buff);
- tmp=remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length,
- &next_block);
- if (tmp == 0)
- {
- DBUG_PRINT("exit",("Return: %d",0));
- DBUG_RETURN(0);
- }
+ if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length,
+ &next_block)))
+ goto err;
+
length-= tmp;
mi_putint(anc_buff,length,nod_flag);
@@ -368,6 +384,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
my_afree((byte*) leaf_buff);
DBUG_PRINT("exit",("Return: %d",ret_value));
DBUG_RETURN(ret_value);
+
err:
my_afree((byte*) leaf_buff);
DBUG_PRINT("exit",("Error: %d",my_errno));
@@ -559,10 +576,10 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo,
/* remove key from anc_buff */
- s_length=remove_key(keyinfo,key_reflength,keypos,anc_key,
- anc_buff+anc_length,(my_off_t *) 0);
- if (!s_length)
+ if (!(s_length=remove_key(keyinfo,key_reflength,keypos,anc_key,
+ anc_buff+anc_length,(my_off_t *) 0)))
goto err;
+
anc_length-=s_length;
mi_putint(anc_buff,anc_length,key_reflength);
@@ -668,10 +685,10 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo,
mi_putint(buff,buff_length,nod_flag);
/* remove key from anc_buff */
- s_length=remove_key(keyinfo,key_reflength,keypos,anc_key,
- anc_buff+anc_length,(my_off_t *) 0);
- if (!s_length)
+ if (!(s_length= remove_key(keyinfo,key_reflength,keypos,anc_key,
+ anc_buff+anc_length,(my_off_t *) 0)))
goto err;
+
anc_length-=s_length;
mi_putint(anc_buff,anc_length,key_reflength);
@@ -731,6 +748,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo,
if (_mi_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff))
goto err;
DBUG_RETURN(anc_length <= (uint) keyinfo->block_length/2);
+
err:
DBUG_RETURN(-1);
} /* underflow */
@@ -768,6 +786,7 @@ static uint remove_key(MI_KEYDEF *keyinfo, uint nod_flag,
/* Calculate length of key */
if (!(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey))
DBUG_RETURN(0); /* Error */
+
if (next_block && nod_flag)
*next_block= _mi_kpos(nod_flag,keypos);
s_length=(int) (keypos-start);
diff --git a/myisam/mi_dynrec.c b/myisam/mi_dynrec.c
index 43783ca2d36..8de500a7351 100644
--- a/myisam/mi_dynrec.c
+++ b/myisam/mi_dynrec.c
@@ -149,7 +149,9 @@ static int write_dynamic_record(MI_INFO *info, const byte *record,
{
if (_mi_find_writepos(info,reclength,&filepos,&length))
goto err;
- if (_mi_write_part_record(info,filepos,length,info->s->state.dellink,
+ if (_mi_write_part_record(info,filepos,length,
+ (info->append_insert_at_end ?
+ HA_OFFSET_ERROR : info->s->state.dellink),
(byte**) &record,&reclength,&flag))
goto err;
} while (reclength);
@@ -171,7 +173,8 @@ static int _mi_find_writepos(MI_INFO *info,
ulong tmp;
DBUG_ENTER("_mi_find_writepos");
- if (info->s->state.dellink != HA_OFFSET_ERROR)
+ if (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end)
{
/* Deleted blocks exists; Get last used block */
*filepos=info->s->state.dellink;
@@ -420,8 +423,9 @@ int _mi_write_part_record(MI_INFO *info,
else if (length-long_block < *reclength+4)
{ /* To short block */
if (next_filepos == HA_OFFSET_ERROR)
- next_filepos=info->s->state.dellink != HA_OFFSET_ERROR ?
- info->s->state.dellink : info->state->data_file_length;
+ next_filepos= (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end ?
+ info->s->state.dellink : info->state->data_file_length);
if (*flag == 0) /* First block */
{
if (*reclength > MI_MAX_BLOCK_LENGTH)
@@ -768,11 +772,21 @@ uint _mi_rec_pack(MI_INFO *info, register byte *to, register const byte *from)
}
else if (type == FIELD_VARCHAR)
{
- uint tmp_length=uint2korr(from);
- store_key_length_inc(to,tmp_length);
- memcpy(to,from+2,tmp_length);
- to+=tmp_length;
- continue;
+ uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1);
+ uint tmp_length;
+ if (pack_length == 1)
+ {
+ tmp_length= (uint) *(uchar*) from;
+ *to++= *from;
+ }
+ else
+ {
+ tmp_length= uint2korr(from);
+ store_key_length_inc(to,tmp_length);
+ }
+ memcpy(to, from+pack_length,tmp_length);
+ to+= tmp_length;
+ continue;
}
else
{
@@ -878,9 +892,20 @@ my_bool _mi_rec_check(MI_INFO *info,const char *record, byte *rec_buff,
}
else if (type == FIELD_VARCHAR)
{
- uint tmp_length=uint2korr(record);
- to+=get_pack_length(tmp_length)+tmp_length;
- continue;
+ uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1);
+ uint tmp_length;
+ if (pack_length == 1)
+ {
+ tmp_length= (uint) *(uchar*) record;
+ to+= 1+ tmp_length;
+ continue;
+ }
+ else
+ {
+ tmp_length= uint2korr(record);
+ to+= get_pack_length(tmp_length)+tmp_length;
+ }
+ continue;
}
else
{
@@ -894,9 +919,7 @@ my_bool _mi_rec_check(MI_INFO *info,const char *record, byte *rec_buff,
}
}
else
- {
- to+=length;
- }
+ to+= length;
}
if (packed_length != (uint) (to - rec_buff) + test(info->s->calc_checksum) ||
(bit != 1 && (flag & ~(bit - 1))))
@@ -944,13 +967,27 @@ ulong _mi_rec_unpack(register MI_INFO *info, register byte *to, byte *from,
{
if (type == FIELD_VARCHAR)
{
- get_key_length(length,from);
- if (length > rec_length-2)
- goto err;
- int2store(to,length);
- memcpy(to+2,from,length);
- from+=length;
- continue;
+ uint pack_length= HA_VARCHAR_PACKLENGTH(rec_length-1);
+ if (pack_length == 1)
+ {
+ length= (uint) *(uchar*) from;
+ if (length > rec_length-1)
+ goto err;
+ *to= *from++;
+ }
+ else
+ {
+ get_key_length(length, from);
+ if (length > rec_length-2)
+ goto err;
+ int2store(to,length);
+ }
+ if (from+length > from_end)
+ goto err;
+ memcpy(to+pack_length, from, length);
+ from+= length;
+ min_pack_length--;
+ continue;
}
if (flag & bit)
{
@@ -1018,15 +1055,17 @@ ulong _mi_rec_unpack(register MI_INFO *info, register byte *to, byte *from,
if (min_pack_length > (uint) (from_end - from))
goto err;
min_pack_length-=rec_length;
- memcpy(to,(byte*) from,(size_t) rec_length); from+=rec_length;
+ memcpy(to, (byte*) from, (size_t) rec_length);
+ from+=rec_length;
}
}
if (info->s->calc_checksum)
from++;
if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit-1))))
DBUG_RETURN(found_length);
+
err:
- my_errno=HA_ERR_RECORD_DELETED;
+ my_errno= HA_ERR_WRONG_IN_RECORD;
DBUG_PRINT("error",("to_end: %lx -> %lx from_end: %lx -> %lx",
to,to_end,from,from_end));
DBUG_DUMP("from",(byte*) info->rec_buff,info->s->base.min_pack_length);
diff --git a/myisam/mi_extra.c b/myisam/mi_extra.c
index 1827aed98c3..7c0dd13b870 100644
--- a/myisam/mi_extra.c
+++ b/myisam/mi_extra.c
@@ -15,7 +15,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "myisamdef.h"
-#ifdef HAVE_MMAP
+#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
@@ -186,7 +186,10 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
if (info->opt_flag & WRITE_CACHE_USED)
{
if ((error=flush_io_cache(&info->rec_cache)))
+ {
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info); /* Fatal error found */
+ }
}
break;
case HA_EXTRA_NO_READCHECK:
@@ -241,7 +244,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
error=1; /* Not possibly if not lock */
break;
}
- if (share->state.key_map)
+ if (mi_is_any_key_active(share->state.key_map))
{
MI_KEYDEF *key=share->keyinfo;
uint i;
@@ -249,7 +252,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
{
if (!(key->flag & HA_NOSAME) && info->s->base.auto_key != i+1)
{
- share->state.key_map&= ~ ((ulonglong) 1 << i);
+ mi_clear_key_active(share->state.key_map, i);
info->update|= HA_STATE_CHANGED;
}
}
@@ -285,6 +288,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
{
error=my_errno;
share->changed=1;
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info); /* Fatal error found */
}
if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
@@ -339,6 +343,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
if (error)
{
share->changed=1;
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info); /* Fatal error found */
}
}
diff --git a/myisam/mi_info.c b/myisam/mi_info.c
index cf63ef63618..bdece9c2ee3 100644
--- a/myisam/mi_info.c
+++ b/myisam/mi_info.c
@@ -105,3 +105,36 @@ int mi_status(MI_INFO *info, register MI_ISAMINFO *x, uint flag)
}
DBUG_RETURN(0);
}
+
+
+/*
+ Write a message to the error log.
+
+ SYNOPSIS
+ mi_report_error()
+ file_name Name of table file (e.g. index_file_name).
+ errcode Error number.
+
+ DESCRIPTION
+ This function supplies my_error() with a table name. Most error
+ messages need one. Since string arguments in error messages are limited
+ to 64 characters by convention, we ensure that in case of truncation,
+ that the end of the index file path is in the message. This contains
+ the most valuable information (the table name and the database name).
+
+ RETURN
+ void
+*/
+
+void mi_report_error(int errcode, const char *file_name)
+{
+ size_t lgt;
+ DBUG_ENTER("mi_report_error");
+ DBUG_PRINT("enter",("errcode %d, table '%s'", errcode, file_name));
+
+ if ((lgt= strlen(file_name)) > 64)
+ file_name+= lgt - 64;
+ my_error(errcode, MYF(ME_NOREFRESH), file_name);
+ DBUG_VOID_RETURN;
+}
+
diff --git a/myisam/mi_key.c b/myisam/mi_key.c
index 9df22889b22..ae50900a190 100644
--- a/myisam/mi_key.c
+++ b/myisam/mi_key.c
@@ -34,10 +34,20 @@
static int _mi_put_key_in_record(MI_INFO *info,uint keynr,byte *record);
- /*
- ** Make a intern key from a record
- ** Ret: Length of key
- */
+/*
+ Make a intern key from a record
+
+ SYNOPSIS
+ _mi_make_key()
+ info MyiSAM handler
+ keynr key number
+ key Store created key here
+ record Record
+ filepos Position to record in the data file
+
+ RETURN
+ Length of key
+*/
uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key,
const byte *record, my_off_t filepos)
@@ -82,6 +92,19 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key,
length);
pos= (byte*) record+keyseg->start;
+ if (type == HA_KEYTYPE_BIT)
+ {
+ if (keyseg->bit_length)
+ {
+ uchar bits= get_rec_bits((uchar*) record + keyseg->bit_pos,
+ keyseg->bit_start, keyseg->bit_length);
+ *key++= bits;
+ length--;
+ }
+ memcpy((byte*) key, pos, length);
+ key+= length;
+ continue;
+ }
if (keyseg->flag & HA_SPACE_PACK)
{
end= pos + length;
@@ -102,10 +125,12 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key,
key+=char_length;
continue;
}
- if (keyseg->flag & HA_VAR_LENGTH)
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
{
- uint tmp_length=uint2korr(pos);
- pos+=2; /* Skip VARCHAR length */
+ uint pack_length= keyseg->bit_start;
+ uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos :
+ uint2korr(pos));
+ pos+= pack_length; /* Skip VARCHAR length */
set_if_smaller(length,tmp_length);
FIX_LENGTH(cs, pos, length, char_length);
store_key_length_inc(key,char_length);
@@ -161,7 +186,7 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key,
FIX_LENGTH(cs, pos, length, char_length);
memcpy((byte*) key, pos, char_length);
if (length > char_length)
- cs->cset->fill(cs, key+char_length, length-char_length, ' ');
+ cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' ');
key+= length;
}
_mi_dpointer(info,key,filepos);
@@ -216,8 +241,11 @@ uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, uchar *old,
if (!(*key++= (char) 1-*old++)) /* Copy null marker */
{
k_length-=length;
- if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART))
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
+ {
k_length-=2; /* Skip length */
+ old+= 2;
+ }
continue; /* Found NULL */
}
}
@@ -244,7 +272,7 @@ uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, uchar *old,
key+= char_length;
continue;
}
- else if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART))
+ else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
{
/* Length of key-part used with mi_rkey() always 2 */
uint tmp_length=uint2korr(pos);
@@ -271,7 +299,7 @@ uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, uchar *old,
FIX_LENGTH(cs, pos, length, char_length);
memcpy((byte*) key, pos, char_length);
if (length > char_length)
- cs->cset->fill(cs,key+char_length, length-char_length, ' ');
+ cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' ');
key+= length;
k_length-=length;
}
@@ -344,6 +372,26 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr,
}
record[keyseg->null_pos]&= ~keyseg->null_bit;
}
+ if (keyseg->type == HA_KEYTYPE_BIT)
+ {
+ uint length= keyseg->length;
+
+ if (keyseg->bit_length)
+ {
+ uchar bits= *key++;
+ set_rec_bits(bits, record + keyseg->bit_pos, keyseg->bit_start,
+ keyseg->bit_length);
+ length--;
+ }
+ else
+ {
+ clr_rec_bits(record + keyseg->bit_pos, keyseg->bit_start,
+ keyseg->bit_length);
+ }
+ memcpy(record + keyseg->start, (byte*) key, length);
+ key+= length;
+ continue;
+ }
if (keyseg->flag & HA_SPACE_PACK)
{
uint length;
@@ -367,7 +415,7 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr,
continue;
}
- if (keyseg->flag & HA_VAR_LENGTH)
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
{
uint length;
get_key_length(length,key);
@@ -375,7 +423,13 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr,
if (length > keyseg->length || key+length > key_end)
goto err;
#endif
- memcpy(record+keyseg->start,(byte*) key, length);
+ /* Store key length */
+ if (keyseg->bit_start == 1)
+ *(uchar*) (record+keyseg->start)= (uchar) length;
+ else
+ int2store(record+keyseg->start, length);
+ /* And key data */
+ memcpy(record+keyseg->start + keyseg->bit_start, (byte*) key, length);
key+= length;
}
else if (keyseg->flag & HA_BLOB_PART)
@@ -437,6 +491,7 @@ int _mi_read_key_record(MI_INFO *info, my_off_t filepos, byte *buf)
{ /* Read only key */
if (_mi_put_key_in_record(info,(uint) info->lastinx,buf))
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
return -1;
}
diff --git a/myisam/mi_keycache.c b/myisam/mi_keycache.c
index 99a2fd6db15..fb13f3703a2 100644
--- a/myisam/mi_keycache.c
+++ b/myisam/mi_keycache.c
@@ -79,6 +79,7 @@ int mi_assign_to_key_cache(MI_INFO *info,
if (flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE))
{
error= my_errno;
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info); /* Mark that table must be checked */
}
diff --git a/myisam/mi_locking.c b/myisam/mi_locking.c
index 66950f62321..8d48c5242e5 100644
--- a/myisam/mi_locking.c
+++ b/myisam/mi_locking.c
@@ -66,6 +66,7 @@ int mi_lock_database(MI_INFO *info, int lock_type)
share->kfile,FLUSH_KEEP))
{
error=my_errno;
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info); /* Mark that table must be checked */
}
if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
@@ -73,6 +74,7 @@ int mi_lock_database(MI_INFO *info, int lock_type)
if (end_io_cache(&info->rec_cache))
{
error=my_errno;
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info);
}
}
@@ -98,7 +100,10 @@ int mi_lock_database(MI_INFO *info, int lock_type)
else
share->not_flushed=1;
if (error)
+ {
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info);
+ }
}
if (info->lock_type != F_EXTRA_LCK)
{
@@ -233,13 +238,24 @@ int mi_lock_database(MI_INFO *info, int lock_type)
The following functions are called by thr_lock() in threaded applications
****************************************************************************/
-void mi_get_status(void* param)
+/*
+ Create a copy of the current status for the table
+
+ SYNOPSIS
+ mi_get_status()
+ param Pointer to Myisam handler
+ concurrent_insert Set to 1 if we are going to do concurrent inserts
+ (THR_WRITE_CONCURRENT_INSERT was used)
+*/
+
+void mi_get_status(void* param, int concurrent_insert)
{
MI_INFO *info=(MI_INFO*) param;
DBUG_ENTER("mi_get_status");
- DBUG_PRINT("info",("key_file: %ld data_file: %ld",
+ DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d",
(long) info->s->state.state.key_file_length,
- (long) info->s->state.state.data_file_length));
+ (long) info->s->state.state.data_file_length,
+ concurrent_insert));
#ifndef DBUG_OFF
if (info->state->key_file_length > info->s->state.state.key_file_length ||
info->state->data_file_length > info->s->state.state.data_file_length)
@@ -249,9 +265,11 @@ void mi_get_status(void* param)
#endif
info->save_state=info->s->state.state;
info->state= &info->save_state;
+ info->append_insert_at_end= concurrent_insert;
DBUG_VOID_RETURN;
}
+
void mi_update_status(void* param)
{
MI_INFO *info=(MI_INFO*) param;
@@ -276,6 +294,7 @@ void mi_update_status(void* param)
info->s->state.state= *info->state;
info->state= &info->s->state.state;
}
+ info->append_insert_at_end= 0;
/*
We have to flush the write cache here as other threads may start
@@ -285,6 +304,7 @@ void mi_update_status(void* param)
{
if (end_io_cache(&info->rec_cache))
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info);
}
info->opt_flag&= ~WRITE_CACHE_USED;
@@ -301,20 +321,37 @@ void mi_copy_status(void* to,void *from)
Check if should allow concurrent inserts
IMPLEMENTATION
- Don't allow concurrent inserts if we have a hole in the table.
+ Allow concurrent inserts if we don't have a hole in the table or
+ if there is no active write lock and there is active read locks and
+ myisam_concurrent_insert == 2. In this last case the new
+ row('s) are inserted at end of file instead of filling up the hole.
+
+ The last case is to allow one to inserts into a heavily read-used table
+ even if there is holes.
NOTES
- Rtree indexes are disabled in mi_open()
+ If there is a an rtree indexes in the table, concurrent inserts are
+ disabled in mi_open()
RETURN
0 ok to use concurrent inserts
1 not ok
*/
-my_bool mi_check_status(void* param)
+my_bool mi_check_status(void *param)
{
MI_INFO *info=(MI_INFO*) param;
- return (my_bool) (info->s->state.dellink != HA_OFFSET_ERROR);
+ /*
+ The test for w_locks == 1 is here because this thread has already done an
+ external lock (in other words: w_locks == 1 means no other threads has
+ a write lock)
+ */
+ DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u",
+ (long) info->s->state.dellink, (uint) info->s->r_locks,
+ (uint) info->s->w_locks));
+ return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR ||
+ (myisam_concurrent_insert == 2 && info->s->r_locks &&
+ info->s->w_locks == 1));
}
diff --git a/myisam/mi_open.c b/myisam/mi_open.c
index a5b303f86d4..82663e0c318 100644
--- a/myisam/mi_open.c
+++ b/myisam/mi_open.c
@@ -106,6 +106,12 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
share_buff.state.key_del=key_del;
share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff));
+ DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_open",
+ if (strstr(name, "/t1"))
+ {
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ });
if ((kfile=my_open(name_buff,(open_mode=O_RDWR) | O_SHARE,MYF(0))) < 0)
{
if ((errno != EROFS && errno != EACCES) ||
@@ -186,14 +192,14 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
}
share->state_diff_length=len-MI_STATE_INFO_SIZE;
- mi_state_info_read(disk_cache, &share->state);
+ mi_state_info_read((uchar*) disk_cache, &share->state);
len= mi_uint2korr(share->state.header.base_info_length);
if (len != MI_BASE_INFO_SIZE)
{
DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d",
len,MI_BASE_INFO_SIZE))
}
- disk_pos=my_n_base_info_read(disk_cache+base_pos, &share->base);
+ disk_pos=my_n_base_info_read((uchar*) disk_cache + base_pos, &share->base);
share->state.state_length=base_pos;
if (!(open_flags & HA_OPEN_FOR_REPAIR) &&
@@ -302,6 +308,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
HA_KEYSEG *pos=share->keyparts;
for (i=0 ; i < keys ; i++)
{
+ share->keyinfo[i].share= share;
disk_pos=mi_keydef_read(disk_pos, &share->keyinfo[i]);
disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE,
end_pos);
@@ -313,7 +320,9 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
{
disk_pos=mi_keyseg_read(disk_pos, pos);
- if (pos->type == HA_KEYTYPE_TEXT || pos->type == HA_KEYTYPE_VARTEXT)
+ if (pos->type == HA_KEYTYPE_TEXT ||
+ pos->type == HA_KEYTYPE_VARTEXT1 ||
+ pos->type == HA_KEYTYPE_VARTEXT2)
{
if (!pos->language)
pos->charset=default_charset_info;
@@ -388,7 +397,9 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++)
{
disk_pos=mi_keyseg_read(disk_pos, pos);
- if (pos->type == HA_KEYTYPE_TEXT || pos->type == HA_KEYTYPE_VARTEXT)
+ if (pos->type == HA_KEYTYPE_TEXT ||
+ pos->type == HA_KEYTYPE_VARTEXT1 ||
+ pos->type == HA_KEYTYPE_VARTEXT2)
{
if (!pos->language)
pos->charset=default_charset_info;
@@ -596,6 +607,10 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
err:
save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
+ if ((save_errno == HA_ERR_CRASHED) ||
+ (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
+ (save_errno == HA_ERR_CRASHED_ON_REPAIR))
+ mi_report_error(save_errno, name);
switch (errpos) {
case 6:
my_free((gptr) m_info,MYF(0));
@@ -848,7 +863,7 @@ uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite)
}
-char *mi_state_info_read(char *ptr, MI_STATE_INFO *state)
+char *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state)
{
uint i,keys,key_parts,key_blocks;
memcpy_fixed(&state->header,ptr, sizeof(state->header));
@@ -914,7 +929,7 @@ uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead)
}
else if (my_read(file, buff, state->state_length,MYF(MY_NABP)))
return (MY_FILE_ERROR);
- mi_state_info_read(buff, state);
+ mi_state_info_read((uchar*) buff, state);
}
return 0;
}
@@ -959,7 +974,7 @@ uint mi_base_info_write(File file, MI_BASE_INFO *base)
}
-char *my_n_base_info_read(char *ptr, MI_BASE_INFO *base)
+char *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base)
{
base->keystart = mi_sizekorr(ptr); ptr +=8;
base->max_data_file_length = mi_sizekorr(ptr); ptr +=8;
@@ -1042,18 +1057,21 @@ int mi_keyseg_write(File file, const HA_KEYSEG *keyseg)
{
uchar buff[HA_KEYSEG_SIZE];
uchar *ptr=buff;
-
- *ptr++ =keyseg->type;
- *ptr++ =keyseg->language;
- *ptr++ =keyseg->null_bit;
- *ptr++ =keyseg->bit_start;
- *ptr++ =keyseg->bit_end;
- *ptr++ =0; /* Not used */
+ ulong pos;
+
+ *ptr++= keyseg->type;
+ *ptr++= keyseg->language;
+ *ptr++= keyseg->null_bit;
+ *ptr++= keyseg->bit_start;
+ *ptr++= keyseg->bit_end;
+ *ptr++= keyseg->bit_length;
mi_int2store(ptr,keyseg->flag); ptr+=2;
mi_int2store(ptr,keyseg->length); ptr+=2;
mi_int4store(ptr,keyseg->start); ptr+=4;
- mi_int4store(ptr,keyseg->null_pos); ptr+=4;
-
+ pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos;
+ mi_int4store(ptr, pos);
+ ptr+=4;
+
return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP));
}
@@ -1065,12 +1083,19 @@ char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg)
keyseg->null_bit = *ptr++;
keyseg->bit_start = *ptr++;
keyseg->bit_end = *ptr++;
- ptr++;
+ keyseg->bit_length = *ptr++;
keyseg->flag = mi_uint2korr(ptr); ptr +=2;
keyseg->length = mi_uint2korr(ptr); ptr +=2;
keyseg->start = mi_uint4korr(ptr); ptr +=4;
keyseg->null_pos = mi_uint4korr(ptr); ptr +=4;
keyseg->charset=0; /* Will be filled in later */
+ if (keyseg->null_bit)
+ keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7));
+ else
+ {
+ keyseg->bit_pos= (uint16)keyseg->null_pos;
+ keyseg->null_pos= 0;
+ }
return ptr;
}
@@ -1179,7 +1204,7 @@ int mi_disable_indexes(MI_INFO *info)
{
MYISAM_SHARE *share= info->s;
- share->state.key_map= 0;
+ mi_clear_all_keys_active(share->state.key_map);
return 0;
}
@@ -1210,9 +1235,12 @@ int mi_enable_indexes(MI_INFO *info)
if (share->state.state.data_file_length ||
(share->state.state.key_file_length != share->base.keystart))
+ {
+ mi_print_error(info->s, HA_ERR_CRASHED);
error= HA_ERR_CRASHED;
+ }
else
- share->state.key_map= ((ulonglong) 1L << share->base.keys) - 1;
+ mi_set_all_keys_active(share->state.key_map, share->base.keys);
return error;
}
@@ -1237,6 +1265,6 @@ int mi_indexes_are_disabled(MI_INFO *info)
{
MYISAM_SHARE *share= info->s;
- return (! share->state.key_map && share->base.keys);
+ return (! mi_is_any_key_active(share->state.key_map) && share->base.keys);
}
diff --git a/myisam/mi_packrec.c b/myisam/mi_packrec.c
index 322420b71db..e242e9d506d 100644
--- a/myisam/mi_packrec.c
+++ b/myisam/mi_packrec.c
@@ -91,8 +91,10 @@ static void uf_zero(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
uchar *to,uchar *end);
static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
uchar *to, uchar *end);
-static void uf_varchar(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
- uchar *to, uchar *end);
+static void uf_varchar1(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end);
+static void uf_varchar2(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end);
static void decode_bytes(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
uchar *to,uchar *end);
static uint decode_pos(MI_BIT_BUFF *bit_buff,MI_DECODE_TREE *decode_tree);
@@ -415,8 +417,19 @@ static uint find_longest_bitstream(uint16 *table, uint16 *end)
}
- /* Read record from datafile */
- /* Returns length of packed record, -1 if error */
+/*
+ Read record from datafile.
+
+ SYNOPSIS
+ _mi_read_pack_record()
+ info A pointer to MI_INFO.
+ filepos File offset of the record.
+ buf RETURN The buffer to receive the record.
+
+ RETURN
+ 0 on success
+ HA_ERR_WRONG_IN_RECORD or -1 on error
+*/
int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf)
{
@@ -516,14 +529,16 @@ static void (*get_unpack_function(MI_COLUMNDEF *rec))
case FIELD_BLOB:
return &uf_blob;
case FIELD_VARCHAR:
- return &uf_varchar;
+ if (rec->length <= 256) /* 255 + 1 byte length */
+ return &uf_varchar1;
+ return &uf_varchar2;
case FIELD_LAST:
default:
return 0; /* This should never happend */
}
}
- /* De different functions to unpack a field */
+ /* The different functions to unpack a field */
static void uf_zerofill_skip_zero(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
uchar *to, uchar *end)
@@ -767,7 +782,22 @@ static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
}
}
-static void uf_varchar(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
+
+static void uf_varchar1(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end __attribute__((unused)))
+{
+ if (get_bit(bit_buff))
+ to[0]= 0; /* Zero lengths */
+ else
+ {
+ ulong length=get_bits(bit_buff,rec->space_length_bits);
+ *to= (uchar) length;
+ decode_bytes(rec,bit_buff,to+1,to+1+length);
+ }
+}
+
+
+static void uf_varchar2(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
uchar *to, uchar *end __attribute__((unused)))
{
if (get_bit(bit_buff))
@@ -1137,11 +1167,12 @@ static uint max_bit(register uint value)
/*****************************************************************************
Some redefined functions to handle files when we are using memmap
*****************************************************************************/
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
#ifdef HAVE_MMAP
-#include <sys/mman.h>
-
static int _mi_read_mempack_record(MI_INFO *info,my_off_t filepos,byte *buf);
static int _mi_read_rnd_mempack_record(MI_INFO*, byte *,my_off_t, my_bool);
@@ -1167,7 +1198,7 @@ my_bool _mi_memmap_file(MI_INFO *info)
DBUG_RETURN(0);
}
file_map=(byte*)
- mmap(0,share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN,PROT_READ,
+ my_mmap(0,(size_t)(share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN),PROT_READ,
MAP_SHARED | MAP_NORESERVE,info->dfile,0L);
if (file_map == (byte*) MAP_FAILED)
{
@@ -1186,7 +1217,7 @@ my_bool _mi_memmap_file(MI_INFO *info)
void _mi_unmap_file(MI_INFO *info)
{
- VOID(munmap((caddr_t) info->s->file_map,
+ VOID(my_munmap(info->s->file_map,
(size_t) info->s->state.state.data_file_length+
MEMMAP_EXTRA_MARGIN));
}
diff --git a/myisam/mi_page.c b/myisam/mi_page.c
index 16713c87e10..5240c063fba 100644
--- a/myisam/mi_page.c
+++ b/myisam/mi_page.c
@@ -40,6 +40,7 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo,
{
DBUG_PRINT("error",("Got errno: %d from key_cache_read",my_errno));
info->last_keypage=HA_OFFSET_ERROR;
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(0);
}
@@ -51,6 +52,7 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo,
(ulong) page, page_size));
DBUG_DUMP("page", (char*) tmp, keyinfo->block_length);
info->last_keypage = HA_OFFSET_ERROR;
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno = HA_ERR_CRASHED;
tmp = 0;
}
diff --git a/myisam/mi_preload.c b/myisam/mi_preload.c
index 317ab4ad7fe..d63399b519d 100644
--- a/myisam/mi_preload.c
+++ b/myisam/mi_preload.c
@@ -51,7 +51,7 @@ int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves)
my_off_t pos= share->base.keystart;
DBUG_ENTER("mi_preload");
- if (!keys || !key_map || key_file_length == pos)
+ if (!keys || !mi_is_any_key_active(key_map) || key_file_length == pos)
DBUG_RETURN(0);
block_length= keyinfo[0].block_length;
diff --git a/myisam/mi_range.c b/myisam/mi_range.c
index 1e0fd42334e..e78f3b11625 100644
--- a/myisam/mi_range.c
+++ b/myisam/mi_range.c
@@ -172,9 +172,9 @@ static double _mi_search_pos(register MI_INFO *info,
if (flag == MI_FOUND_WRONG_KEY)
DBUG_RETURN(-1); /* error */
/*
- ** Didn't found match. keypos points at next (bigger) key
- * Try to find a smaller, better matching key.
- ** Matches keynr + [0-1]
+ Didn't found match. keypos points at next (bigger) key
+ Try to find a smaller, better matching key.
+ Matches keynr + [0-1]
*/
if (flag > 0 && ! nod_flag)
offset= 1.0;
@@ -185,8 +185,8 @@ static double _mi_search_pos(register MI_INFO *info,
else
{
/*
- ** Found match. Keypos points at the start of the found key
- ** Matches keynr+1
+ Found match. Keypos points at the start of the found key
+ Matches keynr+1
*/
offset=1.0; /* Matches keynr+1 */
if ((nextflag & SEARCH_FIND) && nod_flag &&
@@ -194,8 +194,8 @@ static double _mi_search_pos(register MI_INFO *info,
key_len != USE_WHOLE_KEY))
{
/*
- ** There may be identical keys in the tree. Try to match on of those.
- ** Matches keynr + [0-1]
+ There may be identical keys in the tree. Try to match on of those.
+ Matches keynr + [0-1]
*/
if ((offset=_mi_search_pos(info,keyinfo,key,key_len,SEARCH_FIND,
_mi_kpos(nod_flag,keypos))) < 0)
@@ -213,7 +213,8 @@ err:
/* Get keynummer of current key and max number of keys in nod */
-static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, uchar *keypos, uint *ret_max_key)
+static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
+ uchar *keypos, uint *ret_max_key)
{
uint nod_flag,keynr,max_key;
uchar t_buff[MI_MAX_KEY_BUFF],*end;
@@ -222,7 +223,7 @@ static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, u
nod_flag=mi_test_if_nod(page);
page+=2+nod_flag;
- if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY| HA_BINARY_PACK_KEY)))
+ if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
{
*ret_max_key= (uint) (end-page)/(keyinfo->keylength+nod_flag);
return (uint) (keypos-page)/(keyinfo->keylength+nod_flag);
diff --git a/myisam/mi_rkey.c b/myisam/mi_rkey.c
index 9aa2be3c706..575a5e34488 100644
--- a/myisam/mi_rkey.c
+++ b/myisam/mi_rkey.c
@@ -81,6 +81,7 @@ int mi_rkey(MI_INFO *info, byte *buf, int inx, const byte *key, uint key_len,
case HA_KEY_ALG_RTREE:
if (rtree_find_first(info,inx,key_buff,use_key_length,nextflag) < 0)
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
goto err;
}
diff --git a/myisam/mi_rsame.c b/myisam/mi_rsame.c
index 56c8d1226ca..321097744b9 100644
--- a/myisam/mi_rsame.c
+++ b/myisam/mi_rsame.c
@@ -30,7 +30,7 @@ int mi_rsame(MI_INFO *info, byte *record, int inx)
{
DBUG_ENTER("mi_rsame");
- if (inx != -1 && ! (((ulonglong) 1 << inx) & info->s->state.key_map))
+ if (inx != -1 && ! mi_is_key_active(info->s->state.key_map, inx))
{
DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX);
}
diff --git a/myisam/mi_rsamepos.c b/myisam/mi_rsamepos.c
index a1d96fb7104..35cdd41e297 100644
--- a/myisam/mi_rsamepos.c
+++ b/myisam/mi_rsamepos.c
@@ -32,7 +32,7 @@ int mi_rsame_with_pos(MI_INFO *info, byte *record, int inx, my_off_t filepos)
{
DBUG_ENTER("mi_rsame_with_pos");
- if (inx < -1 || ! (((ulonglong) 1 << inx) & info->s->state.key_map))
+ if (inx < -1 || ! mi_is_key_active(info->s->state.key_map, inx))
{
DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX);
}
diff --git a/myisam/mi_search.c b/myisam/mi_search.c
index 82177d331b7..185f196a814 100644
--- a/myisam/mi_search.c
+++ b/myisam/mi_search.c
@@ -29,7 +29,7 @@ int _mi_check_index(MI_INFO *info, int inx)
{
if (inx == -1) /* Use last index */
inx=info->lastinx;
- if (inx < 0 || ! (((ulonglong) 1 << inx) & info->s->state.key_map))
+ if (inx < 0 || ! mi_is_key_active(info->s->state.key_map, inx))
{
my_errno=HA_ERR_WRONG_INDEX;
return -1;
@@ -159,6 +159,7 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
DBUG_PRINT("exit",("found key at %lu",(ulong) info->lastpos));
DBUG_RETURN(0);
+
err:
DBUG_PRINT("exit",("Error: %d",my_errno));
info->lastpos= HA_OFFSET_ERROR;
@@ -256,6 +257,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff);
if (length == 0 || page > end)
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_PRINT("error",("Found wrong key: length: %u page: %p end: %p",
length, page, end));
@@ -395,7 +397,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
if (!(*from++))
continue;
}
- if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART | HA_SPACE_PACK))
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK))
{
get_key_length(l,from);
}
@@ -411,6 +413,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
if (page > end)
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_PRINT("error",("Found wrong key: length: %u page: %p end: %p",
length, page, end));
@@ -461,7 +464,8 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
if (len < cmplen)
{
if ((keyinfo->seg->type != HA_KEYTYPE_TEXT &&
- keyinfo->seg->type != HA_KEYTYPE_VARTEXT))
+ keyinfo->seg->type != HA_KEYTYPE_VARTEXT1 &&
+ keyinfo->seg->type != HA_KEYTYPE_VARTEXT2))
my_flag= -1;
else
{
@@ -795,6 +799,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
{
if (length > (uint) keyseg->length)
{
+ mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
return 0; /* Error */
}
@@ -810,6 +815,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
("Found too long null packed key: %u of %u at %p",
length, keyseg->length, *page_pos));
DBUG_DUMP("key",(char*) *page_pos,16);
+ mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
return 0;
}
@@ -866,6 +872,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
DBUG_PRINT("error",("Found too long packed key: %u of %u at %p",
length, keyseg->length, *page_pos));
DBUG_DUMP("key",(char*) *page_pos,16);
+ mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
return 0; /* Error */
}
@@ -879,7 +886,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
continue;
}
if (keyseg->flag &
- (HA_VAR_LENGTH | HA_BLOB_PART | HA_SPACE_PACK))
+ (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK))
{
uchar *tmp=page;
get_key_length(length,tmp);
@@ -931,6 +938,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
DBUG_PRINT("error",("Found too long binary packed key: %u of %u at %p",
length, keyinfo->maxlength, *page_pos));
DBUG_DUMP("key",(char*) *page_pos,16);
+ mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(0); /* Wrong key */
}
@@ -954,7 +962,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
if (!(*key++ = *from++))
continue; /* Null part */
}
- if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART | HA_SPACE_PACK))
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK))
{
/* Get length of dynamic length key part */
if (from == from_end) { from=page; from_end=page_end; }
@@ -992,6 +1000,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
if (from_end != page_end)
{
DBUG_PRINT("error",("Error when unpacking key"));
+ mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(0); /* Error */
}
@@ -1026,6 +1035,7 @@ uchar *_mi_get_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
*return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key);
if (*return_key_length == 0)
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(0);
}
@@ -1063,6 +1073,7 @@ static my_bool _mi_get_prev_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
*return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key);
if (*return_key_length == 0)
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(1);
}
@@ -1103,6 +1114,7 @@ uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
if (*return_key_length == 0)
{
DBUG_PRINT("error",("Couldn't find last key: page: %p", page));
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(0);
}
@@ -1129,7 +1141,7 @@ uint _mi_keylength(MI_KEYDEF *keyinfo, register uchar *key)
if (keyseg->flag & HA_NULL_PART)
if (!*key++)
continue;
- if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH))
+ if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART))
{
uint length;
get_key_length(length,key);
@@ -1161,7 +1173,7 @@ uint _mi_keylength_part(MI_KEYDEF *keyinfo, register uchar *key,
if (keyseg->flag & HA_NULL_PART)
if (!*key++)
continue;
- if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH))
+ if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART))
{
uint length;
get_key_length(length,key);
@@ -1293,8 +1305,10 @@ int _mi_search_first(register MI_INFO *info, register MI_KEYDEF *keyinfo,
page=info->buff+2+nod_flag;
} while ((pos=_mi_kpos(nod_flag,page)) != HA_OFFSET_ERROR);
- info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,
- info->lastkey);
+ if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,
+ info->lastkey)))
+ DBUG_RETURN(-1); /* Crashed */
+
info->int_keypos=page; info->int_maxpos=info->buff+mi_getint(info->buff)-1;
info->int_nod_flag=nod_flag;
info->int_keytree_version=keyinfo->version;
@@ -1429,7 +1443,8 @@ _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,uchar *next_key,
sort_order=0;
if ((keyinfo->flag & HA_FULLTEXT) &&
((keyseg->type == HA_KEYTYPE_TEXT) ||
- (keyseg->type == HA_KEYTYPE_VARTEXT)) &&
+ (keyseg->type == HA_KEYTYPE_VARTEXT1) ||
+ (keyseg->type == HA_KEYTYPE_VARTEXT2)) &&
!use_strnxfrm(keyseg->charset))
sort_order=keyseg->charset->sort_order;
diff --git a/myisam/mi_static.c b/myisam/mi_static.c
index 9725c120f44..fc585eb5543 100644
--- a/myisam/mi_static.c
+++ b/myisam/mi_static.c
@@ -31,14 +31,13 @@ uchar NEAR myisam_pack_file_magic[]=
my_string myisam_log_filename=(char*) "myisam.log";
File myisam_log_file= -1;
uint myisam_quick_table_bits=9;
-uint myisam_block_size=MI_KEY_BLOCK_LENGTH; /* Best by test */
+ulong myisam_block_size= MI_KEY_BLOCK_LENGTH; /* Best by test */
my_bool myisam_flush=0, myisam_delay_key_write=0, myisam_single_user=0;
#if defined(THREAD) && !defined(DONT_USE_RW_LOCKS)
-my_bool myisam_concurrent_insert=1;
+ulong myisam_concurrent_insert= 2;
#else
-my_bool myisam_concurrent_insert=0;
+ulong myisam_concurrent_insert= 0;
#endif
-my_off_t myisam_max_extra_temp_length= (my_off_t)MI_MAX_TEMP_LENGTH;
my_off_t myisam_max_temp_length= MAX_FILE_SIZE;
ulong myisam_bulk_insert_tree_size=8192*1024;
ulong myisam_data_pointer_size=4;
diff --git a/myisam/mi_statrec.c b/myisam/mi_statrec.c
index 8f5cde45e24..42352f63c66 100644
--- a/myisam/mi_statrec.c
+++ b/myisam/mi_statrec.c
@@ -23,7 +23,8 @@ int _mi_write_static_record(MI_INFO *info, const byte *record)
{
uchar temp[8]; /* max pointer length */
- if (info->s->state.dellink != HA_OFFSET_ERROR)
+ if (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end)
{
my_off_t filepos=info->s->state.dellink;
info->rec_cache.seek_not_done=1; /* We have done a seek */
diff --git a/myisam/mi_test1.c b/myisam/mi_test1.c
index 77c4d3dfbad..5727c699469 100644
--- a/myisam/mi_test1.c
+++ b/myisam/mi_test1.c
@@ -75,11 +75,11 @@ static int run_test(const char *filename)
recinfo[1].length= (key_field == FIELD_BLOB ? 4+mi_portable_sizeof_char_ptr :
key_length);
if (key_field == FIELD_VARCHAR)
- recinfo[1].length+=2;
+ recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);;
recinfo[2].type=extra_field;
recinfo[2].length= (extra_field == FIELD_BLOB ? 4 + mi_portable_sizeof_char_ptr : 24);
if (extra_field == FIELD_VARCHAR)
- recinfo[2].length+=2;
+ recinfo[2].length+= HA_VARCHAR_PACKLENGTH(recinfo[2].length);
if (opt_unique)
{
recinfo[3].type=FIELD_CHECK;
@@ -88,6 +88,9 @@ static int run_test(const char *filename)
rec_length=recinfo[0].length+recinfo[1].length+recinfo[2].length+
recinfo[3].length;
+ if (key_type == HA_KEYTYPE_VARTEXT1 &&
+ key_length > 255)
+ key_type= HA_KEYTYPE_VARTEXT2;
/* Define a key over the first column */
keyinfo[0].seg=keyseg;
@@ -134,7 +137,7 @@ static int run_test(const char *filename)
uniqueseg[1].flag|= HA_BLOB_PART;
}
else if (extra_field == FIELD_VARCHAR)
- uniqueseg[1].flag|= HA_VAR_LENGTH;
+ uniqueseg[1].flag|= HA_VAR_LENGTH_PART;
}
else
uniques=0;
@@ -330,7 +333,8 @@ static void create_key_part(char *key,uint rownr)
{
sprintf(key,"%*d",keyinfo[0].seg[0].length,rownr);
}
- else if (keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT)
+ else if (keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT1 ||
+ keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT2)
{ /* Alpha record */
/* Create a key that may be easily packed */
bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B');
@@ -372,7 +376,7 @@ static void create_key(char *key,uint rownr)
}
*key++=0;
}
- if (keyinfo[0].seg[0].flag & (HA_BLOB_PART | HA_VAR_LENGTH))
+ if (keyinfo[0].seg[0].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
{
uint tmp;
create_key_part(key+2,rownr);
@@ -410,11 +414,14 @@ static void create_record(char *record,uint rownr)
}
else if (recinfo[1].type == FIELD_VARCHAR)
{
- uint tmp;
- create_key_part(pos+2,rownr);
- tmp=strlen(pos+2);
- int2store(pos,tmp);
- pos+=recinfo[1].length;
+ uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1);
+ create_key_part(pos+pack_length,rownr);
+ tmp= strlen(pos+pack_length);
+ if (pack_length == 1)
+ *(uchar*) pos= (uchar) tmp;
+ else
+ int2store(pos,tmp);
+ pos+= recinfo[1].length;
}
else
{
@@ -434,10 +441,13 @@ static void create_record(char *record,uint rownr)
}
else if (recinfo[2].type == FIELD_VARCHAR)
{
- uint tmp;
- sprintf(pos+2,"... row: %d", rownr);
- tmp=strlen(pos+2);
- int2store(pos,tmp);
+ uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1);
+ sprintf(pos+pack_length, "... row: %d", rownr);
+ tmp= strlen(pos+pack_length);
+ if (pack_length == 1)
+ *(uchar*) pos= (uchar) tmp;
+ else
+ int2store(pos,tmp);
}
else
{
@@ -461,19 +471,25 @@ static void update_record(char *record)
ptr=blob_key;
memcpy_fixed(pos+4,&ptr,sizeof(char*)); /* Store pointer to new key */
if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM)
- my_casedn(default_charset_info,blob_key,length);
+ default_charset_info->cset->casedn(default_charset_info,
+ blob_key, length, blob_key, length);
pos+=recinfo[1].length;
}
else if (recinfo[1].type == FIELD_VARCHAR)
{
- uint length=uint2korr(pos);
- my_casedn(default_charset_info,pos+2,length);
+ uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1);
+ uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos);
+ default_charset_info->cset->casedn(default_charset_info,
+ pos + pack_length, length,
+ pos + pack_length, length);
pos+=recinfo[1].length;
}
else
{
if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM)
- my_casedn(default_charset_info,pos,keyinfo[0].seg[0].length);
+ default_charset_info->cset->casedn(default_charset_info,
+ pos, keyinfo[0].seg[0].length,
+ pos, keyinfo[0].seg[0].length);
pos+=recinfo[1].length;
}
@@ -493,10 +509,14 @@ static void update_record(char *record)
else if (recinfo[2].type == FIELD_VARCHAR)
{
/* Second field is longer than 10 characters */
- uint length=uint2korr(pos);
- bfill(pos+2+length,recinfo[2].length-length-2,'.');
- length=recinfo[2].length-2;
- int2store(pos,length);
+ uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1);
+ uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos);
+ bfill(pos+pack_length+length,recinfo[2].length-length-pack_length,'.');
+ length=recinfo[2].length-pack_length;
+ if (pack_length == 1)
+ *(uchar*) pos= (uchar) length;
+ else
+ int2store(pos,length);
}
else
{
@@ -519,12 +539,12 @@ static struct my_option my_long_options[] =
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"insert_rows", 'i', "Undocumented", (gptr*) &insert_count,
(gptr*) &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0},
- {"key_alpha", 'a', "Undocumented",
+ {"key_alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"key_binary_pack", 'B', "Undocumented",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"key_blob", 'b', "Undocumented",
- 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"key_cache", 'K', "Undocumented", (gptr*) &key_cacheing,
(gptr*) &key_cacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"key_length", 'k', "Undocumented", (gptr*) &key_length, (gptr*) &key_length,
@@ -535,9 +555,9 @@ static struct my_option my_long_options[] =
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"key_space_pack", 'p', "Undocumented",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
- {"key_varchar", 'w', "Undocumented",
+ {"key_varchar", 'w', "Test VARCHAR keys",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
- {"null_fields", 'N', "Undocumented",
+ {"null_fields", 'N', "Define fields with NULL",
(gptr*) &null_fields, (gptr*) &null_fields, 0, GET_BOOL, NO_ARG,
0, 0, 0, 0, 0, 0},
{"row_fixed_size", 'S', "Undocumented",
@@ -604,7 +624,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
key_field=FIELD_BLOB; /* blob key */
extra_field= FIELD_BLOB;
pack_seg|= HA_BLOB_PART;
- key_type= HA_KEYTYPE_VARTEXT;
+ key_type= HA_KEYTYPE_VARTEXT1;
break;
case 'k':
if (key_length < 4 || key_length > MI_MAX_KEY_LENGTH)
@@ -616,11 +636,11 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
case 'w':
key_field=FIELD_VARCHAR; /* varchar keys */
extra_field= FIELD_VARCHAR;
- key_type= HA_KEYTYPE_VARTEXT;
- pack_seg|= HA_VAR_LENGTH;
+ key_type= HA_KEYTYPE_VARTEXT1;
+ pack_seg|= HA_VAR_LENGTH_PART;
create_flag|= HA_PACK_RECORD;
break;
- case 'K': /* Use key cacheing */
+ case 'K': /* Use key cacheing */
key_cacheing=1;
break;
case 'V':
diff --git a/myisam/mi_test3.c b/myisam/mi_test3.c
index 27d23317b5c..be4277cc65c 100644
--- a/myisam/mi_test3.c
+++ b/myisam/mi_test3.c
@@ -67,6 +67,7 @@ int main(int argc,char **argv)
bzero((char*) keyinfo,sizeof(keyinfo));
bzero((char*) recinfo,sizeof(recinfo));
+ bzero((char*) keyseg,sizeof(keyseg));
keyinfo[0].seg= &keyseg[0][0];
keyinfo[0].seg[0].start=0;
keyinfo[0].seg[0].length=8;
diff --git a/myisam/mi_test_all.res b/myisam/mi_test_all.res
index 94355bf1aa2..16b517d3f76 100644
--- a/myisam/mi_test_all.res
+++ b/myisam/mi_test_all.res
@@ -1,3 +1,6 @@
+myisamchk: MyISAM file test1
+myisamchk: warning: Size of indexfile is: 1024 Should be: 2048
+MyISAM-table 'test1' is usable but should be fixed
mi_test2 -s -L -K -R1 -m2000 ; Should give error 135
Error: 135 in write at record: 1105
got error: 135 when using MyISAM-database
@@ -5,46 +8,46 @@ myisamchk: MyISAM file test2
myisamchk: warning: Datafile is almost full, 65532 of 65534 used
MyISAM-table 'test2' is usable but should be fixed
Commands Used count Errors Recover errors
-open 17 0 0
-write 850 0 0
-update 85 0 0
-delete 850 0 0
-close 17 0 0
-extra 102 0 0
-Total 1921 0 0
+open 1 0 0
+write 50 0 0
+update 5 0 0
+delete 50 0 0
+close 1 0 0
+extra 6 0 0
+Total 113 0 0
Commands Used count Errors Recover errors
-open 18 0 0
-write 900 0 0
-update 90 0 0
-delete 900 0 0
-close 18 0 0
-extra 108 0 0
-Total 2034 0 0
+open 2 0 0
+write 100 0 0
+update 10 0 0
+delete 100 0 0
+close 2 0 0
+extra 12 0 0
+Total 226 0 0
-real 0m1.054s
-user 0m0.410s
-sys 0m0.640s
+real 0m0.791s
+user 0m0.137s
+sys 0m0.117s
-real 0m1.077s
-user 0m0.550s
-sys 0m0.530s
+real 0m0.659s
+user 0m0.252s
+sys 0m0.102s
-real 0m1.100s
-user 0m0.420s
-sys 0m0.680s
+real 0m0.571s
+user 0m0.188s
+sys 0m0.098s
-real 0m0.783s
-user 0m0.590s
-sys 0m0.200s
+real 0m1.111s
+user 0m0.236s
+sys 0m0.037s
-real 0m0.764s
-user 0m0.560s
-sys 0m0.210s
+real 0m0.621s
+user 0m0.242s
+sys 0m0.022s
-real 0m0.699s
-user 0m0.570s
-sys 0m0.130s
+real 0m0.698s
+user 0m0.248s
+sys 0m0.021s
-real 0m0.991s
-user 0m0.630s
-sys 0m0.350s
+real 0m0.683s
+user 0m0.265s
+sys 0m0.079s
diff --git a/myisam/mi_unique.c b/myisam/mi_unique.c
index ad685f4cbdc..34f5f595f30 100644
--- a/myisam/mi_unique.c
+++ b/myisam/mi_unique.c
@@ -64,7 +64,12 @@ my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, byte *record,
}
-/* Calculate a hash for a row */
+/*
+ Calculate a hash for a row
+
+ TODO
+ Add support for bit fields
+*/
ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record)
{
@@ -93,10 +98,12 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record)
}
}
pos= record+keyseg->start;
- if (keyseg->flag & HA_VAR_LENGTH)
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
{
- uint tmp_length=uint2korr(pos);
- pos+=2; /* Skip VARCHAR length */
+ uint pack_length= keyseg->bit_start;
+ uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos :
+ uint2korr(pos));
+ pos+= pack_length; /* Skip VARCHAR length */
set_if_smaller(length,tmp_length);
}
else if (keyseg->flag & HA_BLOB_PART)
@@ -107,7 +114,8 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record)
length=tmp_length; /* The whole blob */
}
end= pos+length;
- if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT)
+ if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
+ type == HA_KEYTYPE_VARTEXT2)
{
keyseg->charset->coll->hash_sort(keyseg->charset,
(const uchar*) pos, length, &seed1,
@@ -123,9 +131,17 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record)
return crc;
}
- /*
- Returns 0 if both rows have equal unique value
- */
+
+/*
+ compare unique key for two rows
+
+ TODO
+ Add support for bit fields
+
+ RETURN
+ 0 if both rows have equal unique value
+ # Rows are different
+*/
int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b,
my_bool null_are_equal)
@@ -136,7 +152,8 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b,
for (keyseg=def->seg ; keyseg < def->end ; keyseg++)
{
enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type;
- uint length=keyseg->length;
+ uint a_length, b_length;
+ a_length= b_length= keyseg->length;
/* If part is NULL it's regarded as different */
if (keyseg->null_bit)
@@ -154,43 +171,59 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b,
}
pos_a= a+keyseg->start;
pos_b= b+keyseg->start;
- if (keyseg->flag & HA_VAR_LENGTH)
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
{
- uint tmp_length=uint2korr(pos_a);
- if (tmp_length != uint2korr(pos_b))
- return 1;
- pos_a+=2; /* Skip VARCHAR length */
- pos_b+=2;
- set_if_smaller(length,tmp_length);
+ uint pack_length= keyseg->bit_start;
+ if (pack_length == 1)
+ {
+ a_length= (uint) *(uchar*) pos_a++;
+ b_length= (uint) *(uchar*) pos_b++;
+ }
+ else
+ {
+ a_length= uint2korr(pos_a);
+ b_length= uint2korr(pos_b);
+ pos_a+= 2; /* Skip VARCHAR length */
+ pos_b+= 2;
+ }
+ set_if_smaller(a_length, keyseg->length); /* Safety */
+ set_if_smaller(b_length, keyseg->length); /* safety */
}
else if (keyseg->flag & HA_BLOB_PART)
{
- /* Only compare 'length' characters if length<> 0 */
- uint a_length= _mi_calc_blob_length(keyseg->bit_start,pos_a);
- uint b_length= _mi_calc_blob_length(keyseg->bit_start,pos_b);
+ /* Only compare 'length' characters if length != 0 */
+ a_length= _mi_calc_blob_length(keyseg->bit_start,pos_a);
+ b_length= _mi_calc_blob_length(keyseg->bit_start,pos_b);
/* Check that a and b are of equal length */
- if (length && a_length > length)
- a_length=length;
- if (!length || length > b_length)
- length=b_length;
- if (length != a_length)
- return 1;
- /* Both strings are at least 'length' long */
+ if (keyseg->length)
+ {
+ /*
+ This is used in some cases when we are not interested in comparing
+ the whole length of the blob.
+ */
+ set_if_smaller(a_length, keyseg->length);
+ set_if_smaller(b_length, keyseg->length);
+ }
memcpy_fixed((byte*) &pos_a,pos_a+keyseg->bit_start,sizeof(char*));
memcpy_fixed((byte*) &pos_b,pos_b+keyseg->bit_start,sizeof(char*));
}
- if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT)
+ if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
+ type == HA_KEYTYPE_VARTEXT2)
{
- if (mi_compare_text(keyseg->charset, (uchar *) pos_a, length,
- (uchar *) pos_b, length, 0, 0))
- return 1;
+ if (mi_compare_text(keyseg->charset, (uchar *) pos_a, a_length,
+ (uchar *) pos_b, b_length, 0, 1))
+ return 1;
}
else
{
- end= pos_a+length;
+ if (a_length != b_length)
+ return 1;
+ end= pos_a+a_length;
while (pos_a != end)
+ {
if (*pos_a++ != *pos_b++)
return 1;
+ }
}
}
return 0;
diff --git a/myisam/mi_update.c b/myisam/mi_update.c
index f62be133ed9..ab23f2e6da9 100644
--- a/myisam/mi_update.c
+++ b/myisam/mi_update.c
@@ -34,6 +34,9 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec)
LINT_INIT(changed);
LINT_INIT(old_checksum);
+ DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_usage",
+ mi_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno= HA_ERR_CRASHED););
if (!(info->update & HA_STATE_AKTIV))
{
DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND);
@@ -84,7 +87,7 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec)
changed=0;
for (i=0 ; i < share->base.keys ; i++)
{
- if (((ulonglong) 1 << i) & share->state.key_map)
+ if (mi_is_key_active(share->state.key_map, i))
{
if (share->keyinfo[i].flag & HA_FULLTEXT )
{
@@ -205,7 +208,10 @@ err:
} while (i-- != 0);
}
else
+ {
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info);
+ }
info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED |
key_changed);
@@ -214,6 +220,9 @@ err:
VOID(_mi_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
allow_break(); /* Allow SIGHUP & SIGINT */
if (save_errno == HA_ERR_KEY_NOT_FOUND)
+ {
+ mi_print_error(info->s, HA_ERR_CRASHED);
save_errno=HA_ERR_CRASHED;
+ }
DBUG_RETURN(my_errno=save_errno);
} /* mi_update */
diff --git a/myisam/mi_write.c b/myisam/mi_write.c
index cd9e73fba22..c8f9aa84a41 100644
--- a/myisam/mi_write.c
+++ b/myisam/mi_write.c
@@ -52,6 +52,9 @@ int mi_write(MI_INFO *info, byte *record)
DBUG_ENTER("mi_write");
DBUG_PRINT("enter",("isam: %d data: %d",info->s->kfile,info->dfile));
+ DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_usage",
+ mi_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno= HA_ERR_CRASHED););
if (share->options & HA_OPTION_READ_ONLY_DATA)
{
DBUG_RETURN(my_errno=EACCES);
@@ -64,7 +67,8 @@ int mi_write(MI_INFO *info, byte *record)
MYF(MY_SEEK_NOT_DONE) | info->lock_wait))
goto err;
#endif
- filepos= ((share->state.dellink != HA_OFFSET_ERROR) ?
+ filepos= ((share->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end) ?
share->state.dellink :
info->state->data_file_length);
@@ -97,7 +101,7 @@ int mi_write(MI_INFO *info, byte *record)
buff=info->lastkey2;
for (i=0 ; i < share->base.keys ; i++)
{
- if (((ulonglong) 1 << i) & share->state.key_map)
+ if (mi_is_key_active(share->state.key_map, i))
{
bool local_lock_tree= (lock_tree &&
!(info->bulk_insert &&
@@ -171,7 +175,7 @@ err:
info->errkey= (int) i;
while ( i-- > 0)
{
- if (((ulonglong) 1 << i) & share->state.key_map)
+ if (mi_is_key_active(share->state.key_map, i))
{
bool local_lock_tree= (lock_tree &&
!(info->bulk_insert &&
@@ -203,7 +207,10 @@ err:
}
}
else
+ {
+ mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info);
+ }
info->update= (HA_STATE_CHANGED | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED);
my_errno=save_errno;
err2:
@@ -249,7 +256,7 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key,
comp_flag=SEARCH_BIGGER; /* Put after same key */
else if (keyinfo->flag & (HA_NOSAME|HA_FULLTEXT))
{
- comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No dupplicates */
+ comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */
if (keyinfo->flag & HA_NULL_ARE_EQUAL)
comp_flag|= SEARCH_NULL_ARE_EQUAL;
}
@@ -348,6 +355,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
dupp_key_pos=_mi_dpos(info,0,keybuff+tmp_key_length);
else
dupp_key_pos= HA_OFFSET_ERROR;
+
if (keyinfo->flag & HA_FULLTEXT)
{
uint off;
@@ -478,6 +486,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{
if (t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH)
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(-1);
}
@@ -487,6 +496,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{
if (-t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH)
{
+ mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(-1);
}
@@ -582,6 +592,7 @@ int _mi_split_page(register MI_INFO *info, register MI_KEYDEF *keyinfo,
&after_key);
if (!key_pos)
DBUG_RETURN(-1);
+
length=(uint) (key_pos-buff);
a_length=mi_getint(buff);
mi_putint(buff,length,nod_flag);
@@ -602,6 +613,7 @@ int _mi_split_page(register MI_INFO *info, register MI_KEYDEF *keyinfo,
/* Store new page */
if (!(*keyinfo->get_key)(keyinfo,nod_flag,&key_pos,key_buff))
DBUG_RETURN(-1);
+
t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar *) 0,
(uchar*) 0, (uchar*) 0,
key_buff, &s_temp);
@@ -708,6 +720,7 @@ static uchar *_mi_find_last_pos(MI_KEYDEF *keyinfo, uchar *page,
memcpy(key, key_buff, length); /* previous key */
if (!(length=(*keyinfo->get_key)(keyinfo,0,&page,key_buff)))
{
+ mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(0);
}
@@ -931,20 +944,21 @@ int mi_init_bulk_insert(MI_INFO *info, ulong cache_size, ha_rows rows)
MI_KEYDEF *key=share->keyinfo;
bulk_insert_param *params;
uint i, num_keys, total_keylength;
- ulonglong key_map=0;
+ ulonglong key_map;
DBUG_ENTER("_mi_init_bulk_insert");
DBUG_PRINT("enter",("cache_size: %lu", cache_size));
DBUG_ASSERT(!info->bulk_insert &&
(!rows || rows >= MI_MIN_ROWS_TO_USE_BULK_INSERT));
+ mi_clear_all_keys_active(key_map);
for (i=total_keylength=num_keys=0 ; i < share->base.keys ; i++)
{
- if (!(key[i].flag & HA_NOSAME) && share->base.auto_key != i+1
- && test(share->state.key_map & ((ulonglong) 1 << i)))
+ if (! (key[i].flag & HA_NOSAME) && (share->base.auto_key != i + 1) &&
+ mi_is_key_active(share->state.key_map, i))
{
num_keys++;
- key_map |=((ulonglong) 1 << i);
+ mi_set_key_active(key_map, i);
total_keylength+=key[i].maxlength+TREE_ELEMENT_EXTRA_SIZE;
}
}
@@ -968,7 +982,7 @@ int mi_init_bulk_insert(MI_INFO *info, ulong cache_size, ha_rows rows)
params=(bulk_insert_param *)(info->bulk_insert+share->base.keys);
for (i=0 ; i < share->base.keys ; i++)
{
- if (test(key_map & ((ulonglong) 1 << i)))
+ if (mi_is_key_active(key_map, i))
{
params->info=info;
params->keynr=i;
diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c
index 2ffc491fc91..10308408b1f 100644
--- a/myisam/myisamchk.c
+++ b/myisam/myisamchk.c
@@ -50,7 +50,7 @@ static int stopwords_inited= 0;
static MY_TMPDIR myisamchk_tmpdir;
static const char *type_names[]=
-{ "?","char","binary", "short", "long", "float",
+{ "impossible","char","binary", "short", "long", "float",
"double","number","unsigned short",
"unsigned long","longlong","ulonglong","int24",
"uint24","int8","varchar", "varbin","?",
@@ -705,8 +705,7 @@ get_one_option(int optid,
fprintf(stderr, "Invalid value of stats_method: %s.\n", argument);
exit(1);
}
- check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL :
- MI_STATS_METHOD_NULLS_NOT_EQUAL;
+ check_param.stats_method= method-1;
break;
}
#ifdef DEBUG /* Only useful if debugging */
@@ -905,8 +904,8 @@ static int myisamchk(MI_CHECK *param, my_string filename)
MI_STATE_INFO_SIZE ||
mi_uint2korr(share->state.header.base_info_length) !=
MI_BASE_INFO_SIZE ||
- ((param->keys_in_use & ~share->state.key_map) &
- (((ulonglong) 1L << share->base.keys)-1)) ||
+ mi_is_any_intersect_keys_active(param->keys_in_use, share->base.keys,
+ ~share->state.key_map) ||
test_if_almost_full(info) ||
info->s->state.header.file_version[3] != myisam_file_magic[3] ||
(set_collation &&
@@ -973,8 +972,8 @@ static int myisamchk(MI_CHECK *param, my_string filename)
if (param->testflag & T_REP_ANY)
{
ulonglong tmp=share->state.key_map;
- share->state.key_map= (((ulonglong) 1 << share->base.keys)-1)
- & param->keys_in_use;
+ mi_copy_keys_active(share->state.key_map, share->base.keys,
+ param->keys_in_use);
if (tmp != share->state.key_map)
info->update|=HA_STATE_CHANGED;
}
@@ -995,7 +994,7 @@ static int myisamchk(MI_CHECK *param, my_string filename)
if (!error)
{
if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) &&
- (share->state.key_map ||
+ (mi_is_any_key_active(share->state.key_map) ||
(rep_quick && !param->keys_in_use && !recreate)) &&
mi_test_if_sort_rep(info, info->state->records,
info->s->state.key_map,
@@ -1071,7 +1070,7 @@ static int myisamchk(MI_CHECK *param, my_string filename)
llstr(info->state->records,llbuff),
llstr(info->state->del,llbuff2));
error =chk_status(param,info);
- share->state.key_map &=param->keys_in_use;
+ mi_intersect_keys_active(share->state.key_map, param->keys_in_use);
error =chk_size(param,info);
if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE)))
error|=chk_del(param, info,param->testflag);
@@ -1300,7 +1299,7 @@ static void descript(MI_CHECK *param, register MI_INFO *info, my_string name)
}
printf("Recordlength: %13d\n",(int) share->base.pack_reclength);
- if (share->state.key_map != (((ulonglong) 1 << share->base.keys) -1))
+ if (! mi_is_all_keys_active(share->state.key_map, share->base.keys))
{
longlong2str(share->state.key_map,buff,2);
printf("Using only keys '%s' of %d possibly keys\n",
@@ -1482,7 +1481,7 @@ static int mi_sort_records(MI_CHECK *param,
temp_buff=0;
new_file= -1;
- if (!(((ulonglong) 1 << sort_key) & share->state.key_map))
+ if (! mi_is_key_active(share->state.key_map, sort_key))
{
mi_check_print_warning(param,
"Can't sort table '%s' on key %d; No such key",
@@ -1733,11 +1732,11 @@ err:
sorting
*/
-static my_bool not_killed= 0;
+static int not_killed= 0;
-volatile my_bool *killed_ptr(MI_CHECK *param __attribute__((unused)))
+volatile int *killed_ptr(MI_CHECK *param __attribute__((unused)))
{
- return &not_killed; /* always NULL */
+ return &not_killed; /* always NULL */
}
/* print warnings and errors */
diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h
index 93a7bf96f59..6b040f2ef0e 100644
--- a/myisam/myisamdef.h
+++ b/myisam/myisamdef.h
@@ -273,6 +273,7 @@ struct st_myisam_info {
uint preload_buff_size; /* When preloading indexes */
myf lock_wait; /* is 0 or MY_DONT_WAIT */
my_bool was_locked; /* Was locked in panic */
+ my_bool append_insert_at_end; /* Set if concurrent insert */
my_bool quick_mode;
my_bool page_changed; /* If info->buff can't be used for rnext */
my_bool buff_used; /* If info->buff has to be reread for rnext */
@@ -358,6 +359,8 @@ typedef struct st_mi_sort_param
#define mi_mark_crashed_on_repair(x) { (x)->s->state.changed|=STATE_CRASHED|STATE_CRASHED_ON_REPAIR ; (x)->update|= HA_STATE_CHANGED; }
#define mi_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED)
#define mi_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR)
+#define mi_print_error(SHARE, ERRNO) \
+ mi_report_error((ERRNO), (SHARE)->index_file_name)
/* Functions to store length of space packed keys, VARCHAR or BLOB keys */
@@ -669,6 +672,7 @@ extern void _myisam_log_command(enum myisam_log_commands command,
extern void _myisam_log_record(enum myisam_log_commands command,MI_INFO *info,
const byte *record,my_off_t filepos,
int result);
+extern void mi_report_error(int errcode, const char *file_name);
extern my_bool _mi_memmap_file(MI_INFO *info);
extern void _mi_unmap_file(MI_INFO *info);
extern uint save_pack_length(uint version, byte *block_buff, ulong length);
@@ -676,10 +680,10 @@ extern uint read_pack_length(uint version, const uchar *buf, ulong *length);
extern uint calc_pack_length(uint version, ulong length);
uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite);
-char *mi_state_info_read(char *ptr, MI_STATE_INFO *state);
+char *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state);
uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead);
uint mi_base_info_write(File file, MI_BASE_INFO *base);
-char *my_n_base_info_read(char *ptr, MI_BASE_INFO *base);
+char *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base);
int mi_keyseg_write(File file, const HA_KEYSEG *keyseg);
char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg);
uint mi_keydef_write(File file, MI_KEYDEF *keydef);
@@ -703,7 +707,7 @@ int _mi_cmp_dynamic_unique(MI_INFO *info, MI_UNIQUEDEF *def,
const byte *record, my_off_t pos);
int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b,
my_bool null_are_equal);
-void mi_get_status(void* param);
+void mi_get_status(void* param, int concurrent_insert);
void mi_update_status(void* param);
void mi_copy_status(void* to,void *from);
my_bool mi_check_status(void* param);
@@ -716,7 +720,7 @@ int mi_open_keyfile(MYISAM_SHARE *share);
void mi_setup_functions(register MYISAM_SHARE *share);
/* Functions needed by mi_check */
-volatile my_bool *killed_ptr(MI_CHECK *param);
+volatile int *killed_ptr(MI_CHECK *param);
void mi_check_print_error _VARARGS((MI_CHECK *param, const char *fmt,...));
void mi_check_print_warning _VARARGS((MI_CHECK *param, const char *fmt,...));
void mi_check_print_info _VARARGS((MI_CHECK *param, const char *fmt,...));
diff --git a/myisam/myisamlog.c b/myisam/myisamlog.c
index dc98d813266..de55b86252c 100644
--- a/myisam/myisamlog.c
+++ b/myisam/myisamlog.c
@@ -810,7 +810,7 @@ static int find_record_with_key(struct file_info *file_info, byte *record)
for (key=0 ; key < info->s->base.keys ; key++)
{
- if ((((ulonglong) 1 << key) & info->s->state.key_map) &&
+ if (mi_is_key_active(info->s->state.key_map, key) &&
info->s->keyinfo[key].flag & HA_NOSAME)
{
VOID(_mi_make_key(info,key,tmp_key,record,0L));
diff --git a/myisam/myisampack.c b/myisam/myisampack.c
index f017e4d23ab..3b091cd6ea2 100644
--- a/myisam/myisampack.c
+++ b/myisam/myisampack.c
@@ -33,10 +33,10 @@
#include <my_getopt.h>
#include <assert.h>
-#if INT_MAX > 32767
-#define BITS_SAVED 32
+#if SIZEOF_LONG_LONG > 4
+#define BITS_SAVED 64
#else
-#define BITS_SAVED 16
+#define BITS_SAVED 32
#endif
#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */
@@ -49,10 +49,10 @@
struct st_file_buffer {
File file;
- char *buffer,*pos,*end;
+ uchar *buffer,*pos,*end;
my_off_t pos_in_file;
int bits;
- uint current_byte;
+ ulonglong bitbucket;
};
struct st_huff_tree;
@@ -69,13 +69,17 @@ typedef struct st_huff_counts {
my_off_t end_space[8];
my_off_t pre_space[8];
my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
- TREE int_tree;
- byte *tree_buff;
- byte *tree_pos;
+ TREE int_tree; /* Tree for detecting distinct column values. */
+ byte *tree_buff; /* Column values, 'field_length' each. */
+ byte *tree_pos; /* Points to end of column values in 'tree_buff'. */
} HUFF_COUNTS;
typedef struct st_huff_element HUFF_ELEMENT;
+/*
+ WARNING: It is crucial for the optimizations in calc_packed_length()
+ that 'count' is the first element of 'HUFF_ELEMENT'.
+*/
struct st_huff_element {
my_off_t count;
union un_element {
@@ -98,7 +102,7 @@ typedef struct st_huff_tree {
my_off_t bytes_packed;
uint tree_pack_length;
uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
- ulong *code;
+ ulonglong *code;
uchar *code_len;
} HUFF_TREE;
@@ -146,7 +150,7 @@ static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
static void make_traverse_code_tree(HUFF_TREE *huff_tree,
HUFF_ELEMENT *element,uint size,
- ulong code);
+ ulonglong code);
static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
my_off_t tot_elements,my_off_t filelength);
static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
@@ -161,7 +165,7 @@ static char *make_old_name(char *new_name,char *old_name);
static void init_file_buffer(File file,pbool read_buffer);
static int flush_buffer(ulong neaded_length);
static void end_file_buffer(void);
-static void write_bits(ulong value,uint bits);
+static void write_bits(ulonglong value, uint bits);
static void flush_bits(void);
static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
ha_checksum crc);
@@ -170,13 +174,23 @@ static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length
static int mrg_close(PACK_MRG_INFO *mrg);
static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf);
static void mrg_reset(PACK_MRG_INFO *mrg);
+#if !defined(DBUG_OFF)
+static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count);
+static int fakecmp(my_off_t **count1, my_off_t **count2);
+#endif
static int error_on_write=0,test_only=0,verbose=0,silent=0,
write_loop=0,force_pack=0, isamchk_neaded=0;
static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
static my_bool backup, opt_wait;
-static uint tree_buff_length=8196-MALLOC_OVERHEAD;
+/*
+ tree_buff_length is somewhat arbitrary. The bigger it is the better
+ the chance to win in terms of compression factor. On the other hand,
+ this table becomes part of the compressed file header. And its length
+ is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
+*/
+static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
static char tmp_dir[FN_REFLEN]={0},*join_table;
static my_off_t intervall_length;
static ha_checksum glob_crc;
@@ -225,7 +239,8 @@ int main(int argc, char **argv)
}
if (ok && isamchk_neaded && !silent)
puts("Remember to run myisamchk -rq on compressed tables");
- VOID(fflush(stdout)); VOID(fflush(stderr));
+ VOID(fflush(stdout));
+ VOID(fflush(stderr));
free_defaults(default_argv);
my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
exit(error ? 2 : 0);
@@ -264,7 +279,7 @@ static struct my_option my_long_options[] =
0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"test", 't', "Don't pack table, only test packing it.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
- {"verbose", 'v', "Write info about progress and packing result.",
+ {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"version", 'V', "Output version information and exit.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
@@ -277,7 +292,8 @@ static struct my_option my_long_options[] =
static void print_version(void)
{
- printf("%s Ver 1.22 for %s on %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE);
+ VOID(printf("%s Ver 1.23 for %s on %s\n",
+ my_progname, SYSTEM_TYPE, MACHINE_TYPE));
NETWARE_SET_SCREEN_MODE(1);
}
@@ -294,7 +310,7 @@ static void usage(void)
puts("afterwards to update the keys.");
puts("You should give the .MYI file as the filename argument.");
- printf("\nUsage: %s [OPTIONS] filename...\n", my_progname);
+ VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname));
my_print_help(my_long_options);
print_defaults("my", load_default_groups);
my_print_variables(my_long_options);
@@ -323,7 +339,10 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
silent= 1;
break;
case 't':
- test_only= verbose= 1;
+ test_only= 1;
+ /* Avoid to reset 'verbose' if it was already set > 1. */
+ if (! verbose)
+ verbose= 1;
break;
case 'T':
length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
@@ -334,7 +353,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
}
break;
case 'v':
- verbose= 1;
+ verbose++; /* Allow for selecting the level of verbosity. */
silent= 0;
break;
case '#':
@@ -389,7 +408,7 @@ static MI_INFO *open_isam_file(char *name,int mode)
(opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
HA_OPEN_ABORT_IF_LOCKED))))
{
- VOID(fprintf(stderr,"%s gave error %d on open\n",name,my_errno));
+ VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno));
DBUG_RETURN(0);
}
share=isam_file->s;
@@ -397,7 +416,7 @@ static MI_INFO *open_isam_file(char *name,int mode)
{
if (!force_pack)
{
- VOID(fprintf(stderr,"%s is already compressed\n",name));
+ VOID(fprintf(stderr, "%s is already compressed\n", name));
VOID(mi_close(isam_file));
DBUG_RETURN(0);
}
@@ -409,7 +428,7 @@ static MI_INFO *open_isam_file(char *name,int mode)
(share->state.state.records <= 1 ||
share->state.state.data_file_length < 1024))
{
- VOID(fprintf(stderr,"%s is too small to compress\n",name));
+ VOID(fprintf(stderr, "%s is too small to compress\n", name));
VOID(mi_close(isam_file));
DBUG_RETURN(0);
}
@@ -431,9 +450,9 @@ static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count)
if (!(mrg->file[i]=open_isam_file(names[i],O_RDONLY)))
goto error;
- mrg->src_file_has_indexes_disabled|= ((mrg->file[i]->s->state.key_map !=
- (((ulonglong) 1) <<
- mrg->file[i]->s->base. keys) - 1));
+ mrg->src_file_has_indexes_disabled|=
+ ! mi_is_all_keys_active(mrg->file[i]->s->state.key_map,
+ mrg->file[i]->s->base.keys);
}
/* Check that files are identical */
for (j=0 ; j < count-1 ; j++)
@@ -455,8 +474,8 @@ static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count)
return 0;
diff_file:
- fprintf(stderr,"%s: Tables '%s' and '%s' are not identical\n",
- my_progname,names[j],names[j+1]);
+ VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
+ my_progname, names[j], names[j+1]));
error:
while (i--)
mi_close(mrg->file[i]);
@@ -527,16 +546,25 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
mrg->records=0;
for (i=0 ; i < mrg->count ; i++)
mrg->records+=mrg->file[i]->s->state.state.records;
+
+ DBUG_PRINT("info", ("Compressing %s: (%lu records)",
+ result_table ? new_name : org_name,
+ (ulong) mrg->records));
if (write_loop || verbose)
{
- printf("Compressing %s: (%lu records)\n",
- result_table ? new_name : org_name,(ulong) mrg->records);
+ VOID(printf("Compressing %s: (%lu records)\n",
+ result_table ? new_name : org_name, (ulong) mrg->records));
}
trees=fields=share->base.fields;
huff_counts=init_huff_count(isam_file,mrg->records);
QUICK_SAFEMALLOC;
+
+ /*
+ Read the whole data file(s) for statistics.
+ */
+ DBUG_PRINT("info", ("- Calculating statistics"));
if (write_loop || verbose)
- printf("- Calculating statistics\n");
+ VOID(printf("- Calculating statistics\n"));
if (get_statistic(mrg,huff_counts))
goto err;
NORMAL_SAFEMALLOC;
@@ -545,29 +573,74 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
old_length+= (mrg->file[i]->s->state.state.data_file_length -
mrg->file[i]->s->state.state.empty);
+ /*
+ Create a global priority queue in preparation for making
+ temporary Huffman trees.
+ */
if (init_queue(&queue,256,0,0,compare_huff_elements,0))
goto err;
+
+ /*
+ Check each column if we should use pre-space-compress, end-space-
+ compress, empty-field-compress or zero-field-compress.
+ */
check_counts(huff_counts,fields,mrg->records);
+
+ /*
+ Build a Huffman tree for each column.
+ */
huff_trees=make_huff_trees(huff_counts,trees);
+
+ /*
+ If the packed lengths of combined columns is less then the sum of
+ the non-combined columns, then create common Huffman trees for them.
+ We do this only for byte compressed columns, not for distinct values
+ compressed columns.
+ */
if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
goto err;
+
+ /*
+ Assign codes to all byte or column values.
+ */
if (make_huff_decode_table(huff_trees,fields))
goto err;
+ /* Prepare a file buffer. */
init_file_buffer(new_file,0);
+
+ /*
+ Reserve space in the target file for the fixed compressed file header.
+ */
file_buffer.pos_in_file=HEAD_LENGTH;
if (! test_only)
VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0)));
+ /*
+ Write field infos: field type, pack type, length bits, tree number.
+ */
write_field_info(huff_counts,fields,used_trees);
+
+ /*
+ Write decode trees.
+ */
if (!(tot_elements=write_huff_tree(huff_trees,trees)))
goto err;
+
+ /*
+ Calculate the total length of the compression info header.
+ This includes the fixed compressed file header, the column compression
+ type descriptions, and the decode trees.
+ */
header_length=(uint) file_buffer.pos_in_file+
(uint) (file_buffer.pos-file_buffer.buffer);
- /* Compress file */
+ /*
+ Compress the source file into the target file.
+ */
+ DBUG_PRINT("info", ("- Compressing file"));
if (write_loop || verbose)
- printf("- Compressing file\n");
+ VOID(printf("- Compressing file\n"));
error=compress_isam_file(mrg,huff_counts);
new_length=file_buffer.pos_in_file;
if (!error && !test_only)
@@ -577,16 +650,28 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
error=my_write(file_buffer.file,buff,sizeof(buff),
MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
}
+
+ /*
+ Write the fixed compressed file header.
+ */
if (!error)
error=write_header(mrg,header_length,used_trees,tot_elements,
new_length);
+
+ /* Flush the file buffer. */
end_file_buffer();
+ /* Display statistics. */
+ DBUG_PRINT("info", ("Min record length: %6d Max length: %6d "
+ "Mean total length: %6ld\n",
+ mrg->min_pack_length, mrg->max_pack_length,
+ (ulong) (mrg->records ? (new_length/mrg->records) : 0)));
if (verbose && mrg->records)
- printf("Min record length: %6d Max length: %6d Mean total length: %6ld\n",
- mrg->min_pack_length,mrg->max_pack_length,
- (ulong) (new_length/mrg->records));
+ VOID(printf("Min record length: %6d Max length: %6d "
+ "Mean total length: %6ld\n", mrg->min_pack_length,
+ mrg->max_pack_length, (ulong) (new_length/mrg->records)));
+ /* Close source and target file. */
if (!test_only)
{
error|=my_close(new_file,MYF(MY_WME));
@@ -597,6 +682,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
}
}
+ /* Cleanup. */
free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
if (! test_only && ! error)
{
@@ -646,15 +732,16 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
error|=my_close(join_isam_file,MYF(MY_WME));
if (error)
{
- VOID(fprintf(stderr,"Aborting: %s is not compressed\n",org_name));
+ VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name));
VOID(my_delete(new_name,MYF(MY_WME)));
DBUG_RETURN(-1);
}
if (write_loop || verbose)
{
if (old_length)
- printf("%.4g%% \n", (((longlong) (old_length -new_length))*100.0/
- (longlong) old_length));
+ VOID(printf("%.4g%% \n",
+ (((longlong) (old_length - new_length)) * 100.0 /
+ (longlong) old_length)));
else
puts("Empty file saved in compressed format");
}
@@ -667,7 +754,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
if (join_isam_file >= 0)
VOID(my_close(join_isam_file,MYF(0)));
mrg_close(mrg);
- VOID(fprintf(stderr,"Aborted: %s is not compressed\n",org_name));
+ VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name));
DBUG_RETURN(-1);
}
@@ -694,6 +781,12 @@ static HUFF_COUNTS *init_huff_count(MI_INFO *info,my_off_t records)
(type == FIELD_NORMAL ||
type == FIELD_SKIP_ZERO))
count[i].max_zero_fill= count[i].field_length;
+ /*
+ For every column initialize a tree, which is used to detect distinct
+ column values. 'int_tree' works together with 'tree_buff' and
+ 'tree_pos'. It's keys are implemented by pointers into 'tree_buff'.
+ This is accomplished by '-1' as the element size.
+ */
init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL,
NULL);
if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
@@ -765,7 +858,8 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
static_row_size=1;
for (count=huff_counts ; count < end_count ; count++)
{
- if (count->field_type == FIELD_BLOB || count->field_type == FIELD_VARCHAR)
+ if (count->field_type == FIELD_BLOB ||
+ count->field_type == FIELD_VARCHAR)
{
static_row_size=0;
break;
@@ -778,10 +872,13 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
ulong tot_blob_length=0;
if (! error)
{
+ /* glob_crc is a checksum over all bytes of all records. */
if (static_row_size)
glob_crc+=mi_static_checksum(mrg->file[0],record);
else
glob_crc+=mi_checksum(mrg->file[0],record);
+
+ /* Count the incidence of values separately for every column. */
for (pos=record,count=huff_counts ;
count < end_count ;
count++,
@@ -789,15 +886,48 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
{
next_pos=end_pos=(start_pos=pos)+count->field_length;
- /* Put value in tree if there is room for it */
+ /*
+ Put the whole column value in a tree if there is room for it.
+ 'int_tree' is used to quickly check for duplicate values.
+ 'tree_buff' collects as many distinct column values as
+ possible. If the field length is > 1, it is tree_buff_length,
+ else 2 bytes. Each value is 'field_length' bytes big. If there
+ are more distinct column values than fit into the buffer, we
+ give up with this tree. BLOBs and VARCHARs do not have a
+ tree_buff as it can only be used with fixed length columns.
+ For the special case of field length == 1, we handle only the
+ case that there is only one distinct value in the table(s).
+ Otherwise, we can have a maximum of 256 distinct values. This
+ is then handled by the normal Huffman tree build.
+
+ Another limit for collecting distinct column values is the
+ number of values itself. Since we would need to build a
+ Huffman tree for the values, we are limited by the 'IS_OFFSET'
+ constant. This constant expresses a bit which is used to
+ determine if a tree element holds a final value or an offset
+ to a child element. Hence, all values and offsets need to be
+ smaller than 'IS_OFFSET'. A tree element is implemented with
+ two integer values, one for the left branch and one for the
+ right branch. For the extreme case that the first element
+ points to the last element, the number of integers in the tree
+ must be less or equal to IS_OFFSET. So the number of elements
+ must be less or equal to IS_OFFSET / 2.
+
+ WARNING: At first, we insert a pointer into the record buffer
+ as the key for the tree. If we got a new distinct value, which
+ is really inserted into the tree, instead of being counted
+ only, we will copy the column value from the record buffer to
+ 'tree_buff' and adjust the key pointer of the tree accordingly.
+ */
if (count->tree_buff)
{
global_count=count;
if (!(element=tree_insert(&count->int_tree,pos, 0,
count->int_tree.custom_arg)) ||
(element->count == 1 &&
- count->tree_buff + tree_buff_length <
- count->tree_pos + count->field_length) ||
+ (count->tree_buff + tree_buff_length <
+ count->tree_pos + count->field_length)) ||
+ (count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
(count->field_length == 1 &&
count->int_tree.elements_in_tree > 1))
{
@@ -807,10 +937,17 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
}
else
{
+ /*
+ If tree_insert() succeeds, it either creates a new element
+ or increments the counter of an existing element.
+ */
if (element->count == 1)
- { /* New element */
+ {
+ /* Copy the new column value into 'tree_buff'. */
memcpy(count->tree_pos,pos,(size_t) count->field_length);
+ /* Adjust the key pointer in the tree. */
tree_set_pointer(element,count->tree_pos);
+ /* Point behind the last column value so far. */
count->tree_pos+=count->field_length;
}
}
@@ -820,15 +957,21 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
if (count->field_type == FIELD_NORMAL ||
count->field_type == FIELD_SKIP_ENDSPACE)
{
+ /* Ignore trailing space. */
for ( ; end_pos > pos ; end_pos--)
if (end_pos[-1] != ' ')
break;
+ /* Empty fields are just counted. Go to the next record. */
if (end_pos == pos)
{
count->empty_fields++;
count->max_zero_fill=0;
continue;
}
+ /*
+ Count the total of all trailing spaces and the number of
+ short trailing spaces. Remember the longest trailing space.
+ */
length= (uint) (next_pos-end_pos);
count->tot_end_space+=length;
if (length < 8)
@@ -836,18 +979,25 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
if (count->max_end_space < length)
count->max_end_space = length;
}
+
if (count->field_type == FIELD_NORMAL ||
count->field_type == FIELD_SKIP_PRESPACE)
{
+ /* Ignore leading space. */
for (pos=start_pos; pos < end_pos ; pos++)
if (pos[0] != ' ')
break;
+ /* Empty fields are just counted. Go to the next record. */
if (end_pos == pos)
{
count->empty_fields++;
count->max_zero_fill=0;
continue;
}
+ /*
+ Count the total of all leading spaces and the number of
+ short leading spaces. Remember the longest leading space.
+ */
length= (uint) (pos-start_pos);
count->tot_pre_space+=length;
if (length < 8)
@@ -855,6 +1005,8 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
if (count->max_pre_space < length)
count->max_pre_space = length;
}
+
+ /* Calculate pos, end_pos, and max_length for variable length fields. */
if (count->field_type == FIELD_BLOB)
{
uint field_length=count->field_length -mi_portable_sizeof_char_ptr;
@@ -866,50 +1018,128 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
}
else if (count->field_type == FIELD_VARCHAR)
{
- length=uint2korr(start_pos);
- pos=start_pos+2;
- end_pos=start_pos+length;
+ uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
+ length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
+ uint2korr(start_pos));
+ pos= start_pos+pack_length;
+ end_pos= pos+length;
set_if_bigger(count->max_length,length);
}
+
+ /* Evaluate 'max_zero_fill' for short fields. */
if (count->field_length <= 8 &&
(count->field_type == FIELD_NORMAL ||
count->field_type == FIELD_SKIP_ZERO))
{
uint i;
+ /* Zero fields are just counted. Go to the next record. */
if (!memcmp((byte*) start_pos,zero_string,count->field_length))
{
count->zero_fields++;
continue;
}
+ /*
+ max_zero_fill starts with field_length. It is decreased every
+ time a shorter "zero trailer" is found. It is set to zero when
+ an empty field is found (see above). This suggests that the
+ variable should be called 'min_zero_fill'.
+ */
for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
i++) ;
if (i < count->max_zero_fill)
count->max_zero_fill=i;
}
+
+ /* Ignore zero fields and check fields. */
if (count->field_type == FIELD_ZERO ||
count->field_type == FIELD_CHECK)
continue;
+
+ /*
+ Count the incidence of every byte value in the
+ significant field value.
+ */
for ( ; pos < end_pos ; pos++)
count->counts[(uchar) *pos]++;
+
+ /* Step to next field. */
}
+
if (tot_blob_length > max_blob_length)
max_blob_length=tot_blob_length;
record_count++;
if (write_loop && record_count % WRITE_COUNT == 0)
{
- printf("%lu\r",(ulong) record_count); VOID(fflush(stdout));
+ VOID(printf("%lu\r", (ulong) record_count));
+ VOID(fflush(stdout));
}
}
else if (error != HA_ERR_RECORD_DELETED)
{
- fprintf(stderr,"Got error %d while reading rows",error);
+ VOID(fprintf(stderr, "Got error %d while reading rows", error));
break;
}
+
+ /* Step to next record. */
}
if (write_loop)
{
- printf(" \r"); VOID(fflush(stdout));
+ VOID(printf(" \r"));
+ VOID(fflush(stdout));
}
+
+ /*
+ If --debug=d,fakebigcodes is set, fake the counts to get big Huffman
+ codes.
+ */
+ DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count););
+
+ DBUG_PRINT("info", ("Found the following number of incidents "
+ "of the byte codes:"));
+ if (verbose >= 2)
+ VOID(printf("Found the following number of incidents "
+ "of the byte codes:\n"));
+ for (count= huff_counts ; count < end_count; count++)
+ {
+ uint idx;
+ my_off_t total_count;
+ char llbuf[32];
+
+ DBUG_PRINT("info", ("column: %3u", count - huff_counts + 1));
+ if (verbose >= 2)
+ VOID(printf("column: %3u\n", count - huff_counts + 1));
+ if (count->tree_buff)
+ {
+ DBUG_PRINT("info", ("number of distinct values: %u",
+ (count->tree_pos - count->tree_buff) /
+ count->field_length));
+ if (verbose >= 2)
+ VOID(printf("number of distinct values: %u\n",
+ (count->tree_pos - count->tree_buff) /
+ count->field_length));
+ }
+ total_count= 0;
+ for (idx= 0; idx < 256; idx++)
+ {
+ if (count->counts[idx])
+ {
+ total_count+= count->counts[idx];
+ DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx,
+ llstr((longlong) count->counts[idx], llbuf)));
+ if (verbose >= 2)
+ VOID(printf("counts[0x%02x]: %12s\n", idx,
+ llstr((longlong) count->counts[idx], llbuf)));
+ }
+ }
+ DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count,
+ llbuf)));
+ if ((verbose >= 2) && total_count)
+ {
+ VOID(printf("total: %12s\n",
+ llstr((longlong) total_count, llbuf)));
+ }
+ }
+
mrg->records=record_count;
mrg->max_blob_length=max_blob_length;
my_afree((gptr) record);
@@ -958,9 +1188,14 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
huff_counts->field_type=FIELD_NORMAL;
huff_counts->pack_type=0;
+ /* Check for zero-filled records (in this column), or zero records. */
if (huff_counts->zero_fields || ! records)
{
my_off_t old_space_count;
+ /*
+ If there are only zero filled records (in this column),
+ or no records at all, we are done.
+ */
if (huff_counts->zero_fields == records)
{
huff_counts->field_type= FIELD_ZERO;
@@ -968,14 +1203,22 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
huff_counts->counts[0]=0;
goto found_pack;
}
+ /* Remeber the number of significant spaces. */
old_space_count=huff_counts->counts[' '];
- huff_counts->counts[' ']+=huff_counts->tot_end_space+
- huff_counts->tot_pre_space +
- huff_counts->empty_fields * huff_counts->field_length;
+ /* Add all leading and trailing spaces. */
+ huff_counts->counts[' ']+= (huff_counts->tot_end_space +
+ huff_counts->tot_pre_space +
+ huff_counts->empty_fields *
+ huff_counts->field_length);
+ /* Check, what the compressed length of this would be. */
old_length=calc_packed_length(huff_counts,0)+records/8;
+ /* Get the number of zero bytes. */
length=huff_counts->zero_fields*huff_counts->field_length;
+ /* Add it to the counts. */
huff_counts->counts[0]+=length;
+ /* Check, what the compressed length of this would be. */
new_length=calc_packed_length(huff_counts,0);
+ /* If the compression without the zeroes would be shorter, we are done. */
if (old_length < new_length && huff_counts->field_length > 1)
{
huff_counts->field_type=FIELD_SKIP_ZERO;
@@ -983,9 +1226,16 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
huff_counts->bytes_packed=old_length- records/8;
goto found_pack;
}
+ /* Remove the insignificant spaces, but keep the zeroes. */
huff_counts->counts[' ']=old_space_count;
}
+ /* Check, what the compressed length of this column would be. */
huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
+
+ /*
+ If there are enough empty records (in this column),
+ treating them specially may pay off.
+ */
if (huff_counts->empty_fields)
{
if (huff_counts->field_length > 2 &&
@@ -1017,6 +1267,11 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
}
}
}
+
+ /*
+ If there are enough trailing spaces (in this column),
+ treating them specially may pay off.
+ */
if (huff_counts->tot_end_space)
{
huff_counts->counts[' ']+=huff_counts->tot_pre_space;
@@ -1026,6 +1281,11 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
goto found_pack;
huff_counts->counts[' ']-=huff_counts->tot_pre_space;
}
+
+ /*
+ If there are enough leading spaces (in this column),
+ treating them specially may pay off.
+ */
if (huff_counts->tot_pre_space)
{
if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
@@ -1055,6 +1315,8 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
{
HUFF_TREE tree;
+ DBUG_EXECUTE_IF("forceintervall",
+ huff_counts->bytes_packed= ~ (my_off_t) 0;);
tree.element_buffer=0;
if (!make_huff_tree(&tree,huff_counts) &&
tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
@@ -1080,14 +1342,27 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
fill_zero_fields++;
field_count[huff_counts->field_type]++;
}
+ DBUG_PRINT("info", ("normal: %3d empty-space: %3d "
+ "empty-zero: %3d empty-fill: %3d",
+ field_count[FIELD_NORMAL],space_fields,
+ field_count[FIELD_SKIP_ZERO],fill_zero_fields));
+ DBUG_PRINT("info", ("pre-space: %3d end-space: %3d "
+ "intervall-fields: %3d zero: %3d",
+ field_count[FIELD_SKIP_PRESPACE],
+ field_count[FIELD_SKIP_ENDSPACE],
+ field_count[FIELD_INTERVALL],
+ field_count[FIELD_ZERO]));
if (verbose)
- printf("\nnormal: %3d empty-space: %3d empty-zero: %3d empty-fill: %3d\npre-space: %3d end-space: %3d intervall-fields: %3d zero: %3d\n",
- field_count[FIELD_NORMAL],space_fields,
- field_count[FIELD_SKIP_ZERO],fill_zero_fields,
- field_count[FIELD_SKIP_PRESPACE],
- field_count[FIELD_SKIP_ENDSPACE],
- field_count[FIELD_INTERVALL],
- field_count[FIELD_ZERO]);
+ VOID(printf("\nnormal: %3d empty-space: %3d "
+ "empty-zero: %3d empty-fill: %3d\n"
+ "pre-space: %3d end-space: %3d "
+ "intervall-fields: %3d zero: %3d\n",
+ field_count[FIELD_NORMAL],space_fields,
+ field_count[FIELD_SKIP_ZERO],fill_zero_fields,
+ field_count[FIELD_SKIP_PRESPACE],
+ field_count[FIELD_SKIP_ENDSPACE],
+ field_count[FIELD_INTERVALL],
+ field_count[FIELD_ZERO]));
DBUG_VOID_RETURN;
}
@@ -1184,8 +1459,24 @@ static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees)
DBUG_RETURN(huff_tree);
}
- /* Update huff_tree according to huff_counts->counts or
- huff_counts->tree_buff */
+/*
+ Build a Huffman tree.
+
+ SYNOPSIS
+ make_huff_tree()
+ huff_tree The Huffman tree.
+ huff_counts The counts.
+
+ DESCRIPTION
+ Build a Huffman tree according to huff_counts->counts or
+ huff_counts->tree_buff. tree_buff, if non-NULL contains up to
+ tree_buff_length of distinct column values. In that case, whole
+ values can be Huffman encoded instead of single bytes.
+
+ RETURN
+ 0 OK
+ != 0 Error
+*/
static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
{
@@ -1196,12 +1487,14 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
first=last=0;
if (huff_counts->tree_buff)
{
+ /* Calculate the number of distinct values in tree_buff. */
found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) /
huff_counts->field_length;
first=0; last=found-1;
}
else
{
+ /* Count the number of byte codes found in the column. */
for (i=found=0 ; i < 256 ; i++)
{
if (huff_counts->counts[i])
@@ -1215,6 +1508,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
found=2;
}
+ /* When using 'tree_buff' we can have more that 256 values. */
if (queue.max_elements < found)
{
delete_queue(&queue);
@@ -1222,6 +1516,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
return -1;
}
+ /* Allocate or reallocate an element buffer for the Huffman tree. */
if (!huff_tree->element_buffer)
{
if (!(huff_tree->element_buffer=
@@ -1249,15 +1544,25 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
if (huff_counts->tree_buff)
{
huff_tree->elements=0;
- tree_walk(&huff_counts->int_tree,
- (int (*)(void*, element_count,void*)) save_counts_in_queue,
- (gptr) huff_tree, left_root_right);
huff_tree->tree_pack_length=(1+15+16+5+5+
(huff_tree->char_bits+1)*found+
(huff_tree->offset_bits+1)*
(found-2)+7)/8 +
(uint) (huff_tree->counts->tree_pos-
huff_tree->counts->tree_buff);
+ /*
+ Put a HUFF_ELEMENT into the queue for every distinct column value.
+
+ tree_walk() calls save_counts_in_queue() for every element in
+ 'int_tree'. This takes elements from the target trees element
+ buffer and places references to them into the buffer of the
+ priority queue. We insert in column value order, but the order is
+ in fact irrelevant here. We will establish the correct order
+ later.
+ */
+ tree_walk(&huff_counts->int_tree,
+ (int (*)(void*, element_count,void*)) save_counts_in_queue,
+ (gptr) huff_tree, left_root_right);
}
else
{
@@ -1266,7 +1571,15 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
(huff_tree->char_bits+1)*found+
(huff_tree->offset_bits+1)*
(found-2)+7)/8;
+ /*
+ Put a HUFF_ELEMENT into the queue for every byte code found in the column.
+ The elements are taken from the target trees element buffer.
+ Instead of using queue_insert(), we just place references to the
+ elements into the buffer of the priority queue. We insert in byte
+ value order, but the order is in fact irrelevant here. We will
+ establish the correct order later.
+ */
for (i=first, found=0 ; i <= last ; i++)
{
if (huff_counts->counts[i])
@@ -1278,8 +1591,13 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
queue.root[found]=(byte*) new_huff_el;
}
}
+ /*
+ If there is only a single byte value in this field in all records,
+ add a second element with zero incidence. This is required to enter
+ the loop, which builds the Huffman tree.
+ */
while (found < 2)
- { /* Our huff_trees request at least 2 elements */
+ {
new_huff_el=huff_tree->element_buffer+(found++);
new_huff_el->count=0;
new_huff_el->a.leaf.null=0;
@@ -1290,21 +1608,53 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
queue.root[found]=(byte*) new_huff_el;
}
}
+
+ /* Make a queue from the queue buffer. */
queue.elements=found;
+ /*
+ Make a priority queue from the queue. Construct its index so that we
+ have a partially ordered tree.
+ */
for (i=found/2 ; i > 0 ; i--)
_downheap(&queue,i);
+
+ /* The Huffman algorithm. */
bytes_packed=0; bits_packed=0;
for (i=1 ; i < found ; i++)
{
+ /*
+ Pop the top element from the queue (the one with the least incidence).
+ Popping from a priority queue includes a re-ordering of the queue,
+ to get the next least incidence element to the top.
+ */
a=(HUFF_ELEMENT*) queue_remove(&queue,0);
+ /*
+ Copy the next least incidence element. The queue implementation
+ reserves root[0] for temporary purposes. root[1] is the top.
+ */
b=(HUFF_ELEMENT*) queue.root[1];
+ /* Get a new element from the element buffer. */
new_huff_el=huff_tree->element_buffer+found+i;
+ /* The new element gets the sum of the two least incidence elements. */
new_huff_el->count=a->count+b->count;
+ /*
+ The Huffman algorithm assigns another bit to the code for a byte
+ every time that bytes incidence is combined (directly or indirectly)
+ to a new element as one of the two least incidence elements.
+ This means that one more bit per incidence of that byte is required
+ in the resulting file. So we add the new combined incidence as the
+ number of bits by which the result grows.
+ */
bits_packed+=(uint) (new_huff_el->count & 7);
bytes_packed+=new_huff_el->count/8;
- new_huff_el->a.nod.left=a; /* lesser in left */
+ /* The new element points to its children, lesser in left. */
+ new_huff_el->a.nod.left=a;
new_huff_el->a.nod.right=b;
+ /*
+ Replace the copied top element by the new element and re-order the
+ queue.
+ */
queue.root[1]=(byte*) new_huff_el;
queue_replaced(&queue);
}
@@ -1323,7 +1673,26 @@ static int compare_tree(void* cmp_arg __attribute__((unused)),
return 0;
}
- /* Used by make_huff_tree to save intervall-counts in queue */
+/*
+ Organize distinct column values and their incidences into a priority queue.
+
+ SYNOPSIS
+ save_counts_in_queue()
+ key The column value.
+ count The incidence of this value.
+ tree The Huffman tree to be built later.
+
+ DESCRIPTION
+ We use the element buffer of the targeted tree. The distinct column
+ values are organized in a priority queue first. The Huffman
+ algorithm will later organize the elements into a Huffman tree. For
+ the time being, we just place references to the elements into the
+ queue buffer. The buffer will later be organized into a priority
+ queue.
+
+ RETURN
+ 0
+ */
static int save_counts_in_queue(byte *key, element_count count,
HUFF_TREE *tree)
@@ -1340,8 +1709,23 @@ static int save_counts_in_queue(byte *key, element_count count,
}
- /* Calculate length of file if given counts should be used */
- /* Its actually a faster version of make_huff_tree */
+/*
+ Calculate length of file if given counts should be used.
+
+ SYNOPSIS
+ calc_packed_length()
+ huff_counts The counts for a column of the table(s).
+ add_tree_lenght If the decode tree length should be added.
+
+ DESCRIPTION
+ We need to follow the Huffman algorithm until we know, how many bits
+ are required for each byte code. But we do not need the resulting
+ Huffman tree. Hence, we can leave out some steps which are essential
+ in make_huff_tree().
+
+ RETURN
+ Number of bytes required to compress this table column.
+*/
static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
uint add_tree_lenght)
@@ -1351,6 +1735,23 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
HUFF_ELEMENT element_buffer[256];
DBUG_ENTER("calc_packed_length");
+ /*
+ WARNING: We use a small hack for efficiency: Instead of placing
+ references to HUFF_ELEMENTs into the queue, we just insert
+ references to the counts of the byte codes which appeared in this
+ table column. During the Huffman algorithm they are successively
+ replaced by references to HUFF_ELEMENTs. This works, because
+ HUFF_ELEMENTs have the incidence count at their beginning.
+ Regardless, wether the queue array contains references to counts of
+ type my_off_t or references to HUFF_ELEMENTs which have the count of
+ type my_off_t at their beginning, it always points to a count of the
+ same type.
+
+ Instead of using queue_insert(), we just copy the references into
+ the buffer of the priority queue. We insert in byte value order, but
+ the order is in fact irrelevant here. We will establish the correct
+ order later.
+ */
first=last=0;
for (i=found=0 ; i < 256 ; i++)
{
@@ -1359,31 +1760,73 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
if (! found++)
first=i;
last=i;
+ /* We start with root[1], which is the queues top element. */
queue.root[found]=(byte*) &huff_counts->counts[i];
}
}
if (!found)
DBUG_RETURN(0); /* Empty tree */
+ /*
+ If there is only a single byte value in this field in all records,
+ add a second element with zero incidence. This is required to enter
+ the loop, which follows the Huffman algorithm.
+ */
if (found < 2)
queue.root[++found]=(byte*) &huff_counts->counts[last ? 0 : 1];
+ /* Make a queue from the queue buffer. */
queue.elements=found;
bytes_packed=0; bits_packed=0;
+ /* Add the length of the coding table, which would become part of the file. */
if (add_tree_lenght)
bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+
(max_bit(found-1)+1+1)*(found-2) +7)/8;
+
+ /*
+ Make a priority queue from the queue. Construct its index so that we
+ have a partially ordered tree.
+ */
for (i=(found+1)/2 ; i > 0 ; i--)
_downheap(&queue,i);
+
+ /* The Huffman algorithm. */
for (i=0 ; i < found-1 ; i++)
{
- HUFF_ELEMENT *a,*b,*new_huff_el;
- a=(HUFF_ELEMENT*) queue_remove(&queue,0);
- b=(HUFF_ELEMENT*) queue.root[1];
- new_huff_el=element_buffer+i;
- new_huff_el->count=a->count+b->count;
+ my_off_t *a;
+ my_off_t *b;
+ HUFF_ELEMENT *new_huff_el;
+
+ /*
+ Pop the top element from the queue (the one with the least
+ incidence). Popping from a priority queue includes a re-ordering
+ of the queue, to get the next least incidence element to the top.
+ */
+ a= (my_off_t*) queue_remove(&queue, 0);
+ /*
+ Copy the next least incidence element. The queue implementation
+ reserves root[0] for temporary purposes. root[1] is the top.
+ */
+ b= (my_off_t*) queue.root[1];
+ /* Create a new element in a local (automatic) buffer. */
+ new_huff_el= element_buffer + i;
+ /* The new element gets the sum of the two least incidence elements. */
+ new_huff_el->count= *a + *b;
+ /*
+ The Huffman algorithm assigns another bit to the code for a byte
+ every time that bytes incidence is combined (directly or indirectly)
+ to a new element as one of the two least incidence elements.
+ This means that one more bit per incidence of that byte is required
+ in the resulting file. So we add the new combined incidence as the
+ number of bits by which the result grows.
+ */
bits_packed+=(uint) (new_huff_el->count & 7);
bytes_packed+=new_huff_el->count/8;
+ /*
+ Replace the copied top element by the new element and re-order the
+ queue. This successively replaces the references to counts by
+ references to HUFF_ELEMENTs.
+ */
queue.root[1]=(byte*) new_huff_el;
queue_replaced(&queue);
}
@@ -1431,13 +1874,26 @@ static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees)
}
}
}
+ DBUG_PRINT("info", ("Original trees: %d After join: %d",
+ trees, tree_number));
if (verbose)
- printf("Original trees: %d After join: %d\n",trees,tree_number);
+ VOID(printf("Original trees: %d After join: %d\n", trees, tree_number));
return tree_number; /* Return trees left */
}
- /* Fill in huff_tree decode tables */
+/*
+ Fill in huff_tree encode tables.
+
+ SYNOPSIS
+ make_huff_decode_table()
+ huff_tree An array of HUFF_TREE which are to be encoded.
+ trees The number of HUFF_TREE in the array.
+
+ RETURN
+ 0 success
+ != 0 error
+*/
static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
{
@@ -1448,12 +1904,13 @@ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
{
elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256;
if (!(huff_tree->code =
- (ulong*) my_malloc(elements*
- (sizeof(ulong)+sizeof(uchar)),
- MYF(MY_WME | MY_ZEROFILL))))
+ (ulonglong*) my_malloc(elements*
+ (sizeof(ulonglong) + sizeof(uchar)),
+ MYF(MY_WME | MY_ZEROFILL))))
return 1;
huff_tree->code_len=(uchar*) (huff_tree->code+elements);
- make_traverse_code_tree(huff_tree,huff_tree->root,32,0);
+ make_traverse_code_tree(huff_tree, huff_tree->root,
+ 8 * sizeof(ulonglong), LL(0));
}
}
return 0;
@@ -1462,28 +1919,90 @@ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
static void make_traverse_code_tree(HUFF_TREE *huff_tree,
HUFF_ELEMENT *element,
- uint size, ulong code)
+ uint size, ulonglong code)
{
uint chr;
if (!element->a.leaf.null)
{
chr=element->a.leaf.element_nr;
- huff_tree->code_len[chr]=(uchar) (32-size);
- huff_tree->code[chr]= (code >> size);
- if (huff_tree->height < 32-size)
- huff_tree->height= 32-size;
+ huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
+ huff_tree->code[chr]= (code >> size);
+ if (huff_tree->height < 8 * sizeof(ulonglong) - size)
+ huff_tree->height= 8 * sizeof(ulonglong) - size;
}
else
{
size--;
make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
- make_traverse_code_tree(huff_tree,element->a.nod.right,size,
- code+((ulong) 1L << size));
+ make_traverse_code_tree(huff_tree, element->a.nod.right, size,
+ code + (((ulonglong) 1) << size));
}
return;
}
+/*
+ Convert a value into binary digits.
+
+ SYNOPSIS
+ bindigits()
+ value The value.
+ length The number of low order bits to convert.
+
+ NOTE
+ The result string is in static storage. It is reused on every call.
+ So you cannot use it twice in one expression.
+
+ RETURN
+ A pointer to a static NUL-terminated string.
+ */
+
+static char *bindigits(ulonglong value, uint bits)
+{
+ static char digits[72];
+ char *ptr= digits;
+ uint idx= bits;
+
+ DBUG_ASSERT(idx < sizeof(digits));
+ while (idx)
+ *(ptr++)= '0' + ((value >> (--idx)) & 1);
+ *ptr= '\0';
+ return digits;
+}
+
+
+/*
+ Convert a value into hexadecimal digits.
+
+ SYNOPSIS
+ hexdigits()
+ value The value.
+
+ NOTE
+ The result string is in static storage. It is reused on every call.
+ So you cannot use it twice in one expression.
+
+ RETURN
+ A pointer to a static NUL-terminated string.
+ */
+
+static char *hexdigits(ulonglong value)
+{
+ static char digits[20];
+ char *ptr= digits;
+ uint idx= 2 * sizeof(value); /* Two hex digits per byte. */
+
+ DBUG_ASSERT(idx < sizeof(digits));
+ while (idx)
+ {
+ if ((*(ptr++)= '0' + ((value >> (4 * (--idx))) & 0xf)) > '9')
+ *(ptr - 1)+= 'a' - '9' - 1;
+ }
+ *ptr= '\0';
+ return digits;
+}
+
+
/* Write header to new packed data file */
static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees,
@@ -1517,15 +2036,64 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
uint huff_tree_bits;
huff_tree_bits=max_bit(trees ? trees-1 : 0);
+ DBUG_PRINT("info", (""));
+ DBUG_PRINT("info", ("column types:"));
+ DBUG_PRINT("info", ("FIELD_NORMAL 0"));
+ DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1"));
+ DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2"));
+ DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3"));
+ DBUG_PRINT("info", ("FIELD_BLOB 4"));
+ DBUG_PRINT("info", ("FIELD_CONSTANT 5"));
+ DBUG_PRINT("info", ("FIELD_INTERVALL 6"));
+ DBUG_PRINT("info", ("FIELD_ZERO 7"));
+ DBUG_PRINT("info", ("FIELD_VARCHAR 8"));
+ DBUG_PRINT("info", ("FIELD_CHECK 9"));
+ DBUG_PRINT("info", (""));
+ DBUG_PRINT("info", ("pack type as a set of flags:"));
+ DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1"));
+ DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2"));
+ DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4"));
+ DBUG_PRINT("info", (""));
+ if (verbose >= 2)
+ {
+ VOID(printf("\n"));
+ VOID(printf("column types:\n"));
+ VOID(printf("FIELD_NORMAL 0\n"));
+ VOID(printf("FIELD_SKIP_ENDSPACE 1\n"));
+ VOID(printf("FIELD_SKIP_PRESPACE 2\n"));
+ VOID(printf("FIELD_SKIP_ZERO 3\n"));
+ VOID(printf("FIELD_BLOB 4\n"));
+ VOID(printf("FIELD_CONSTANT 5\n"));
+ VOID(printf("FIELD_INTERVALL 6\n"));
+ VOID(printf("FIELD_ZERO 7\n"));
+ VOID(printf("FIELD_VARCHAR 8\n"));
+ VOID(printf("FIELD_CHECK 9\n"));
+ VOID(printf("\n"));
+ VOID(printf("pack type as a set of flags:\n"));
+ VOID(printf("PACK_TYPE_SELECTED 1\n"));
+ VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n"));
+ VOID(printf("PACK_TYPE_ZERO_FILL 4\n"));
+ VOID(printf("\n"));
+ }
for (i=0 ; i++ < fields ; counts++)
{
- write_bits((ulong) (int) counts->field_type,5);
+ write_bits((ulonglong) (int) counts->field_type, 5);
write_bits(counts->pack_type,6);
if (counts->pack_type & PACK_TYPE_ZERO_FILL)
write_bits(counts->max_zero_fill,5);
else
write_bits(counts->length_bits,5);
- write_bits((ulong) counts->tree->tree_number-1,huff_tree_bits);
+ write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
+ DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u "
+ "lbits: %2u tree: %2u length: %4u",
+ i , counts->field_type, counts->pack_type,
+ counts->max_zero_fill, counts->length_bits,
+ counts->tree->tree_number, counts->field_length));
+ if (verbose >= 2)
+ VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u "
+ "tree: %2u length: %4u\n", i , counts->field_type,
+ counts->pack_type, counts->max_zero_fill, counts->length_bits,
+ counts->tree->tree_number, counts->field_length));
}
flush_bits();
return;
@@ -1538,45 +2106,72 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
{
uint i,int_length;
+ uint tree_no;
+ uint codes;
+ uint errors= 0;
uint *packed_tree,*offset,length;
my_off_t elements;
+ /* Find the highest number of elements in the trees. */
for (i=length=0 ; i < trees ; i++)
if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length)
length=huff_tree[i].elements;
+ /*
+ Allocate a buffer for packing a decode tree. Two numbers per element
+ (left child and right child).
+ */
if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2)))
{
my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2);
return 0;
}
+ DBUG_PRINT("info", (""));
+ if (verbose >= 2)
+ VOID(printf("\n"));
+ tree_no= 0;
intervall_length=0;
for (elements=0; trees-- ; huff_tree++)
{
+ /* Skip columns that have been joined with other columns. */
if (huff_tree->tree_number == 0)
continue; /* Deleted tree */
+ tree_no++;
+ DBUG_PRINT("info", (""));
+ if (verbose >= 3)
+ VOID(printf("\n"));
+ /* Count the total number of elements (byte codes or column values). */
elements+=huff_tree->elements;
huff_tree->max_offset=2;
+ /* Build a tree of offsets and codes for decoding in 'packed_tree'. */
if (huff_tree->elements <= 1)
offset=packed_tree;
else
offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree);
+
+ /* This should be the same as 'length' above. */
huff_tree->offset_bits=max_bit(huff_tree->max_offset);
+
+ /*
+ Since we check this during collecting the distinct column values,
+ this should never happen.
+ */
if (huff_tree->max_offset >= IS_OFFSET)
{ /* This should be impossible */
- VOID(fprintf(stderr,"Tree offset got too big: %d, aborted\n",
- huff_tree->max_offset));
+ VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n",
+ huff_tree->max_offset));
my_afree((gptr) packed_tree);
return 0;
}
-#ifdef EXTRA_DBUG
- printf("pos: %d elements: %d tree-elements: %d char_bits: %d\n",
- (uint) (file_buffer.pos-file_buffer.buffer),
- huff_tree->elements, (offset-packed_tree),huff_tree->char_bits);
-#endif
+ DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu "
+ "char_bits: %u\n",
+ (ulong) (file_buffer.pos - file_buffer.buffer),
+ huff_tree->elements, (ulong) (offset - packed_tree),
+ huff_tree->char_bits));
if (!huff_tree->counts->tree_buff)
{
+ /* We do a byte compression on this column. Mark with bit 0. */
write_bits(0,1);
write_bits(huff_tree->min_chr,8);
write_bits(huff_tree->elements,9);
@@ -1588,6 +2183,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
{
int_length=(uint) (huff_tree->counts->tree_pos -
huff_tree->counts->tree_buff);
+ /* We have distinct column values for this column. Mark with bit 1. */
write_bits(1,1);
write_bits(huff_tree->elements,15);
write_bits(int_length,16);
@@ -1595,10 +2191,29 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
write_bits(huff_tree->offset_bits,5);
intervall_length+=int_length;
}
+ DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u "
+ "offset_bits: %2u %s: %5u codelen: %2u",
+ tree_no, huff_tree->elements, huff_tree->char_bits,
+ huff_tree->offset_bits, huff_tree->counts->tree_buff ?
+ "bufflen" : "min_chr", huff_tree->counts->tree_buff ?
+ int_length : huff_tree->min_chr, huff_tree->height));
+ if (verbose >= 2)
+ VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u "
+ "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements,
+ huff_tree->char_bits, huff_tree->offset_bits,
+ huff_tree->counts->tree_buff ? "bufflen" : "min_chr",
+ huff_tree->counts->tree_buff ? int_length :
+ huff_tree->min_chr, huff_tree->height));
+
+ /* Check that the code tree length matches the element count. */
length=(uint) (offset-packed_tree);
if (length != huff_tree->elements*2-2)
- printf("error: Huff-tree-length: %d != calc_length: %d\n",
- length,huff_tree->elements*2-2);
+ {
+ VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n",
+ length, huff_tree->elements * 2 - 2));
+ errors++;
+ break;
+ }
for (i=0 ; i < length ; i++)
{
@@ -1607,16 +2222,122 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
huff_tree->offset_bits+1);
else
write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1);
+ DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x",
+ i, (packed_tree[i] & IS_OFFSET) ?
+ " -> " : "", (packed_tree[i] & IS_OFFSET) ?
+ packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
+ if (verbose >= 3)
+ VOID(printf("tree[0x%04x]: %s0x%04x\n",
+ i, (packed_tree[i] & IS_OFFSET) ? " -> " : "",
+ (packed_tree[i] & IS_OFFSET) ?
+ packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
}
flush_bits();
+
+ /*
+ Display coding tables and check their correctness.
+ */
+ codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
+ for (i= 0; i < codes; i++)
+ {
+ ulonglong code;
+ uint bits;
+ uint len;
+ uint idx;
+
+ if (! (len= huff_tree->code_len[i]))
+ continue;
+ DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i,
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
+ bindigits(huff_tree->code[i],
+ huff_tree->code_len[i])));
+ if (verbose >= 3)
+ VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i,
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
+ bindigits(huff_tree->code[i], huff_tree->code_len[i])));
+
+ /* Check that the encode table decodes correctly. */
+ code= 0;
+ bits= 0;
+ idx= 0;
+ DBUG_EXECUTE_IF("forcechkerr1", len--;);
+ DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code););
+ DBUG_EXECUTE_IF("forcechkerr3", idx= length;);
+ for (;;)
+ {
+ if (! len)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n",
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i]));
+ errors++;
+ break;
+ }
+ code<<= 1;
+ code|= (huff_tree->code[i] >> (--len)) & 1;
+ bits++;
+ if (bits > 8 * sizeof(code))
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n",
+ bits, 8 * sizeof(code)));
+ errors++;
+ break;
+ }
+ idx+= code & 1;
+ if (idx >= length)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n",
+ idx, length));
+ errors++;
+ break;
+ }
+ if (packed_tree[idx] & IS_OFFSET)
+ idx+= packed_tree[idx] & ~IS_OFFSET;
+ else
+ break; /* Hit a leaf. This contains the result value. */
+ }
+ if (errors)
+ break;
+
+ DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;);
+ if (packed_tree[idx] != i)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n",
+ packed_tree[idx], i));
+ errors++;
+ break;
+ }
+ } /*end for (codes)*/
+ if (errors)
+ break;
+
+ /* Write column values in case of distinct column value compression. */
if (huff_tree->counts->tree_buff)
{
for (i=0 ; i < int_length ; i++)
- write_bits((uint) (uchar) huff_tree->counts->tree_buff[i],8);
+ {
+ write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
+ DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x",
+ i, (uchar) huff_tree->counts->tree_buff[i]));
+ if (verbose >= 3)
+ VOID(printf("column_values[0x%04x]: 0x%02x\n",
+ i, (uchar) huff_tree->counts->tree_buff[i]));
+ }
}
flush_bits();
}
+ DBUG_PRINT("info", (""));
+ if (verbose >= 2)
+ VOID(printf("\n"));
my_afree((gptr) packed_tree);
+ if (errors)
+ {
+ VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n"));
+ return 0;
+ }
return elements;
}
@@ -1627,23 +2348,43 @@ static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,
uint *prev_offset;
prev_offset= offset;
+ /*
+ 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
+ then there is no left child and, hence no right child either. This
+ is a property of a binary tree. An element is either a node with two
+ childs, or a leaf without childs.
+
+ The current element is always a node with two childs. Go left first.
+ */
if (!element->a.nod.left->a.leaf.null)
{
- offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
+ /* Store the byte code or the index of the column value. */
+ prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
offset+=2;
}
else
{
+ /*
+ Recursively traverse the tree to the left. Mark it as an offset to
+ another tree node (in contrast to a byte code or column value index).
+ */
prev_offset[0]= IS_OFFSET+2;
offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2);
}
+
+ /* Now, check the right child. */
if (!element->a.nod.right->a.leaf.null)
{
+ /* Store the byte code or the index of the column value. */
prev_offset[1]=element->a.nod.right->a.leaf.element_nr;
return offset;
}
else
{
+ /*
+ Recursively traverse the tree to the right. Mark it as an offset to
+ another tree node (in contrast to a byte code or column value index).
+ */
uint temp=(uint) (offset-prev_offset-1);
prev_offset[1]= IS_OFFSET+ temp;
if (huff_tree->max_offset < temp)
@@ -1670,6 +2411,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length,
intervall,field_length,max_pack_length,pack_blob_length;
my_off_t record_count;
+ char llbuf[32];
ulong length,pack_length;
byte *record,*pos,*end_pos,*record_pos,*start_pos;
HUFF_COUNTS *count,*end_count;
@@ -1678,12 +2420,23 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
uint pack_version= (uint) isam_file->s->pack.version;
DBUG_ENTER("compress_isam_file");
+ /* Allocate a buffer for the records (excluding blobs). */
if (!(record=(byte*) my_alloca(isam_file->s->base.reclength)))
return -1;
+
end_count=huff_counts+isam_file->s->base.fields;
min_record_length= (uint) ~0;
max_record_length=0;
+ /*
+ Calculate the maximum number of bits required to pack the records.
+ Remember to understand 'max_zero_fill' as 'min_zero_fill'.
+ The tree height determines the maximum number of bits per value.
+ Some fields skip leading or trailing spaces or zeroes. The skipped
+ number of bytes is encoded by 'length_bits' bits.
+ Empty blobs and varchar are encoded with a single 1 bit. Other blobs
+ and varchar get a leading 0 bit.
+ */
for (i=max_calc_length=0 ; i < isam_file->s->base.fields ; i++)
{
if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL))
@@ -1702,13 +2455,15 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
(huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
huff_counts[i].tree->height+huff_counts[i].length_bits;
}
- max_calc_length/=8;
+ max_calc_length= (max_calc_length + 7) / 8;
pack_ref_length= calc_pack_length(pack_version, max_calc_length);
record_count=0;
+ /* 'max_blob_length' is the max length of all blobs of a record. */
pack_blob_length= isam_file->s->base.blobs ?
calc_pack_length(pack_version, mrg->max_blob_length) : 0;
max_pack_length=pack_ref_length+pack_blob_length;
+ DBUG_PRINT("fields", ("==="));
mrg_reset(mrg);
while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
{
@@ -1724,15 +2479,29 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
end_pos=start_pos+(field_length=count->field_length);
tree=count->tree;
+ DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u "
+ "lbits: %2u tree: %2u length: %4u",
+ (ulong) (count - huff_counts + 1),
+ count->field_type,
+ count->pack_type, count->max_zero_fill,
+ count->length_bits, count->tree->tree_number,
+ count->field_length));
+
+ /* Check if the column contains spaces only. */
if (count->pack_type & PACK_TYPE_SPACE_FIELDS)
{
for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ;
if (pos == end_pos)
{
+ DBUG_PRINT("fields",
+ ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1"));
+ DBUG_PRINT("fields", ("---"));
write_bits(1,1);
start_pos=end_pos;
continue;
}
+ DBUG_PRINT("fields",
+ ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1"));
write_bits(0,1);
}
end_pos-=count->max_zero_fill;
@@ -1742,65 +2511,129 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
case FIELD_SKIP_ZERO:
if (!memcmp((byte*) start_pos,zero_string,field_length))
{
+ DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1"));
write_bits(1,1);
start_pos=end_pos;
break;
}
+ DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1"));
write_bits(0,1);
/* Fall through */
case FIELD_NORMAL:
+ DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes",
+ (ulong) (end_pos - start_pos)));
for ( ; start_pos < end_pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *start_pos],
(uint) tree->code_len[(uchar) *start_pos]);
+ }
break;
case FIELD_SKIP_ENDSPACE:
for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
- length=(uint) (end_pos-pos);
+ length= (ulong) (end_pos - pos);
if (count->pack_type & PACK_TYPE_SELECTED)
{
if (length > count->min_space)
{
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1"));
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
write_bits(1,1);
write_bits(length,count->length_bits);
}
else
{
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE not more than min_space, "
+ "bits: 1"));
write_bits(0,1);
pos=end_pos;
}
}
else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
write_bits(length,count->length_bits);
+ }
+ /* Encode all significant bytes. */
+ DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes",
+ (ulong) (pos - start_pos)));
for ( ; start_pos < pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *start_pos],
(uint) tree->code_len[(uchar) *start_pos]);
+ }
start_pos=end_pos;
break;
case FIELD_SKIP_PRESPACE:
for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
- length=(uint) (pos-start_pos);
+ length= (ulong) (pos - start_pos);
if (count->pack_type & PACK_TYPE_SELECTED)
{
if (length > count->min_space)
{
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE more than min_space, bits: 1"));
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
write_bits(1,1);
write_bits(length,count->length_bits);
}
else
{
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE not more than min_space, "
+ "bits: 1"));
pos=start_pos;
write_bits(0,1);
}
}
else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
write_bits(length,count->length_bits);
+ }
+ /* Encode all significant bytes. */
+ DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes",
+ (ulong) (end_pos - start_pos)));
for (start_pos=pos ; start_pos < end_pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *start_pos],
(uint) tree->code_len[(uchar) *start_pos]);
+ }
break;
case FIELD_CONSTANT:
case FIELD_ZERO:
case FIELD_CHECK:
+ DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK"));
start_pos=end_pos;
break;
case FIELD_INTERVALL:
@@ -1808,6 +2641,10 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
pos=(byte*) tree_search(&count->int_tree, start_pos,
count->int_tree.custom_arg);
intervall=(uint) (pos - count->tree_buff)/field_length;
+ DBUG_PRINT("fields", ("FIELD_INTERVALL"));
+ DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u",
+ intervall, hexdigits(tree->code[intervall]),
+ (uint) tree->code_len[intervall]));
write_bits(tree->code[intervall],(uint) tree->code_len[intervall]);
start_pos=end_pos;
break;
@@ -1816,21 +2653,36 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
ulong blob_length=_mi_calc_blob_length(field_length-
mi_portable_sizeof_char_ptr,
start_pos);
+ /* Empty blobs are encoded with a single 1 bit. */
if (!blob_length)
{
- write_bits(1,1); /* Empty blob */
+ DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1"));
+ write_bits(1,1);
}
else
{
byte *blob,*blob_end;
+ DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1"));
write_bits(0,1);
+ /* Write the blob length. */
+ DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
+ blob_length, count->length_bits));
write_bits(blob_length,count->length_bits);
memcpy_fixed(&blob,end_pos-mi_portable_sizeof_char_ptr,
sizeof(char*));
blob_end=blob+blob_length;
+ /* Encode the blob bytes. */
for ( ; blob < blob_end ; blob++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *blob, hexdigits(tree->code[(uchar) *blob]),
+ (uint) tree->code_len[(uchar) *blob],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint)tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *blob],
(uint) tree->code_len[(uchar) *blob]);
+ }
tot_blob_length+=blob_length;
}
start_pos= end_pos;
@@ -1838,19 +2690,37 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
}
case FIELD_VARCHAR:
{
- ulong col_length= uint2korr(start_pos);
+ uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
+ ulong col_length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
+ uint2korr(start_pos));
+ /* Empty varchar are encoded with a single 1 bit. */
if (!col_length)
{
+ DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1"));
write_bits(1,1); /* Empty varchar */
}
else
{
- byte *end=start_pos+2+col_length;
+ byte *end=start_pos+pack_length+col_length;
+ DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1"));
write_bits(0,1);
+ /* Write the varchar length. */
+ DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u",
+ col_length, count->length_bits));
write_bits(col_length,count->length_bits);
- for (start_pos+=2 ; start_pos < end ; start_pos++)
+ /* Encode the varchar bytes. */
+ for (start_pos+=pack_length ; start_pos < end ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint)tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *start_pos],
(uint) tree->code_len[(uchar) *start_pos]);
+ }
}
start_pos= end_pos;
break;
@@ -1859,13 +2729,18 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
abort(); /* Impossible */
}
start_pos+=count->max_zero_fill;
+ DBUG_PRINT("fields", ("---"));
}
flush_bits();
- length=(ulong) (file_buffer.pos-record_pos)-max_pack_length;
+ length=(ulong) ((byte*) file_buffer.pos - record_pos) - max_pack_length;
pack_length= save_pack_length(pack_version, record_pos, length);
if (pack_blob_length)
pack_length+= save_pack_length(pack_version, record_pos + pack_length,
tot_blob_length);
+ DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu "
+ "length-bytes: %lu", (ulong) record_count, length,
+ tot_blob_length, pack_length));
+ DBUG_PRINT("fields", ("==="));
/* Correct file buffer if the header was smaller */
if (pack_length != max_pack_length)
@@ -1877,9 +2752,11 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
min_record_length=(uint) length;
if (length > (ulong) max_record_length)
max_record_length=(uint) length;
- if (write_loop && ++record_count % WRITE_COUNT == 0)
+ record_count++;
+ if (write_loop && record_count % WRITE_COUNT == 0)
{
- printf("%lu\r",(ulong) record_count); VOID(fflush(stdout));
+ VOID(printf("%lu\r", (ulong) record_count));
+ VOID(fflush(stdout));
}
}
else if (error != HA_ERR_RECORD_DELETED)
@@ -1889,8 +2766,11 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
error=0;
else
{
- fprintf(stderr,"%s: Got error %d reading records\n",my_progname,error);
+ VOID(fprintf(stderr, "%s: Got error %d reading records\n",
+ my_progname, error));
}
+ if (verbose >= 2)
+ VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf)));
my_afree((gptr) record);
mrg->ref_length=max_pack_length;
@@ -1930,7 +2810,7 @@ static void init_file_buffer(File file, pbool read_buffer)
file_buffer.pos=file_buffer.buffer;
file_buffer.bits=BITS_SAVED;
}
- file_buffer.current_byte=0;
+ file_buffer.bitbucket= 0;
}
@@ -1973,7 +2853,8 @@ static int flush_buffer(ulong neaded_length)
tmp=my_realloc(file_buffer.buffer, neaded_length,MYF(MY_WME));
if (!tmp)
return 1;
- file_buffer.pos= tmp + (ulong) (file_buffer.pos - file_buffer.buffer);
+ file_buffer.pos= ((uchar*) tmp +
+ (ulong) (file_buffer.pos - file_buffer.buffer));
file_buffer.buffer=tmp;
file_buffer.end=tmp+neaded_length-8;
}
@@ -1988,68 +2869,59 @@ static void end_file_buffer(void)
/* output `bits` low bits of `value' */
-static void write_bits (register ulong value, register uint bits)
+static void write_bits(register ulonglong value, register uint bits)
{
- if ((file_buffer.bits-=(int) bits) >= 0)
+ DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
+ (bits == 8 * sizeof(value)));
+
+ if ((file_buffer.bits-= (int) bits) >= 0)
{
- file_buffer.current_byte|=value << file_buffer.bits;
+ file_buffer.bitbucket|= value << file_buffer.bits;
}
else
{
- reg3 uint byte_buff;
+ reg3 ulonglong bit_buffer;
bits= (uint) -file_buffer.bits;
- DBUG_ASSERT(bits <= 8 * sizeof(value));
- byte_buff= (file_buffer.current_byte |
- ((bits != 8 * sizeof(value)) ? (uint) (value >> bits) : 0));
-#if BITS_SAVED == 32
- *file_buffer.pos++= (byte) (byte_buff >> 24) ;
- *file_buffer.pos++= (byte) (byte_buff >> 16) ;
+ bit_buffer= (file_buffer.bitbucket |
+ ((bits != 8 * sizeof(value)) ? (value >> bits) : 0));
+#if BITS_SAVED == 64
+ *file_buffer.pos++= (uchar) (bit_buffer >> 56);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 48);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 40);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 32);
#endif
- *file_buffer.pos++= (byte) (byte_buff >> 8) ;
- *file_buffer.pos++= (byte) byte_buff;
+ *file_buffer.pos++= (uchar) (bit_buffer >> 24);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 16);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 8);
+ *file_buffer.pos++= (uchar) (bit_buffer);
- DBUG_ASSERT(bits <= 8 * sizeof(ulong));
if (bits != 8 * sizeof(value))
- value&= (((ulong) 1) << bits) - 1;
-#if BITS_SAVED == 16
- if (bits >= sizeof(uint))
- {
- bits-=8;
- *file_buffer.pos++= (uchar) (value >> bits);
- value&= (1 << bits)-1;
- if (bits >= sizeof(uint))
- {
- bits-=8;
- *file_buffer.pos++= (uchar) (value >> bits);
- value&= (1 << bits)-1;
- }
- }
-#endif
+ value&= (((ulonglong) 1) << bits) - 1;
if (file_buffer.pos >= file_buffer.end)
VOID(flush_buffer(~ (ulong) 0));
file_buffer.bits=(int) (BITS_SAVED - bits);
- file_buffer.current_byte=(uint) (value << (BITS_SAVED - bits));
+ file_buffer.bitbucket= value << (BITS_SAVED - bits);
}
return;
}
/* Flush bits in bit_buffer to buffer */
-static void flush_bits (void)
+static void flush_bits(void)
{
- uint bits,byte_buff;
+ int bits;
+ ulonglong bit_buffer;
- bits=(file_buffer.bits) & ~7;
- byte_buff = file_buffer.current_byte >> bits;
- bits=BITS_SAVED - bits;
+ bits= file_buffer.bits & ~7;
+ bit_buffer= file_buffer.bitbucket >> bits;
+ bits= BITS_SAVED - bits;
while (bits > 0)
{
- bits-=8;
- *file_buffer.pos++= (byte) (uchar) (byte_buff >> bits) ;
+ bits-= 8;
+ *file_buffer.pos++= (uchar) (bit_buffer >> bits);
}
- file_buffer.bits=BITS_SAVED;
- file_buffer.current_byte=0;
- return;
+ file_buffer.bits= BITS_SAVED;
+ file_buffer.bitbucket= 0;
}
@@ -2074,7 +2946,7 @@ static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
share->state.dellink= HA_OFFSET_ERROR;
share->state.split=(ha_rows) mrg->records;
share->state.version=(ulong) time((time_t*) 0);
- if (share->state.key_map != (ULL(1) << share->base.keys) - 1)
+ if (! mi_is_all_keys_active(share->state.key_map, share->base.keys))
{
/*
Some indexes are disabled, cannot use current key_file_length value
@@ -2088,7 +2960,7 @@ static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
original file so "myisamchk -rq" can use this value (this is necessary
because index size cannot be easily calculated for fulltext keys)
*/
- share->state.key_map=0;
+ mi_clear_all_keys_active(share->state.key_map);
for (key=0 ; key < share->base.keys ; key++)
share->state.key_root[key]= HA_OFFSET_ERROR;
for (key=0 ; key < share->state.header.max_block_size ; key++)
@@ -2128,7 +3000,7 @@ static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
}
state.dellink= HA_OFFSET_ERROR;
state.version=(ulong) time((time_t*) 0);
- state.key_map=0;
+ mi_clear_all_keys_active(state.key_map);
state.checksum=crc;
if (isam_file->s->base.keys)
isamchk_neaded=1;
@@ -2197,3 +3069,131 @@ static int mrg_close(PACK_MRG_INFO *mrg)
my_free((gptr) mrg->file,MYF(0));
return error;
}
+
+
+#if !defined(DBUG_OFF)
+/*
+ Fake the counts to get big Huffman codes.
+
+ SYNOPSIS
+ fakebigcodes()
+ huff_counts A pointer to the counts array.
+ end_count A pointer past the counts array.
+
+ DESCRIPTION
+
+ Huffman coding works by removing the two least frequent values from
+ the list of values and add a new value with the sum of their
+ incidences in a loop until only one value is left. Every time a
+ value is reused for a new value, it gets one more bit for its
+ encoding. Hence, the least frequent values get the longest codes.
+
+ To get a maximum code length for a value, two of the values must
+ have an incidence of 1. As their sum is 2, the next infrequent value
+ must have at least an incidence of 2, then 4, 8, 16 and so on. This
+ means that one needs 2**n bytes (values) for a code length of n
+ bits. However, using more distinct values forces the use of longer
+ codes, or reaching the code length with less total bytes (values).
+
+ To get 64(32)-bit codes, I sort the counts by decreasing incidence.
+ I assign counts of 1 to the two most frequent values, a count of 2
+ for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All
+ the remaining values get 1. That way every possible byte has an
+ assigned code, though not all codes are used if not all byte values
+ are present in the column.
+
+ This strategy would work with distinct column values too, but
+ requires that at least 64(32) values are present. To make things
+ easier here, I cancel all distinct column values and force byte
+ compression for all columns.
+
+ RETURN
+ void
+*/
+
+static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count)
+{
+ HUFF_COUNTS *count;
+ my_off_t *cur_count_p;
+ my_off_t *end_count_p;
+ my_off_t **cur_sort_p;
+ my_off_t **end_sort_p;
+ my_off_t *sort_counts[256];
+ my_off_t total;
+ DBUG_ENTER("fakebigcodes");
+
+ for (count= huff_counts; count < end_count; count++)
+ {
+ /*
+ Remove distinct column values.
+ */
+ if (huff_counts->tree_buff)
+ {
+ my_free((gptr) huff_counts->tree_buff, MYF(0));
+ delete_tree(&huff_counts->int_tree);
+ huff_counts->tree_buff= NULL;
+ DBUG_PRINT("fakebigcodes", ("freed distinct column values"));
+ }
+
+ /*
+ Sort counts by decreasing incidence.
+ */
+ cur_count_p= count->counts;
+ end_count_p= cur_count_p + 256;
+ cur_sort_p= sort_counts;
+ while (cur_count_p < end_count_p)
+ *(cur_sort_p++)= cur_count_p++;
+ (void) qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp);
+
+ /*
+ Assign faked counts.
+ */
+ cur_sort_p= sort_counts;
+#if SIZEOF_LONG_LONG > 4
+ end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1;
+#else
+ end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2;
+#endif
+ /* Most frequent value gets a faked count of 1. */
+ **(cur_sort_p++)= 1;
+ total= 1;
+ while (cur_sort_p < end_sort_p)
+ {
+ **(cur_sort_p++)= total;
+ total<<= 1;
+ }
+ /* Set the last value. */
+ **(cur_sort_p++)= --total;
+ /*
+ Set the remaining counts.
+ */
+ end_sort_p= sort_counts + 256;
+ while (cur_sort_p < end_sort_p)
+ **(cur_sort_p++)= 1;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Compare two counts for reverse sorting.
+
+ SYNOPSIS
+ fakecmp()
+ count1 One count.
+ count2 Another count.
+
+ RETURN
+ 1 count1 < count2
+ 0 count1 == count2
+ -1 count1 > count2
+*/
+
+static int fakecmp(my_off_t **count1, my_off_t **count2)
+{
+ return ((**count1 < **count2) ? 1 :
+ (**count1 > **count2) ? -1 : 0);
+}
+#endif
+
+
diff --git a/myisam/rt_split.c b/myisam/rt_split.c
index 664dd2c75e3..31a7d09ab4f 100644
--- a/myisam/rt_split.c
+++ b/myisam/rt_split.c
@@ -257,19 +257,17 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key,
int n_dim;
uchar *source_cur, *cur1, *cur2;
uchar *new_page;
- int err_code = 0;
-
- uint nod_flag = mi_test_if_nod(page);
- uint full_length = key_length + (nod_flag ? nod_flag :
- info->s->base.rec_reflength);
-
- int max_keys = (mi_getint(page)-2) / (full_length);
+ int err_code= 0;
+ uint nod_flag= mi_test_if_nod(page);
+ uint full_length= key_length + (nod_flag ? nod_flag :
+ info->s->base.rec_reflength);
+ int max_keys= (mi_getint(page)-2) / (full_length);
n_dim = keyinfo->keysegs / 2;
if (!(coord_buf= (double*) my_alloca(n_dim * 2 * sizeof(double) *
- (max_keys + 1 + 4) +
- sizeof(SplitStruct) * (max_keys + 1))))
+ (max_keys + 1 + 4) +
+ sizeof(SplitStruct) * (max_keys + 1))))
return -1;
task= (SplitStruct *)(coord_buf + n_dim * 2 * (max_keys + 1 + 4));
@@ -312,8 +310,7 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key,
cur1 = rt_PAGE_FIRST_KEY(page, nod_flag);
cur2 = rt_PAGE_FIRST_KEY(new_page, nod_flag);
- n1 = 0;
- n2 = 0;
+ n1= n2 = 0;
for (cur = task; cur < stop; ++cur)
{
uchar *to;
diff --git a/myisam/sort.c b/myisam/sort.c
index e8cd9938e42..fabd713ef45 100644
--- a/myisam/sort.c
+++ b/myisam/sort.c
@@ -479,7 +479,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
}
if (!got_error)
{
- share->state.key_map|=(ulonglong) 1 << sinfo->key;
+ mi_set_key_active(share->state.key_map, sinfo->key);
if (param->testflag & T_STATISTICS)
update_key_parts(sinfo->keyinfo, rec_per_key_part,
sinfo->unique, (ulonglong) info->state->records);
@@ -862,7 +862,8 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
uchar *strpos;
BUFFPEK *buffpek,**refpek;
QUEUE queue;
- volatile my_bool *killed= killed_ptr(info->sort_info->param);
+ volatile int *killed= killed_ptr(info->sort_info->param);
+
DBUG_ENTER("merge_buffers");
count=error=0;