diff options
Diffstat (limited to 'myisam')
42 files changed, 2052 insertions, 580 deletions
diff --git a/myisam/Makefile.am b/myisam/Makefile.am index 6f304c8143d..e4327070997 100644 --- a/myisam/Makefile.am +++ b/myisam/Makefile.am @@ -17,8 +17,7 @@ EXTRA_DIST = mi_test_all.sh mi_test_all.res pkgdata_DATA = mi_test_all mi_test_all.res -INCLUDES = @MT_INCLUDES@ \ - -I$(top_builddir)/include -I$(top_srcdir)/include +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include LDADD = @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index f1ff8f6d886..c18984c7584 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -68,7 +68,7 @@ struct st_ftb_expr my_off_t docid[2]; float weight; float cur_weight; - byte *quot, *qend; + LIST *phrase; /* phrase words */ uint yesses; /* number of "yes" words matched */ uint nos; /* number of "no" words matched */ uint ythresh; /* number of "yes" words in expr */ @@ -132,20 +132,22 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) } static void _ftb_parse_query(FTB *ftb, byte **start, byte *end, - FTB_EXPR *up, uint depth) + FTB_EXPR *up, uint depth, byte *up_quot) { byte res; FTB_PARAM param; FT_WORD w; FTB_WORD *ftbw; FTB_EXPR *ftbe; + FT_WORD *phrase_word; + LIST *phrase_list; uint extra=HA_FT_WLEN+ftb->info->s->rec_reflength; /* just a shortcut */ if (ftb->state != UNINITIALIZED) return; param.prev=' '; - param.quot=up->quot; + param.quot= up_quot; while ((res=ft_get_word(ftb->charset,start,end,&w,¶m))) { int r=param.plusminus; @@ -172,6 +174,14 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end, if (param.yesno > 0) up->ythresh++; queue_insert(& ftb->queue, (byte *)ftbw); ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC); + case 4: /* not indexed word (stopword or too short/long) */ + if (! up_quot) break; + phrase_word= (FT_WORD *)alloc_root(&ftb->mem_root, sizeof(FT_WORD)); + phrase_list= (LIST *)alloc_root(&ftb->mem_root, sizeof(LIST)); + phrase_word->pos= w.pos; + phrase_word->len= w.len; + phrase_list->data= (void *)phrase_word; + up->phrase= list_add(up->phrase, phrase_list); break; case 2: /* left bracket */ ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); @@ -182,13 +192,14 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbe->up=up; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR; - if ((ftbe->quot=param.quot)) ftb->with_scan|=2; + ftbe->phrase= NULL; + if (param.quot) ftb->with_scan|=2; if (param.yesno > 0) up->ythresh++; - _ftb_parse_query(ftb, start, end, ftbe, depth+1); + _ftb_parse_query(ftb, start, end, ftbe, depth+1, param.quot); param.quot=0; break; case 3: /* right bracket */ - if (up->quot) up->qend=param.quot; + if (up_quot) up->phrase= list_reverse(up->phrase); return; } } @@ -212,6 +223,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) byte *lastkey_buf=ftbw->word+ftbw->off; LINT_INIT(off); + LINT_INIT(off); if (ftbw->flags & FTB_FLAG_TRUNC) lastkey_buf+=ftbw->len; @@ -410,12 +422,12 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ftbe->weight=1; ftbe->flags=FTB_FLAG_YES; ftbe->nos=1; - ftbe->quot=0; ftbe->up=0; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR; + ftbe->phrase= NULL; ftb->root=ftbe; - _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0); + _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0, NULL); ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root, sizeof(FTB_WORD *)*ftb->queue.elements); memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements); @@ -431,29 +443,45 @@ err: } -/* returns 1 if str0 ~= /\bstr1\b/ */ -static int _ftb_strstr(const byte *s0, const byte *e0, - const byte *s1, const byte *e1, - CHARSET_INFO *cs) +/* + Checks if given buffer matches phrase list. + + SYNOPSIS + _ftb_check_phrase() + s0 start of buffer + e0 end of buffer + phrase broken into list phrase + cs charset info + + RETURN VALUE + 1 is returned if phrase found, 0 else. +*/ + +static int _ftb_check_phrase(const byte *s0, const byte *e0, + LIST *phrase, CHARSET_INFO *cs) { - const byte *p0= s0; - my_bool s_after= true_word_char(cs, s1[0]); - my_bool e_before= true_word_char(cs, e1[-1]); - uint p0_len; - my_match_t m[2]; + FT_WORD h_word; + const byte *h_start= s0; + DBUG_ENTER("_ftb_strstr"); + DBUG_ASSERT(phrase); - while (p0 < e0) + while (ft_simple_get_word(cs, (byte **)&h_start, e0, &h_word, FALSE)) { - if (cs->coll->instr(cs, p0, e0 - p0, s1, e1 - s1, m, 2) != 2) - return(0); - if ((!s_after || p0 + m[1].beg == s0 || !true_word_char(cs, p0[m[1].beg-1])) && - (!e_before || p0 + m[1].end == e0 || !true_word_char(cs, p0[m[1].end]))) - return(1); - p0+= m[1].beg; - p0+= (p0_len= my_mbcharlen(cs, *(uchar *)p0)) ? p0_len : 1; + FT_WORD *n_word; + LIST *phrase_element= phrase; + const byte *h_start1= h_start; + for (;;) + { + n_word= (FT_WORD *)phrase_element->data; + if (my_strnncoll(cs, h_word.pos, h_word.len, n_word->pos, n_word->len)) + break; + if (! (phrase_element= phrase_element->next)) + DBUG_RETURN(1); + if (! ft_simple_get_word(cs, (byte **)&h_start1, e0, &h_word, FALSE)) + DBUG_RETURN(0); + } } - - return(0); + DBUG_RETURN(0); } @@ -484,7 +512,7 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_ { yn=ftbe->flags; weight=ftbe->cur_weight*ftbe->weight; - if (mode && ftbe->quot) + if (mode && ftbe->phrase) { int not_found=1; @@ -493,8 +521,8 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_ { if (!ftsi.pos) continue; - not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len, - ftbe->quot, ftbe->qend, ftb->charset); + not_found = ! _ftb_check_phrase(ftsi.pos, ftsi.pos+ftsi.len, + ftbe->phrase, ftb->charset); } if (not_found) break; } /* ftbe->quot */ @@ -642,8 +670,8 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) continue; end=ftsi.pos+ftsi.len; - while (ft_simple_get_word(ftb->charset, - (byte **) &ftsi.pos, (byte *) end, &word)) + while (ft_simple_get_word(ftb->charset, (byte **) &ftsi.pos, + (byte *) end, &word, TRUE)) { int a, b, c; for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2) diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index 0b1e68b0d70..2fad2363ae2 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -93,12 +93,14 @@ my_bool ft_boolean_check_syntax_string(const byte *str) return 0; } -/* returns: - * 0 - eof - * 1 - word found - * 2 - left bracket - * 3 - right bracket - */ +/* + RETURN VALUE + 0 - eof + 1 - word found + 2 - left bracket + 3 - right bracket + 4 - stopword found +*/ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) { @@ -161,6 +163,11 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, *start=doc; return 1; } + else if (length) /* make sure length > 0 (if start contains spaces only) */ + { + *start= doc; + return 4; + } } if (param->quot) { @@ -170,18 +177,19 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, return 0; } -byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end, - FT_WORD *word) +byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end, + FT_WORD *word, my_bool skip_stopwords) { byte *doc= *start; uint mwc, length, mbl; DBUG_ENTER("ft_simple_get_word"); - while (doc<end) + do { - for (;doc<end;doc++) + for (;; doc++) { - if (true_word_char(cs,*doc)) break; + if (doc >= end) DBUG_RETURN(0); + if (true_word_char(cs, *doc)) break; } mwc= length= 0; @@ -193,13 +201,14 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end, word->len= (uint)(doc-word->pos) - mwc; - if (length >= ft_min_word_len && length < ft_max_word_len && - !is_stopword(word->pos, word->len)) + if (skip_stopwords == FALSE || + (length >= ft_min_word_len && length < ft_max_word_len && + !is_stopword(word->pos, word->len))) { *start= doc; DBUG_RETURN(1); } - } + } while (doc < end); DBUG_RETURN(0); } @@ -217,7 +226,7 @@ int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc) FT_WORD w; DBUG_ENTER("ft_parse"); - while (ft_simple_get_word(wtree->custom_arg, &doc,end,&w)) + while (ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE)) { if (with_alloc) { diff --git a/myisam/ft_static.c b/myisam/ft_static.c index 994a94d0c49..e221950f445 100644 --- a/myisam/ft_static.c +++ b/myisam/ft_static.c @@ -25,23 +25,25 @@ char ft_boolean_syntax[]="+ -><()~*:\"\"&|"; const HA_KEYSEG ft_keysegs[FT_SEGS]={ { - HA_KEYTYPE_VARTEXT, /* type */ - 63, /* language (will be overwritten) */ - 0, 0, 0, /* null_bit, bit_start, bit_end */ - HA_VAR_LENGTH | HA_PACK_KEY, /* flag */ - HA_FT_MAXBYTELEN, /* length */ - HA_FT_WLEN, /* start */ - 0, /* null_pos */ - NULL /* charset */ - }, - { -/* - Note, this (and the last HA_KEYTYPE_END) segment should NOT - be packed in any way, otherwise w_search() won't be able to - update key entry 'in vivo' -*/ - HA_FT_WTYPE, 63, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 0, 0, NULL - } + 0, /* charset */ + HA_FT_WLEN, /* start */ + 0, /* null_pos */ + 0, /* Bit pos */ + HA_VAR_LENGTH_PART | HA_PACK_KEY, /* flag */ + HA_FT_MAXBYTELEN, /* length */ + HA_KEYTYPE_VARTEXT2, /* type */ + 63, /* language (will be overwritten) */ + 0, /* null_bit */ + 2, 0, 0 /* bit_start, bit_end, bit_length */ +}, +{ + /* + Note, this (and the last HA_KEYTYPE_END) segment should NOT + be packed in any way, otherwise w_search() won't be able to + update key entry 'in vivo' + */ + 0, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, HA_FT_WTYPE, 63, 0, 0, 0, 0 +} }; const struct _ft_vft _ft_vft_nlq = { diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c index a4bce6ad4e8..ab51afb0e82 100644 --- a/myisam/ft_stopwords.c +++ b/myisam/ft_stopwords.c @@ -81,7 +81,7 @@ int ft_init_stopwords() goto err0; len=my_read(fd, buffer, len, MYF(MY_WME)); end=start+len; - while (ft_simple_get_word(default_charset_info, &start, end, &w)) + while (ft_simple_get_word(default_charset_info, &start, end, &w, TRUE)) { if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0)))) goto err1; diff --git a/myisam/ft_test1.c b/myisam/ft_test1.c index f4884f8ca39..14be9aa1e8c 100644 --- a/myisam/ft_test1.c +++ b/myisam/ft_test1.c @@ -79,24 +79,24 @@ static int run_test(const char *filename) recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + mi_portable_sizeof_char_ptr : extra_length); if (extra_field == FIELD_VARCHAR) - recinfo[0].length+=2; + recinfo[0].length+= HA_VARCHAR_PACKLENGTH(extra_length); recinfo[1].type=key_field; recinfo[1].length= (key_field == FIELD_BLOB ? 4+mi_portable_sizeof_char_ptr : key_length); if (key_field == FIELD_VARCHAR) - recinfo[1].length+=2; + recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length); /* Define a key over the first column */ keyinfo[0].seg=keyseg; keyinfo[0].keysegs=1; keyinfo[0].seg[0].type= key_type; - keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB)?HA_BLOB_PART: - (key_field == FIELD_VARCHAR)?HA_VAR_LENGTH:0; + keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB) ? HA_BLOB_PART: + (key_field == FIELD_VARCHAR) ? HA_VAR_LENGTH_PART:0; keyinfo[0].seg[0].start=recinfo[0].length; keyinfo[0].seg[0].length=key_length; keyinfo[0].seg[0].null_bit= 0; keyinfo[0].seg[0].null_pos=0; - keyinfo[0].seg[0].language=MY_CHARSET_CURRENT; + keyinfo[0].seg[0].language= default_charset_info->number; keyinfo[0].flag = (no_fulltext?HA_PACK_KEY:HA_FULLTEXT); if (!silent) @@ -155,33 +155,42 @@ static int run_test(const char *filename) if (!silent) printf("- Reading rows with key\n"); for (i=0 ; i < NQUERIES ; i++) - { FT_DOCLIST *result; + { + FT_DOCLIST *result; result=ft_nlq_init_search(file,0,(char*) query[i],strlen(query[i]),1); - if(!result) { + if(!result) + { printf("Query %d: `%s' failed with errno %3d\n",i,query[i],my_errno); continue; } printf("Query %d: `%s'. Found: %d. Top five documents:\n", - i,query[i],result->ndocs); - for(j=0;j<5;j++) { double w; int err; - err=ft_nlq_read_next(result, read_record); - if(err==HA_ERR_END_OF_FILE) { - printf("No more matches!\n"); - break; - } else if (err) { - printf("ft_read_next %d failed with errno %3d\n",j,my_errno); - break; - } - w=ft_nlq_get_relevance(result); - if(key_field == FIELD_VARCHAR) { - uint l; - char *p; - p=recinfo[0].length+read_record; - l=uint2korr(p); - printf("%10.7f: %.*s\n",w,(int) l,p+2); - } else - printf("%10.7f: %.*s\n",w,recinfo[1].length, - recinfo[0].length+read_record); + i,query[i],result->ndocs); + for (j=0;j<5;j++) + { + double w; int err; + err= ft_nlq_read_next(result, read_record); + if (err==HA_ERR_END_OF_FILE) + { + printf("No more matches!\n"); + break; + } + else if (err) + { + printf("ft_read_next %d failed with errno %3d\n",j,my_errno); + break; + } + w=ft_nlq_get_relevance(result); + if (key_field == FIELD_VARCHAR) + { + uint l; + char *p; + p=recinfo[0].length+read_record; + l=uint2korr(p); + printf("%10.7f: %.*s\n",w,(int) l,p+2); + } + else + printf("%10.7f: %.*s\n",w,recinfo[1].length, + recinfo[0].length+read_record); } ft_nlq_close_search(result); } @@ -215,9 +224,14 @@ void create_record(char *pos, int n) else if (recinfo[0].type == FIELD_VARCHAR) { uint tmp; - strnmov(pos+2,data[n].f0,keyinfo[0].seg[0].length); - tmp=strlen(pos+2); - int2store(pos,tmp); + /* -1 is here because pack_length is stored in seg->length */ + uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1); + strnmov(pos+pack_length,data[n].f0,keyinfo[0].seg[0].length); + tmp=strlen(pos+pack_length); + if (pack_length == 1) + *pos= (char) tmp; + else + int2store(pos,tmp); pos+=recinfo[0].length; } else @@ -239,9 +253,14 @@ void create_record(char *pos, int n) else if (recinfo[1].type == FIELD_VARCHAR) { uint tmp; - strnmov(pos+2,data[n].f2,keyinfo[0].seg[0].length); - tmp=strlen(pos+2); - int2store(pos,tmp); + /* -1 is here because pack_length is stored in seg->length */ + uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1); + strnmov(pos+pack_length,data[n].f2,keyinfo[0].seg[0].length); + tmp=strlen(pos+1); + if (pack_length == 1) + *pos= (char) tmp; + else + int2store(pos,tmp); pos+=recinfo[1].length; } else diff --git a/myisam/ft_update.c b/myisam/ft_update.c index beccc062270..b8cd925bf4f 100644 --- a/myisam/ft_update.c +++ b/myisam/ft_update.c @@ -58,29 +58,27 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi) DBUG_ENTER("_mi_ft_segiterator"); if (!ftsi->num) - { DBUG_RETURN(0); - } - else - ftsi->num--; + + ftsi->num--; if (!ftsi->seg) - { DBUG_RETURN(1); - } - else - ftsi->seg--; + + ftsi->seg--; if (ftsi->seg->null_bit && (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit)) { - ftsi->pos=0; - DBUG_RETURN(1); + ftsi->pos=0; + DBUG_RETURN(1); } ftsi->pos= ftsi->rec+ftsi->seg->start; - if (ftsi->seg->flag & HA_VAR_LENGTH) + if (ftsi->seg->flag & HA_VAR_LENGTH_PART) { - ftsi->len=uint2korr(ftsi->pos); - ftsi->pos+=2; /* Skip VARCHAR length */ + uint pack_length= (ftsi->seg->bit_start); + ftsi->len= (pack_length == 1 ? (uint) *(uchar*) ftsi->pos : + uint2korr(ftsi->pos)); + ftsi->pos+= pack_length; /* Skip VARCHAR length */ DBUG_RETURN(1); } if (ftsi->seg->flag & HA_BLOB_PART) @@ -296,9 +294,11 @@ uint _ft_make_key(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wptr, DBUG_RETURN(_mi_make_key(info,keynr,(uchar*) keybuf,buf,filepos)); } + /* convert key value to ft2 */ + uint _mi_ft_convert_to_ft2(MI_INFO *info, uint keynr, uchar *key) { my_off_t root; @@ -316,9 +316,12 @@ uint _mi_ft_convert_to_ft2(MI_INFO *info, uint keynr, uchar *key) get_key_full_length_rdonly(key_length, key); while (_mi_ck_delete(info, keynr, key, key_length) == 0) - /* nothing to do here. - _mi_ck_delete() will populate info->ft1_to_ft2 with deleted keys - */; + { + /* + nothing to do here. + _mi_ck_delete() will populate info->ft1_to_ft2 with deleted keys + */ + } /* creating pageful of keys */ mi_putint(info->buff,length+2,0); diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h index ddb9fbfead2..91c679a1e58 100644 --- a/myisam/ftdefs.h +++ b/myisam/ftdefs.h @@ -112,7 +112,8 @@ int is_stopword(char *word, uint len); uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t); byte ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *, FTB_PARAM *); -byte ft_simple_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *); +byte ft_simple_get_word(CHARSET_INFO *, byte **, const byte *, + FT_WORD *, my_bool); typedef struct _st_ft_seg_iterator { uint num, len; diff --git a/myisam/mi_check.c b/myisam/mi_check.c index 0c85b5234a1..1f453278eea 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -282,7 +282,8 @@ int chk_size(MI_CHECK *param, register MI_INFO *info) size=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0)); if ((skr=(my_off_t) info->state->key_file_length) != size) { - if (skr > size) + /* Don't give error if file generated by myisampack */ + if (skr > size && mi_is_any_key_active(info->s->state.key_map)) { error=1; mi_check_print_error(param, @@ -379,7 +380,7 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++) { param->key_crc[key]=0; - if (!(((ulonglong) 1 << key) & share->state.key_map)) + if (! mi_is_key_active(share->state.key_map, key)) { /* Remember old statistics for key */ memcpy((char*) rec_per_key_part, @@ -507,7 +508,7 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) (int) ((my_off_t2double(key_totlength) - my_off_t2double(all_keydata))*100.0/ my_off_t2double(key_totlength))); - else if (all_totaldata != 0L && share->state.key_map) + else if (all_totaldata != 0L && mi_is_any_key_active(share->state.key_map)) puts(""); } if (param->key_file_blocks != info->state->key_file_length && @@ -1035,7 +1036,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) for (key=0,keyinfo= info->s->keyinfo; key < info->s->base.keys; key++,keyinfo++) { - if ((((ulonglong) 1 << key) & info->s->state.key_map)) + if (mi_is_key_active(info->s->state.key_map, key)) { if(!(keyinfo->flag & HA_FULLTEXT)) { @@ -1303,8 +1304,8 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info, */ if (param->testflag & T_CREATE_MISSING_KEYS) - share->state.key_map= ((((ulonglong) 1L << share->base.keys)-1) & - param->keys_in_use); + mi_copy_keys_active(share->state.key_map, share->base.keys, + param->keys_in_use); info->state->key_file_length=share->base.keystart; @@ -1466,7 +1467,7 @@ static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff, key=info->lastkey+info->s->base.max_key_length; for (i=0 ; i < info->s->base.keys ; i++) { - if (((ulonglong) 1 << i) & info->s->state.key_map) + if (mi_is_key_active(info->s->state.key_map, i)) { if (info->s->keyinfo[i].flag & HA_FULLTEXT ) { @@ -1497,7 +1498,7 @@ static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff, info->errkey=(int) i; /* This key was found */ while ( i-- > 0 ) { - if (((ulonglong) 1 << i) & info->s->state.key_map) + if (mi_is_key_active(info->s->state.key_map, i)) { if (info->s->keyinfo[i].flag & HA_FULLTEXT) { @@ -1534,7 +1535,7 @@ int movepoint(register MI_INFO *info, byte *record, my_off_t oldpos, key=info->lastkey+info->s->base.max_key_length; for (i=0 ; i < info->s->base.keys; i++) { - if (i != prot_key && (((ulonglong) 1 << i) & info->s->state.key_map)) + if (i != prot_key && mi_is_key_active(info->s->state.key_map, i)) { key_length=_mi_make_key(info,i,key,record,oldpos); if (info->s->keyinfo[i].flag & HA_NOSAME) @@ -1633,7 +1634,7 @@ int mi_sort_index(MI_CHECK *param, register MI_INFO *info, my_string name) for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ; key++,keyinfo++) { - if (!(((ulonglong) 1 << key) & share->state.key_map)) + if (! mi_is_key_active(info->s->state.key_map, key)) continue; if (share->state.key_root[key] != HA_OFFSET_ERROR) @@ -2028,7 +2029,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, sort_param.read_cache=param->read_cache; sort_param.keyinfo=share->keyinfo+sort_param.key; sort_param.seg=sort_param.keyinfo->seg; - if (!(((ulonglong) 1 << sort_param.key) & key_map)) + if (! mi_is_key_active(key_map, sort_param.key)) { /* Remember old statistics for key */ memcpy((char*) rec_per_key_part, @@ -2049,7 +2050,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, sort_param.key_length+=keyseg[i].length; if (keyseg[i].flag & HA_SPACE_PACK) sort_param.key_length+=get_pack_length(keyseg[i].length); - if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH)) + if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) sort_param.key_length+=2 + test(keyseg[i].length >= 127); if (keyseg[i].flag & HA_NULL_PART) sort_param.key_length++; @@ -2089,7 +2090,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, if (param->testflag & T_STATISTICS) update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, (ulonglong) info->state->records); - share->state.key_map|=(ulonglong) 1 << sort_param.key; + mi_set_key_active(share->state.key_map, sort_param.key); if (sort_param.fix_datafile) { @@ -2410,7 +2411,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, sort_param[i].key=key; sort_param[i].keyinfo=share->keyinfo+key; sort_param[i].seg=sort_param[i].keyinfo->seg; - if (!(((ulonglong) 1 << key) & key_map)) + if (! mi_is_key_active(key_map, key)) { /* Remember old statistics for key */ memcpy((char*) rec_per_key_part, @@ -2458,7 +2459,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, sort_param[i].key_length+=keyseg->length; if (keyseg->flag & HA_SPACE_PACK) sort_param[i].key_length+=get_pack_length(keyseg->length); - if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH)) + if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) sort_param[i].key_length+=2 + test(keyseg->length >= 127); if (keyseg->flag & HA_NULL_PART) sort_param[i].key_length++; @@ -3925,8 +3926,7 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, { byte *record; if (!info->s->base.auto_key || - !(((ulonglong) 1 << (info->s->base.auto_key-1) - & info->s->state.key_map))) + ! mi_is_key_active(info->s->state.key_map, info->s->base.auto_key - 1)) { if (!(param->testflag & T_VERY_SILENT)) mi_check_print_info(param, @@ -4079,7 +4079,7 @@ void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows) if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) && ! mi_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1) { - share->state.key_map&= ~ ((ulonglong) 1 << i); + mi_clear_key_active(share->state.key_map, i); info->update|= HA_STATE_CHANGED; } } @@ -4103,7 +4103,7 @@ my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows, mi_repair_by_sort only works if we have at least one key. If we don't have any keys, we should use the normal repair. */ - if (!key_map) + if (! mi_is_any_key_active(key_map)) return FALSE; /* Can't use sort */ for (i=0 ; i < share->base.keys ; i++,key++) { diff --git a/myisam/mi_checksum.c b/myisam/mi_checksum.c index 95338434211..33a51068fb0 100644 --- a/myisam/mi_checksum.c +++ b/myisam/mi_checksum.c @@ -40,8 +40,12 @@ ha_checksum mi_checksum(MI_INFO *info, const byte *buf) } case FIELD_VARCHAR: { - length=uint2korr(buf); - pos=buf+2; + uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length-1); + if (pack_length == 1) + length= (ulong) *(uchar*) buf; + else + length= uint2korr(buf); + pos= buf+pack_length; break; } default: diff --git a/myisam/mi_create.c b/myisam/mi_create.c index 41c965c7c80..6d4106afda5 100644 --- a/myisam/mi_create.c +++ b/myisam/mi_create.c @@ -43,7 +43,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, myf create_flag; uint fields,length,max_key_length,packed,pointer,real_length_diff, key_length,info_length,key_segs,options,min_key_length_skip, - base_pos,varchar_count,long_varchar_count,varchar_length, + base_pos,long_varchar_count,varchar_length, max_key_block_length,unique_key_parts,fulltext_keys,offset; ulong reclength, real_reclength,min_pack_length; char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr; @@ -99,7 +99,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, /* Start by checking fields and field-types used */ - reclength=varchar_count=varchar_length=long_varchar_count=packed= + reclength=varchar_length=long_varchar_count=packed= min_pack_length=pack_reclength=0; for (rec=recinfo, fields=0 ; fields != columns ; @@ -130,14 +130,15 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, } else if (type == FIELD_VARCHAR) { - varchar_count++; - varchar_length+=rec->length-2; + varchar_length+= rec->length-1; /* Used for min_pack_length */ packed--; - pack_reclength+=1; - if (test(rec->length > 257)) - { /* May be packed on 3 bytes */ + pack_reclength++; + min_pack_length++; + /* We must test for 257 as length includes pack-length */ + if (test(rec->length >= 257)) + { long_varchar_count++; - pack_reclength+=2; + pack_reclength+= 2; /* May be packed on 3 bytes */ } } else if (type != FIELD_SKIP_ZERO) @@ -169,12 +170,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, /* We can't use checksum with static length rows */ if (!(options & HA_OPTION_PACK_RECORD)) options&= ~HA_OPTION_CHECKSUM; - if (options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) - min_pack_length+=varchar_count; /* Min length to pack */ - else - { - min_pack_length+=varchar_length+2*varchar_count; - } + if (!(options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))) + min_pack_length+= varchar_length; if (flags & HA_CREATE_TMP_TABLE) { options|= HA_OPTION_TMP_TABLE; @@ -222,7 +219,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, reclength=pointer+1; /* reserve place for delete link */ } else - reclength+=long_varchar_count; /* We need space for this! */ + reclength+= long_varchar_count; /* We need space for varchar! */ max_key_length=0; tot_length=0 ; key_segs=0; fulltext_keys=0; @@ -265,7 +262,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, j++, keyseg++) { if (keyseg->type != HA_KEYTYPE_BINARY && - keyseg->type != HA_KEYTYPE_VARBINARY) + keyseg->type != HA_KEYTYPE_VARBINARY1 && + keyseg->type != HA_KEYTYPE_VARBINARY2) { my_errno=HA_WRONG_CREATE_OPTION; goto err; @@ -280,8 +278,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, goto err; #endif /*HAVE_SPATIAL*/ } - else - if (keydef->flag & HA_FULLTEXT) + else if (keydef->flag & HA_FULLTEXT) { keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY; options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ @@ -290,11 +287,22 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, j++, keyseg++) { if (keyseg->type != HA_KEYTYPE_TEXT && - keyseg->type != HA_KEYTYPE_VARTEXT) + keyseg->type != HA_KEYTYPE_VARTEXT1 && + keyseg->type != HA_KEYTYPE_VARTEXT2) { my_errno=HA_WRONG_CREATE_OPTION; goto err; } + if (!(keyseg->flag & HA_BLOB_PART) && + (keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARTEXT2)) + { + /* Make a flag that this is a VARCHAR */ + keyseg->flag|= HA_VAR_LENGTH_PART; + /* Store in bit_start number of bytes used to pack the length */ + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1)? + 1 : 2); + } } fulltext_keys++; @@ -315,7 +323,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, /* Only use HA_PACK_KEY when first segment is a variable length key */ if (!(keydef->seg[0].flag & (HA_SPACE_PACK | HA_BLOB_PART | - HA_VAR_LENGTH))) + HA_VAR_LENGTH_PART))) { /* pack relative to previous key */ keydef->flag&= ~HA_PACK_KEY; @@ -349,12 +357,27 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, case HA_KEYTYPE_UINT24: case HA_KEYTYPE_INT8: keyseg->flag|= HA_SWAP_KEY; - /* fall through */ + break; + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + if (!(keyseg->flag & HA_BLOB_PART)) + { + /* Make a flag that this is a VARCHAR */ + keyseg->flag|= HA_VAR_LENGTH_PART; + /* Store in bit_start number of bytes used to pack the length */ + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARBINARY1) ? + 1 : 2); + } + break; default: break; } if (keyseg->flag & HA_SPACE_PACK) { + DBUG_ASSERT(!(keyseg->flag & HA_VAR_LENGTH_PART)); keydef->flag |= HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY; options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ length++; /* At least one length byte */ @@ -365,8 +388,10 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, length+=2; } } - if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART)) + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) { + DBUG_ASSERT(!test_all_bits(keyseg->flag, + (HA_VAR_LENGTH_PART | HA_BLOB_PART))); keydef->flag|=HA_VAR_LENGTH_KEY; length++; /* At least one length byte */ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ @@ -485,7 +510,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, mi_int2store(share.state.header.key_parts,key_segs); mi_int2store(share.state.header.unique_key_parts,unique_key_parts); - share.state.key_map = ((ulonglong) 1 << keys)-1; + mi_set_all_keys_active(share.state.key_map, keys); share.base.keystart = share.state.state.key_file_length= MY_ALIGN(info_length, myisam_block_size); share.base.max_key_block_length=max_key_block_length; @@ -621,10 +646,12 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, { HA_KEYSEG sseg; sseg.type=SPTYPE; - sseg.language= 7; + sseg.language= 7; /* Binary */ sseg.null_bit=0; sseg.bit_start=0; sseg.bit_end=0; + sseg.bit_length= 0; + sseg.bit_pos= 0; sseg.length=SPLEN; sseg.null_pos=0; sseg.start=j*SPLEN; @@ -642,7 +669,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, { tmp_keydef.keysegs=1; tmp_keydef.flag= HA_UNIQUE_CHECK; - tmp_keydef.block_length= myisam_block_size; + tmp_keydef.block_length= (uint16)myisam_block_size; tmp_keydef.keylength= MI_UNIQUE_HASH_LENGTH + pointer; tmp_keydef.minlength=tmp_keydef.maxlength=tmp_keydef.keylength; tmp_keyseg.type= MI_UNIQUE_HASH_TYPE; @@ -657,11 +684,31 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, /* Save unique definition */ for (i=0 ; i < share.state.header.uniques ; i++) { + HA_KEYSEG *keyseg_end; + keyseg= uniquedefs[i].seg; if (mi_uniquedef_write(file, &uniquedefs[i])) goto err; - for (j=0 ; j < uniquedefs[i].keysegs ; j++) + for (keyseg= uniquedefs[i].seg, keyseg_end= keyseg+ uniquedefs[i].keysegs; + keyseg < keyseg_end; + keyseg++) { - if (mi_keyseg_write(file, &uniquedefs[i].seg[j])) + switch (keyseg->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + if (!(keyseg->flag & HA_BLOB_PART)) + { + keyseg->flag|= HA_VAR_LENGTH_PART; + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARBINARY1) ? + 1 : 2); + } + break; + default: + break; + } + if (mi_keyseg_write(file, keyseg)) goto err; } } diff --git a/myisam/mi_dbug.c b/myisam/mi_dbug.c index 34105c490e4..ddc8a403a33 100644 --- a/myisam/mi_dbug.c +++ b/myisam/mi_dbug.c @@ -131,9 +131,21 @@ void _mi_print_key(FILE *stream, register HA_KEYSEG *keyseg, key=end; break; } + case HA_KEYTYPE_BIT: + { + uint i; + fputs("0x",stream); + for (i=0 ; i < keyseg->length ; i++) + fprintf(stream, "%02x", (uint) *key++); + key= end; + break; + } + #endif - case HA_KEYTYPE_VARTEXT: /* VARCHAR and TEXT */ - case HA_KEYTYPE_VARBINARY: /* VARBINARY and BLOB */ + case HA_KEYTYPE_VARTEXT1: /* VARCHAR and TEXT */ + case HA_KEYTYPE_VARTEXT2: /* VARCHAR and TEXT */ + case HA_KEYTYPE_VARBINARY1: /* VARBINARY and BLOB */ + case HA_KEYTYPE_VARBINARY2: /* VARBINARY and BLOB */ { uint tmp_length; get_key_length(tmp_length,key); diff --git a/myisam/mi_delete.c b/myisam/mi_delete.c index b964cb35dd8..60a07254e82 100644 --- a/myisam/mi_delete.c +++ b/myisam/mi_delete.c @@ -45,6 +45,12 @@ int mi_delete(MI_INFO *info,const byte *record) /* Test if record is in datafile */ + DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_usage", + mi_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + DBUG_EXECUTE_IF("my_error_test_undefined_error", + mi_print_error(info->s, INT_MAX); + DBUG_RETURN(my_errno= INT_MAX);); if (!(info->update & HA_STATE_AKTIV)) { DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No database read */ @@ -68,7 +74,7 @@ int mi_delete(MI_INFO *info,const byte *record) old_key=info->lastkey2; for (i=0 ; i < share->base.keys ; i++ ) { - if (((ulonglong) 1 << i) & info->s->state.key_map) + if (mi_is_key_active(info->s->state.key_map, i)) { info->s->keyinfo[i].version++; if (info->s->keyinfo[i].flag & HA_FULLTEXT ) @@ -109,13 +115,19 @@ err: mi_sizestore(lastpos,info->lastpos); myisam_log_command(MI_LOG_DELETE,info,(byte*) lastpos, sizeof(lastpos),0); if (save_errno != HA_ERR_RECORD_CHANGED) + { + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); /* mark table crashed */ + } VOID(_mi_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); info->update|=HA_STATE_WRITTEN; /* Buffer changed */ allow_break(); /* Allow SIGHUP & SIGINT */ my_errno=save_errno; if (save_errno == HA_ERR_KEY_NOT_FOUND) + { + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; + } DBUG_RETURN(my_errno); } /* mi_delete */ @@ -142,6 +154,7 @@ static int _mi_ck_real_delete(register MI_INFO *info, MI_KEYDEF *keyinfo, if ((old_root=*root) == HA_OFFSET_ERROR) { + mi_print_error(info->s, HA_ERR_CRASHED); DBUG_RETURN(my_errno=HA_ERR_CRASHED); } if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ @@ -253,7 +266,12 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, my_off_t root; uchar *kpos=keypos; - tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey); + if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey))) + { + mi_print_error(info->s, HA_ERR_CRASHED); + my_errno= HA_ERR_CRASHED; + DBUG_RETURN(-1); + } root=_mi_dpos(info,nod_flag,kpos); if (subkeys == -1) { @@ -302,6 +320,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (!nod_flag) { DBUG_PRINT("error",("Didn't find key")); + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; /* This should newer happend */ goto err; } @@ -313,13 +332,10 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, { /* Found key */ uint tmp; length=mi_getint(anc_buff); - tmp=remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length, - &next_block); - if (tmp == 0) - { - DBUG_PRINT("exit",("Return: %d",0)); - DBUG_RETURN(0); - } + if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length, + &next_block))) + goto err; + length-= tmp; mi_putint(anc_buff,length,nod_flag); @@ -368,6 +384,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, my_afree((byte*) leaf_buff); DBUG_PRINT("exit",("Return: %d",ret_value)); DBUG_RETURN(ret_value); + err: my_afree((byte*) leaf_buff); DBUG_PRINT("exit",("Error: %d",my_errno)); @@ -559,10 +576,10 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, /* remove key from anc_buff */ - s_length=remove_key(keyinfo,key_reflength,keypos,anc_key, - anc_buff+anc_length,(my_off_t *) 0); - if (!s_length) + if (!(s_length=remove_key(keyinfo,key_reflength,keypos,anc_key, + anc_buff+anc_length,(my_off_t *) 0))) goto err; + anc_length-=s_length; mi_putint(anc_buff,anc_length,key_reflength); @@ -668,10 +685,10 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, mi_putint(buff,buff_length,nod_flag); /* remove key from anc_buff */ - s_length=remove_key(keyinfo,key_reflength,keypos,anc_key, - anc_buff+anc_length,(my_off_t *) 0); - if (!s_length) + if (!(s_length= remove_key(keyinfo,key_reflength,keypos,anc_key, + anc_buff+anc_length,(my_off_t *) 0))) goto err; + anc_length-=s_length; mi_putint(anc_buff,anc_length,key_reflength); @@ -731,6 +748,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (_mi_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff)) goto err; DBUG_RETURN(anc_length <= (uint) keyinfo->block_length/2); + err: DBUG_RETURN(-1); } /* underflow */ @@ -768,6 +786,7 @@ static uint remove_key(MI_KEYDEF *keyinfo, uint nod_flag, /* Calculate length of key */ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey)) DBUG_RETURN(0); /* Error */ + if (next_block && nod_flag) *next_block= _mi_kpos(nod_flag,keypos); s_length=(int) (keypos-start); diff --git a/myisam/mi_dynrec.c b/myisam/mi_dynrec.c index 43783ca2d36..8de500a7351 100644 --- a/myisam/mi_dynrec.c +++ b/myisam/mi_dynrec.c @@ -149,7 +149,9 @@ static int write_dynamic_record(MI_INFO *info, const byte *record, { if (_mi_find_writepos(info,reclength,&filepos,&length)) goto err; - if (_mi_write_part_record(info,filepos,length,info->s->state.dellink, + if (_mi_write_part_record(info,filepos,length, + (info->append_insert_at_end ? + HA_OFFSET_ERROR : info->s->state.dellink), (byte**) &record,&reclength,&flag)) goto err; } while (reclength); @@ -171,7 +173,8 @@ static int _mi_find_writepos(MI_INFO *info, ulong tmp; DBUG_ENTER("_mi_find_writepos"); - if (info->s->state.dellink != HA_OFFSET_ERROR) + if (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) { /* Deleted blocks exists; Get last used block */ *filepos=info->s->state.dellink; @@ -420,8 +423,9 @@ int _mi_write_part_record(MI_INFO *info, else if (length-long_block < *reclength+4) { /* To short block */ if (next_filepos == HA_OFFSET_ERROR) - next_filepos=info->s->state.dellink != HA_OFFSET_ERROR ? - info->s->state.dellink : info->state->data_file_length; + next_filepos= (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end ? + info->s->state.dellink : info->state->data_file_length); if (*flag == 0) /* First block */ { if (*reclength > MI_MAX_BLOCK_LENGTH) @@ -768,11 +772,21 @@ uint _mi_rec_pack(MI_INFO *info, register byte *to, register const byte *from) } else if (type == FIELD_VARCHAR) { - uint tmp_length=uint2korr(from); - store_key_length_inc(to,tmp_length); - memcpy(to,from+2,tmp_length); - to+=tmp_length; - continue; + uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1); + uint tmp_length; + if (pack_length == 1) + { + tmp_length= (uint) *(uchar*) from; + *to++= *from; + } + else + { + tmp_length= uint2korr(from); + store_key_length_inc(to,tmp_length); + } + memcpy(to, from+pack_length,tmp_length); + to+= tmp_length; + continue; } else { @@ -878,9 +892,20 @@ my_bool _mi_rec_check(MI_INFO *info,const char *record, byte *rec_buff, } else if (type == FIELD_VARCHAR) { - uint tmp_length=uint2korr(record); - to+=get_pack_length(tmp_length)+tmp_length; - continue; + uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1); + uint tmp_length; + if (pack_length == 1) + { + tmp_length= (uint) *(uchar*) record; + to+= 1+ tmp_length; + continue; + } + else + { + tmp_length= uint2korr(record); + to+= get_pack_length(tmp_length)+tmp_length; + } + continue; } else { @@ -894,9 +919,7 @@ my_bool _mi_rec_check(MI_INFO *info,const char *record, byte *rec_buff, } } else - { - to+=length; - } + to+= length; } if (packed_length != (uint) (to - rec_buff) + test(info->s->calc_checksum) || (bit != 1 && (flag & ~(bit - 1)))) @@ -944,13 +967,27 @@ ulong _mi_rec_unpack(register MI_INFO *info, register byte *to, byte *from, { if (type == FIELD_VARCHAR) { - get_key_length(length,from); - if (length > rec_length-2) - goto err; - int2store(to,length); - memcpy(to+2,from,length); - from+=length; - continue; + uint pack_length= HA_VARCHAR_PACKLENGTH(rec_length-1); + if (pack_length == 1) + { + length= (uint) *(uchar*) from; + if (length > rec_length-1) + goto err; + *to= *from++; + } + else + { + get_key_length(length, from); + if (length > rec_length-2) + goto err; + int2store(to,length); + } + if (from+length > from_end) + goto err; + memcpy(to+pack_length, from, length); + from+= length; + min_pack_length--; + continue; } if (flag & bit) { @@ -1018,15 +1055,17 @@ ulong _mi_rec_unpack(register MI_INFO *info, register byte *to, byte *from, if (min_pack_length > (uint) (from_end - from)) goto err; min_pack_length-=rec_length; - memcpy(to,(byte*) from,(size_t) rec_length); from+=rec_length; + memcpy(to, (byte*) from, (size_t) rec_length); + from+=rec_length; } } if (info->s->calc_checksum) from++; if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit-1)))) DBUG_RETURN(found_length); + err: - my_errno=HA_ERR_RECORD_DELETED; + my_errno= HA_ERR_WRONG_IN_RECORD; DBUG_PRINT("error",("to_end: %lx -> %lx from_end: %lx -> %lx", to,to_end,from,from_end)); DBUG_DUMP("from",(byte*) info->rec_buff,info->s->base.min_pack_length); diff --git a/myisam/mi_extra.c b/myisam/mi_extra.c index 1827aed98c3..7c0dd13b870 100644 --- a/myisam/mi_extra.c +++ b/myisam/mi_extra.c @@ -15,7 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "myisamdef.h" -#ifdef HAVE_MMAP +#ifdef HAVE_SYS_MMAN_H #include <sys/mman.h> #endif @@ -186,7 +186,10 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg) if (info->opt_flag & WRITE_CACHE_USED) { if ((error=flush_io_cache(&info->rec_cache))) + { + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); /* Fatal error found */ + } } break; case HA_EXTRA_NO_READCHECK: @@ -241,7 +244,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg) error=1; /* Not possibly if not lock */ break; } - if (share->state.key_map) + if (mi_is_any_key_active(share->state.key_map)) { MI_KEYDEF *key=share->keyinfo; uint i; @@ -249,7 +252,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg) { if (!(key->flag & HA_NOSAME) && info->s->base.auto_key != i+1) { - share->state.key_map&= ~ ((ulonglong) 1 << i); + mi_clear_key_active(share->state.key_map, i); info->update|= HA_STATE_CHANGED; } } @@ -285,6 +288,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg) { error=my_errno; share->changed=1; + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); /* Fatal error found */ } if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) @@ -339,6 +343,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg) if (error) { share->changed=1; + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); /* Fatal error found */ } } diff --git a/myisam/mi_info.c b/myisam/mi_info.c index cf63ef63618..bdece9c2ee3 100644 --- a/myisam/mi_info.c +++ b/myisam/mi_info.c @@ -105,3 +105,36 @@ int mi_status(MI_INFO *info, register MI_ISAMINFO *x, uint flag) } DBUG_RETURN(0); } + + +/* + Write a message to the error log. + + SYNOPSIS + mi_report_error() + file_name Name of table file (e.g. index_file_name). + errcode Error number. + + DESCRIPTION + This function supplies my_error() with a table name. Most error + messages need one. Since string arguments in error messages are limited + to 64 characters by convention, we ensure that in case of truncation, + that the end of the index file path is in the message. This contains + the most valuable information (the table name and the database name). + + RETURN + void +*/ + +void mi_report_error(int errcode, const char *file_name) +{ + size_t lgt; + DBUG_ENTER("mi_report_error"); + DBUG_PRINT("enter",("errcode %d, table '%s'", errcode, file_name)); + + if ((lgt= strlen(file_name)) > 64) + file_name+= lgt - 64; + my_error(errcode, MYF(ME_NOREFRESH), file_name); + DBUG_VOID_RETURN; +} + diff --git a/myisam/mi_key.c b/myisam/mi_key.c index 9df22889b22..ae50900a190 100644 --- a/myisam/mi_key.c +++ b/myisam/mi_key.c @@ -34,10 +34,20 @@ static int _mi_put_key_in_record(MI_INFO *info,uint keynr,byte *record); - /* - ** Make a intern key from a record - ** Ret: Length of key - */ +/* + Make a intern key from a record + + SYNOPSIS + _mi_make_key() + info MyiSAM handler + keynr key number + key Store created key here + record Record + filepos Position to record in the data file + + RETURN + Length of key +*/ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, const byte *record, my_off_t filepos) @@ -82,6 +92,19 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, length); pos= (byte*) record+keyseg->start; + if (type == HA_KEYTYPE_BIT) + { + if (keyseg->bit_length) + { + uchar bits= get_rec_bits((uchar*) record + keyseg->bit_pos, + keyseg->bit_start, keyseg->bit_length); + *key++= bits; + length--; + } + memcpy((byte*) key, pos, length); + key+= length; + continue; + } if (keyseg->flag & HA_SPACE_PACK) { end= pos + length; @@ -102,10 +125,12 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, key+=char_length; continue; } - if (keyseg->flag & HA_VAR_LENGTH) + if (keyseg->flag & HA_VAR_LENGTH_PART) { - uint tmp_length=uint2korr(pos); - pos+=2; /* Skip VARCHAR length */ + uint pack_length= keyseg->bit_start; + uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos : + uint2korr(pos)); + pos+= pack_length; /* Skip VARCHAR length */ set_if_smaller(length,tmp_length); FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); @@ -161,7 +186,7 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, FIX_LENGTH(cs, pos, length, char_length); memcpy((byte*) key, pos, char_length); if (length > char_length) - cs->cset->fill(cs, key+char_length, length-char_length, ' '); + cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); key+= length; } _mi_dpointer(info,key,filepos); @@ -216,8 +241,11 @@ uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, uchar *old, if (!(*key++= (char) 1-*old++)) /* Copy null marker */ { k_length-=length; - if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART)) + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { k_length-=2; /* Skip length */ + old+= 2; + } continue; /* Found NULL */ } } @@ -244,7 +272,7 @@ uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, uchar *old, key+= char_length; continue; } - else if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART)) + else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) { /* Length of key-part used with mi_rkey() always 2 */ uint tmp_length=uint2korr(pos); @@ -271,7 +299,7 @@ uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, uchar *old, FIX_LENGTH(cs, pos, length, char_length); memcpy((byte*) key, pos, char_length); if (length > char_length) - cs->cset->fill(cs,key+char_length, length-char_length, ' '); + cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); key+= length; k_length-=length; } @@ -344,6 +372,26 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr, } record[keyseg->null_pos]&= ~keyseg->null_bit; } + if (keyseg->type == HA_KEYTYPE_BIT) + { + uint length= keyseg->length; + + if (keyseg->bit_length) + { + uchar bits= *key++; + set_rec_bits(bits, record + keyseg->bit_pos, keyseg->bit_start, + keyseg->bit_length); + length--; + } + else + { + clr_rec_bits(record + keyseg->bit_pos, keyseg->bit_start, + keyseg->bit_length); + } + memcpy(record + keyseg->start, (byte*) key, length); + key+= length; + continue; + } if (keyseg->flag & HA_SPACE_PACK) { uint length; @@ -367,7 +415,7 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr, continue; } - if (keyseg->flag & HA_VAR_LENGTH) + if (keyseg->flag & HA_VAR_LENGTH_PART) { uint length; get_key_length(length,key); @@ -375,7 +423,13 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr, if (length > keyseg->length || key+length > key_end) goto err; #endif - memcpy(record+keyseg->start,(byte*) key, length); + /* Store key length */ + if (keyseg->bit_start == 1) + *(uchar*) (record+keyseg->start)= (uchar) length; + else + int2store(record+keyseg->start, length); + /* And key data */ + memcpy(record+keyseg->start + keyseg->bit_start, (byte*) key, length); key+= length; } else if (keyseg->flag & HA_BLOB_PART) @@ -437,6 +491,7 @@ int _mi_read_key_record(MI_INFO *info, my_off_t filepos, byte *buf) { /* Read only key */ if (_mi_put_key_in_record(info,(uint) info->lastinx,buf)) { + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return -1; } diff --git a/myisam/mi_keycache.c b/myisam/mi_keycache.c index 99a2fd6db15..fb13f3703a2 100644 --- a/myisam/mi_keycache.c +++ b/myisam/mi_keycache.c @@ -79,6 +79,7 @@ int mi_assign_to_key_cache(MI_INFO *info, if (flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE)) { error= my_errno; + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); /* Mark that table must be checked */ } diff --git a/myisam/mi_locking.c b/myisam/mi_locking.c index 66950f62321..8d48c5242e5 100644 --- a/myisam/mi_locking.c +++ b/myisam/mi_locking.c @@ -66,6 +66,7 @@ int mi_lock_database(MI_INFO *info, int lock_type) share->kfile,FLUSH_KEEP)) { error=my_errno; + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); /* Mark that table must be checked */ } if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) @@ -73,6 +74,7 @@ int mi_lock_database(MI_INFO *info, int lock_type) if (end_io_cache(&info->rec_cache)) { error=my_errno; + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); } } @@ -98,7 +100,10 @@ int mi_lock_database(MI_INFO *info, int lock_type) else share->not_flushed=1; if (error) + { + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); + } } if (info->lock_type != F_EXTRA_LCK) { @@ -233,13 +238,24 @@ int mi_lock_database(MI_INFO *info, int lock_type) The following functions are called by thr_lock() in threaded applications ****************************************************************************/ -void mi_get_status(void* param) +/* + Create a copy of the current status for the table + + SYNOPSIS + mi_get_status() + param Pointer to Myisam handler + concurrent_insert Set to 1 if we are going to do concurrent inserts + (THR_WRITE_CONCURRENT_INSERT was used) +*/ + +void mi_get_status(void* param, int concurrent_insert) { MI_INFO *info=(MI_INFO*) param; DBUG_ENTER("mi_get_status"); - DBUG_PRINT("info",("key_file: %ld data_file: %ld", + DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d", (long) info->s->state.state.key_file_length, - (long) info->s->state.state.data_file_length)); + (long) info->s->state.state.data_file_length, + concurrent_insert)); #ifndef DBUG_OFF if (info->state->key_file_length > info->s->state.state.key_file_length || info->state->data_file_length > info->s->state.state.data_file_length) @@ -249,9 +265,11 @@ void mi_get_status(void* param) #endif info->save_state=info->s->state.state; info->state= &info->save_state; + info->append_insert_at_end= concurrent_insert; DBUG_VOID_RETURN; } + void mi_update_status(void* param) { MI_INFO *info=(MI_INFO*) param; @@ -276,6 +294,7 @@ void mi_update_status(void* param) info->s->state.state= *info->state; info->state= &info->s->state.state; } + info->append_insert_at_end= 0; /* We have to flush the write cache here as other threads may start @@ -285,6 +304,7 @@ void mi_update_status(void* param) { if (end_io_cache(&info->rec_cache)) { + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); } info->opt_flag&= ~WRITE_CACHE_USED; @@ -301,20 +321,37 @@ void mi_copy_status(void* to,void *from) Check if should allow concurrent inserts IMPLEMENTATION - Don't allow concurrent inserts if we have a hole in the table. + Allow concurrent inserts if we don't have a hole in the table or + if there is no active write lock and there is active read locks and + myisam_concurrent_insert == 2. In this last case the new + row('s) are inserted at end of file instead of filling up the hole. + + The last case is to allow one to inserts into a heavily read-used table + even if there is holes. NOTES - Rtree indexes are disabled in mi_open() + If there is a an rtree indexes in the table, concurrent inserts are + disabled in mi_open() RETURN 0 ok to use concurrent inserts 1 not ok */ -my_bool mi_check_status(void* param) +my_bool mi_check_status(void *param) { MI_INFO *info=(MI_INFO*) param; - return (my_bool) (info->s->state.dellink != HA_OFFSET_ERROR); + /* + The test for w_locks == 1 is here because this thread has already done an + external lock (in other words: w_locks == 1 means no other threads has + a write lock) + */ + DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u", + (long) info->s->state.dellink, (uint) info->s->r_locks, + (uint) info->s->w_locks)); + return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR || + (myisam_concurrent_insert == 2 && info->s->r_locks && + info->s->w_locks == 1)); } diff --git a/myisam/mi_open.c b/myisam/mi_open.c index a5b303f86d4..82663e0c318 100644 --- a/myisam/mi_open.c +++ b/myisam/mi_open.c @@ -106,6 +106,12 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) share_buff.state.key_del=key_del; share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff)); + DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_open", + if (strstr(name, "/t1")) + { + my_errno= HA_ERR_CRASHED; + goto err; + }); if ((kfile=my_open(name_buff,(open_mode=O_RDWR) | O_SHARE,MYF(0))) < 0) { if ((errno != EROFS && errno != EACCES) || @@ -186,14 +192,14 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) } share->state_diff_length=len-MI_STATE_INFO_SIZE; - mi_state_info_read(disk_cache, &share->state); + mi_state_info_read((uchar*) disk_cache, &share->state); len= mi_uint2korr(share->state.header.base_info_length); if (len != MI_BASE_INFO_SIZE) { DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d", len,MI_BASE_INFO_SIZE)) } - disk_pos=my_n_base_info_read(disk_cache+base_pos, &share->base); + disk_pos=my_n_base_info_read((uchar*) disk_cache + base_pos, &share->base); share->state.state_length=base_pos; if (!(open_flags & HA_OPEN_FOR_REPAIR) && @@ -302,6 +308,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) HA_KEYSEG *pos=share->keyparts; for (i=0 ; i < keys ; i++) { + share->keyinfo[i].share= share; disk_pos=mi_keydef_read(disk_pos, &share->keyinfo[i]); disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE, end_pos); @@ -313,7 +320,9 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) { disk_pos=mi_keyseg_read(disk_pos, pos); - if (pos->type == HA_KEYTYPE_TEXT || pos->type == HA_KEYTYPE_VARTEXT) + if (pos->type == HA_KEYTYPE_TEXT || + pos->type == HA_KEYTYPE_VARTEXT1 || + pos->type == HA_KEYTYPE_VARTEXT2) { if (!pos->language) pos->charset=default_charset_info; @@ -388,7 +397,9 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++) { disk_pos=mi_keyseg_read(disk_pos, pos); - if (pos->type == HA_KEYTYPE_TEXT || pos->type == HA_KEYTYPE_VARTEXT) + if (pos->type == HA_KEYTYPE_TEXT || + pos->type == HA_KEYTYPE_VARTEXT1 || + pos->type == HA_KEYTYPE_VARTEXT2) { if (!pos->language) pos->charset=default_charset_info; @@ -596,6 +607,10 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) err: save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE; + if ((save_errno == HA_ERR_CRASHED) || + (save_errno == HA_ERR_CRASHED_ON_USAGE) || + (save_errno == HA_ERR_CRASHED_ON_REPAIR)) + mi_report_error(save_errno, name); switch (errpos) { case 6: my_free((gptr) m_info,MYF(0)); @@ -848,7 +863,7 @@ uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite) } -char *mi_state_info_read(char *ptr, MI_STATE_INFO *state) +char *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state) { uint i,keys,key_parts,key_blocks; memcpy_fixed(&state->header,ptr, sizeof(state->header)); @@ -914,7 +929,7 @@ uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead) } else if (my_read(file, buff, state->state_length,MYF(MY_NABP))) return (MY_FILE_ERROR); - mi_state_info_read(buff, state); + mi_state_info_read((uchar*) buff, state); } return 0; } @@ -959,7 +974,7 @@ uint mi_base_info_write(File file, MI_BASE_INFO *base) } -char *my_n_base_info_read(char *ptr, MI_BASE_INFO *base) +char *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base) { base->keystart = mi_sizekorr(ptr); ptr +=8; base->max_data_file_length = mi_sizekorr(ptr); ptr +=8; @@ -1042,18 +1057,21 @@ int mi_keyseg_write(File file, const HA_KEYSEG *keyseg) { uchar buff[HA_KEYSEG_SIZE]; uchar *ptr=buff; - - *ptr++ =keyseg->type; - *ptr++ =keyseg->language; - *ptr++ =keyseg->null_bit; - *ptr++ =keyseg->bit_start; - *ptr++ =keyseg->bit_end; - *ptr++ =0; /* Not used */ + ulong pos; + + *ptr++= keyseg->type; + *ptr++= keyseg->language; + *ptr++= keyseg->null_bit; + *ptr++= keyseg->bit_start; + *ptr++= keyseg->bit_end; + *ptr++= keyseg->bit_length; mi_int2store(ptr,keyseg->flag); ptr+=2; mi_int2store(ptr,keyseg->length); ptr+=2; mi_int4store(ptr,keyseg->start); ptr+=4; - mi_int4store(ptr,keyseg->null_pos); ptr+=4; - + pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos; + mi_int4store(ptr, pos); + ptr+=4; + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); } @@ -1065,12 +1083,19 @@ char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg) keyseg->null_bit = *ptr++; keyseg->bit_start = *ptr++; keyseg->bit_end = *ptr++; - ptr++; + keyseg->bit_length = *ptr++; keyseg->flag = mi_uint2korr(ptr); ptr +=2; keyseg->length = mi_uint2korr(ptr); ptr +=2; keyseg->start = mi_uint4korr(ptr); ptr +=4; keyseg->null_pos = mi_uint4korr(ptr); ptr +=4; keyseg->charset=0; /* Will be filled in later */ + if (keyseg->null_bit) + keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7)); + else + { + keyseg->bit_pos= (uint16)keyseg->null_pos; + keyseg->null_pos= 0; + } return ptr; } @@ -1179,7 +1204,7 @@ int mi_disable_indexes(MI_INFO *info) { MYISAM_SHARE *share= info->s; - share->state.key_map= 0; + mi_clear_all_keys_active(share->state.key_map); return 0; } @@ -1210,9 +1235,12 @@ int mi_enable_indexes(MI_INFO *info) if (share->state.state.data_file_length || (share->state.state.key_file_length != share->base.keystart)) + { + mi_print_error(info->s, HA_ERR_CRASHED); error= HA_ERR_CRASHED; + } else - share->state.key_map= ((ulonglong) 1L << share->base.keys) - 1; + mi_set_all_keys_active(share->state.key_map, share->base.keys); return error; } @@ -1237,6 +1265,6 @@ int mi_indexes_are_disabled(MI_INFO *info) { MYISAM_SHARE *share= info->s; - return (! share->state.key_map && share->base.keys); + return (! mi_is_any_key_active(share->state.key_map) && share->base.keys); } diff --git a/myisam/mi_packrec.c b/myisam/mi_packrec.c index 322420b71db..e242e9d506d 100644 --- a/myisam/mi_packrec.c +++ b/myisam/mi_packrec.c @@ -91,8 +91,10 @@ static void uf_zero(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff, uchar *to,uchar *end); static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to, uchar *end); -static void uf_varchar(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, - uchar *to, uchar *end); +static void uf_varchar1(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, + uchar *to, uchar *end); +static void uf_varchar2(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, + uchar *to, uchar *end); static void decode_bytes(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff, uchar *to,uchar *end); static uint decode_pos(MI_BIT_BUFF *bit_buff,MI_DECODE_TREE *decode_tree); @@ -415,8 +417,19 @@ static uint find_longest_bitstream(uint16 *table, uint16 *end) } - /* Read record from datafile */ - /* Returns length of packed record, -1 if error */ +/* + Read record from datafile. + + SYNOPSIS + _mi_read_pack_record() + info A pointer to MI_INFO. + filepos File offset of the record. + buf RETURN The buffer to receive the record. + + RETURN + 0 on success + HA_ERR_WRONG_IN_RECORD or -1 on error +*/ int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf) { @@ -516,14 +529,16 @@ static void (*get_unpack_function(MI_COLUMNDEF *rec)) case FIELD_BLOB: return &uf_blob; case FIELD_VARCHAR: - return &uf_varchar; + if (rec->length <= 256) /* 255 + 1 byte length */ + return &uf_varchar1; + return &uf_varchar2; case FIELD_LAST: default: return 0; /* This should never happend */ } } - /* De different functions to unpack a field */ + /* The different functions to unpack a field */ static void uf_zerofill_skip_zero(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to, uchar *end) @@ -767,7 +782,22 @@ static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, } } -static void uf_varchar(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, + +static void uf_varchar1(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, + uchar *to, uchar *end __attribute__((unused))) +{ + if (get_bit(bit_buff)) + to[0]= 0; /* Zero lengths */ + else + { + ulong length=get_bits(bit_buff,rec->space_length_bits); + *to= (uchar) length; + decode_bytes(rec,bit_buff,to+1,to+1+length); + } +} + + +static void uf_varchar2(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to, uchar *end __attribute__((unused))) { if (get_bit(bit_buff)) @@ -1137,11 +1167,12 @@ static uint max_bit(register uint value) /***************************************************************************** Some redefined functions to handle files when we are using memmap *****************************************************************************/ +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif #ifdef HAVE_MMAP -#include <sys/mman.h> - static int _mi_read_mempack_record(MI_INFO *info,my_off_t filepos,byte *buf); static int _mi_read_rnd_mempack_record(MI_INFO*, byte *,my_off_t, my_bool); @@ -1167,7 +1198,7 @@ my_bool _mi_memmap_file(MI_INFO *info) DBUG_RETURN(0); } file_map=(byte*) - mmap(0,share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN,PROT_READ, + my_mmap(0,(size_t)(share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN),PROT_READ, MAP_SHARED | MAP_NORESERVE,info->dfile,0L); if (file_map == (byte*) MAP_FAILED) { @@ -1186,7 +1217,7 @@ my_bool _mi_memmap_file(MI_INFO *info) void _mi_unmap_file(MI_INFO *info) { - VOID(munmap((caddr_t) info->s->file_map, + VOID(my_munmap(info->s->file_map, (size_t) info->s->state.state.data_file_length+ MEMMAP_EXTRA_MARGIN)); } diff --git a/myisam/mi_page.c b/myisam/mi_page.c index 16713c87e10..5240c063fba 100644 --- a/myisam/mi_page.c +++ b/myisam/mi_page.c @@ -40,6 +40,7 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo, { DBUG_PRINT("error",("Got errno: %d from key_cache_read",my_errno)); info->last_keypage=HA_OFFSET_ERROR; + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(0); } @@ -51,6 +52,7 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo, (ulong) page, page_size)); DBUG_DUMP("page", (char*) tmp, keyinfo->block_length); info->last_keypage = HA_OFFSET_ERROR; + mi_print_error(info->s, HA_ERR_CRASHED); my_errno = HA_ERR_CRASHED; tmp = 0; } diff --git a/myisam/mi_preload.c b/myisam/mi_preload.c index 317ab4ad7fe..d63399b519d 100644 --- a/myisam/mi_preload.c +++ b/myisam/mi_preload.c @@ -51,7 +51,7 @@ int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves) my_off_t pos= share->base.keystart; DBUG_ENTER("mi_preload"); - if (!keys || !key_map || key_file_length == pos) + if (!keys || !mi_is_any_key_active(key_map) || key_file_length == pos) DBUG_RETURN(0); block_length= keyinfo[0].block_length; diff --git a/myisam/mi_range.c b/myisam/mi_range.c index 1e0fd42334e..e78f3b11625 100644 --- a/myisam/mi_range.c +++ b/myisam/mi_range.c @@ -172,9 +172,9 @@ static double _mi_search_pos(register MI_INFO *info, if (flag == MI_FOUND_WRONG_KEY) DBUG_RETURN(-1); /* error */ /* - ** Didn't found match. keypos points at next (bigger) key - * Try to find a smaller, better matching key. - ** Matches keynr + [0-1] + Didn't found match. keypos points at next (bigger) key + Try to find a smaller, better matching key. + Matches keynr + [0-1] */ if (flag > 0 && ! nod_flag) offset= 1.0; @@ -185,8 +185,8 @@ static double _mi_search_pos(register MI_INFO *info, else { /* - ** Found match. Keypos points at the start of the found key - ** Matches keynr+1 + Found match. Keypos points at the start of the found key + Matches keynr+1 */ offset=1.0; /* Matches keynr+1 */ if ((nextflag & SEARCH_FIND) && nod_flag && @@ -194,8 +194,8 @@ static double _mi_search_pos(register MI_INFO *info, key_len != USE_WHOLE_KEY)) { /* - ** There may be identical keys in the tree. Try to match on of those. - ** Matches keynr + [0-1] + There may be identical keys in the tree. Try to match on of those. + Matches keynr + [0-1] */ if ((offset=_mi_search_pos(info,keyinfo,key,key_len,SEARCH_FIND, _mi_kpos(nod_flag,keypos))) < 0) @@ -213,7 +213,8 @@ err: /* Get keynummer of current key and max number of keys in nod */ -static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, uchar *keypos, uint *ret_max_key) +static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, + uchar *keypos, uint *ret_max_key) { uint nod_flag,keynr,max_key; uchar t_buff[MI_MAX_KEY_BUFF],*end; @@ -222,7 +223,7 @@ static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, u nod_flag=mi_test_if_nod(page); page+=2+nod_flag; - if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY| HA_BINARY_PACK_KEY))) + if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) { *ret_max_key= (uint) (end-page)/(keyinfo->keylength+nod_flag); return (uint) (keypos-page)/(keyinfo->keylength+nod_flag); diff --git a/myisam/mi_rkey.c b/myisam/mi_rkey.c index 9aa2be3c706..575a5e34488 100644 --- a/myisam/mi_rkey.c +++ b/myisam/mi_rkey.c @@ -81,6 +81,7 @@ int mi_rkey(MI_INFO *info, byte *buf, int inx, const byte *key, uint key_len, case HA_KEY_ALG_RTREE: if (rtree_find_first(info,inx,key_buff,use_key_length,nextflag) < 0) { + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; goto err; } diff --git a/myisam/mi_rsame.c b/myisam/mi_rsame.c index 56c8d1226ca..321097744b9 100644 --- a/myisam/mi_rsame.c +++ b/myisam/mi_rsame.c @@ -30,7 +30,7 @@ int mi_rsame(MI_INFO *info, byte *record, int inx) { DBUG_ENTER("mi_rsame"); - if (inx != -1 && ! (((ulonglong) 1 << inx) & info->s->state.key_map)) + if (inx != -1 && ! mi_is_key_active(info->s->state.key_map, inx)) { DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); } diff --git a/myisam/mi_rsamepos.c b/myisam/mi_rsamepos.c index a1d96fb7104..35cdd41e297 100644 --- a/myisam/mi_rsamepos.c +++ b/myisam/mi_rsamepos.c @@ -32,7 +32,7 @@ int mi_rsame_with_pos(MI_INFO *info, byte *record, int inx, my_off_t filepos) { DBUG_ENTER("mi_rsame_with_pos"); - if (inx < -1 || ! (((ulonglong) 1 << inx) & info->s->state.key_map)) + if (inx < -1 || ! mi_is_key_active(info->s->state.key_map, inx)) { DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); } diff --git a/myisam/mi_search.c b/myisam/mi_search.c index 82177d331b7..185f196a814 100644 --- a/myisam/mi_search.c +++ b/myisam/mi_search.c @@ -29,7 +29,7 @@ int _mi_check_index(MI_INFO *info, int inx) { if (inx == -1) /* Use last index */ inx=info->lastinx; - if (inx < 0 || ! (((ulonglong) 1 << inx) & info->s->state.key_map)) + if (inx < 0 || ! mi_is_key_active(info->s->state.key_map, inx)) { my_errno=HA_ERR_WRONG_INDEX; return -1; @@ -159,6 +159,7 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, DBUG_PRINT("exit",("found key at %lu",(ulong) info->lastpos)); DBUG_RETURN(0); + err: DBUG_PRINT("exit",("Error: %d",my_errno)); info->lastpos= HA_OFFSET_ERROR; @@ -256,6 +257,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff); if (length == 0 || page > end) { + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_PRINT("error",("Found wrong key: length: %u page: %p end: %p", length, page, end)); @@ -395,7 +397,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, if (!(*from++)) continue; } - if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART | HA_SPACE_PACK)) + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) { get_key_length(l,from); } @@ -411,6 +413,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, if (page > end) { + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_PRINT("error",("Found wrong key: length: %u page: %p end: %p", length, page, end)); @@ -461,7 +464,8 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, if (len < cmplen) { if ((keyinfo->seg->type != HA_KEYTYPE_TEXT && - keyinfo->seg->type != HA_KEYTYPE_VARTEXT)) + keyinfo->seg->type != HA_KEYTYPE_VARTEXT1 && + keyinfo->seg->type != HA_KEYTYPE_VARTEXT2)) my_flag= -1; else { @@ -795,6 +799,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, { if (length > (uint) keyseg->length) { + mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return 0; /* Error */ } @@ -810,6 +815,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, ("Found too long null packed key: %u of %u at %p", length, keyseg->length, *page_pos)); DBUG_DUMP("key",(char*) *page_pos,16); + mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return 0; } @@ -866,6 +872,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_PRINT("error",("Found too long packed key: %u of %u at %p", length, keyseg->length, *page_pos)); DBUG_DUMP("key",(char*) *page_pos,16); + mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return 0; /* Error */ } @@ -879,7 +886,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, continue; } if (keyseg->flag & - (HA_VAR_LENGTH | HA_BLOB_PART | HA_SPACE_PACK)) + (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) { uchar *tmp=page; get_key_length(length,tmp); @@ -931,6 +938,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_PRINT("error",("Found too long binary packed key: %u of %u at %p", length, keyinfo->maxlength, *page_pos)); DBUG_DUMP("key",(char*) *page_pos,16); + mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(0); /* Wrong key */ } @@ -954,7 +962,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, if (!(*key++ = *from++)) continue; /* Null part */ } - if (keyseg->flag & (HA_VAR_LENGTH | HA_BLOB_PART | HA_SPACE_PACK)) + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) { /* Get length of dynamic length key part */ if (from == from_end) { from=page; from_end=page_end; } @@ -992,6 +1000,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, if (from_end != page_end) { DBUG_PRINT("error",("Error when unpacking key")); + mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(0); /* Error */ } @@ -1026,6 +1035,7 @@ uchar *_mi_get_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key); if (*return_key_length == 0) { + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(0); } @@ -1063,6 +1073,7 @@ static my_bool _mi_get_prev_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key); if (*return_key_length == 0) { + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(1); } @@ -1103,6 +1114,7 @@ uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, if (*return_key_length == 0) { DBUG_PRINT("error",("Couldn't find last key: page: %p", page)); + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(0); } @@ -1129,7 +1141,7 @@ uint _mi_keylength(MI_KEYDEF *keyinfo, register uchar *key) if (keyseg->flag & HA_NULL_PART) if (!*key++) continue; - if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH)) + if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART)) { uint length; get_key_length(length,key); @@ -1161,7 +1173,7 @@ uint _mi_keylength_part(MI_KEYDEF *keyinfo, register uchar *key, if (keyseg->flag & HA_NULL_PART) if (!*key++) continue; - if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH)) + if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART)) { uint length; get_key_length(length,key); @@ -1293,8 +1305,10 @@ int _mi_search_first(register MI_INFO *info, register MI_KEYDEF *keyinfo, page=info->buff+2+nod_flag; } while ((pos=_mi_kpos(nod_flag,page)) != HA_OFFSET_ERROR); - info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page, - info->lastkey); + if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page, + info->lastkey))) + DBUG_RETURN(-1); /* Crashed */ + info->int_keypos=page; info->int_maxpos=info->buff+mi_getint(info->buff)-1; info->int_nod_flag=nod_flag; info->int_keytree_version=keyinfo->version; @@ -1429,7 +1443,8 @@ _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,uchar *next_key, sort_order=0; if ((keyinfo->flag & HA_FULLTEXT) && ((keyseg->type == HA_KEYTYPE_TEXT) || - (keyseg->type == HA_KEYTYPE_VARTEXT)) && + (keyseg->type == HA_KEYTYPE_VARTEXT1) || + (keyseg->type == HA_KEYTYPE_VARTEXT2)) && !use_strnxfrm(keyseg->charset)) sort_order=keyseg->charset->sort_order; diff --git a/myisam/mi_static.c b/myisam/mi_static.c index 9725c120f44..fc585eb5543 100644 --- a/myisam/mi_static.c +++ b/myisam/mi_static.c @@ -31,14 +31,13 @@ uchar NEAR myisam_pack_file_magic[]= my_string myisam_log_filename=(char*) "myisam.log"; File myisam_log_file= -1; uint myisam_quick_table_bits=9; -uint myisam_block_size=MI_KEY_BLOCK_LENGTH; /* Best by test */ +ulong myisam_block_size= MI_KEY_BLOCK_LENGTH; /* Best by test */ my_bool myisam_flush=0, myisam_delay_key_write=0, myisam_single_user=0; #if defined(THREAD) && !defined(DONT_USE_RW_LOCKS) -my_bool myisam_concurrent_insert=1; +ulong myisam_concurrent_insert= 2; #else -my_bool myisam_concurrent_insert=0; +ulong myisam_concurrent_insert= 0; #endif -my_off_t myisam_max_extra_temp_length= (my_off_t)MI_MAX_TEMP_LENGTH; my_off_t myisam_max_temp_length= MAX_FILE_SIZE; ulong myisam_bulk_insert_tree_size=8192*1024; ulong myisam_data_pointer_size=4; diff --git a/myisam/mi_statrec.c b/myisam/mi_statrec.c index 8f5cde45e24..42352f63c66 100644 --- a/myisam/mi_statrec.c +++ b/myisam/mi_statrec.c @@ -23,7 +23,8 @@ int _mi_write_static_record(MI_INFO *info, const byte *record) { uchar temp[8]; /* max pointer length */ - if (info->s->state.dellink != HA_OFFSET_ERROR) + if (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) { my_off_t filepos=info->s->state.dellink; info->rec_cache.seek_not_done=1; /* We have done a seek */ diff --git a/myisam/mi_test1.c b/myisam/mi_test1.c index 77c4d3dfbad..5727c699469 100644 --- a/myisam/mi_test1.c +++ b/myisam/mi_test1.c @@ -75,11 +75,11 @@ static int run_test(const char *filename) recinfo[1].length= (key_field == FIELD_BLOB ? 4+mi_portable_sizeof_char_ptr : key_length); if (key_field == FIELD_VARCHAR) - recinfo[1].length+=2; + recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);; recinfo[2].type=extra_field; recinfo[2].length= (extra_field == FIELD_BLOB ? 4 + mi_portable_sizeof_char_ptr : 24); if (extra_field == FIELD_VARCHAR) - recinfo[2].length+=2; + recinfo[2].length+= HA_VARCHAR_PACKLENGTH(recinfo[2].length); if (opt_unique) { recinfo[3].type=FIELD_CHECK; @@ -88,6 +88,9 @@ static int run_test(const char *filename) rec_length=recinfo[0].length+recinfo[1].length+recinfo[2].length+ recinfo[3].length; + if (key_type == HA_KEYTYPE_VARTEXT1 && + key_length > 255) + key_type= HA_KEYTYPE_VARTEXT2; /* Define a key over the first column */ keyinfo[0].seg=keyseg; @@ -134,7 +137,7 @@ static int run_test(const char *filename) uniqueseg[1].flag|= HA_BLOB_PART; } else if (extra_field == FIELD_VARCHAR) - uniqueseg[1].flag|= HA_VAR_LENGTH; + uniqueseg[1].flag|= HA_VAR_LENGTH_PART; } else uniques=0; @@ -330,7 +333,8 @@ static void create_key_part(char *key,uint rownr) { sprintf(key,"%*d",keyinfo[0].seg[0].length,rownr); } - else if (keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT) + else if (keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT1 || + keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT2) { /* Alpha record */ /* Create a key that may be easily packed */ bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B'); @@ -372,7 +376,7 @@ static void create_key(char *key,uint rownr) } *key++=0; } - if (keyinfo[0].seg[0].flag & (HA_BLOB_PART | HA_VAR_LENGTH)) + if (keyinfo[0].seg[0].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) { uint tmp; create_key_part(key+2,rownr); @@ -410,11 +414,14 @@ static void create_record(char *record,uint rownr) } else if (recinfo[1].type == FIELD_VARCHAR) { - uint tmp; - create_key_part(pos+2,rownr); - tmp=strlen(pos+2); - int2store(pos,tmp); - pos+=recinfo[1].length; + uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + create_key_part(pos+pack_length,rownr); + tmp= strlen(pos+pack_length); + if (pack_length == 1) + *(uchar*) pos= (uchar) tmp; + else + int2store(pos,tmp); + pos+= recinfo[1].length; } else { @@ -434,10 +441,13 @@ static void create_record(char *record,uint rownr) } else if (recinfo[2].type == FIELD_VARCHAR) { - uint tmp; - sprintf(pos+2,"... row: %d", rownr); - tmp=strlen(pos+2); - int2store(pos,tmp); + uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + sprintf(pos+pack_length, "... row: %d", rownr); + tmp= strlen(pos+pack_length); + if (pack_length == 1) + *(uchar*) pos= (uchar) tmp; + else + int2store(pos,tmp); } else { @@ -461,19 +471,25 @@ static void update_record(char *record) ptr=blob_key; memcpy_fixed(pos+4,&ptr,sizeof(char*)); /* Store pointer to new key */ if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) - my_casedn(default_charset_info,blob_key,length); + default_charset_info->cset->casedn(default_charset_info, + blob_key, length, blob_key, length); pos+=recinfo[1].length; } else if (recinfo[1].type == FIELD_VARCHAR) { - uint length=uint2korr(pos); - my_casedn(default_charset_info,pos+2,length); + uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos); + default_charset_info->cset->casedn(default_charset_info, + pos + pack_length, length, + pos + pack_length, length); pos+=recinfo[1].length; } else { if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) - my_casedn(default_charset_info,pos,keyinfo[0].seg[0].length); + default_charset_info->cset->casedn(default_charset_info, + pos, keyinfo[0].seg[0].length, + pos, keyinfo[0].seg[0].length); pos+=recinfo[1].length; } @@ -493,10 +509,14 @@ static void update_record(char *record) else if (recinfo[2].type == FIELD_VARCHAR) { /* Second field is longer than 10 characters */ - uint length=uint2korr(pos); - bfill(pos+2+length,recinfo[2].length-length-2,'.'); - length=recinfo[2].length-2; - int2store(pos,length); + uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos); + bfill(pos+pack_length+length,recinfo[2].length-length-pack_length,'.'); + length=recinfo[2].length-pack_length; + if (pack_length == 1) + *(uchar*) pos= (uchar) length; + else + int2store(pos,length); } else { @@ -519,12 +539,12 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"insert_rows", 'i', "Undocumented", (gptr*) &insert_count, (gptr*) &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, - {"key_alpha", 'a', "Undocumented", + {"key_alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"key_binary_pack", 'B', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"key_blob", 'b', "Undocumented", - 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"key_cache", 'K', "Undocumented", (gptr*) &key_cacheing, (gptr*) &key_cacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"key_length", 'k', "Undocumented", (gptr*) &key_length, (gptr*) &key_length, @@ -535,9 +555,9 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"key_space_pack", 'p', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_varchar", 'w', "Undocumented", + {"key_varchar", 'w', "Test VARCHAR keys", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"null_fields", 'N', "Undocumented", + {"null_fields", 'N', "Define fields with NULL", (gptr*) &null_fields, (gptr*) &null_fields, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"row_fixed_size", 'S', "Undocumented", @@ -604,7 +624,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), key_field=FIELD_BLOB; /* blob key */ extra_field= FIELD_BLOB; pack_seg|= HA_BLOB_PART; - key_type= HA_KEYTYPE_VARTEXT; + key_type= HA_KEYTYPE_VARTEXT1; break; case 'k': if (key_length < 4 || key_length > MI_MAX_KEY_LENGTH) @@ -616,11 +636,11 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), case 'w': key_field=FIELD_VARCHAR; /* varchar keys */ extra_field= FIELD_VARCHAR; - key_type= HA_KEYTYPE_VARTEXT; - pack_seg|= HA_VAR_LENGTH; + key_type= HA_KEYTYPE_VARTEXT1; + pack_seg|= HA_VAR_LENGTH_PART; create_flag|= HA_PACK_RECORD; break; - case 'K': /* Use key cacheing */ + case 'K': /* Use key cacheing */ key_cacheing=1; break; case 'V': diff --git a/myisam/mi_test3.c b/myisam/mi_test3.c index 27d23317b5c..be4277cc65c 100644 --- a/myisam/mi_test3.c +++ b/myisam/mi_test3.c @@ -67,6 +67,7 @@ int main(int argc,char **argv) bzero((char*) keyinfo,sizeof(keyinfo)); bzero((char*) recinfo,sizeof(recinfo)); + bzero((char*) keyseg,sizeof(keyseg)); keyinfo[0].seg= &keyseg[0][0]; keyinfo[0].seg[0].start=0; keyinfo[0].seg[0].length=8; diff --git a/myisam/mi_test_all.res b/myisam/mi_test_all.res index 94355bf1aa2..16b517d3f76 100644 --- a/myisam/mi_test_all.res +++ b/myisam/mi_test_all.res @@ -1,3 +1,6 @@ +myisamchk: MyISAM file test1 +myisamchk: warning: Size of indexfile is: 1024 Should be: 2048 +MyISAM-table 'test1' is usable but should be fixed mi_test2 -s -L -K -R1 -m2000 ; Should give error 135 Error: 135 in write at record: 1105 got error: 135 when using MyISAM-database @@ -5,46 +8,46 @@ myisamchk: MyISAM file test2 myisamchk: warning: Datafile is almost full, 65532 of 65534 used MyISAM-table 'test2' is usable but should be fixed Commands Used count Errors Recover errors -open 17 0 0 -write 850 0 0 -update 85 0 0 -delete 850 0 0 -close 17 0 0 -extra 102 0 0 -Total 1921 0 0 +open 1 0 0 +write 50 0 0 +update 5 0 0 +delete 50 0 0 +close 1 0 0 +extra 6 0 0 +Total 113 0 0 Commands Used count Errors Recover errors -open 18 0 0 -write 900 0 0 -update 90 0 0 -delete 900 0 0 -close 18 0 0 -extra 108 0 0 -Total 2034 0 0 +open 2 0 0 +write 100 0 0 +update 10 0 0 +delete 100 0 0 +close 2 0 0 +extra 12 0 0 +Total 226 0 0 -real 0m1.054s -user 0m0.410s -sys 0m0.640s +real 0m0.791s +user 0m0.137s +sys 0m0.117s -real 0m1.077s -user 0m0.550s -sys 0m0.530s +real 0m0.659s +user 0m0.252s +sys 0m0.102s -real 0m1.100s -user 0m0.420s -sys 0m0.680s +real 0m0.571s +user 0m0.188s +sys 0m0.098s -real 0m0.783s -user 0m0.590s -sys 0m0.200s +real 0m1.111s +user 0m0.236s +sys 0m0.037s -real 0m0.764s -user 0m0.560s -sys 0m0.210s +real 0m0.621s +user 0m0.242s +sys 0m0.022s -real 0m0.699s -user 0m0.570s -sys 0m0.130s +real 0m0.698s +user 0m0.248s +sys 0m0.021s -real 0m0.991s -user 0m0.630s -sys 0m0.350s +real 0m0.683s +user 0m0.265s +sys 0m0.079s diff --git a/myisam/mi_unique.c b/myisam/mi_unique.c index ad685f4cbdc..34f5f595f30 100644 --- a/myisam/mi_unique.c +++ b/myisam/mi_unique.c @@ -64,7 +64,12 @@ my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, byte *record, } -/* Calculate a hash for a row */ +/* + Calculate a hash for a row + + TODO + Add support for bit fields +*/ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record) { @@ -93,10 +98,12 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record) } } pos= record+keyseg->start; - if (keyseg->flag & HA_VAR_LENGTH) + if (keyseg->flag & HA_VAR_LENGTH_PART) { - uint tmp_length=uint2korr(pos); - pos+=2; /* Skip VARCHAR length */ + uint pack_length= keyseg->bit_start; + uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos : + uint2korr(pos)); + pos+= pack_length; /* Skip VARCHAR length */ set_if_smaller(length,tmp_length); } else if (keyseg->flag & HA_BLOB_PART) @@ -107,7 +114,8 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record) length=tmp_length; /* The whole blob */ } end= pos+length; - if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT) + if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || + type == HA_KEYTYPE_VARTEXT2) { keyseg->charset->coll->hash_sort(keyseg->charset, (const uchar*) pos, length, &seed1, @@ -123,9 +131,17 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record) return crc; } - /* - Returns 0 if both rows have equal unique value - */ + +/* + compare unique key for two rows + + TODO + Add support for bit fields + + RETURN + 0 if both rows have equal unique value + # Rows are different +*/ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, my_bool null_are_equal) @@ -136,7 +152,8 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, for (keyseg=def->seg ; keyseg < def->end ; keyseg++) { enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; - uint length=keyseg->length; + uint a_length, b_length; + a_length= b_length= keyseg->length; /* If part is NULL it's regarded as different */ if (keyseg->null_bit) @@ -154,43 +171,59 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, } pos_a= a+keyseg->start; pos_b= b+keyseg->start; - if (keyseg->flag & HA_VAR_LENGTH) + if (keyseg->flag & HA_VAR_LENGTH_PART) { - uint tmp_length=uint2korr(pos_a); - if (tmp_length != uint2korr(pos_b)) - return 1; - pos_a+=2; /* Skip VARCHAR length */ - pos_b+=2; - set_if_smaller(length,tmp_length); + uint pack_length= keyseg->bit_start; + if (pack_length == 1) + { + a_length= (uint) *(uchar*) pos_a++; + b_length= (uint) *(uchar*) pos_b++; + } + else + { + a_length= uint2korr(pos_a); + b_length= uint2korr(pos_b); + pos_a+= 2; /* Skip VARCHAR length */ + pos_b+= 2; + } + set_if_smaller(a_length, keyseg->length); /* Safety */ + set_if_smaller(b_length, keyseg->length); /* safety */ } else if (keyseg->flag & HA_BLOB_PART) { - /* Only compare 'length' characters if length<> 0 */ - uint a_length= _mi_calc_blob_length(keyseg->bit_start,pos_a); - uint b_length= _mi_calc_blob_length(keyseg->bit_start,pos_b); + /* Only compare 'length' characters if length != 0 */ + a_length= _mi_calc_blob_length(keyseg->bit_start,pos_a); + b_length= _mi_calc_blob_length(keyseg->bit_start,pos_b); /* Check that a and b are of equal length */ - if (length && a_length > length) - a_length=length; - if (!length || length > b_length) - length=b_length; - if (length != a_length) - return 1; - /* Both strings are at least 'length' long */ + if (keyseg->length) + { + /* + This is used in some cases when we are not interested in comparing + the whole length of the blob. + */ + set_if_smaller(a_length, keyseg->length); + set_if_smaller(b_length, keyseg->length); + } memcpy_fixed((byte*) &pos_a,pos_a+keyseg->bit_start,sizeof(char*)); memcpy_fixed((byte*) &pos_b,pos_b+keyseg->bit_start,sizeof(char*)); } - if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT) + if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || + type == HA_KEYTYPE_VARTEXT2) { - if (mi_compare_text(keyseg->charset, (uchar *) pos_a, length, - (uchar *) pos_b, length, 0, 0)) - return 1; + if (mi_compare_text(keyseg->charset, (uchar *) pos_a, a_length, + (uchar *) pos_b, b_length, 0, 1)) + return 1; } else { - end= pos_a+length; + if (a_length != b_length) + return 1; + end= pos_a+a_length; while (pos_a != end) + { if (*pos_a++ != *pos_b++) return 1; + } } } return 0; diff --git a/myisam/mi_update.c b/myisam/mi_update.c index f62be133ed9..ab23f2e6da9 100644 --- a/myisam/mi_update.c +++ b/myisam/mi_update.c @@ -34,6 +34,9 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec) LINT_INIT(changed); LINT_INIT(old_checksum); + DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_usage", + mi_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); if (!(info->update & HA_STATE_AKTIV)) { DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); @@ -84,7 +87,7 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec) changed=0; for (i=0 ; i < share->base.keys ; i++) { - if (((ulonglong) 1 << i) & share->state.key_map) + if (mi_is_key_active(share->state.key_map, i)) { if (share->keyinfo[i].flag & HA_FULLTEXT ) { @@ -205,7 +208,10 @@ err: } while (i-- != 0); } else + { + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); + } info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED | key_changed); @@ -214,6 +220,9 @@ err: VOID(_mi_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); allow_break(); /* Allow SIGHUP & SIGINT */ if (save_errno == HA_ERR_KEY_NOT_FOUND) + { + mi_print_error(info->s, HA_ERR_CRASHED); save_errno=HA_ERR_CRASHED; + } DBUG_RETURN(my_errno=save_errno); } /* mi_update */ diff --git a/myisam/mi_write.c b/myisam/mi_write.c index cd9e73fba22..c8f9aa84a41 100644 --- a/myisam/mi_write.c +++ b/myisam/mi_write.c @@ -52,6 +52,9 @@ int mi_write(MI_INFO *info, byte *record) DBUG_ENTER("mi_write"); DBUG_PRINT("enter",("isam: %d data: %d",info->s->kfile,info->dfile)); + DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_usage", + mi_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); if (share->options & HA_OPTION_READ_ONLY_DATA) { DBUG_RETURN(my_errno=EACCES); @@ -64,7 +67,8 @@ int mi_write(MI_INFO *info, byte *record) MYF(MY_SEEK_NOT_DONE) | info->lock_wait)) goto err; #endif - filepos= ((share->state.dellink != HA_OFFSET_ERROR) ? + filepos= ((share->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) ? share->state.dellink : info->state->data_file_length); @@ -97,7 +101,7 @@ int mi_write(MI_INFO *info, byte *record) buff=info->lastkey2; for (i=0 ; i < share->base.keys ; i++) { - if (((ulonglong) 1 << i) & share->state.key_map) + if (mi_is_key_active(share->state.key_map, i)) { bool local_lock_tree= (lock_tree && !(info->bulk_insert && @@ -171,7 +175,7 @@ err: info->errkey= (int) i; while ( i-- > 0) { - if (((ulonglong) 1 << i) & share->state.key_map) + if (mi_is_key_active(share->state.key_map, i)) { bool local_lock_tree= (lock_tree && !(info->bulk_insert && @@ -203,7 +207,10 @@ err: } } else + { + mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); + } info->update= (HA_STATE_CHANGED | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED); my_errno=save_errno; err2: @@ -249,7 +256,7 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key, comp_flag=SEARCH_BIGGER; /* Put after same key */ else if (keyinfo->flag & (HA_NOSAME|HA_FULLTEXT)) { - comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No dupplicates */ + comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */ if (keyinfo->flag & HA_NULL_ARE_EQUAL) comp_flag|= SEARCH_NULL_ARE_EQUAL; } @@ -348,6 +355,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, dupp_key_pos=_mi_dpos(info,0,keybuff+tmp_key_length); else dupp_key_pos= HA_OFFSET_ERROR; + if (keyinfo->flag & HA_FULLTEXT) { uint off; @@ -478,6 +486,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, { if (t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) { + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(-1); } @@ -487,6 +496,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, { if (-t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) { + mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(-1); } @@ -582,6 +592,7 @@ int _mi_split_page(register MI_INFO *info, register MI_KEYDEF *keyinfo, &after_key); if (!key_pos) DBUG_RETURN(-1); + length=(uint) (key_pos-buff); a_length=mi_getint(buff); mi_putint(buff,length,nod_flag); @@ -602,6 +613,7 @@ int _mi_split_page(register MI_INFO *info, register MI_KEYDEF *keyinfo, /* Store new page */ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&key_pos,key_buff)) DBUG_RETURN(-1); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar *) 0, (uchar*) 0, (uchar*) 0, key_buff, &s_temp); @@ -708,6 +720,7 @@ static uchar *_mi_find_last_pos(MI_KEYDEF *keyinfo, uchar *page, memcpy(key, key_buff, length); /* previous key */ if (!(length=(*keyinfo->get_key)(keyinfo,0,&page,key_buff))) { + mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(0); } @@ -931,20 +944,21 @@ int mi_init_bulk_insert(MI_INFO *info, ulong cache_size, ha_rows rows) MI_KEYDEF *key=share->keyinfo; bulk_insert_param *params; uint i, num_keys, total_keylength; - ulonglong key_map=0; + ulonglong key_map; DBUG_ENTER("_mi_init_bulk_insert"); DBUG_PRINT("enter",("cache_size: %lu", cache_size)); DBUG_ASSERT(!info->bulk_insert && (!rows || rows >= MI_MIN_ROWS_TO_USE_BULK_INSERT)); + mi_clear_all_keys_active(key_map); for (i=total_keylength=num_keys=0 ; i < share->base.keys ; i++) { - if (!(key[i].flag & HA_NOSAME) && share->base.auto_key != i+1 - && test(share->state.key_map & ((ulonglong) 1 << i))) + if (! (key[i].flag & HA_NOSAME) && (share->base.auto_key != i + 1) && + mi_is_key_active(share->state.key_map, i)) { num_keys++; - key_map |=((ulonglong) 1 << i); + mi_set_key_active(key_map, i); total_keylength+=key[i].maxlength+TREE_ELEMENT_EXTRA_SIZE; } } @@ -968,7 +982,7 @@ int mi_init_bulk_insert(MI_INFO *info, ulong cache_size, ha_rows rows) params=(bulk_insert_param *)(info->bulk_insert+share->base.keys); for (i=0 ; i < share->base.keys ; i++) { - if (test(key_map & ((ulonglong) 1 << i))) + if (mi_is_key_active(key_map, i)) { params->info=info; params->keynr=i; diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c index 2ffc491fc91..10308408b1f 100644 --- a/myisam/myisamchk.c +++ b/myisam/myisamchk.c @@ -50,7 +50,7 @@ static int stopwords_inited= 0; static MY_TMPDIR myisamchk_tmpdir; static const char *type_names[]= -{ "?","char","binary", "short", "long", "float", +{ "impossible","char","binary", "short", "long", "float", "double","number","unsigned short", "unsigned long","longlong","ulonglong","int24", "uint24","int8","varchar", "varbin","?", @@ -705,8 +705,7 @@ get_one_option(int optid, fprintf(stderr, "Invalid value of stats_method: %s.\n", argument); exit(1); } - check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL : - MI_STATS_METHOD_NULLS_NOT_EQUAL; + check_param.stats_method= method-1; break; } #ifdef DEBUG /* Only useful if debugging */ @@ -905,8 +904,8 @@ static int myisamchk(MI_CHECK *param, my_string filename) MI_STATE_INFO_SIZE || mi_uint2korr(share->state.header.base_info_length) != MI_BASE_INFO_SIZE || - ((param->keys_in_use & ~share->state.key_map) & - (((ulonglong) 1L << share->base.keys)-1)) || + mi_is_any_intersect_keys_active(param->keys_in_use, share->base.keys, + ~share->state.key_map) || test_if_almost_full(info) || info->s->state.header.file_version[3] != myisam_file_magic[3] || (set_collation && @@ -973,8 +972,8 @@ static int myisamchk(MI_CHECK *param, my_string filename) if (param->testflag & T_REP_ANY) { ulonglong tmp=share->state.key_map; - share->state.key_map= (((ulonglong) 1 << share->base.keys)-1) - & param->keys_in_use; + mi_copy_keys_active(share->state.key_map, share->base.keys, + param->keys_in_use); if (tmp != share->state.key_map) info->update|=HA_STATE_CHANGED; } @@ -995,7 +994,7 @@ static int myisamchk(MI_CHECK *param, my_string filename) if (!error) { if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) && - (share->state.key_map || + (mi_is_any_key_active(share->state.key_map) || (rep_quick && !param->keys_in_use && !recreate)) && mi_test_if_sort_rep(info, info->state->records, info->s->state.key_map, @@ -1071,7 +1070,7 @@ static int myisamchk(MI_CHECK *param, my_string filename) llstr(info->state->records,llbuff), llstr(info->state->del,llbuff2)); error =chk_status(param,info); - share->state.key_map &=param->keys_in_use; + mi_intersect_keys_active(share->state.key_map, param->keys_in_use); error =chk_size(param,info); if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE))) error|=chk_del(param, info,param->testflag); @@ -1300,7 +1299,7 @@ static void descript(MI_CHECK *param, register MI_INFO *info, my_string name) } printf("Recordlength: %13d\n",(int) share->base.pack_reclength); - if (share->state.key_map != (((ulonglong) 1 << share->base.keys) -1)) + if (! mi_is_all_keys_active(share->state.key_map, share->base.keys)) { longlong2str(share->state.key_map,buff,2); printf("Using only keys '%s' of %d possibly keys\n", @@ -1482,7 +1481,7 @@ static int mi_sort_records(MI_CHECK *param, temp_buff=0; new_file= -1; - if (!(((ulonglong) 1 << sort_key) & share->state.key_map)) + if (! mi_is_key_active(share->state.key_map, sort_key)) { mi_check_print_warning(param, "Can't sort table '%s' on key %d; No such key", @@ -1733,11 +1732,11 @@ err: sorting */ -static my_bool not_killed= 0; +static int not_killed= 0; -volatile my_bool *killed_ptr(MI_CHECK *param __attribute__((unused))) +volatile int *killed_ptr(MI_CHECK *param __attribute__((unused))) { - return ¬_killed; /* always NULL */ + return ¬_killed; /* always NULL */ } /* print warnings and errors */ diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h index 93a7bf96f59..6b040f2ef0e 100644 --- a/myisam/myisamdef.h +++ b/myisam/myisamdef.h @@ -273,6 +273,7 @@ struct st_myisam_info { uint preload_buff_size; /* When preloading indexes */ myf lock_wait; /* is 0 or MY_DONT_WAIT */ my_bool was_locked; /* Was locked in panic */ + my_bool append_insert_at_end; /* Set if concurrent insert */ my_bool quick_mode; my_bool page_changed; /* If info->buff can't be used for rnext */ my_bool buff_used; /* If info->buff has to be reread for rnext */ @@ -358,6 +359,8 @@ typedef struct st_mi_sort_param #define mi_mark_crashed_on_repair(x) { (x)->s->state.changed|=STATE_CRASHED|STATE_CRASHED_ON_REPAIR ; (x)->update|= HA_STATE_CHANGED; } #define mi_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED) #define mi_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR) +#define mi_print_error(SHARE, ERRNO) \ + mi_report_error((ERRNO), (SHARE)->index_file_name) /* Functions to store length of space packed keys, VARCHAR or BLOB keys */ @@ -669,6 +672,7 @@ extern void _myisam_log_command(enum myisam_log_commands command, extern void _myisam_log_record(enum myisam_log_commands command,MI_INFO *info, const byte *record,my_off_t filepos, int result); +extern void mi_report_error(int errcode, const char *file_name); extern my_bool _mi_memmap_file(MI_INFO *info); extern void _mi_unmap_file(MI_INFO *info); extern uint save_pack_length(uint version, byte *block_buff, ulong length); @@ -676,10 +680,10 @@ extern uint read_pack_length(uint version, const uchar *buf, ulong *length); extern uint calc_pack_length(uint version, ulong length); uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite); -char *mi_state_info_read(char *ptr, MI_STATE_INFO *state); +char *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state); uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead); uint mi_base_info_write(File file, MI_BASE_INFO *base); -char *my_n_base_info_read(char *ptr, MI_BASE_INFO *base); +char *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base); int mi_keyseg_write(File file, const HA_KEYSEG *keyseg); char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg); uint mi_keydef_write(File file, MI_KEYDEF *keydef); @@ -703,7 +707,7 @@ int _mi_cmp_dynamic_unique(MI_INFO *info, MI_UNIQUEDEF *def, const byte *record, my_off_t pos); int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, my_bool null_are_equal); -void mi_get_status(void* param); +void mi_get_status(void* param, int concurrent_insert); void mi_update_status(void* param); void mi_copy_status(void* to,void *from); my_bool mi_check_status(void* param); @@ -716,7 +720,7 @@ int mi_open_keyfile(MYISAM_SHARE *share); void mi_setup_functions(register MYISAM_SHARE *share); /* Functions needed by mi_check */ -volatile my_bool *killed_ptr(MI_CHECK *param); +volatile int *killed_ptr(MI_CHECK *param); void mi_check_print_error _VARARGS((MI_CHECK *param, const char *fmt,...)); void mi_check_print_warning _VARARGS((MI_CHECK *param, const char *fmt,...)); void mi_check_print_info _VARARGS((MI_CHECK *param, const char *fmt,...)); diff --git a/myisam/myisamlog.c b/myisam/myisamlog.c index dc98d813266..de55b86252c 100644 --- a/myisam/myisamlog.c +++ b/myisam/myisamlog.c @@ -810,7 +810,7 @@ static int find_record_with_key(struct file_info *file_info, byte *record) for (key=0 ; key < info->s->base.keys ; key++) { - if ((((ulonglong) 1 << key) & info->s->state.key_map) && + if (mi_is_key_active(info->s->state.key_map, key) && info->s->keyinfo[key].flag & HA_NOSAME) { VOID(_mi_make_key(info,key,tmp_key,record,0L)); diff --git a/myisam/myisampack.c b/myisam/myisampack.c index f017e4d23ab..3b091cd6ea2 100644 --- a/myisam/myisampack.c +++ b/myisam/myisampack.c @@ -33,10 +33,10 @@ #include <my_getopt.h> #include <assert.h> -#if INT_MAX > 32767 -#define BITS_SAVED 32 +#if SIZEOF_LONG_LONG > 4 +#define BITS_SAVED 64 #else -#define BITS_SAVED 16 +#define BITS_SAVED 32 #endif #define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */ @@ -49,10 +49,10 @@ struct st_file_buffer { File file; - char *buffer,*pos,*end; + uchar *buffer,*pos,*end; my_off_t pos_in_file; int bits; - uint current_byte; + ulonglong bitbucket; }; struct st_huff_tree; @@ -69,13 +69,17 @@ typedef struct st_huff_counts { my_off_t end_space[8]; my_off_t pre_space[8]; my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed; - TREE int_tree; - byte *tree_buff; - byte *tree_pos; + TREE int_tree; /* Tree for detecting distinct column values. */ + byte *tree_buff; /* Column values, 'field_length' each. */ + byte *tree_pos; /* Points to end of column values in 'tree_buff'. */ } HUFF_COUNTS; typedef struct st_huff_element HUFF_ELEMENT; +/* + WARNING: It is crucial for the optimizations in calc_packed_length() + that 'count' is the first element of 'HUFF_ELEMENT'. +*/ struct st_huff_element { my_off_t count; union un_element { @@ -98,7 +102,7 @@ typedef struct st_huff_tree { my_off_t bytes_packed; uint tree_pack_length; uint min_chr,max_chr,char_bits,offset_bits,max_offset,height; - ulong *code; + ulonglong *code; uchar *code_len; } HUFF_TREE; @@ -146,7 +150,7 @@ static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees); static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees); static void make_traverse_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,uint size, - ulong code); + ulonglong code); static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees, my_off_t tot_elements,my_off_t filelength); static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees); @@ -161,7 +165,7 @@ static char *make_old_name(char *new_name,char *old_name); static void init_file_buffer(File file,pbool read_buffer); static int flush_buffer(ulong neaded_length); static void end_file_buffer(void); -static void write_bits(ulong value,uint bits); +static void write_bits(ulonglong value, uint bits); static void flush_bits(void); static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, ha_checksum crc); @@ -170,13 +174,23 @@ static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length static int mrg_close(PACK_MRG_INFO *mrg); static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf); static void mrg_reset(PACK_MRG_INFO *mrg); +#if !defined(DBUG_OFF) +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count); +static int fakecmp(my_off_t **count1, my_off_t **count2); +#endif static int error_on_write=0,test_only=0,verbose=0,silent=0, write_loop=0,force_pack=0, isamchk_neaded=0; static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; static my_bool backup, opt_wait; -static uint tree_buff_length=8196-MALLOC_OVERHEAD; +/* + tree_buff_length is somewhat arbitrary. The bigger it is the better + the chance to win in terms of compression factor. On the other hand, + this table becomes part of the compressed file header. And its length + is coded with 16 bits in the header. Hence the limit is 2**16 - 1. +*/ +static uint tree_buff_length= 65536 - MALLOC_OVERHEAD; static char tmp_dir[FN_REFLEN]={0},*join_table; static my_off_t intervall_length; static ha_checksum glob_crc; @@ -225,7 +239,8 @@ int main(int argc, char **argv) } if (ok && isamchk_neaded && !silent) puts("Remember to run myisamchk -rq on compressed tables"); - VOID(fflush(stdout)); VOID(fflush(stderr)); + VOID(fflush(stdout)); + VOID(fflush(stderr)); free_defaults(default_argv); my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); exit(error ? 2 : 0); @@ -264,7 +279,7 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"test", 't', "Don't pack table, only test packing it.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"verbose", 'v', "Write info about progress and packing result.", + {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"version", 'V', "Output version information and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -277,7 +292,8 @@ static struct my_option my_long_options[] = static void print_version(void) { - printf("%s Ver 1.22 for %s on %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE); + VOID(printf("%s Ver 1.23 for %s on %s\n", + my_progname, SYSTEM_TYPE, MACHINE_TYPE)); NETWARE_SET_SCREEN_MODE(1); } @@ -294,7 +310,7 @@ static void usage(void) puts("afterwards to update the keys."); puts("You should give the .MYI file as the filename argument."); - printf("\nUsage: %s [OPTIONS] filename...\n", my_progname); + VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname)); my_print_help(my_long_options); print_defaults("my", load_default_groups); my_print_variables(my_long_options); @@ -323,7 +339,10 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), silent= 1; break; case 't': - test_only= verbose= 1; + test_only= 1; + /* Avoid to reset 'verbose' if it was already set > 1. */ + if (! verbose) + verbose= 1; break; case 'T': length= (uint) (strmov(tmp_dir, argument) - tmp_dir); @@ -334,7 +353,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), } break; case 'v': - verbose= 1; + verbose++; /* Allow for selecting the level of verbosity. */ silent= 0; break; case '#': @@ -389,7 +408,7 @@ static MI_INFO *open_isam_file(char *name,int mode) (opt_wait ? HA_OPEN_WAIT_IF_LOCKED : HA_OPEN_ABORT_IF_LOCKED)))) { - VOID(fprintf(stderr,"%s gave error %d on open\n",name,my_errno)); + VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno)); DBUG_RETURN(0); } share=isam_file->s; @@ -397,7 +416,7 @@ static MI_INFO *open_isam_file(char *name,int mode) { if (!force_pack) { - VOID(fprintf(stderr,"%s is already compressed\n",name)); + VOID(fprintf(stderr, "%s is already compressed\n", name)); VOID(mi_close(isam_file)); DBUG_RETURN(0); } @@ -409,7 +428,7 @@ static MI_INFO *open_isam_file(char *name,int mode) (share->state.state.records <= 1 || share->state.state.data_file_length < 1024)) { - VOID(fprintf(stderr,"%s is too small to compress\n",name)); + VOID(fprintf(stderr, "%s is too small to compress\n", name)); VOID(mi_close(isam_file)); DBUG_RETURN(0); } @@ -431,9 +450,9 @@ static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count) if (!(mrg->file[i]=open_isam_file(names[i],O_RDONLY))) goto error; - mrg->src_file_has_indexes_disabled|= ((mrg->file[i]->s->state.key_map != - (((ulonglong) 1) << - mrg->file[i]->s->base. keys) - 1)); + mrg->src_file_has_indexes_disabled|= + ! mi_is_all_keys_active(mrg->file[i]->s->state.key_map, + mrg->file[i]->s->base.keys); } /* Check that files are identical */ for (j=0 ; j < count-1 ; j++) @@ -455,8 +474,8 @@ static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count) return 0; diff_file: - fprintf(stderr,"%s: Tables '%s' and '%s' are not identical\n", - my_progname,names[j],names[j+1]); + VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n", + my_progname, names[j], names[j+1])); error: while (i--) mi_close(mrg->file[i]); @@ -527,16 +546,25 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) mrg->records=0; for (i=0 ; i < mrg->count ; i++) mrg->records+=mrg->file[i]->s->state.state.records; + + DBUG_PRINT("info", ("Compressing %s: (%lu records)", + result_table ? new_name : org_name, + (ulong) mrg->records)); if (write_loop || verbose) { - printf("Compressing %s: (%lu records)\n", - result_table ? new_name : org_name,(ulong) mrg->records); + VOID(printf("Compressing %s: (%lu records)\n", + result_table ? new_name : org_name, (ulong) mrg->records)); } trees=fields=share->base.fields; huff_counts=init_huff_count(isam_file,mrg->records); QUICK_SAFEMALLOC; + + /* + Read the whole data file(s) for statistics. + */ + DBUG_PRINT("info", ("- Calculating statistics")); if (write_loop || verbose) - printf("- Calculating statistics\n"); + VOID(printf("- Calculating statistics\n")); if (get_statistic(mrg,huff_counts)) goto err; NORMAL_SAFEMALLOC; @@ -545,29 +573,74 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) old_length+= (mrg->file[i]->s->state.state.data_file_length - mrg->file[i]->s->state.state.empty); + /* + Create a global priority queue in preparation for making + temporary Huffman trees. + */ if (init_queue(&queue,256,0,0,compare_huff_elements,0)) goto err; + + /* + Check each column if we should use pre-space-compress, end-space- + compress, empty-field-compress or zero-field-compress. + */ check_counts(huff_counts,fields,mrg->records); + + /* + Build a Huffman tree for each column. + */ huff_trees=make_huff_trees(huff_counts,trees); + + /* + If the packed lengths of combined columns is less then the sum of + the non-combined columns, then create common Huffman trees for them. + We do this only for byte compressed columns, not for distinct values + compressed columns. + */ if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0) goto err; + + /* + Assign codes to all byte or column values. + */ if (make_huff_decode_table(huff_trees,fields)) goto err; + /* Prepare a file buffer. */ init_file_buffer(new_file,0); + + /* + Reserve space in the target file for the fixed compressed file header. + */ file_buffer.pos_in_file=HEAD_LENGTH; if (! test_only) VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0))); + /* + Write field infos: field type, pack type, length bits, tree number. + */ write_field_info(huff_counts,fields,used_trees); + + /* + Write decode trees. + */ if (!(tot_elements=write_huff_tree(huff_trees,trees))) goto err; + + /* + Calculate the total length of the compression info header. + This includes the fixed compressed file header, the column compression + type descriptions, and the decode trees. + */ header_length=(uint) file_buffer.pos_in_file+ (uint) (file_buffer.pos-file_buffer.buffer); - /* Compress file */ + /* + Compress the source file into the target file. + */ + DBUG_PRINT("info", ("- Compressing file")); if (write_loop || verbose) - printf("- Compressing file\n"); + VOID(printf("- Compressing file\n")); error=compress_isam_file(mrg,huff_counts); new_length=file_buffer.pos_in_file; if (!error && !test_only) @@ -577,16 +650,28 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) error=my_write(file_buffer.file,buff,sizeof(buff), MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0; } + + /* + Write the fixed compressed file header. + */ if (!error) error=write_header(mrg,header_length,used_trees,tot_elements, new_length); + + /* Flush the file buffer. */ end_file_buffer(); + /* Display statistics. */ + DBUG_PRINT("info", ("Min record length: %6d Max length: %6d " + "Mean total length: %6ld\n", + mrg->min_pack_length, mrg->max_pack_length, + (ulong) (mrg->records ? (new_length/mrg->records) : 0))); if (verbose && mrg->records) - printf("Min record length: %6d Max length: %6d Mean total length: %6ld\n", - mrg->min_pack_length,mrg->max_pack_length, - (ulong) (new_length/mrg->records)); + VOID(printf("Min record length: %6d Max length: %6d " + "Mean total length: %6ld\n", mrg->min_pack_length, + mrg->max_pack_length, (ulong) (new_length/mrg->records))); + /* Close source and target file. */ if (!test_only) { error|=my_close(new_file,MYF(MY_WME)); @@ -597,6 +682,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) } } + /* Cleanup. */ free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields); if (! test_only && ! error) { @@ -646,15 +732,16 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) error|=my_close(join_isam_file,MYF(MY_WME)); if (error) { - VOID(fprintf(stderr,"Aborting: %s is not compressed\n",org_name)); + VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name)); VOID(my_delete(new_name,MYF(MY_WME))); DBUG_RETURN(-1); } if (write_loop || verbose) { if (old_length) - printf("%.4g%% \n", (((longlong) (old_length -new_length))*100.0/ - (longlong) old_length)); + VOID(printf("%.4g%% \n", + (((longlong) (old_length - new_length)) * 100.0 / + (longlong) old_length))); else puts("Empty file saved in compressed format"); } @@ -667,7 +754,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) if (join_isam_file >= 0) VOID(my_close(join_isam_file,MYF(0))); mrg_close(mrg); - VOID(fprintf(stderr,"Aborted: %s is not compressed\n",org_name)); + VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name)); DBUG_RETURN(-1); } @@ -694,6 +781,12 @@ static HUFF_COUNTS *init_huff_count(MI_INFO *info,my_off_t records) (type == FIELD_NORMAL || type == FIELD_SKIP_ZERO)) count[i].max_zero_fill= count[i].field_length; + /* + For every column initialize a tree, which is used to detect distinct + column values. 'int_tree' works together with 'tree_buff' and + 'tree_pos'. It's keys are implemented by pointers into 'tree_buff'. + This is accomplished by '-1' as the element size. + */ init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL, NULL); if (records && type != FIELD_BLOB && type != FIELD_VARCHAR) @@ -765,7 +858,8 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) static_row_size=1; for (count=huff_counts ; count < end_count ; count++) { - if (count->field_type == FIELD_BLOB || count->field_type == FIELD_VARCHAR) + if (count->field_type == FIELD_BLOB || + count->field_type == FIELD_VARCHAR) { static_row_size=0; break; @@ -778,10 +872,13 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) ulong tot_blob_length=0; if (! error) { + /* glob_crc is a checksum over all bytes of all records. */ if (static_row_size) glob_crc+=mi_static_checksum(mrg->file[0],record); else glob_crc+=mi_checksum(mrg->file[0],record); + + /* Count the incidence of values separately for every column. */ for (pos=record,count=huff_counts ; count < end_count ; count++, @@ -789,15 +886,48 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) { next_pos=end_pos=(start_pos=pos)+count->field_length; - /* Put value in tree if there is room for it */ + /* + Put the whole column value in a tree if there is room for it. + 'int_tree' is used to quickly check for duplicate values. + 'tree_buff' collects as many distinct column values as + possible. If the field length is > 1, it is tree_buff_length, + else 2 bytes. Each value is 'field_length' bytes big. If there + are more distinct column values than fit into the buffer, we + give up with this tree. BLOBs and VARCHARs do not have a + tree_buff as it can only be used with fixed length columns. + For the special case of field length == 1, we handle only the + case that there is only one distinct value in the table(s). + Otherwise, we can have a maximum of 256 distinct values. This + is then handled by the normal Huffman tree build. + + Another limit for collecting distinct column values is the + number of values itself. Since we would need to build a + Huffman tree for the values, we are limited by the 'IS_OFFSET' + constant. This constant expresses a bit which is used to + determine if a tree element holds a final value or an offset + to a child element. Hence, all values and offsets need to be + smaller than 'IS_OFFSET'. A tree element is implemented with + two integer values, one for the left branch and one for the + right branch. For the extreme case that the first element + points to the last element, the number of integers in the tree + must be less or equal to IS_OFFSET. So the number of elements + must be less or equal to IS_OFFSET / 2. + + WARNING: At first, we insert a pointer into the record buffer + as the key for the tree. If we got a new distinct value, which + is really inserted into the tree, instead of being counted + only, we will copy the column value from the record buffer to + 'tree_buff' and adjust the key pointer of the tree accordingly. + */ if (count->tree_buff) { global_count=count; if (!(element=tree_insert(&count->int_tree,pos, 0, count->int_tree.custom_arg)) || (element->count == 1 && - count->tree_buff + tree_buff_length < - count->tree_pos + count->field_length) || + (count->tree_buff + tree_buff_length < + count->tree_pos + count->field_length)) || + (count->int_tree.elements_in_tree > IS_OFFSET / 2) || (count->field_length == 1 && count->int_tree.elements_in_tree > 1)) { @@ -807,10 +937,17 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) } else { + /* + If tree_insert() succeeds, it either creates a new element + or increments the counter of an existing element. + */ if (element->count == 1) - { /* New element */ + { + /* Copy the new column value into 'tree_buff'. */ memcpy(count->tree_pos,pos,(size_t) count->field_length); + /* Adjust the key pointer in the tree. */ tree_set_pointer(element,count->tree_pos); + /* Point behind the last column value so far. */ count->tree_pos+=count->field_length; } } @@ -820,15 +957,21 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) if (count->field_type == FIELD_NORMAL || count->field_type == FIELD_SKIP_ENDSPACE) { + /* Ignore trailing space. */ for ( ; end_pos > pos ; end_pos--) if (end_pos[-1] != ' ') break; + /* Empty fields are just counted. Go to the next record. */ if (end_pos == pos) { count->empty_fields++; count->max_zero_fill=0; continue; } + /* + Count the total of all trailing spaces and the number of + short trailing spaces. Remember the longest trailing space. + */ length= (uint) (next_pos-end_pos); count->tot_end_space+=length; if (length < 8) @@ -836,18 +979,25 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) if (count->max_end_space < length) count->max_end_space = length; } + if (count->field_type == FIELD_NORMAL || count->field_type == FIELD_SKIP_PRESPACE) { + /* Ignore leading space. */ for (pos=start_pos; pos < end_pos ; pos++) if (pos[0] != ' ') break; + /* Empty fields are just counted. Go to the next record. */ if (end_pos == pos) { count->empty_fields++; count->max_zero_fill=0; continue; } + /* + Count the total of all leading spaces and the number of + short leading spaces. Remember the longest leading space. + */ length= (uint) (pos-start_pos); count->tot_pre_space+=length; if (length < 8) @@ -855,6 +1005,8 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) if (count->max_pre_space < length) count->max_pre_space = length; } + + /* Calculate pos, end_pos, and max_length for variable length fields. */ if (count->field_type == FIELD_BLOB) { uint field_length=count->field_length -mi_portable_sizeof_char_ptr; @@ -866,50 +1018,128 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) } else if (count->field_type == FIELD_VARCHAR) { - length=uint2korr(start_pos); - pos=start_pos+2; - end_pos=start_pos+length; + uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); + length= (pack_length == 1 ? (uint) *(uchar*) start_pos : + uint2korr(start_pos)); + pos= start_pos+pack_length; + end_pos= pos+length; set_if_bigger(count->max_length,length); } + + /* Evaluate 'max_zero_fill' for short fields. */ if (count->field_length <= 8 && (count->field_type == FIELD_NORMAL || count->field_type == FIELD_SKIP_ZERO)) { uint i; + /* Zero fields are just counted. Go to the next record. */ if (!memcmp((byte*) start_pos,zero_string,count->field_length)) { count->zero_fields++; continue; } + /* + max_zero_fill starts with field_length. It is decreased every + time a shorter "zero trailer" is found. It is set to zero when + an empty field is found (see above). This suggests that the + variable should be called 'min_zero_fill'. + */ for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ; i++) ; if (i < count->max_zero_fill) count->max_zero_fill=i; } + + /* Ignore zero fields and check fields. */ if (count->field_type == FIELD_ZERO || count->field_type == FIELD_CHECK) continue; + + /* + Count the incidence of every byte value in the + significant field value. + */ for ( ; pos < end_pos ; pos++) count->counts[(uchar) *pos]++; + + /* Step to next field. */ } + if (tot_blob_length > max_blob_length) max_blob_length=tot_blob_length; record_count++; if (write_loop && record_count % WRITE_COUNT == 0) { - printf("%lu\r",(ulong) record_count); VOID(fflush(stdout)); + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); } } else if (error != HA_ERR_RECORD_DELETED) { - fprintf(stderr,"Got error %d while reading rows",error); + VOID(fprintf(stderr, "Got error %d while reading rows", error)); break; } + + /* Step to next record. */ } if (write_loop) { - printf(" \r"); VOID(fflush(stdout)); + VOID(printf(" \r")); + VOID(fflush(stdout)); } + + /* + If --debug=d,fakebigcodes is set, fake the counts to get big Huffman + codes. + */ + DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count);); + + DBUG_PRINT("info", ("Found the following number of incidents " + "of the byte codes:")); + if (verbose >= 2) + VOID(printf("Found the following number of incidents " + "of the byte codes:\n")); + for (count= huff_counts ; count < end_count; count++) + { + uint idx; + my_off_t total_count; + char llbuf[32]; + + DBUG_PRINT("info", ("column: %3u", count - huff_counts + 1)); + if (verbose >= 2) + VOID(printf("column: %3u\n", count - huff_counts + 1)); + if (count->tree_buff) + { + DBUG_PRINT("info", ("number of distinct values: %u", + (count->tree_pos - count->tree_buff) / + count->field_length)); + if (verbose >= 2) + VOID(printf("number of distinct values: %u\n", + (count->tree_pos - count->tree_buff) / + count->field_length)); + } + total_count= 0; + for (idx= 0; idx < 256; idx++) + { + if (count->counts[idx]) + { + total_count+= count->counts[idx]; + DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx, + llstr((longlong) count->counts[idx], llbuf))); + if (verbose >= 2) + VOID(printf("counts[0x%02x]: %12s\n", idx, + llstr((longlong) count->counts[idx], llbuf))); + } + } + DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count, + llbuf))); + if ((verbose >= 2) && total_count) + { + VOID(printf("total: %12s\n", + llstr((longlong) total_count, llbuf))); + } + } + mrg->records=record_count; mrg->max_blob_length=max_blob_length; my_afree((gptr) record); @@ -958,9 +1188,14 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, huff_counts->field_type=FIELD_NORMAL; huff_counts->pack_type=0; + /* Check for zero-filled records (in this column), or zero records. */ if (huff_counts->zero_fields || ! records) { my_off_t old_space_count; + /* + If there are only zero filled records (in this column), + or no records at all, we are done. + */ if (huff_counts->zero_fields == records) { huff_counts->field_type= FIELD_ZERO; @@ -968,14 +1203,22 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, huff_counts->counts[0]=0; goto found_pack; } + /* Remeber the number of significant spaces. */ old_space_count=huff_counts->counts[' ']; - huff_counts->counts[' ']+=huff_counts->tot_end_space+ - huff_counts->tot_pre_space + - huff_counts->empty_fields * huff_counts->field_length; + /* Add all leading and trailing spaces. */ + huff_counts->counts[' ']+= (huff_counts->tot_end_space + + huff_counts->tot_pre_space + + huff_counts->empty_fields * + huff_counts->field_length); + /* Check, what the compressed length of this would be. */ old_length=calc_packed_length(huff_counts,0)+records/8; + /* Get the number of zero bytes. */ length=huff_counts->zero_fields*huff_counts->field_length; + /* Add it to the counts. */ huff_counts->counts[0]+=length; + /* Check, what the compressed length of this would be. */ new_length=calc_packed_length(huff_counts,0); + /* If the compression without the zeroes would be shorter, we are done. */ if (old_length < new_length && huff_counts->field_length > 1) { huff_counts->field_type=FIELD_SKIP_ZERO; @@ -983,9 +1226,16 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, huff_counts->bytes_packed=old_length- records/8; goto found_pack; } + /* Remove the insignificant spaces, but keep the zeroes. */ huff_counts->counts[' ']=old_space_count; } + /* Check, what the compressed length of this column would be. */ huff_counts->bytes_packed=calc_packed_length(huff_counts,0); + + /* + If there are enough empty records (in this column), + treating them specially may pay off. + */ if (huff_counts->empty_fields) { if (huff_counts->field_length > 2 && @@ -1017,6 +1267,11 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, } } } + + /* + If there are enough trailing spaces (in this column), + treating them specially may pay off. + */ if (huff_counts->tot_end_space) { huff_counts->counts[' ']+=huff_counts->tot_pre_space; @@ -1026,6 +1281,11 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, goto found_pack; huff_counts->counts[' ']-=huff_counts->tot_pre_space; } + + /* + If there are enough leading spaces (in this column), + treating them specially may pay off. + */ if (huff_counts->tot_pre_space) { if (test_space_compress(huff_counts,records,huff_counts->max_pre_space, @@ -1055,6 +1315,8 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, { HUFF_TREE tree; + DBUG_EXECUTE_IF("forceintervall", + huff_counts->bytes_packed= ~ (my_off_t) 0;); tree.element_buffer=0; if (!make_huff_tree(&tree,huff_counts) && tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed) @@ -1080,14 +1342,27 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, fill_zero_fields++; field_count[huff_counts->field_type]++; } + DBUG_PRINT("info", ("normal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields)); + DBUG_PRINT("info", ("pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d", + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); if (verbose) - printf("\nnormal: %3d empty-space: %3d empty-zero: %3d empty-fill: %3d\npre-space: %3d end-space: %3d intervall-fields: %3d zero: %3d\n", - field_count[FIELD_NORMAL],space_fields, - field_count[FIELD_SKIP_ZERO],fill_zero_fields, - field_count[FIELD_SKIP_PRESPACE], - field_count[FIELD_SKIP_ENDSPACE], - field_count[FIELD_INTERVALL], - field_count[FIELD_ZERO]); + VOID(printf("\nnormal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d\n" + "pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d\n", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields, + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); DBUG_VOID_RETURN; } @@ -1184,8 +1459,24 @@ static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees) DBUG_RETURN(huff_tree); } - /* Update huff_tree according to huff_counts->counts or - huff_counts->tree_buff */ +/* + Build a Huffman tree. + + SYNOPSIS + make_huff_tree() + huff_tree The Huffman tree. + huff_counts The counts. + + DESCRIPTION + Build a Huffman tree according to huff_counts->counts or + huff_counts->tree_buff. tree_buff, if non-NULL contains up to + tree_buff_length of distinct column values. In that case, whole + values can be Huffman encoded instead of single bytes. + + RETURN + 0 OK + != 0 Error +*/ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) { @@ -1196,12 +1487,14 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) first=last=0; if (huff_counts->tree_buff) { + /* Calculate the number of distinct values in tree_buff. */ found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) / huff_counts->field_length; first=0; last=found-1; } else { + /* Count the number of byte codes found in the column. */ for (i=found=0 ; i < 256 ; i++) { if (huff_counts->counts[i]) @@ -1215,6 +1508,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) found=2; } + /* When using 'tree_buff' we can have more that 256 values. */ if (queue.max_elements < found) { delete_queue(&queue); @@ -1222,6 +1516,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) return -1; } + /* Allocate or reallocate an element buffer for the Huffman tree. */ if (!huff_tree->element_buffer) { if (!(huff_tree->element_buffer= @@ -1249,15 +1544,25 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) if (huff_counts->tree_buff) { huff_tree->elements=0; - tree_walk(&huff_counts->int_tree, - (int (*)(void*, element_count,void*)) save_counts_in_queue, - (gptr) huff_tree, left_root_right); huff_tree->tree_pack_length=(1+15+16+5+5+ (huff_tree->char_bits+1)*found+ (huff_tree->offset_bits+1)* (found-2)+7)/8 + (uint) (huff_tree->counts->tree_pos- huff_tree->counts->tree_buff); + /* + Put a HUFF_ELEMENT into the queue for every distinct column value. + + tree_walk() calls save_counts_in_queue() for every element in + 'int_tree'. This takes elements from the target trees element + buffer and places references to them into the buffer of the + priority queue. We insert in column value order, but the order is + in fact irrelevant here. We will establish the correct order + later. + */ + tree_walk(&huff_counts->int_tree, + (int (*)(void*, element_count,void*)) save_counts_in_queue, + (gptr) huff_tree, left_root_right); } else { @@ -1266,7 +1571,15 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) (huff_tree->char_bits+1)*found+ (huff_tree->offset_bits+1)* (found-2)+7)/8; + /* + Put a HUFF_ELEMENT into the queue for every byte code found in the column. + The elements are taken from the target trees element buffer. + Instead of using queue_insert(), we just place references to the + elements into the buffer of the priority queue. We insert in byte + value order, but the order is in fact irrelevant here. We will + establish the correct order later. + */ for (i=first, found=0 ; i <= last ; i++) { if (huff_counts->counts[i]) @@ -1278,8 +1591,13 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) queue.root[found]=(byte*) new_huff_el; } } + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. This is required to enter + the loop, which builds the Huffman tree. + */ while (found < 2) - { /* Our huff_trees request at least 2 elements */ + { new_huff_el=huff_tree->element_buffer+(found++); new_huff_el->count=0; new_huff_el->a.leaf.null=0; @@ -1290,21 +1608,53 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) queue.root[found]=(byte*) new_huff_el; } } + + /* Make a queue from the queue buffer. */ queue.elements=found; + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ for (i=found/2 ; i > 0 ; i--) _downheap(&queue,i); + + /* The Huffman algorithm. */ bytes_packed=0; bits_packed=0; for (i=1 ; i < found ; i++) { + /* + Pop the top element from the queue (the one with the least incidence). + Popping from a priority queue includes a re-ordering of the queue, + to get the next least incidence element to the top. + */ a=(HUFF_ELEMENT*) queue_remove(&queue,0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ b=(HUFF_ELEMENT*) queue.root[1]; + /* Get a new element from the element buffer. */ new_huff_el=huff_tree->element_buffer+found+i; + /* The new element gets the sum of the two least incidence elements. */ new_huff_el->count=a->count+b->count; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ bits_packed+=(uint) (new_huff_el->count & 7); bytes_packed+=new_huff_el->count/8; - new_huff_el->a.nod.left=a; /* lesser in left */ + /* The new element points to its children, lesser in left. */ + new_huff_el->a.nod.left=a; new_huff_el->a.nod.right=b; + /* + Replace the copied top element by the new element and re-order the + queue. + */ queue.root[1]=(byte*) new_huff_el; queue_replaced(&queue); } @@ -1323,7 +1673,26 @@ static int compare_tree(void* cmp_arg __attribute__((unused)), return 0; } - /* Used by make_huff_tree to save intervall-counts in queue */ +/* + Organize distinct column values and their incidences into a priority queue. + + SYNOPSIS + save_counts_in_queue() + key The column value. + count The incidence of this value. + tree The Huffman tree to be built later. + + DESCRIPTION + We use the element buffer of the targeted tree. The distinct column + values are organized in a priority queue first. The Huffman + algorithm will later organize the elements into a Huffman tree. For + the time being, we just place references to the elements into the + queue buffer. The buffer will later be organized into a priority + queue. + + RETURN + 0 + */ static int save_counts_in_queue(byte *key, element_count count, HUFF_TREE *tree) @@ -1340,8 +1709,23 @@ static int save_counts_in_queue(byte *key, element_count count, } - /* Calculate length of file if given counts should be used */ - /* Its actually a faster version of make_huff_tree */ +/* + Calculate length of file if given counts should be used. + + SYNOPSIS + calc_packed_length() + huff_counts The counts for a column of the table(s). + add_tree_lenght If the decode tree length should be added. + + DESCRIPTION + We need to follow the Huffman algorithm until we know, how many bits + are required for each byte code. But we do not need the resulting + Huffman tree. Hence, we can leave out some steps which are essential + in make_huff_tree(). + + RETURN + Number of bytes required to compress this table column. +*/ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, uint add_tree_lenght) @@ -1351,6 +1735,23 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, HUFF_ELEMENT element_buffer[256]; DBUG_ENTER("calc_packed_length"); + /* + WARNING: We use a small hack for efficiency: Instead of placing + references to HUFF_ELEMENTs into the queue, we just insert + references to the counts of the byte codes which appeared in this + table column. During the Huffman algorithm they are successively + replaced by references to HUFF_ELEMENTs. This works, because + HUFF_ELEMENTs have the incidence count at their beginning. + Regardless, wether the queue array contains references to counts of + type my_off_t or references to HUFF_ELEMENTs which have the count of + type my_off_t at their beginning, it always points to a count of the + same type. + + Instead of using queue_insert(), we just copy the references into + the buffer of the priority queue. We insert in byte value order, but + the order is in fact irrelevant here. We will establish the correct + order later. + */ first=last=0; for (i=found=0 ; i < 256 ; i++) { @@ -1359,31 +1760,73 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, if (! found++) first=i; last=i; + /* We start with root[1], which is the queues top element. */ queue.root[found]=(byte*) &huff_counts->counts[i]; } } if (!found) DBUG_RETURN(0); /* Empty tree */ + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. This is required to enter + the loop, which follows the Huffman algorithm. + */ if (found < 2) queue.root[++found]=(byte*) &huff_counts->counts[last ? 0 : 1]; + /* Make a queue from the queue buffer. */ queue.elements=found; bytes_packed=0; bits_packed=0; + /* Add the length of the coding table, which would become part of the file. */ if (add_tree_lenght) bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+ (max_bit(found-1)+1+1)*(found-2) +7)/8; + + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ for (i=(found+1)/2 ; i > 0 ; i--) _downheap(&queue,i); + + /* The Huffman algorithm. */ for (i=0 ; i < found-1 ; i++) { - HUFF_ELEMENT *a,*b,*new_huff_el; - a=(HUFF_ELEMENT*) queue_remove(&queue,0); - b=(HUFF_ELEMENT*) queue.root[1]; - new_huff_el=element_buffer+i; - new_huff_el->count=a->count+b->count; + my_off_t *a; + my_off_t *b; + HUFF_ELEMENT *new_huff_el; + + /* + Pop the top element from the queue (the one with the least + incidence). Popping from a priority queue includes a re-ordering + of the queue, to get the next least incidence element to the top. + */ + a= (my_off_t*) queue_remove(&queue, 0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ + b= (my_off_t*) queue.root[1]; + /* Create a new element in a local (automatic) buffer. */ + new_huff_el= element_buffer + i; + /* The new element gets the sum of the two least incidence elements. */ + new_huff_el->count= *a + *b; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ bits_packed+=(uint) (new_huff_el->count & 7); bytes_packed+=new_huff_el->count/8; + /* + Replace the copied top element by the new element and re-order the + queue. This successively replaces the references to counts by + references to HUFF_ELEMENTs. + */ queue.root[1]=(byte*) new_huff_el; queue_replaced(&queue); } @@ -1431,13 +1874,26 @@ static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees) } } } + DBUG_PRINT("info", ("Original trees: %d After join: %d", + trees, tree_number)); if (verbose) - printf("Original trees: %d After join: %d\n",trees,tree_number); + VOID(printf("Original trees: %d After join: %d\n", trees, tree_number)); return tree_number; /* Return trees left */ } - /* Fill in huff_tree decode tables */ +/* + Fill in huff_tree encode tables. + + SYNOPSIS + make_huff_decode_table() + huff_tree An array of HUFF_TREE which are to be encoded. + trees The number of HUFF_TREE in the array. + + RETURN + 0 success + != 0 error +*/ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) { @@ -1448,12 +1904,13 @@ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) { elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256; if (!(huff_tree->code = - (ulong*) my_malloc(elements* - (sizeof(ulong)+sizeof(uchar)), - MYF(MY_WME | MY_ZEROFILL)))) + (ulonglong*) my_malloc(elements* + (sizeof(ulonglong) + sizeof(uchar)), + MYF(MY_WME | MY_ZEROFILL)))) return 1; huff_tree->code_len=(uchar*) (huff_tree->code+elements); - make_traverse_code_tree(huff_tree,huff_tree->root,32,0); + make_traverse_code_tree(huff_tree, huff_tree->root, + 8 * sizeof(ulonglong), LL(0)); } } return 0; @@ -1462,28 +1919,90 @@ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) static void make_traverse_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element, - uint size, ulong code) + uint size, ulonglong code) { uint chr; if (!element->a.leaf.null) { chr=element->a.leaf.element_nr; - huff_tree->code_len[chr]=(uchar) (32-size); - huff_tree->code[chr]= (code >> size); - if (huff_tree->height < 32-size) - huff_tree->height= 32-size; + huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size); + huff_tree->code[chr]= (code >> size); + if (huff_tree->height < 8 * sizeof(ulonglong) - size) + huff_tree->height= 8 * sizeof(ulonglong) - size; } else { size--; make_traverse_code_tree(huff_tree,element->a.nod.left,size,code); - make_traverse_code_tree(huff_tree,element->a.nod.right,size, - code+((ulong) 1L << size)); + make_traverse_code_tree(huff_tree, element->a.nod.right, size, + code + (((ulonglong) 1) << size)); } return; } +/* + Convert a value into binary digits. + + SYNOPSIS + bindigits() + value The value. + length The number of low order bits to convert. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *bindigits(ulonglong value, uint bits) +{ + static char digits[72]; + char *ptr= digits; + uint idx= bits; + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + *(ptr++)= '0' + ((value >> (--idx)) & 1); + *ptr= '\0'; + return digits; +} + + +/* + Convert a value into hexadecimal digits. + + SYNOPSIS + hexdigits() + value The value. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *hexdigits(ulonglong value) +{ + static char digits[20]; + char *ptr= digits; + uint idx= 2 * sizeof(value); /* Two hex digits per byte. */ + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + { + if ((*(ptr++)= '0' + ((value >> (4 * (--idx))) & 0xf)) > '9') + *(ptr - 1)+= 'a' - '9' - 1; + } + *ptr= '\0'; + return digits; +} + + /* Write header to new packed data file */ static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees, @@ -1517,15 +2036,64 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) uint huff_tree_bits; huff_tree_bits=max_bit(trees ? trees-1 : 0); + DBUG_PRINT("info", ("")); + DBUG_PRINT("info", ("column types:")); + DBUG_PRINT("info", ("FIELD_NORMAL 0")); + DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1")); + DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2")); + DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3")); + DBUG_PRINT("info", ("FIELD_BLOB 4")); + DBUG_PRINT("info", ("FIELD_CONSTANT 5")); + DBUG_PRINT("info", ("FIELD_INTERVALL 6")); + DBUG_PRINT("info", ("FIELD_ZERO 7")); + DBUG_PRINT("info", ("FIELD_VARCHAR 8")); + DBUG_PRINT("info", ("FIELD_CHECK 9")); + DBUG_PRINT("info", ("")); + DBUG_PRINT("info", ("pack type as a set of flags:")); + DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1")); + DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2")); + DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4")); + DBUG_PRINT("info", ("")); + if (verbose >= 2) + { + VOID(printf("\n")); + VOID(printf("column types:\n")); + VOID(printf("FIELD_NORMAL 0\n")); + VOID(printf("FIELD_SKIP_ENDSPACE 1\n")); + VOID(printf("FIELD_SKIP_PRESPACE 2\n")); + VOID(printf("FIELD_SKIP_ZERO 3\n")); + VOID(printf("FIELD_BLOB 4\n")); + VOID(printf("FIELD_CONSTANT 5\n")); + VOID(printf("FIELD_INTERVALL 6\n")); + VOID(printf("FIELD_ZERO 7\n")); + VOID(printf("FIELD_VARCHAR 8\n")); + VOID(printf("FIELD_CHECK 9\n")); + VOID(printf("\n")); + VOID(printf("pack type as a set of flags:\n")); + VOID(printf("PACK_TYPE_SELECTED 1\n")); + VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n")); + VOID(printf("PACK_TYPE_ZERO_FILL 4\n")); + VOID(printf("\n")); + } for (i=0 ; i++ < fields ; counts++) { - write_bits((ulong) (int) counts->field_type,5); + write_bits((ulonglong) (int) counts->field_type, 5); write_bits(counts->pack_type,6); if (counts->pack_type & PACK_TYPE_ZERO_FILL) write_bits(counts->max_zero_fill,5); else write_bits(counts->length_bits,5); - write_bits((ulong) counts->tree->tree_number-1,huff_tree_bits); + write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits); + DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + i , counts->field_type, counts->pack_type, + counts->max_zero_fill, counts->length_bits, + counts->tree->tree_number, counts->field_length)); + if (verbose >= 2) + VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u " + "tree: %2u length: %4u\n", i , counts->field_type, + counts->pack_type, counts->max_zero_fill, counts->length_bits, + counts->tree->tree_number, counts->field_length)); } flush_bits(); return; @@ -1538,45 +2106,72 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) { uint i,int_length; + uint tree_no; + uint codes; + uint errors= 0; uint *packed_tree,*offset,length; my_off_t elements; + /* Find the highest number of elements in the trees. */ for (i=length=0 ; i < trees ; i++) if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length) length=huff_tree[i].elements; + /* + Allocate a buffer for packing a decode tree. Two numbers per element + (left child and right child). + */ if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2))) { my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2); return 0; } + DBUG_PRINT("info", ("")); + if (verbose >= 2) + VOID(printf("\n")); + tree_no= 0; intervall_length=0; for (elements=0; trees-- ; huff_tree++) { + /* Skip columns that have been joined with other columns. */ if (huff_tree->tree_number == 0) continue; /* Deleted tree */ + tree_no++; + DBUG_PRINT("info", ("")); + if (verbose >= 3) + VOID(printf("\n")); + /* Count the total number of elements (byte codes or column values). */ elements+=huff_tree->elements; huff_tree->max_offset=2; + /* Build a tree of offsets and codes for decoding in 'packed_tree'. */ if (huff_tree->elements <= 1) offset=packed_tree; else offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree); + + /* This should be the same as 'length' above. */ huff_tree->offset_bits=max_bit(huff_tree->max_offset); + + /* + Since we check this during collecting the distinct column values, + this should never happen. + */ if (huff_tree->max_offset >= IS_OFFSET) { /* This should be impossible */ - VOID(fprintf(stderr,"Tree offset got too big: %d, aborted\n", - huff_tree->max_offset)); + VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n", + huff_tree->max_offset)); my_afree((gptr) packed_tree); return 0; } -#ifdef EXTRA_DBUG - printf("pos: %d elements: %d tree-elements: %d char_bits: %d\n", - (uint) (file_buffer.pos-file_buffer.buffer), - huff_tree->elements, (offset-packed_tree),huff_tree->char_bits); -#endif + DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu " + "char_bits: %u\n", + (ulong) (file_buffer.pos - file_buffer.buffer), + huff_tree->elements, (ulong) (offset - packed_tree), + huff_tree->char_bits)); if (!huff_tree->counts->tree_buff) { + /* We do a byte compression on this column. Mark with bit 0. */ write_bits(0,1); write_bits(huff_tree->min_chr,8); write_bits(huff_tree->elements,9); @@ -1588,6 +2183,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) { int_length=(uint) (huff_tree->counts->tree_pos - huff_tree->counts->tree_buff); + /* We have distinct column values for this column. Mark with bit 1. */ write_bits(1,1); write_bits(huff_tree->elements,15); write_bits(int_length,16); @@ -1595,10 +2191,29 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) write_bits(huff_tree->offset_bits,5); intervall_length+=int_length; } + DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u " + "offset_bits: %2u %s: %5u codelen: %2u", + tree_no, huff_tree->elements, huff_tree->char_bits, + huff_tree->offset_bits, huff_tree->counts->tree_buff ? + "bufflen" : "min_chr", huff_tree->counts->tree_buff ? + int_length : huff_tree->min_chr, huff_tree->height)); + if (verbose >= 2) + VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u " + "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements, + huff_tree->char_bits, huff_tree->offset_bits, + huff_tree->counts->tree_buff ? "bufflen" : "min_chr", + huff_tree->counts->tree_buff ? int_length : + huff_tree->min_chr, huff_tree->height)); + + /* Check that the code tree length matches the element count. */ length=(uint) (offset-packed_tree); if (length != huff_tree->elements*2-2) - printf("error: Huff-tree-length: %d != calc_length: %d\n", - length,huff_tree->elements*2-2); + { + VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n", + length, huff_tree->elements * 2 - 2)); + errors++; + break; + } for (i=0 ; i < length ; i++) { @@ -1607,16 +2222,122 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) huff_tree->offset_bits+1); else write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1); + DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x", + i, (packed_tree[i] & IS_OFFSET) ? + " -> " : "", (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); + if (verbose >= 3) + VOID(printf("tree[0x%04x]: %s0x%04x\n", + i, (packed_tree[i] & IS_OFFSET) ? " -> " : "", + (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); } flush_bits(); + + /* + Display coding tables and check their correctness. + */ + codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256; + for (i= 0; i < codes; i++) + { + ulonglong code; + uint bits; + uint len; + uint idx; + + if (! (len= huff_tree->code_len[i])) + continue; + DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], + huff_tree->code_len[i]))); + if (verbose >= 3) + VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], huff_tree->code_len[i]))); + + /* Check that the encode table decodes correctly. */ + code= 0; + bits= 0; + idx= 0; + DBUG_EXECUTE_IF("forcechkerr1", len--;); + DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code);); + DBUG_EXECUTE_IF("forcechkerr3", idx= length;); + for (;;) + { + if (! len) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n", + hexdigits(huff_tree->code[i]), huff_tree->code_len[i])); + errors++; + break; + } + code<<= 1; + code|= (huff_tree->code[i] >> (--len)) & 1; + bits++; + if (bits > 8 * sizeof(code)) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n", + bits, 8 * sizeof(code))); + errors++; + break; + } + idx+= code & 1; + if (idx >= length) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n", + idx, length)); + errors++; + break; + } + if (packed_tree[idx] & IS_OFFSET) + idx+= packed_tree[idx] & ~IS_OFFSET; + else + break; /* Hit a leaf. This contains the result value. */ + } + if (errors) + break; + + DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;); + if (packed_tree[idx] != i) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n", + packed_tree[idx], i)); + errors++; + break; + } + } /*end for (codes)*/ + if (errors) + break; + + /* Write column values in case of distinct column value compression. */ if (huff_tree->counts->tree_buff) { for (i=0 ; i < int_length ; i++) - write_bits((uint) (uchar) huff_tree->counts->tree_buff[i],8); + { + write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8); + DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x", + i, (uchar) huff_tree->counts->tree_buff[i])); + if (verbose >= 3) + VOID(printf("column_values[0x%04x]: 0x%02x\n", + i, (uchar) huff_tree->counts->tree_buff[i])); + } } flush_bits(); } + DBUG_PRINT("info", ("")); + if (verbose >= 2) + VOID(printf("\n")); my_afree((gptr) packed_tree); + if (errors) + { + VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n")); + return 0; + } return elements; } @@ -1627,23 +2348,43 @@ static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element, uint *prev_offset; prev_offset= offset; + /* + 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null, + then there is no left child and, hence no right child either. This + is a property of a binary tree. An element is either a node with two + childs, or a leaf without childs. + + The current element is always a node with two childs. Go left first. + */ if (!element->a.nod.left->a.leaf.null) { - offset[0] =(uint) element->a.nod.left->a.leaf.element_nr; + /* Store the byte code or the index of the column value. */ + prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr; offset+=2; } else { + /* + Recursively traverse the tree to the left. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ prev_offset[0]= IS_OFFSET+2; offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2); } + + /* Now, check the right child. */ if (!element->a.nod.right->a.leaf.null) { + /* Store the byte code or the index of the column value. */ prev_offset[1]=element->a.nod.right->a.leaf.element_nr; return offset; } else { + /* + Recursively traverse the tree to the right. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ uint temp=(uint) (offset-prev_offset-1); prev_offset[1]= IS_OFFSET+ temp; if (huff_tree->max_offset < temp) @@ -1670,6 +2411,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length, intervall,field_length,max_pack_length,pack_blob_length; my_off_t record_count; + char llbuf[32]; ulong length,pack_length; byte *record,*pos,*end_pos,*record_pos,*start_pos; HUFF_COUNTS *count,*end_count; @@ -1678,12 +2420,23 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) uint pack_version= (uint) isam_file->s->pack.version; DBUG_ENTER("compress_isam_file"); + /* Allocate a buffer for the records (excluding blobs). */ if (!(record=(byte*) my_alloca(isam_file->s->base.reclength))) return -1; + end_count=huff_counts+isam_file->s->base.fields; min_record_length= (uint) ~0; max_record_length=0; + /* + Calculate the maximum number of bits required to pack the records. + Remember to understand 'max_zero_fill' as 'min_zero_fill'. + The tree height determines the maximum number of bits per value. + Some fields skip leading or trailing spaces or zeroes. The skipped + number of bytes is encoded by 'length_bits' bits. + Empty blobs and varchar are encoded with a single 1 bit. Other blobs + and varchar get a leading 0 bit. + */ for (i=max_calc_length=0 ; i < isam_file->s->base.fields ; i++) { if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL)) @@ -1702,13 +2455,15 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) (huff_counts[i].field_length - huff_counts[i].max_zero_fill)* huff_counts[i].tree->height+huff_counts[i].length_bits; } - max_calc_length/=8; + max_calc_length= (max_calc_length + 7) / 8; pack_ref_length= calc_pack_length(pack_version, max_calc_length); record_count=0; + /* 'max_blob_length' is the max length of all blobs of a record. */ pack_blob_length= isam_file->s->base.blobs ? calc_pack_length(pack_version, mrg->max_blob_length) : 0; max_pack_length=pack_ref_length+pack_blob_length; + DBUG_PRINT("fields", ("===")); mrg_reset(mrg); while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) { @@ -1724,15 +2479,29 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) end_pos=start_pos+(field_length=count->field_length); tree=count->tree; + DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + (ulong) (count - huff_counts + 1), + count->field_type, + count->pack_type, count->max_zero_fill, + count->length_bits, count->tree->tree_number, + count->field_length)); + + /* Check if the column contains spaces only. */ if (count->pack_type & PACK_TYPE_SPACE_FIELDS) { for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ; if (pos == end_pos) { + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1")); + DBUG_PRINT("fields", ("---")); write_bits(1,1); start_pos=end_pos; continue; } + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1")); write_bits(0,1); } end_pos-=count->max_zero_fill; @@ -1742,65 +2511,129 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) case FIELD_SKIP_ZERO: if (!memcmp((byte*) start_pos,zero_string,field_length)) { + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1")); write_bits(1,1); start_pos=end_pos; break; } + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1")); write_bits(0,1); /* Fall through */ case FIELD_NORMAL: + DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes", + (ulong) (end_pos - start_pos))); for ( ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *start_pos], (uint) tree->code_len[(uchar) *start_pos]); + } break; case FIELD_SKIP_ENDSPACE: for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ; - length=(uint) (end_pos-pos); + length= (ulong) (end_pos - pos); if (count->pack_type & PACK_TYPE_SELECTED) { if (length > count->min_space) { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); write_bits(1,1); write_bits(length,count->length_bits); } else { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE not more than min_space, " + "bits: 1")); write_bits(0,1); pos=end_pos; } } else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); write_bits(length,count->length_bits); + } + /* Encode all significant bytes. */ + DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes", + (ulong) (pos - start_pos))); for ( ; start_pos < pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *start_pos], (uint) tree->code_len[(uchar) *start_pos]); + } start_pos=end_pos; break; case FIELD_SKIP_PRESPACE: for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ; - length=(uint) (pos-start_pos); + length= (ulong) (pos - start_pos); if (count->pack_type & PACK_TYPE_SELECTED) { if (length > count->min_space) { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); write_bits(1,1); write_bits(length,count->length_bits); } else { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE not more than min_space, " + "bits: 1")); pos=start_pos; write_bits(0,1); } } else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); write_bits(length,count->length_bits); + } + /* Encode all significant bytes. */ + DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes", + (ulong) (end_pos - start_pos))); for (start_pos=pos ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *start_pos], (uint) tree->code_len[(uchar) *start_pos]); + } break; case FIELD_CONSTANT: case FIELD_ZERO: case FIELD_CHECK: + DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK")); start_pos=end_pos; break; case FIELD_INTERVALL: @@ -1808,6 +2641,10 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) pos=(byte*) tree_search(&count->int_tree, start_pos, count->int_tree.custom_arg); intervall=(uint) (pos - count->tree_buff)/field_length; + DBUG_PRINT("fields", ("FIELD_INTERVALL")); + DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u", + intervall, hexdigits(tree->code[intervall]), + (uint) tree->code_len[intervall])); write_bits(tree->code[intervall],(uint) tree->code_len[intervall]); start_pos=end_pos; break; @@ -1816,21 +2653,36 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) ulong blob_length=_mi_calc_blob_length(field_length- mi_portable_sizeof_char_ptr, start_pos); + /* Empty blobs are encoded with a single 1 bit. */ if (!blob_length) { - write_bits(1,1); /* Empty blob */ + DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1")); + write_bits(1,1); } else { byte *blob,*blob_end; + DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1")); write_bits(0,1); + /* Write the blob length. */ + DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u", + blob_length, count->length_bits)); write_bits(blob_length,count->length_bits); memcpy_fixed(&blob,end_pos-mi_portable_sizeof_char_ptr, sizeof(char*)); blob_end=blob+blob_length; + /* Encode the blob bytes. */ for ( ; blob < blob_end ; blob++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *blob, hexdigits(tree->code[(uchar) *blob]), + (uint) tree->code_len[(uchar) *blob], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *blob], (uint) tree->code_len[(uchar) *blob]); + } tot_blob_length+=blob_length; } start_pos= end_pos; @@ -1838,19 +2690,37 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) } case FIELD_VARCHAR: { - ulong col_length= uint2korr(start_pos); + uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); + ulong col_length= (pack_length == 1 ? (uint) *(uchar*) start_pos : + uint2korr(start_pos)); + /* Empty varchar are encoded with a single 1 bit. */ if (!col_length) { + DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1")); write_bits(1,1); /* Empty varchar */ } else { - byte *end=start_pos+2+col_length; + byte *end=start_pos+pack_length+col_length; + DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1")); write_bits(0,1); + /* Write the varchar length. */ + DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u", + col_length, count->length_bits)); write_bits(col_length,count->length_bits); - for (start_pos+=2 ; start_pos < end ; start_pos++) + /* Encode the varchar bytes. */ + for (start_pos+=pack_length ; start_pos < end ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *start_pos], (uint) tree->code_len[(uchar) *start_pos]); + } } start_pos= end_pos; break; @@ -1859,13 +2729,18 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) abort(); /* Impossible */ } start_pos+=count->max_zero_fill; + DBUG_PRINT("fields", ("---")); } flush_bits(); - length=(ulong) (file_buffer.pos-record_pos)-max_pack_length; + length=(ulong) ((byte*) file_buffer.pos - record_pos) - max_pack_length; pack_length= save_pack_length(pack_version, record_pos, length); if (pack_blob_length) pack_length+= save_pack_length(pack_version, record_pos + pack_length, tot_blob_length); + DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu " + "length-bytes: %lu", (ulong) record_count, length, + tot_blob_length, pack_length)); + DBUG_PRINT("fields", ("===")); /* Correct file buffer if the header was smaller */ if (pack_length != max_pack_length) @@ -1877,9 +2752,11 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) min_record_length=(uint) length; if (length > (ulong) max_record_length) max_record_length=(uint) length; - if (write_loop && ++record_count % WRITE_COUNT == 0) + record_count++; + if (write_loop && record_count % WRITE_COUNT == 0) { - printf("%lu\r",(ulong) record_count); VOID(fflush(stdout)); + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); } } else if (error != HA_ERR_RECORD_DELETED) @@ -1889,8 +2766,11 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) error=0; else { - fprintf(stderr,"%s: Got error %d reading records\n",my_progname,error); + VOID(fprintf(stderr, "%s: Got error %d reading records\n", + my_progname, error)); } + if (verbose >= 2) + VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf))); my_afree((gptr) record); mrg->ref_length=max_pack_length; @@ -1930,7 +2810,7 @@ static void init_file_buffer(File file, pbool read_buffer) file_buffer.pos=file_buffer.buffer; file_buffer.bits=BITS_SAVED; } - file_buffer.current_byte=0; + file_buffer.bitbucket= 0; } @@ -1973,7 +2853,8 @@ static int flush_buffer(ulong neaded_length) tmp=my_realloc(file_buffer.buffer, neaded_length,MYF(MY_WME)); if (!tmp) return 1; - file_buffer.pos= tmp + (ulong) (file_buffer.pos - file_buffer.buffer); + file_buffer.pos= ((uchar*) tmp + + (ulong) (file_buffer.pos - file_buffer.buffer)); file_buffer.buffer=tmp; file_buffer.end=tmp+neaded_length-8; } @@ -1988,68 +2869,59 @@ static void end_file_buffer(void) /* output `bits` low bits of `value' */ -static void write_bits (register ulong value, register uint bits) +static void write_bits(register ulonglong value, register uint bits) { - if ((file_buffer.bits-=(int) bits) >= 0) + DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) || + (bits == 8 * sizeof(value))); + + if ((file_buffer.bits-= (int) bits) >= 0) { - file_buffer.current_byte|=value << file_buffer.bits; + file_buffer.bitbucket|= value << file_buffer.bits; } else { - reg3 uint byte_buff; + reg3 ulonglong bit_buffer; bits= (uint) -file_buffer.bits; - DBUG_ASSERT(bits <= 8 * sizeof(value)); - byte_buff= (file_buffer.current_byte | - ((bits != 8 * sizeof(value)) ? (uint) (value >> bits) : 0)); -#if BITS_SAVED == 32 - *file_buffer.pos++= (byte) (byte_buff >> 24) ; - *file_buffer.pos++= (byte) (byte_buff >> 16) ; + bit_buffer= (file_buffer.bitbucket | + ((bits != 8 * sizeof(value)) ? (value >> bits) : 0)); +#if BITS_SAVED == 64 + *file_buffer.pos++= (uchar) (bit_buffer >> 56); + *file_buffer.pos++= (uchar) (bit_buffer >> 48); + *file_buffer.pos++= (uchar) (bit_buffer >> 40); + *file_buffer.pos++= (uchar) (bit_buffer >> 32); #endif - *file_buffer.pos++= (byte) (byte_buff >> 8) ; - *file_buffer.pos++= (byte) byte_buff; + *file_buffer.pos++= (uchar) (bit_buffer >> 24); + *file_buffer.pos++= (uchar) (bit_buffer >> 16); + *file_buffer.pos++= (uchar) (bit_buffer >> 8); + *file_buffer.pos++= (uchar) (bit_buffer); - DBUG_ASSERT(bits <= 8 * sizeof(ulong)); if (bits != 8 * sizeof(value)) - value&= (((ulong) 1) << bits) - 1; -#if BITS_SAVED == 16 - if (bits >= sizeof(uint)) - { - bits-=8; - *file_buffer.pos++= (uchar) (value >> bits); - value&= (1 << bits)-1; - if (bits >= sizeof(uint)) - { - bits-=8; - *file_buffer.pos++= (uchar) (value >> bits); - value&= (1 << bits)-1; - } - } -#endif + value&= (((ulonglong) 1) << bits) - 1; if (file_buffer.pos >= file_buffer.end) VOID(flush_buffer(~ (ulong) 0)); file_buffer.bits=(int) (BITS_SAVED - bits); - file_buffer.current_byte=(uint) (value << (BITS_SAVED - bits)); + file_buffer.bitbucket= value << (BITS_SAVED - bits); } return; } /* Flush bits in bit_buffer to buffer */ -static void flush_bits (void) +static void flush_bits(void) { - uint bits,byte_buff; + int bits; + ulonglong bit_buffer; - bits=(file_buffer.bits) & ~7; - byte_buff = file_buffer.current_byte >> bits; - bits=BITS_SAVED - bits; + bits= file_buffer.bits & ~7; + bit_buffer= file_buffer.bitbucket >> bits; + bits= BITS_SAVED - bits; while (bits > 0) { - bits-=8; - *file_buffer.pos++= (byte) (uchar) (byte_buff >> bits) ; + bits-= 8; + *file_buffer.pos++= (uchar) (bit_buffer >> bits); } - file_buffer.bits=BITS_SAVED; - file_buffer.current_byte=0; - return; + file_buffer.bits= BITS_SAVED; + file_buffer.bitbucket= 0; } @@ -2074,7 +2946,7 @@ static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, share->state.dellink= HA_OFFSET_ERROR; share->state.split=(ha_rows) mrg->records; share->state.version=(ulong) time((time_t*) 0); - if (share->state.key_map != (ULL(1) << share->base.keys) - 1) + if (! mi_is_all_keys_active(share->state.key_map, share->base.keys)) { /* Some indexes are disabled, cannot use current key_file_length value @@ -2088,7 +2960,7 @@ static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, original file so "myisamchk -rq" can use this value (this is necessary because index size cannot be easily calculated for fulltext keys) */ - share->state.key_map=0; + mi_clear_all_keys_active(share->state.key_map); for (key=0 ; key < share->base.keys ; key++) share->state.key_root[key]= HA_OFFSET_ERROR; for (key=0 ; key < share->state.header.max_block_size ; key++) @@ -2128,7 +3000,7 @@ static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length, } state.dellink= HA_OFFSET_ERROR; state.version=(ulong) time((time_t*) 0); - state.key_map=0; + mi_clear_all_keys_active(state.key_map); state.checksum=crc; if (isam_file->s->base.keys) isamchk_neaded=1; @@ -2197,3 +3069,131 @@ static int mrg_close(PACK_MRG_INFO *mrg) my_free((gptr) mrg->file,MYF(0)); return error; } + + +#if !defined(DBUG_OFF) +/* + Fake the counts to get big Huffman codes. + + SYNOPSIS + fakebigcodes() + huff_counts A pointer to the counts array. + end_count A pointer past the counts array. + + DESCRIPTION + + Huffman coding works by removing the two least frequent values from + the list of values and add a new value with the sum of their + incidences in a loop until only one value is left. Every time a + value is reused for a new value, it gets one more bit for its + encoding. Hence, the least frequent values get the longest codes. + + To get a maximum code length for a value, two of the values must + have an incidence of 1. As their sum is 2, the next infrequent value + must have at least an incidence of 2, then 4, 8, 16 and so on. This + means that one needs 2**n bytes (values) for a code length of n + bits. However, using more distinct values forces the use of longer + codes, or reaching the code length with less total bytes (values). + + To get 64(32)-bit codes, I sort the counts by decreasing incidence. + I assign counts of 1 to the two most frequent values, a count of 2 + for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All + the remaining values get 1. That way every possible byte has an + assigned code, though not all codes are used if not all byte values + are present in the column. + + This strategy would work with distinct column values too, but + requires that at least 64(32) values are present. To make things + easier here, I cancel all distinct column values and force byte + compression for all columns. + + RETURN + void +*/ + +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count) +{ + HUFF_COUNTS *count; + my_off_t *cur_count_p; + my_off_t *end_count_p; + my_off_t **cur_sort_p; + my_off_t **end_sort_p; + my_off_t *sort_counts[256]; + my_off_t total; + DBUG_ENTER("fakebigcodes"); + + for (count= huff_counts; count < end_count; count++) + { + /* + Remove distinct column values. + */ + if (huff_counts->tree_buff) + { + my_free((gptr) huff_counts->tree_buff, MYF(0)); + delete_tree(&huff_counts->int_tree); + huff_counts->tree_buff= NULL; + DBUG_PRINT("fakebigcodes", ("freed distinct column values")); + } + + /* + Sort counts by decreasing incidence. + */ + cur_count_p= count->counts; + end_count_p= cur_count_p + 256; + cur_sort_p= sort_counts; + while (cur_count_p < end_count_p) + *(cur_sort_p++)= cur_count_p++; + (void) qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp); + + /* + Assign faked counts. + */ + cur_sort_p= sort_counts; +#if SIZEOF_LONG_LONG > 4 + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1; +#else + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2; +#endif + /* Most frequent value gets a faked count of 1. */ + **(cur_sort_p++)= 1; + total= 1; + while (cur_sort_p < end_sort_p) + { + **(cur_sort_p++)= total; + total<<= 1; + } + /* Set the last value. */ + **(cur_sort_p++)= --total; + /* + Set the remaining counts. + */ + end_sort_p= sort_counts + 256; + while (cur_sort_p < end_sort_p) + **(cur_sort_p++)= 1; + } + DBUG_VOID_RETURN; +} + + +/* + Compare two counts for reverse sorting. + + SYNOPSIS + fakecmp() + count1 One count. + count2 Another count. + + RETURN + 1 count1 < count2 + 0 count1 == count2 + -1 count1 > count2 +*/ + +static int fakecmp(my_off_t **count1, my_off_t **count2) +{ + return ((**count1 < **count2) ? 1 : + (**count1 > **count2) ? -1 : 0); +} +#endif + + diff --git a/myisam/rt_split.c b/myisam/rt_split.c index 664dd2c75e3..31a7d09ab4f 100644 --- a/myisam/rt_split.c +++ b/myisam/rt_split.c @@ -257,19 +257,17 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, int n_dim; uchar *source_cur, *cur1, *cur2; uchar *new_page; - int err_code = 0; - - uint nod_flag = mi_test_if_nod(page); - uint full_length = key_length + (nod_flag ? nod_flag : - info->s->base.rec_reflength); - - int max_keys = (mi_getint(page)-2) / (full_length); + int err_code= 0; + uint nod_flag= mi_test_if_nod(page); + uint full_length= key_length + (nod_flag ? nod_flag : + info->s->base.rec_reflength); + int max_keys= (mi_getint(page)-2) / (full_length); n_dim = keyinfo->keysegs / 2; if (!(coord_buf= (double*) my_alloca(n_dim * 2 * sizeof(double) * - (max_keys + 1 + 4) + - sizeof(SplitStruct) * (max_keys + 1)))) + (max_keys + 1 + 4) + + sizeof(SplitStruct) * (max_keys + 1)))) return -1; task= (SplitStruct *)(coord_buf + n_dim * 2 * (max_keys + 1 + 4)); @@ -312,8 +310,7 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, cur1 = rt_PAGE_FIRST_KEY(page, nod_flag); cur2 = rt_PAGE_FIRST_KEY(new_page, nod_flag); - n1 = 0; - n2 = 0; + n1= n2 = 0; for (cur = task; cur < stop; ++cur) { uchar *to; diff --git a/myisam/sort.c b/myisam/sort.c index e8cd9938e42..fabd713ef45 100644 --- a/myisam/sort.c +++ b/myisam/sort.c @@ -479,7 +479,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) } if (!got_error) { - share->state.key_map|=(ulonglong) 1 << sinfo->key; + mi_set_key_active(share->state.key_map, sinfo->key); if (param->testflag & T_STATISTICS) update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique, (ulonglong) info->state->records); @@ -862,7 +862,8 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, uchar *strpos; BUFFPEK *buffpek,**refpek; QUEUE queue; - volatile my_bool *killed= killed_ptr(info->sort_info->param); + volatile int *killed= killed_ptr(info->sort_info->param); + DBUG_ENTER("merge_buffers"); count=error=0; |