diff options
author | unknown <istruewing@chilla.local> | 2006-12-08 12:20:48 +0100 |
---|---|---|
committer | unknown <istruewing@chilla.local> | 2006-12-08 12:20:48 +0100 |
commit | 966e018e3e0a6f9b4a8596cbd6f3e5242ae7e038 (patch) | |
tree | 1073b3844bbdd54dc05c09a896e56a285196fdfc /myisam | |
parent | 39efdfcb20bb8c5e883b7539c686d9b206d4d7f0 (diff) | |
parent | 3fa6480b105f316a0f0651e153652bcccc6da5fb (diff) | |
download | mariadb-git-966e018e3e0a6f9b4a8596cbd6f3e5242ae7e038.tar.gz |
Merge bk-internal.mysql.com:/home/bk/mysql-4.1
into chilla.local:/home/mydev/mysql-4.1-axmrg
Diffstat (limited to 'myisam')
-rw-r--r-- | myisam/mi_check.c | 90 | ||||
-rw-r--r-- | myisam/mi_dynrec.c | 8 | ||||
-rw-r--r-- | myisam/mi_packrec.c | 365 |
3 files changed, 406 insertions, 57 deletions
diff --git a/myisam/mi_check.c b/myisam/mi_check.c index b07c9904247..3a7817b7f03 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -251,11 +251,12 @@ static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) my_off_t next_link; uint block_size=(nr+1)*MI_MIN_KEY_BLOCK_LENGTH; ha_rows records; - char llbuff[21],*buff; + char llbuff[21], llbuff2[21], *buff; DBUG_ENTER("check_k_link"); + DBUG_PRINT("enter", ("block_size: %u", block_size)); if (param->testflag & T_VERBOSE) - printf("block_size %4d:",block_size); + printf("block_size %4u:", block_size); /* purecov: tested */ next_link=info->s->state.key_del[nr]; records= (ha_rows) (info->state->key_file_length / block_size); @@ -265,14 +266,46 @@ static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) DBUG_RETURN(1); if (param->testflag & T_VERBOSE) printf("%16s",llstr(next_link,llbuff)); - if (next_link > info->state->key_file_length || - next_link & (info->s->blocksize-1)) + + /* Key blocks must lay within the key file length entirely. */ + if (next_link + block_size > info->state->key_file_length) + { + /* purecov: begin tested */ + mi_check_print_error(param, "Invalid key block position: %s " + "key block size: %u file_length: %s", + llstr(next_link, llbuff), block_size, + llstr(info->state->key_file_length, llbuff2)); + DBUG_RETURN(1); + /* purecov: end */ + } + + /* Key blocks must be aligned at MI_MIN_KEY_BLOCK_LENGTH. */ + if (next_link & (MI_MIN_KEY_BLOCK_LENGTH - 1)) + { + /* purecov: begin tested */ + mi_check_print_error(param, "Mis-aligned key block: %s " + "minimum key block length: %u", + llstr(next_link, llbuff), MI_MIN_KEY_BLOCK_LENGTH); DBUG_RETURN(1); + /* purecov: end */ + } + + /* + Read the key block with MI_MIN_KEY_BLOCK_LENGTH to find next link. + If the key cache block size is smaller than block_size, we can so + avoid unecessary eviction of cache block. + */ if (!(buff=key_cache_read(info->s->key_cache, info->s->kfile, next_link, DFLT_INIT_HITS, - (byte*) info->buff, - myisam_block_size, block_size, 1))) + (byte*) info->buff, MI_MIN_KEY_BLOCK_LENGTH, + MI_MIN_KEY_BLOCK_LENGTH, 1))) + { + /* purecov: begin tested */ + mi_check_print_error(param, "key cache read error for block: %s", + llstr(next_link,llbuff)); DBUG_RETURN(1); + /* purecov: end */ + } next_link=mi_sizekorr(buff); records--; param->key_file_blocks+=block_size; @@ -555,17 +588,37 @@ static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, ha_checksum *key_checksum, uint level) { char llbuff[22],llbuff2[22]; - if (page > info->state->key_file_length || (page & (info->s->blocksize -1))) - { - my_off_t max_length=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0)); - mi_check_print_error(param,"Wrong pagepointer: %s at page: %s", - llstr(page,llbuff),llstr(page,llbuff2)); - - if (page+info->s->blocksize > max_length) + DBUG_ENTER("chk_index_down"); + + /* Key blocks must lay within the key file length entirely. */ + if (page + keyinfo->block_length > info->state->key_file_length) + { + /* purecov: begin tested */ + /* Give it a chance to fit in the real file size. */ + my_off_t max_length= my_seek(info->s->kfile, 0L, MY_SEEK_END, MYF(0)); + mi_check_print_error(param, "Invalid key block position: %s " + "key block size: %u file_length: %s", + llstr(page, llbuff), keyinfo->block_length, + llstr(info->state->key_file_length, llbuff2)); + if (page + keyinfo->block_length > max_length) goto err; - info->state->key_file_length=(max_length & - ~ (my_off_t) (info->s->blocksize-1)); + /* Fix the remebered key file length. */ + info->state->key_file_length= (max_length & + ~ (my_off_t) (keyinfo->block_length - 1)); + /* purecov: end */ + } + + /* Key blocks must be aligned at MI_MIN_KEY_BLOCK_LENGTH. */ + if (page & (MI_MIN_KEY_BLOCK_LENGTH - 1)) + { + /* purecov: begin tested */ + mi_check_print_error(param, "Mis-aligned key block: %s " + "minimum key block length: %u", + llstr(page, llbuff), MI_MIN_KEY_BLOCK_LENGTH); + goto err; + /* purecov: end */ } + if (!_mi_fetch_keypage(info,keyinfo,page, DFLT_INIT_HITS,buff,0)) { mi_check_print_error(param,"Can't read key from filepos: %s", @@ -576,9 +629,12 @@ static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, if (chk_index(param,info,keyinfo,page,buff,keys,key_checksum,level)) goto err; - return 0; + DBUG_RETURN(0); + + /* purecov: begin tested */ err: - return 1; + DBUG_RETURN(1); + /* purecov: end */ } diff --git a/myisam/mi_dynrec.c b/myisam/mi_dynrec.c index 4dec3055fa1..727f44341b1 100644 --- a/myisam/mi_dynrec.c +++ b/myisam/mi_dynrec.c @@ -992,9 +992,11 @@ ulong _mi_rec_unpack(register MI_INFO *info, register byte *to, byte *from, { uint size_length=rec_length- mi_portable_sizeof_char_ptr; ulong blob_length=_mi_calc_blob_length(size_length,from); - if ((ulong) (from_end-from) - size_length < blob_length || - min_pack_length > (uint) (from_end -(from+size_length+blob_length))) - goto err; + ulong from_left= (ulong) (from_end - from); + if (from_left < size_length || + from_left - size_length < blob_length || + from_left - size_length - blob_length < min_pack_length) + goto err; memcpy((byte*) to,(byte*) from,(size_t) size_length); from+=size_length; memcpy_fixed((byte*) to+size_length,(byte*) &from,sizeof(char*)); diff --git a/myisam/mi_packrec.c b/myisam/mi_packrec.c index feb8d33b015..c88b8ab6607 100644 --- a/myisam/mi_packrec.c +++ b/myisam/mi_packrec.c @@ -20,7 +20,10 @@ #define IS_CHAR ((uint) 32768) /* Bit if char (not offset) in tree */ -#if INT_MAX > 65536L +/* Some definitions to keep in sync with myisampack.c */ +#define HEAD_LENGTH 32 /* Length of fixed header */ + +#if INT_MAX > 32767 #define BITS_SAVED 32 #define MAX_QUICK_TABLE_BITS 9 /* Because we may shift in 24 bits */ #else @@ -42,6 +45,7 @@ { bits-=(bit+1); break; } \ pos+= *pos +/* Size in uint16 of a Huffman tree for byte compression of 256 byte values. */ #define OFFSET_TABLE_SIZE 512 static uint read_huff_table(MI_BIT_BUFF *bit_buff,MI_DECODE_TREE *decode_tree, @@ -132,7 +136,7 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) uint16 *decode_table,*tmp_buff; ulong elements,intervall_length; char *disk_cache,*intervall_buff; - uchar header[32]; + uchar header[HEAD_LENGTH]; MYISAM_SHARE *share=info->s; MI_BIT_BUFF bit_buff; DBUG_ENTER("_mi_read_pack_info"); @@ -150,12 +154,13 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) my_errno=HA_ERR_END_OF_FILE; goto err0; } + /* Only the first three bytes of magic number are independent of version. */ if (memcmp((byte*) header, (byte*) myisam_pack_file_magic, 3)) { my_errno=HA_ERR_WRONG_IN_RECORD; goto err0; } - share->pack.version= header[3]; + share->pack.version= header[3]; /* fourth byte of magic number */ share->pack.header_length= uint4korr(header+4); share->min_pack_length=(uint) uint4korr(header+8); share->max_pack_length=(uint) uint4korr(header+12); @@ -171,7 +176,22 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) share->base.min_block_length=share->min_pack_length+1; if (share->min_pack_length > 254) share->base.min_block_length+=2; - + DBUG_PRINT("info", ("fixed header length: %u", HEAD_LENGTH)); + DBUG_PRINT("info", ("total header length: %u", share->pack.header_length)); + DBUG_PRINT("info", ("pack file version: %u", share->pack.version)); + DBUG_PRINT("info", ("min pack length: %u", share->min_pack_length)); + DBUG_PRINT("info", ("max pack length: %u", share->max_pack_length)); + DBUG_PRINT("info", ("elements of all trees: %u", elements)); + DBUG_PRINT("info", ("distinct values bytes: %u", intervall_length)); + DBUG_PRINT("info", ("number of code trees: %u", trees)); + DBUG_PRINT("info", ("bytes for record lgt: %u", share->pack.ref_length)); + DBUG_PRINT("info", ("record pointer length: %u", rec_reflength)); + + /* + Memory segment #1: + - Decode tree heads + - Distinct column values + */ if (!(share->decode_trees=(MI_DECODE_TREE*) my_malloc((uint) (trees*sizeof(MI_DECODE_TREE)+ intervall_length*sizeof(byte)), @@ -179,11 +199,19 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) goto err0; intervall_buff=(byte*) (share->decode_trees+trees); + /* + Memory segment #2: + - Decode tables + - Quick decode tables + - Temporary decode table + - Compressed data file header cache + This segment will be reallocated after construction of the tables. + */ length=(uint) (elements*2+trees*(1 << myisam_quick_table_bits)); if (!(share->decode_tables=(uint16*) - my_malloc((length+OFFSET_TABLE_SIZE)*sizeof(uint16)+ - (uint) (share->pack.header_length+7), - MYF(MY_WME | MY_ZEROFILL)))) + my_malloc((length + OFFSET_TABLE_SIZE) * sizeof(uint16) + + (uint) (share->pack.header_length - sizeof(header)), + MYF(MY_WME | MY_ZEROFILL)))) goto err1; tmp_buff=share->decode_tables+length; disk_cache=(byte*) (tmp_buff+OFFSET_TABLE_SIZE); @@ -196,7 +224,7 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) huff_tree_bits=max_bit(trees ? trees-1 : 0); init_bit_buffer(&bit_buff, (uchar*) disk_cache, (uint) (share->pack.header_length-sizeof(header))); - /* Read new info for each field */ + /* Read new info for each field */ for (i=0 ; i < share->base.fields ; i++) { share->rec[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,5); @@ -205,17 +233,26 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) share->rec[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff, huff_tree_bits); share->rec[i].unpack=get_unpack_function(share->rec+i); + DBUG_PRINT("info", ("col: %2u type: %2u pack: %u slbits: %2u", + i, share->rec[i].base_type, share->rec[i].pack_type, + share->rec[i].space_length_bits)); } skip_to_next_byte(&bit_buff); + /* + Construct the decoding tables from the file header. Keep track of + the used memory. + */ decode_table=share->decode_tables; for (i=0 ; i < trees ; i++) if (read_huff_table(&bit_buff,share->decode_trees+i,&decode_table, &intervall_buff,tmp_buff)) goto err3; + /* Reallocate the decoding tables to the used size. */ decode_table=(uint16*) my_realloc((gptr) share->decode_tables, (uint) ((byte*) decode_table - (byte*) share->decode_tables), MYF(MY_HOLD_ON_ERROR)); + /* Fix the table addresses in the tree heads. */ { long diff=PTR_BYTE_DIFF(decode_table,share->decode_tables); share->decode_tables=decode_table; @@ -224,7 +261,7 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) diff, uint16*); } - /* Fix record-ref-length for keys */ + /* Fix record-ref-length for keys */ if (fix_keys) { for (i=0 ; i < share->base.keys ; i++) @@ -261,7 +298,23 @@ err0: } - /* Read on huff-code-table from datafile */ +/* + Read a huff-code-table from datafile. + + SYNOPSIS + read_huff_table() + bit_buff Bit buffer pointing at start of the + decoding table in the file header cache. + decode_tree Pointer to the decode tree head. + decode_table IN/OUT Address of a pointer to the next free space. + intervall_buff IN/OUT Address of a pointer to the next unused values. + tmp_buff Buffer for temporary extraction of a full + decoding table as read from bit_buff. + + RETURN + 0 OK. + 1 Error. +*/ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, uint16 **decode_table, byte **intervall_buff, @@ -270,19 +323,32 @@ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, uint min_chr,elements,char_bits,offset_bits,size,intervall_length,table_bits, next_free_offset; uint16 *ptr,*end; + DBUG_ENTER("read_huff_table"); - LINT_INIT(ptr); if (!get_bits(bit_buff,1)) { + /* Byte value compression. */ min_chr=get_bits(bit_buff,8); elements=get_bits(bit_buff,9); char_bits=get_bits(bit_buff,5); offset_bits=get_bits(bit_buff,5); intervall_length=0; ptr=tmp_buff; + DBUG_PRINT("info", ("byte value compression")); + DBUG_PRINT("info", ("minimum byte value: %u", min_chr)); + DBUG_PRINT("info", ("number of tree nodes: %u", elements)); + DBUG_PRINT("info", ("bits for values: %u", char_bits)); + DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits)); + if (elements > 256) + { + DBUG_PRINT("error", ("ERROR: illegal number of tree elements: %u", + elements)); + DBUG_RETURN(1); + } } else { + /* Distinct column value compression. */ min_chr=0; elements=get_bits(bit_buff,15); intervall_length=get_bits(bit_buff,16); @@ -290,13 +356,27 @@ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, offset_bits=get_bits(bit_buff,5); decode_tree->quick_table_bits=0; ptr= *decode_table; + DBUG_PRINT("info", ("distinct column value compression")); + DBUG_PRINT("info", ("number of tree nodes: %u", elements)); + DBUG_PRINT("info", ("value buffer length: %u", intervall_length)); + DBUG_PRINT("info", ("bits for value index: %u", char_bits)); + DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits)); } size=elements*2-2; + DBUG_PRINT("info", ("tree size in uint16: %u", size)); + DBUG_PRINT("info", ("tree size in bytes: %u", size * sizeof(uint16))); for (end=ptr+size ; ptr < end ; ptr++) { if (get_bit(bit_buff)) + { *ptr= (uint16) get_bits(bit_buff,offset_bits); + if ((ptr + *ptr >= end) || !*ptr) + { + DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree")); + DBUG_RETURN(1); + } + } else *ptr= (uint16) (IS_CHAR + (get_bits(bit_buff,char_bits) + min_chr)); } @@ -306,11 +386,15 @@ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, decode_tree->intervalls= *intervall_buff; if (! intervall_length) { - table_bits=find_longest_bitstream(tmp_buff, tmp_buff+OFFSET_TABLE_SIZE); - if (table_bits == (uint) ~0) - return 1; + /* Byte value compression. ptr started from tmp_buff. */ + /* Find longest Huffman code from begin to end of tree in bits. */ + table_bits= find_longest_bitstream(tmp_buff, ptr); + if (table_bits >= OFFSET_TABLE_SIZE) + DBUG_RETURN(1); if (table_bits > myisam_quick_table_bits) table_bits=myisam_quick_table_bits; + DBUG_PRINT("info", ("table bits: %u", table_bits)); + next_free_offset= (1 << table_bits); make_quick_table(*decode_table,tmp_buff,&next_free_offset,0,table_bits, table_bits); @@ -319,105 +403,279 @@ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, } else { + /* Distinct column value compression. ptr started from *decode_table */ (*decode_table)=end; + /* + get_bits() moves some bytes to a cache buffer in advance. May need + to step back. + */ bit_buff->pos-= bit_buff->bits/8; + /* Copy the distinct column values from the buffer. */ memcpy(*intervall_buff,bit_buff->pos,(size_t) intervall_length); (*intervall_buff)+=intervall_length; bit_buff->pos+=intervall_length; bit_buff->bits=0; } - return 0; + DBUG_RETURN(0); } +/* + Make a quick_table for faster decoding. + + SYNOPSIS + make_quick_table() + to_table Target quick_table and remaining decode table. + decode_table Source Huffman (sub-)tree within tmp_buff. + next_free_offset IN/OUT Next free offset from to_table. + Starts behind quick_table on the top-level. + value Huffman bits found so far. + bits Remaining bits to be collected. + max_bits Total number of bits to collect (table_bits). + + DESCRIPTION + + The quick table is an array of 16-bit values. There exists one value + for each possible code representable by max_bits (table_bits) bits. + In most cases table_bits is 9. So there are 512 16-bit values. + + If the high-order bit (16) is set (IS_CHAR) then the array slot for + this value is a valid Huffman code for a resulting byte value. + + The low-order 8 bits (1..8) are the resulting byte value. + + Bits 9..14 are the length of the Huffman code for this byte value. + This means so many bits from the input stream were needed to + represent this byte value. The remaining bits belong to later + Huffman codes. This also means that for every Huffman code shorter + than table_bits there are multiple entires in the array, which + differ just in the unused bits. + + If the high-order bit (16) is clear (0) then the remaining bits are + the position of the remaining Huffman decode tree segment behind the + quick table. + + RETURN + void +*/ + static void make_quick_table(uint16 *to_table, uint16 *decode_table, uint *next_free_offset, uint value, uint bits, uint max_bits) { + DBUG_ENTER("make_quick_table"); + + /* + When down the table to the requested maximum, copy the rest of the + Huffman table. + */ if (!bits--) { + /* + Remaining left Huffman tree segment starts behind quick table. + Remaining right Huffman tree segment starts behind left segment. + */ to_table[value]= (uint16) *next_free_offset; - *next_free_offset=copy_decode_table(to_table, *next_free_offset, - decode_table); - return; + /* + Re-construct the remaining Huffman tree segment at + next_free_offset in to_table. + */ + *next_free_offset= copy_decode_table(to_table, *next_free_offset, + decode_table); + DBUG_VOID_RETURN; } + + /* Descent on the left side. Left side bits are clear (0). */ if (!(*decode_table & IS_CHAR)) { - make_quick_table(to_table,decode_table+ *decode_table, - next_free_offset,value,bits,max_bits); + /* Not a leaf. Follow the pointer. */ + make_quick_table(to_table, decode_table + *decode_table, + next_free_offset, value, bits, max_bits); } else - fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); + { + /* + A leaf. A Huffman code is complete. Fill the quick_table + array for all possible bit strings starting with this Huffman + code. + */ + fill_quick_table(to_table + value, bits, max_bits, (uint) *decode_table); + } + + /* Descent on the right side. Right side bits are set (1). */ decode_table++; value|= (1 << bits); if (!(*decode_table & IS_CHAR)) { - make_quick_table(to_table,decode_table+ *decode_table, - next_free_offset,value,bits,max_bits); + /* Not a leaf. Follow the pointer. */ + make_quick_table(to_table, decode_table + *decode_table, + next_free_offset, value, bits, max_bits); } else - fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); - return; + { + /* + A leaf. A Huffman code is complete. Fill the quick_table + array for all possible bit strings starting with this Huffman + code. + */ + fill_quick_table(to_table + value, bits, max_bits, (uint) *decode_table); + } + + DBUG_VOID_RETURN; } +/* + Fill quick_table for all possible values starting with this Huffman code. + + SYNOPSIS + fill_quick_table() + table Target quick_table position. + bits Unused bits from max_bits. + max_bits Total number of bits to collect (table_bits). + value The byte encoded by the found Huffman code. + + DESCRIPTION + + Fill the segment (all slots) of the quick_table array with the + resulting value for the found Huffman code. There are as many slots + as there are combinations representable by the unused bits. + + In most cases we use 9 table bits. Assume a 3-bit Huffman code. Then + there are 6 unused bits. Hence we fill 2**6 = 64 slots with the + value. + + RETURN + void +*/ + static void fill_quick_table(uint16 *table, uint bits, uint max_bits, uint value) { uint16 *end; - value|=(max_bits-bits) << 8; - for (end=table+ (1 << bits) ; - table < end ; - *table++ = (uint16) value | IS_CHAR) ; + DBUG_ENTER("fill_quick_table"); + + /* + Bits 1..8 of value represent the decoded byte value. + Bits 9..14 become the length of the Huffman code for this byte value. + Bit 16 flags a valid code (IS_CHAR). + */ + value|= (max_bits - bits) << 8 | IS_CHAR; + + for (end= table + (1 << bits); table < end; table++) + { + *table= (uint16) value; + } + DBUG_VOID_RETURN; } +/* + Reconstruct a decode subtree at the target position. + + SYNOPSIS + copy_decode_table() + to_pos Target quick_table and remaining decode table. + offset Next free offset from to_pos. + decode_table Source Huffman subtree within tmp_buff. + + NOTE + Pointers in the decode tree are relative to the pointers position. + + RETURN + next free offset from to_pos. +*/ + static uint copy_decode_table(uint16 *to_pos, uint offset, uint16 *decode_table) { uint prev_offset; prev_offset= offset; + DBUG_ENTER("copy_decode_table"); + /* Descent on the left side. */ if (!(*decode_table & IS_CHAR)) { + /* Set a pointer to the next target node. */ to_pos[offset]=2; + /* Copy the left hand subtree there. */ offset=copy_decode_table(to_pos,offset+2,decode_table+ *decode_table); } else { + /* Copy the byte value. */ to_pos[offset]= *decode_table; + /* Step behind this node. */ offset+=2; } - decode_table++; + /* Descent on the right side. */ + decode_table++; if (!(*decode_table & IS_CHAR)) { + /* Set a pointer to the next free target node. */ to_pos[prev_offset+1]=(uint16) (offset-prev_offset-1); + /* Copy the right hand subtree to the entry of that node. */ offset=copy_decode_table(to_pos,offset,decode_table+ *decode_table); } else + { + /* Copy the byte value. */ to_pos[prev_offset+1]= *decode_table; - return offset; + } + DBUG_RETURN(offset); } +/* + Find the length of the longest Huffman code in this table in bits. + + SYNOPSIS + find_longest_bitstream() + table Code (sub-)table start. + end End of code table. + + IMPLEMENTATION + + Recursively follow the branch(es) of the code pair on every level of + the tree until two byte values (and no branch) are found. Add one to + each level when returning back from each recursion stage. + + 'end' is used for error checking only. A clean tree terminates + before reaching 'end'. Hence the exact value of 'end' is not too + important. However having it higher than necessary could lead to + misbehaviour should 'next' jump into the dirty area. + + RETURN + length Length of longest Huffman code in bits. + >= OFFSET_TABLE_SIZE Error, broken tree. It does not end before 'end'. +*/ + static uint find_longest_bitstream(uint16 *table, uint16 *end) { - uint length=1,length2; + uint length= 1; + uint length2; + if (!(*table & IS_CHAR)) { uint16 *next= table + *table; if (next > end || next == table) - return ~0; - length=find_longest_bitstream(next, end)+1; + { + DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree")); + return OFFSET_TABLE_SIZE; + } + length= find_longest_bitstream(next, end) + 1; } table++; if (!(*table & IS_CHAR)) { uint16 *next= table + *table; if (next > end || next == table) - return ~0; - length2=find_longest_bitstream(table+ *table, end)+1; + { + DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree")); + return OFFSET_TABLE_SIZE; + } + length2= find_longest_bitstream(next, end) + 1; length=max(length,length2); } return length; @@ -825,18 +1083,46 @@ static void decode_bytes(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,uchar *to, bit_buff->pos+=4; bits+=32; } - /* First use info in quick_table */ + /* + First use info in quick_table. + + The quick table is an array of 16-bit values. There exists one + value for each possible code representable by table_bits bits. + In most cases table_bits is 9. So there are 512 16-bit values. + + If the high-order bit (16) is set (IS_CHAR) then the array slot + for this value is a valid Huffman code for a resulting byte value. + + The low-order 8 bits (1..8) are the resulting byte value. + + Bits 9..14 are the length of the Huffman code for this byte value. + This means so many bits from the input stream were needed to + represent this byte value. The remaining bits belong to later + Huffman codes. This also means that for every Huffman code shorter + than table_bits there are multiple entires in the array, which + differ just in the unused bits. + + If the high-order bit (16) is clear (0) then the remaining bits are + the position of the remaining Huffman decode tree segment behind the + quick table. + */ low_byte=(uint) (bit_buff->current_byte >> (bits - table_bits)) & table_and; low_byte=decode_tree->table[low_byte]; if (low_byte & IS_CHAR) { + /* + All Huffman codes of less or equal table_bits length are in the + quick table. This is one of them. + */ *to++ = (low_byte & 255); /* Found char in quick table */ bits-= ((low_byte >> 8) & 31); /* Remove bits used */ } else { /* Map through rest of decode-table */ + /* This means that the Huffman code must be longer than table_bits. */ pos=decode_tree->table+low_byte; bits-=table_bits; + /* NOTE: decode_bytes_test_bit() is a macro wich contains a break !!! */ for (;;) { low_byte=(uint) (bit_buff->current_byte >> (bits-8)); @@ -1062,6 +1348,11 @@ uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, { head_length+= read_pack_length((uint) myisam->s->pack.version, header + head_length, &info->blob_len); + /* + Ensure that the record buffer is big enough for the compressed + record plus all expanded blobs. [We do not have an extra buffer + for the resulting blobs. Sigh.] + */ if (!(mi_alloc_rec_buff(myisam,info->rec_len + info->blob_len, rec_buff_p))) return BLOCK_FATAL_ERROR; /* not enough memory */ |