From 30f83050f3a1d89b4beee81f123750387b2dbf99 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 28 Nov 2006 15:46:01 +0100 Subject: Bug#23139 - myisamchk and mysqld crash when trying to access table A corrupted compressed table could crash the server and myisamchk. The data file of an uncompressed table contains just the records. There is no header in the data file. However the data file of a compressed table has a header. The header describes how the table was compressed. This information is necessary to extract the records from the compressed data file. Part of the compressed data file header are the [de]code tables. They are numeric representations of the Huffman trees used for coding and decoding. A Huffman tree is a binary tree. Every node has two childs. A child can be a leaf or a branch. Leaves contain the decoded value. Branches point to another tree node. Since the [de]code table is represented as an array of childs, the branches need to point at a child within the same array. The corruption of the compressed data file from the bug report was a couple of branches that pointed outside their array. This condition had not been correctly checked. I added some checks for the pointers in the decode tables. This type of corruption will no longer crash the server or myisamchk. No test case. A corrupted compressed table is required. myisam/mi_packrec.c: Bug#23139 - myisamchk and mysqld crash when trying to access table Added some checks for the pointers in the decode tables. Added comments, DBUG prints, style fixes. --- myisam/mi_packrec.c | 365 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 328 insertions(+), 37 deletions(-) (limited to 'myisam') diff --git a/myisam/mi_packrec.c b/myisam/mi_packrec.c index feb8d33b015..c88b8ab6607 100644 --- a/myisam/mi_packrec.c +++ b/myisam/mi_packrec.c @@ -20,7 +20,10 @@ #define IS_CHAR ((uint) 32768) /* Bit if char (not offset) in tree */ -#if INT_MAX > 65536L +/* Some definitions to keep in sync with myisampack.c */ +#define HEAD_LENGTH 32 /* Length of fixed header */ + +#if INT_MAX > 32767 #define BITS_SAVED 32 #define MAX_QUICK_TABLE_BITS 9 /* Because we may shift in 24 bits */ #else @@ -42,6 +45,7 @@ { bits-=(bit+1); break; } \ pos+= *pos +/* Size in uint16 of a Huffman tree for byte compression of 256 byte values. */ #define OFFSET_TABLE_SIZE 512 static uint read_huff_table(MI_BIT_BUFF *bit_buff,MI_DECODE_TREE *decode_tree, @@ -132,7 +136,7 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) uint16 *decode_table,*tmp_buff; ulong elements,intervall_length; char *disk_cache,*intervall_buff; - uchar header[32]; + uchar header[HEAD_LENGTH]; MYISAM_SHARE *share=info->s; MI_BIT_BUFF bit_buff; DBUG_ENTER("_mi_read_pack_info"); @@ -150,12 +154,13 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) my_errno=HA_ERR_END_OF_FILE; goto err0; } + /* Only the first three bytes of magic number are independent of version. */ if (memcmp((byte*) header, (byte*) myisam_pack_file_magic, 3)) { my_errno=HA_ERR_WRONG_IN_RECORD; goto err0; } - share->pack.version= header[3]; + share->pack.version= header[3]; /* fourth byte of magic number */ share->pack.header_length= uint4korr(header+4); share->min_pack_length=(uint) uint4korr(header+8); share->max_pack_length=(uint) uint4korr(header+12); @@ -171,7 +176,22 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) share->base.min_block_length=share->min_pack_length+1; if (share->min_pack_length > 254) share->base.min_block_length+=2; - + DBUG_PRINT("info", ("fixed header length: %u", HEAD_LENGTH)); + DBUG_PRINT("info", ("total header length: %u", share->pack.header_length)); + DBUG_PRINT("info", ("pack file version: %u", share->pack.version)); + DBUG_PRINT("info", ("min pack length: %u", share->min_pack_length)); + DBUG_PRINT("info", ("max pack length: %u", share->max_pack_length)); + DBUG_PRINT("info", ("elements of all trees: %u", elements)); + DBUG_PRINT("info", ("distinct values bytes: %u", intervall_length)); + DBUG_PRINT("info", ("number of code trees: %u", trees)); + DBUG_PRINT("info", ("bytes for record lgt: %u", share->pack.ref_length)); + DBUG_PRINT("info", ("record pointer length: %u", rec_reflength)); + + /* + Memory segment #1: + - Decode tree heads + - Distinct column values + */ if (!(share->decode_trees=(MI_DECODE_TREE*) my_malloc((uint) (trees*sizeof(MI_DECODE_TREE)+ intervall_length*sizeof(byte)), @@ -179,11 +199,19 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) goto err0; intervall_buff=(byte*) (share->decode_trees+trees); + /* + Memory segment #2: + - Decode tables + - Quick decode tables + - Temporary decode table + - Compressed data file header cache + This segment will be reallocated after construction of the tables. + */ length=(uint) (elements*2+trees*(1 << myisam_quick_table_bits)); if (!(share->decode_tables=(uint16*) - my_malloc((length+OFFSET_TABLE_SIZE)*sizeof(uint16)+ - (uint) (share->pack.header_length+7), - MYF(MY_WME | MY_ZEROFILL)))) + my_malloc((length + OFFSET_TABLE_SIZE) * sizeof(uint16) + + (uint) (share->pack.header_length - sizeof(header)), + MYF(MY_WME | MY_ZEROFILL)))) goto err1; tmp_buff=share->decode_tables+length; disk_cache=(byte*) (tmp_buff+OFFSET_TABLE_SIZE); @@ -196,7 +224,7 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) huff_tree_bits=max_bit(trees ? trees-1 : 0); init_bit_buffer(&bit_buff, (uchar*) disk_cache, (uint) (share->pack.header_length-sizeof(header))); - /* Read new info for each field */ + /* Read new info for each field */ for (i=0 ; i < share->base.fields ; i++) { share->rec[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,5); @@ -205,17 +233,26 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) share->rec[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff, huff_tree_bits); share->rec[i].unpack=get_unpack_function(share->rec+i); + DBUG_PRINT("info", ("col: %2u type: %2u pack: %u slbits: %2u", + i, share->rec[i].base_type, share->rec[i].pack_type, + share->rec[i].space_length_bits)); } skip_to_next_byte(&bit_buff); + /* + Construct the decoding tables from the file header. Keep track of + the used memory. + */ decode_table=share->decode_tables; for (i=0 ; i < trees ; i++) if (read_huff_table(&bit_buff,share->decode_trees+i,&decode_table, &intervall_buff,tmp_buff)) goto err3; + /* Reallocate the decoding tables to the used size. */ decode_table=(uint16*) my_realloc((gptr) share->decode_tables, (uint) ((byte*) decode_table - (byte*) share->decode_tables), MYF(MY_HOLD_ON_ERROR)); + /* Fix the table addresses in the tree heads. */ { long diff=PTR_BYTE_DIFF(decode_table,share->decode_tables); share->decode_tables=decode_table; @@ -224,7 +261,7 @@ my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys) diff, uint16*); } - /* Fix record-ref-length for keys */ + /* Fix record-ref-length for keys */ if (fix_keys) { for (i=0 ; i < share->base.keys ; i++) @@ -261,7 +298,23 @@ err0: } - /* Read on huff-code-table from datafile */ +/* + Read a huff-code-table from datafile. + + SYNOPSIS + read_huff_table() + bit_buff Bit buffer pointing at start of the + decoding table in the file header cache. + decode_tree Pointer to the decode tree head. + decode_table IN/OUT Address of a pointer to the next free space. + intervall_buff IN/OUT Address of a pointer to the next unused values. + tmp_buff Buffer for temporary extraction of a full + decoding table as read from bit_buff. + + RETURN + 0 OK. + 1 Error. +*/ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, uint16 **decode_table, byte **intervall_buff, @@ -270,19 +323,32 @@ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, uint min_chr,elements,char_bits,offset_bits,size,intervall_length,table_bits, next_free_offset; uint16 *ptr,*end; + DBUG_ENTER("read_huff_table"); - LINT_INIT(ptr); if (!get_bits(bit_buff,1)) { + /* Byte value compression. */ min_chr=get_bits(bit_buff,8); elements=get_bits(bit_buff,9); char_bits=get_bits(bit_buff,5); offset_bits=get_bits(bit_buff,5); intervall_length=0; ptr=tmp_buff; + DBUG_PRINT("info", ("byte value compression")); + DBUG_PRINT("info", ("minimum byte value: %u", min_chr)); + DBUG_PRINT("info", ("number of tree nodes: %u", elements)); + DBUG_PRINT("info", ("bits for values: %u", char_bits)); + DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits)); + if (elements > 256) + { + DBUG_PRINT("error", ("ERROR: illegal number of tree elements: %u", + elements)); + DBUG_RETURN(1); + } } else { + /* Distinct column value compression. */ min_chr=0; elements=get_bits(bit_buff,15); intervall_length=get_bits(bit_buff,16); @@ -290,13 +356,27 @@ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, offset_bits=get_bits(bit_buff,5); decode_tree->quick_table_bits=0; ptr= *decode_table; + DBUG_PRINT("info", ("distinct column value compression")); + DBUG_PRINT("info", ("number of tree nodes: %u", elements)); + DBUG_PRINT("info", ("value buffer length: %u", intervall_length)); + DBUG_PRINT("info", ("bits for value index: %u", char_bits)); + DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits)); } size=elements*2-2; + DBUG_PRINT("info", ("tree size in uint16: %u", size)); + DBUG_PRINT("info", ("tree size in bytes: %u", size * sizeof(uint16))); for (end=ptr+size ; ptr < end ; ptr++) { if (get_bit(bit_buff)) + { *ptr= (uint16) get_bits(bit_buff,offset_bits); + if ((ptr + *ptr >= end) || !*ptr) + { + DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree")); + DBUG_RETURN(1); + } + } else *ptr= (uint16) (IS_CHAR + (get_bits(bit_buff,char_bits) + min_chr)); } @@ -306,11 +386,15 @@ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, decode_tree->intervalls= *intervall_buff; if (! intervall_length) { - table_bits=find_longest_bitstream(tmp_buff, tmp_buff+OFFSET_TABLE_SIZE); - if (table_bits == (uint) ~0) - return 1; + /* Byte value compression. ptr started from tmp_buff. */ + /* Find longest Huffman code from begin to end of tree in bits. */ + table_bits= find_longest_bitstream(tmp_buff, ptr); + if (table_bits >= OFFSET_TABLE_SIZE) + DBUG_RETURN(1); if (table_bits > myisam_quick_table_bits) table_bits=myisam_quick_table_bits; + DBUG_PRINT("info", ("table bits: %u", table_bits)); + next_free_offset= (1 << table_bits); make_quick_table(*decode_table,tmp_buff,&next_free_offset,0,table_bits, table_bits); @@ -319,105 +403,279 @@ static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree, } else { + /* Distinct column value compression. ptr started from *decode_table */ (*decode_table)=end; + /* + get_bits() moves some bytes to a cache buffer in advance. May need + to step back. + */ bit_buff->pos-= bit_buff->bits/8; + /* Copy the distinct column values from the buffer. */ memcpy(*intervall_buff,bit_buff->pos,(size_t) intervall_length); (*intervall_buff)+=intervall_length; bit_buff->pos+=intervall_length; bit_buff->bits=0; } - return 0; + DBUG_RETURN(0); } +/* + Make a quick_table for faster decoding. + + SYNOPSIS + make_quick_table() + to_table Target quick_table and remaining decode table. + decode_table Source Huffman (sub-)tree within tmp_buff. + next_free_offset IN/OUT Next free offset from to_table. + Starts behind quick_table on the top-level. + value Huffman bits found so far. + bits Remaining bits to be collected. + max_bits Total number of bits to collect (table_bits). + + DESCRIPTION + + The quick table is an array of 16-bit values. There exists one value + for each possible code representable by max_bits (table_bits) bits. + In most cases table_bits is 9. So there are 512 16-bit values. + + If the high-order bit (16) is set (IS_CHAR) then the array slot for + this value is a valid Huffman code for a resulting byte value. + + The low-order 8 bits (1..8) are the resulting byte value. + + Bits 9..14 are the length of the Huffman code for this byte value. + This means so many bits from the input stream were needed to + represent this byte value. The remaining bits belong to later + Huffman codes. This also means that for every Huffman code shorter + than table_bits there are multiple entires in the array, which + differ just in the unused bits. + + If the high-order bit (16) is clear (0) then the remaining bits are + the position of the remaining Huffman decode tree segment behind the + quick table. + + RETURN + void +*/ + static void make_quick_table(uint16 *to_table, uint16 *decode_table, uint *next_free_offset, uint value, uint bits, uint max_bits) { + DBUG_ENTER("make_quick_table"); + + /* + When down the table to the requested maximum, copy the rest of the + Huffman table. + */ if (!bits--) { + /* + Remaining left Huffman tree segment starts behind quick table. + Remaining right Huffman tree segment starts behind left segment. + */ to_table[value]= (uint16) *next_free_offset; - *next_free_offset=copy_decode_table(to_table, *next_free_offset, - decode_table); - return; + /* + Re-construct the remaining Huffman tree segment at + next_free_offset in to_table. + */ + *next_free_offset= copy_decode_table(to_table, *next_free_offset, + decode_table); + DBUG_VOID_RETURN; } + + /* Descent on the left side. Left side bits are clear (0). */ if (!(*decode_table & IS_CHAR)) { - make_quick_table(to_table,decode_table+ *decode_table, - next_free_offset,value,bits,max_bits); + /* Not a leaf. Follow the pointer. */ + make_quick_table(to_table, decode_table + *decode_table, + next_free_offset, value, bits, max_bits); } else - fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); + { + /* + A leaf. A Huffman code is complete. Fill the quick_table + array for all possible bit strings starting with this Huffman + code. + */ + fill_quick_table(to_table + value, bits, max_bits, (uint) *decode_table); + } + + /* Descent on the right side. Right side bits are set (1). */ decode_table++; value|= (1 << bits); if (!(*decode_table & IS_CHAR)) { - make_quick_table(to_table,decode_table+ *decode_table, - next_free_offset,value,bits,max_bits); + /* Not a leaf. Follow the pointer. */ + make_quick_table(to_table, decode_table + *decode_table, + next_free_offset, value, bits, max_bits); } else - fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); - return; + { + /* + A leaf. A Huffman code is complete. Fill the quick_table + array for all possible bit strings starting with this Huffman + code. + */ + fill_quick_table(to_table + value, bits, max_bits, (uint) *decode_table); + } + + DBUG_VOID_RETURN; } +/* + Fill quick_table for all possible values starting with this Huffman code. + + SYNOPSIS + fill_quick_table() + table Target quick_table position. + bits Unused bits from max_bits. + max_bits Total number of bits to collect (table_bits). + value The byte encoded by the found Huffman code. + + DESCRIPTION + + Fill the segment (all slots) of the quick_table array with the + resulting value for the found Huffman code. There are as many slots + as there are combinations representable by the unused bits. + + In most cases we use 9 table bits. Assume a 3-bit Huffman code. Then + there are 6 unused bits. Hence we fill 2**6 = 64 slots with the + value. + + RETURN + void +*/ + static void fill_quick_table(uint16 *table, uint bits, uint max_bits, uint value) { uint16 *end; - value|=(max_bits-bits) << 8; - for (end=table+ (1 << bits) ; - table < end ; - *table++ = (uint16) value | IS_CHAR) ; + DBUG_ENTER("fill_quick_table"); + + /* + Bits 1..8 of value represent the decoded byte value. + Bits 9..14 become the length of the Huffman code for this byte value. + Bit 16 flags a valid code (IS_CHAR). + */ + value|= (max_bits - bits) << 8 | IS_CHAR; + + for (end= table + (1 << bits); table < end; table++) + { + *table= (uint16) value; + } + DBUG_VOID_RETURN; } +/* + Reconstruct a decode subtree at the target position. + + SYNOPSIS + copy_decode_table() + to_pos Target quick_table and remaining decode table. + offset Next free offset from to_pos. + decode_table Source Huffman subtree within tmp_buff. + + NOTE + Pointers in the decode tree are relative to the pointers position. + + RETURN + next free offset from to_pos. +*/ + static uint copy_decode_table(uint16 *to_pos, uint offset, uint16 *decode_table) { uint prev_offset; prev_offset= offset; + DBUG_ENTER("copy_decode_table"); + /* Descent on the left side. */ if (!(*decode_table & IS_CHAR)) { + /* Set a pointer to the next target node. */ to_pos[offset]=2; + /* Copy the left hand subtree there. */ offset=copy_decode_table(to_pos,offset+2,decode_table+ *decode_table); } else { + /* Copy the byte value. */ to_pos[offset]= *decode_table; + /* Step behind this node. */ offset+=2; } - decode_table++; + /* Descent on the right side. */ + decode_table++; if (!(*decode_table & IS_CHAR)) { + /* Set a pointer to the next free target node. */ to_pos[prev_offset+1]=(uint16) (offset-prev_offset-1); + /* Copy the right hand subtree to the entry of that node. */ offset=copy_decode_table(to_pos,offset,decode_table+ *decode_table); } else + { + /* Copy the byte value. */ to_pos[prev_offset+1]= *decode_table; - return offset; + } + DBUG_RETURN(offset); } +/* + Find the length of the longest Huffman code in this table in bits. + + SYNOPSIS + find_longest_bitstream() + table Code (sub-)table start. + end End of code table. + + IMPLEMENTATION + + Recursively follow the branch(es) of the code pair on every level of + the tree until two byte values (and no branch) are found. Add one to + each level when returning back from each recursion stage. + + 'end' is used for error checking only. A clean tree terminates + before reaching 'end'. Hence the exact value of 'end' is not too + important. However having it higher than necessary could lead to + misbehaviour should 'next' jump into the dirty area. + + RETURN + length Length of longest Huffman code in bits. + >= OFFSET_TABLE_SIZE Error, broken tree. It does not end before 'end'. +*/ + static uint find_longest_bitstream(uint16 *table, uint16 *end) { - uint length=1,length2; + uint length= 1; + uint length2; + if (!(*table & IS_CHAR)) { uint16 *next= table + *table; if (next > end || next == table) - return ~0; - length=find_longest_bitstream(next, end)+1; + { + DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree")); + return OFFSET_TABLE_SIZE; + } + length= find_longest_bitstream(next, end) + 1; } table++; if (!(*table & IS_CHAR)) { uint16 *next= table + *table; if (next > end || next == table) - return ~0; - length2=find_longest_bitstream(table+ *table, end)+1; + { + DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree")); + return OFFSET_TABLE_SIZE; + } + length2= find_longest_bitstream(next, end) + 1; length=max(length,length2); } return length; @@ -825,18 +1083,46 @@ static void decode_bytes(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,uchar *to, bit_buff->pos+=4; bits+=32; } - /* First use info in quick_table */ + /* + First use info in quick_table. + + The quick table is an array of 16-bit values. There exists one + value for each possible code representable by table_bits bits. + In most cases table_bits is 9. So there are 512 16-bit values. + + If the high-order bit (16) is set (IS_CHAR) then the array slot + for this value is a valid Huffman code for a resulting byte value. + + The low-order 8 bits (1..8) are the resulting byte value. + + Bits 9..14 are the length of the Huffman code for this byte value. + This means so many bits from the input stream were needed to + represent this byte value. The remaining bits belong to later + Huffman codes. This also means that for every Huffman code shorter + than table_bits there are multiple entires in the array, which + differ just in the unused bits. + + If the high-order bit (16) is clear (0) then the remaining bits are + the position of the remaining Huffman decode tree segment behind the + quick table. + */ low_byte=(uint) (bit_buff->current_byte >> (bits - table_bits)) & table_and; low_byte=decode_tree->table[low_byte]; if (low_byte & IS_CHAR) { + /* + All Huffman codes of less or equal table_bits length are in the + quick table. This is one of them. + */ *to++ = (low_byte & 255); /* Found char in quick table */ bits-= ((low_byte >> 8) & 31); /* Remove bits used */ } else { /* Map through rest of decode-table */ + /* This means that the Huffman code must be longer than table_bits. */ pos=decode_tree->table+low_byte; bits-=table_bits; + /* NOTE: decode_bytes_test_bit() is a macro wich contains a break !!! */ for (;;) { low_byte=(uint) (bit_buff->current_byte >> (bits-8)); @@ -1062,6 +1348,11 @@ uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, { head_length+= read_pack_length((uint) myisam->s->pack.version, header + head_length, &info->blob_len); + /* + Ensure that the record buffer is big enough for the compressed + record plus all expanded blobs. [We do not have an extra buffer + for the resulting blobs. Sigh.] + */ if (!(mi_alloc_rec_buff(myisam,info->rec_len + info->blob_len, rec_buff_p))) return BLOCK_FATAL_ERROR; /* not enough memory */ -- cgit v1.2.1