diff options
Diffstat (limited to 'sql')
-rw-r--r-- | sql/field.h | 8 | ||||
-rw-r--r-- | sql/key.cc | 248 | ||||
-rw-r--r-- | sql/mysql_priv.h | 3 | ||||
-rw-r--r-- | sql/sql_join_cache.cc | 121 | ||||
-rw-r--r-- | sql/sql_join_cache.h | 34 |
5 files changed, 399 insertions, 15 deletions
diff --git a/sql/field.h b/sql/field.h index fc43c044d87..b4dab7561ef 100644 --- a/sql/field.h +++ b/sql/field.h @@ -593,6 +593,7 @@ public: /* Check whether the field can be used as a join attribute in hash join */ virtual bool hash_join_is_possible() { return TRUE; } + virtual bool eq_cmp_as_binary() { return TRUE; } friend bool reopen_table(THD *,struct st_table *,bool); friend int cre_myisam(char * name, register TABLE *form, uint options, @@ -769,12 +770,7 @@ public: my_decimal *val_decimal(my_decimal *); virtual bool str_needs_quotes() { return TRUE; } uint is_equal(Create_field *new_field); - - bool hash_join_is_possible() - { - /* TODO: support hash joins for non-binary collations */ - return (flags & BINARY_FLAG); - } + bool eq_cmp_as_binary() { return test(flags & BINARY_FLAG); } }; diff --git a/sql/key.cc b/sql/key.cc index 1d27fdcf208..0de1d1fd642 100644 --- a/sql/key.cc +++ b/sql/key.cc @@ -618,3 +618,251 @@ int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2, } +/** + Get hash value for the key from a key buffer + + @param key_info the key descriptor + @param used_key_part number of key parts used for the key + @param key pointer to the buffer with the key value + + @datails + When hashing we should take special care only of: + 1. NULLs (and keyparts which can be null so one byte reserved for it); + 2. Strings for which we have to take into account their collations + and the values of their lengths in the prefixes. + + @return hash value calculated for the key +*/ + +ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key) +{ + ulong nr=1, nr2=4; + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end_key_part= key_part + used_key_parts; + + for (; key_part < end_key_part; key_part++) + { + uchar *pos= (uchar*)key; + CHARSET_INFO *cs; + uint length, pack_length; + bool is_string= TRUE; + LINT_INIT(cs); + key+= key_part->length; + if (key_part->null_bit) + { + key++; /* Skip null byte */ + if (*pos) /* Found null */ + { + nr^= (nr << 1) | 1; + /* Add key pack length to key for VARCHAR segments */ + switch (key_part->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARBINARY1: + key++; + break; + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY2: + key+= 2; + break; + default: + ; + } + continue; + } + pos++; /* Skip null byte */ + } + /* If it is string set parameters of the string */ + switch (key_part->type) { + case HA_KEYTYPE_TEXT: + cs= key_part->field->charset(); + length= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_BINARY : + cs= &my_charset_bin; + length= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_VARTEXT1: + cs= key_part->field->charset(); + length= (uint)(pos[0]); + pack_length= 1; + break; + case HA_KEYTYPE_VARBINARY1: + cs= &my_charset_bin; + length= (uint)(pos[0]); + pack_length= 1; + break; + case HA_KEYTYPE_VARTEXT2: + cs= key_part->field->charset(); + length= uint2korr(pos); + pack_length= 2; + break; + case HA_KEYTYPE_VARBINARY2: + cs= &my_charset_bin; + length= uint2korr(pos); + pack_length= 2; + break; + default: + is_string= FALSE; + } + + if (is_string) + { + if (cs->mbmaxlen > 1) + { + uint char_length= my_charpos(cs, pos + pack_length, + pos + pack_length + length, + length / cs->mbmaxlen); + set_if_smaller(length, char_length); + } + cs->coll->hash_sort(cs, pos+pack_length, length, &nr, &nr2); + key+= pack_length; + } + else + { + for (; pos < (uchar*)key ; pos++) + { + nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos)) + (nr << 8); + nr2+=3; + } + } + } + DBUG_PRINT("exit", ("hash: %lx", nr)); + return(nr); +} + + +/** + Check whether two keys in the key buffers are equal + + @param key_info the key descriptor + @param used_key_part number of key parts used for the keys + @param key1 pointer to the buffer with the first key + @param key2 pointer to the buffer with the second key + + @detail See details of key_hashnr(). + + @retval TRUE keys in the buffers are NOT equal + @retval FALSE keys in the buffers are equal +*/ + +bool key_buf_cmp(KEY *key_info, uint used_key_parts, + const uchar *key1, const uchar *key2) +{ + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end_key_part= key_part + used_key_parts; + + for (; key_part < end_key_part; key_part++) + { + uchar *pos1= (uchar*)key1; + uchar *pos2= (uchar*)key2; + CHARSET_INFO *cs; + uint length1, length2, pack_length; + bool is_string= TRUE; + LINT_INIT(cs); + key1+= key_part->length; + key2+= key_part->length; + if (key_part->null_bit) + { + key1++; key2++; /* Skip null byte */ + if (*pos1 && *pos2) /* Both are null */ + { + /* Add key pack length to key for VARCHAR segments */ + switch (key_part->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARBINARY1: + key1++; key2++; + break; + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY2: + key1+= 2; key2+= 2; + break; + default: + ; + } + continue; + } + if (*pos1 != *pos2) + return FALSE; + pos1++; pos2++; + } + + /* If it is string set parameters of the string */ + switch (key_part->type) { + case HA_KEYTYPE_TEXT: + cs= key_part->field->charset(); + length1= length2= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_BINARY : + cs= &my_charset_bin; + length1= length2= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_VARTEXT1: + cs= key_part->field->charset(); + length1= (uint)(pos1[0]); + length2= (uint)(pos2[0]); + pack_length= 1; + break; + case HA_KEYTYPE_VARBINARY1: + cs= &my_charset_bin; + length1= (uint)(pos1[0]); + length2= (uint)(pos2[0]); + pack_length= 1; + break; + case HA_KEYTYPE_VARTEXT2: + cs= key_part->field->charset(); + length1= uint2korr(pos1); + length2= uint2korr(pos2); + pack_length= 2; + break; + case HA_KEYTYPE_VARBINARY2: + cs= &my_charset_bin; + length1= uint2korr(pos1); + length2= uint2korr(pos2); + pack_length= 2; + break; + default: + is_string= FALSE; + } + + if (is_string) + { + /* + Compare the strings taking into account length in characters + and collation + */ + uint byte_len1= length1, byte_len2= length2; + if (cs->mbmaxlen > 1) + { + uint char_length1= my_charpos(cs, pos1 + pack_length, + pos1 + pack_length + length1, + length1 / cs->mbmaxlen); + uint char_length2= my_charpos(cs, pos2 + pack_length, + pos2 + pack_length + length2, + length2 / cs->mbmaxlen); + set_if_smaller(length1, char_length1); + set_if_smaller(length2, char_length2); + } + if (length1 != length2 || + cs->coll->strnncollsp(cs, + pos1 + pack_length, byte_len1, + pos2 + pack_length, byte_len2, + 1)) + return TRUE; + key1+= pack_length; key2+= pack_length; + } + else + { + /* it is OK to compare non-string byte per byte */ + for (; pos1 < (uchar*)key1 ; pos1++, pos2++) + { + if (pos1[0] != pos2[0]) + return TRUE; + } + } + } + return FALSE; +} diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 122baf47bac..ca5ccc699cd 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1883,6 +1883,9 @@ bool key_cmp_if_same(TABLE *form,const uchar *key,uint index,uint key_length); void key_unpack(String *to,TABLE *form,uint index); bool is_key_used(TABLE *table, uint idx, const MY_BITMAP *fields); int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length); +ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key); +bool key_buf_cmp(KEY *key_info, uint used_key_parts, + const uchar *key1, const uchar *key2); extern "C" int key_rec_cmp(void *key_info, uchar *a, uchar *b); int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2, uint tuple_length); diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 2c28aae781c..01635c56585 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -2520,6 +2520,24 @@ int JOIN_CACHE_HASHED::init() pack_length+= get_size_of_rec_offset(); pack_length_with_blob_ptrs+= get_size_of_rec_offset(); + ref_key_info= join_tab->table->key_info+join_tab->ref.key; + ref_used_key_parts= join_tab->ref.key_parts; + + hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple; + hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple; + + KEY_PART_INFO *key_part= ref_key_info->key_part; + KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts; + for ( ; key_part < key_part_end; key_part++) + { + if (!key_part->field->eq_cmp_as_binary()) + { + hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex; + hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex; + break; + } + } + init_hash_table(); rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+ @@ -2904,7 +2922,7 @@ bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len, uchar **key_ref_ptr) { bool is_found= FALSE; - uint idx= get_hash_idx(key, key_length); + uint idx= (this->*hash_func)(key, key_length); uchar *ref_ptr= hash_table+size_of_key_ofs*idx; while (!is_null_key_ref(ref_ptr)) { @@ -2913,7 +2931,7 @@ bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len, next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) : ref_ptr-key_length; - if (memcmp(next_key, key, key_len) == 0) + if ((this->*hash_cmp_func)(next_key, key, key_len)) { is_found= TRUE; break; @@ -2925,22 +2943,24 @@ bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len, /* - Calclulate hash value for a key in the hash table of the join buffer + Hash function that considers a key in the hash table as byte array SYNOPSIS - get_hash_idx() + get_hash_idx_simple() key pointer to the key value key_len key value length DESCRIPTION The function calculates an index of the hash entry in the hash table - of the join buffer for the given key + of the join buffer for the given key. It considers the key just as + a sequence of bytes of the length key_len. RETURN VALUE - the calculated index of the hash entry for the given key. + the calculated index of the hash entry for the given key */ -uint JOIN_CACHE_HASHED::get_hash_idx(uchar* key, uint key_len) +inline +uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len) { ulong nr= 1; ulong nr2= 4; @@ -2956,6 +2976,93 @@ uint JOIN_CACHE_HASHED::get_hash_idx(uchar* key, uint key_len) /* + Hash function that takes into account collations of the components of the key + + SYNOPSIS + get_hash_idx_complex() + key pointer to the key value + key_len key value length + + DESCRIPTION + The function calculates an index of the hash entry in the hash table + of the join buffer for the given key. It takes into account that the + components of the key may be of a varchar type with different collations. + The function guarantees that the same hash value for any two equal + keys that may differ as byte sequences. + The function takes the info about the components of the key, their + types and used collations from the class member ref_key_info containing + a pointer to the descriptor of the index that can be used for the join + operation. + + RETURN VALUE + the calculated index of the hash entry for the given key +*/ + +inline +uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len) +{ + return + (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries); +} + + +/* + Compare two key entries in the hash table as sequence of bytes + + SYNOPSIS + equal_keys_simple() + key1 pointer to the first key entry + key2 pointer to the second key entry + key_len the length of the key values + + DESCRIPTION + The function compares two key entries in the hash table key1 and key2 + as two sequences bytes of the length key_len + + RETURN VALUE + TRUE key1 coincides with key2 + FALSE otherwise +*/ + +inline +bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2, + uint key_len) +{ + return memcmp(key1, key2, key_len) == 0; +} + + +/* + Compare two key entries taking into account the used collation + + SYNOPSIS + equal_keys_complex() + key1 pointer to the first key entry + key2 pointer to the second key entry + key_len the length of the key values + + DESCRIPTION + The function checks whether two key entries in the hash table + key1 and key2 are equal as, possibly, compound keys of a certain + structure whose components may be of a varchar type and may + employ different collations. + The descriptor of the key structure is taken from the class + member ref_key_info. + + RETURN VALUE + TRUE key1 is equal tokey2 + FALSE otherwise +*/ + +inline +bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2, + uint key_len) +{ + return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0; +} + + +/* Clean up the hash table of the join buffer SYNOPSIS diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h index fccd32e1319..546067fad24 100644 --- a/sql/sql_join_cache.h +++ b/sql/sql_join_cache.h @@ -738,6 +738,10 @@ public: class JOIN_CACHE_HASHED: public JOIN_CACHE { + typedef uint (JOIN_CACHE_HASHED::*Hash_func) (uchar *key, uint key_len); + typedef bool (JOIN_CACHE_HASHED::*Hash_cmp_func) (uchar *key1, uchar *key2, + uint key_len); + private: /* Size of the offset of a key entry in the hash table */ @@ -761,8 +765,12 @@ private: /* The offset of the data fields from the beginning of the record fields */ uint data_fields_offset; - - uint get_hash_idx(uchar* key, uint key_len); + + inline uint get_hash_idx_simple(uchar *key, uint key_len); + inline uint get_hash_idx_complex(uchar *key, uint key_len); + + inline bool equal_keys_simple(uchar *key1, uchar *key2, uint key_len); + inline bool equal_keys_complex(uchar *key1, uchar *key2, uint key_len); int init_hash_table(); void cleanup_hash_table(); @@ -770,6 +778,28 @@ private: protected: /* + Index info on the TABLE_REF object used by the hash join + to look for matching records + */ + KEY *ref_key_info; + /* + Number of the key parts the TABLE_REF object used by the hash join + to look for matching records + */ + uint ref_used_key_parts; + + /* + The hash function used in the hash table, + usually set by the init() method + */ + Hash_func hash_func; + /* + The function to check whether two key entries in the hash table + are equal or not, usually set by the init() method + */ + Hash_cmp_func hash_cmp_func; + + /* Length of a key value. It is assumed that all key values have the same length. */ |