summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
Diffstat (limited to 'sql')
-rw-r--r--sql/field.h8
-rw-r--r--sql/key.cc248
-rw-r--r--sql/mysql_priv.h3
-rw-r--r--sql/sql_join_cache.cc121
-rw-r--r--sql/sql_join_cache.h34
5 files changed, 399 insertions, 15 deletions
diff --git a/sql/field.h b/sql/field.h
index fc43c044d87..b4dab7561ef 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -593,6 +593,7 @@ public:
/* Check whether the field can be used as a join attribute in hash join */
virtual bool hash_join_is_possible() { return TRUE; }
+ virtual bool eq_cmp_as_binary() { return TRUE; }
friend bool reopen_table(THD *,struct st_table *,bool);
friend int cre_myisam(char * name, register TABLE *form, uint options,
@@ -769,12 +770,7 @@ public:
my_decimal *val_decimal(my_decimal *);
virtual bool str_needs_quotes() { return TRUE; }
uint is_equal(Create_field *new_field);
-
- bool hash_join_is_possible()
- {
- /* TODO: support hash joins for non-binary collations */
- return (flags & BINARY_FLAG);
- }
+ bool eq_cmp_as_binary() { return test(flags & BINARY_FLAG); }
};
diff --git a/sql/key.cc b/sql/key.cc
index 1d27fdcf208..0de1d1fd642 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -618,3 +618,251 @@ int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2,
}
+/**
+ Get hash value for the key from a key buffer
+
+ @param key_info the key descriptor
+ @param used_key_part number of key parts used for the key
+ @param key pointer to the buffer with the key value
+
+ @datails
+ When hashing we should take special care only of:
+ 1. NULLs (and keyparts which can be null so one byte reserved for it);
+ 2. Strings for which we have to take into account their collations
+ and the values of their lengths in the prefixes.
+
+ @return hash value calculated for the key
+*/
+
+ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key)
+{
+ ulong nr=1, nr2=4;
+ KEY_PART_INFO *key_part= key_info->key_part;
+ KEY_PART_INFO *end_key_part= key_part + used_key_parts;
+
+ for (; key_part < end_key_part; key_part++)
+ {
+ uchar *pos= (uchar*)key;
+ CHARSET_INFO *cs;
+ uint length, pack_length;
+ bool is_string= TRUE;
+ LINT_INIT(cs);
+ key+= key_part->length;
+ if (key_part->null_bit)
+ {
+ key++; /* Skip null byte */
+ if (*pos) /* Found null */
+ {
+ nr^= (nr << 1) | 1;
+ /* Add key pack length to key for VARCHAR segments */
+ switch (key_part->type) {
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARBINARY1:
+ key++;
+ break;
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY2:
+ key+= 2;
+ break;
+ default:
+ ;
+ }
+ continue;
+ }
+ pos++; /* Skip null byte */
+ }
+ /* If it is string set parameters of the string */
+ switch (key_part->type) {
+ case HA_KEYTYPE_TEXT:
+ cs= key_part->field->charset();
+ length= key_part->length;
+ pack_length= 0;
+ break;
+ case HA_KEYTYPE_BINARY :
+ cs= &my_charset_bin;
+ length= key_part->length;
+ pack_length= 0;
+ break;
+ case HA_KEYTYPE_VARTEXT1:
+ cs= key_part->field->charset();
+ length= (uint)(pos[0]);
+ pack_length= 1;
+ break;
+ case HA_KEYTYPE_VARBINARY1:
+ cs= &my_charset_bin;
+ length= (uint)(pos[0]);
+ pack_length= 1;
+ break;
+ case HA_KEYTYPE_VARTEXT2:
+ cs= key_part->field->charset();
+ length= uint2korr(pos);
+ pack_length= 2;
+ break;
+ case HA_KEYTYPE_VARBINARY2:
+ cs= &my_charset_bin;
+ length= uint2korr(pos);
+ pack_length= 2;
+ break;
+ default:
+ is_string= FALSE;
+ }
+
+ if (is_string)
+ {
+ if (cs->mbmaxlen > 1)
+ {
+ uint char_length= my_charpos(cs, pos + pack_length,
+ pos + pack_length + length,
+ length / cs->mbmaxlen);
+ set_if_smaller(length, char_length);
+ }
+ cs->coll->hash_sort(cs, pos+pack_length, length, &nr, &nr2);
+ key+= pack_length;
+ }
+ else
+ {
+ for (; pos < (uchar*)key ; pos++)
+ {
+ nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos)) + (nr << 8);
+ nr2+=3;
+ }
+ }
+ }
+ DBUG_PRINT("exit", ("hash: %lx", nr));
+ return(nr);
+}
+
+
+/**
+ Check whether two keys in the key buffers are equal
+
+ @param key_info the key descriptor
+ @param used_key_part number of key parts used for the keys
+ @param key1 pointer to the buffer with the first key
+ @param key2 pointer to the buffer with the second key
+
+ @detail See details of key_hashnr().
+
+ @retval TRUE keys in the buffers are NOT equal
+ @retval FALSE keys in the buffers are equal
+*/
+
+bool key_buf_cmp(KEY *key_info, uint used_key_parts,
+ const uchar *key1, const uchar *key2)
+{
+ KEY_PART_INFO *key_part= key_info->key_part;
+ KEY_PART_INFO *end_key_part= key_part + used_key_parts;
+
+ for (; key_part < end_key_part; key_part++)
+ {
+ uchar *pos1= (uchar*)key1;
+ uchar *pos2= (uchar*)key2;
+ CHARSET_INFO *cs;
+ uint length1, length2, pack_length;
+ bool is_string= TRUE;
+ LINT_INIT(cs);
+ key1+= key_part->length;
+ key2+= key_part->length;
+ if (key_part->null_bit)
+ {
+ key1++; key2++; /* Skip null byte */
+ if (*pos1 && *pos2) /* Both are null */
+ {
+ /* Add key pack length to key for VARCHAR segments */
+ switch (key_part->type) {
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARBINARY1:
+ key1++; key2++;
+ break;
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY2:
+ key1+= 2; key2+= 2;
+ break;
+ default:
+ ;
+ }
+ continue;
+ }
+ if (*pos1 != *pos2)
+ return FALSE;
+ pos1++; pos2++;
+ }
+
+ /* If it is string set parameters of the string */
+ switch (key_part->type) {
+ case HA_KEYTYPE_TEXT:
+ cs= key_part->field->charset();
+ length1= length2= key_part->length;
+ pack_length= 0;
+ break;
+ case HA_KEYTYPE_BINARY :
+ cs= &my_charset_bin;
+ length1= length2= key_part->length;
+ pack_length= 0;
+ break;
+ case HA_KEYTYPE_VARTEXT1:
+ cs= key_part->field->charset();
+ length1= (uint)(pos1[0]);
+ length2= (uint)(pos2[0]);
+ pack_length= 1;
+ break;
+ case HA_KEYTYPE_VARBINARY1:
+ cs= &my_charset_bin;
+ length1= (uint)(pos1[0]);
+ length2= (uint)(pos2[0]);
+ pack_length= 1;
+ break;
+ case HA_KEYTYPE_VARTEXT2:
+ cs= key_part->field->charset();
+ length1= uint2korr(pos1);
+ length2= uint2korr(pos2);
+ pack_length= 2;
+ break;
+ case HA_KEYTYPE_VARBINARY2:
+ cs= &my_charset_bin;
+ length1= uint2korr(pos1);
+ length2= uint2korr(pos2);
+ pack_length= 2;
+ break;
+ default:
+ is_string= FALSE;
+ }
+
+ if (is_string)
+ {
+ /*
+ Compare the strings taking into account length in characters
+ and collation
+ */
+ uint byte_len1= length1, byte_len2= length2;
+ if (cs->mbmaxlen > 1)
+ {
+ uint char_length1= my_charpos(cs, pos1 + pack_length,
+ pos1 + pack_length + length1,
+ length1 / cs->mbmaxlen);
+ uint char_length2= my_charpos(cs, pos2 + pack_length,
+ pos2 + pack_length + length2,
+ length2 / cs->mbmaxlen);
+ set_if_smaller(length1, char_length1);
+ set_if_smaller(length2, char_length2);
+ }
+ if (length1 != length2 ||
+ cs->coll->strnncollsp(cs,
+ pos1 + pack_length, byte_len1,
+ pos2 + pack_length, byte_len2,
+ 1))
+ return TRUE;
+ key1+= pack_length; key2+= pack_length;
+ }
+ else
+ {
+ /* it is OK to compare non-string byte per byte */
+ for (; pos1 < (uchar*)key1 ; pos1++, pos2++)
+ {
+ if (pos1[0] != pos2[0])
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 122baf47bac..ca5ccc699cd 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -1883,6 +1883,9 @@ bool key_cmp_if_same(TABLE *form,const uchar *key,uint index,uint key_length);
void key_unpack(String *to,TABLE *form,uint index);
bool is_key_used(TABLE *table, uint idx, const MY_BITMAP *fields);
int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length);
+ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key);
+bool key_buf_cmp(KEY *key_info, uint used_key_parts,
+ const uchar *key1, const uchar *key2);
extern "C" int key_rec_cmp(void *key_info, uchar *a, uchar *b);
int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2, uint tuple_length);
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
index 2c28aae781c..01635c56585 100644
--- a/sql/sql_join_cache.cc
+++ b/sql/sql_join_cache.cc
@@ -2520,6 +2520,24 @@ int JOIN_CACHE_HASHED::init()
pack_length+= get_size_of_rec_offset();
pack_length_with_blob_ptrs+= get_size_of_rec_offset();
+ ref_key_info= join_tab->table->key_info+join_tab->ref.key;
+ ref_used_key_parts= join_tab->ref.key_parts;
+
+ hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple;
+ hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple;
+
+ KEY_PART_INFO *key_part= ref_key_info->key_part;
+ KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts;
+ for ( ; key_part < key_part_end; key_part++)
+ {
+ if (!key_part->field->eq_cmp_as_binary())
+ {
+ hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex;
+ hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex;
+ break;
+ }
+ }
+
init_hash_table();
rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
@@ -2904,7 +2922,7 @@ bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
uchar **key_ref_ptr)
{
bool is_found= FALSE;
- uint idx= get_hash_idx(key, key_length);
+ uint idx= (this->*hash_func)(key, key_length);
uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
while (!is_null_key_ref(ref_ptr))
{
@@ -2913,7 +2931,7 @@ bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) :
ref_ptr-key_length;
- if (memcmp(next_key, key, key_len) == 0)
+ if ((this->*hash_cmp_func)(next_key, key, key_len))
{
is_found= TRUE;
break;
@@ -2925,22 +2943,24 @@ bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
/*
- Calclulate hash value for a key in the hash table of the join buffer
+ Hash function that considers a key in the hash table as byte array
SYNOPSIS
- get_hash_idx()
+ get_hash_idx_simple()
key pointer to the key value
key_len key value length
DESCRIPTION
The function calculates an index of the hash entry in the hash table
- of the join buffer for the given key
+ of the join buffer for the given key. It considers the key just as
+ a sequence of bytes of the length key_len.
RETURN VALUE
- the calculated index of the hash entry for the given key.
+ the calculated index of the hash entry for the given key
*/
-uint JOIN_CACHE_HASHED::get_hash_idx(uchar* key, uint key_len)
+inline
+uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len)
{
ulong nr= 1;
ulong nr2= 4;
@@ -2956,6 +2976,93 @@ uint JOIN_CACHE_HASHED::get_hash_idx(uchar* key, uint key_len)
/*
+ Hash function that takes into account collations of the components of the key
+
+ SYNOPSIS
+ get_hash_idx_complex()
+ key pointer to the key value
+ key_len key value length
+
+ DESCRIPTION
+ The function calculates an index of the hash entry in the hash table
+ of the join buffer for the given key. It takes into account that the
+ components of the key may be of a varchar type with different collations.
+ The function guarantees that the same hash value for any two equal
+ keys that may differ as byte sequences.
+ The function takes the info about the components of the key, their
+ types and used collations from the class member ref_key_info containing
+ a pointer to the descriptor of the index that can be used for the join
+ operation.
+
+ RETURN VALUE
+ the calculated index of the hash entry for the given key
+*/
+
+inline
+uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len)
+{
+ return
+ (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries);
+}
+
+
+/*
+ Compare two key entries in the hash table as sequence of bytes
+
+ SYNOPSIS
+ equal_keys_simple()
+ key1 pointer to the first key entry
+ key2 pointer to the second key entry
+ key_len the length of the key values
+
+ DESCRIPTION
+ The function compares two key entries in the hash table key1 and key2
+ as two sequences bytes of the length key_len
+
+ RETURN VALUE
+ TRUE key1 coincides with key2
+ FALSE otherwise
+*/
+
+inline
+bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2,
+ uint key_len)
+{
+ return memcmp(key1, key2, key_len) == 0;
+}
+
+
+/*
+ Compare two key entries taking into account the used collation
+
+ SYNOPSIS
+ equal_keys_complex()
+ key1 pointer to the first key entry
+ key2 pointer to the second key entry
+ key_len the length of the key values
+
+ DESCRIPTION
+ The function checks whether two key entries in the hash table
+ key1 and key2 are equal as, possibly, compound keys of a certain
+ structure whose components may be of a varchar type and may
+ employ different collations.
+ The descriptor of the key structure is taken from the class
+ member ref_key_info.
+
+ RETURN VALUE
+ TRUE key1 is equal tokey2
+ FALSE otherwise
+*/
+
+inline
+bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2,
+ uint key_len)
+{
+ return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0;
+}
+
+
+/*
Clean up the hash table of the join buffer
SYNOPSIS
diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h
index fccd32e1319..546067fad24 100644
--- a/sql/sql_join_cache.h
+++ b/sql/sql_join_cache.h
@@ -738,6 +738,10 @@ public:
class JOIN_CACHE_HASHED: public JOIN_CACHE
{
+ typedef uint (JOIN_CACHE_HASHED::*Hash_func) (uchar *key, uint key_len);
+ typedef bool (JOIN_CACHE_HASHED::*Hash_cmp_func) (uchar *key1, uchar *key2,
+ uint key_len);
+
private:
/* Size of the offset of a key entry in the hash table */
@@ -761,8 +765,12 @@ private:
/* The offset of the data fields from the beginning of the record fields */
uint data_fields_offset;
-
- uint get_hash_idx(uchar* key, uint key_len);
+
+ inline uint get_hash_idx_simple(uchar *key, uint key_len);
+ inline uint get_hash_idx_complex(uchar *key, uint key_len);
+
+ inline bool equal_keys_simple(uchar *key1, uchar *key2, uint key_len);
+ inline bool equal_keys_complex(uchar *key1, uchar *key2, uint key_len);
int init_hash_table();
void cleanup_hash_table();
@@ -770,6 +778,28 @@ private:
protected:
/*
+ Index info on the TABLE_REF object used by the hash join
+ to look for matching records
+ */
+ KEY *ref_key_info;
+ /*
+ Number of the key parts the TABLE_REF object used by the hash join
+ to look for matching records
+ */
+ uint ref_used_key_parts;
+
+ /*
+ The hash function used in the hash table,
+ usually set by the init() method
+ */
+ Hash_func hash_func;
+ /*
+ The function to check whether two key entries in the hash table
+ are equal or not, usually set by the init() method
+ */
+ Hash_cmp_func hash_cmp_func;
+
+ /*
Length of a key value.
It is assumed that all key values have the same length.
*/