diff options
author | Igor Babaev <igor@askmonty.org> | 2010-12-22 00:37:35 -0800 |
---|---|---|
committer | Igor Babaev <igor@askmonty.org> | 2010-12-22 00:37:35 -0800 |
commit | a095346a9d32ca583d211d07075e805881fdb4e5 (patch) | |
tree | 72c52f60b7509706564838db8130bb2e8828de54 /sql/key.cc | |
parent | 4f28dcbe327139d9d5cb71afc9f4ce99cecec25a (diff) | |
download | mariadb-git-a095346a9d32ca583d211d07075e805881fdb4e5.tar.gz |
Fixed LP bug #670380.
Lifted the limitation that hash join could not be used over
varchar fields with non-binary collation.
Diffstat (limited to 'sql/key.cc')
-rw-r--r-- | sql/key.cc | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/sql/key.cc b/sql/key.cc index a1793e9e5f4..08502cfe6cc 100644 --- a/sql/key.cc +++ b/sql/key.cc @@ -567,3 +567,254 @@ next_loop: } while (key_info); /* no more keys to test */ DBUG_RETURN(0); } + + +/** + Get hash value for the key from a key buffer + + @param key_info the key descriptor + @param used_key_part number of key parts used for the key + @param key pointer to the buffer with the key value + + @datails + When hashing we should take special care only of: + 1. NULLs (and keyparts which can be null so one byte reserved for it); + 2. Strings for which we have to take into account their collations + and the values of their lengths in the prefixes. + + @return hash value calculated for the key +*/ + +ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key) +{ + ulong nr=1, nr2=4; + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end_key_part= key_part + used_key_parts; + + for (; key_part < end_key_part; key_part++) + { + uchar *pos= (uchar*)key; + CHARSET_INFO *cs; + uint length, pack_length; + bool is_string= TRUE; + LINT_INIT(cs); + key+= key_part->length; + if (key_part->null_bit) + { + key++; /* Skip null byte */ + if (*pos) /* Found null */ + { + nr^= (nr << 1) | 1; + /* Add key pack length to key for VARCHAR segments */ + switch (key_part->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARBINARY1: + key++; + break; + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY2: + key+= 2; + break; + default: + ; + } + continue; + } + pos++; /* Skip null byte */ + } + /* If it is string set parameters of the string */ + switch (key_part->type) { + case HA_KEYTYPE_TEXT: + cs= key_part->field->charset(); + length= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_BINARY : + cs= &my_charset_bin; + length= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_VARTEXT1: + cs= key_part->field->charset(); + length= (uint)(pos[0]); + pack_length= 1; + break; + case HA_KEYTYPE_VARBINARY1: + cs= &my_charset_bin; + length= (uint)(pos[0]); + pack_length= 1; + break; + case HA_KEYTYPE_VARTEXT2: + cs= key_part->field->charset(); + length= uint2korr(pos); + pack_length= 2; + break; + case HA_KEYTYPE_VARBINARY2: + cs= &my_charset_bin; + length= uint2korr(pos); + pack_length= 2; + break; + default: + is_string= FALSE; + } + + if (is_string) + { + if (cs->mbmaxlen > 1) + { + uint char_length= my_charpos(cs, pos + pack_length, + pos + pack_length + length, + length / cs->mbmaxlen); + set_if_smaller(length, char_length); + } + cs->coll->hash_sort(cs, pos+pack_length, length, &nr, &nr2); + key+= pack_length; + } + else + { + for (; pos < (uchar*)key ; pos++) + { + nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos)) + (nr << 8); + nr2+=3; + } + } + } + DBUG_PRINT("exit", ("hash: %lx", nr)); + return(nr); +} + + +/** + Check whether two keys in the key buffers are equal + + @param key_info the key descriptor + @param used_key_part number of key parts used for the keys + @param key1 pointer to the buffer with the first key + @param key2 pointer to the buffer with the second key + + @detail See details of key_hashnr(). + + @retval TRUE keys in the buffers are NOT equal + @retval FALSE keys in the buffers are equal +*/ + +bool key_buf_cmp(KEY *key_info, uint used_key_parts, + const uchar *key1, const uchar *key2) +{ + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end_key_part= key_part + used_key_parts; + + for (; key_part < end_key_part; key_part++) + { + uchar *pos1= (uchar*)key1; + uchar *pos2= (uchar*)key2; + CHARSET_INFO *cs; + uint length1, length2, pack_length; + bool is_string= TRUE; + LINT_INIT(cs); + key1+= key_part->length; + key2+= key_part->length; + if (key_part->null_bit) + { + key1++; key2++; /* Skip null byte */ + if (*pos1 && *pos2) /* Both are null */ + { + /* Add key pack length to key for VARCHAR segments */ + switch (key_part->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARBINARY1: + key1++; key2++; + break; + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY2: + key1+= 2; key2+= 2; + break; + default: + ; + } + continue; + } + if (*pos1 != *pos2) + return FALSE; + pos1++; pos2++; + } + + /* If it is string set parameters of the string */ + switch (key_part->type) { + case HA_KEYTYPE_TEXT: + cs= key_part->field->charset(); + length1= length2= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_BINARY : + cs= &my_charset_bin; + length1= length2= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_VARTEXT1: + cs= key_part->field->charset(); + length1= (uint)(pos1[0]); + length2= (uint)(pos2[0]); + pack_length= 1; + break; + case HA_KEYTYPE_VARBINARY1: + cs= &my_charset_bin; + length1= (uint)(pos1[0]); + length2= (uint)(pos2[0]); + pack_length= 1; + break; + case HA_KEYTYPE_VARTEXT2: + cs= key_part->field->charset(); + length1= uint2korr(pos1); + length2= uint2korr(pos2); + pack_length= 2; + break; + case HA_KEYTYPE_VARBINARY2: + cs= &my_charset_bin; + length1= uint2korr(pos1); + length2= uint2korr(pos2); + pack_length= 2; + break; + default: + is_string= FALSE; + } + + if (is_string) + { + /* + Compare the strings taking into account length in characters + and collation + */ + uint byte_len1= length1, byte_len2= length2; + if (cs->mbmaxlen > 1) + { + uint char_length1= my_charpos(cs, pos1 + pack_length, + pos1 + pack_length + length1, + length1 / cs->mbmaxlen); + uint char_length2= my_charpos(cs, pos2 + pack_length, + pos2 + pack_length + length2, + length2 / cs->mbmaxlen); + set_if_smaller(length1, char_length1); + set_if_smaller(length2, char_length2); + } + if (length1 != length2 || + cs->coll->strnncollsp(cs, + pos1 + pack_length, byte_len1, + pos2 + pack_length, byte_len2, + 1)) + return TRUE; + key1+= pack_length; key2+= pack_length; + } + else + { + /* it is OK to compare non-string byte per byte */ + for (; pos1 < (uchar*)key1 ; pos1++, pos2++) + { + if (pos1[0] != pos2[0]) + return TRUE; + } + } + } + return FALSE; +} + |