summaryrefslogtreecommitdiff
path: root/sql/key.cc
diff options
context:
space:
mode:
authorIgor Babaev <igor@askmonty.org>2010-12-22 00:37:35 -0800
committerIgor Babaev <igor@askmonty.org>2010-12-22 00:37:35 -0800
commita095346a9d32ca583d211d07075e805881fdb4e5 (patch)
tree72c52f60b7509706564838db8130bb2e8828de54 /sql/key.cc
parent4f28dcbe327139d9d5cb71afc9f4ce99cecec25a (diff)
downloadmariadb-git-a095346a9d32ca583d211d07075e805881fdb4e5.tar.gz
Fixed LP bug #670380.
Lifted the limitation that hash join could not be used over varchar fields with non-binary collation.
Diffstat (limited to 'sql/key.cc')
-rw-r--r--sql/key.cc251
1 files changed, 251 insertions, 0 deletions
diff --git a/sql/key.cc b/sql/key.cc
index a1793e9e5f4..08502cfe6cc 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -567,3 +567,254 @@ next_loop:
} while (key_info); /* no more keys to test */
DBUG_RETURN(0);
}
+
+
+/**
+ Get hash value for the key from a key buffer
+
+ @param key_info the key descriptor
+ @param used_key_part number of key parts used for the key
+ @param key pointer to the buffer with the key value
+
+ @datails
+ When hashing we should take special care only of:
+ 1. NULLs (and keyparts which can be null so one byte reserved for it);
+ 2. Strings for which we have to take into account their collations
+ and the values of their lengths in the prefixes.
+
+ @return hash value calculated for the key
+*/
+
+ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key)
+{
+ ulong nr=1, nr2=4;
+ KEY_PART_INFO *key_part= key_info->key_part;
+ KEY_PART_INFO *end_key_part= key_part + used_key_parts;
+
+ for (; key_part < end_key_part; key_part++)
+ {
+ uchar *pos= (uchar*)key;
+ CHARSET_INFO *cs;
+ uint length, pack_length;
+ bool is_string= TRUE;
+ LINT_INIT(cs);
+ key+= key_part->length;
+ if (key_part->null_bit)
+ {
+ key++; /* Skip null byte */
+ if (*pos) /* Found null */
+ {
+ nr^= (nr << 1) | 1;
+ /* Add key pack length to key for VARCHAR segments */
+ switch (key_part->type) {
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARBINARY1:
+ key++;
+ break;
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY2:
+ key+= 2;
+ break;
+ default:
+ ;
+ }
+ continue;
+ }
+ pos++; /* Skip null byte */
+ }
+ /* If it is string set parameters of the string */
+ switch (key_part->type) {
+ case HA_KEYTYPE_TEXT:
+ cs= key_part->field->charset();
+ length= key_part->length;
+ pack_length= 0;
+ break;
+ case HA_KEYTYPE_BINARY :
+ cs= &my_charset_bin;
+ length= key_part->length;
+ pack_length= 0;
+ break;
+ case HA_KEYTYPE_VARTEXT1:
+ cs= key_part->field->charset();
+ length= (uint)(pos[0]);
+ pack_length= 1;
+ break;
+ case HA_KEYTYPE_VARBINARY1:
+ cs= &my_charset_bin;
+ length= (uint)(pos[0]);
+ pack_length= 1;
+ break;
+ case HA_KEYTYPE_VARTEXT2:
+ cs= key_part->field->charset();
+ length= uint2korr(pos);
+ pack_length= 2;
+ break;
+ case HA_KEYTYPE_VARBINARY2:
+ cs= &my_charset_bin;
+ length= uint2korr(pos);
+ pack_length= 2;
+ break;
+ default:
+ is_string= FALSE;
+ }
+
+ if (is_string)
+ {
+ if (cs->mbmaxlen > 1)
+ {
+ uint char_length= my_charpos(cs, pos + pack_length,
+ pos + pack_length + length,
+ length / cs->mbmaxlen);
+ set_if_smaller(length, char_length);
+ }
+ cs->coll->hash_sort(cs, pos+pack_length, length, &nr, &nr2);
+ key+= pack_length;
+ }
+ else
+ {
+ for (; pos < (uchar*)key ; pos++)
+ {
+ nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos)) + (nr << 8);
+ nr2+=3;
+ }
+ }
+ }
+ DBUG_PRINT("exit", ("hash: %lx", nr));
+ return(nr);
+}
+
+
+/**
+ Check whether two keys in the key buffers are equal
+
+ @param key_info the key descriptor
+ @param used_key_part number of key parts used for the keys
+ @param key1 pointer to the buffer with the first key
+ @param key2 pointer to the buffer with the second key
+
+ @detail See details of key_hashnr().
+
+ @retval TRUE keys in the buffers are NOT equal
+ @retval FALSE keys in the buffers are equal
+*/
+
+bool key_buf_cmp(KEY *key_info, uint used_key_parts,
+ const uchar *key1, const uchar *key2)
+{
+ KEY_PART_INFO *key_part= key_info->key_part;
+ KEY_PART_INFO *end_key_part= key_part + used_key_parts;
+
+ for (; key_part < end_key_part; key_part++)
+ {
+ uchar *pos1= (uchar*)key1;
+ uchar *pos2= (uchar*)key2;
+ CHARSET_INFO *cs;
+ uint length1, length2, pack_length;
+ bool is_string= TRUE;
+ LINT_INIT(cs);
+ key1+= key_part->length;
+ key2+= key_part->length;
+ if (key_part->null_bit)
+ {
+ key1++; key2++; /* Skip null byte */
+ if (*pos1 && *pos2) /* Both are null */
+ {
+ /* Add key pack length to key for VARCHAR segments */
+ switch (key_part->type) {
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARBINARY1:
+ key1++; key2++;
+ break;
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY2:
+ key1+= 2; key2+= 2;
+ break;
+ default:
+ ;
+ }
+ continue;
+ }
+ if (*pos1 != *pos2)
+ return FALSE;
+ pos1++; pos2++;
+ }
+
+ /* If it is string set parameters of the string */
+ switch (key_part->type) {
+ case HA_KEYTYPE_TEXT:
+ cs= key_part->field->charset();
+ length1= length2= key_part->length;
+ pack_length= 0;
+ break;
+ case HA_KEYTYPE_BINARY :
+ cs= &my_charset_bin;
+ length1= length2= key_part->length;
+ pack_length= 0;
+ break;
+ case HA_KEYTYPE_VARTEXT1:
+ cs= key_part->field->charset();
+ length1= (uint)(pos1[0]);
+ length2= (uint)(pos2[0]);
+ pack_length= 1;
+ break;
+ case HA_KEYTYPE_VARBINARY1:
+ cs= &my_charset_bin;
+ length1= (uint)(pos1[0]);
+ length2= (uint)(pos2[0]);
+ pack_length= 1;
+ break;
+ case HA_KEYTYPE_VARTEXT2:
+ cs= key_part->field->charset();
+ length1= uint2korr(pos1);
+ length2= uint2korr(pos2);
+ pack_length= 2;
+ break;
+ case HA_KEYTYPE_VARBINARY2:
+ cs= &my_charset_bin;
+ length1= uint2korr(pos1);
+ length2= uint2korr(pos2);
+ pack_length= 2;
+ break;
+ default:
+ is_string= FALSE;
+ }
+
+ if (is_string)
+ {
+ /*
+ Compare the strings taking into account length in characters
+ and collation
+ */
+ uint byte_len1= length1, byte_len2= length2;
+ if (cs->mbmaxlen > 1)
+ {
+ uint char_length1= my_charpos(cs, pos1 + pack_length,
+ pos1 + pack_length + length1,
+ length1 / cs->mbmaxlen);
+ uint char_length2= my_charpos(cs, pos2 + pack_length,
+ pos2 + pack_length + length2,
+ length2 / cs->mbmaxlen);
+ set_if_smaller(length1, char_length1);
+ set_if_smaller(length2, char_length2);
+ }
+ if (length1 != length2 ||
+ cs->coll->strnncollsp(cs,
+ pos1 + pack_length, byte_len1,
+ pos2 + pack_length, byte_len2,
+ 1))
+ return TRUE;
+ key1+= pack_length; key2+= pack_length;
+ }
+ else
+ {
+ /* it is OK to compare non-string byte per byte */
+ for (; pos1 < (uchar*)key1 ; pos1++, pos2++)
+ {
+ if (pos1[0] != pos2[0])
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
+