summaryrefslogtreecommitdiff
path: root/myisam/mi_check.c
diff options
context:
space:
mode:
authorsergefp@mysql.com <>2005-10-21 06:29:17 +0400
committersergefp@mysql.com <>2005-10-21 06:29:17 +0400
commit15a78334c32782327580933668f8dbeafb4ee2e3 (patch)
tree2ec77c30e97d8e7c0b2634973992317e30dffe08 /myisam/mi_check.c
parentda625424e05dcb648b6e3e60934677f42697209b (diff)
downloadmariadb-git-15a78334c32782327580933668f8dbeafb4ee2e3.tar.gz
BUG#9622, stage 2, work together with fix for BUG#12232:
added "nulls_ignored" index statistics collection method for MyISAM tables. (notification trigger: this is about BUG#9622).
Diffstat (limited to 'myisam/mi_check.c')
-rw-r--r--myisam/mi_check.c176
1 files changed, 161 insertions, 15 deletions
diff --git a/myisam/mi_check.c b/myisam/mi_check.c
index 7397ee4e204..1fd7f1b45f6 100644
--- a/myisam/mi_check.c
+++ b/myisam/mi_check.c
@@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
found_keys++;
param->record_checksum=init_checksum;
+
bzero((char*) &param->unique_count,sizeof(param->unique_count));
+ bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
+
if ((!(param->testflag & T_SILENT)))
printf ("- check data record references index: %d\n",key+1);
if (keyinfo->flag & HA_FULLTEXT)
@@ -496,7 +499,9 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
if (param->testflag & T_STATISTICS)
update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
- (ulonglong) info->state->records);
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ param->notnull_count: NULL,
+ (ulonglong)info->state->records);
}
if (param->testflag & T_INFO)
{
@@ -552,6 +557,87 @@ err:
return 1;
}
+
+/*
+ "Ignore NULLs" statistics collection method: process first index tuple.
+
+ SYNOPSIS
+ mi_collect_stats_nonulls_first()
+ keyseg IN Array of key part descriptions
+ notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
+ tuples that don't contain NULLs)
+ key IN Key values tuple
+
+ DESCRIPTION
+ Process the first index tuple - find out which prefix tuples don't
+ contain NULLs, and update the array of notnull counters accordingly.
+*/
+
+static
+void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
+ uchar *key)
+{
+ uint first_null, kp;
+ first_null= ha_find_null(keyseg, key) - keyseg;
+ /*
+ All prefix tuples that don't include keypart_{first_null} are not-null
+ tuples (and all others aren't), increment counters for them.
+ */
+ for (kp= 0; kp < first_null; kp++)
+ notnull[kp]++;
+}
+
+
+/*
+ "Ignore NULLs" statistics collection method: process next index tuple.
+
+ SYNOPSIS
+ mi_collect_stats_nonulls_next()
+ keyseg IN Array of key part descriptions
+ notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
+ tuples that don't contain NULLs)
+ prev_key IN Previous key values tuple
+ last_key IN Next key values tuple
+
+ DESCRIPTION
+ Process the next index tuple:
+ 1. Find out which prefix tuples of last_key don't contain NULLs, and
+ update the array of notnull counters accordingly.
+ 2. Find the first keypart number where the tuples are different(A), or
+ last_key has NULL value (B), and return it, so caller can count
+ number of unique tuples for each key prefix. We don't need (B) to be
+ counted, and that is compensated back in update_key_parts().
+
+ RETURN
+ 1 + number of first keypart where values differ or last_key tuple has NULL
+*/
+
+static
+int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
+ uchar *prev_key, uchar *last_key)
+{
+ uint diffs[2];
+ uint first_null_seg, kp;
+
+ /* Find first keypart where values are different or either of them is NULL */
+ ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
+ SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL | SEARCH_RETURN_B_POS,
+ diffs);
+ HA_KEYSEG *seg= keyseg + diffs[0] - 1;
+ /* Find first NULL in last_key */
+ first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
+ for (kp= 0; kp < first_null_seg; kp++)
+ notnull[kp]++;
+
+ /*
+ Return 1+ number of first key part where values differ. Don't care if
+ these were NULLs and not .... We compensate for that in
+ update_key_parts.
+ */
+ return diffs[0];
+}
+
+
/* Check if index is ok */
static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
@@ -641,8 +727,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
&diff_pos);
+ else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ {
+ diff_pos= mi_collect_stats_nonulls_next(keyinfo->seg,
+ param->notnull_count,
+ info->lastkey, key);
+ }
param->unique_count[diff_pos-1]++;
}
+ else
+ {
+ if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
+ key);
+ }
}
(*key_checksum)+= mi_byte_checksum((byte*) key,
key_length- info->s->rec_reflength);
@@ -2088,7 +2186,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
if (param->testflag & T_STATISTICS)
update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
- (ulonglong) info->state->records);
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ sort_param.notnull: NULL,(ulonglong) info->state->records);
share->state.key_map|=(ulonglong) 1 << sort_param.key;
if (sort_param.fix_datafile)
@@ -3255,11 +3354,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos);
+ else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ {
+ diff_pos= mi_collect_stats_nonulls_next(sort_param->seg,
+ sort_param->notnull,
+ sort_info->key_block->lastkey,
+ (uchar*)a);
+ }
sort_param->unique[diff_pos-1]++;
}
else
{
cmp= -1;
+ if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
+ (uchar*)a);
}
if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
{
@@ -3981,21 +4090,30 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
SYNOPSIS
update_key_parts()
- keyinfo Index information (only key->keysegs used)
+ keyinfo IN Index information (only key->keysegs used)
rec_per_key_part OUT Store statistics here
- unique IN Array of #distinct values collected over index
- run.
+ unique IN Array of (#distinct tuples)
+ notnull_tuples IN Array of (#tuples), or NULL
records Number of records in the table
NOTES
+ This function handles all 3 index statistics collection methods.
+
Unique is an array:
unique[0]= (#different values of {keypart1}) - 1
- unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1
+ unique[1]= (#different values of {keypart1,keypart2} tuple) - unique[0] - 1
...
+
+ For MI_STATS_METHOD_IGNORE_NULLS notnull_tuples is an array too:
+ notnull_tuples[0]= (# of {keypart1} tuples such that keypart1 is not NULL)
+ notnull_tuples[1]= (# of {keypart1,keypart2} tuples such that all
+ keypart{i} are not NULL)
+ ...
+ For all other statistics collection methods notnull_tuples=NULL.
+
The 'unique' array is collected in one sequential scan through the entire
index. This is done in two places: in chk_index() and in sort_key_write().
- Statistics collection may consider NULLs as either equal or unequal (see
- SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*).
+ notnull_tuples, if present, is collected during the same index scan.
Output is an array:
rec_per_key_part[k] =
@@ -4007,25 +4125,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
index tuples}
= #tuples-in-the-index / #distinct-tuples-in-the-index.
+
+ The #tuples-in-the-index and #distinct-tuples-in-the-index have different
+ meaning depending on which statistics collection method is used:
+
+ MI_STATS_METHOD_* how are nulls compared? which tuples are counted?
+ NULLS_EQUAL NULL == NULL all tuples in table
+ NULLS_NOT_EQUAL NULL != NULL all tuples in table
+ IGNORE_NULLS n/a tuples that don't have NULLs
*/
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
- ulonglong *unique, ulonglong records)
+ ulonglong *unique, ulonglong *notnull,
+ ulonglong records)
{
- ulonglong count=0,tmp;
+ ulonglong count=0,tmp, unique_tuples;
+ ulonglong tuples= records;
uint parts;
for (parts=0 ; parts < keyinfo->keysegs ; parts++)
{
count+=unique[parts];
- if (count == 0)
- tmp=records;
+ unique_tuples= count + 1;
+ if (notnull)
+ {
+ tuples= notnull[parts];
+ /*
+ #(unique_tuples not counting tuples with NULLs) =
+ #(unique_tuples counting tuples with NULLs as different) -
+ #(tuples with NULLs)
+ */
+ unique_tuples -= (records - notnull[parts]);
+ }
+
+ if (unique_tuples == 0)
+ tmp= 1;
+ else if (count == 0)
+ tmp= tuples; /* 1 unique tuple */
else
- tmp= (records + (count+1)/2) / (count+1);
- /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
- let's ensure it is not */
+ tmp= (tuples + unique_tuples/2) / unique_tuples;
+
+ /*
+ for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
+ let's ensure it is not
+ */
set_if_bigger(tmp,1);
if (tmp >= (ulonglong) ~(ulong) 0)
tmp=(ulonglong) ~(ulong) 0;
+
*rec_per_key_part=(ulong) tmp;
rec_per_key_part++;
}