diff options
author | sergefp@mysql.com <> | 2005-10-21 06:29:17 +0400 |
---|---|---|
committer | sergefp@mysql.com <> | 2005-10-21 06:29:17 +0400 |
commit | 15a78334c32782327580933668f8dbeafb4ee2e3 (patch) | |
tree | 2ec77c30e97d8e7c0b2634973992317e30dffe08 /myisam/mi_check.c | |
parent | da625424e05dcb648b6e3e60934677f42697209b (diff) | |
download | mariadb-git-15a78334c32782327580933668f8dbeafb4ee2e3.tar.gz |
BUG#9622, stage 2, work together with fix for BUG#12232:
added "nulls_ignored" index statistics collection method for MyISAM tables.
(notification trigger: this is about BUG#9622).
Diffstat (limited to 'myisam/mi_check.c')
-rw-r--r-- | myisam/mi_check.c | 176 |
1 files changed, 161 insertions, 15 deletions
diff --git a/myisam/mi_check.c b/myisam/mi_check.c index 7397ee4e204..1fd7f1b45f6 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) found_keys++; param->record_checksum=init_checksum; + bzero((char*) ¶m->unique_count,sizeof(param->unique_count)); + bzero((char*) ¶m->notnull_count,sizeof(param->notnull_count)); + if ((!(param->testflag & T_SILENT))) printf ("- check data record references index: %d\n",key+1); if (keyinfo->flag & HA_FULLTEXT) @@ -496,7 +499,9 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) if (param->testflag & T_STATISTICS) update_key_parts(keyinfo, rec_per_key_part, param->unique_count, - (ulonglong) info->state->records); + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + param->notnull_count: NULL, + (ulonglong)info->state->records); } if (param->testflag & T_INFO) { @@ -552,6 +557,87 @@ err: return 1; } + +/* + "Ignore NULLs" statistics collection method: process first index tuple. + + SYNOPSIS + mi_collect_stats_nonulls_first() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + key IN Key values tuple + + DESCRIPTION + Process the first index tuple - find out which prefix tuples don't + contain NULLs, and update the array of notnull counters accordingly. +*/ + +static +void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull, + uchar *key) +{ + uint first_null, kp; + first_null= ha_find_null(keyseg, key) - keyseg; + /* + All prefix tuples that don't include keypart_{first_null} are not-null + tuples (and all others aren't), increment counters for them. + */ + for (kp= 0; kp < first_null; kp++) + notnull[kp]++; +} + + +/* + "Ignore NULLs" statistics collection method: process next index tuple. + + SYNOPSIS + mi_collect_stats_nonulls_next() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + prev_key IN Previous key values tuple + last_key IN Next key values tuple + + DESCRIPTION + Process the next index tuple: + 1. Find out which prefix tuples of last_key don't contain NULLs, and + update the array of notnull counters accordingly. + 2. Find the first keypart number where the tuples are different(A), or + last_key has NULL value (B), and return it, so caller can count + number of unique tuples for each key prefix. We don't need (B) to be + counted, and that is compensated back in update_key_parts(). + + RETURN + 1 + number of first keypart where values differ or last_key tuple has NULL +*/ + +static +int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, + uchar *prev_key, uchar *last_key) +{ + uint diffs[2]; + uint first_null_seg, kp; + + /* Find first keypart where values are different or either of them is NULL */ + ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL | SEARCH_RETURN_B_POS, + diffs); + HA_KEYSEG *seg= keyseg + diffs[0] - 1; + /* Find first NULL in last_key */ + first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg; + for (kp= 0; kp < first_null_seg; kp++) + notnull[kp]++; + + /* + Return 1+ number of first key part where values differ. Don't care if + these were NULLs and not .... We compensate for that in + update_key_parts. + */ + return diffs[0]; +} + + /* Check if index is ok */ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, @@ -641,8 +727,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos= mi_collect_stats_nonulls_next(keyinfo->seg, + param->notnull_count, + info->lastkey, key); + } param->unique_count[diff_pos-1]++; } + else + { + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count, + key); + } } (*key_checksum)+= mi_byte_checksum((byte*) key, key_length- info->s->rec_reflength); @@ -2088,7 +2186,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, if (param->testflag & T_STATISTICS) update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, - (ulonglong) info->state->records); + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + sort_param.notnull: NULL,(ulonglong) info->state->records); share->state.key_map|=(ulonglong) 1 << sort_param.key; if (sort_param.fix_datafile) @@ -3255,11 +3354,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos= mi_collect_stats_nonulls_next(sort_param->seg, + sort_param->notnull, + sort_info->key_block->lastkey, + (uchar*)a); + } sort_param->unique[diff_pos-1]++; } else { cmp= -1; + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull, + (uchar*)a); } if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) { @@ -3981,21 +4090,30 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, SYNOPSIS update_key_parts() - keyinfo Index information (only key->keysegs used) + keyinfo IN Index information (only key->keysegs used) rec_per_key_part OUT Store statistics here - unique IN Array of #distinct values collected over index - run. + unique IN Array of (#distinct tuples) + notnull_tuples IN Array of (#tuples), or NULL records Number of records in the table NOTES + This function handles all 3 index statistics collection methods. + Unique is an array: unique[0]= (#different values of {keypart1}) - 1 - unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1 + unique[1]= (#different values of {keypart1,keypart2} tuple) - unique[0] - 1 ... + + For MI_STATS_METHOD_IGNORE_NULLS notnull_tuples is an array too: + notnull_tuples[0]= (# of {keypart1} tuples such that keypart1 is not NULL) + notnull_tuples[1]= (# of {keypart1,keypart2} tuples such that all + keypart{i} are not NULL) + ... + For all other statistics collection methods notnull_tuples=NULL. + The 'unique' array is collected in one sequential scan through the entire index. This is done in two places: in chk_index() and in sort_key_write(). - Statistics collection may consider NULLs as either equal or unequal (see - SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*). + notnull_tuples, if present, is collected during the same index scan. Output is an array: rec_per_key_part[k] = @@ -4007,25 +4125,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, index tuples} = #tuples-in-the-index / #distinct-tuples-in-the-index. + + The #tuples-in-the-index and #distinct-tuples-in-the-index have different + meaning depending on which statistics collection method is used: + + MI_STATS_METHOD_* how are nulls compared? which tuples are counted? + NULLS_EQUAL NULL == NULL all tuples in table + NULLS_NOT_EQUAL NULL != NULL all tuples in table + IGNORE_NULLS n/a tuples that don't have NULLs */ void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, - ulonglong *unique, ulonglong records) + ulonglong *unique, ulonglong *notnull, + ulonglong records) { - ulonglong count=0,tmp; + ulonglong count=0,tmp, unique_tuples; + ulonglong tuples= records; uint parts; for (parts=0 ; parts < keyinfo->keysegs ; parts++) { count+=unique[parts]; - if (count == 0) - tmp=records; + unique_tuples= count + 1; + if (notnull) + { + tuples= notnull[parts]; + /* + #(unique_tuples not counting tuples with NULLs) = + #(unique_tuples counting tuples with NULLs as different) - + #(tuples with NULLs) + */ + unique_tuples -= (records - notnull[parts]); + } + + if (unique_tuples == 0) + tmp= 1; + else if (count == 0) + tmp= tuples; /* 1 unique tuple */ else - tmp= (records + (count+1)/2) / (count+1); - /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here. - let's ensure it is not */ + tmp= (tuples + unique_tuples/2) / unique_tuples; + + /* + for some weird keys (e.g. FULLTEXT) tmp can be <1 here. + let's ensure it is not + */ set_if_bigger(tmp,1); if (tmp >= (ulonglong) ~(ulong) 0) tmp=(ulonglong) ~(ulong) 0; + *rec_per_key_part=(ulong) tmp; rec_per_key_part++; } |