diff options
author | unknown <sergefp@mysql.com> | 2005-10-23 02:49:57 +0400 |
---|---|---|
committer | unknown <sergefp@mysql.com> | 2005-10-23 02:49:57 +0400 |
commit | 3c02a0534d4b5450b2a5126bbf84428595c4a803 (patch) | |
tree | 496319ac159d34aa63a9666078ddc28558ca04c3 /myisam | |
parent | 1c9783e854965376844a021ab1312b373852f287 (diff) | |
parent | 02dc7bfb1a52c21621356cc0799f3f896a2e2fea (diff) | |
download | mariadb-git-3c02a0534d4b5450b2a5126bbf84428595c4a803.tar.gz |
Merge spetrunia@bk-internal.mysql.com:/home/bk/mysql-4.1
into mysql.com:/home/psergey/mysql-4.1-nulls-stats-r2
sql/mysqld.cc:
Auto merged
Diffstat (limited to 'myisam')
-rw-r--r-- | myisam/mi_check.c | 200 | ||||
-rw-r--r-- | myisam/myisamchk.c | 24 | ||||
-rw-r--r-- | myisam/myisamdef.h | 7 | ||||
-rw-r--r-- | myisam/sort.c | 8 |
4 files changed, 211 insertions, 28 deletions
diff --git a/myisam/mi_check.c b/myisam/mi_check.c index 7397ee4e204..75f25f1361a 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) found_keys++; param->record_checksum=init_checksum; + bzero((char*) ¶m->unique_count,sizeof(param->unique_count)); + bzero((char*) ¶m->notnull_count,sizeof(param->notnull_count)); + if ((!(param->testflag & T_SILENT))) printf ("- check data record references index: %d\n",key+1); if (keyinfo->flag & HA_FULLTEXT) @@ -496,7 +499,9 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) if (param->testflag & T_STATISTICS) update_key_parts(keyinfo, rec_per_key_part, param->unique_count, - (ulonglong) info->state->records); + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + param->notnull_count: NULL, + (ulonglong)info->state->records); } if (param->testflag & T_INFO) { @@ -552,6 +557,96 @@ err: return 1; } + +/* + "Ignore NULLs" statistics collection method: process first index tuple. + + SYNOPSIS + mi_collect_stats_nonulls_first() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + key IN Key values tuple + + DESCRIPTION + Process the first index tuple - find out which prefix tuples don't + contain NULLs, and update the array of notnull counters accordingly. +*/ + +static +void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull, + uchar *key) +{ + uint first_null, kp; + first_null= ha_find_null(keyseg, key) - keyseg; + /* + All prefix tuples that don't include keypart_{first_null} are not-null + tuples (and all others aren't), increment counters for them. + */ + for (kp= 0; kp < first_null; kp++) + notnull[kp]++; +} + + +/* + "Ignore NULLs" statistics collection method: process next index tuple. + + SYNOPSIS + mi_collect_stats_nonulls_next() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + prev_key IN Previous key values tuple + last_key IN Next key values tuple + + DESCRIPTION + Process the next index tuple: + 1. Find out which prefix tuples of last_key don't contain NULLs, and + update the array of notnull counters accordingly. + 2. Find the first keypart number where the prev_key and last_key tuples + are different(A), or last_key has NULL value(B), and return it, so the + caller can count number of unique tuples for each key prefix. We don't + need (B) to be counted, and that is compensated back in + update_key_parts(). + + RETURN + 1 + number of first keypart where values differ or last_key tuple has NULL +*/ + +static +int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, + uchar *prev_key, uchar *last_key) +{ + uint diffs[2]; + uint first_null_seg, kp; + + /* + Find the first keypart where values are different or either of them is + NULL. We get results in diffs array: + diffs[0]= 1 + number of first different keypart + diffs[1]=offset: (last_key + diffs[1]) points to first value in + last_key that is NULL or different from corresponding + value in prev_key. + */ + ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL | SEARCH_RETURN_B_POS, + diffs); + HA_KEYSEG *seg= keyseg + diffs[0] - 1; + + /* Find first NULL in last_key */ + first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg; + for (kp= 0; kp < first_null_seg; kp++) + notnull[kp]++; + + /* + Return 1+ number of first key part where values differ. Don't care if + these were NULLs and not .... We compensate for that in + update_key_parts. + */ + return diffs[0]; +} + + /* Check if index is ok */ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, @@ -641,8 +736,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos= mi_collect_stats_nonulls_next(keyinfo->seg, + param->notnull_count, + info->lastkey, key); + } param->unique_count[diff_pos-1]++; } + else + { + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count, + key); + } } (*key_checksum)+= mi_byte_checksum((byte*) key, key_length- info->s->rec_reflength); @@ -2088,7 +2195,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, if (param->testflag & T_STATISTICS) update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, - (ulonglong) info->state->records); + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + sort_param.notnull: NULL,(ulonglong) info->state->records); share->state.key_map|=(ulonglong) 1 << sort_param.key; if (sort_param.fix_datafile) @@ -3255,11 +3363,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos= mi_collect_stats_nonulls_next(sort_param->seg, + sort_param->notnull, + sort_info->key_block->lastkey, + (uchar*)a); + } sort_param->unique[diff_pos-1]++; } else { cmp= -1; + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull, + (uchar*)a); } if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) { @@ -3978,24 +4096,34 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, /* Update statistics for each part of an index - + SYNOPSIS update_key_parts() - keyinfo Index information (only key->keysegs used) + keyinfo IN Index information (only key->keysegs used) rec_per_key_part OUT Store statistics here - unique IN Array of #distinct values collected over index - run. + unique IN Array of (#distinct tuples) + notnull_tuples IN Array of (#tuples), or NULL records Number of records in the table - - NOTES + + DESCRIPTION + This function is called produce index statistics values from unique and + notnull_tuples arrays after these arrays were produced with sequential + index scan (the scan is done in two places: chk_index() and + sort_key_write()). + + This function handles all 3 index statistics collection methods. + Unique is an array: - unique[0]= (#different values of {keypart1}) - 1 - unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1 - ... - The 'unique' array is collected in one sequential scan through the entire - index. This is done in two places: in chk_index() and in sort_key_write(). - Statistics collection may consider NULLs as either equal or unequal (see - SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*). + unique[0]= (#different values of {keypart1}) - 1 + unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1 + ... + + For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too: + notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL) + notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all + keypart{i} are not NULL) + ... + For all other statistics collection methods notnull_tuples==NULL. Output is an array: rec_per_key_part[k] = @@ -4007,25 +4135,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, index tuples} = #tuples-in-the-index / #distinct-tuples-in-the-index. + + The #tuples-in-the-index and #distinct-tuples-in-the-index have different + meaning depending on which statistics collection method is used: + + MI_STATS_METHOD_* how are nulls compared? which tuples are counted? + NULLS_EQUAL NULL == NULL all tuples in table + NULLS_NOT_EQUAL NULL != NULL all tuples in table + IGNORE_NULLS n/a tuples that don't have NULLs */ void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, - ulonglong *unique, ulonglong records) + ulonglong *unique, ulonglong *notnull, + ulonglong records) { - ulonglong count=0,tmp; + ulonglong count=0,tmp, unique_tuples; + ulonglong tuples= records; uint parts; for (parts=0 ; parts < keyinfo->keysegs ; parts++) { count+=unique[parts]; - if (count == 0) - tmp=records; + unique_tuples= count + 1; + if (notnull) + { + tuples= notnull[parts]; + /* + #(unique_tuples not counting tuples with NULLs) = + #(unique_tuples counting tuples with NULLs as different) - + #(tuples with NULLs) + */ + unique_tuples -= (records - notnull[parts]); + } + + if (unique_tuples == 0) + tmp= 1; + else if (count == 0) + tmp= tuples; /* 1 unique tuple */ else - tmp= (records + (count+1)/2) / (count+1); - /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here. - let's ensure it is not */ + tmp= (tuples + unique_tuples/2) / unique_tuples; + + /* + for some weird keys (e.g. FULLTEXT) tmp can be <1 here. + let's ensure it is not + */ set_if_bigger(tmp,1); if (tmp >= (ulonglong) ~(ulong) 0) tmp=(ulonglong) ~(ulong) 0; + *rec_per_key_part=(ulong) tmp; rec_per_key_part++; } diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c index 2dd05cf7e67..d68ea410852 100644 --- a/myisam/myisamchk.c +++ b/myisam/myisamchk.c @@ -339,7 +339,8 @@ static struct my_option my_long_options[] = REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"stats_method", OPT_STATS_METHOD, "Specifies how index statistics collection code should threat NULLs. " - "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).", + "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " + "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", (gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} @@ -451,6 +452,10 @@ static void usage(void) -a, --analyze Analyze distribution of keys. Will make some joins in\n\ MySQL faster. You can check the calculated distribution\n\ by using '--description --verbose table_name'.\n\ + --stats_method=name Specifies how index statistics collection code should\n\ + threat NULLs. Possible values of name are \"nulls_unequal\"\n\ + (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\ + \"nulls_ignored\".\n\ -d, --description Prints some information about table.\n\ -A, --set-auto-increment[=value]\n\ Force auto_increment to start at this or higher value\n\ @@ -472,7 +477,7 @@ static void usage(void) #include <help_end.h> const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal", - NullS}; + "nulls_ignored", NullS}; TYPELIB myisam_stats_method_typelib= { array_elements(myisam_stats_method_names) - 1, "", myisam_stats_method_names, NULL}; @@ -699,14 +704,25 @@ get_one_option(int optid, case OPT_STATS_METHOD: { int method; + enum_mi_stats_method method_conv; myisam_stats_method_str= argument; if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) { fprintf(stderr, "Invalid value of stats_method: %s.\n", argument); exit(1); } - check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL : - MI_STATS_METHOD_NULLS_NOT_EQUAL; + switch (method-1) { + case 0: + method_conv= MI_STATS_METHOD_NULLS_EQUAL; + break; + case 1: + method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL; + break; + case 2: + method_conv= MI_STATS_METHOD_IGNORE_NULLS; + break; + } + check_param.stats_method= method_conv; break; } #ifdef DEBUG /* Only useful if debugging */ diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h index 93a7bf96f59..a766d59d72a 100644 --- a/myisam/myisamdef.h +++ b/myisam/myisamdef.h @@ -297,7 +297,14 @@ typedef struct st_mi_sort_param pthread_t thr; IO_CACHE read_cache, tempfile, tempfile_for_exceptions; DYNAMIC_ARRAY buffpek; + + /* + The next two are used to collect statistics, see update_key_parts for + description. + */ ulonglong unique[MI_MAX_KEY_SEG+1]; + ulonglong notnull[MI_MAX_KEY_SEG+1]; + my_off_t pos,max_pos,filepos,start_recpos; uint key, key_length,real_key_length,sortbuff_size; uint maxbuffers, keys, find_length, sort_keys_length; diff --git a/myisam/sort.c b/myisam/sort.c index e8cd9938e42..96b55d599c8 100644 --- a/myisam/sort.c +++ b/myisam/sort.c @@ -481,8 +481,12 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) { share->state.key_map|=(ulonglong) 1 << sinfo->key; if (param->testflag & T_STATISTICS) - update_key_parts(sinfo->keyinfo, rec_per_key_part, - sinfo->unique, (ulonglong) info->state->records); + update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + sinfo->notnull: NULL, + (ulonglong) info->state->records); + + if (!sinfo->buffpek.elements) { if (param->testflag & T_VERBOSE) |