summaryrefslogtreecommitdiff
path: root/myisam
diff options
context:
space:
mode:
authorunknown <sergefp@mysql.com>2005-10-23 02:49:57 +0400
committerunknown <sergefp@mysql.com>2005-10-23 02:49:57 +0400
commit3c02a0534d4b5450b2a5126bbf84428595c4a803 (patch)
tree496319ac159d34aa63a9666078ddc28558ca04c3 /myisam
parent1c9783e854965376844a021ab1312b373852f287 (diff)
parent02dc7bfb1a52c21621356cc0799f3f896a2e2fea (diff)
downloadmariadb-git-3c02a0534d4b5450b2a5126bbf84428595c4a803.tar.gz
Merge spetrunia@bk-internal.mysql.com:/home/bk/mysql-4.1
into mysql.com:/home/psergey/mysql-4.1-nulls-stats-r2 sql/mysqld.cc: Auto merged
Diffstat (limited to 'myisam')
-rw-r--r--myisam/mi_check.c200
-rw-r--r--myisam/myisamchk.c24
-rw-r--r--myisam/myisamdef.h7
-rw-r--r--myisam/sort.c8
4 files changed, 211 insertions, 28 deletions
diff --git a/myisam/mi_check.c b/myisam/mi_check.c
index 7397ee4e204..75f25f1361a 100644
--- a/myisam/mi_check.c
+++ b/myisam/mi_check.c
@@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
found_keys++;
param->record_checksum=init_checksum;
+
bzero((char*) &param->unique_count,sizeof(param->unique_count));
+ bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
+
if ((!(param->testflag & T_SILENT)))
printf ("- check data record references index: %d\n",key+1);
if (keyinfo->flag & HA_FULLTEXT)
@@ -496,7 +499,9 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
if (param->testflag & T_STATISTICS)
update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
- (ulonglong) info->state->records);
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ param->notnull_count: NULL,
+ (ulonglong)info->state->records);
}
if (param->testflag & T_INFO)
{
@@ -552,6 +557,96 @@ err:
return 1;
}
+
+/*
+ "Ignore NULLs" statistics collection method: process first index tuple.
+
+ SYNOPSIS
+ mi_collect_stats_nonulls_first()
+ keyseg IN Array of key part descriptions
+ notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
+ tuples that don't contain NULLs)
+ key IN Key values tuple
+
+ DESCRIPTION
+ Process the first index tuple - find out which prefix tuples don't
+ contain NULLs, and update the array of notnull counters accordingly.
+*/
+
+static
+void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
+ uchar *key)
+{
+ uint first_null, kp;
+ first_null= ha_find_null(keyseg, key) - keyseg;
+ /*
+ All prefix tuples that don't include keypart_{first_null} are not-null
+ tuples (and all others aren't), increment counters for them.
+ */
+ for (kp= 0; kp < first_null; kp++)
+ notnull[kp]++;
+}
+
+
+/*
+ "Ignore NULLs" statistics collection method: process next index tuple.
+
+ SYNOPSIS
+ mi_collect_stats_nonulls_next()
+ keyseg IN Array of key part descriptions
+ notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
+ tuples that don't contain NULLs)
+ prev_key IN Previous key values tuple
+ last_key IN Next key values tuple
+
+ DESCRIPTION
+ Process the next index tuple:
+ 1. Find out which prefix tuples of last_key don't contain NULLs, and
+ update the array of notnull counters accordingly.
+ 2. Find the first keypart number where the prev_key and last_key tuples
+ are different(A), or last_key has NULL value(B), and return it, so the
+ caller can count number of unique tuples for each key prefix. We don't
+ need (B) to be counted, and that is compensated back in
+ update_key_parts().
+
+ RETURN
+ 1 + number of first keypart where values differ or last_key tuple has NULL
+*/
+
+static
+int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
+ uchar *prev_key, uchar *last_key)
+{
+ uint diffs[2];
+ uint first_null_seg, kp;
+
+ /*
+ Find the first keypart where values are different or either of them is
+ NULL. We get results in diffs array:
+ diffs[0]= 1 + number of first different keypart
+ diffs[1]=offset: (last_key + diffs[1]) points to first value in
+ last_key that is NULL or different from corresponding
+ value in prev_key.
+ */
+ ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
+ SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL | SEARCH_RETURN_B_POS,
+ diffs);
+ HA_KEYSEG *seg= keyseg + diffs[0] - 1;
+
+ /* Find first NULL in last_key */
+ first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
+ for (kp= 0; kp < first_null_seg; kp++)
+ notnull[kp]++;
+
+ /*
+ Return 1+ number of first key part where values differ. Don't care if
+ these were NULLs and not .... We compensate for that in
+ update_key_parts.
+ */
+ return diffs[0];
+}
+
+
/* Check if index is ok */
static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
@@ -641,8 +736,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
&diff_pos);
+ else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ {
+ diff_pos= mi_collect_stats_nonulls_next(keyinfo->seg,
+ param->notnull_count,
+ info->lastkey, key);
+ }
param->unique_count[diff_pos-1]++;
}
+ else
+ {
+ if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
+ key);
+ }
}
(*key_checksum)+= mi_byte_checksum((byte*) key,
key_length- info->s->rec_reflength);
@@ -2088,7 +2195,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
if (param->testflag & T_STATISTICS)
update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
- (ulonglong) info->state->records);
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ sort_param.notnull: NULL,(ulonglong) info->state->records);
share->state.key_map|=(ulonglong) 1 << sort_param.key;
if (sort_param.fix_datafile)
@@ -3255,11 +3363,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos);
+ else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ {
+ diff_pos= mi_collect_stats_nonulls_next(sort_param->seg,
+ sort_param->notnull,
+ sort_info->key_block->lastkey,
+ (uchar*)a);
+ }
sort_param->unique[diff_pos-1]++;
}
else
{
cmp= -1;
+ if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
+ (uchar*)a);
}
if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
{
@@ -3978,24 +4096,34 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
/*
Update statistics for each part of an index
-
+
SYNOPSIS
update_key_parts()
- keyinfo Index information (only key->keysegs used)
+ keyinfo IN Index information (only key->keysegs used)
rec_per_key_part OUT Store statistics here
- unique IN Array of #distinct values collected over index
- run.
+ unique IN Array of (#distinct tuples)
+ notnull_tuples IN Array of (#tuples), or NULL
records Number of records in the table
-
- NOTES
+
+ DESCRIPTION
+ This function is called produce index statistics values from unique and
+ notnull_tuples arrays after these arrays were produced with sequential
+ index scan (the scan is done in two places: chk_index() and
+ sort_key_write()).
+
+ This function handles all 3 index statistics collection methods.
+
Unique is an array:
- unique[0]= (#different values of {keypart1}) - 1
- unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1
- ...
- The 'unique' array is collected in one sequential scan through the entire
- index. This is done in two places: in chk_index() and in sort_key_write().
- Statistics collection may consider NULLs as either equal or unequal (see
- SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*).
+ unique[0]= (#different values of {keypart1}) - 1
+ unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
+ ...
+
+ For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
+ notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
+ notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
+ keypart{i} are not NULL)
+ ...
+ For all other statistics collection methods notnull_tuples==NULL.
Output is an array:
rec_per_key_part[k] =
@@ -4007,25 +4135,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
index tuples}
= #tuples-in-the-index / #distinct-tuples-in-the-index.
+
+ The #tuples-in-the-index and #distinct-tuples-in-the-index have different
+ meaning depending on which statistics collection method is used:
+
+ MI_STATS_METHOD_* how are nulls compared? which tuples are counted?
+ NULLS_EQUAL NULL == NULL all tuples in table
+ NULLS_NOT_EQUAL NULL != NULL all tuples in table
+ IGNORE_NULLS n/a tuples that don't have NULLs
*/
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
- ulonglong *unique, ulonglong records)
+ ulonglong *unique, ulonglong *notnull,
+ ulonglong records)
{
- ulonglong count=0,tmp;
+ ulonglong count=0,tmp, unique_tuples;
+ ulonglong tuples= records;
uint parts;
for (parts=0 ; parts < keyinfo->keysegs ; parts++)
{
count+=unique[parts];
- if (count == 0)
- tmp=records;
+ unique_tuples= count + 1;
+ if (notnull)
+ {
+ tuples= notnull[parts];
+ /*
+ #(unique_tuples not counting tuples with NULLs) =
+ #(unique_tuples counting tuples with NULLs as different) -
+ #(tuples with NULLs)
+ */
+ unique_tuples -= (records - notnull[parts]);
+ }
+
+ if (unique_tuples == 0)
+ tmp= 1;
+ else if (count == 0)
+ tmp= tuples; /* 1 unique tuple */
else
- tmp= (records + (count+1)/2) / (count+1);
- /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
- let's ensure it is not */
+ tmp= (tuples + unique_tuples/2) / unique_tuples;
+
+ /*
+ for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
+ let's ensure it is not
+ */
set_if_bigger(tmp,1);
if (tmp >= (ulonglong) ~(ulong) 0)
tmp=(ulonglong) ~(ulong) 0;
+
*rec_per_key_part=(ulong) tmp;
rec_per_key_part++;
}
diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c
index 2dd05cf7e67..d68ea410852 100644
--- a/myisam/myisamchk.c
+++ b/myisam/myisamchk.c
@@ -339,7 +339,8 @@ static struct my_option my_long_options[] =
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"stats_method", OPT_STATS_METHOD,
"Specifies how index statistics collection code should threat NULLs. "
- "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).",
+ "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
+ "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
(gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
@@ -451,6 +452,10 @@ static void usage(void)
-a, --analyze Analyze distribution of keys. Will make some joins in\n\
MySQL faster. You can check the calculated distribution\n\
by using '--description --verbose table_name'.\n\
+ --stats_method=name Specifies how index statistics collection code should\n\
+ threat NULLs. Possible values of name are \"nulls_unequal\"\n\
+ (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\
+ \"nulls_ignored\".\n\
-d, --description Prints some information about table.\n\
-A, --set-auto-increment[=value]\n\
Force auto_increment to start at this or higher value\n\
@@ -472,7 +477,7 @@ static void usage(void)
#include <help_end.h>
const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal",
- NullS};
+ "nulls_ignored", NullS};
TYPELIB myisam_stats_method_typelib= {
array_elements(myisam_stats_method_names) - 1, "",
myisam_stats_method_names, NULL};
@@ -699,14 +704,25 @@ get_one_option(int optid,
case OPT_STATS_METHOD:
{
int method;
+ enum_mi_stats_method method_conv;
myisam_stats_method_str= argument;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
{
fprintf(stderr, "Invalid value of stats_method: %s.\n", argument);
exit(1);
}
- check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL :
- MI_STATS_METHOD_NULLS_NOT_EQUAL;
+ switch (method-1) {
+ case 0:
+ method_conv= MI_STATS_METHOD_NULLS_EQUAL;
+ break;
+ case 1:
+ method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL;
+ break;
+ case 2:
+ method_conv= MI_STATS_METHOD_IGNORE_NULLS;
+ break;
+ }
+ check_param.stats_method= method_conv;
break;
}
#ifdef DEBUG /* Only useful if debugging */
diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h
index 93a7bf96f59..a766d59d72a 100644
--- a/myisam/myisamdef.h
+++ b/myisam/myisamdef.h
@@ -297,7 +297,14 @@ typedef struct st_mi_sort_param
pthread_t thr;
IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
DYNAMIC_ARRAY buffpek;
+
+ /*
+ The next two are used to collect statistics, see update_key_parts for
+ description.
+ */
ulonglong unique[MI_MAX_KEY_SEG+1];
+ ulonglong notnull[MI_MAX_KEY_SEG+1];
+
my_off_t pos,max_pos,filepos,start_recpos;
uint key, key_length,real_key_length,sortbuff_size;
uint maxbuffers, keys, find_length, sort_keys_length;
diff --git a/myisam/sort.c b/myisam/sort.c
index e8cd9938e42..96b55d599c8 100644
--- a/myisam/sort.c
+++ b/myisam/sort.c
@@ -481,8 +481,12 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
{
share->state.key_map|=(ulonglong) 1 << sinfo->key;
if (param->testflag & T_STATISTICS)
- update_key_parts(sinfo->keyinfo, rec_per_key_part,
- sinfo->unique, (ulonglong) info->state->records);
+ update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique,
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ sinfo->notnull: NULL,
+ (ulonglong) info->state->records);
+
+
if (!sinfo->buffpek.elements)
{
if (param->testflag & T_VERBOSE)