BUG#9622, stage 2, work together with fix for BUG#12232:

added "nulls_ignored" index statistics collection method for MyISAM tables. (notification trigger: this is about BUG#9622).
author: sergefp@mysql.com <> 2005-10-21 06:29:17 +0400
committer: sergefp@mysql.com <> 2005-10-21 06:29:17 +0400
commit: 15a78334c32782327580933668f8dbeafb4ee2e3 (patch)
tree: 2ec77c30e97d8e7c0b2634973992317e30dffe08 /myisam/mi_check.c
parent: da625424e05dcb648b6e3e60934677f42697209b (diff)
download: mariadb-git-15a78334c32782327580933668f8dbeafb4ee2e3.tar.gz
1 files changed, 161 insertions, 15 deletions
diff --git a/myisam/mi_check.c b/myisam/mi_check.c
index 7397ee4e204..1fd7f1b45f6 100644
--- a/myisam/mi_check.c
+++ b/myisam/mi_check.c
@@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
     found_keys++;
 
     param->record_checksum=init_checksum;
+    
     bzero((char*) &param->unique_count,sizeof(param->unique_count));
+    bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
+
     if ((!(param->testflag & T_SILENT)))
       printf ("- check data record references index: %d\n",key+1);
     if (keyinfo->flag & HA_FULLTEXT)
@@ -496,7 +499,9 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
 
     if (param->testflag & T_STATISTICS)
       update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
-		       (ulonglong) info->state->records);
+                       param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+                       param->notnull_count: NULL, 
+                       (ulonglong)info->state->records);
   }
   if (param->testflag & T_INFO)
   {
@@ -552,6 +557,87 @@ err:
   return 1;
 }
 
+
+/*
+  "Ignore NULLs" statistics collection method: process first index tuple.
+
+  SYNOPSIS
+    mi_collect_stats_nonulls_first()
+      keyseg   IN     Array of key part descriptions
+      notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
+                                           tuples that don't contain NULLs)
+      key      IN     Key values tuple
+
+  DESCRIPTION
+    Process the first index tuple - find out which prefix tuples don't
+    contain NULLs, and update the array of notnull counters accordingly.
+*/
+
+static
+void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
+                                    uchar *key)
+{
+  uint first_null, kp;
+  first_null= ha_find_null(keyseg, key) - keyseg;
+  /*
+    All prefix tuples that don't include keypart_{first_null} are not-null
+    tuples (and all others aren't), increment counters for them.
+  */
+  for (kp= 0; kp < first_null; kp++)
+    notnull[kp]++;
+}
+
+
+/*
+  "Ignore NULLs" statistics collection method: process next index tuple.
+
+  SYNOPSIS
+    mi_collect_stats_nonulls_next()
+      keyseg   IN     Array of key part descriptions
+      notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
+                                           tuples that don't contain NULLs)
+      prev_key IN     Previous key values tuple
+      last_key IN     Next key values tuple
+
+  DESCRIPTION
+    Process the next index tuple:
+    1. Find out which prefix tuples of last_key don't contain NULLs, and
+       update the array of notnull counters accordingly.
+    2. Find the first keypart number where the tuples are different(A), or
+       last_key has NULL value (B), and return it, so caller can count
+       number of unique tuples for each key prefix. We don't need (B) to be
+       counted, and that is compensated back in update_key_parts().
+
+  RETURN
+    1 + number of first keypart where values differ or last_key tuple has NULL
+*/
+
+static
+int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
+                                  uchar *prev_key, uchar *last_key)
+{
+  uint diffs[2];
+  uint first_null_seg, kp;
+
+  /* Find first keypart where values are different or either of them is NULL */
+  ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY, 
+             SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL | SEARCH_RETURN_B_POS,
+             diffs);
+  HA_KEYSEG *seg= keyseg + diffs[0] - 1;
+  /* Find first NULL in last_key */
+  first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
+  for (kp= 0; kp < first_null_seg; kp++)
+    notnull[kp]++;
+
+  /* 
+    Return 1+ number of first key part where values differ. Don't care if
+    these were NULLs and not .... We compensate for that in
+    update_key_parts.
+  */
+  return diffs[0];
+}
+
+
 	/* Check if index is ok */
 
 static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
@@ -641,8 +727,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
           ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
                      SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
                      &diff_pos);
+        else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+        {
+          diff_pos= mi_collect_stats_nonulls_next(keyinfo->seg, 
+                                                  param->notnull_count,
+                                                  info->lastkey, key);
+        }
 	param->unique_count[diff_pos-1]++;
       }
+      else
+      {  
+        if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+          mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
+                                         key);
+      }
     }
     (*key_checksum)+= mi_byte_checksum((byte*) key,
 				       key_length- info->s->rec_reflength);
@@ -2088,7 +2186,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
 
     if (param->testflag & T_STATISTICS)
       update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
-		       (ulonglong) info->state->records);
+                       param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+                       sort_param.notnull: NULL,(ulonglong) info->state->records);
     share->state.key_map|=(ulonglong) 1 << sort_param.key;
 
     if (sort_param.fix_datafile)
@@ -3255,11 +3354,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
       ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
                  (uchar*) a, USE_WHOLE_KEY, 
                  SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos);
+    else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+    {
+      diff_pos= mi_collect_stats_nonulls_next(sort_param->seg,
+                                              sort_param->notnull,
+                                              sort_info->key_block->lastkey,
+                                              (uchar*)a);
+    }
     sort_param->unique[diff_pos-1]++;
   }
   else
   {
     cmp= -1;
+    if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+      mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
+                                     (uchar*)a);
   }
   if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
   {
@@ -3981,21 +4090,30 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
   
   SYNOPSIS
     update_key_parts()
-      keyinfo               Index information (only key->keysegs used)
+      keyinfo           IN  Index information (only key->keysegs used)
       rec_per_key_part  OUT Store statistics here
-      unique            IN  Array of #distinct values collected over index
-                            run.
+      unique            IN  Array of (#distinct tuples)
+      notnull_tuples    IN  Array of (#tuples), or NULL
       records               Number of records in the table
       
   NOTES
+    This function handles all 3 index statistics collection methods.
+
     Unique is an array:
     unique[0]= (#different values of {keypart1}) - 1
-    unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1
+    unique[1]= (#different values of {keypart1,keypart2} tuple) - unique[0] - 1
     ...
+    
+    For MI_STATS_METHOD_IGNORE_NULLS notnull_tuples is an array too:
+    notnull_tuples[0]= (# of {keypart1} tuples such that keypart1 is not NULL)
+    notnull_tuples[1]= (# of {keypart1,keypart2} tuples such that all 
+                        keypart{i} are not NULL)
+    ...
+    For all other statistics collection methods notnull_tuples=NULL.
+      
     The 'unique' array is collected in one sequential scan through the entire
     index. This is done in two places: in chk_index() and in sort_key_write().
-    Statistics collection may consider NULLs as either equal or unequal (see
-    SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*).
+    notnull_tuples, if present, is collected during the same index scan.
 
     Output is an array:
     rec_per_key_part[k] = 
@@ -4007,25 +4125,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
         index tuples}
      
      = #tuples-in-the-index / #distinct-tuples-in-the-index.
+    
+    The #tuples-in-the-index and #distinct-tuples-in-the-index have different 
+    meaning depending on which statistics collection method is used:
+    
+    MI_STATS_METHOD_*  how are nulls compared?  which tuples are counted?
+     NULLS_EQUAL            NULL == NULL           all tuples in table
+     NULLS_NOT_EQUAL        NULL != NULL           all tuples in table
+     IGNORE_NULLS               n/a             tuples that don't have NULLs
 */
 
 void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
-			     ulonglong *unique, ulonglong records)
+                      ulonglong *unique, ulonglong *notnull,
+                      ulonglong records)
 {
-  ulonglong count=0,tmp;
+  ulonglong count=0,tmp, unique_tuples;
+  ulonglong tuples= records;
   uint parts;
   for (parts=0 ; parts < keyinfo->keysegs  ; parts++)
   {
     count+=unique[parts];
-    if (count == 0)
-      tmp=records;
+    unique_tuples= count + 1;    
+    if (notnull)
+    {
+      tuples= notnull[parts];
+      /* 
+        #(unique_tuples not counting tuples with NULLs) = 
+          #(unique_tuples counting tuples with NULLs as different) - 
+          #(tuples with NULLs)
+      */
+      unique_tuples -= (records - notnull[parts]);
+    }
+    
+    if (unique_tuples == 0)
+      tmp= 1;
+    else if (count == 0)
+      tmp= tuples; /* 1 unique tuple */
     else
-      tmp= (records + (count+1)/2) / (count+1);
-    /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
-       let's ensure it is not */
+      tmp= (tuples + unique_tuples/2) / unique_tuples;
+
+    /* 
+      for some weird keys (e.g. FULLTEXT) tmp can be <1 here. 
+      let's ensure it is not
+    */
     set_if_bigger(tmp,1);
     if (tmp >= (ulonglong) ~(ulong) 0)
       tmp=(ulonglong) ~(ulong) 0;
+
     *rec_per_key_part=(ulong) tmp;
     rec_per_key_part++;
   }
author	sergefp@mysql.com <>	2005-10-21 06:29:17 +0400
committer	sergefp@mysql.com <>	2005-10-21 06:29:17 +0400
commit	15a78334c32782327580933668f8dbeafb4ee2e3 (patch)
tree	2ec77c30e97d8e7c0b2634973992317e30dffe08 /myisam/mi_check.c
parent	da625424e05dcb648b6e3e60934677f42697209b (diff)
download	mariadb-git-15a78334c32782327580933668f8dbeafb4ee2e3.tar.gz