8 files changed, 243 insertions, 57 deletions
diff --git a/myisam/mi_check.c b/myisam/mi_check.c
index 7397ee4e204..8783a5fef54 100644
--- a/myisam/mi_check.c
+++ b/myisam/mi_check.c
@@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
     found_keys++;
 
     param->record_checksum=init_checksum;
+    
     bzero((char*) &param->unique_count,sizeof(param->unique_count));
+    bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
+
     if ((!(param->testflag & T_SILENT)))
       printf ("- check data record references index: %d\n",key+1);
     if (keyinfo->flag & HA_FULLTEXT)
@@ -496,7 +499,9 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
 
     if (param->testflag & T_STATISTICS)
       update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
-		       (ulonglong) info->state->records);
+                       param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+                       param->notnull_count: NULL, 
+                       (ulonglong)info->state->records);
   }
   if (param->testflag & T_INFO)
   {
@@ -552,6 +557,96 @@ err:
   return 1;
 }
 
+
+/*
+  "Ignore NULLs" statistics collection method: process first index tuple.
+
+  SYNOPSIS
+    mi_collect_stats_nonulls_first()
+      keyseg   IN     Array of key part descriptions
+      notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_(i+1)}
+                                           tuples that don't contain NULLs)
+      key      IN     Key values tuple
+
+  DESCRIPTION
+    Process the first index tuple - find out which key prefixes of it don't
+    contain NULLs, and increment the notnull counter of each such prefix.
+*/
+
+static
+void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
+                                    uchar *key)
+{
+  uint first_null, kp;
+  first_null= ha_find_null(keyseg, key) - keyseg;
+  /*
+    Prefixes that end before key part number first_null (0-based) contain no
+    NULLs, every longer prefix does; count one more tuple for the former.
+  */
+  for (kp= 0; kp < first_null; kp++)
+    notnull[kp]++;
+}
+
+
+/*
+  "Ignore NULLs" statistics collection method: process next index tuple.
+
+  SYNOPSIS
+    mi_collect_stats_nonulls_next()
+      keyseg   IN     Array of key part descriptions
+      notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_(i+1)}
+                                           tuples that don't contain NULLs)
+      prev_key IN     Previous key values tuple
+      last_key IN     Key values tuple being processed (follows prev_key)
+
+  DESCRIPTION
+    Process the next index tuple:
+    1. Find out which key prefixes of last_key don't contain NULLs, and
+       increment the notnull counter of each such prefix.
+    2. Find the first keypart number where the prev_key and last_key tuples
+       are different (A), or last_key has a NULL value (B), and return it, so
+       the caller can count the number of unique tuples for each key prefix.
+       We don't need (B) to be counted; update_key_parts() compensates for
+       that when it is given the notnull array.
+
+  RETURN
+    1 + number of first keypart where values differ or last_key tuple has NULL
+*/
+
+static
+int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
+                                  uchar *prev_key, uchar *last_key)
+{
+  uint diffs[2];
+  uint first_null_seg, kp;
+  HA_KEYSEG *seg;
+
+  /*
+     Find the first keypart where values are different or either of them is
+     NULL (SEARCH_NULL_ARE_NOT_EQUAL is passed). Results are in diffs array:
+     diffs[0]= 1 + number of first different keypart
+     diffs[1]= offset: (last_key + diffs[1]) points to first value in
+                       last_key that is NULL or different from corresponding
+                       value in prev_key.
+  */
+  ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY, 
+             SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs);
+  seg= keyseg + diffs[0] - 1;
+
+  /* Find first NULL in last_key; the scan starts at key part seg, not 0 */
+  first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
+  for (kp= 0; kp < first_null_seg; kp++)
+    notnull[kp]++;
+
+  /*
+    Return 1 + number of the first key part where the values differ. The
+    "difference" may be just a NULL in last_key rather than a changed value;
+    update_key_parts() compensates for the tuples counted that way.
+  */
+  return diffs[0];
+}
+
+
 	/* Check if index is ok */
 
 static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
@@ -563,7 +658,7 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
   uchar key[MI_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos;
   my_off_t next_page,record;
   char llbuff[22];
-  uint diff_pos;
+  uint diff_pos[2];
   DBUG_ENTER("chk_index");
   DBUG_DUMP("buff",(byte*) buff,mi_getint(buff));
 
@@ -621,7 +716,7 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
     }
     if ((*keys)++ &&
 	(flag=ha_key_cmp(keyinfo->seg,info->lastkey,key,key_length,
-			 comp_flag, &diff_pos)) >=0)
+			 comp_flag, diff_pos)) >=0)
     {
       DBUG_DUMP("old",(byte*) info->lastkey, info->lastkey_length);
       DBUG_DUMP("new",(byte*) key, key_length);
@@ -640,8 +735,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
         if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
           ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
                      SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
-                     &diff_pos);
-	param->unique_count[diff_pos-1]++;
+                     diff_pos);
+        else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+        {
+          diff_pos[0]= mi_collect_stats_nonulls_next(keyinfo->seg, 
+                                                  param->notnull_count,
+                                                  info->lastkey, key);
+        }
+	param->unique_count[diff_pos[0]-1]++;
+      }
+      else
+      {  
+        if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+          mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
+                                         key);
       }
     }
     (*key_checksum)+= mi_byte_checksum((byte*) key,
@@ -1731,9 +1838,10 @@ static int sort_one_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
 	_mi_kpointer(info,keypos-nod_flag,param->new_file_pos); /* Save new pos */
 	if (sort_one_index(param,info,keyinfo,next_page,new_file))
 	{
-	  DBUG_PRINT("error",("From page: %ld, keyoffset: %d  used_length: %d",
-			      (ulong) pagepos, (int) (keypos - buff),
-			      (int) used_length));
+	  DBUG_PRINT("error",
+		     ("From page: %ld, keyoffset: 0x%lx  used_length: %d",
+		      (ulong) pagepos, (ulong) (keypos - buff),
+		      (int) used_length));
 	  DBUG_DUMP("buff",(byte*) buff,used_length);
 	  goto err;
 	}
@@ -2088,7 +2196,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
 
     if (param->testflag & T_STATISTICS)
       update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
-		       (ulonglong) info->state->records);
+                       param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+                       sort_param.notnull: NULL,(ulonglong) info->state->records);
     share->state.key_map|=(ulonglong) 1 << sort_param.key;
 
     if (sort_param.fix_datafile)
@@ -3232,15 +3341,15 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
 static int sort_key_cmp(MI_SORT_PARAM *sort_param, const void *a,
 			const void *b)
 {
-  uint not_used;
+  uint not_used[2];
   return (ha_key_cmp(sort_param->seg, *((uchar**) a), *((uchar**) b),
-		     USE_WHOLE_KEY, SEARCH_SAME,&not_used));
+		     USE_WHOLE_KEY, SEARCH_SAME, not_used));
 } /* sort_key_cmp */
 
 
 static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
 {
-  uint diff_pos;
+  uint diff_pos[2];
   char llbuff[22],llbuff2[22];
   SORT_INFO *sort_info=sort_param->sort_info;
   MI_CHECK *param= sort_info->param;
@@ -3250,16 +3359,26 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
   {
     cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
 		   (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE,
-		   &diff_pos);
+		   diff_pos);
     if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
       ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
                  (uchar*) a, USE_WHOLE_KEY, 
-                 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos);
-    sort_param->unique[diff_pos-1]++;
+                 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
+    else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+    {
+      diff_pos[0]= mi_collect_stats_nonulls_next(sort_param->seg,
+                                                 sort_param->notnull,
+                                                 sort_info->key_block->lastkey,
+                                                 (uchar*)a);
+    }
+    sort_param->unique[diff_pos[0]-1]++;
   }
   else
   {
     cmp= -1;
+    if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+      mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
+                                     (uchar*)a);
   }
   if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
   {
@@ -3978,24 +4097,34 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
 
 /*
   Update statistics for each part of an index
-  
+
   SYNOPSIS
     update_key_parts()
-      keyinfo               Index information (only key->keysegs used)
+      keyinfo           IN  Index information (only key->keysegs used)
       rec_per_key_part  OUT Store statistics here
-      unique            IN  Array of #distinct values collected over index
-                            run.
+      unique            IN  Array of (#distinct tuples)
+      notnull_tuples    IN  Array of (#tuples), or NULL
       records               Number of records in the table
-      
-  NOTES
+
+  DESCRIPTION
+    This function is called to produce index statistics values from the unique
+    and notnull_tuples arrays after these arrays were filled in by a
+    sequential index scan (the scan is done in two places: chk_index() and
+    sort_key_write()).
+
+    This function handles all 3 index statistics collection methods.
+
     Unique is an array:
-    unique[0]= (#different values of {keypart1}) - 1
-    unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1
-    ...
-    The 'unique' array is collected in one sequential scan through the entire
-    index. This is done in two places: in chk_index() and in sort_key_write().
-    Statistics collection may consider NULLs as either equal or unequal (see
-    SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*).
+      unique[0]= (#different values of {keypart1}) - 1
+      unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
+      ...
+
+    For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
+      notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
+      notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all 
+                          keypart{i} are not NULL)
+      ...
+    For all other statistics collection methods notnull_tuples==NULL.
 
     Output is an array:
     rec_per_key_part[k] = 
@@ -4007,25 +4136,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
         index tuples}
      
      = #tuples-in-the-index / #distinct-tuples-in-the-index.
+    
+    The #tuples-in-the-index and #distinct-tuples-in-the-index have different 
+    meaning depending on which statistics collection method is used:
+    
+    MI_STATS_METHOD_*  how are nulls compared?  which tuples are counted?
+     NULLS_EQUAL            NULL == NULL           all tuples in table
+     NULLS_NOT_EQUAL        NULL != NULL           all tuples in table
+     IGNORE_NULLS               n/a             tuples that don't have NULLs
 */
 
 void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
-			     ulonglong *unique, ulonglong records)
+                      ulonglong *unique, ulonglong *notnull,
+                      ulonglong records)
 {
-  ulonglong count=0,tmp;
+  ulonglong count=0,tmp, unique_tuples;
+  ulonglong tuples= records;
   uint parts;
   for (parts=0 ; parts < keyinfo->keysegs  ; parts++)
   {
     count+=unique[parts];
-    if (count == 0)
-      tmp=records;
+    unique_tuples= count + 1;    
+    if (notnull)
+    {
+      tuples= notnull[parts];
+      /* 
+        #(unique_tuples not counting tuples with NULLs) = 
+          #(unique_tuples counting tuples with NULLs as different) - 
+          #(tuples with NULLs)
+      */
+      unique_tuples -= (records - notnull[parts]);
+    }
+    
+    if (unique_tuples == 0)
+      tmp= 1;
+    else if (count == 0)
+      tmp= tuples; /* 1 unique tuple */
     else
-      tmp= (records + (count+1)/2) / (count+1);
-    /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
-       let's ensure it is not */
+      tmp= (tuples + unique_tuples/2) / unique_tuples;
+
+    /* 
+      for some weird keys (e.g. FULLTEXT) tmp can be <1 here. 
+      let's ensure it is not
+    */
     set_if_bigger(tmp,1);
     if (tmp >= (ulonglong) ~(ulong) 0)
       tmp=(ulonglong) ~(ulong) 0;
+
     *rec_per_key_part=(ulong) tmp;
     rec_per_key_part++;
   }
diff --git a/myisam/mi_delete.c b/myisam/mi_delete.c
index b964cb35dd8..833223221ec 100644
--- a/myisam/mi_delete.c
+++ b/myisam/mi_delete.c
@@ -390,7 +390,8 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key,
   MYISAM_SHARE *share=info->s;
   MI_KEY_PARAM s_temp;
   DBUG_ENTER("del");
-  DBUG_PRINT("enter",("leaf_page: %ld   keypos: %lx",leaf_page,keypos));
+  DBUG_PRINT("enter",("leaf_page: %ld  keypos: 0x%lx", (long) leaf_page,
+		      (ulong) keypos));
   DBUG_DUMP("leaf_buff",(byte*) leaf_buff,mi_getint(leaf_buff));
 
   endpos=leaf_buff+mi_getint(leaf_buff);
@@ -495,7 +496,8 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo,
   MI_KEY_PARAM s_temp;
   MYISAM_SHARE *share=info->s;
   DBUG_ENTER("underflow");
-  DBUG_PRINT("enter",("leaf_page: %ld   keypos: %lx",(long) leaf_page,keypos));
+  DBUG_PRINT("enter",("leaf_page: %ld  keypos: 0x%lx",(long) leaf_page,
+		      (ulong) keypos));
   DBUG_DUMP("anc_buff",(byte*) anc_buff,mi_getint(anc_buff));
   DBUG_DUMP("leaf_buff",(byte*) leaf_buff,mi_getint(leaf_buff));
 
diff --git a/myisam/mi_rnext_same.c b/myisam/mi_rnext_same.c
index 4d770258a72..92692d0517f 100644
--- a/myisam/mi_rnext_same.c
+++ b/myisam/mi_rnext_same.c
@@ -28,7 +28,7 @@
 int mi_rnext_same(MI_INFO *info, byte *buf)
 {
   int error;
-  uint inx,not_used;
+  uint inx,not_used[2];
   MI_KEYDEF *keyinfo;
   DBUG_ENTER("mi_rnext_same");
 
@@ -69,7 +69,7 @@ int mi_rnext_same(MI_INFO *info, byte *buf)
 			       info->s->state.key_root[inx])))
           break;
         if (ha_key_cmp(keyinfo->seg, info->lastkey, info->lastkey2,
-                       info->last_rkey_length, SEARCH_FIND, &not_used))
+                       info->last_rkey_length, SEARCH_FIND, not_used))
         {
           error=1;
           my_errno=HA_ERR_END_OF_FILE;
diff --git a/myisam/mi_search.c b/myisam/mi_search.c
index 4ea2480889e..9321f5b87d5 100644
--- a/myisam/mi_search.c
+++ b/myisam/mi_search.c
@@ -128,13 +128,13 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
 
   if ((nextflag & (SEARCH_SMALLER | SEARCH_LAST)) && flag != 0)
   {
-    uint not_used;
+    uint not_used[2];
     if (_mi_get_prev_key(info,keyinfo, buff, info->lastkey, keypos,
                          &info->lastkey_length))
       goto err;
     if (!(nextflag & SEARCH_SMALLER) &&
         ha_key_cmp(keyinfo->seg, info->lastkey, key, key_len, SEARCH_FIND,
-                    &not_used))
+                   not_used))
     {
       my_errno=HA_ERR_KEY_NOT_FOUND;                    /* Didn't find key */
       goto err;
@@ -178,7 +178,7 @@ int _mi_bin_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
 {
   reg4 int start,mid,end,save_end;
   int flag;
-  uint totlength,nod_flag,not_used;
+  uint totlength,nod_flag,not_used[2];
   DBUG_ENTER("_mi_bin_search");
 
   LINT_INIT(flag);
@@ -192,7 +192,7 @@ int _mi_bin_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
   {
     mid= (start+end)/2;
     if ((flag=ha_key_cmp(keyinfo->seg,page+(uint) mid*totlength,key,key_len,
-                          comp_flag,&not_used))
+                         comp_flag, not_used))
         >= 0)
       end=mid;
     else
@@ -200,7 +200,7 @@ int _mi_bin_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
   }
   if (mid != start)
     flag=ha_key_cmp(keyinfo->seg,page+(uint) start*totlength,key,key_len,
-                     comp_flag,&not_used);
+                     comp_flag, not_used);
   if (flag < 0)
     start++;                    /* point at next, bigger key */
   *ret_pos=page+(uint) start*totlength;
@@ -241,7 +241,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
                    uchar *buff, my_bool *last_key)
 {
   int flag;
-  uint nod_flag,length,not_used;
+  uint nod_flag,length,not_used[2];
   uchar t_buff[MI_MAX_KEY_BUFF],*end;
   DBUG_ENTER("_mi_seq_search");
 
@@ -262,7 +262,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
       DBUG_RETURN(MI_FOUND_WRONG_KEY);
     }
     if ((flag=ha_key_cmp(keyinfo->seg,t_buff,key,key_len,comp_flag,
-                          &not_used)) >= 0)
+                         not_used)) >= 0)
       break;
 #ifdef EXTRA_DEBUG
     DBUG_PRINT("loop",("page: %lx  key: '%s'  flag: %d", (long) page, t_buff,
@@ -503,9 +503,9 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
       cmp_rest:
 	  if (key_len_left>0)
 	  {
-	    uint not_used;
+	    uint not_used[2];
 	    if ((flag = ha_key_cmp(keyinfo->seg+1,vseg,
-				   k,key_len_left,nextflag,&not_used)) >= 0)
+				   k, key_len_left, nextflag, not_used)) >= 0)
 	      break;
 	  }
 	  else
diff --git a/myisam/mi_write.c b/myisam/mi_write.c
index cd9e73fba22..52455320515 100644
--- a/myisam/mi_write.c
+++ b/myisam/mi_write.c
@@ -885,10 +885,10 @@ int _mi_ck_write_tree(register MI_INFO *info, uint keynr, uchar *key,
 
 static int keys_compare(bulk_insert_param *param, uchar *key1, uchar *key2)
 {
-  uint not_used;
+  uint not_used[2];
   return ha_key_cmp(param->info->s->keyinfo[param->keynr].seg,
-		     key1, key2, USE_WHOLE_KEY, SEARCH_SAME,
-		     &not_used);
+                    key1, key2, USE_WHOLE_KEY, SEARCH_SAME,
+                    not_used);
 }
 
 
diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c
index 2dd05cf7e67..d68ea410852 100644
--- a/myisam/myisamchk.c
+++ b/myisam/myisamchk.c
@@ -339,7 +339,8 @@ static struct my_option my_long_options[] =
     REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
   {"stats_method", OPT_STATS_METHOD,
    "Specifies how index statistics collection code should threat NULLs. "
-   "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).",
+   "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
+   "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
    (gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0,
     GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
   { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
@@ -451,6 +452,10 @@ static void usage(void)
   -a, --analyze	      Analyze distribution of keys. Will make some joins in\n\
 		      MySQL faster.  You can check the calculated distribution\n\
 		      by using '--description --verbose table_name'.\n\
+  --stats_method=name Specifies how index statistics collection code should\n\
+                      threat NULLs. Possible values of name are \"nulls_unequal\"\n\
+                      (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\
+                      \"nulls_ignored\".\n\
   -d, --description   Prints some information about table.\n\
   -A, --set-auto-increment[=value]\n\
 		      Force auto_increment to start at this or higher value\n\
@@ -472,7 +477,7 @@ static void usage(void)
 #include <help_end.h>
 
 const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal",
-                                           NullS};
+                                           "nulls_ignored", NullS};
 TYPELIB myisam_stats_method_typelib= {
   array_elements(myisam_stats_method_names) - 1, "",
   myisam_stats_method_names, NULL};
@@ -699,14 +704,25 @@ get_one_option(int optid,
   case OPT_STATS_METHOD:
   {
     int method;
+    enum_mi_stats_method method_conv;
     myisam_stats_method_str= argument;
     if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
     {
       fprintf(stderr, "Invalid value of stats_method: %s.\n", argument);
       exit(1);
     }
-    check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL :
-                                              MI_STATS_METHOD_NULLS_NOT_EQUAL;
+    switch (method-1) {            /* 0-based myisam_stats_method_names index */
+    case 0:                        /* "nulls_unequal" */
+      method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL;
+      break;
+    case 1:                        /* "nulls_equal" */
+      method_conv= MI_STATS_METHOD_NULLS_EQUAL;
+      break;
+    default:                       /* 2, "nulls_ignored": the only name left */
+      method_conv= MI_STATS_METHOD_IGNORE_NULLS;
+      break;
+    }
+    check_param.stats_method= method_conv;
     break;
   }
 #ifdef DEBUG					/* Only useful if debugging */
diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h
index 93a7bf96f59..a766d59d72a 100644
--- a/myisam/myisamdef.h
+++ b/myisam/myisamdef.h
@@ -297,7 +297,14 @@ typedef struct st_mi_sort_param
   pthread_t  thr;
   IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
   DYNAMIC_ARRAY buffpek;
+  
+  /* 
+    The next two are used to collect statistics, see update_key_parts for
+    description.
+  */
   ulonglong unique[MI_MAX_KEY_SEG+1];
+  ulonglong notnull[MI_MAX_KEY_SEG+1];
+
   my_off_t pos,max_pos,filepos,start_recpos;
   uint key, key_length,real_key_length,sortbuff_size;
   uint maxbuffers, keys, find_length, sort_keys_length;
diff --git a/myisam/sort.c b/myisam/sort.c
index e8cd9938e42..96b55d599c8 100644
--- a/myisam/sort.c
+++ b/myisam/sort.c
@@ -481,8 +481,12 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
     {
       share->state.key_map|=(ulonglong) 1 << sinfo->key;
       if (param->testflag & T_STATISTICS)
-        update_key_parts(sinfo->keyinfo, rec_per_key_part,
-                         sinfo->unique, (ulonglong) info->state->records);
+        update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique,
+                         param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+                         sinfo->notnull: NULL,
+                         (ulonglong) info->state->records);
+
+
       if (!sinfo->buffpek.elements)
       {
         if (param->testflag & T_VERBOSE)