fixes/optimizations for count(distinct)

more extensive testing of count(distinct) sql/item_sum.cc: fixes/optimizations for count(distinct)
author: unknown <sasha@mysql.sashanet.com> 2001-05-12 17:52:54 -0600
committer: unknown <sasha@mysql.sashanet.com> 2001-05-12 17:52:54 -0600
commit: 64984296241873f90e39d744b59690c7b3b408bb (patch)
tree: 21b4d45558a445f285ffe318246b699a11303126
parent: 1d56c0324e21e0e1b92e46445a8896006cb9a8e8 (diff)
download: mariadb-git-64984296241873f90e39d744b59690c7b3b408bb.tar.gz
3 files changed, 130 insertions, 9 deletions
diff --git a/mysql-test/r/count_distinct2.result b/mysql-test/r/count_distinct2.result
new file mode 100644
index 00000000000..b2bcf11a37c
--- /dev/null
+++ b/mysql-test/r/count_distinct2.result
@@ -0,0 +1,72 @@
+n1
+1
+2
+NULL
+count(distinct n1)
+2
+n2
+11
+12
+13
+NULL
+count(distinct n2)
+3
+s
+one
+two
+NULL
+count(distinct s)
+2
+vs
+eleven
+twevle
+thirteen
+NULL
+count(distinct vs)
+3
+t
+eleven
+twelve
+foo
+bar
+NULL
+count(distinct t)
+4
+n1	n2
+1	11
+2	11
+2	12
+2	13
+NULL	13
+2	NULL
+count(distinct n1,n2)
+4
+n1	s
+1	one
+2	two
+NULL	two
+2	NULL
+count(distinct n1,s)
+2
+s	n1	vs
+one	1	eleven
+two	2	eleven
+two	2	twevle
+two	2	thirteen
+two	NULL	thirteen
+NULL	2	thirteen
+two	2	NULL
+count(distinct s,n1,vs)
+4
+s	t
+one	eleven
+two	eleven
+two	twelve
+two	foo
+two	bar
+NULL	bar
+two	NULL
+count(distinct s,t)
+5
+count(distinct n1)	count(distinct n2)
+2	3
diff --git a/mysql-test/t/count_distinct2.test b/mysql-test/t/count_distinct2.test
new file mode 100644
index 00000000000..cfdce77622c
--- /dev/null
+++ b/mysql-test/t/count_distinct2.test
@@ -0,0 +1,42 @@
+create table t1(n1 int, n2 int, s char(20), vs varchar(20), t text);
+insert into t1 values (1,11, 'one','eleven', 'eleven'),
+ (1,11, 'one','eleven', 'eleven'),
+ (2,11, 'two','eleven', 'eleven'),
+ (2,12, 'two','twevle', 'twelve'),
+ (2,13, 'two','thirteen', 'foo'),
+ (2,13, 'two','thirteen', 'foo'),
+ (2,13, 'two','thirteen', 'bar'),
+ (NULL,13, 'two','thirteen', 'bar'),
+ (2,NULL, 'two','thirteen', 'bar'),
+ (2,13, NULL,'thirteen', 'bar'),
+ (2,13, 'two',NULL, 'bar'),
+ (2,13, 'two','thirteen', NULL);
+
+select distinct n1 from t1;
+select count(distinct n1) from t1;
+
+select distinct n2 from t1;
+select count(distinct n2) from t1;
+
+select distinct s from t1;
+select count(distinct s) from t1;
+
+select distinct vs from t1;
+select count(distinct vs) from t1;
+
+select distinct t from t1;
+select count(distinct t) from t1;
+
+select distinct n1,n2 from t1;
+select count(distinct n1,n2) from t1;
+
+select distinct n1,s from t1;
+select count(distinct n1,s) from t1;
+
+select distinct s,n1,vs from t1;
+select count(distinct s,n1,vs) from t1;
+
+select distinct s,t from t1;
+select count(distinct s,t) from t1;
+
+select count(distinct n1), count(distinct n2) from t1;
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index da0bc71ca1f..c0fba1dd80d 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -810,12 +810,13 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
   for(; field < field_end; ++field)
     {
       int res;
-      int len = (*field)->field_length;
+      Field* f = *field;
+      int len = f->field_length;
       switch((*field)->type())
 	{
 	case FIELD_TYPE_STRING:
 	case FIELD_TYPE_VAR_STRING:
-	  res = my_sortcmp(key1, key2, len);
+	  res = f->key_cmp(key1, key2);
 	  break;
 	default:
 	  res = memcmp(key1, key2, len);
@@ -879,20 +880,22 @@ bool Item_sum_count_distinct::setup(THD *thd)
 	// to use a simpler key compare method that can take advantage
 	// of not having to worry about other fields
 	{
-	  switch(table->field[0]->type())
+	  Field* field = table->field[0];
+	  switch(field->type())
 	    {
 	      // if we have a string, we must take care of charsets
 	      // and case sensitivity
 	    case FIELD_TYPE_STRING:
 	    case FIELD_TYPE_VAR_STRING:
-	      compare_key = (qsort_cmp2)simple_str_key_cmp;
+	      compare_key = (qsort_cmp2)(field->binary() ? simple_raw_key_cmp:
+					 simple_str_key_cmp);
 	      break;
 	    default: // since at this point we cannot have blobs
 	      // anything else can be compared with memcmp
 	      compare_key = (qsort_cmp2)simple_raw_key_cmp;
 	      break;
 	    }
-	  cmp_arg = (void*)(key_len = table->field[0]->field_length);
+	  cmp_arg = (void*)(key_len = field->field_length);
 	  rec_offset = 1;
 	}
       else // too bad, cannot cheat - there is more than one field
@@ -908,7 +911,8 @@ bool Item_sum_count_distinct::setup(THD *thd)
 	  rec_offset = table->reclength - key_len;
 	}
 
-      init_tree(&tree, 0, key_len, compare_key, 0, 0);
+      init_tree(&tree, min(max_heap_table_size, sortbuff_size/16),
+		key_len, compare_key, 0, 0);
       tree.cmp_arg = cmp_arg;
       use_tree = 1;
     }
@@ -919,11 +923,14 @@ bool Item_sum_count_distinct::setup(THD *thd)
 
 void Item_sum_count_distinct::reset()
 {
-  table->file->extra(HA_EXTRA_NO_CACHE);
-  table->file->delete_all_rows();
-  table->file->extra(HA_EXTRA_WRITE_CACHE);
   if(use_tree)
     delete_tree(&tree);
+  else
+    {
+      table->file->extra(HA_EXTRA_NO_CACHE);
+      table->file->delete_all_rows();
+      table->file->extra(HA_EXTRA_WRITE_CACHE);
+    }
   (void) add();
 }
author	unknown <sasha@mysql.sashanet.com>	2001-05-12 17:52:54 -0600
committer	unknown <sasha@mysql.sashanet.com>	2001-05-12 17:52:54 -0600
commit	64984296241873f90e39d744b59690c7b3b408bb (patch)
tree	21b4d45558a445f285ffe318246b699a11303126
parent	1d56c0324e21e0e1b92e46445a8896006cb9a8e8 (diff)
download	mariadb-git-64984296241873f90e39d744b59690c7b3b408bb.tar.gz