summaryrefslogtreecommitdiff
path: root/sql/uniques.cc
diff options
context:
space:
mode:
authorunknown <konstantin@oak.local>2003-12-19 19:04:03 +0300
committerunknown <konstantin@oak.local>2003-12-19 19:04:03 +0300
commit58a52a2a84eafe535317a533af56a81d81a8fd56 (patch)
tree951b1a6033c70ae5444a97eb91abce40c599a567 /sql/uniques.cc
parent844d9b766a70f840641ac0da501c7aa46a586135 (diff)
downloadmariadb-git-58a52a2a84eafe535317a533af56a81d81a8fd56.tar.gz
Implementation of SUM(DISTINCT), tests cases
sql/filesort.cc: Snippet of filesort() code moved to function reuse_freed_buff() - change buffpek pointers to use buff from freed BUFFPEK Used in filesort() and merge_walk(). sql/item_sum.cc: Implementation of Item_sum_sum_distinct - SUM(DISTINCT) item, which uses Unique to resolve duplicates sql/item_sum.h: New sum Item added - Item_sum_sum_distinct - for SUM(DISTINCT) function sql/sql_class.h: added walk() and reset() methods to Unique, used in Item_sum_sum_distinct. sql/sql_sort.h: declaration for reuse_freed_buff() to be able to use it in uniques.cc sql/sql_yacc.yy: parser extended to handle MIN(DISTICNT), MAX(DISTINCT), SUM(DISTINCT) sql/uniques.cc: Implementation for Unique::reset(), Unique::walk() as well as for merge_walk() algorithm.
Diffstat (limited to 'sql/uniques.cc')
-rw-r--r--sql/uniques.cc234
1 files changed, 233 insertions, 1 deletions
diff --git a/sql/uniques.cc b/sql/uniques.cc
index 02146426782..7f8793abc91 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -84,6 +84,7 @@ bool Unique::flush()
elements+= tree.elements_in_tree;
file_ptr.count=tree.elements_in_tree;
file_ptr.file_pos=my_b_tell(&file);
+
if (tree_walk(&tree, (tree_walk_action) unique_write_to_file,
(void*) this, left_root_right) ||
insert_dynamic(&file_ptrs, (gptr) &file_ptr))
@@ -94,6 +95,237 @@ bool Unique::flush()
/*
+ Clear the tree and the file.
+ You must call reset() if you want to reuse Unique after walk().
+*/
+
+void
+Unique::reset()
+{
+ reset_tree(&tree);
+ /*
+ If elements != 0, some trees were stored in the file (see how
+ flush() works). Note, that we can not count on my_b_tell(&file) == 0
+ here, because it can return 0 right after walk(), and walk() does not
+ reset any Unique member.
+ */
+ if (elements)
+ {
+ reset_dynamic(&file_ptrs);
+ reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1);
+ }
+ elements= 0;
+}
+
+/*
+ The comparison function, passed to queue_init() in merge_walk() must
+ use comparison function of Uniques::tree, but compare members of struct
+ BUFFPEK.
+*/
+
+struct BUFFPEK_COMPARE_CONTEXT
+{
+ qsort_cmp2 key_compare;
+ void *key_compare_arg;
+};
+
+C_MODE_START
+
+static int buffpek_compare(void *arg, byte *key_ptr1, byte *key_ptr2)
+{
+ BUFFPEK_COMPARE_CONTEXT *ctx= (BUFFPEK_COMPARE_CONTEXT *) arg;
+ return ctx->key_compare(ctx->key_compare_arg,
+ *((byte **) key_ptr1), *((byte **)key_ptr2));
+}
+
+C_MODE_END
+
+
+/*
+ DESCRIPTION
+ Function is very similar to merge_buffers, but instead of writing sorted
+ unique keys to the output file, it invokes walk_action for each key.
+ This saves I/O if you need to pass through all unique keys only once.
+ SYNOPSIS
+ merge_walk()
+ All params are 'IN' (but see comment for begin, end):
+ merge_buffer buffer to perform cached piece-by-piece loading
+ of trees; initially the buffer is empty
+ merge_buffer_size size of merge_buffer. Must be aligned with
+ key_length
+ key_length size of tree element; key_length * (end - begin)
+ must be less or equal than merge_buffer_size.
+ begin pointer to BUFFPEK struct for the first tree.
+ end pointer to BUFFPEK struct for the last tree;
+ end > begin and [begin, end) form a consecutive
+ range. BUFFPEKs structs in that range are used and
+ overwritten in merge_walk().
+ walk_action element visitor. Action is called for each unique
+ key.
+ walk_action_arg argument to walk action. Passed to it on each call.
+ compare elements comparison function
+ compare_arg comparison function argument
+ file file with all trees dumped. Trees in the file
+ must contain sorted unique values. Cache must be
+ initialized in read mode.
+ RETURN VALUE
+ 0 ok
+ <> 0 error
+*/
+
+static bool merge_walk(uchar *merge_buffer, uint merge_buffer_size,
+ uint key_length, BUFFPEK *begin, BUFFPEK *end,
+ tree_walk_action walk_action, void *walk_action_arg,
+ qsort_cmp2 compare, void *compare_arg,
+ IO_CACHE *file)
+{
+ BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
+ QUEUE queue;
+ if (end <= begin ||
+ merge_buffer_size < key_length * (end - begin + 1) ||
+ init_queue(&queue, end - begin, offsetof(BUFFPEK, key), 0,
+ buffpek_compare, &compare_context))
+ return 1;
+ /* we need space for one key when a piece of merge buffer is re-read */
+ merge_buffer_size-= key_length;
+ uchar *save_key_buff= merge_buffer + merge_buffer_size;
+ uint max_key_count_per_piece= merge_buffer_size/(end-begin)/key_length;
+ /* if piece_size is aligned reuse_freed_buffer will always hit */
+ uint piece_size= max_key_count_per_piece * key_length;
+ uint bytes_read; /* to hold return value of read_to_buffer */
+ BUFFPEK *top;
+ int res= 1;
+ /*
+ Invariant: queue must contain top element from each tree, until a tree
+ is not completely walked through.
+ Here we're forcing the invariant, inserting one element from each tree
+ to the queue.
+ */
+ for (top= begin; top != end; ++top)
+ {
+ top->base= merge_buffer + (top - begin) * piece_size;
+ top->max_keys= max_key_count_per_piece;
+ bytes_read= read_to_buffer(file, top, key_length);
+ if (bytes_read == (uint) (-1))
+ goto end;
+ DBUG_ASSERT(bytes_read);
+ queue_insert(&queue, (byte *) top);
+ }
+ top= (BUFFPEK *) queue_top(&queue);
+ while (queue.elements > 1)
+ {
+ /*
+ Every iteration one element is removed from the queue, and one is
+ inserted by the rules of the invariant. If two adjacent elements on
+ the top of the queue are not equal, biggest one is unique, because all
+ elements in each tree are unique. Action is applied only to unique
+ elements.
+ */
+ void *old_key= top->key;
+ /*
+ read next key from the cache or from the file and push it to the
+ queue; this gives new top.
+ */
+ top->key+= key_length;
+ if (--top->mem_count)
+ queue_replaced(&queue);
+ else /* next piece should be read */
+ {
+ /* save old_key not to overwrite it in read_to_buffer */
+ memcpy(save_key_buff, old_key, key_length);
+ old_key= save_key_buff;
+ bytes_read= read_to_buffer(file, top, key_length);
+ if (bytes_read == (uint) (-1))
+ goto end;
+ else if (bytes_read > 0) /* top->key, top->mem_count are reset */
+ queue_replaced(&queue); /* in read_to_buffer */
+ else
+ {
+ /*
+ Tree for old 'top' element is empty: remove it from the queue and
+ give all its memory to the nearest tree.
+ */
+ queue_remove(&queue, 0);
+ reuse_freed_buff(&queue, top, key_length);
+ }
+ }
+ top= (BUFFPEK *) queue_top(&queue);
+ /* new top has been obtained; if old top is unique, apply the action */
+ if (compare(compare_arg, old_key, top->key))
+ {
+ if (walk_action(old_key, 1, walk_action_arg))
+ goto end;
+ }
+ }
+ /*
+ Applying walk_action to the tail of the last tree: this is safe because
+ either we had only one tree in the beginning, either we work with the
+ last tree in the queue.
+ */
+ do
+ {
+ do
+ {
+ if (walk_action(top->key, 1, walk_action_arg))
+ goto end;
+ top->key+= key_length;
+ }
+ while (--top->mem_count);
+ bytes_read= read_to_buffer(file, top, key_length);
+ if (bytes_read == (uint) (-1))
+ goto end;
+ }
+ while (bytes_read);
+ res= 0;
+end:
+ delete_queue(&queue);
+ return res;
+}
+
+
+/*
+ DESCRIPTION
+ Walks consecutively through all unique elements:
+ if all elements are in memory, then it simply invokes 'tree_walk', else
+ all flushed trees are loaded to memory piece-by-piece, pieces are
+ sorted, and action is called for each unique value.
+ Note: so as merging resets file_ptrs state, this method can change
+ internal Unique state to undefined: if you want to reuse Unique after
+ walk() you must call reset() first!
+ SYNOPSIS
+ Unique:walk()
+ All params are 'IN':
+ action function-visitor, typed in include/my_tree.h
+ function is called for each unique element
+ arg argument for visitor, which is passed to it on each call
+ RETURN VALUE
+ 0 OK
+ <> 0 error
+ */
+
+bool Unique::walk(tree_walk_action action, void *walk_action_arg)
+{
+ if (elements == 0) /* the whole tree is in memory */
+ return tree_walk(&tree, action, walk_action_arg, left_root_right);
+
+ /* flush current tree to the file to have some memory for merge buffer */
+ if (flush())
+ return 1;
+ if (flush_io_cache(&file) || reinit_io_cache(&file, READ_CACHE, 0L, 0, 0))
+ return 1;
+ uchar *merge_buffer= (uchar *) my_malloc(max_in_memory_size, MYF(0));
+ if (merge_buffer == 0)
+ return 1;
+ int res= merge_walk(merge_buffer, max_in_memory_size, size,
+ (BUFFPEK *) file_ptrs.buffer,
+ (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
+ action, walk_action_arg,
+ tree.compare, tree.custom_arg, &file);
+ x_free(merge_buffer);
+ return res;
+}
+
+/*
Modify the TABLE element so that when one calls init_records()
the rows will be read in priority order.
*/
@@ -114,7 +346,7 @@ bool Unique::get(TABLE *table)
return 0;
}
}
- /* Not enough memory; Save the result to file */
+ /* Not enough memory; Save the result to file && free memory used by tree */
if (flush())
return 1;