From 32d1ad79b098055af4f7af2836c7411fd3bc1f8e Mon Sep 17 00:00:00 2001 From: "kaa@polly.local" <> Date: Mon, 27 Nov 2006 19:12:10 +0300 Subject: Fix for bug #24261 "crash when WHERE contains NOT IN ('') for unsigned column type" When calculating a SEL_TREE for the "c_{i-1} < X < c_i" interval, check if the tree returned for the "-inf < X < c_0" interval is NULL --- sql/opt_range.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'sql') diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 96239315026..efef9361bda 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3703,7 +3703,8 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, for (uint idx= 0; idx < param->keys; idx++) { SEL_ARG *new_interval, *last_val; - if (((new_interval= tree2->keys[idx])) && + if (((new_interval= tree2->keys[idx])) && + (tree->keys[idx]) && ((last_val= tree->keys[idx]->last()))) { new_interval->min_value= last_val->max_value; -- cgit v1.2.1 From ecbb4eb987c50060e8f33be9857de35168035d65 Mon Sep 17 00:00:00 2001 From: "Kristofer.Pettersson@naruto." <> Date: Thu, 7 Dec 2006 17:01:00 +0100 Subject: Bug#17498 failed to put data file in custom directory use "data directory" option - Using DATA/INDEX DIRECTORY option on Windows put data/index file into default directory because the OS doesn't support readlink(). - The procedure for changing data/index file directory is different under Windows. - With this fix we report a warning if DATA/INDEX option is used, but OS doesn't support readlink(). 
--- sql/sql_parse.cc | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'sql') diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index cb2fa0f7014..9f443fae215 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -2485,6 +2485,12 @@ mysql_execute_command(THD *thd) goto unsent_create_error; #ifndef HAVE_READLINK + if (lex->create_info.data_file_name) + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 0, + "DATA DIRECTORY option ignored"); + if (lex->create_info.index_file_name) + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 0, + "INDEX DIRECTORY option ignored"); lex->create_info.data_file_name=lex->create_info.index_file_name=0; #else /* Fix names if symlinked tables */ -- cgit v1.2.1 From 4162e009cb2fc4dc72f35c15bda34768bfaf2807 Mon Sep 17 00:00:00 2001 From: "kaa@polly.local" <> Date: Thu, 14 Dec 2006 20:58:07 +0300 Subject: Fix for bug #24117 "server crash on a FETCH with a cursor on a table which is not in the table cache" Problem: When creating a temporary field for a temporary table in create_tmp_field_from_field(), a resulting field is created as an exact copy of an original one (in Field::new_field()). However, Field_enum and Field_set contain a pointer (typelib) to memory allocated in the parent table's MEM_ROOT, which under some circumstances may be deallocated later by the time a temporary table is used. Solution: Override the new_field() method for Field_enum and Field_set and create a separate copy of the typelib structure in there. 
--- sql/field.cc | 10 ++++++++++ sql/field.h | 1 + 2 files changed, 11 insertions(+) (limited to 'sql') diff --git a/sql/field.cc b/sql/field.cc index ec97bc92d24..684ce5602d4 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -7672,6 +7672,16 @@ void Field_enum::sql_type(String &res) const } +Field *Field_enum::new_field(MEM_ROOT *root, struct st_table *new_table, + bool keep_type) +{ + Field_enum *res= (Field_enum*) Field::new_field(root, new_table, keep_type); + if (res) + res->typelib= copy_typelib(root, typelib); + return res; +} + + /* set type. This is a string which can have a collection of different values. diff --git a/sql/field.h b/sql/field.h index b79c2bf77a8..01b05d886a8 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1277,6 +1277,7 @@ public: { flags|=ENUM_FLAG; } + Field *new_field(MEM_ROOT *root, struct st_table *new_table, bool keep_type); enum_field_types type() const { return FIELD_TYPE_STRING; } enum Item_result cmp_type () const { return INT_RESULT; } enum Item_result cast_to_int_type () const { return INT_RESULT; } -- cgit v1.2.1 From 110b8af30396ba92efad2d51eb4ed82914903332 Mon Sep 17 00:00:00 2001 From: "msvensson@neptunus.(none)" <> Date: Thu, 14 Dec 2006 21:48:08 +0100 Subject: Put the DBUG_DUMP inside ifdef DEBUG_DATA_PACKETS --- sql/net_serv.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'sql') diff --git a/sql/net_serv.cc b/sql/net_serv.cc index e84b2266e82..3037007ae35 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -597,7 +597,10 @@ net_real_write(NET *net,const char *packet,ulong len) } #endif /* HAVE_COMPRESS */ - /* DBUG_DUMP("net",packet,len); */ +#ifdef DEBUG_DATA_PACKETS + DBUG_DUMP("data",packet,len); +#endif + #ifndef NO_ALARM thr_alarm_init(&alarmed); if (net_blocking) -- cgit v1.2.1 From fe341ad904493610bc6f698ec3999a091f4e7bfc Mon Sep 17 00:00:00 2001 From: "msvensson@shellback." 
<> Date: Fri, 15 Dec 2006 10:41:24 +0100 Subject: Add macro for retrieving sec part of "struct timespec" Use macros for working with "struct timespec" in event_queue.cc Fix merge problem --- sql/event_queue.cc | 9 ++++----- sql/log_event.cc | 14 +++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'sql') diff --git a/sql/event_queue.cc b/sql/event_queue.cc index 7ec665fcd5f..879235c3e49 100644 --- a/sql/event_queue.cc +++ b/sql/event_queue.cc @@ -719,7 +719,6 @@ Event_queue::get_top_for_execution_if_time(THD *thd, Event_job_data **job_data) *job_data= NULL; DBUG_ENTER("Event_queue::get_top_for_execution_if_time"); - top_time.tv_nsec= 0; LOCK_QUEUE_DATA(); for (;;) { @@ -732,12 +731,12 @@ Event_queue::get_top_for_execution_if_time(THD *thd, Event_job_data **job_data) if (queue.elements) { top= ((Event_queue_element*) queue_element(&queue, 0)); - top_time.tv_sec= sec_since_epoch_TIME(&top->execute_at); + set_timespec(top_time, sec_since_epoch_TIME(&top->execute_at)); abstime= &top_time; } - if (!abstime || abstime->tv_sec > now) + if (!abstime || get_timespec_sec(*abstime) > now) { const char *msg; if (abstime) @@ -816,8 +815,8 @@ end: if (to_free) delete top; - DBUG_PRINT("info", ("returning %d et_new: 0x%lx abstime.tv_sec: %ld ", - ret, (long) *job_data, abstime ? abstime->tv_sec : 0)); + DBUG_PRINT("info", ("returning %d et_new: 0x%lx get_timespec_sec(abstime): %ld ", + ret, (long) *job_data, abstime ? 
get_timespec_sec(*abstime) : 0)); if (*job_data) DBUG_PRINT("info", ("db: %s name: %s definer=%s", (*job_data)->dbname.str, diff --git a/sql/log_event.cc b/sql/log_event.cc index 44cba324a02..8fc65c1a717 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -5558,9 +5558,9 @@ unpack_row(RELAY_LOG_INFO *rli, if (bitmap_is_set(cols, field_ptr - begin_ptr)) { - DBUG_ASSERT(table->record[0] <= f->ptr); - DBUG_ASSERT(f->ptr < (table->record[0] + table->s->reclength + - (f->pack_length_in_rec() == 0))); + DBUG_ASSERT((char*)table->record[0] <= f->ptr); + DBUG_ASSERT(f->ptr < (char*)(table->record[0] + table->s->reclength + + (f->pack_length_in_rec() == 0))); DBUG_PRINT("info", ("unpacking column '%s' to 0x%lx", f->field_name, (long) f->ptr)); @@ -6843,8 +6843,8 @@ static int find_and_fetch_row(TABLE *table, byte *key) trigger false warnings. */ #ifndef HAVE_purify - DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength); - DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength); + DBUG_DUMP("table->record[0]", (const char*)table->record[0], table->s->reclength); + DBUG_DUMP("table->record[1]", (const char*)table->record[1], table->s->reclength); #endif /* @@ -6870,8 +6870,8 @@ static int find_and_fetch_row(TABLE *table, byte *key) trigger false warnings. */ #ifndef HAVE_purify - DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength); - DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength); + DBUG_DUMP("table->record[0]", (const char*)table->record[0], table->s->reclength); + DBUG_DUMP("table->record[1]", (const char*)table->record[1], table->s->reclength); #endif /* Below is a minor "optimization". 
If the key (i.e., key number -- cgit v1.2.1 From 30deafa958e7e9e95abf5e70f54a2ceedd87b443 Mon Sep 17 00:00:00 2001 From: "msvensson@neptunus.(none)" <> Date: Fri, 15 Dec 2006 17:47:20 +0100 Subject: Calculate offset for wait time passed to set_timespec Pass different msg if waiting on empty queue --- sql/event_queue.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'sql') diff --git a/sql/event_queue.cc b/sql/event_queue.cc index 879235c3e49..53fa4f6b5cd 100644 --- a/sql/event_queue.cc +++ b/sql/event_queue.cc @@ -731,7 +731,8 @@ Event_queue::get_top_for_execution_if_time(THD *thd, Event_job_data **job_data) if (queue.elements) { top= ((Event_queue_element*) queue_element(&queue, 0)); - set_timespec(top_time, sec_since_epoch_TIME(&top->execute_at)); + set_timespec(top_time, + sec_since_epoch_TIME(&top->execute_at) - now); abstime= &top_time; } @@ -747,7 +748,7 @@ Event_queue::get_top_for_execution_if_time(THD *thd, Event_job_data **job_data) else { set_zero_time(&next_activation_at, MYSQL_TIMESTAMP_DATETIME); - msg= queue_wait_msg; + msg= queue_empty_msg; } cond_wait(thd, abstime, msg, SCHED_FUNC, __LINE__); -- cgit v1.2.1 From 395a0167c188d41c0d4cbd082b502e8c34da4afd Mon Sep 17 00:00:00 2001 From: "msvensson@shellback." <> Date: Mon, 18 Dec 2006 12:00:35 +0100 Subject: Reorganize the wait for event to be scheduled loop Only use "set_timespec" when there is a need to use it --- sql/event_queue.cc | 66 ++++++++++++++++++++++++------------------------------ 1 file changed, 29 insertions(+), 37 deletions(-) (limited to 'sql') diff --git a/sql/event_queue.cc b/sql/event_queue.cc index 879235c3e49..14a6a0dfa93 100644 --- a/sql/event_queue.cc +++ b/sql/event_queue.cc @@ -699,10 +699,7 @@ static const char *queue_wait_msg= "Waiting for next activation"; RETURN VALUE FALSE No error. If *job_data==NULL then top not elligible for execution. - Could be that there is no top. 
If abstime->tv_sec is set to value - greater than zero then use abstime with pthread_cond_timedwait(). - If abstime->tv_sec is zero then sleep with pthread_cond_wait(). - abstime->tv_nsec is always zero. + Could be that there is no top. TRUE Error */ @@ -712,7 +709,6 @@ Event_queue::get_top_for_execution_if_time(THD *thd, Event_job_data **job_data) { bool ret= FALSE; struct timespec top_time; - struct timespec *abstime; Event_queue_element *top= NULL; bool to_free= FALSE; bool to_drop= FALSE; @@ -724,43 +720,40 @@ Event_queue::get_top_for_execution_if_time(THD *thd, Event_job_data **job_data) { int res; - thd->end_time(); - time_t now= thd->query_start(); - abstime= NULL; + /* Break loop if thd has been killed */ + if (thd->killed) + { + DBUG_PRINT("info", ("thd->killed=%d", thd->killed)); + goto end; + } - if (queue.elements) + if (!queue.elements) { - top= ((Event_queue_element*) queue_element(&queue, 0)); - set_timespec(top_time, sec_since_epoch_TIME(&top->execute_at)); + /* There are no events in the queue */ + set_zero_time(&next_activation_at, MYSQL_TIMESTAMP_DATETIME); + + /* Wait on condition until signaled. Release LOCK_queue while waiting. */ + cond_wait(thd, NULL, queue_empty_msg, SCHED_FUNC, __LINE__); - abstime= &top_time; + continue; } - if (!abstime || get_timespec_sec(*abstime) > now) - { - const char *msg; - if (abstime) - { - next_activation_at= top->execute_at; - msg= queue_wait_msg; - } - else - { - set_zero_time(&next_activation_at, MYSQL_TIMESTAMP_DATETIME); - msg= queue_wait_msg; - } + top= ((Event_queue_element*) queue_element(&queue, 0)); - cond_wait(thd, abstime, msg, SCHED_FUNC, __LINE__); - if (thd->killed) - { - DBUG_PRINT("info", ("thd->killed=%d", thd->killed)); - goto end; - } + thd->end_time(); /* Get current time */ + + time_t seconds_to_next_event= + sec_since_epoch_TIME(&top->execute_at) - thd->query_start(); + next_activation_at= top->execute_at; + if (seconds_to_next_event > 0) + { /* - The queue could have been emptied. 
Therefore it's safe to start from - the beginning. Moreover, this way we will get also the new top, if - the element at the top has been changed. + Not yet time for top event, wait on condition with + time or until signaled. Release LOCK_queue while waiting. */ + set_timespec(top_time, seconds_to_next_event); + cond_wait(thd, &top_time, queue_wait_msg, SCHED_FUNC, __LINE__); + continue; } @@ -802,7 +795,7 @@ Event_queue::get_top_for_execution_if_time(THD *thd, Event_job_data **job_data) else queue_replaced(&queue); - dbug_dump_queue(now); + dbug_dump_queue(thd->query_start()); break; } end: @@ -815,8 +808,7 @@ end: if (to_free) delete top; - DBUG_PRINT("info", ("returning %d et_new: 0x%lx get_timespec_sec(abstime): %ld ", - ret, (long) *job_data, abstime ? get_timespec_sec(*abstime) : 0)); + DBUG_PRINT("info", ("returning %d et_new: 0x%lx ", ret, (long) *job_data)); if (*job_data) DBUG_PRINT("info", ("db: %s name: %s definer=%s", (*job_data)->dbname.str, -- cgit v1.2.1 From fc45754fb0095f7eb888ef773c862677ec42d3ef Mon Sep 17 00:00:00 2001 From: "msvensson@shellback." <> Date: Mon, 18 Dec 2006 14:12:19 +0100 Subject: Update function description for Event_queue::get_top_for_execution_if_time --- sql/event_queue.cc | 2 -- 1 file changed, 2 deletions(-) (limited to 'sql') diff --git a/sql/event_queue.cc b/sql/event_queue.cc index 14a6a0dfa93..efd309e30e2 100644 --- a/sql/event_queue.cc +++ b/sql/event_queue.cc @@ -693,9 +693,7 @@ static const char *queue_wait_msg= "Waiting for next activation"; SYNOPSIS Event_queue::get_top_for_execution_if_time() thd [in] Thread - now [in] Current timestamp job_data [out] The object to execute - abstime [out] Time to sleep RETURN VALUE FALSE No error. If *job_data==NULL then top not elligible for execution. 
-- cgit v1.2.1 From abe43135b16fb0cbb2aa8e5fff05660bc11dbb7f Mon Sep 17 00:00:00 2001 From: "msvensson@pilot.mysql.com" <> Date: Fri, 22 Dec 2006 12:12:58 +0100 Subject: Bug#22694 "function plugin_foreach_with_mask() uses an uninitialized pointer" Fix uninitialized memory. --- sql/sql_plugin.cc | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'sql') diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 8cd4c661fb8..9ff88b2054a 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -951,29 +951,30 @@ my_bool plugin_foreach_with_mask(THD *thd, plugin_foreach_func *func, state_mask= ~state_mask; // do it only once rw_rdlock(&THR_LOCK_plugin); + total= type == MYSQL_ANY_PLUGIN ? plugin_array.elements + : plugin_hash[type].records; + /* + Do the alloca out here in case we do have a working alloca: + leaving the nested stack frame invalidates alloca allocation. + */ + plugins=(struct st_plugin_int **)my_alloca(total*sizeof(*plugins)); if (type == MYSQL_ANY_PLUGIN) { - total=plugin_array.elements; - plugins=(struct st_plugin_int **)my_alloca(total*sizeof(*plugins)); for (idx= 0; idx < total; idx++) { plugin= dynamic_element(&plugin_array, idx, struct st_plugin_int *); - if (plugin->state & state_mask) - continue; - plugins[idx]= plugin; + plugins[idx]= !(plugin->state & state_mask) ? plugin : NULL; } } else { - HASH *hash= &plugin_hash[type]; - total=hash->records; - plugins=(struct st_plugin_int **)my_alloca(total*sizeof(*plugins)); + HASH *hash= plugin_hash + type; for (idx= 0; idx < total; idx++) { plugin= (struct st_plugin_int *) hash_element(hash, idx); if (plugin->state & state_mask) continue; - plugins[idx]= plugin; + plugins[idx]= !(plugin->state & state_mask) ? 
plugin : NULL; } } rw_unlock(&THR_LOCK_plugin); -- cgit v1.2.1 From c3b6f11091377595ee77f12cec4587bd0a8b803b Mon Sep 17 00:00:00 2001 From: "msvensson@pilot.mysql.com" <> Date: Fri, 22 Dec 2006 12:35:06 +0100 Subject: Potential use of NULL pointer in 'plugin_foreach_with_mask', check pointer before dereferencing it. --- sql/sql_plugin.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sql') diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 9ff88b2054a..0b203002dec 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -985,7 +985,7 @@ my_bool plugin_foreach_with_mask(THD *thd, plugin_foreach_func *func, { rw_rdlock(&THR_LOCK_plugin); for (uint i=idx; i < total; i++) - if (plugins[i]->state & state_mask) + if (plugins[i] && plugins[i]->state & state_mask) plugins[i]=0; rw_unlock(&THR_LOCK_plugin); } -- cgit v1.2.1 From 86a9ad6883325e2034887984bc1dc0fcc9423092 Mon Sep 17 00:00:00 2001 From: "kaa@polly.local" <> Date: Fri, 22 Dec 2006 15:30:37 +0300 Subject: Fix for the bug #24037 "Lossy Hebrew to Unicode conversion". 
Added definitions for the following Hebrew characters as specified by the ISO/IEC 8859-8:1999: LEFT-TO-RIGHT MARK (LRM) RIGHT-TO-LEFT MARK (RLM) --- sql/share/charsets/hebrew.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'sql') diff --git a/sql/share/charsets/hebrew.xml b/sql/share/charsets/hebrew.xml index 5bcf222a728..981f308bfb5 100644 --- a/sql/share/charsets/hebrew.xml +++ b/sql/share/charsets/hebrew.xml @@ -40,7 +40,7 @@ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 - 02 02 02 02 02 02 02 02 02 02 02 00 00 00 00 00 + 02 02 02 02 02 02 02 02 02 02 02 00 00 20 20 00 @@ -106,7 +106,7 @@ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 2017 05D0 05D1 05D2 05D3 05D4 05D5 05D6 05D7 05D8 05D9 05DA 05DB 05DC 05DD 05DE 05DF -05E0 05E1 05E2 05E3 05E4 05E5 05E6 05E7 05E8 05E9 05EA 0000 0000 0000 0000 0000 +05E0 05E1 05E2 05E3 05E4 05E5 05E6 05E7 05E8 05E9 05EA 0000 0000 200E 200F 0000 -- cgit v1.2.1 From 50726b2322edac8e93489fcb5ef75a5634222d44 Mon Sep 17 00:00:00 2001 From: "cmiller@zippy.cornsilk.net" <> Date: Fri, 22 Dec 2006 15:37:37 -0500 Subject: Bug#22555: STDDEV yields positive result for groups with only one row When only one row was present, the subtraction of nearly the same number resulted in catastrophic cancellation, introducing an error in the VARIANCE calculation near 1e-15. That was sqrt()ed to get STDDEV, and the error was escalated to near 1e-8. The simple fix of testing for a row count of 1 and forcing that to yield 0.0 is insufficient, as two rows of the same value should also have a variance of 0.0, yet the error would be about the same. So, this patch changes the formula that computes the VARIANCE to be one that is not subject to catastrophic cancellation. 
In addition, it now uses only (faster-than-decimal) floating point numbers to calculate, and renders that to other types on demand. --- sql/item_sum.cc | 358 +++++++++++++++++++------------------------------------- sql/item_sum.h | 20 +++- 2 files changed, 134 insertions(+), 244 deletions(-) (limited to 'sql') diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 77c6e17607f..3b2a10ad666 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -1098,7 +1098,7 @@ Field *Item_sum_avg::create_tmp_field(bool group, TABLE *table, { /* We must store both value and counter in the temporary table in one field. - The easyest way is to do this is to store both value in a string + The easiest way is to do this is to store both value in a string and unpack on access. */ return new Field_string(((hybrid_type == DECIMAL_RESULT) ? @@ -1172,8 +1172,9 @@ String *Item_sum_avg::val_str(String *str) double Item_sum_std::val_real() { DBUG_ASSERT(fixed == 1); - double tmp= Item_sum_variance::val_real(); - return tmp <= 0.0 ? 0.0 : sqrt(tmp); + double nr= Item_sum_variance::val_real(); + DBUG_ASSERT(nr >= 0.0); + return sqrt(nr); } Item *Item_sum_std::copy_or_same(THD* thd) @@ -1187,40 +1188,77 @@ Item *Item_sum_std::copy_or_same(THD* thd) */ -Item_sum_variance::Item_sum_variance(THD *thd, Item_sum_variance *item): - Item_sum_num(thd, item), hybrid_type(item->hybrid_type), - cur_dec(item->cur_dec), count(item->count), sample(item->sample), - prec_increment(item->prec_increment) +/** + Variance implementation for floating-point implementations, without + catastrophic cancellation, from Knuth's _TAoCP_, 3rd ed, volume 2, pg232. + This alters the value at m, s, and increments count. +*/ + +/* + These two functions are used by the Item_sum_variance and the + Item_variance_field classes, which are unrelated, and each need to calculate + variance. The difference between the two classes is that the first is used + for a mundane SELECT, while the latter is used in a GROUPing SELECT. 
+*/ +static void variance_fp_recurrence_next(double *m, double *s, ulonglong *count, double nr) { - if (hybrid_type == DECIMAL_RESULT) + *count += 1; + + if (*count == 1) { - memcpy(dec_sum, item->dec_sum, sizeof(item->dec_sum)); - memcpy(dec_sqr, item->dec_sqr, sizeof(item->dec_sqr)); - for (int i=0; i<2; i++) - { - dec_sum[i].fix_buffer_pointer(); - dec_sqr[i].fix_buffer_pointer(); - } + *m= nr; + *s= 0; } else { - sum= item->sum; - sum_sqr= item->sum_sqr; + double m_kminusone= *m; + *m= m_kminusone + (nr - m_kminusone) / (double) *count; + *s= *s + (nr - m_kminusone) * (nr - *m); } } +static double variance_fp_recurrence_result(double s, ulonglong count, bool is_sample_variance) +{ + if (count == 1) + return 0.0; + + if (is_sample_variance) + return s / (count - 1); + + /* else, is a population variance */ + return s / count; +} + + +Item_sum_variance::Item_sum_variance(THD *thd, Item_sum_variance *item): + Item_sum_num(thd, item), hybrid_type(item->hybrid_type), + count(item->count), sample(item->sample), + prec_increment(item->prec_increment) +{ + recurrence_m= item->recurrence_m; + recurrence_s= item->recurrence_s; +} + + void Item_sum_variance::fix_length_and_dec() { DBUG_ENTER("Item_sum_variance::fix_length_and_dec"); maybe_null= null_value= 1; prec_increment= current_thd->variables.div_precincrement; + + /* + According to the SQL2003 standard (Part 2, Foundations; sec 10.9, + aggregate function; paragraph 7h of Syntax Rules), "the declared + type of the result is an implementation-defined aproximate numeric + type. 
+ */ + hybrid_type= REAL_RESULT; + switch (args[0]->result_type()) { case REAL_RESULT: case STRING_RESULT: decimals= min(args[0]->decimals + 4, NOT_FIXED_DEC); - hybrid_type= REAL_RESULT; - sum= 0.0; break; case INT_RESULT: case DECIMAL_RESULT: @@ -1229,37 +1267,14 @@ void Item_sum_variance::fix_length_and_dec() decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE); max_length= my_decimal_precision_to_length(precision, decimals, unsigned_flag); - cur_dec= 0; - hybrid_type= DECIMAL_RESULT; - my_decimal_set_zero(dec_sum); - my_decimal_set_zero(dec_sqr); - /* - The maxium value to usable for variance is DECIMAL_MAX_LENGTH/2 - becasue we need to be able to calculate in dec_bin_size1 - column_value * column_value - */ - f_scale0= args[0]->decimals; - f_precision0= min(args[0]->decimal_precision() + DECIMAL_LONGLONG_DIGITS, - DECIMAL_MAX_PRECISION); - f_scale1= min(args[0]->decimals * 2, DECIMAL_MAX_SCALE); - f_precision1= min(args[0]->decimal_precision()*2 + DECIMAL_LONGLONG_DIGITS, - DECIMAL_MAX_PRECISION); - dec_bin_size0= my_decimal_get_binary_size(f_precision0, f_scale0); - dec_bin_size1= my_decimal_get_binary_size(f_precision1, f_scale1); break; } case ROW_RESULT: default: DBUG_ASSERT(0); } - DBUG_PRINT("info", ("Type: %s (%d, %d)", - (hybrid_type == REAL_RESULT ? "REAL_RESULT" : - hybrid_type == DECIMAL_RESULT ? "DECIMAL_RESULT" : - hybrid_type == INT_RESULT ? "INT_RESULT" : - "--ILLEGAL!!!--"), - max_length, - (int)decimals)); + DBUG_PRINT("info", ("Type: REAL_RESULT (%d, %d)", max_length, (int)decimals)); DBUG_VOID_RETURN; } @@ -1270,6 +1285,11 @@ Item *Item_sum_variance::copy_or_same(THD* thd) } +/** + Create a new field to match the type of value we're expected to yield. + If we're grouping, then we need some space to serialize variables into, to + pass around. 
+*/ Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table, uint convert_blob_len) { @@ -1277,13 +1297,10 @@ Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table, { /* We must store both value and counter in the temporary table in one field. - The easyest way is to do this is to store both value in a string + The easiest way is to do this is to store both value in a string and unpack on access. */ - return new Field_string(((hybrid_type == DECIMAL_RESULT) ? - dec_bin_size0 + dec_bin_size1 : - sizeof(double)*2) + sizeof(longlong), - 0, name, table, &my_charset_bin); + return new Field_string(sizeof(double)*2 + sizeof(longlong), 0, name, table, &my_charset_bin); } return new Field_double(max_length, maybe_null,name,table,decimals); } @@ -1291,90 +1308,51 @@ Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table, void Item_sum_variance::clear() { - if (hybrid_type == DECIMAL_RESULT) - { - my_decimal_set_zero(dec_sum); - my_decimal_set_zero(dec_sqr); - cur_dec= 0; - } - else - sum=sum_sqr=0.0; - count=0; + count= 0; } bool Item_sum_variance::add() { - if (hybrid_type == DECIMAL_RESULT) - { - my_decimal dec_buf, *dec= args[0]->val_decimal(&dec_buf); - my_decimal sqr_buf; - if (!args[0]->null_value) - { - count++; - int next_dec= cur_dec ^ 1; - my_decimal_mul(E_DEC_FATAL_ERROR, &sqr_buf, dec, dec); - my_decimal_add(E_DEC_FATAL_ERROR, dec_sqr+next_dec, - dec_sqr+cur_dec, &sqr_buf); - my_decimal_add(E_DEC_FATAL_ERROR, dec_sum+next_dec, - dec_sum+cur_dec, dec); - cur_dec= next_dec; - } - } - else - { - double nr= args[0]->val_real(); - if (!args[0]->null_value) - { - sum+=nr; - sum_sqr+=nr*nr; - count++; - } - } + /* + Why use a temporary variable? We don't know if it is null until we + evaluate it, which has the side-effect of setting null_value . 
+ */ + double nr= args[0]->val_real(); + + if (!args[0]->null_value) + variance_fp_recurrence_next(&recurrence_m, &recurrence_s, &count, nr); return 0; } double Item_sum_variance::val_real() { DBUG_ASSERT(fixed == 1); - if (hybrid_type == DECIMAL_RESULT) - return val_real_from_decimal(); + /* + 'sample' is a 1/0 boolean value. If it is 1/true, id est this is a sample + variance call, then we should set nullness when the count of the items + is one or zero. If it's zero, i.e. a population variance, then we only + set nullness when the count is zero. + + Another way to read it is that 'sample' is the numerical threshhold, at and + below which a 'count' number of items is called NULL. + */ + DBUG_ASSERT((sample == 0) || (sample == 1)); if (count <= sample) { null_value=1; return 0.0; } + null_value=0; - /* Avoid problems when the precision isn't good enough */ - double tmp=ulonglong2double(count); - double tmp2= (sum_sqr - sum*sum/tmp)/(tmp - (double)sample); - return tmp2 <= 0.0 ? 0.0 : tmp2; + return variance_fp_recurrence_result(recurrence_s, count, sample); } my_decimal *Item_sum_variance::val_decimal(my_decimal *dec_buf) { - my_decimal count_buf, count1_buf, sum_sqr_buf; - DBUG_ASSERT(fixed ==1 ); - if (hybrid_type == REAL_RESULT) - return val_decimal_from_real(dec_buf); - - if (count <= sample) - { - null_value= 1; - return 0; - } - null_value= 0; - int2my_decimal(E_DEC_FATAL_ERROR, count, 0, &count_buf); - int2my_decimal(E_DEC_FATAL_ERROR, count-sample, 0, &count1_buf); - my_decimal_mul(E_DEC_FATAL_ERROR, &sum_sqr_buf, - dec_sum+cur_dec, dec_sum+cur_dec); - my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, - &sum_sqr_buf, &count_buf, prec_increment); - my_decimal_sub(E_DEC_FATAL_ERROR, &sum_sqr_buf, dec_sqr+cur_dec, dec_buf); - my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, - &sum_sqr_buf, &count1_buf, prec_increment); - return dec_buf; + DBUG_ASSERT(fixed == 1); + return val_decimal_from_real(dec_buf); } @@ -1383,89 +1361,44 @@ void Item_sum_variance::reset_field() 
double nr; char *res= result_field->ptr; - if (hybrid_type == DECIMAL_RESULT) - { - my_decimal value, *arg_dec, *arg2_dec; - longlong tmp; - - arg_dec= args[0]->val_decimal(&value); - if (args[0]->null_value) - { - arg_dec= arg2_dec= &decimal_zero; - tmp= 0; - } - else - { - my_decimal_mul(E_DEC_FATAL_ERROR, dec_sum, arg_dec, arg_dec); - arg2_dec= dec_sum; - tmp= 1; - } - my_decimal2binary(E_DEC_FATAL_ERROR, arg_dec, - res, f_precision0, f_scale0); - my_decimal2binary(E_DEC_FATAL_ERROR, arg2_dec, - res+dec_bin_size0, f_precision1, f_scale1); - res+= dec_bin_size0 + dec_bin_size1; - int8store(res,tmp); - return; - } - nr= args[0]->val_real(); + nr= args[0]->val_real(); /* sets null_value as side-effect */ if (args[0]->null_value) bzero(res,sizeof(double)*2+sizeof(longlong)); else { - longlong tmp; - float8store(res,nr); - nr*=nr; - float8store(res+sizeof(double),nr); - tmp= 1; - int8store(res+sizeof(double)*2,tmp); + /* Serialize format is (double)m, (double)s, (longlong)count */ + ulonglong tmp_count; + double tmp_s; + float8store(res, nr); /* recurrence variable m */ + tmp_s= 0.0; + float8store(res + sizeof(double), tmp_s); + tmp_count= 1; + int8store(res + sizeof(double)*2, tmp_count); } } void Item_sum_variance::update_field() { - longlong field_count; + ulonglong field_count; char *res=result_field->ptr; - if (hybrid_type == DECIMAL_RESULT) - { - my_decimal value, *arg_val= args[0]->val_decimal(&value); - if (!args[0]->null_value) - { - binary2my_decimal(E_DEC_FATAL_ERROR, res, - dec_sum+1, f_precision0, f_scale0); - binary2my_decimal(E_DEC_FATAL_ERROR, res+dec_bin_size0, - dec_sqr+1, f_precision1, f_scale1); - field_count= sint8korr(res + (dec_bin_size0 + dec_bin_size1)); - my_decimal_add(E_DEC_FATAL_ERROR, dec_sum, arg_val, dec_sum+1); - my_decimal_mul(E_DEC_FATAL_ERROR, dec_sum+1, arg_val, arg_val); - my_decimal_add(E_DEC_FATAL_ERROR, dec_sqr, dec_sqr+1, dec_sum+1); - field_count++; - my_decimal2binary(E_DEC_FATAL_ERROR, dec_sum, - res, f_precision0, 
f_scale0); - my_decimal2binary(E_DEC_FATAL_ERROR, dec_sqr, - res+dec_bin_size0, f_precision1, f_scale1); - res+= dec_bin_size0 + dec_bin_size1; - int8store(res, field_count); - } + + double nr= args[0]->val_real(); /* sets null_value as side-effect */ + + if (args[0]->null_value) return; - } - double nr,old_nr,old_sqr; - float8get(old_nr, res); - float8get(old_sqr, res+sizeof(double)); + /* Serialize format is (double)m, (double)s, (longlong)count */ + double field_recurrence_m, field_recurrence_s; + float8get(field_recurrence_m, res); + float8get(field_recurrence_s, res + sizeof(double)); field_count=sint8korr(res+sizeof(double)*2); - nr= args[0]->val_real(); - if (!args[0]->null_value) - { - old_nr+=nr; - old_sqr+=nr*nr; - field_count++; - } - float8store(res,old_nr); - float8store(res+sizeof(double),old_sqr); + variance_fp_recurrence_next(&field_recurrence_m, &field_recurrence_s, &field_count, nr); + + float8store(res, field_recurrence_m); + float8store(res + sizeof(double), field_recurrence_s); res+= sizeof(double)*2; int8store(res,field_count); } @@ -2295,25 +2228,9 @@ double Item_std_field::val_real() { double nr; // fix_fields() never calls for this Item - if (hybrid_type == REAL_RESULT) - { - /* - We can't call Item_variance_field::val_real() on a DECIMAL_RESULT - as this would call Item_std_field::val_decimal() and we would - calculate sqrt() twice - */ - nr= Item_variance_field::val_real(); - } - else - { - my_decimal dec_buf,*dec; - dec= Item_variance_field::val_decimal(&dec_buf); - if (!dec) - nr= 0.0; // NULL; Return 0.0 - else - my_decimal2double(E_DEC_FATAL_ERROR, dec, &nr); - } - return nr <= 0.0 ? 
0.0 : sqrt(nr); + nr= Item_variance_field::val_real(); + DBUG_ASSERT(nr >= 0.0); + return sqrt(nr); } @@ -2327,11 +2244,13 @@ my_decimal *Item_std_field::val_decimal(my_decimal *dec_buf) double nr; if (hybrid_type == REAL_RESULT) return val_decimal_from_real(dec_buf); + dec= Item_variance_field::val_decimal(dec_buf); if (!dec) return 0; my_decimal2double(E_DEC_FATAL_ERROR, dec, &nr); - nr= nr <= 0.0 ? 0.0 : sqrt(nr); + DBUG_ASSERT(nr >= 0.0); + nr= sqrt(nr); double2my_decimal(E_DEC_FATAL_ERROR, nr, &tmp_dec); my_decimal_round(E_DEC_FATAL_ERROR, &tmp_dec, decimals, FALSE, dec_buf); return dec_buf; @@ -2366,52 +2285,15 @@ double Item_variance_field::val_real() if (hybrid_type == DECIMAL_RESULT) return val_real_from_decimal(); - double sum,sum_sqr; - longlong count; - float8get(sum,field->ptr); - float8get(sum_sqr,(field->ptr+sizeof(double))); + double recurrence_s; + ulonglong count; + float8get(recurrence_s, (field->ptr + sizeof(double))); count=sint8korr(field->ptr+sizeof(double)*2); if ((null_value= (count <= sample))) return 0.0; - double tmp= (double) count; - double tmp2= (sum_sqr - sum*sum/tmp)/(tmp - (double)sample); - return tmp2 <= 0.0 ? 
0.0 : tmp2; -} - - -String *Item_variance_field::val_str(String *str) -{ - if (hybrid_type == DECIMAL_RESULT) - return val_string_from_decimal(str); - return val_string_from_real(str); -} - - -my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf) -{ - // fix_fields() never calls for this Item - if (hybrid_type == REAL_RESULT) - return val_decimal_from_real(dec_buf); - - longlong count= sint8korr(field->ptr+dec_bin_size0+dec_bin_size1); - if ((null_value= (count <= sample))) - return 0; - - my_decimal dec_count, dec1_count, dec_sum, dec_sqr, tmp; - int2my_decimal(E_DEC_FATAL_ERROR, count, 0, &dec_count); - int2my_decimal(E_DEC_FATAL_ERROR, count-sample, 0, &dec1_count); - binary2my_decimal(E_DEC_FATAL_ERROR, field->ptr, - &dec_sum, f_precision0, f_scale0); - binary2my_decimal(E_DEC_FATAL_ERROR, field->ptr+dec_bin_size0, - &dec_sqr, f_precision1, f_scale1); - my_decimal_mul(E_DEC_FATAL_ERROR, &tmp, &dec_sum, &dec_sum); - my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, &tmp, &dec_count, prec_increment); - my_decimal_sub(E_DEC_FATAL_ERROR, &dec_sum, &dec_sqr, dec_buf); - my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, - &dec_sum, &dec1_count, prec_increment); - return dec_buf; + return variance_fp_recurrence_result(recurrence_s, count, sample); } diff --git a/sql/item_sum.h b/sql/item_sum.h index fe7edd76ecf..989e72654fe 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -665,8 +665,10 @@ public: double val_real(); longlong val_int() { /* can't be fix_fields()ed */ return (longlong) rint(val_real()); } - String *val_str(String*); - my_decimal *val_decimal(my_decimal *); + String *val_str(String *str) + { return val_string_from_real(str); } + my_decimal *val_decimal(my_decimal *dec_buf) + { return val_decimal_from_real(dec_buf); } bool is_null() { (void) val_int(); return null_value; } enum_field_types field_type() const { @@ -688,6 +690,14 @@ public: = (sum(ai^2) - 2*sum(a)*sum(a)/count(a) + count(a)*sum(a)^2/count(a)^2 )/count(a) = = (sum(ai^2) - 
2*sum(a)^2/count(a) + sum(a)^2/count(a) )/count(a) = = (sum(ai^2) - sum(a)^2/count(a))/count(a) + +But this falls prey to catastrophic cancellation. Instead, use the recurrence formulas: + + M_{1} = x_{1}, M_{k} = M_{k-1} + (x_{k} - M_{k-1}) / k + S_{1} = 0, S_{k} = S_{k-1} + (x_{k} - M_{k-1}) * (x_{k} - M_{k}) + for 2 <= k <= n + variance = S_{n} / (n-1) + */ class Item_sum_variance : public Item_sum_num @@ -696,9 +706,8 @@ class Item_sum_variance : public Item_sum_num public: Item_result hybrid_type; - double sum, sum_sqr; - my_decimal dec_sum[2], dec_sqr[2]; int cur_dec; + double recurrence_m, recurrence_s; /* Used in recurrence relation. */ ulonglong count; uint f_precision0, f_scale0; uint f_precision1, f_scale1; @@ -707,7 +716,7 @@ public: uint prec_increment; Item_sum_variance(Item *item_par, uint sample_arg) :Item_sum_num(item_par), - hybrid_type(REAL_RESULT), cur_dec(0), count(0), sample(sample_arg) + hybrid_type(REAL_RESULT), count(0), sample(sample_arg) {} Item_sum_variance(THD *thd, Item_sum_variance *item); enum Sumfunctype sum_func () const { return VARIANCE_FUNC; } @@ -727,7 +736,6 @@ public: enum Item_result result_type () const { return REAL_RESULT; } void cleanup() { - cur_dec= 0; count= 0; Item_sum_num::cleanup(); } -- cgit v1.2.1 From 1176caa58d2ee6bbbb6ebe6c0969317c5ae84055 Mon Sep 17 00:00:00 2001 From: "cmiller@zippy.cornsilk.net" <> Date: Fri, 22 Dec 2006 21:35:40 -0500 Subject: Fixed error in merge. --- sql/item_sum.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sql') diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 714ccbf99f0..6c791cdd1f4 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -1338,7 +1338,7 @@ Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table, field= new Field_double(max_length, maybe_null, name, decimals); if (field != NULL) - field->init(table) + field->init(table); return field; } -- cgit v1.2.1