diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2019-05-29 22:17:00 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2019-05-29 22:17:00 +0300 |
commit | f98bb23168ee9bc0da8aa7111f35cf2539986387 (patch) | |
tree | b7e9c3dc28e9d1fd82eebea077f3ca0aac640218 /sql | |
parent | e35676f5557d68c7b51ba47aa73dcdf72eafa436 (diff) | |
parent | e99ed820d790617a029b03d9c4ab437c246c956a (diff) | |
download | mariadb-git-f98bb23168ee9bc0da8aa7111f35cf2539986387.tar.gz |
Merge 10.3 into 10.4
Diffstat (limited to 'sql')
-rw-r--r-- | sql/log_event.cc | 3 | ||||
-rw-r--r-- | sql/mysqld.h | 6 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 73 | ||||
-rw-r--r-- | sql/records.cc | 33 | ||||
-rw-r--r-- | sql/records.h | 6 | ||||
-rw-r--r-- | sql/sql_const.h | 8 | ||||
-rw-r--r-- | sql/sql_lex.h | 32 | ||||
-rw-r--r-- | sql/sql_parse.cc | 7 | ||||
-rw-r--r-- | sql/sql_select.cc | 179 | ||||
-rw-r--r-- | sql/sql_window.h | 1 | ||||
-rw-r--r-- | sql/temporary_tables.cc | 15 |
11 files changed, 225 insertions, 138 deletions
diff --git a/sql/log_event.cc b/sql/log_event.cc index f25ebd56792..731cbf99060 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -7957,6 +7957,7 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, flags2((standalone ? FL_STANDALONE : 0) | (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)) { cache_type= Log_event::EVENT_NO_CACHE; + bool is_tmp_table= thd_arg->lex->stmt_accessed_temp_table(); if (thd_arg->transaction.stmt.trans_did_wait() || thd_arg->transaction.all.trans_did_wait()) flags2|= FL_WAITED; @@ -7965,7 +7966,7 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, thd_arg->transaction.all.trans_did_ddl() || thd_arg->transaction.all.has_created_dropped_temp_table()) flags2|= FL_DDL; - else if (is_transactional) + else if (is_transactional && !is_tmp_table) flags2|= FL_TRANSACTIONAL; if (!(thd_arg->variables.option_bits & OPTION_RPL_SKIP_PARALLEL)) flags2|= FL_ALLOW_PARALLEL; diff --git a/sql/mysqld.h b/sql/mysqld.h index b6d2d688809..a29ca9ef8fe 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -1,5 +1,5 @@ /* Copyright (c) 2006, 2016, Oracle and/or its affiliates. - Copyright (c) 2010, 2017, MariaDB Corporation. + Copyright (c) 2010, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -613,10 +613,10 @@ extern mysql_mutex_t LOCK_item_func_sleep, LOCK_status, LOCK_error_log, LOCK_delayed_insert, LOCK_short_uuid_generator, LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone, - LOCK_active_mi, LOCK_manager, - LOCK_global_system_variables, LOCK_user_conn, + LOCK_active_mi, LOCK_manager, LOCK_user_conn, LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_connection_count, LOCK_slave_background; +extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_global_system_variables; extern mysql_rwlock_t LOCK_all_status_vars; extern mysql_mutex_t LOCK_start_thread; #ifdef HAVE_OPENSSL diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index d80603c5ebc..599642b3a26 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2010, 2015, MariaDB + Copyright (c) 2010, 2019, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #include "mariadb.h" #include "sql_base.h" +#include "sql_const.h" #include "sql_select.h" #include "filesort.h" #include "opt_subselect.h" @@ -522,7 +523,7 @@ bool is_materialization_applicable(THD *thd, Item_in_subselect *in_subs, !child_select->is_part_of_union() && // 1 parent_unit->first_select()->leaf_tables.elements && // 2 child_select->outer_select() && - child_select->outer_select()->leaf_tables.elements && // 2A + child_select->outer_select()->table_list.first && // 2A subquery_types_allow_materialization(thd, in_subs) && (in_subs->is_top_level_item() || //3 optimizer_flag(thd, @@ -1418,8 +1419,8 @@ void get_delayed_table_estimates(TABLE *table, *startup_cost= item->jtbm_read_time; /* Calculate cost of scanning the temptable */ - double data_size= item->jtbm_record_count * - hash_sj_engine->tmp_table->s->reclength; + double data_size= 
COST_MULT(item->jtbm_record_count, + hash_sj_engine->tmp_table->s->reclength); /* Do like in handler::read_time */ *scan_time= data_size/IO_SIZE + 2; } @@ -2494,7 +2495,8 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) int tableno; double rows= 1.0; while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END) - rows *= join->map2table[tableno]->table->quick_condition_rows; + rows= COST_MULT(rows, + join->map2table[tableno]->table->quick_condition_rows); sjm->rows= MY_MIN(sjm->rows, rows); } memcpy((uchar*) sjm->positions, @@ -2607,7 +2609,7 @@ static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements) double get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size) { - if (row_count * row_size > thd->variables.max_heap_table_size) + if (row_count > thd->variables.max_heap_table_size / (double) row_size) return (double) DISK_TEMPTABLE_LOOKUP_COST; else return (double) HEAP_TEMPTABLE_LOOKUP_COST; @@ -3014,8 +3016,11 @@ bool Sj_materialization_picker::check_qep(JOIN *join, } double mat_read_time= prefix_cost.total_cost(); - mat_read_time += mat_info->materialization_cost.total_cost() + - prefix_rec_count * mat_info->lookup_cost.total_cost(); + mat_read_time= + COST_ADD(mat_read_time, + COST_ADD(mat_info->materialization_cost.total_cost(), + COST_MULT(prefix_rec_count, + mat_info->lookup_cost.total_cost()))); /* NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION @@ -3055,9 +3060,12 @@ bool Sj_materialization_picker::check_qep(JOIN *join, } /* Add materialization cost */ - prefix_cost += mat_info->materialization_cost.total_cost() + - prefix_rec_count * mat_info->scan_cost.total_cost(); - prefix_rec_count *= mat_info->rows; + prefix_cost= + COST_ADD(prefix_cost, + COST_ADD(mat_info->materialization_cost.total_cost(), + COST_MULT(prefix_rec_count, + mat_info->scan_cost.total_cost()))); + prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows); uint i; table_map rem_tables= remaining_tables; @@ 
-3072,8 +3080,8 @@ bool Sj_materialization_picker::check_qep(JOIN *join, { best_access_path(join, join->positions[i].table, rem_tables, i, disable_jbuf, prefix_rec_count, &curpos, &dummy); - prefix_rec_count *= curpos.records_read; - prefix_cost += curpos.read_time; + prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_read); + prefix_cost= COST_ADD(prefix_cost, curpos.read_time); } *strategy= SJ_OPT_MATERIALIZE_SCAN; @@ -3380,16 +3388,18 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, for (uint j= first_dupsweedout_table; j <= idx; j++) { POSITION *p= join->positions + j; - current_fanout *= p->records_read; - dups_cost += p->read_time + current_fanout / TIME_FOR_COMPARE; + current_fanout= COST_MULT(current_fanout, p->records_read); + dups_cost= COST_ADD(dups_cost, + COST_ADD(p->read_time, + current_fanout / TIME_FOR_COMPARE)); if (p->table->emb_sj_nest) { - sj_inner_fanout *= p->records_read; + sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_read); dups_removed_fanout |= p->table->table->map; } else { - sj_outer_fanout *= p->records_read; + sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_read); temptable_rec_size += p->table->table->file->ref_length; } } @@ -3408,12 +3418,13 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, sj_outer_fanout, temptable_rec_size); - double write_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout * one_write_cost; - double full_lookup_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout* sj_inner_fanout * - one_lookup_cost; - dups_cost += write_cost + full_lookup_cost; + double write_cost= COST_MULT(join->positions[first_tab].prefix_record_count, + sj_outer_fanout * one_write_cost); + double full_lookup_cost= + COST_MULT(join->positions[first_tab].prefix_record_count, + COST_MULT(sj_outer_fanout, + sj_inner_fanout * one_lookup_cost)); + dups_cost= COST_ADD(dups_cost, COST_ADD(write_cost, full_lookup_cost)); *read_time= dups_cost; *record_count= 
prefix_rec_count * sj_outer_fanout; @@ -3560,8 +3571,8 @@ static void recalculate_prefix_record_count(JOIN *join, uint start, uint end) if (j == join->const_tables) prefix_count= 1.0; else - prefix_count= join->best_positions[j-1].prefix_record_count * - join->best_positions[j-1].records_read; + prefix_count= COST_MULT(join->best_positions[j-1].prefix_record_count, + join->best_positions[j-1].records_read); join->best_positions[j].prefix_record_count= prefix_count; } @@ -6387,14 +6398,16 @@ bool JOIN::choose_subquery_plan(table_map join_tables) The cost of executing the subquery and storing its result in an indexed temporary table. */ - double materialization_cost= inner_read_time_1 + - write_cost * inner_record_count_1; + double materialization_cost= COST_ADD(inner_read_time_1, + COST_MULT(write_cost, + inner_record_count_1)); - materialize_strategy_cost= materialization_cost + - outer_lookup_keys * lookup_cost; + materialize_strategy_cost= COST_ADD(materialization_cost, + COST_MULT(outer_lookup_keys, + lookup_cost)); /* C.2 Compute the cost of the IN=>EXISTS strategy. */ - in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2; + in_exists_strategy_cost= COST_MULT(outer_lookup_keys, inner_read_time_2); /* C.3 Compare the costs and choose the cheaper strategy. 
*/ if (materialize_strategy_cost >= in_exists_strategy_cost) diff --git a/sql/records.cc b/sql/records.cc index 3decb8f1d24..c2db29c4912 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -194,7 +194,6 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, bzero((char*) info,sizeof(*info)); info->thd=thd; info->table=table; - info->forms= &info->table; /* Only one table */ info->addon_field= addon_field; if ((table->s->tmp_table == INTERNAL_TMP_TABLE || @@ -583,33 +582,34 @@ static int rr_unpack_from_buffer(READ_RECORD *info) } /* cacheing of records from a database */ +static const uint STRUCT_LENGTH= 3 + MAX_REFLENGTH; + static int init_rr_cache(THD *thd, READ_RECORD *info) { - uint rec_cache_size; + uint rec_cache_size, cache_records; DBUG_ENTER("init_rr_cache"); - info->struct_length= 3+MAX_REFLENGTH; info->reclength= ALIGN_SIZE(info->table->s->reclength+1); - if (info->reclength < info->struct_length) - info->reclength= ALIGN_SIZE(info->struct_length); + if (info->reclength < STRUCT_LENGTH) + info->reclength= ALIGN_SIZE(STRUCT_LENGTH); info->error_offset= info->table->s->reclength; - info->cache_records= (thd->variables.read_rnd_buff_size / - (info->reclength+info->struct_length)); - rec_cache_size= info->cache_records*info->reclength; - info->rec_cache_size= info->cache_records*info->ref_length; + cache_records= thd->variables.read_rnd_buff_size / + (info->reclength + STRUCT_LENGTH); + rec_cache_size= cache_records * info->reclength; + info->rec_cache_size= cache_records * info->ref_length; // We have to allocate one more byte to use uint3korr (see comments for it) - if (info->cache_records <= 2 || - !(info->cache=(uchar*) my_malloc_lock(rec_cache_size+info->cache_records* - info->struct_length+1, - MYF(MY_THREAD_SPECIFIC)))) + if (cache_records <= 2 || + !(info->cache= (uchar*) my_malloc_lock(rec_cache_size + cache_records * + STRUCT_LENGTH + 1, + MYF(MY_THREAD_SPECIFIC)))) DBUG_RETURN(1); #ifdef HAVE_valgrind // Avoid warnings in qsort - 
bzero(info->cache,rec_cache_size+info->cache_records* info->struct_length+1); + bzero(info->cache, rec_cache_size + cache_records * STRUCT_LENGTH + 1); #endif - DBUG_PRINT("info",("Allocated buffert for %d records",info->cache_records)); + DBUG_PRINT("info", ("Allocated buffer for %d records", cache_records)); info->read_positions=info->cache+rec_cache_size; info->cache_pos=info->cache_end=info->cache; DBUG_RETURN(0); @@ -664,8 +664,7 @@ static int rr_from_cache(READ_RECORD *info) int3store(ref_position,(long) i); ref_position+=3; } - my_qsort(info->read_positions, length, info->struct_length, - (qsort_cmp) rr_cmp); + my_qsort(info->read_positions, length, STRUCT_LENGTH, (qsort_cmp) rr_cmp); position=info->read_positions; for (i=0 ; i < length ; i++) diff --git a/sql/records.h b/sql/records.h index 4f8e14da9ac..e97f6b273cc 100644 --- a/sql/records.h +++ b/sql/records.h @@ -52,15 +52,11 @@ struct READ_RECORD typedef int (*Setup_func)(struct st_join_table*); TABLE *table; /* Head-form */ - //handler *file; - TABLE **forms; /* head and ref forms */ Unlock_row_func unlock_row; Read_func read_record_func; THD *thd; SQL_SELECT *select; - uint cache_records; - uint ref_length,struct_length,reclength,rec_cache_size,error_offset; - uint index; + uint ref_length, reclength, rec_cache_size, error_offset; uchar *ref_pos; /* pointer to form->refpos */ uchar *record; uchar *rec_buf; /* to read field values after filesort */ diff --git a/sql/sql_const.h b/sql/sql_const.h index 1f9353cdef3..7aa4249f5ad 100644 --- a/sql/sql_const.h +++ b/sql/sql_const.h @@ -247,6 +247,14 @@ #define DISK_TEMPTABLE_LOOKUP_COST 1.0 #define SORT_INDEX_CMP_COST 0.02 + +#define COST_MAX (DBL_MAX * (1.0 - DBL_EPSILON)) + +#define COST_ADD(c,d) (COST_MAX - (d) > (c) ? (c) + (d) : COST_MAX) + +#define COST_MULT(c,f) (COST_MAX / (f) > (c) ? (c) * (f) : COST_MAX) + + #define MY_CHARSET_BIN_MB_MAXLEN 1 /** Don't pack string keys shorter than this (if PACK_KEYS=1 isn't used). 
*/ diff --git a/sql/sql_lex.h b/sql/sql_lex.h index b205207f64d..0e1d17d13f0 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -2143,6 +2143,38 @@ public: } /** + Checks either a trans/non trans temporary table is being accessed while + executing a statement. + + @return + @retval TRUE if a temporary table is being accessed + @retval FALSE otherwise + */ + inline bool stmt_accessed_temp_table() + { + DBUG_ENTER("THD::stmt_accessed_temp_table"); + DBUG_RETURN(stmt_accessed_non_trans_temp_table() || + stmt_accessed_trans_temp_table()); + } + + /** + Checks if a temporary transactional table is being accessed while executing + a statement. + + @return + @retval TRUE if a temporary transactional table is being accessed + @retval FALSE otherwise + */ + inline bool stmt_accessed_trans_temp_table() + { + DBUG_ENTER("THD::stmt_accessed_trans_temp_table"); + + DBUG_RETURN((stmt_accessed_table_flag & + ((1U << STMT_READS_TEMP_TRANS_TABLE) | + (1U << STMT_WRITES_TEMP_TRANS_TABLE))) != 0); + } + + /** Checks if a temporary non-transactional table is about to be accessed while executing a statement. diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 0305f70edad..deb8c383bde 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -4878,6 +4878,13 @@ end_with_restore_list: */ /* Skip first table, which is the table we are inserting in */ TABLE_LIST *second_table= first_table->next_local; + /* + This is a hack: this leaves select_lex->table_list in an inconsistent + state as 'elements' does not contain number of elements in the list. + Moreover, if second_table == NULL then 'next' becomes invalid. 
+ TODO: fix it by removing the front element (restoring of it should + be done properly as well) + */ select_lex->table_list.first= second_table; select_lex->context.table_list= select_lex->context.first_name_resolution_table= second_table; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 598f46594e5..46be6dfef27 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1,5 +1,5 @@ -/* Copyright (c) 2000, 2016 Oracle and/or its affiliates. - Copyright (c) 2009, 2019 MariaDB Corporation +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2009, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -7451,7 +7451,7 @@ best_access_path(JOIN *join, else tmp= table->file->read_time(key, 1, (ha_rows) MY_MIN(tmp,s->worst_seeks)); - tmp*= record_count; + tmp= COST_MULT(tmp, record_count); } } else @@ -7632,7 +7632,7 @@ best_access_path(JOIN *join, else tmp= table->file->read_time(key, 1, (ha_rows) MY_MIN(tmp,s->worst_seeks)); - tmp*= record_count; + tmp= COST_MULT(tmp, record_count); } else { @@ -7642,7 +7642,7 @@ best_access_path(JOIN *join, } } - tmp += s->startup_cost; + tmp= COST_ADD(tmp, s->startup_cost); loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); } /* not ft_key */ @@ -7665,7 +7665,7 @@ best_access_path(JOIN *join, if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE) { trace_access_idx.add("chosen", true); - best_time= tmp + records/(double) TIME_FOR_COMPARE; + best_time= COST_ADD(tmp, records/(double) TIME_FOR_COMPARE); best= tmp; best_records= records; best_key= start_key; @@ -7707,14 +7707,18 @@ best_access_path(JOIN *join, use_cond_selectivity); tmp= s->quick ? 
s->quick->read_time : s->scan_time(); - tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE; + double cmp_time= (s->records - rnd_records)/(double) TIME_FOR_COMPARE; + tmp= COST_ADD(tmp, cmp_time); /* We read the table as many times as join buffer becomes full. */ - tmp*= (1.0 + floor((double) cache_record_length(join,idx) * - record_count / - (double) thd->variables.join_buff_size)); - best_time= tmp + - (record_count*join_sel) / TIME_FOR_COMPARE * rnd_records; + + double refills= (1.0 + floor((double) cache_record_length(join,idx) * + record_count / + (double) thd->variables.join_buff_size)); + tmp= COST_MULT(tmp, refills); + best_time= COST_ADD(tmp, + COST_MULT((record_count*join_sel) / TIME_FOR_COMPARE, + rnd_records)); best= tmp; records= rnd_records; best_key= hj_start_key; @@ -7746,7 +7750,8 @@ best_access_path(JOIN *join, 'range' access using index IDX, and the best way to perform 'ref' access is to use the same index IDX, with the same or more key parts. (note: it is not clear how this rule is/should be extended to - index_merge quick selects) + index_merge quick selects). Also if we have a hash join we prefer that + over a table scan (3) See above note about InnoDB. (4) NOT ("FORCE INDEX(...)" is used for table and there is 'ref' access path, but there is no quick select) @@ -7763,6 +7768,7 @@ best_access_path(JOIN *join, */ Json_writer_object trace_access_scan(thd); if ((records >= s->found_records || best > s->read_time) && // (1) + !(best_key && best_key->key == MAX_KEY) && // (2) !(s->quick && best_key && s->quick->index == best_key->key && // (2) best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2) !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) @@ -7795,9 +7801,9 @@ best_access_path(JOIN *join, access (see first else-branch below), but we don't take it into account here for range/index_merge access. Find out why this is so. 
*/ - tmp= record_count * - (s->quick->read_time + - (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE); + double cmp_time= (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE; + tmp= COST_MULT(record_count, + COST_ADD(s->quick->read_time, cmp_time)); if ( s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) { @@ -7836,16 +7842,15 @@ best_access_path(JOIN *join, - read the whole table record - skip rows which does not satisfy join condition */ - tmp= record_count * - (tmp + - (s->records - rnd_records)/(double) TIME_FOR_COMPARE); + double cmp_time= (s->records - rnd_records)/(double) TIME_FOR_COMPARE; + tmp= COST_MULT(record_count, COST_ADD(tmp,cmp_time)); } else { - /* We read the table as many times as join buffer becomes full. */ - tmp*= (1.0 + floor((double) cache_record_length(join,idx) * - record_count / - (double) thd->variables.join_buff_size)); + double refills= (1.0 + floor((double) cache_record_length(join,idx) * + (record_count / + (double) thd->variables.join_buff_size))); + tmp= COST_MULT(tmp, refills); /* We don't make full cartesian product between rows in the scanned table and existing records because we skip all rows from the @@ -7853,7 +7858,8 @@ best_access_path(JOIN *join, we read the table (see flush_cached_records for details). Here we take into account cost to read and skip these records. */ - tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE; + double cmp_time= (s->records - rnd_records)/(double) TIME_FOR_COMPARE; + tmp= COST_ADD(tmp, cmp_time); } } @@ -7869,19 +7875,17 @@ best_access_path(JOIN *join, tmp give us total cost of using TABLE SCAN */ - double best_filter_cmp_gain= 0; - if (best_filter) - { - best_filter_cmp_gain= best_filter->get_cmp_gain(record_count * records); - } + const double best_filter_cmp_gain= best_filter + ? 
best_filter->get_cmp_gain(record_count * records) + : 0; trace_access_scan.add("resulting_rows", rnd_records); trace_access_scan.add("cost", tmp); if (best == DBL_MAX || - (tmp + record_count/(double) TIME_FOR_COMPARE*rnd_records < + COST_ADD(tmp, record_count/(double) TIME_FOR_COMPARE*rnd_records) < (best_key->is_for_hash_join() ? best_time : - best + record_count/(double) TIME_FOR_COMPARE*records - - best_filter_cmp_gain))) + COST_ADD(best - best_filter_cmp_gain, + record_count/(double) TIME_FOR_COMPARE*records))) { /* If the table has a range (s->quick is set) make_join_select() @@ -8420,16 +8424,13 @@ optimize_straight_join(JOIN *join, table_map join_tables) position, &loose_scan_pos); /* compute the cost of the new plan extended with 's' */ - record_count*= position->records_read; - double filter_cmp_gain= 0; - if (position->range_rowid_filter_info) - { - filter_cmp_gain= - position->range_rowid_filter_info->get_cmp_gain(record_count); - } - read_time+= position->read_time + - record_count / (double) TIME_FOR_COMPARE - - filter_cmp_gain; + record_count= COST_MULT(record_count, position->records_read); + const double filter_cmp_gain= position->range_rowid_filter_info + ? 
position->range_rowid_filter_info->get_cmp_gain(record_count) + : 0; + read_time+= COST_ADD(read_time - filter_cmp_gain, + COST_ADD(position->read_time, + record_count / (double) TIME_FOR_COMPARE)); advance_sj_state(join, join_tables, idx, &record_count, &read_time, &loose_scan_pos); @@ -8619,9 +8620,10 @@ greedy_search(JOIN *join, swap_variables(JOIN_TAB*, join->best_ref[idx], join->best_ref[best_idx]); /* compute the cost of the new plan extended with 'best_table' */ - record_count*= join->positions[idx].records_read; - read_time+= join->positions[idx].read_time + - record_count / (double) TIME_FOR_COMPARE; + record_count= COST_MULT(record_count, join->positions[idx].records_read); + read_time= COST_ADD(read_time, + COST_ADD(join->positions[idx].read_time, + record_count / (double) TIME_FOR_COMPARE)); remaining_tables&= ~(best_table->table->map); --size_remain; @@ -8728,11 +8730,13 @@ void JOIN::get_partial_cost_and_fanout(int end_tab_idx, } if (tab->records_read && (cur_table_map & filter_map)) { - record_count *= tab->records_read; - read_time += tab->read_time + record_count / (double) TIME_FOR_COMPARE; + record_count= COST_MULT(record_count, tab->records_read); + read_time= COST_ADD(read_time, + COST_ADD(tab->read_time, + record_count / (double) TIME_FOR_COMPARE)); if (tab->emb_sj_nest) - sj_inner_fanout *= tab->records_read; - } + sj_inner_fanout= COST_MULT(sj_inner_fanout, tab->records_read); + } if (i == last_sj_table) { @@ -8770,8 +8774,8 @@ void JOIN::get_prefix_cost_and_fanout(uint n_tables, { if (best_positions[i].records_read) { - record_count *= best_positions[i].records_read; - read_time += best_positions[i].read_time; + record_count= COST_MULT(record_count, best_positions[i].records_read); + read_time= COST_ADD(read_time, best_positions[i].read_time); } } *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE; @@ -9351,20 +9355,16 @@ best_extension_by_limited_search(JOIN *join, best_access_path(join, s, remaining_tables, idx, disable_jbuf, 
record_count, position, &loose_scan_pos); - /* Compute the cost of extending the plan with 's', avoid overflow */ - if (position->records_read < DBL_MAX / record_count) - current_record_count= record_count * position->records_read; - else - current_record_count= DBL_MAX; - double filter_cmp_gain= 0; - if (position->range_rowid_filter_info) - { - filter_cmp_gain= - position->range_rowid_filter_info->get_cmp_gain(current_record_count); - } - current_read_time=read_time + position->read_time + - current_record_count / (double) TIME_FOR_COMPARE - - filter_cmp_gain; + /* Compute the cost of extending the plan with 's' */ + current_record_count= COST_MULT(record_count, position->records_read); + const double filter_cmp_gain= position->range_rowid_filter_info + ? position->range_rowid_filter_info->get_cmp_gain(current_record_count) + : 0; + current_read_time=COST_ADD(read_time, + COST_ADD(position->read_time - + filter_cmp_gain, + current_record_count / + (double) TIME_FOR_COMPARE)); advance_sj_state(join, remaining_tables, idx, &current_record_count, &current_read_time, &loose_scan_pos); @@ -9449,12 +9449,12 @@ best_extension_by_limited_search(JOIN *join, if (join->sort_by_table && join->sort_by_table != join->positions[join->const_tables].table->table) - /* - We may have to make a temp table, note that this is only a - heuristic since we cannot know for sure at this point. + /* + We may have to make a temp table, note that this is only a + heuristic since we cannot know for sure at this point. Hence it may be wrong. */ - current_read_time+= current_record_count; + current_read_time= COST_ADD(current_read_time, current_record_count); if (current_read_time < join->best_read) { memcpy((uchar*) join->best_positions, (uchar*) join->positions, @@ -9772,8 +9772,8 @@ prev_record_reads(POSITION *positions, uint idx, table_map found_ref) #max_nested_outer_joins=64-1) will not make it any more precise. 
*/ if (pos->records_read) - found*= pos->records_read; - } + found= COST_MULT(found, pos->records_read); + } } return found; } @@ -11353,8 +11353,16 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) /* We plan to scan all rows. Check again if we should use an index. - We could have used an column from a previous table in - the index if we are using limit and this is the first table + + There are two cases: + 1) There could be an index usage the refers to a previous + table that we didn't consider before, but could be consider + now as a "last resort". For example + SELECT * from t1,t2 where t1.a between t2.a and t2.b; + 2) If the current table is the first non const table + and there is a limit it still possibly beneficial + to use the index even if the index range is big as + we can stop when we've found limit rows. (1) - Don't switch the used index if we are using semi-join LooseScan on this table. Using different index will not @@ -16049,8 +16057,20 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top, table->table->maybe_null= FALSE; table->outer_join= 0; if (!(straight_join || table->straight)) - table->dep_tables= table->embedding && !table->embedding->sj_subq_pred ? - table->embedding->dep_tables : 0; + { + table->dep_tables= 0; + TABLE_LIST *embedding= table->embedding; + while (embedding) + { + if (embedding->nested_join->join_list.head()->outer_join) + { + if (!embedding->sj_subq_pred) + table->dep_tables= embedding->dep_tables; + break; + } + embedding= embedding->embedding; + } + } if (table->on_expr) { /* Add ON expression to the WHERE or upper-level ON condition. 
*/ @@ -16575,11 +16595,12 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, pos= loose_scan_pos; reopt_remaining_tables &= ~rs->table->map; - rec_count *= pos.records_read; - cost += pos.read_time; + rec_count= COST_MULT(rec_count, pos.records_read); + cost= COST_ADD(cost, pos.read_time); + if (!rs->emb_sj_nest) - *outer_rec_count *= pos.records_read; + *outer_rec_count= COST_MULT(*outer_rec_count, pos.records_read); } join->cur_sj_inner_tables= save_cur_sj_inner_tables; @@ -21039,7 +21060,6 @@ join_read_first(JOIN_TAB *tab) tab->table->status=0; tab->read_record.read_record_func= join_read_next; tab->read_record.table=table; - tab->read_record.index=tab->index; tab->read_record.record=table->record[0]; if (!table->file->inited) error= table->file->ha_index_init(tab->index, tab->sorted); @@ -21080,7 +21100,6 @@ join_read_last(JOIN_TAB *tab) tab->table->status=0; tab->read_record.read_record_func= join_read_prev; tab->read_record.table=table; - tab->read_record.index=tab->index; tab->read_record.record=table->record[0]; if (!table->file->inited) error= table->file->ha_index_init(tab->index, 1); diff --git a/sql/sql_window.h b/sql/sql_window.h index 21f2c8af108..373b367b211 100644 --- a/sql/sql_window.h +++ b/sql/sql_window.h @@ -18,7 +18,6 @@ #define SQL_WINDOW_INCLUDED #include "filesort.h" -#include "records.h" class Item_window_func; diff --git a/sql/temporary_tables.cc b/sql/temporary_tables.cc index 89f40b55f86..eca5dd2a7d5 100644 --- a/sql/temporary_tables.cc +++ b/sql/temporary_tables.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2016 MariaDB Corporation + Copyright (c) 2016, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -377,6 +377,19 @@ bool THD::open_temporary_table(TABLE_LIST *tl) if (!table && (share= find_tmp_table_share(tl))) { table= open_temporary_table(share, tl->get_table_name()); + /* + Temporary tables are not safe for parallel replication. They were + designed to be visible to one thread only, so have no table locking. + Thus there is no protection against two conflicting transactions + committing in parallel and things like that. + + So for now, anything that uses temporary tables will be serialised + with anything before it, when using parallel replication. + */ + if (table && rgi_slave && + rgi_slave->is_parallel_exec && + wait_for_prior_commit()) + DBUG_RETURN(true); } if (!table) |