summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorIgor Babaev <igor@askmonty.org>2010-12-10 23:23:34 -0800
committerIgor Babaev <igor@askmonty.org>2010-12-10 23:23:34 -0800
commit7f52af655a04c4fecfe32bede79bd65cac0e043d (patch)
tree071fe4a63bdce21ee8a0a6cfe5e7aa4b3b26f354 /sql
parenteb70e64ceaa7aec6a35580643a3e5fc01b6a0630 (diff)
parent4e05898f539f299bbb12c49834502c1e471f2fc9 (diff)
downloadmariadb-git-7f52af655a04c4fecfe32bede79bd65cac0e043d.tar.gz
Merge.
Diffstat (limited to 'sql')
-rwxr-xr-xsql/CMakeLists.txt3
-rw-r--r--sql/Makefile.am1
-rw-r--r--[-rwxr-xr-x]sql/event_scheduler.cc0
-rw-r--r--sql/field.h11
-rw-r--r--sql/field_conv.cc13
-rw-r--r--sql/handler.h2
-rw-r--r--sql/multi_range_read.cc2
-rw-r--r--sql/mysql_priv.h21
-rw-r--r--sql/mysqld.cc33
-rw-r--r--sql/opt_index_cond_pushdown.cc7
-rw-r--r--sql/set_var.cc5
-rw-r--r--sql/sql_class.h1
-rw-r--r--sql/sql_join_cache.cc3311
-rw-r--r--sql/sql_join_cache.h1376
-rw-r--r--sql/sql_select.cc662
-rw-r--r--sql/sql_select.h907
16 files changed, 4165 insertions, 2190 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 39cd29da16c..4d2088e52f0 100755
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -59,7 +59,8 @@ SET (SQL_SOURCE
sql_cache.cc sql_class.cc sql_client.cc sql_crypt.cc sql_crypt.h
sql_cursor.cc sql_db.cc sql_delete.cc sql_derived.cc sql_do.cc
sql_error.cc sql_handler.cc sql_help.cc sql_insert.cc
- sql_join_cache.cc sql_lex.cc sql_list.cc sql_load.cc sql_manager.cc
+ sql_join_cache.h sql_join_cache.cc
+ sql_lex.cc sql_list.cc sql_load.cc sql_manager.cc
sql_map.cc sql_parse.cc sql_partition.cc sql_plugin.cc
sql_prepare.cc sql_rename.cc
debug_sync.cc debug_sync.h
diff --git a/sql/Makefile.am b/sql/Makefile.am
index 3c19d1a26fb..07499050146 100644
--- a/sql/Makefile.am
+++ b/sql/Makefile.am
@@ -81,6 +81,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \
sql_partition.h partition_info.h partition_element.h \
contributors.h sql_servers.h \
multi_range_read.h \
+ sql_join_cache.h \
create_options.h \
sql_expression_cache.h
diff --git a/sql/event_scheduler.cc b/sql/event_scheduler.cc
index 4d6636eedb2..4d6636eedb2 100755..100644
--- a/sql/event_scheduler.cc
+++ b/sql/event_scheduler.cc
diff --git a/sql/field.h b/sql/field.h
index 0161f69ac4d..fc43c044d87 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -590,6 +590,10 @@ public:
}
/* Hash value */
virtual void hash(ulong *nr, ulong *nr2);
+
+ /* Check whether the field can be used as a join attribute in hash join */
+ virtual bool hash_join_is_possible() { return TRUE; }
+
friend bool reopen_table(THD *,struct st_table *,bool);
friend int cre_myisam(char * name, register TABLE *form, uint options,
ulonglong auto_increment_value);
@@ -765,6 +769,12 @@ public:
my_decimal *val_decimal(my_decimal *);
virtual bool str_needs_quotes() { return TRUE; }
uint is_equal(Create_field *new_field);
+
+ bool hash_join_is_possible()
+ {
+ /* TODO: support hash joins for non-binary collations */
+ return (flags & BINARY_FLAG);
+ }
};
@@ -1909,6 +1919,7 @@ public:
uint size_of() const { return sizeof(*this); }
int reset(void) { return !maybe_null() || Field_blob::reset(); }
geometry_type get_geometry_type() { return geom_type; };
+ bool hash_join_is_possible() { return FALSE; }
};
#endif /*HAVE_SPATIAL*/
diff --git a/sql/field_conv.cc b/sql/field_conv.cc
index 67ef4f95368..74ffee3f33e 100644
--- a/sql/field_conv.cc
+++ b/sql/field_conv.cc
@@ -449,7 +449,8 @@ static void do_varstring1(Copy_field *copy)
if (length > copy->to_length- 1)
{
length=copy->to_length - 1;
- if (copy->from_field->table->in_use->count_cuted_fields)
+ if (copy->from_field->table->in_use->count_cuted_fields &&
+ copy->to_field)
copy->to_field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
WARN_DATA_TRUNCATED, 1);
}
@@ -485,7 +486,8 @@ static void do_varstring2(Copy_field *copy)
if (length > copy->to_length- HA_KEY_BLOB_LENGTH)
{
length=copy->to_length-HA_KEY_BLOB_LENGTH;
- if (copy->from_field->table->in_use->count_cuted_fields)
+ if (copy->from_field->table->in_use->count_cuted_fields &&
+ copy->to_field)
copy->to_field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
WARN_DATA_TRUNCATED, 1);
}
@@ -549,9 +551,9 @@ void Copy_field::set(uchar *to,Field *from)
do_copy= do_field_to_null_str;
}
else
- {
+ {
to_null_ptr= 0; // For easy debugging
- do_copy= do_field_eq;
+ do_copy= do_field_eq;
}
}
@@ -710,6 +712,9 @@ Copy_field::get_copy_func(Field *to,Field *from)
do_varstring1_mb) :
(from->charset()->mbmaxlen == 1 ? do_varstring2 :
do_varstring2_mb));
+ else
+ return (((Field_varstring*) from)->length_bytes == 1 ?
+ do_varstring1 : do_varstring2);
}
else if (to_length < from_length)
return (from->charset()->mbmaxlen == 1 ?
diff --git a/sql/handler.h b/sql/handler.h
index 8be3db6f36b..4346ccf97cc 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1216,6 +1216,8 @@ typedef struct st_range_seq_if
bool (*skip_index_tuple) (range_seq_t seq, char *range_info);
} RANGE_SEQ_IF;
+typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, char *range_info);
+
class COST_VECT
{
public:
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index ef6d8e0cc68..70fcbd605e6 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -223,7 +223,7 @@ handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
int handler::multi_range_read_next(char **range_info)
{
- int UNINIT_VAR(result);
+ int result= HA_ERR_END_OF_FILE;
int range_res;
DBUG_ENTER("handler::multi_range_read_next");
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 3baebcde9df..666882d6890 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -570,12 +570,17 @@ protected:
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11)
+#define OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE (1<<12)
+#define OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE (1<<13)
+#define OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL (1<<14)
+#define OPTIMIZER_SWITCH_JOIN_CACHE_HASHED (1<<15)
+#define OPTIMIZER_SWITCH_JOIN_CACHE_BKA (1<<16)
#ifdef DBUG_OFF
-# define OPTIMIZER_SWITCH_LAST (1<<12)
+# define OPTIMIZER_SWITCH_LAST (1<<17)
#else
-# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
-# define OPTIMIZER_SWITCH_LAST (1<<13)
+# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<17)
+# define OPTIMIZER_SWITCH_LAST (1<<18)
#endif
#ifdef DBUG_OFF
@@ -591,7 +596,10 @@ protected:
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
- OPTIMIZER_SWITCH_SUBQUERY_CACHE)
+ OPTIMIZER_SWITCH_SUBQUERY_CACHE | \
+ OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL | \
+ OPTIMIZER_SWITCH_JOIN_CACHE_HASHED | \
+ OPTIMIZER_SWITCH_JOIN_CACHE_BKA)
#else
# define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
@@ -605,7 +613,10 @@ protected:
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
- OPTIMIZER_SWITCH_SUBQUERY_CACHE)
+ OPTIMIZER_SWITCH_SUBQUERY_CACHE | \
+ OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL | \
+ OPTIMIZER_SWITCH_JOIN_CACHE_HASHED | \
+ OPTIMIZER_SWITCH_JOIN_CACHE_BKA)
#endif
/*
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 9ef76849647..b9faf09f4f4 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -345,6 +345,11 @@ static const char *optimizer_switch_names[]=
"partial_match_rowid_merge",
"partial_match_table_scan",
"subquery_cache",
+ "outer_join_with_cache",
+ "semijoin_with_cache",
+ "join_cache_incremental",
+ "join_cache_hashed",
+ "join_cache_bka",
#ifndef DBUG_OFF
"table_elimination",
#endif
@@ -366,6 +371,11 @@ static const unsigned int optimizer_switch_names_len[]=
sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1,
sizeof("subquery_cache") - 1,
+ sizeof("outer_join_with_cache") - 1,
+ sizeof("semijoin_with_cache") - 1,
+ sizeof("join_cache_incremental") - 1,
+ sizeof("join_cache_hashed") - 1,
+ sizeof("join_cache_bka") - 1,
#ifndef DBUG_OFF
sizeof("table_elimination") - 1,
#endif
@@ -464,7 +474,10 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
"semijoin=on,"
"partial_match_rowid_merge=on,"
"partial_match_table_scan=on,"
- "subquery_cache=on"
+ "subquery_cache=on,"
+ "join_cache_incremental=on,"
+ "join_cache_hashed=on,"
+ "join_cache_bka=on"
#ifndef DBUG_OFF
",table_elimination=on";
#else
@@ -5939,7 +5952,8 @@ enum options_mysqld
OPT_DELAYED_INSERT_LIMIT, OPT_DELAYED_QUEUE_SIZE,
OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN, OPT_FT_BOOLEAN_SYNTAX,
OPT_FT_MAX_WORD_LEN, OPT_FT_QUERY_EXPANSION_LIMIT, OPT_FT_STOPWORD_FILE,
- OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE, OPT_JOIN_CACHE_LEVEL,
+ OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE,
+ OPT_JOIN_BUFF_SPACE_LIMIT, OPT_JOIN_CACHE_LEVEL,
OPT_KEY_BUFFER_SIZE, OPT_KEY_CACHE_BLOCK_SIZE,
OPT_KEY_CACHE_DIVISION_LIMIT, OPT_KEY_CACHE_AGE_THRESHOLD,
OPT_KEY_CACHE_PARTITIONS,
@@ -7087,11 +7101,17 @@ thread is in the relay logs.",
&max_system_variables.net_interactive_timeout, 0,
GET_ULONG, REQUIRED_ARG, NET_WAIT_TIMEOUT, 1, LONG_TIMEOUT, 0, 1, 0},
{"join_buffer_size", OPT_JOIN_BUFF_SIZE,
- "The size of the buffer that is used for full joins.",
- &global_system_variables.join_buff_size,
- &max_system_variables.join_buff_size, 0, GET_ULONG,
+ "The size of the buffer that is used for joins.",
+ &global_system_variables.join_buff_size,
+ &max_system_variables.join_buff_size, 0, GET_ULONG,
REQUIRED_ARG, 128*1024L, 128+MALLOC_OVERHEAD, (longlong) ULONG_MAX,
MALLOC_OVERHEAD, 128, 0},
+ {"join_buffer_space_limit", OPT_JOIN_BUFF_SPACE_LIMIT,
+ "The limit of the space for all join buffers used by a query.",
+ &global_system_variables.join_buff_space_limit,
+ &max_system_variables.join_buff_space_limit, 0, GET_ULL,
+ REQUIRED_ARG, 8*128*1024L, 2048+MALLOC_OVERHEAD, (longlong) ULONGLONG_MAX,
+ MALLOC_OVERHEAD, 2048, 0},
{"join_cache_level", OPT_JOIN_CACHE_LEVEL,
"Controls what join operations can be executed with join buffers. Odd numbers are used for plain join buffers while even numbers are used for linked buffers",
&global_system_variables.join_cache_level,
@@ -7383,7 +7403,8 @@ thread is in the relay logs.",
"index_merge_union, index_merge_sort_union, index_merge_intersection, "
"index_condition_pushdown, firstmatch, loosescan, materialization, "
"semijoin, partial_match_rowid_merge, partial_match_table_scan, "
- "subquery_cache"
+ "subquery_cache, outer_join_with_cache, semijoin_with_cache, "
+ "join_cache_incremental, join_cache_hashed, join_cache_bka"
#ifndef DBUG_OFF
", table_elimination"
#endif
diff --git a/sql/opt_index_cond_pushdown.cc b/sql/opt_index_cond_pushdown.cc
index f6437e04ec5..71eadfb45cf 100644
--- a/sql/opt_index_cond_pushdown.cc
+++ b/sql/opt_index_cond_pushdown.cc
@@ -272,12 +272,14 @@ Item *make_cond_remainder(Item *cond, bool exclude_index)
in tab->select_cond
keyno Index for which extract and push the condition
other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
+ factor_out TRUE <=> Factor out the extracted condition
DESCRIPTION
Try to extract and push the index condition down to table handler
*/
-void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok)
+void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok,
+ bool factor_out)
{
DBUG_ENTER("push_index_cond");
Item *idx_cond;
@@ -350,7 +352,8 @@ void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok)
if (idx_remainder_cond != idx_cond)
tab->ref.disable_cache= TRUE;
- Item *row_cond= make_cond_remainder(tab->select_cond, TRUE);
+ Item *row_cond= factor_out ? make_cond_remainder(tab->select_cond, TRUE) :
+ tab->pre_idx_push_select_cond;
DBUG_EXECUTE("where",
print_where(row_cond, "remainder cond", QT_ORDINARY););
diff --git a/sql/set_var.cc b/sql/set_var.cc
index 9fb2a74bfe1..ee0e870cd93 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -320,6 +320,9 @@ static sys_var_thd_ulong sys_interactive_timeout(&vars, "interactive_timeout",
&SV::net_interactive_timeout);
static sys_var_thd_ulong sys_join_buffer_size(&vars, "join_buffer_size",
&SV::join_buff_size);
+static sys_var_thd_ulonglong sys_join_buffer_space_limit(&vars,
+ "join_buffer_space_limit",
+ &SV::join_buff_space_limit);
static sys_var_thd_ulong sys_join_cache_level(&vars, "join_cache_level",
&SV::join_cache_level);
static sys_var_key_buffer_size sys_key_buffer_size(&vars, "key_buffer_size");
@@ -4058,7 +4061,7 @@ bool
sys_var_thd_optimizer_switch::
symbolic_mode_representation(THD *thd, ulonglong val, LEX_STRING *rep)
{
- char buff[STRING_BUFFER_USUAL_SIZE*8];
+ char buff[STRING_BUFFER_USUAL_SIZE*18];
String tmp(buff, sizeof(buff), &my_charset_latin1);
int i;
ulonglong bit;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index bd6402053f3..7f3b0cbfb03 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -380,6 +380,7 @@ struct system_variables
ulonglong max_heap_table_size;
ulonglong tmp_table_size;
ulonglong long_query_time;
+ ulonglong join_buff_space_limit;
ha_rows select_limit;
ha_rows max_join_size;
ulong auto_increment_increment, auto_increment_offset;
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
index 10064590a75..713d6f9e93f 100644
--- a/sql/sql_join_cache.cc
+++ b/sql/sql_join_cache.cc
@@ -57,7 +57,7 @@
The function ignores the fields 'blob_length' and 'ofset' of the
descriptor.
- RETURN
+ RETURN VALUE
the length of the field
*/
@@ -98,7 +98,7 @@ uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
descriptor while 'descr_ptr' points to the position right after the
last added pointer.
- RETURN
+ RETURN VALUE
the total length of the added fields
*/
@@ -137,7 +137,44 @@ uint add_table_data_fields_to_join_cache(JOIN_TAB *tab,
*descr_ptr= copy_ptr;
return len;
}
-
+
+/*
+ Get the next table whose records are stored in the join buffer of this cache
+
+ SYNOPSIS
+ get_next_table()
+ tab the table for which the next table is to be returned
+
+ DESCRIPTION
+ For a given table whose records are stored in this cache the function
+ returns the next such table if there is any.
+ The function takes into account that the tables whose records are
+ are stored in the same cache now can interleave with tables from
+ materialized semijoin subqueries.
+
+ TODO
+ This function should be modified/simplified after the new code for
+ materialized semijoins is merged.
+
+ RETURN
+ The next join table whose records are stored in the buffer of this cache
+ if such table exists, 0 - otherwise
+*/
+
+JOIN_TAB *JOIN_CACHE::get_next_table(JOIN_TAB *tab)
+{
+
+ if (++tab == join_tab)
+ return NULL;
+ if (join_tab->first_sjm_sibling)
+ return tab;
+ uint i= tab-join->join_tab;
+ while (sj_is_materialize_strategy(join->best_positions[i].sj_strategy) &&
+ i < join->tables)
+ i+= join->best_positions[i].n_sj_tables;
+ return join->join_tab+i < join_tab ? join->join_tab+i : NULL;
+}
+
/*
Determine different counters of fields associated with a record in the cache
@@ -152,14 +189,16 @@ uint add_table_data_fields_to_join_cache(JOIN_TAB *tab,
The function sets 'with_match_flag' on if 'join_tab' needs a match flag
i.e. if it is the first inner table of an outer join or a semi-join.
- RETURN
+ RETURN VALUE
none
*/
void JOIN_CACHE::calc_record_fields()
{
JOIN_TAB *tab = prev_cache ? prev_cache->join_tab :
- join->join_tab+join->const_tables;
+ (join_tab->first_sjm_sibling ?
+ join_tab->first_sjm_sibling :
+ join->join_tab+join->const_tables);
tables= join_tab-tab;
fields= 0;
@@ -169,9 +208,9 @@ void JOIN_CACHE::calc_record_fields()
data_field_ptr_count= 0;
referenced_fields= 0;
- for ( ; tab < join_tab ; tab++)
+ for ( ; tab ; tab= get_next_table(tab))
{
- calc_used_field_length(join->thd, tab);
+ tab->calc_used_field_length(FALSE);
flag_fields+= test(tab->used_null_fields || tab->used_uneven_bit_fields);
flag_fields+= test(tab->table->maybe_null);
fields+= tab->used_fields;
@@ -184,6 +223,73 @@ void JOIN_CACHE::calc_record_fields()
fields+= flag_fields;
}
+
+/*
+ Collect information on join key arguments
+
+ SYNOPSIS
+ collect_info_on_key_args()
+
+ DESCRIPTION
+ The function traverses the ref expressions that are used to access the
+ joined table join_tab. For each table 'tab' whose fields are to be stored
+ in the join buffer of the cache the function finds the fields from 'tab'
+ that occur in the ref expressions and marks these fields in the bitmap
+ tab->table->tmp_set. The function counts the number of them stored
+ in this cache and the total number of them stored in the previous caches
+ and saves the results of the counting in 'local_key_arg_fields' and 'external_key_arg_fields' respectively.
+
+ NOTES
+ The function does not do anything if no key is used to join the records
+ from join_tab.
+
+ RETURN VALUE
+ none
+*/
+
+void JOIN_CACHE::collect_info_on_key_args()
+{
+ JOIN_TAB *tab;
+ JOIN_CACHE *cache;
+ local_key_arg_fields= 0;
+ external_key_arg_fields= 0;
+
+ if (!is_key_access())
+ return;
+
+ TABLE_REF *ref= &join_tab->ref;
+ cache= this;
+ do
+ {
+ for (tab= cache->join_tab-cache->tables; tab ;
+ tab= cache->get_next_table(tab))
+ {
+ uint key_args;
+ bitmap_clear_all(&tab->table->tmp_set);
+ for (uint i= 0; i < ref->key_parts; i++)
+ {
+ Item *ref_item= ref->items[i];
+ if (!(tab->table->map & ref_item->used_tables()))
+ continue;
+ ref_item->walk(&Item::add_field_to_set_processor, 1,
+ (uchar *) tab->table);
+ }
+ if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
+ {
+ if (cache == this)
+ local_key_arg_fields+= key_args;
+ else
+ external_key_arg_fields+= key_args;
+ }
+ }
+ cache= cache->prev_cache;
+ }
+ while (cache);
+
+ return;
+}
+
+
/*
Allocate memory for descriptors and pointers to them associated with the cache
@@ -195,23 +301,22 @@ void JOIN_CACHE::calc_record_fields()
and the array of pointers to the field descriptors used to copy
join record data from record buffers into the join buffer and
backward. Some pointers refer to the field descriptor associated
- with previous caches. They are placed at the beginning of the
- array of pointers and its total number is specified by the parameter
- 'external fields'.
- The pointer of the first array is assigned to field_descr and the
- number of elements is precalculated by the function calc_record_fields.
+ with previous caches. They are placed at the beginning of the array
+ of pointers and its total number is stored in external_key_arg_fields.
+ The pointer of the first array is assigned to field_descr and the number
+ of the elements in it is precalculated by the function calc_record_fields.
The allocated arrays are adjacent.
NOTES
The memory is allocated in join->thd->memroot
- RETURN
+ RETURN VALUE
pointer to the first array
*/
-int JOIN_CACHE::alloc_fields(uint external_fields)
+int JOIN_CACHE::alloc_fields()
{
- uint ptr_cnt= external_fields+blobs+1;
+ uint ptr_cnt= external_key_arg_fields+blobs+1;
uint fields_size= sizeof(CACHE_FIELD)*fields;
field_descr= (CACHE_FIELD*) sql_alloc(fields_size +
sizeof(CACHE_FIELD*)*ptr_cnt);
@@ -219,6 +324,7 @@ int JOIN_CACHE::alloc_fields(uint external_fields)
return (field_descr == NULL);
}
+
/*
Create descriptors of the record flag fields stored in the join buffer
@@ -252,7 +358,7 @@ int JOIN_CACHE::alloc_fields(uint external_fields)
The function sets the value of 'length' to the total length of the
flag fields.
- RETURN
+ RETURN VALUE
none
*/
@@ -272,7 +378,7 @@ void JOIN_CACHE::create_flag_fields()
&copy);
/* Create fields for all null bitmaps and null row flags that are needed */
- for (tab= join_tab-tables; tab < join_tab; tab++)
+ for (tab= join_tab-tables; tab; tab= get_next_table(tab))
{
TABLE *table= tab->table;
@@ -295,17 +401,145 @@ void JOIN_CACHE::create_flag_fields()
/*
+ Create descriptors of the fields used to build access keys to the joined table
+
+ SYNOPSIS
+ create_key_arg_fields()
+
+ DESCRIPTION
+ The function creates descriptors of the record fields stored in the join
+ buffer that are used to build access keys to the joined table. These
+ fields are put into the buffer ahead of other records fields stored in
+ the buffer. Such placement helps to optimize construction of access keys.
+ For each field that is used to build access keys to the joined table but
+ is stored in some other join cache buffer the function saves a pointer
+ to the field descriptor. The array of such pointers is placed in
+ the join cache structure just before the array of pointers to the
+ blob fields blob_ptr.
+ Any field stored in a join cache buffer that is used to construct keys
+ to access tables associated with other join caches is called a referenced
+ field. It receives a unique number that is saved by the function in the
+ member 'referenced_field_no' of the CACHE_FIELD descriptor for the field.
+ This number is used as index to the array of offsets to the referenced
+ fields that are saved and put in the join cache buffer after all record
+ fields.
+ The function also finds out whether the keys to access join_tab
+ can be considered as embedded and, if so, sets the flag 'use_emb_key' in
+ this join cache appropriately.
+
+ NOTES.
+ When a key to access the joined table 'join_tab' is constructed the array
+ of pointers to the field descriptors for the external fields is looked
+ through. For each of this pointers we find out in what previous key cache
+ the referenced field is stored. The value of 'referenced_field_no'
+ provides us with the index into the array of offsets for referenced
+ fields stored in the join cache. The offset read by the index allows
+ us to read the field without reading all other fields of the record
+ stored the join cache buffer. This optimizes the construction of keys
+ to access 'join_tab' when some key arguments are stored in the previous
+ join caches.
+
+ NOTES
+ The function does not do anything if no key is used to join the records
+ from join_tab.
+
+ RETURN VALUE
+ none
+*/
+void JOIN_CACHE::create_key_arg_fields()
+{
+ JOIN_TAB *tab;
+ JOIN_CACHE *cache;
+
+ if (!is_key_access())
+ return;
+
+ /*
+ Save pointers to the cache fields in previous caches
+ that are used to build keys for this key access.
+ */
+ cache= this;
+ uint ext_key_arg_cnt= external_key_arg_fields;
+ CACHE_FIELD *copy;
+ CACHE_FIELD **copy_ptr= blob_ptr;
+ while (ext_key_arg_cnt)
+ {
+ cache= cache->prev_cache;
+ for (tab= cache->join_tab-cache->tables; tab;
+ tab= cache->get_next_table(tab))
+ {
+ CACHE_FIELD *copy_end;
+ MY_BITMAP *key_read_set= &tab->table->tmp_set;
+ /* key_read_set contains the bitmap of tab's fields referenced by ref */
+ if (bitmap_is_clear_all(key_read_set))
+ continue;
+ copy_end= cache->field_descr+cache->fields;
+ for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
+ {
+ /*
+ (1) - when we store rowids for DuplicateWeedout, they have
+ copy->field==NULL
+ */
+ if (copy->field && // (1)
+ copy->field->table == tab->table &&
+ bitmap_is_set(key_read_set, copy->field->field_index))
+ {
+ *copy_ptr++= copy;
+ ext_key_arg_cnt--;
+ if (!copy->referenced_field_no)
+ {
+ /*
+ Register the referenced field 'copy':
+ - set the offset number in copy->referenced_field_no,
+ - adjust the value of the flag 'with_length',
+ - adjust the values of 'pack_length' and
+ of 'pack_length_with_blob_ptrs'.
+ */
+ copy->referenced_field_no= ++cache->referenced_fields;
+ if (!cache->with_length)
+ {
+ cache->with_length= TRUE;
+ uint sz= cache->get_size_of_rec_length();
+ cache->base_prefix_length+= sz;
+ cache->pack_length+= sz;
+ cache->pack_length_with_blob_ptrs+= sz;
+ }
+ cache->pack_length+= cache->get_size_of_fld_offset();
+ cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
+ }
+ }
+ }
+ }
+ }
+ /* After this 'blob_ptr' shall not be changed */
+ blob_ptr= copy_ptr;
+
+ /* Now create local fields that are used to build ref for this key access */
+ copy= field_descr+flag_fields;
+ for (tab= join_tab-tables; tab; tab= get_next_table(tab))
+ {
+ length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
+ &data_field_count, &copy,
+ &data_field_ptr_count,
+ &copy_ptr);
+ }
+
+ use_emb_key= check_emb_key_usage();
+
+ return;
+}
+
+
+/*
Create descriptors of all remaining data fields stored in the join buffer
SYNOPSIS
create_remaining_fields()
- all_read_fields indicates that descriptors for all read data fields
- are to be created
DESCRIPTION
The function creates descriptors for all remaining data fields of a
- record from the join buffer. If the parameter 'all_read_fields' is
- true the function creates fields for all read record fields that
+ record from the join buffer. If the value returned by is_key_access() is
+ false the function creates fields for all read record fields that
comprise the partial join record joined with join_tab. Otherwise,
for each table tab, the set of the read fields for which the descriptors
have to be added is determined as the difference between all read fields
@@ -315,7 +549,7 @@ void JOIN_CACHE::create_flag_fields()
the added fields.
NOTES
- If 'all_read_fields' is false the function modifies the value of
+ If is_key_access() returns true the function modifies the value of
tab->table->tmp_set for a each table whose fields are stored in the cache.
The function calls the method Field::fill_cache_field to figure out
the type of the cache field and the maximal length of its representation
@@ -327,17 +561,18 @@ void JOIN_CACHE::create_flag_fields()
contains the number of the pointers to such descriptors having been
stored up to the moment.
- RETURN
+ RETURN VALUE
none
*/
-void JOIN_CACHE:: create_remaining_fields(bool all_read_fields)
+void JOIN_CACHE:: create_remaining_fields()
{
JOIN_TAB *tab;
+ bool all_read_fields= !is_key_access();
CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;
- for (tab= join_tab-tables; tab < join_tab; tab++)
+ for (tab= join_tab-tables; tab; tab= get_next_table(tab))
{
MY_BITMAP *rem_field_set;
TABLE *table= tab->table;
@@ -372,6 +607,7 @@ void JOIN_CACHE:: create_remaining_fields(bool all_read_fields)
}
+
/*
Calculate and set all cache constants
@@ -389,7 +625,7 @@ void JOIN_CACHE:: create_remaining_fields(bool all_read_fields)
making a dicision whether more records should be added into the join
buffer or not.
- RETURN
+ RETURN VALUE
none
*/
@@ -424,6 +660,8 @@ void JOIN_CACHE::set_constants()
size_of_rec_ofs= offset_size(buff_size);
size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
size_of_fld_ofs= size_of_rec_len;
+ base_prefix_length= (with_length ? size_of_rec_len : 0) +
+ (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
/*
The size of the offsets for referenced fields will be added later.
The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
@@ -437,236 +675,336 @@ void JOIN_CACHE::set_constants()
/*
- Allocate memory for a join buffer
+ Get maximum total length of all affixes of a record in the join cache buffer
SYNOPSIS
- alloc_buffer()
+ get_record_max_affix_length()
DESCRIPTION
- The function allocates a lump of memory for the cache join buffer. The
- size of the allocated memory is 'buff_size' bytes.
-
- RETURN
- 0 - if the memory has been successfully allocated
- 1 - otherwise
+ The function calculates the maximum possible total length of all affixes
+ of a record in the join cache buffer, that is made of:
+ - the length of all prefixes used in this cache,
+ - the length of the match flag if it's needed
+ - the total length of the maximum possible offsets to the fields of
+ a record in the buffer.
+
+ RETURN VALUE
+ The maximum total length of all affixes of a record in the join buffer
+*/
+
+uint JOIN_CACHE::get_record_max_affix_length()
+{
+ uint len= get_prefix_length() +
+ test(with_match_flag) +
+ size_of_fld_ofs * data_field_count;
+ return len;
+}
+
+
+/*
+ Get the minimum possible size of the cache join buffer
+
+ SYNOPSIS
+ get_min_join_buffer_size()
+
+ DESCRIPTION
+ At the first its invocation for the cache the function calculates the
+ minimum possible size of the join buffer of the cache. This value depends
+ on the minimal number of records 'min_records' to be stored in the join
+ buffer. The number is supposed to be determined by the procedure that
+ chooses the best access path to the joined table join_tab in the execution
+ plan. After the calculation of the interesting size the function saves it
+ in the field 'min_buff_size' in order to use it directly at the next
+ invocations of the function.
+
+ NOTES
+ Currently the number of minimal records is just set to 1.
+
+ RETURN VALUE
+ The minimal possible size of the join buffer of this cache
*/
-int JOIN_CACHE::alloc_buffer()
+ulong JOIN_CACHE::get_min_join_buffer_size()
{
- buff= (uchar*) my_malloc(buff_size, MYF(0));
- return buff == NULL;
-}
-
+ if (!min_buff_size)
+ {
+ ulong len= 0;
+ for (JOIN_TAB *tab= join_tab-tables; tab < join_tab; tab++)
+ len+= tab->get_max_used_fieldlength();
+ len+= get_record_max_affix_length() + get_max_key_addon_space_per_record();
+ ulong min_sz= len*min_records;
+ ulong add_sz= 0;
+ for (uint i=0; i < min_records; i++)
+ add_sz+= join_tab_scan->aux_buffer_incr(i+1);
+ avg_aux_buffer_incr= add_sz/min_records;
+ min_sz+= add_sz;
+ min_sz+= pack_length_with_blob_ptrs;
+ min_buff_size= min_sz;
+ }
+ return min_buff_size;
+}
+
/*
- Initialize a BNL cache
+ Get the maximum possible size of the cache join buffer
SYNOPSIS
- init()
+ get_max_join_buffer_size()
DESCRIPTION
- The function initializes the cache structure. It supposed to be called
- right after a constructor for the JOIN_CACHE_BNL.
- The function allocates memory for the join buffer and for descriptors of
- the record fields stored in the buffer.
+ At the first its invocation for the cache the function calculates the
+ maximum possible size of join buffer for the cache. This value does not
+ exceed the estimate of the number of records 'max_records' in the partial
+ join that joins tables from the first one through join_tab. This value
+ is also capped off by the value of join_tab->join_buffer_size_limit, if it
+ has been set to a non-zero value, and by the value of the system parameter
+ join_buffer_size - otherwise. After the calculation of the interesting size
+ the function saves the value in the field 'max_buff_size' in order to use
+ it directly at the next invocations of the function.
NOTES
- The code of this function should have been included into the constructor
- code itself. However the new operator for the class JOIN_CACHE_BNL would
- never fail while memory allocation for the join buffer is not absolutely
- unlikely to fail. That's why this memory allocation has to be placed in a
- separate function that is called in a couple with a cache constructor.
- It is quite natural to put almost all other constructor actions into
- this function.
-
- RETURN
- 0 initialization with buffer allocations has been succeeded
- 1 otherwise
+ Currently the value of join_tab->join_buffer_size_limit is initialized
+ to 0 and is never reset.
+
+ RETURN VALUE
+ The maximum possible size of the join buffer of this cache
*/
-int JOIN_CACHE_BNL::init()
+ulong JOIN_CACHE::get_max_join_buffer_size()
{
- DBUG_ENTER("JOIN_CACHE::init");
+ if (!max_buff_size)
+ {
+ ulong max_sz;
+ ulong min_sz= get_min_join_buffer_size();
+ ulong len= 0;
+ for (JOIN_TAB *tab= join_tab-tables; tab < join_tab; tab++)
+ len+= tab->get_used_fieldlength();
+ len+= get_record_max_affix_length();
+ avg_record_length= len;
+ len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr;
+ space_per_record= len;
+
+ ulong limit_sz= join->thd->variables.join_buff_size;
+ if (join_tab->join_buffer_size_limit)
+ set_if_smaller(limit_sz, join_tab->join_buffer_size_limit);
+ if (limit_sz / max_records > space_per_record)
+ max_sz= space_per_record * max_records;
+ else
+ max_sz= limit_sz;
+ max_sz+= pack_length_with_blob_ptrs;
+ set_if_smaller(max_sz, limit_sz);
+ set_if_bigger(max_sz, min_sz);
+ max_buff_size= max_sz;
+ }
+ return max_buff_size;
+}
+
- calc_record_fields();
+/*
+ Allocate memory for a join buffer
- if (alloc_fields(0))
- DBUG_RETURN(1);
+ SYNOPSIS
+ alloc_buffer()
- create_flag_fields();
+ DESCRIPTION
+ The function allocates a lump of memory for the cache join buffer.
+ Initially the function sets the size of the buffer buff_size equal to
+ the value returned by get_max_join_buffer_size(). If the total size of
+ the space intended to be used for the join buffers employed by the
+ tables from the first one through join_tab exceeds the value of the
+ system parameter join_buff_space_limit, then the function first tries
+ to shrink the used buffers to make the occupied space fit the maximum
+ memory allowed to be used for all join buffers in total. After
+ this the function tries to allocate a join buffer for join_tab.
+ If it fails to do so, it decrements the requested size of the join
+ buffer, shrinks proportionally the join buffers used for the previous
+ tables and tries to allocate a buffer for join_tab. In the case of a
+ failure the function repeats its attempts with smaller and smaller
+ requested sizes of the buffer, but not more than 4 times.
- create_remaining_fields(TRUE);
+ RETURN VALUE
+ 0 if the memory has been successfully allocated
+ 1 otherwise
+*/
- set_constants();
+int JOIN_CACHE::alloc_buffer()
+{
+ JOIN_TAB *tab;
+ JOIN_CACHE *cache;
+ ulonglong curr_buff_space_sz= 0;
+ ulonglong curr_min_buff_space_sz= 0;
+ ulonglong join_buff_space_limit=
+ join->thd->variables.join_buff_space_limit;
+ double partial_join_cardinality= (join_tab-1)->get_partial_join_cardinality();
+ buff= NULL;
+ min_buff_size= 0;
+ max_buff_size= 0;
+ min_records= 1;
+ max_records= partial_join_cardinality <= join_buff_space_limit ?
+ (ulonglong) partial_join_cardinality : join_buff_space_limit;
+ set_if_bigger(max_records, 10);
+ min_buff_size= get_min_join_buffer_size();
+ buff_size= get_max_join_buffer_size();
+ for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
+ {
+ cache= tab->cache;
+ if (cache)
+ {
+ curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
+ curr_buff_space_sz+= cache->get_join_buffer_size();
+ }
+ }
- if (alloc_buffer())
- DBUG_RETURN(1);
-
- reset(TRUE);
+ if (curr_min_buff_space_sz > join_buff_space_limit ||
+ (curr_buff_space_sz > join_buff_space_limit &&
+ join->shrink_join_buffers(join_tab, curr_buff_space_sz,
+ join_buff_space_limit)))
+ goto fail;
+
+ for (ulong buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
+ {
+ ulong next_buff_size;
- DBUG_RETURN(0);
+ if ((buff= (uchar*) my_malloc(buff_size, MYF(0))))
+ break;
+
+ next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
+ if (next_buff_size < min_buff_size ||
+ join->shrink_join_buffers(join_tab, curr_buff_space_sz,
+ curr_buff_space_sz-buff_size_decr))
+ goto fail;
+ buff_size= next_buff_size;
+
+ curr_buff_space_sz= 0;
+ for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
+ {
+ cache= tab->cache;
+ if (cache)
+ curr_buff_space_sz+= cache->get_join_buffer_size();
+ }
+ }
+ return 0;
+
+fail:
+ buff_size= 0;
+ return 1;
}
+
+/*
+ Shrink the size if the cache join buffer in a given ratio
+
+ SYNOPSIS
+ shrink_join_buffer_in_ratio()
+ n numerator of the ratio to shrink the buffer in
+ d denominator of the ratio
+
+ DESCRIPTION
+ The function first deallocates the join buffer of the cache. Then
+ it allocates a buffer that is (n/d) times smaller.
+
+ RETURN VALUE
+ FALSE on success with allocation of the smaller join buffer
+ TRUE otherwise
+*/
+
+bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d)
+{
+ ulonglong next_buff_size;
+ if (n < d)
+ return FALSE;
+ next_buff_size= (ulonglong) ((double) buff_size / n * d);
+ set_if_bigger(next_buff_size, min_buff_size);
+ buff_size= next_buff_size;
+ return realloc_buffer();
+}
+
+
+/*
+ Reallocate the join buffer of a join cache
+
+ SYNOPSIS
+ realloc_buffer()
+
+ DESCRIPTION
+ The function reallocates the join buffer of the join cache. After this
+ it resets the buffer for writing.
+
+ NOTES
+ The function assumes that buff_size contains the new value for the join
+ buffer size.
+
+ RETURN VALUE
+ 0 if the buffer has been successfully reallocated
+ 1 otherwise
+*/
+
+int JOIN_CACHE::realloc_buffer()
+{
+ int rc;
+ free();
+ rc= test(!(buff= (uchar*) my_malloc(buff_size, MYF(0))));
+ reset(TRUE);
+ return rc;
+}
+
/*
- Initialize a BKA cache
+ Initialize a join cache
SYNOPSIS
init()
DESCRIPTION
- The function initializes the cache structure. It supposed to be called
- right after a constructor for the JOIN_CACHE_BKA.
+ The function initializes the join cache structure. It supposed to be called
+ by init methods for classes derived from the JOIN_CACHE.
The function allocates memory for the join buffer and for descriptors of
the record fields stored in the buffer.
NOTES
The code of this function should have been included into the constructor
- code itself. However the new operator for the class JOIN_CACHE_BKA would
+ code itself. However the new operator for the class JOIN_CACHE would
never fail while memory allocation for the join buffer is not absolutely
unlikely to fail. That's why this memory allocation has to be placed in a
separate function that is called in a couple with a cache constructor.
It is quite natural to put almost all other constructor actions into
this function.
- RETURN
+ RETURN VALUE
0 initialization with buffer allocations has been succeeded
1 otherwise
*/
-int JOIN_CACHE_BKA::init()
+int JOIN_CACHE::init()
{
- JOIN_TAB *tab;
- JOIN_CACHE *cache;
- local_key_arg_fields= 0;
- external_key_arg_fields= 0;
- DBUG_ENTER("JOIN_CACHE_BKA::init");
+ DBUG_ENTER("JOIN_CACHE::init");
calc_record_fields();
- /* Mark all fields that can be used as arguments for this key access */
- TABLE_REF *ref= &join_tab->ref;
- cache= this;
- do
- {
- /*
- Traverse the ref expressions and find the occurrences of fields in them for
- each table 'tab' whose fields are to be stored in the 'cache' join buffer.
- Mark these fields in the bitmap tab->table->tmp_set.
- For these fields count the number of them stored in this cache and the
- total number of them stored in the previous caches. Save the result
- of the counting 'in local_key_arg_fields' and 'external_key_arg_fields'
- respectively.
- */
- for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
- {
- uint key_args;
- bitmap_clear_all(&tab->table->tmp_set);
- for (uint i= 0; i < ref->key_parts; i++)
- {
- Item *ref_item= ref->items[i];
- if (!(tab->table->map & ref_item->used_tables()))
- continue;
- ref_item->walk(&Item::add_field_to_set_processor, 1,
- (uchar *) tab->table);
- }
- if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
- {
- if (cache == this)
- local_key_arg_fields+= key_args;
- else
- external_key_arg_fields+= key_args;
- }
- }
- cache= cache->prev_cache;
- }
- while (cache);
+ collect_info_on_key_args();
- if (alloc_fields(external_key_arg_fields))
+ if (alloc_fields())
DBUG_RETURN(1);
create_flag_fields();
-
- /*
- Save pointers to the cache fields in previous caches
- that are used to build keys for this key access.
- */
- cache= this;
- uint ext_key_arg_cnt= external_key_arg_fields;
- CACHE_FIELD *copy;
- CACHE_FIELD **copy_ptr= blob_ptr;
- while (ext_key_arg_cnt)
- {
- cache= cache->prev_cache;
- for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
- {
- CACHE_FIELD *copy_end;
- MY_BITMAP *key_read_set= &tab->table->tmp_set;
- /* key_read_set contains the bitmap of tab's fields referenced by ref */
- if (bitmap_is_clear_all(key_read_set))
- continue;
- copy_end= cache->field_descr+cache->fields;
- for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
- {
- /*
- (1) - when we store rowids for DuplicateWeedout, they have
- copy->field==NULL
- */
- if (copy->field && // (1)
- copy->field->table == tab->table &&
- bitmap_is_set(key_read_set, copy->field->field_index))
- {
- *copy_ptr++= copy;
- ext_key_arg_cnt--;
- if (!copy->referenced_field_no)
- {
- /*
- Register the referenced field 'copy':
- - set the offset number in copy->referenced_field_no,
- - adjust the value of the flag 'with_length',
- - adjust the values of 'pack_length' and
- of 'pack_length_with_blob_ptrs'.
- */
- copy->referenced_field_no= ++cache->referenced_fields;
- cache->with_length= TRUE;
- cache->pack_length+= cache->get_size_of_fld_offset();
- cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
- }
- }
- }
- }
- }
- /* After this 'blob_ptr' shall not be be changed */
- blob_ptr= copy_ptr;
-
- /* Now create local fields that are used to build ref for this key access */
- copy= field_descr+flag_fields;
- for (tab= join_tab-tables; tab < join_tab ; tab++)
- {
- length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
- &data_field_count, &copy,
- &data_field_ptr_count,
- &copy_ptr);
- }
- use_emb_key= check_emb_key_usage();
+ create_key_arg_fields();
- create_remaining_fields(FALSE);
+ create_remaining_fields();
set_constants();
if (alloc_buffer())
DBUG_RETURN(1);
-
- reset(TRUE);
+
+ reset(TRUE);
DBUG_RETURN(0);
-}
+}
/*
Check the possibility to read the access keys directly from the join buffer
-
SYNOPSIS
check_emb_key_usage()
@@ -693,13 +1031,21 @@ int JOIN_CACHE_BKA::init()
we still do not consider them embedded. In the future we'll expand the
the class of keys which we identify as embedded.
- RETURN
- TRUE - key values will be considered as embedded,
- FALSE - otherwise.
+ NOTES
+ The function returns FALSE if no key is used to join the records
+ from join_tab.
+
+ RETURN VALUE
+ TRUE key values will be considered as embedded,
+ FALSE otherwise.
*/
-bool JOIN_CACHE_BKA::check_emb_key_usage()
+bool JOIN_CACHE::check_emb_key_usage()
{
+
+ if (!is_key_access())
+ return FALSE;
+
uint i;
Item *item;
KEY_PART_INFO *key_part;
@@ -800,110 +1146,6 @@ bool JOIN_CACHE_BKA::check_emb_key_usage()
/*
- Calculate the increment of the MRR buffer for a record write
-
- SYNOPSIS
- aux_buffer_incr()
-
- DESCRIPTION
- This implementation of the virtual function aux_buffer_incr determines
- for how much the size of the MRR buffer should be increased when another
- record is added to the cache.
-
- RETURN
- the increment of the size of the MRR buffer for the next record
-*/
-
-uint JOIN_CACHE_BKA::aux_buffer_incr()
-{
- uint incr= 0;
- TABLE_REF *ref= &join_tab->ref;
- TABLE *tab= join_tab->table;
- uint rec_per_key= tab->key_info[ref->key].rec_per_key[ref->key_parts-1];
- set_if_bigger(rec_per_key, 1);
- if (records == 1)
- incr= ref->key_length + tab->file->ref_length;
- incr+= tab->file->stats.mrr_length_per_rec * rec_per_key;
- return incr;
-}
-
-
-/*
- Check if the record combination matches the index condition
-
- SYNOPSIS
- JOIN_CACHE_BKA::skip_index_tuple()
- rseq Value returned by bka_range_seq_init()
- range_info MRR range association data
-
- DESCRIPTION
- This function is invoked from MRR implementation to check if an index
- tuple matches the index condition. It is used in the case where the index
- condition actually depends on both columns of the used index and columns
- from previous tables.
-
- Accessing columns of the previous tables requires special handling with
- BKA. The idea of BKA is to collect record combinations in a buffer and
- then do a batch of ref access lookups, i.e. by the time we're doing a
- lookup its previous-records-combination is not in prev_table->record[0]
- but somewhere in the join buffer.
-
- We need to get it from there back into prev_table(s)->record[0] before we
- can evaluate the index condition, and that's why we need this function
- instead of regular IndexConditionPushdown.
-
- NOTE
- Possible optimization:
- Before we unpack the record from a previous table
- check if this table is used in the condition.
- If so then unpack the record otherwise skip the unpacking.
- This should be done by a special virtual method
- get_partial_record_by_pos().
-
- RETURN
- 0 The record combination satisfies the index condition
- 1 Otherwise
-*/
-
-bool JOIN_CACHE_BKA::skip_index_tuple(range_seq_t rseq, char *range_info)
-{
- DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
- JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
- cache->get_record_by_pos((uchar*)range_info);
- DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
-}
-
-
-/*
- Check if the record combination matches the index condition
-
- SYNOPSIS
- bka_skip_index_tuple()
- rseq Value returned by bka_range_seq_init()
- range_info MRR range association data
-
- DESCRIPTION
- This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method,
- see comments there.
-
- NOTE
- This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
-
- RETURN
- 0 The record combination satisfies the index condition
- 1 Otherwise
-*/
-
-static
-bool bka_skip_index_tuple(range_seq_t rseq, char *range_info)
-{
- DBUG_ENTER("bka_skip_index_tuple");
- JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
- DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
-}
-
-
-/*
Write record fields and their required offsets into the join cache buffer
SYNOPSIS
@@ -942,9 +1184,11 @@ bool bka_skip_index_tuple(range_seq_t rseq, char *range_info)
The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data
remains in the record buffers and not copied to the join buffer. It may
happen only to the blob data from the last record added into the cache.
-
-
- RETURN
+ If on_precond is attached to join_tab and it is not evaluated to TRUE
+ then MATCH_IMPOSSIBLE is placed in the match flag field of the record
+ written into the join buffer.
+
+ RETURN VALUE
length of the written record data
*/
@@ -954,16 +1198,18 @@ uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
bool last_record;
CACHE_FIELD *copy;
CACHE_FIELD *copy_end;
+ uchar *flags_pos;
uchar *cp= pos;
uchar *init_pos= cp;
uchar *rec_len_ptr= 0;
+ uint key_extra= extra_key_length();
records++; /* Increment the counter of records in the cache */
- len= pack_length;
+ len= pack_length + key_extra;
/* Make an adjustment for the size of the auxiliary buffer if there is any */
- uint incr= aux_buffer_incr();
+ uint incr= aux_buffer_incr(records);
ulong rem= rem_space();
aux_buff_size+= len+incr < rem ? incr : rem;
@@ -1000,7 +1246,7 @@ uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
This function is called only in the case when there is enough space left in
the cache to store at least non-blob parts of the current record.
*/
- last_record= (len+pack_length_with_blob_ptrs) > rem_space();
+ last_record= (len+pack_length_with_blob_ptrs+key_extra) > rem_space();
/*
Save the position for the length of the record in the cache if it's needed.
@@ -1032,6 +1278,7 @@ uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
/* First put into the cache the values of all flag fields */
copy_end= field_descr+flag_fields;
+ flags_pos= cp;
for ( ; copy < copy_end; copy++)
{
memcpy(cp, copy->str, copy->length);
@@ -1134,6 +1381,19 @@ uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
last_rec_pos= curr_rec_pos;
end_pos= pos= cp;
*is_full= last_record;
+
+ last_written_is_null_compl= 0;
+ if (!join_tab->first_unmatched && join_tab->on_precond)
+ {
+ join_tab->found= 0;
+ join_tab->not_null_compl= 1;
+ if (!join_tab->on_precond->val_int())
+ {
+ flags_pos[0]= MATCH_IMPOSSIBLE;
+ last_written_is_null_compl= 1;
+ }
+ }
+
return (uint) (cp-init_pos);
}
@@ -1158,7 +1418,7 @@ uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
- the size of the auxiliary buffer is reset to 0,
- the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0.
- RETURN
+ RETURN VALUE
none
*/
@@ -1176,6 +1436,7 @@ void JOIN_CACHE::reset(bool for_writing)
}
}
+
/*
Add a record into the join buffer: the default implementation
@@ -1190,7 +1451,7 @@ void JOIN_CACHE::reset(bool for_writing)
The implementation assumes that the function get_curr_link()
will return exactly the pointer to this matched record.
- RETURN
+ RETURN VALUE
TRUE if it has been decided that it should be the last record
in the join buffer,
FALSE otherwise
@@ -1227,9 +1488,9 @@ bool JOIN_CACHE::put_record()
point to the beginning of the first field of the record in the
join buffer.
- RETURN
- TRUE - there are no more records to read from the join buffer
- FALSE - otherwise
+ RETURN VALUE
+ TRUE there are no more records to read from the join buffer
+ FALSE otherwise
*/
bool JOIN_CACHE::get_record()
@@ -1268,7 +1529,7 @@ bool JOIN_CACHE::get_record()
from the the join buffers of the previous caches. The fields are read
into the corresponding record buffers.
- RETURN
+ RETURN VALUE
none
*/
@@ -1287,7 +1548,7 @@ void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
/*
- Test the match flag from the referenced record: the default implementation
+ Get the match flag from the referenced record: the default implementation
SYNOPSIS
get_match_flag_by_pos()
@@ -1295,30 +1556,55 @@ void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
DESCRIPTION
This default implementation of the virtual function get_match_flag_by_pos
- test the match flag for the record pointed by the reference at the position
- rec_ptr. If the match flag in placed one of the previous buffers the function
- first reaches the linked record fields in this buffer.
+ get the match flag for the record pointed by the reference at the position
+ rec_ptr. If the match flag is placed in one of the previous buffers the
+ function first reaches the linked record fields in this buffer.
- RETURN
- TRUE if the match flag is set on
- FALSE otherwise
+ RETURN VALUE
+ match flag for the record at the position rec_ptr
*/
-bool JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
+enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
{
+ Match_flag match_fl= MATCH_NOT_FOUND;
if (with_match_flag)
- return test(*rec_ptr);
+ {
+ match_fl= (enum Match_flag) rec_ptr[0];
+ return match_fl;
+ }
if (prev_cache)
{
uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
return prev_cache->get_match_flag_by_pos(prev_rec_ptr);
}
DBUG_ASSERT(0);
- return FALSE;
+ return match_fl;
}
/*
+ Calculate the increment of the auxiliary buffer for a record write
+
+ SYNOPSIS
+ aux_buffer_incr()
+ recno the number of the record the increment to be calculated for
+
+ DESCRIPTION
+ This function calls the aux_buffer_incr the method of the
+ companion member join_tab_scan to calculate the growth of the
+ auxiliary buffer when the recno-th record is added to the
+ join_buffer of this cache.
+
+ RETURN VALUE
+ the number of bytes in the increment
+*/
+
+uint JOIN_CACHE::aux_buffer_incr(ulong recno)
+{
+ return join_tab_scan->aux_buffer_incr(recno);
+}
+
+/*
Read all flag and data fields of a record from the join buffer
SYNOPSIS
@@ -1332,8 +1618,8 @@ bool JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
The function increments the value of 'pos' by the length of the
read data.
- RETURN
- (-1) - if there is no more records in the join buffer
+ RETURN VALUE
+ (-1) if there is no more records in the join buffer
length of the data read from the join buffer - otherwise
*/
@@ -1371,7 +1657,7 @@ uint JOIN_CACHE::read_all_record_fields()
The function increments the value of 'pos' by the length of the
read data.
- RETURN
+ RETURN VALUE
length of the data read from the join buffer
*/
@@ -1380,6 +1666,12 @@ uint JOIN_CACHE::read_flag_fields()
uchar *init_pos= pos;
CACHE_FIELD *copy= field_descr;
CACHE_FIELD *copy_end= copy+flag_fields;
+ if (with_match_flag)
+ {
+ copy->str[0]= test((Match_flag) pos[0] == MATCH_FOUND);
+ pos+= copy->length;
+ copy++;
+ }
for ( ; copy < copy_end; copy++)
{
memcpy(copy->str, pos, copy->length);
@@ -1406,7 +1698,7 @@ uint JOIN_CACHE::read_flag_fields()
The function increments the value of 'pos' by the length of the
read data.
- RETURN
+ RETURN VALUE
length of the data read from the join buffer
*/
@@ -1484,9 +1776,15 @@ uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
If the value of *len is 0 then the function sets it to the total
length of the record fields including possible trailing offset
values. Otherwise *len is supposed to provide this value that
- has been obtained earlier.
+ has been obtained earlier.
- RETURN
+ NOTE
+ If the value of the referenced field is null then the offset
+ for the value is set to 0. If the value of a field can be null
+ then the value of flag_fields is always positive. So the offset
+ for any non-null value cannot be 0 in this case.
+
+ RETURN VALUE
TRUE 'copy' points to a data descriptor of this join cache
FALSE otherwise
*/
@@ -1513,14 +1811,21 @@ bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
size_of_fld_ofs*
(referenced_fields+1-copy->referenced_field_no));
bool is_null= FALSE;
+ Field *field= copy->field;
if (offset == 0 && flag_fields)
is_null= TRUE;
if (is_null)
- copy->field->set_null();
+ {
+ field->set_null();
+ if (!field->real_maybe_null())
+ field->table->null_row= 1;
+ }
else
{
uchar *save_pos= pos;
- copy->field->set_notnull();
+ field->set_notnull();
+ if (!field->real_maybe_null())
+ field->table->null_row= 0;
pos= rec_ptr+offset;
read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
pos= save_pos;
@@ -1530,30 +1835,69 @@ bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
/*
- Skip record from join buffer if its match flag is on: default implementation
+ Skip record from join buffer if it's already matched: default implementation
SYNOPSIS
- skip_record_if_match()
+ skip_if_matched()
DESCRIPTION
- This default implementation of the virtual function skip_record_if_match
- skips the next record from the join buffer if its match flag is set on.
- If the record is skipped the value of 'pos' is set to points to the position
+ This default implementation of the virtual function skip_if_matched
+ skips the next record from the join buffer if its match flag is set to
+ MATCH_FOUND.
+ If the record is skipped the value of 'pos' is set to point to the position
right after the record.
- RETURN
- TRUE - the match flag is on and the record has been skipped
- FALSE - the match flag is off
+ RETURN VALUE
+ TRUE the match flag is set to MATCH_FOUND and the record has been skipped
+ FALSE otherwise
+*/
+
+bool JOIN_CACHE::skip_if_matched()
+{
+ DBUG_ASSERT(with_length);
+ uint offset= size_of_rec_len;
+ if (prev_cache)
+ offset+= prev_cache->get_size_of_rec_offset();
+ /* Check whether the match flag is MATCH_FOUND */
+ if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND)
+ {
+ pos+= size_of_rec_len + get_rec_length(pos);
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
+/*
+ Skip record from join buffer if the match isn't needed: default implementation
+
+ SYNOPSIS
+ skip_if_not_needed_match()
+
+ DESCRIPTION
+ This default implementation of the virtual function skip_if_not_needed_match
+ skips the next record from the join buffer if its match flag is not
+ MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the
+ first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE
+ and join_tab is the first inner table of an outer join.
+ If the record is skipped the value of 'pos' is set to point to the position
+ right after the record.
+
+ RETURN VALUE
+ TRUE the record has to be skipped
+ FALSE otherwise
*/
-bool JOIN_CACHE::skip_record_if_match()
+bool JOIN_CACHE::skip_if_not_needed_match()
{
DBUG_ASSERT(with_length);
+ enum Match_flag match_fl;
uint offset= size_of_rec_len;
if (prev_cache)
offset+= prev_cache->get_size_of_rec_offset();
- /* Check whether the match flag is on */
- if (get_match_flag_by_pos(pos+offset))
+
+ if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND &&
+ (join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) )
{
pos+= size_of_rec_len + get_rec_length(pos);
return TRUE;
@@ -1617,7 +1961,7 @@ void JOIN_CACHE::restore_last_record()
that have matches, after which null complementing extension for all
unmatched records from the join buffer are generated.
- RETURN
+ RETURN VALUE
return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
*/
@@ -1711,16 +2055,16 @@ finish:
}
-/*
- Using BNL find matches from the next table for records from the join buffer
+/*
+ Find matches from the next table for records from the join buffer
SYNOPSIS
join_matching_records()
skip_last do not look for matches for the last partial join record
DESCRIPTION
- The function retrieves all rows of the join_tab table and check whether
- they match partial join records from the join buffer. If a match is found
+ The function retrieves rows of the join_tab table and checks whether they
+ match partial join records from the join buffer. If a match is found
the function will call the sub_select function trying to look for matches
for the remaining join operations.
This function currently is called only from the function join_records.
@@ -1729,27 +2073,46 @@ finish:
the future processing in the caller function.
NOTES
+ If employed by BNL or BNLH join algorithms the function performs a full
+ scan of join_tab for each refill of the join buffer. If BKA or BKAH
+ algorithms are used then the function iterates only over those records
+ from join_tab that can be accessed by keys built over records in the join
+ buffer. To apply a proper method of iteration the function just calls
+ virtual iterator methods (open, next, close) of the member join_tab_scan.
+ The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MRR type.
+ The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join
+ algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods
+ for BKA/BKAH join algorithms.
+ When the function looks for records from the join buffer that would
+ match a record from join_tab it iterates either over all records in
+ the buffer or only over selected records. If BNL join operation is
+ performed all records are checked for the match. If BNLH or BKAH
+ algorithm is employed to join join_tab then the function looks only
+ through the records with the same join key as the record from join_tab.
+ With the BKA join algorithm only one record from the join buffer is checked
+ for a match for any record from join_tab. To iterate over the candidates
+ for a match the virtual function get_next_candidate_for_match is used,
+ while the virtual function prepare_look_for_matches is called to prepare
+ for such iteration process.
+
+ NOTES
The function produces all matching extensions for the records in the
- join buffer following the path of the Blocked Nested Loops algorithm.
+ join buffer following the path of the employed blocked algorithm.
When an outer join operation is performed all unmatched records from
the join buffer must be extended by null values. The function
'join_null_complements' serves this purpose.
- RETURN
- return one of enum_nested_loop_state.
+ RETURN VALUE
+ return one of enum_nested_loop_state
*/
-enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last)
+enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
{
- uint cnt;
int error;
- JOIN_TAB *tab;
- READ_RECORD *info;
enum_nested_loop_state rc= NESTED_LOOP_OK;
- bool check_only_first_match= join_tab->check_only_first_match();
- SQL_SELECT *select= join_tab->cache_select;
-
join_tab->table->null_row= 0;
+ bool check_only_first_match= join_tab->check_only_first_match();
+ bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
/* Return at once if there are no records in the join buffer */
if (!records)
@@ -1771,25 +2134,12 @@ enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last)
join_tab->select->quick= 0;
}
- for (tab= join->join_tab; tab != join_tab ; tab++)
- {
- tab->status= tab->table->status;
- tab->table->status= 0;
- }
-
- /* Start retrieving all records of the joined table */
- if ((error= join_init_read_record(join_tab)))
- {
- rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+ /* Prepare to retrieve all records of the joined table */
+ if ((error= join_tab_scan->open()))
goto finish;
- }
- info= &join_tab->read_record;
- do
+ while (!(error= join_tab_scan->next()))
{
- if (join_tab->keep_current_rowid)
- join_tab->table->file->position(join_tab->table->record[0]);
-
if (join->thd->killed)
{
/* The user has aborted the execution of the query */
@@ -1797,52 +2147,43 @@ enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last)
rc= NESTED_LOOP_KILLED;
goto finish;
}
- int err= 0;
-
- if (rc == NESTED_LOOP_OK)
- update_virtual_fields(join->thd, join_tab->table);
-
- /*
- Do not look for matches if the last read record of the joined table
- does not meet the conditions that have been pushed to this table
- */
- if (rc == NESTED_LOOP_OK &&
- (!select || (err= select->skip_record(join->thd)) != 0))
- {
- if (err < 0)
- return NESTED_LOOP_ERROR;
- rc= NESTED_LOOP_OK;
- /* Prepare to read records from the join buffer */
- reset(FALSE);
+ if (join_tab->keep_current_rowid)
+ join_tab->table->file->position(join_tab->table->record[0]);
+
+ /* Prepare to read matching candidates from the join buffer */
+ if (prepare_look_for_matches(skip_last))
+ continue;
- /* Read each record from the join buffer and look for matches */
- for (cnt= records - test(skip_last) ; cnt; cnt--)
- {
- /*
- If only the first match is needed and it has been already found for
- the next record read from the join buffer then the record is skipped.
- */
- if (!check_only_first_match || !skip_record_if_match())
- {
- get_record();
- rc= generate_full_extensions(get_curr_rec());
- if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
- goto finish;
- }
+ uchar *rec_ptr;
+ /* Read each possible candidate from the buffer and look for matches */
+ while ((rec_ptr= get_next_candidate_for_match()))
+ {
+ /*
+ If only the first match is needed and it has already been found for
+ the next record read from the join buffer, then the record is skipped.
+ Also those records that must be null complemented are not considered
+ as candidates for matches.
+ */
+ if ((!check_only_first_match && !outer_join_first_inner) ||
+ !skip_next_candidate_for_match(rec_ptr))
+ {
+ read_next_candidate_for_match(rec_ptr);
+ rc= generate_full_extensions(rec_ptr);
+ if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+ goto finish;
}
}
- } while (!(error= info->read_record(info)));
+ }
- if (error > 0) // Fatal error
- rc= NESTED_LOOP_ERROR;
-finish:
- for (tab= join->join_tab; tab != join_tab ; tab++)
- tab->table->status= tab->status;
+finish:
+ if (error)
+ rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+ join_tab_scan->close();
return rc;
}
-
+
/*
Set match flag for a record in join buffer if it has not been set yet
@@ -1862,7 +2203,7 @@ finish:
The function assumes that the match flag for any record in any cache
is placed in the first byte occupied by the record fields.
- RETURN
+ RETURN VALUE
TRUE the match flag is set by this call for the first time
FALSE the match flag has been set before this call
*/
@@ -1891,9 +2232,9 @@ bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
DBUG_ASSERT(cache);
rec_ptr= cache->get_rec_ref(rec_ptr);
}
- if (rec_ptr[0] == 0)
+ if ((Match_flag) rec_ptr[0] != MATCH_FOUND)
{
- rec_ptr[0]= 1;
+ rec_ptr[0]= MATCH_FOUND;
first_inner->found= 1;
return TRUE;
}
@@ -1914,7 +2255,7 @@ bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
case the function calls the join_tab->next_select method to generate
all full extension for this partial join match.
- RETURN
+ RETURN VALUE
return one of enum_nested_loop_state.
*/
@@ -1969,7 +2310,7 @@ enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
Setting the match flag on can trigger re-evaluation of pushdown conditions
for the record when join_tab is the last inner table of an outer join.
- RETURN
+ RETURN VALUE
TRUE there is a match
FALSE there is no match
*/
@@ -1977,7 +2318,7 @@ enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
{
/* Check whether pushdown conditions are satisfied */
- if (join_tab->select && join_tab->select->skip_record(join->thd) < 1)
+ if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0)
return FALSE;
if (!join_tab->is_last_inner_table())
@@ -2007,7 +2348,7 @@ inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
*/
for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
{
- if (tab->select && tab->select->skip_record(join->thd) < 1)
+ if (tab->select && tab->select->skip_record(join->thd) <= 0)
return FALSE;
}
}
@@ -2038,9 +2379,9 @@ inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
NOTES
The same implementation of the virtual method join_null_complements
- is used for JOIN_CACHE_BNL and JOIN_CACHE_BKA.
+ is used for the BNL/BNLH/BKA/BKAH join algorithms.
- RETURN
+ RETURN VALUE
return one of enum_nested_loop_state.
*/
@@ -2049,7 +2390,6 @@ enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
uint cnt;
enum_nested_loop_state rc= NESTED_LOOP_OK;
bool is_first_inner= join_tab == join_tab->first_unmatched;
- bool is_last_inner= join_tab == join_tab->first_unmatched->last_inner;
/* Return at once if there are no records in the join buffer */
if (!records)
@@ -2070,40 +2410,16 @@ enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
goto finish;
}
/* Just skip the whole record if a match for it has been already found */
- if (!is_first_inner || !skip_record_if_match())
+ if (!is_first_inner || !skip_if_matched())
{
get_record();
/* The outer row is complemented by nulls for each inner table */
restore_record(join_tab->table, s->default_values);
mark_as_null_row(join_tab->table);
- /* Check all pushdown conditions attached to the inner table */
- join_tab->first_unmatched->found= 1;
- if (join_tab->select && join_tab->select->skip_record(join->thd) < 1)
- continue;
- if (is_last_inner)
- {
- JOIN_TAB *first_upper= join_tab->first_unmatched->first_upper;
- while (first_upper && first_upper->last_inner == join_tab)
- {
- set_match_flag_if_none(first_upper, get_curr_rec());
- for (JOIN_TAB* tab= first_upper; tab <= join_tab; tab++)
- {
- if (tab->select && tab->select->skip_record(join->thd) < 1)
- goto next;
- }
- first_upper= first_upper->first_upper;
- }
- }
- /* Find all matches for the remaining join tables */
- rc= (*join_tab->next_select)(join, join_tab+1, 0);
+ rc= generate_full_extensions(get_curr_rec());
if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
- {
- reset(TRUE);
goto finish;
- }
}
- next:
- ;
}
finish:
@@ -2112,475 +2428,136 @@ finish:
/*
- Initialize retrieval of range sequence for BKA algorithm
-
- SYNOPSIS
- bka_range_seq_init()
- init_params pointer to the BKA join cache object
- n_ranges the number of ranges obtained
- flags combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
-
- DESCRIPTION
- The function interprets init_param as a pointer to a JOIN_CACHE_BKA
- object. The function prepares for an iteration over the join keys
- built for all records from the cache join buffer.
-
- NOTE
- This function are used only as a callback function.
-
- RETURN
- init_param value that is to be used as a parameter of bka_range_seq_next()
-*/
-
-static
-range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
-{
- DBUG_ENTER("bka_range_seq_init");
- JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param;
- cache->reset(0);
- DBUG_RETURN((range_seq_t) init_param);
-}
-
+ Add a comment on the join algorithm employed by the join cache
-/*
- Get the key over the next record from the join buffer used by BKA
-
SYNOPSIS
- bka_range_seq_next()
- seq the value returned by bka_range_seq_init
- range OUT reference to the next range
-
- DESCRIPTION
- The function interprets seq as a pointer to a JOIN_CACHE_BKA
- object. The function returns a pointer to the range descriptor
- for the key built over the next record from the join buffer.
-
- NOTE
- This function are used only as a callback function.
-
- RETURN
- 0 ok, the range structure filled with info about the next key
- 1 no more ranges
-*/
-
-static
-uint bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
-{
- DBUG_ENTER("bka_range_seq_next");
- JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
- TABLE_REF *ref= &cache->join_tab->ref;
- key_range *start_key= &range->start_key;
- if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
- {
- start_key->keypart_map= (1 << ref->key_parts) - 1;
- start_key->flag= HA_READ_KEY_EXACT;
- range->end_key= *start_key;
- range->end_key.flag= HA_READ_AFTER_KEY;
- range->ptr= (char *) cache->get_curr_rec();
- range->range_flag= EQ_RANGE;
- DBUG_RETURN(0);
- }
- DBUG_RETURN(1);
-}
-
-
-/*
- Check whether range_info orders to skip the next record from BKA buffer
+ print_explain_comment()
+ str string to add the comment on the employed join algorithm to
- SYNOPSIS
- bka_range_seq_skip_record()
- seq value returned by bka_range_seq_init()
- range_info information about the next range
- rowid [NOT USED] rowid of the record to be checked
-
-
DESCRIPTION
- The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
- The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
- object. The function returns TRUE if the record with this range_info
- is to be filtered out from the stream of records returned by
- multi_range_read_next().
-
- NOTE
- This function are used only as a callback function.
+ This function adds info on the type of the used join buffer (flat or
+ incremental) and on the type of the employed join algorithm (BNL,
+ BNLH, BKA or BKAH) to the end of the string str.
- RETURN
- 1 record with this range_info is to be filtered out from the stream
- of records returned by multi_range_read_next()
- 0 the record is to be left in the stream
+ RETURN VALUE
+ none
*/
-static
-bool bka_range_seq_skip_record(range_seq_t rseq, char *range_info, uchar *rowid)
+void JOIN_CACHE::print_explain_comment(String *str)
{
- DBUG_ENTER("bka_range_seq_skip_record");
- JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
- bool res= cache->get_match_flag_by_pos((uchar *) range_info);
- DBUG_RETURN(res);
-}
-
-/*
- Using BKA find matches from the next table for records from the join buffer
-
- SYNOPSIS
- join_matching_records()
- skip_last do not look for matches for the last partial join record
-
- DESCRIPTION
- This function can be used only when the table join_tab can be accessed
- by keys built over the fields of previous join tables.
- The function retrieves all partial join records from the join buffer and
- for each of them builds the key value to access join_tab, performs index
- look-up with this key and selects matching records yielded by this look-up
- If a match is found the function will call the sub_select function trying
- to look for matches for the remaining join operations.
- This function currently is called only from the function join_records.
- It's assumed that this function is always called with the skip_last
- parameter equal to false.
-
- NOTES
- The function produces all matching extensions for the records in the
- join buffer following the path of the Batched Key Access algorithm.
- When an outer join operation is performed all unmatched records from
- the join buffer must be extended by null values. The function
- join_null_complements serves this purpose.
- The Batched Key Access algorithm assumes that key accesses are batched.
- In other words it assumes that, first, either keys themselves or the
- corresponding rowids (primary keys) are accumulated in a buffer, then
- data rows from join_tab are fetched for all of them. When a row is
- fetched it is always returned with a reference to the key by which it
- has been accessed.
- When key values are batched we can save on the number of the server
- requests for index lookups. For the remote engines, like NDB cluster, it
- essentially reduces the number of round trips between the server and
- the engine when performing a join operation.
- When the rowids for the keys are batched we can optimize the order
- in what we fetch the data for this rowids. The performance benefits of
- this optimization can be significant for such engines as MyISAM, InnoDB.
- What is exactly batched are hidden behind implementations of
- MRR handler interface that is supposed to be appropriately chosen
- for each engine. If for a engine no specific implementation of the MRR
- interface is supllied then the default implementation is used. This
- implementation actually follows the path of Nested Loops Join algorithm.
- In this case BKA join surely will demonstrate a worse performance than
- NL join.
-
- RETURN
- return one of enum_nested_loop_state
-*/
-
-enum_nested_loop_state JOIN_CACHE_BKA::join_matching_records(bool skip_last)
-{
- int error;
- handler *file= join_tab->table->file;
- enum_nested_loop_state rc= NESTED_LOOP_OK;
- uchar *rec_ptr= 0;
- bool check_only_first_match= join_tab->check_only_first_match();
-
- /* Set functions to iterate over keys in the join buffer */
-
- RANGE_SEQ_IF seq_funcs= { bka_range_seq_init,
- bka_range_seq_next,
- check_only_first_match ?
- bka_range_seq_skip_record : 0,
- join_tab->cache_idx_cond ?
- bka_skip_index_tuple : 0 };
-
- /* The value of skip_last must be always FALSE when this function is called */
- DBUG_ASSERT(!skip_last);
-
- /* Return at once if there are no records in the join buffer */
- if (!records)
- return NESTED_LOOP_OK;
-
- rc= init_join_matching_records(&seq_funcs, records);
- if (rc != NESTED_LOOP_OK)
- goto finish;
-
- while (!(error= file->multi_range_read_next((char **) &rec_ptr)))
- {
- if (join->thd->killed)
- {
- /* The user has aborted the execution of the query */
- join->thd->send_kill_message();
- rc= NESTED_LOOP_KILLED;
- goto finish;
- }
- if (join_tab->keep_current_rowid)
- join_tab->table->file->position(join_tab->table->record[0]);
- /*
- If only the first match is needed and it has been already found
- for the associated partial join record then the returned candidate
- is discarded.
- */
- if (rc == NESTED_LOOP_OK &&
- (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
- {
- get_record_by_pos(rec_ptr);
- update_virtual_fields(join->thd, join_tab->table);
- rc= generate_full_extensions(rec_ptr);
- if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
- goto finish;
- }
- }
-
- if (error > 0 && error != HA_ERR_END_OF_FILE)
- return NESTED_LOOP_ERROR;
-finish:
- return end_join_matching_records(rc);
-}
-
-
-
-/*
- Prepare to search for records that match records from the join buffer
-
- SYNOPSIS
- init_join_matching_records()
- seq_funcs structure of range sequence interface
- ranges number of keys/ranges in the sequence
-
- DESCRIPTION
- This function calls the multi_range_read_init function to set up
- the BKA process of generating the keys from the records in the join
- buffer and looking for matching records from the table to be joined.
- The function passes as a parameter a structure of functions that
- implement the range sequence interface. This interface is used to
- enumerate all generated keys and optionally to filter the matching
- records returned by the multi_range_read_next calls from the
- intended invocation of the join_matching_records method. The
- multi_range_read_init function also receives the parameters for
- MRR buffer to be used and flags specifying the mode in which
- this buffer will be functioning.
- The number of keys in the sequence expected by multi_range_read_init
- is passed through the parameter ranges.
-
- RETURN
- return one of enum_nested_loop_state
-*/
-
-enum_nested_loop_state
-JOIN_CACHE_BKA::init_join_matching_records(RANGE_SEQ_IF *seq_funcs, uint ranges)
-{
- int error;
- handler *file= join_tab->table->file;
- enum_nested_loop_state rc= NESTED_LOOP_OK;
-
- join_tab->table->null_row= 0;
-
-
- /* Dynamic range access is never used with BKA */
- DBUG_ASSERT(join_tab->use_quick != 2);
-
- for (JOIN_TAB *tab =join->join_tab; tab != join_tab ; tab++)
- {
- tab->status= tab->table->status;
- tab->table->status= 0;
- }
-
- init_mrr_buff();
-
- /*
- Prepare to iterate over keys from the join buffer and to get
- matching candidates obtained with MMR handler functions.
- */
- if (!file->inited)
- file->ha_index_init(join_tab->ref.key, 1);
- if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges,
- mrr_mode, &mrr_buff)))
- rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+ str->append(STRING_WITH_LEN(" ("));
+ const char *buffer_type= prev_cache ? "incremental" : "flat";
+ str->append(buffer_type);
+ str->append(STRING_WITH_LEN(", "));
- return rc;
-}
+ const char *join_alg;
+ switch (get_join_alg()) {
+ case BNL_JOIN_ALG:
+ join_alg= "BNL";
+ break;
+ case BNLH_JOIN_ALG:
+ join_alg= "BNLH";
+ break;
+ case BKA_JOIN_ALG:
+ join_alg= "BKA";
+ break;
+ case BKAH_JOIN_ALG:
+ join_alg= "BKAH";
+ break;
+ default:
+ DBUG_ASSERT(0);
+ }
+ str->append(join_alg);
+ str->append(STRING_WITH_LEN(" join"));
+ str->append(STRING_WITH_LEN(")"));
+ }
+
/*
- Finish searching for records that match records from the join buffer
+ Initialize a hashed join cache
SYNOPSIS
- end_join_matching_records()
- rc return code passed by the join_matching_records function
+ init()
DESCRIPTION
- This function perform final actions on searching for all matches for
- the records from the join buffer and building all full join extensions
- of the records with these matches.
-
- RETURN
- return code rc passed to the function as a parameter
+ The function initializes the cache structure with a hash table in it.
+ The hash table will be used to store key values for the records from
+ the join buffer.
+ The function allocates memory for the join buffer and for descriptors of
+ the record fields stored in the buffer.
+ The function also initializes a hash table for record keys within the join
+ buffer space.
+
+ NOTES
+ The function is supposed to be called by the init methods of the classes
+ derived from JOIN_CACHE_HASHED.
+
+ RETURN VALUE
+ 0 initialization with buffer allocations has been succeeded
+ 1 otherwise
*/
-enum_nested_loop_state
-JOIN_CACHE_BKA::end_join_matching_records(enum_nested_loop_state rc)
+int JOIN_CACHE_HASHED::init()
{
- for (JOIN_TAB *tab=join->join_tab; tab != join_tab ; tab++)
- tab->table->status= tab->status;
- return rc;
-}
-
+ int rc= 0;
+ TABLE_REF *ref= &join_tab->ref;
-/*
- Get the key built over the next record from BKA join buffer
+ DBUG_ENTER("JOIN_CACHE_HASHED::init");
- SYNOPSIS
- get_next_key()
- key pointer to the buffer where the key value is to be placed
+ hash_table= 0;
+ key_entries= 0;
- DESCRIPTION
- The function reads key fields from the current record in the join buffer.
- and builds the key value out of these fields that will be used to access
- the 'join_tab' table. Some of key fields may belong to previous caches.
- They are accessed via record references to the record parts stored in the
- previous join buffers. The other key fields always are placed right after
- the flag fields of the record.
- If the key is embedded, which means that its value can be read directly
- from the join buffer, then *key is set to the beginning of the key in
- this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
- The function returns the length of the key if it succeeds ro read it.
- If is assumed that the functions starts reading at the position of
- the record length which is provided for each records in a BKA cache.
- After the key is built the 'pos' value points to the first position after
- the current record.
- The function returns 0 if the initial position is after the beginning
- of the record fields for last record from the join buffer.
+ key_length= ref->key_length;
- RETURN
- length of the key value - if the starting value of 'pos' points to
- the position before the fields for the last record,
- 0 - otherwise.
-*/
+ if ((rc= JOIN_CACHE::init()))
+ DBUG_RETURN (rc);
-uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
-{
- uint len;
- uint32 rec_len;
- uchar *init_pos;
- JOIN_CACHE *cache;
-
- if (pos > last_rec_pos || !records)
- return 0;
+ if (!(key_buff= (uchar*) sql_alloc(key_length)))
+ DBUG_RETURN(1);
- /* Any record in a BKA cache is prepended with its length */
- DBUG_ASSERT(with_length);
-
- /* Read the length of the record */
- rec_len= get_rec_length(pos);
- pos+= size_of_rec_len;
- init_pos= pos;
+ /* Take into account a reference to the next record in the key chain */
+ pack_length+= get_size_of_rec_offset();
+ pack_length_with_blob_ptrs+= get_size_of_rec_offset();
- /* Read a reference to the previous cache if any */
- if (prev_cache)
- pos+= prev_cache->get_size_of_rec_offset();
+ init_hash_table();
- curr_rec_pos= pos;
+ rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
+ (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
- /* Read all flag fields of the record */
- read_flag_fields();
-
+ data_fields_offset= 0;
if (use_emb_key)
{
- /* An embedded key is taken directly from the join buffer */
- *key= pos;
- len= emb_key_length;
- }
- else
- {
- /* Read key arguments from previous caches if there are any such fields */
- if (external_key_arg_fields)
- {
- uchar *rec_ptr= curr_rec_pos;
- uint key_arg_count= external_key_arg_fields;
- CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
- for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
- {
- uint len= 0;
- DBUG_ASSERT(cache);
- rec_ptr= cache->get_rec_ref(rec_ptr);
- while (!cache->referenced_fields)
- {
- cache= cache->prev_cache;
- DBUG_ASSERT(cache);
- rec_ptr= cache->get_rec_ref(rec_ptr);
- }
- while (key_arg_count &&
- cache->read_referenced_field(*copy_ptr, rec_ptr, &len))
- {
- copy_ptr++;
- --key_arg_count;
- }
- }
- }
-
- /*
- Read the other key arguments from the current record. The fields for
- these arguments are always first in the sequence of the record's fields.
- */
- CACHE_FIELD *copy= field_descr+flag_fields;
- CACHE_FIELD *copy_end= copy+local_key_arg_fields;
- bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
+ CACHE_FIELD *copy= field_descr;
+ CACHE_FIELD *copy_end= copy+flag_fields;
for ( ; copy < copy_end; copy++)
- read_record_field(copy, blob_in_rec_buff);
-
- /* Build the key over the fields read into the record buffers */
- TABLE_REF *ref= &join_tab->ref;
- cp_buffer_from_ref(join->thd, join_tab->table, ref);
- *key= ref->key_buff;
- len= ref->key_length;
- }
-
- pos= init_pos+rec_len;
+ data_fields_offset+= copy->length;
+ }
- return len;
-}
+ DBUG_RETURN(rc);
+}
/*
- Initialize a BKA_UNIQUE cache
+ Initialize the hash table of a hashed join cache
SYNOPSIS
- init()
+ init_hash_table()
DESCRIPTION
- The function initializes the cache structure. It supposed to be called
- right after a constructor for the JOIN_CACHE_BKA_UNIQUE.
- The function allocates memory for the join buffer and for descriptors of
- the record fields stored in the buffer.
- The function also estimates the number of hash table entries in the hash
- table to be used and initializes this hash table.
+ The function estimates the number of hash table entries in the hash
+ table to be used and initializes this hash table within the join buffer
+ space.
- NOTES
- The code of this function should have been included into the constructor
- code itself. However the new operator for the class JOIN_CACHE_BKA_UNIQUE
- would never fail while memory allocation for the join buffer is not
- absolutely unlikely to fail. That's why this memory allocation has to be
- placed in a separate function that is called in a couple with a cache
- constructor.
- It is quite natural to put almost all other constructor actions into
- this function.
-
- RETURN
- 0 initialization with buffer allocations has been succeeded
- 1 otherwise
+ RETURN VALUE
+ Currently the function always returns 0.
*/
-int JOIN_CACHE_BKA_UNIQUE::init()
+int JOIN_CACHE_HASHED::init_hash_table()
{
- int rc= 0;
- TABLE_REF *ref= &join_tab->ref;
-
- DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::init");
-
hash_table= 0;
key_entries= 0;
- if ((rc= JOIN_CACHE_BKA::init()))
- DBUG_RETURN (rc);
-
- key_length= ref->key_length;
-
- /* Take into account a reference to the next record in the key chain */
- pack_length+= get_size_of_rec_offset();
-
/* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
uint max_size_of_key_ofs= max(2, get_size_of_rec_offset());
for (size_of_key_ofs= 2;
@@ -2591,7 +2568,10 @@ int JOIN_CACHE_BKA_UNIQUE::init()
size_of_key_ofs + // reference to the next key
(use_emb_key ? get_size_of_rec_offset() : key_length);
- uint n= buff_size / (pack_length+key_entry_length+size_of_key_ofs);
+ ulong space_per_rec= avg_record_length +
+ avg_aux_buffer_incr +
+ key_entry_length+size_of_key_ofs;
+ uint n= buff_size / space_per_rec;
/*
TODO: Make a better estimate for this upper bound of
@@ -2601,6 +2581,7 @@ int JOIN_CACHE_BKA_UNIQUE::init()
key_entry_length+size_of_key_ofs);
hash_entries= (uint) (n / 0.7);
+ set_if_bigger(hash_entries, 1);
if (offset_size(max_n*key_entry_length) <=
size_of_key_ofs)
@@ -2612,27 +2593,74 @@ int JOIN_CACHE_BKA_UNIQUE::init()
cleanup_hash_table();
curr_key_entry= hash_table;
- pack_length+= key_entry_length;
- pack_length_with_blob_ptrs+= get_size_of_rec_offset() + key_entry_length;
+ return 0;
+}
- rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
- (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
- data_fields_offset= 0;
- if (use_emb_key)
- {
- CACHE_FIELD *copy= field_descr;
- CACHE_FIELD *copy_end= copy+flag_fields;
- for ( ; copy < copy_end; copy++)
- data_fields_offset+= copy->length;
- }
+/*
+ Reallocate the join buffer of a hashed join cache
+
+ SYNOPSIS
+ realloc_buffer()
- DBUG_RETURN(rc);
+ DESCRIPTION
+ The function reallocates the join buffer of the hashed join cache.
+ After this it initializes a hash table within the buffer space and
+ resets the join cache for writing.
+
+ NOTES
+ The function assumes that buff_size contains the new value for the join
+ buffer size.
+
+ RETURN VALUE
+ 0 if the buffer has been successfully reallocated
+ 1 otherwise
+*/
+
+int JOIN_CACHE_HASHED::realloc_buffer()
+{
+ int rc;
+ free();
+ rc= test(!(buff= (uchar*) my_malloc(buff_size, MYF(0))));
+ init_hash_table();
+ reset(TRUE);
+ return rc;
}
+/*
+ Get maximum size of the additional space per record used for record keys
+
+ SYNOPSIS
+ get_max_key_addon_space_per_record()
+
+ DESCRIPTION
+ The function returns the size of the space occupied by one key entry
+ and one hash table entry.
+
+ RETURN VALUE
+ maximum size of the additional space per record that is used to store
+ record keys in the hash table
+*/
+
+uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record()
+{
+ ulong len;
+ TABLE_REF *ref= &join_tab->ref;
+ /*
+ The total number of hash entries in the hash tables is bounded by
+ ceiling(N/0.7) where N is the maximum number of records in the buffer.
+ That's why the multiplier 2 is used in the formula below.
+ */
+ len= (use_emb_key ? get_size_of_rec_offset() : ref->key_length) +
+ size_of_rec_ofs + // size of the key chain header
+ size_of_rec_ofs + // >= size of the reference to the next key
+ 2*size_of_rec_ofs; // >= 2*( size of hash table entry)
+ return len;
+}
+
/*
- Reset the JOIN_CACHE_BKA_UNIQUE buffer for reading/writing
+ Reset the buffer of a hashed join cache for reading/writing
SYNOPSIS
reset()
@@ -2640,15 +2668,15 @@ int JOIN_CACHE_BKA_UNIQUE::init()
DESCRIPTION
This implementation of the virtual function reset() resets the join buffer
- of the JOIN_CACHE_BKA_UNIQUE class for reading or writing.
+ of the JOIN_CACHE_HASHED class for reading or writing.
Additionally to what the default implementation does this function
cleans up the hash table allocated within the buffer.
- RETURN
+ RETURN VALUE
none
*/
-void JOIN_CACHE_BKA_UNIQUE::reset(bool for_writing)
+void JOIN_CACHE_HASHED::reset(bool for_writing)
{
this->JOIN_CACHE::reset(for_writing);
if (for_writing && hash_table)
@@ -2656,15 +2684,16 @@ void JOIN_CACHE_BKA_UNIQUE::reset(bool for_writing)
curr_key_entry= hash_table;
}
+
/*
- Add a record into the JOIN_CACHE_BKA_UNIQUE buffer
+ Add a record into the buffer of a hashed join cache
SYNOPSIS
put_record()
DESCRIPTION
This implementation of the virtual function put_record writes the next
- matching record into the join buffer of the JOIN_CACHE_BKA_UNIQUE class.
+ matching record into the join buffer of the JOIN_CACHE_HASHED class.
Additionally to what the default implementation does this function
performs the following.
It extracts from the record the key value used in lookups for matching
@@ -2675,14 +2704,16 @@ void JOIN_CACHE_BKA_UNIQUE::reset(bool for_writing)
is attached to the key entry. The key value is either placed in the hash
element added for the key or, if the use_emb_key flag is set, remains in
the record from the partial join.
+ If the match flag field of a record contains MATCH_IMPOSSIBLE the key is
+ not created for this record.
- RETURN
+ RETURN VALUE
TRUE if it has been decided that it should be the last record
in the join buffer,
FALSE otherwise
*/
-bool JOIN_CACHE_BKA_UNIQUE::put_record()
+bool JOIN_CACHE_HASHED::put_record()
{
bool is_full;
uchar *key;
@@ -2698,6 +2729,9 @@ bool JOIN_CACHE_BKA_UNIQUE::put_record()
link= prev_cache->get_curr_rec_link();
write_record_data(link, &is_full);
+ if (last_written_is_null_compl)
+ return is_full;
+
if (use_emb_key)
key= get_curr_emb_key();
else
@@ -2751,6 +2785,7 @@ bool JOIN_CACHE_BKA_UNIQUE::put_record()
memcpy(cp, key, key_len);
}
last_key_entry= cp;
+ DBUG_ASSERT(last_key_entry >= end_pos);
/* Increment the counter of key_entries in the hash table */
key_entries++;
}
@@ -2759,7 +2794,7 @@ bool JOIN_CACHE_BKA_UNIQUE::put_record()
/*
- Read the next record from the JOIN_CACHE_BKA_UNIQUE buffer
+ Read the next record from the buffer of a hashed join cache
SYNOPSIS
get_record()
@@ -2769,12 +2804,12 @@ bool JOIN_CACHE_BKA_UNIQUE::put_record()
function get_record does this implementation skips the link element
used to connect the records with the same key into a chain.
- RETURN
- TRUE - there are no more records to read from the join buffer
- FALSE - otherwise
+ RETURN VALUE
+ TRUE there are no more records to read from the join buffer
+ FALSE otherwise
*/
-bool JOIN_CACHE_BKA_UNIQUE::get_record()
+bool JOIN_CACHE_HASHED::get_record()
{
pos+= get_size_of_rec_offset();
return this->JOIN_CACHE::get_record();
@@ -2782,26 +2817,55 @@ bool JOIN_CACHE_BKA_UNIQUE::get_record()
/*
- Skip record from the JOIN_CACHE_BKA_UNIQUE join buffer if its match flag is on
+ Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND
SYNOPSIS
- skip_record_if_match()
+ skip_if_matched()
DESCRIPTION
- This implementation of the virtual function skip_record_if_match does
+ This implementation of the virtual function skip_if_matched does
the same as the default implementation does, but it takes into account
the link element used to connect the records with the same key into a chain.
- RETURN
- TRUE - the match flag is on and the record has been skipped
- FALSE - the match flag is off
+ RETURN VALUE
+ TRUE the match flag is MATCH_FOUND and the record has been skipped
+ FALSE otherwise
*/
-bool JOIN_CACHE_BKA_UNIQUE::skip_record_if_match()
+bool JOIN_CACHE_HASHED::skip_if_matched()
{
uchar *save_pos= pos;
pos+= get_size_of_rec_offset();
- if (!this->JOIN_CACHE::skip_record_if_match())
+ if (!this->JOIN_CACHE::skip_if_matched())
+ {
+ pos= save_pos;
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
+/*
+ Skip record from a hashed join buffer if its match flag dictates to do so
+
+ SYNOPSIS
+ skip_if_not_needed_match()
+
+ DESCRIPTION
+ This implementation of the virtual function skip_if_not_needed_match does
+ the same as the default implementation does, but it takes into account
+ the link element used to connect the records with the same key into a chain.
+
+ RETURN VALUE
+ TRUE the match flag dictates to skip the record
+ FALSE the match flag is off
+*/
+
+bool JOIN_CACHE_HASHED::skip_if_not_needed_match()
+{
+ uchar *save_pos= pos;
+ pos+= get_size_of_rec_offset();
+ if (!this->JOIN_CACHE::skip_if_not_needed_match())
{
pos= save_pos;
return FALSE;
@@ -2830,13 +2894,13 @@ bool JOIN_CACHE_BKA_UNIQUE::skip_record_if_match()
Otherwise the function returns the position where the reference to the
newly created hash element for the given key is to be added.
- RETURN
- TRUE - the key is found in the hash table
- FALSE - otherwise
+ RETURN VALUE
+ TRUE the key is found in the hash table
+ FALSE otherwise
*/
-bool JOIN_CACHE_BKA_UNIQUE::key_search(uchar *key, uint key_len,
- uchar **key_ref_ptr)
+bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
+ uchar **key_ref_ptr)
{
bool is_found= FALSE;
uint idx= get_hash_idx(key, key_length);
@@ -2871,11 +2935,11 @@ bool JOIN_CACHE_BKA_UNIQUE::key_search(uchar *key, uint key_len,
The function calculates an index of the hash entry in the hash table
of the join buffer for the given key
- RETURN
+ RETURN VALUE
the calculated index of the hash entry for the given key.
*/
-uint JOIN_CACHE_BKA_UNIQUE::get_hash_idx(uchar* key, uint key_len)
+uint JOIN_CACHE_HASHED::get_hash_idx(uchar* key, uint key_len)
{
ulong nr= 1;
ulong nr2= 4;
@@ -2902,11 +2966,11 @@ uint JOIN_CACHE_BKA_UNIQUE::get_hash_idx(uchar* key, uint key_len)
The function cleans up the hash table in the join buffer removing all
hash elements from the table.
- RETURN
+ RETURN VALUE
none
*/
-void JOIN_CACHE_BKA_UNIQUE:: cleanup_hash_table()
+void JOIN_CACHE_HASHED:: cleanup_hash_table()
{
last_key_entry= hash_table;
bzero(hash_table, (buff+buff_size)-hash_table);
@@ -2915,64 +2979,680 @@ void JOIN_CACHE_BKA_UNIQUE:: cleanup_hash_table()
/*
- Initialize retrieval of range sequence for BKA_UNIQUE algorithm
+ Check whether all records in a key chain have their match flags set on
+
+ SYNOPSIS
+ check_all_match_flags_for_key()
+ key_chain_ptr
+
+ DESCRIPTION
+ This function retrieves records in the given circular chain and checks
+ whether their match flags are set on. The parameter key_chain_ptr shall
+ point to the position in the join buffer storing the reference to the
+ last element of this chain.
+
+ RETURN VALUE
+ TRUE if each retrieved record has its match flag set to MATCH_FOUND
+ FALSE otherwise
+*/
+
+bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr)
+{
+ uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
+ uchar *next_rec_ref_ptr= last_rec_ref_ptr;
+ do
+ {
+ next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
+ uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
+ if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND)
+ return FALSE;
+ }
+ while (next_rec_ref_ptr != last_rec_ref_ptr);
+ return TRUE;
+}
+
+
+/*
+ Get the next key built for the records from the buffer of a hashed join cache
+
+ SYNOPSIS
+ get_next_key()
+ key pointer to the buffer where the key value is to be placed
+
+ DESCRIPTION
+ The function reads the next key value stored in the hash table of the
+ join buffer. Depending on the value of the use_emb_key flag of the
+ join cache the value is read either from the table itself or from
+ the record field where it occurs.
+
+ RETURN VALUE
+ length of the key value - if the starting value of 'curr_key_entry' refers
+ to the position after that referred by the value of 'last_key_entry',
+ 0 - otherwise.
+*/
+
+uint JOIN_CACHE_HASHED::get_next_key(uchar ** key)
+{
+ if (curr_key_entry == last_key_entry)
+ return 0;
+
+ curr_key_entry-= key_entry_length;
+
+ *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry;
+
+ DBUG_ASSERT(*key >= buff && *key < hash_table);
+
+ return key_length;
+}
+
+
+/*
+ Initiate an iteration process over records in the joined table
+
+ SYNOPSIS
+ open()
+
+ DESCRIPTION
+ The function initiates the process of iteration over records from the
+ joined table recurrently performed by the BNL/BNLH join algorithm.
+
+ RETURN VALUE
+ 0 the initiation is a success
+ error code otherwise
+*/
+
+int JOIN_TAB_SCAN::open()
+{
+ for (JOIN_TAB *tab= join->join_tab; tab != join_tab ; tab++)
+ {
+ tab->status= tab->table->status;
+ tab->table->status= 0;
+ }
+ is_first_record= TRUE;
+ return join_init_read_record(join_tab);
+}
+
+
+/*
+ Read the next record that can match while scanning the joined table
+
+ SYNOPSIS
+ next()
+
+ DESCRIPTION
+ The function reads the next record from the joined table that can
+ match some records in the buffer of the join cache 'cache'. To do
+ this the function calls the function that scans table records and
+ looks for the next one that meets the condition pushed to the
+ joined table join_tab.
+
+ NOTES
+ The function catches the signal that kills the query.
+
+ RETURN VALUE
+ 0 the next record exists and has been successfully read
+ error code otherwise
+*/
+
+int JOIN_TAB_SCAN::next()
+{
+ int err= 0;
+ int skip_rc;
+ READ_RECORD *info= &join_tab->read_record;
+ SQL_SELECT *select= join_tab->cache_select;
+ if (is_first_record)
+ is_first_record= FALSE;
+ else
+ err= info->read_record(info);
+ if (!err)
+ update_virtual_fields(join->thd, join_tab->table);
+ while (!err && select && (skip_rc= select->skip_record(join->thd)) <= 0)
+ {
+ if (join->thd->killed || skip_rc < 0)
+ return 1;
+ /*
+ Move to the next record if the last retrieved record does not
+ meet the condition pushed to the table join_tab.
+ */
+ err= info->read_record(info);
+ if (!err)
+ update_virtual_fields(join->thd, join_tab->table);
+ }
+ return err;
+}
+
+
+/*
+ Perform finalizing actions for a scan over the table records
+
+ SYNOPSIS
+ close()
+
+ DESCRIPTION
+ The function performs the necessary restoring actions after
+ the table scan over the joined table has been finished.
+
+ RETURN VALUE
+ none
+*/
+
+void JOIN_TAB_SCAN::close()
+{
+ for (JOIN_TAB *tab= join->join_tab; tab != join_tab ; tab++)
+ tab->table->status= tab->status;
+}
+
+
+/*
+ Prepare to iterate over the BNL join cache buffer to look for matches
+
+ SYNOPSIS
+ prepare_look_for_matches()
+ skip_last <-> ignore the last record in the buffer
+
+ DESCRIPTION
+ The function prepares the join cache for an iteration over the
+ records in the join buffer. The iteration is performed when looking
+ for matches for the record from the joined table join_tab that
+ has been placed into the record buffer of the joined table.
+ If the value of the parameter skip_last is TRUE then the last
+ record from the join buffer is ignored.
+ The function initializes the counter of the records that have been
+ not iterated over yet.
+
+ RETURN VALUE
+ TRUE there are no records in the buffer to iterate over
+ FALSE otherwise
+*/
+
+bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last)
+{
+ if (!records)
+ return TRUE;
+ reset(FALSE);
+ rem_records= records-test(skip_last);
+ return rem_records == 0;
+}
+
+
+/*
+ Get next record from the BNL join cache buffer when looking for matches
+
+ SYNOPSIS
+ get_next_candidate_for_match
+
+ DESCRIPTION
+ This method is used for iterations over the records from the join
+ cache buffer when looking for matches for records from join_tab.
+ The methods performs the necessary preparations to read the next record
+ from the join buffer into the record buffer by the method
+ read_next_candidate_for_match, or, to skip the next record from the join
+ buffer by the method skip_next_candidate_for_match.
+ This implementation of the virtual method get_next_candidate_for_match
+ just decrements the counter of the records that are to be iterated over
+ and returns the current value of the cursor 'pos' as the position of
+ the record to be processed.
+
+ RETURN VALUE
+ pointer to the position right after the prefix of the current record
+ in the join buffer if the there is another record to iterate over,
+ 0 - otherwise.
+*/
+
+uchar *JOIN_CACHE_BNL::get_next_candidate_for_match()
+{
+ if (!rem_records)
+ return 0;
+ rem_records--;
+ return pos+base_prefix_length;
+}
+
+
+/*
+ Check whether the matching record from the BNL cache is to be skipped
+
+ SYNOPSIS
+ skip_next_candidate_for_match
+ rec_ptr pointer to the position in the join buffer right after the prefix
+ of the current record
+
+ DESCRIPTION
+ This implementation of the virtual function just calls the
+ method skip_if_not_needed_match to check whether the record referenced by
+ rec_ptr has its match flag set either to MATCH_FOUND and join_tab is the
+ first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and
+ join_tab is the first inner table of an outer join.
+ If so, the function just skips this record setting the value of the
+ cursor 'pos' to the position right after it.
+
+ RETURN VALUE
+ TRUE the record referenced by rec_ptr has been skipped
+ FALSE otherwise
+*/
+
+bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr)
+{
+ pos= rec_ptr-base_prefix_length;
+ return skip_if_not_needed_match();
+}
+
+
+/*
+ Read next record from the BNL join cache buffer when looking for matches
+
+ SYNOPSIS
+ read_next_candidate_for_match
+ rec_ptr pointer to the position in the join buffer right after the prefix
+ the current record.
+
+ DESCRIPTION
+ This implementation of the virtual method read_next_candidate_for_match
+ calls the method get_record to read the record referenced by rec_ptr from
+ the join buffer into the record buffer. If this record refers to the
+ fields in the other join buffers the call of get_record ensures that
+ these fields are read into the corresponding record buffers as well.
+ This function is supposed to be called after a successful call of
+ the method get_next_candidate_for_match.
+
+ RETURN VALUE
+ none
+*/
+
+void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr)
+{
+ pos= rec_ptr-base_prefix_length;
+ get_record();
+}
+
+
+/*
+ Initialize the BNL join cache
+
+ SYNOPSIS
+ init
+
+ DESCRIPTION
+ The function initializes the cache structure. It is supposed to be called
+ right after a constructor for the JOIN_CACHE_BNL.
+
+ NOTES
+ The function first constructs a companion object of the type JOIN_TAB_SCAN,
+ then it calls the init method of the parent class.
+
+ RETURN VALUE
+ 0 initialization with buffer allocations has been succeeded
+ 1 otherwise
+*/
+
+int JOIN_CACHE_BNL::init()
+{
+ DBUG_ENTER("JOIN_CACHE_BNL::init");
+
+ if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab)))
+ DBUG_RETURN(1);
+
+ DBUG_RETURN(JOIN_CACHE::init());
+}
+
+
+/*
+ Get the chain of records from buffer matching the current candidate for join
+
+ SYNOPSIS
+ get_matching_chain_by_join_key()
+
+ DESCRIPTION
+ This function first build a join key for the record of join_tab that
+ currently is in the join buffer for this table. Then it looks for
+ the key entry with this key in the hash table of the join cache.
+ If such a key entry is found the function returns the pointer to
+ the head of the chain of records in the join_buffer that match this
+ key.
+
+ RETURN VALUE
+ The pointer to the corresponding circular list of records if
+ the key entry with the join key is found, 0 - otherwise.
+*/
+
+uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key()
+{
+ uchar *key_ref_ptr;
+ TABLE *table= join_tab->table;
+ TABLE_REF *ref= &join_tab->ref;
+ KEY *keyinfo= table->key_info+ref->key;
+ /* Build the join key value out of the record in the record buffer */
+ key_copy(key_buff, table->record[0], keyinfo, key_length);
+ /* Look for this key in the join buffer */
+ if (!key_search(key_buff, key_length, &key_ref_ptr))
+ return 0;
+ return key_ref_ptr+get_size_of_key_offset();
+}
+
+
+/*
+ Prepare to iterate over the BNLH join cache buffer to look for matches
+
+ SYNOPSIS
+ prepare_look_for_matches()
+ skip_last <-> ignore the last record in the buffer
+
+ DESCRIPTION
+ The function prepares the join cache for an iteration over the
+ records in the join buffer. The iteration is performed when looking
+ for matches for the record from the joined table join_tab that
+ has been placed into the record buffer of the joined table.
+ If the value of the parameter skip_last is TRUE then the last
+ record from the join buffer is ignored.
+ The function builds the hashed key from the join fields of join_tab
+ and uses this key to look in the hash table of the join cache for
+ the chain of matching records in the join buffer. If it finds
+ such a chain it sets the member last_matching_rec_ref_ptr to point to the
+ last link of the chain while setting the member next_matching_rec_ref_ptr to 0.
+
+ RETURN VALUE
+ TRUE there are no matching records in the buffer to iterate over
+ FALSE otherwise
+*/
+
+bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last)
+{
+ uchar *curr_matching_chain;
+ last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0;
+ if (!(curr_matching_chain= get_matching_chain_by_join_key()))
+ return 1;
+ last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain);
+ return 0;
+}
+
+
+/*
+ Get next record from the BNLH join cache buffer when looking for matches
+
+ SYNOPSIS
+ get_next_candidate_for_match
+
+ DESCRIPTION
+ This method is used for iterations over the records from the join
+ cache buffer when looking for matches for records from join_tab.
+ The methods performs the necessary preparations to read the next record
+ from the join buffer into the record buffer by the method
+ read_next_candidate_for_match, or, to skip the next record from the join
+ buffer by the method skip_next_candidate_for_match.
+ This implementation of the virtual method moves to the next record
+ in the chain of all records from the join buffer that are to be
+ equi-joined with the current record from join_tab.
+
+ RETURN VALUE
+ pointer to the beginning of the record fields in the join buffer
+ if the there is another record to iterate over, 0 - otherwise.
+*/
+
+uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match()
+{
+ if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr)
+ return 0;
+ next_matching_rec_ref_ptr= get_next_rec_ref(next_matching_rec_ref_ptr ?
+ next_matching_rec_ref_ptr :
+ last_matching_rec_ref_ptr);
+ return next_matching_rec_ref_ptr+rec_fields_offset;
+}
+
+
+/*
+ Check whether the matching record from the BNLH cache is to be skipped
+
+ SYNOPSIS
+ skip_next_candidate_for_match
+ rec_ptr pointer to the position in the join buffer right after
+ the previous record
+
+ DESCRIPTION
+ This implementation of the virtual function just calls the
+ method get_match_flag_by_pos to check whether the record referenced
+ by rec_ptr has its match flag set to MATCH_FOUND.
+
+ RETURN VALUE
+ TRUE the record referenced by rec_ptr has its match flag set to
+ MATCH_FOUND
+ FALSE otherwise
+*/
+
+bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr)
+{
+ return join_tab->check_only_first_match() &&
+ (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND);
+}
+
+
+/*
+ Read next record from the BNLH join cache buffer when looking for matches
+
+ SYNOPSIS
+ read_next_candidate_for_match
+ rec_ptr pointer to the position in the join buffer right after
+ the previous record
+
+ DESCRIPTION
+ This implementation of the virtual method read_next_candidate_for_match
+ calls the method get_record_by_pos to read the record referenced by rec_ptr
+ from the join buffer into the record buffer. If this record refers to
+ fields in the other join buffers the call of get_record_by_pos ensures that
+ these fields are read into the corresponding record buffers as well.
+ This function is supposed to be called after a successful call of
+ the method get_next_candidate_for_match.
+
+ RETURN VALUE
+ none
+*/
+
+void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr)
+{
+ get_record_by_pos(rec_ptr);
+}
+
+
+/*
+ Initialize the BNLH join cache
+
+ SYNOPSIS
+ init
+
+ DESCRIPTION
+ The function initializes the cache structure. It is supposed to be called
+ right after a constructor for the JOIN_CACHE_BNLH.
+
+ NOTES
+ The function first constructs a companion object of the type JOIN_TAB_SCAN,
+ then it calls the init method of the parent class.
+
+ RETURN VALUE
+ 0 initialization with buffer allocations has been succeeded
+ 1 otherwise
+*/
+
+int JOIN_CACHE_BNLH::init()
+{
+ DBUG_ENTER("JOIN_CACHE_BNLH::init");
+
+ if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab)))
+ DBUG_RETURN(1);
+
+ DBUG_RETURN(JOIN_CACHE_HASHED::init());
+}
+
+
+/*
+ Calculate the increment of the MRR buffer for a record write
+
+ SYNOPSIS
+ aux_buffer_incr()
+
+ DESCRIPTION
+ This implementation of the virtual function aux_buffer_incr determines
+ for how much the size of the MRR buffer should be increased when another
+ record is added to the cache.
+
+ RETURN VALUE
+ the increment of the size of the MRR buffer for the next record
+*/
+
+uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(ulong recno)
+{
+ uint incr= 0;
+ TABLE_REF *ref= &join_tab->ref;
+ TABLE *tab= join_tab->table;
+ uint rec_per_key= tab->key_info[ref->key].rec_per_key[ref->key_parts-1];
+ set_if_bigger(rec_per_key, 1);
+ if (recno == 1)
+ incr= ref->key_length + tab->file->ref_length;
+ incr+= tab->file->stats.mrr_length_per_rec * rec_per_key;
+ return incr;
+}
+
+
+/*
+ Initiate iteration over records returned by MRR for the current join buffer
+
+ SYNOPSIS
+ open()
+
+ DESCRIPTION
+ The function initiates the process of iteration over the records from
+ join_tab returned by the MRR interface functions for records from
+ the join buffer. Such an iteration is performed by the BKA/BKAH join
+ algorithm for each new refill of the join buffer.
+ The function calls the MRR handler function multi_range_read_init to
+ initiate this process.
+
+ RETURN VALUE
+ 0 the initiation is a success
+ error code otherwise
+*/
+
+int JOIN_TAB_SCAN_MRR::open()
+{
+ handler *file= join_tab->table->file;
+
+ join_tab->table->null_row= 0;
+
+
+ /* Dynamic range access is never used with BKA */
+ DBUG_ASSERT(join_tab->use_quick != 2);
+
+ for (JOIN_TAB *tab =join->join_tab; tab != join_tab ; tab++)
+ {
+ tab->status= tab->table->status;
+ tab->table->status= 0;
+ }
+
+ init_mrr_buff();
+
+ /*
+ Prepare to iterate over keys from the join buffer and to get
+ matching candidates obtained with MRR handler functions.
+ */
+ if (!file->inited)
+ file->ha_index_init(join_tab->ref.key, 1);
+ ranges= cache->get_number_of_ranges_for_mrr();
+ if (!join_tab->cache_idx_cond)
+ range_seq_funcs.skip_index_tuple= 0;
+ return file->multi_range_read_init(&range_seq_funcs, (void*) cache,
+ ranges, mrr_mode, &mrr_buff);
+}
+
+
+/*
+ Read the next record returned by MRR for the current join buffer
+
+ SYNOPSIS
+ next()
+
+ DESCRIPTION
+ The function reads the next record from the joined table join_tab
+ returned by the MRR handler function multi_range_read_next for
+ the current refill of the join buffer. The record is read into
+ the record buffer used for join_tab records in join operations.
+
+ RETURN VALUE
+ 0 the next record exists and has been successfully read
+ error code otherwise
+*/
+
+int JOIN_TAB_SCAN_MRR::next()
+{
+ char **ptr= (char **) cache->get_curr_association_ptr();
+ int rc= join_tab->table->file->multi_range_read_next(ptr) ? -1 : 0;
+ if (!rc)
+ {
+ /*
+ If a record in an incremental cache contains no fields then the
+ association for the last record in cache will be equal to cache->end_pos
+ */
+ DBUG_ASSERT(cache->buff <= (uchar *) (*ptr) &&
+ (uchar *) (*ptr) <= cache->end_pos);
+ update_virtual_fields(join->thd, join_tab->table);
+ }
+ return rc;
+}
+
+
+/*
+ Initialize retrieval of range sequence for BKA join algorithm
SYNOPSIS
bka_range_seq_init()
- init_params pointer to the BKA_INIQUE join cache object
- n_ranges the number of ranges obtained
- flags combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+ init_params pointer to the BKA join cache object
+ n_ranges the number of ranges obtained
+ flags combination of MRR flags
DESCRIPTION
- The function interprets init_param as a pointer to a JOIN_CACHE_BKA_UNIQUE
- object. The function prepares for an iteration over the unique join keys
- built over the records from the cache join buffer.
+ The function interprets init_param as a pointer to a JOIN_CACHE_BKA
+ object. The function prepares for an iteration over the join keys
+ built for all records from the cache join buffer.
NOTE
This function are used only as a callback function.
- RETURN
- init_param value that is to be used as a parameter of
- bka_unique_range_seq_next()
+ RETURN VALUE
+ init_param value that is to be used as a parameter of bka_range_seq_next()
*/
static
-range_seq_t bka_unique_range_seq_init(void *init_param, uint n_ranges,
- uint flags)
+range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
{
- DBUG_ENTER("bka_unique_range_seq_init");
- JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) init_param;
+ DBUG_ENTER("bka_range_seq_init");
+ JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param;
cache->reset(0);
DBUG_RETURN((range_seq_t) init_param);
}
/*
- Get the key over the next record from the join buffer used by BKA_UNIQUE
+ Get the next range/key over records from the join buffer used by a BKA cache
SYNOPSIS
- bka_unique_range_seq_next()
- seq value returned by bka_unique_range_seq_init()
+ bka_range_seq_next()
+ seq the value returned by bka_range_seq_init
range OUT reference to the next range
DESCRIPTION
- The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
+ The function interprets seq as a pointer to a JOIN_CACHE_BKA
object. The function returns a pointer to the range descriptor
- for the next unique key built over records from the join buffer.
+ for the key built over the next record from the join buffer.
NOTE
This function are used only as a callback function.
- RETURN
- 0 ok, the range structure filled with info about the next key
- 1 no more ranges
+ RETURN VALUE
+ 0 ok, the range structure filled with info about the next range/key
+ 1 no more ranges
*/
static
-uint bka_unique_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+uint bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
{
- DBUG_ENTER("bka_unique_range_seq_next");
- JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
+ DBUG_ENTER("bka_range_seq_next");
+ JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
TABLE_REF *ref= &cache->join_tab->ref;
key_range *start_key= &range->start_key;
if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
@@ -2981,7 +3661,7 @@ uint bka_unique_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
start_key->flag= HA_READ_KEY_EXACT;
range->end_key= *start_key;
range->end_key.flag= HA_READ_AFTER_KEY;
- range->ptr= (char *) cache->get_curr_key_chain();
+ range->ptr= (char *) cache->get_curr_rec();
range->range_flag= EQ_RANGE;
DBUG_RETURN(0);
}
@@ -2990,305 +3670,652 @@ uint bka_unique_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
/*
- Check whether range_info orders to skip the next record from BKA_UNIQUE buffer
+ Check whether range_info orders to skip the next record from BKA buffer
SYNOPSIS
- bka_unique_range_seq_skip_record()
- seq value returned by bka_unique_range_seq_init()
+ bka_range_seq_skip_record()
+ seq value returned by bka_range_seq_init()
range_info information about the next range
- rowid [NOT USED] rowid of the record to be checked (not used)
+ rowid [NOT USED] rowid of the record to be checked
+
DESCRIPTION
- The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
- object. The function returns TRUE if the record with this range_info
- is to be filtered out from the stream of records returned by
+ The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
+ The function returns TRUE if the record with this range_info
+ is to be filtered out from the stream of records returned by
multi_range_read_next().
NOTE
This function are used only as a callback function.
- RETURN
+ RETURN VALUE
1 record with this range_info is to be filtered out from the stream
of records returned by multi_range_read_next()
0 the record is to be left in the stream
*/
static
-bool bka_unique_range_seq_skip_record(range_seq_t rseq, char *range_info,
- uchar *rowid)
+bool bka_range_seq_skip_record(range_seq_t rseq, char *range_info, uchar *rowid)
{
- DBUG_ENTER("bka_unique_range_seq_skip_record");
- JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
- bool res= cache->check_all_match_flags_for_key((uchar *) range_info);
+ DBUG_ENTER("bka_range_seq_skip_record");
+ JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+ bool res= cache->get_match_flag_by_pos((uchar *) range_info) ==
+ JOIN_CACHE::MATCH_FOUND;
DBUG_RETURN(res);
}
-
+
/*
- Check if the record combination matches the index condition
+ Check if the record combination from BKA cache matches the index condition
SYNOPSIS
- JOIN_CACHE_BKA_UNIQUE::skip_index_tuple()
- rseq Value returned by bka_range_seq_init()
- range_info MRR range association data
+ bka_skip_index_tuple()
+ rseq value returned by bka_range_seq_init()
+ range_info record chain for the next range/key returned by MRR
DESCRIPTION
- See JOIN_CACHE_BKA::skip_index_tuple().
- This function is the variant for use with
- JOIN_CACHE_BKA_UNIQUE. The difference from JOIN_CACHE_BKA case is that
- there may be multiple previous table record combinations that share the
- same key, i.e. they map to the same MRR range.
- As a consequence, we need to loop through all previous table record
- combinations that match the given MRR range key range_info until we find
- one that satisfies the index condition.
+ This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method,
+ see comments there.
NOTE
- Possible optimization:
- Before we unpack the record from a previous table
- check if this table is used in the condition.
- If so then unpack the record otherwise skip the unpacking.
- This should be done by a special virtual method
- get_partial_record_by_pos().
-
- RETURN
+ This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
+
+ RETURN VALUE
0 The record combination satisfies the index condition
1 Otherwise
-
-
*/
-bool JOIN_CACHE_BKA_UNIQUE::skip_index_tuple(range_seq_t rseq, char *range_info)
+static
+bool bka_skip_index_tuple(range_seq_t rseq, char *range_info)
{
- DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::skip_index_tuple");
- JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
- uchar *last_rec_ref_ptr= cache->get_next_rec_ref((uchar*) range_info);
- uchar *next_rec_ref_ptr= last_rec_ref_ptr;
- do
- {
- next_rec_ref_ptr= cache->get_next_rec_ref(next_rec_ref_ptr);
- uchar *rec_ptr= next_rec_ref_ptr + cache->rec_fields_offset;
- cache->get_record_by_pos(rec_ptr);
- if (join_tab->cache_idx_cond->val_int())
- DBUG_RETURN(FALSE);
- } while(next_rec_ref_ptr != last_rec_ref_ptr);
- DBUG_RETURN(TRUE);
+ DBUG_ENTER("bka_skip_index_tuple");
+ JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+ DBUG_RETURN(cache->skip_index_tuple(range_info));
}
/*
- Check if the record combination matches the index condition
+ Prepare to read the record from BKA cache matching the current joined record
SYNOPSIS
- bka_unique_skip_index_tuple()
- rseq Value returned by bka_range_seq_init()
- range_info MRR range association data
+ prepare_look_for_matches()
+ skip_last <-> ignore the last record in the buffer (always unused here)
+
+ DESCRIPTION
+ The function prepares to iterate over records in the join cache buffer
+ matching the record loaded into the record buffer for join_tab when
+ performing join operation by BKA join algorithm. With BKA algorithms the
+ record loaded into the record buffer for join_tab always has a direct
+ reference to the matching records from the join buffer. When the regular
+ BKA join algorithm is employed the record from join_tab can refer to
+ only one such record.
+ The function sets the counter of the remaining records from the cache
+ buffer that would match the current join_tab record to 1.
+
+ RETURN VALUE
+ TRUE there are no records in the buffer to iterate over
+ FALSE otherwise
+*/
+bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last)
+{
+ if (!records)
+ return TRUE;
+ rem_records= 1;
+ return FALSE;
+}
+
+
+/*
+ Get the record from the BKA cache matching the current joined record
+
+ SYNOPSIS
+ get_next_candidate_for_match
+
DESCRIPTION
- This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method,
- see comments there.
+ This method is used for iterations over the records from the join
+ cache buffer when looking for matches for records from join_tab.
+ The method performs the necessary preparations to read the next record
+ from the join buffer into the record buffer by the method
+ read_next_candidate_for_match, or, to skip the next record from the join
+ buffer by the method skip_next_candidate_for_match.
+ This implementation of the virtual method get_next_candidate_for_match
+ just decrements the counter of the records that are to be iterated over
+ and returns the value of curr_association as a reference to the position
+ of the beginning of the record fields in the buffer.
+
+ RETURN VALUE
+ pointer to the start of the record fields in the join buffer
+ if the there is another record to iterate over, 0 - otherwise.
+*/
- NOTE
- This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
-
- RETURN
- 0 The record combination satisfies the index condition
- 1 Otherwise
+uchar *JOIN_CACHE_BKA::get_next_candidate_for_match()
+{
+ if (!rem_records)
+ return 0;
+ rem_records--;
+ return curr_association;
+}
+
+
+/*
+ Check whether the matching record from the BKA cache is to be skipped
+
+ SYNOPSIS
+ skip_next_candidate_for_match
+ rec_ptr pointer to the position in the join buffer right after
+ the previous record
+
+ DESCRIPTION
+ This implementation of the virtual function just calls the
+ method get_match_flag_by_pos to check whether the record referenced
+ by rec_ptr has its match flag set to MATCH_FOUND.
+
+ RETURN VALUE
+ TRUE the record referenced by rec_ptr has its match flag set to
+ MATCH_FOUND
+ FALSE otherwise
*/
-static
-bool bka_unique_skip_index_tuple(range_seq_t rseq, char *range_info)
+bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr)
{
- DBUG_ENTER("bka_unique_skip_index_tuple");
- JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
- DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
+ return join_tab->check_only_first_match() &&
+ (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND);
}
/*
- Using BKA_UNIQUE find matches from the next table for records from join buffer
+ Read the next record from the BKA join cache buffer when looking for matches
SYNOPSIS
- join_matching_records()
- skip_last do not look for matches for the last partial join record
+ read_next_candidate_for_match
+ rec_ptr pointer to the position in the join buffer right after
+ the previous record
DESCRIPTION
- This function can be used only when the table join_tab can be accessed
- by keys built over the fields of previous join tables.
- The function retrieves all keys from the hash table of the join buffer
- built for partial join records from the buffer. For each of these keys
- the function performs an index lookup and tries to match records yielded
- by this lookup with records from the join buffer attached to the key.
- If a match is found the function will call the sub_select function trying
- to look for matches for the remaining join operations.
- This function does not assume that matching records are necessarily
- returned with references to the keys by which they were found. If the call
- of the function multi_range_read_init returns flags with
- HA_MRR_NO_ASSOCIATION then a search for the key built from the returned
- record is carried on. The search is performed by probing in in the hash
- table of the join buffer.
- This function currently is called only from the function join_records.
- It's assumed that this function is always called with the skip_last
- parameter equal to false.
-
- RETURN
- return one of enum_nested_loop_state
+ This implementation of the virtual method read_next_candidate_for_match
+ calls the method get_record_by_pos to read the record referenced by rec_ptr
+ from the join buffer into the record buffer. If this record refers to
+ fields in the other join buffers the call of get_record_by_pos ensures that
+ these fields are read into the corresponding record buffers as well.
+ This function is supposed to be called after a successful call of
+ the method get_next_candidate_for_match.
+
+ RETURN VALUE
+ none
*/
-enum_nested_loop_state
-JOIN_CACHE_BKA_UNIQUE::join_matching_records(bool skip_last)
+void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr)
+{
+ get_record_by_pos(rec_ptr);
+}
+
+
+/*
+ Initialize the BKA join cache
+
+ SYNOPSIS
+ init
+
+ DESCRIPTION
+ The function initializes the cache structure. It is supposed to be called
+ right after a constructor for the JOIN_CACHE_BKA.
+
+ NOTES
+ The function first constructs a companion object of the type
+ JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
+
+ RETURN VALUE
+ 0 initialization with buffer allocations has been succeeded
+ 1 otherwise
+*/
+
+int JOIN_CACHE_BKA::init()
{
- int error;
- uchar *key_chain_ptr;
- handler *file= join_tab->table->file;
- enum_nested_loop_state rc= NESTED_LOOP_OK;
bool check_only_first_match= join_tab->check_only_first_match();
- bool no_association= test(mrr_mode & HA_MRR_NO_ASSOCIATION);
- /* Set functions to iterate over keys in the join buffer */
- RANGE_SEQ_IF seq_funcs= { bka_unique_range_seq_init,
- bka_unique_range_seq_next,
- check_only_first_match && !no_association ?
- bka_unique_range_seq_skip_record : 0,
- join_tab->cache_idx_cond ?
- bka_unique_skip_index_tuple : 0 };
+ RANGE_SEQ_IF rs_funcs= { bka_range_seq_init,
+ bka_range_seq_next,
+ check_only_first_match ?
+ bka_range_seq_skip_record : 0,
+ bka_skip_index_tuple };
- /* The value of skip_last must be always FALSE when this function is called */
- DBUG_ASSERT(!skip_last);
+ DBUG_ENTER("JOIN_CACHE_BKA::init");
- /* Return at once if there are no records in the join buffer */
- if (!records)
- return NESTED_LOOP_OK;
-
- rc= init_join_matching_records(&seq_funcs, key_entries);
- if (rc != NESTED_LOOP_OK)
- goto finish;
+ if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab,
+ mrr_mode, rs_funcs)))
+ DBUG_RETURN(1);
- while (!(error= file->multi_range_read_next((char **) &key_chain_ptr)))
- {
- if (no_association)
- {
- uchar *key_ref_ptr;
- TABLE *table= join_tab->table;
- TABLE_REF *ref= &join_tab->ref;
- KEY *keyinfo= table->key_info+ref->key;
- /*
- Build the key value out of the record returned by the call of
- multi_range_read_next in the record buffer
- */
- key_copy(ref->key_buff, table->record[0], keyinfo, ref->key_length);
- /* Look for this key in the join buffer */
- if (!key_search(ref->key_buff, ref->key_length, &key_ref_ptr))
- continue;
- key_chain_ptr= key_ref_ptr+get_size_of_key_offset();
- }
+ DBUG_RETURN(JOIN_CACHE::init());
+}
- uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
- uchar *next_rec_ref_ptr= last_rec_ref_ptr;
- do
- {
- next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
- uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
- if (join->thd->killed)
- {
- /* The user has aborted the execution of the query */
- join->thd->send_kill_message();
- rc= NESTED_LOOP_KILLED;
- goto finish;
- }
- /*
- If only the first match is needed and it has been already found
- for the associated partial join record then the returned candidate
- is discarded.
- */
- if (rc == NESTED_LOOP_OK &&
- (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
- {
- get_record_by_pos(rec_ptr);
- update_virtual_fields(join->thd, join_tab->table);
- rc= generate_full_extensions(rec_ptr);
- if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
- goto finish;
+/*
+ Get the key built over the next record from BKA join buffer
+
+ SYNOPSIS
+ get_next_key()
+ key pointer to the buffer where the key value is to be placed
+
+ DESCRIPTION
+ The function reads key fields from the current record in the join buffer.
+ and builds the key value out of these fields that will be used to access
+ the 'join_tab' table. Some of key fields may belong to previous caches.
+ They are accessed via record references to the record parts stored in the
+ previous join buffers. The other key fields always are placed right after
+ the flag fields of the record.
+ If the key is embedded, which means that its value can be read directly
+ from the join buffer, then *key is set to the beginning of the key in
+ this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
+ The function returns the length of the key if it succeeds to read it.
+ It is assumed that the function starts reading at the position of
+ the record length which is provided for each record in a BKA cache.
+ After the key is built the 'pos' value points to the first position after
+ the current record.
+ The function just skips the records with MATCH_IMPOSSIBLE in the
+ match flag field if there is any.
+ The function returns 0 if the initial position is after the beginning
+ of the record fields for last record from the join buffer.
+
+ RETURN VALUE
+ length of the key value - if the starting value of 'pos' points to
+ the position before the fields for the last record,
+ 0 - otherwise.
+*/
+
+uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
+{
+ uint len;
+ uint32 rec_len;
+ uchar *init_pos;
+ JOIN_CACHE *cache;
+
+start:
+
+ /* Any record in a BKA cache is prepended with its length */
+ DBUG_ASSERT(with_length);
+
+ if ((pos+size_of_rec_len) > last_rec_pos || !records)
+ return 0;
+
+ /* Read the length of the record */
+ rec_len= get_rec_length(pos);
+ pos+= size_of_rec_len;
+ init_pos= pos;
+
+ /* Read a reference to the previous cache if any */
+ if (prev_cache)
+ pos+= prev_cache->get_size_of_rec_offset();
+
+ curr_rec_pos= pos;
+
+ /* Read all flag fields of the record */
+ read_flag_fields();
+
+ if (with_match_flag &&
+ (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE )
+ {
+ pos= init_pos+rec_len;
+ goto start;
+ }
+
+ if (use_emb_key)
+ {
+ /* An embedded key is taken directly from the join buffer */
+ *key= pos;
+ len= emb_key_length;
+ }
+ else
+ {
+ /* Read key arguments from previous caches if there are any such fields */
+ if (external_key_arg_fields)
+ {
+ uchar *rec_ptr= curr_rec_pos;
+ uint key_arg_count= external_key_arg_fields;
+ CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
+ for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
+ {
+ uint len= 0;
+ DBUG_ASSERT(cache);
+ rec_ptr= cache->get_rec_ref(rec_ptr);
+ while (!cache->referenced_fields)
+ {
+ cache= cache->prev_cache;
+ DBUG_ASSERT(cache);
+ rec_ptr= cache->get_rec_ref(rec_ptr);
+ }
+ while (key_arg_count &&
+ cache->read_referenced_field(*copy_ptr, rec_ptr, &len))
+ {
+ copy_ptr++;
+ --key_arg_count;
+ }
}
}
- while (next_rec_ref_ptr != last_rec_ref_ptr);
+
+ /*
+ Read the other key arguments from the current record. The fields for
+ these arguments are always first in the sequence of the record's fields.
+ */
+ CACHE_FIELD *copy= field_descr+flag_fields;
+ CACHE_FIELD *copy_end= copy+local_key_arg_fields;
+ bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
+ for ( ; copy < copy_end; copy++)
+ read_record_field(copy, blob_in_rec_buff);
+
+ /* Build the key over the fields read into the record buffers */
+ TABLE_REF *ref= &join_tab->ref;
+ cp_buffer_from_ref(join->thd, join_tab->table, ref);
+ *key= ref->key_buff;
+ len= ref->key_length;
}
- if (error > 0 && error != HA_ERR_END_OF_FILE)
- return NESTED_LOOP_ERROR;
-finish:
- return end_join_matching_records(rc);
+ pos= init_pos+rec_len;
+
+ return len;
+}
+
+
+/*
+ Check the index condition of the joined table for a record from the BKA cache
+
+ SYNOPSIS
+ skip_index_tuple()
+ range_info pointer to the record returned by MRR
+
+ DESCRIPTION
+ This function is invoked from MRR implementation to check if an index
+ tuple matches the index condition. It is used in the case where the index
+ condition actually depends on both columns of the used index and columns
+ from previous tables.
+
+ NOTES
+ Accessing columns of the previous tables requires special handling with
+ BKA. The idea of BKA is to collect record combinations in a buffer and
+ then do a batch of ref access lookups, i.e. by the time we're doing a
+ lookup its previous-records-combination is not in prev_table->record[0]
+ but somewhere in the join buffer.
+ We need to get it from there back into prev_table(s)->record[0] before we
+ can evaluate the index condition, and that's why we need this function
+ instead of regular IndexConditionPushdown.
+
+ NOTES
+ Possible optimization:
+ Before we unpack the record from a previous table
+ check if this table is used in the condition.
+ If so then unpack the record otherwise skip the unpacking.
+ This should be done by a special virtual method
+ get_partial_record_by_pos().
+
+ RETURN VALUE
+ 1 the record combination does not satisfy the index condition
+ 0 otherwise
+*/
+
+bool JOIN_CACHE_BKA::skip_index_tuple(char *range_info)
+{
+ DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
+ get_record_by_pos((uchar*)range_info);
+ DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
}
+
/*
- Check whether all records in a key chain have their match flags set on
+ Initialize retrieval of range sequence for the BKAH join algorithm
+
+ SYNOPSIS
+ bkah_range_seq_init()
+ init_params pointer to the BKAH join cache object
+ n_ranges the number of ranges obtained
+ flags combination of MRR flags
+ DESCRIPTION
+ The function interprets init_param as a pointer to a JOIN_CACHE_BKAH
+ object. The function prepares for an iteration over distinct join keys
+ built over the records from the cache join buffer.
+
+ NOTE
+ This function is used only as a callback function.
+
+ RETURN VALUE
+ init_param value that is to be used as a parameter of
+ bkah_range_seq_next()
+*/
+
+static
+range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+ DBUG_ENTER("bkah_range_seq_init");
+ JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) init_param;
+ cache->reset(0);
+ DBUG_RETURN((range_seq_t) init_param);
+}
+
+
+/*
+ Get the next range/key over records from the join buffer of a BKAH cache
+
SYNOPSIS
- check_all_match_flags_for_key()
- key_chain_ptr
+ bkah_range_seq_next()
+ seq value returned by bkah_range_seq_init()
+ range OUT reference to the next range
+
+ DESCRIPTION
+ The function interprets seq as a pointer to a JOIN_CACHE_BKAH
+ object. The function returns a pointer to the range descriptor
+ for the next unique key built over records from the join buffer.
+
+ NOTE
+ This function is used only as a callback function.
+
+ RETURN VALUE
+ 0 ok, the range structure filled with info about the next range/key
+ 1 no more ranges
+*/
+
+static
+uint bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+ DBUG_ENTER("bkah_range_seq_next");
+ JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
+ TABLE_REF *ref= &cache->join_tab->ref;
+ key_range *start_key= &range->start_key;
+ if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
+ {
+ start_key->keypart_map= (1 << ref->key_parts) - 1;
+ start_key->flag= HA_READ_KEY_EXACT;
+ range->end_key= *start_key;
+ range->end_key.flag= HA_READ_AFTER_KEY;
+ range->ptr= (char *) cache->get_curr_key_chain();
+ range->range_flag= EQ_RANGE;
+ DBUG_RETURN(0);
+ }
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Check whether range_info orders to skip the next record from BKAH join buffer
+ SYNOPSIS
+ bkah_range_seq_skip_record()
+ seq value returned by bkah_range_seq_init()
+ range_info information about the next range/key returned by MRR
+ rowid [NOT USED] rowid of the record to be checked (not used)
+
DESCRIPTION
- This function retrieves records in the given circular chain and checks
- whether their match flags are set on. The parameter key_chain_ptr shall
- point to the position in the join buffer storing the reference to the
- last element of this chain.
-
- RETURN
- TRUE if each retrieved record has its match flag set on
- FALSE otherwise
+ The function interprets seq as a pointer to a JOIN_CACHE_BKAH
+ object. The function returns TRUE if the record with this range_info
+ is to be filtered out from the stream of records returned by
+ multi_range_read_next().
+
+ NOTE
+ This function is used only as a callback function.
+
+ RETURN VALUE
+ 1 record with this range_info is to be filtered out from the stream
+ of records returned by multi_range_read_next()
+ 0 the record is to be left in the stream
+*/
+
+static
+bool bkah_range_seq_skip_record(range_seq_t rseq, char *range_info,
+ uchar *rowid)
+{
+ DBUG_ENTER("bkah_range_seq_skip_record");
+ JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
+ bool res= cache->check_all_match_flags_for_key((uchar *) range_info);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Check if the record combination from BKAH cache matches the index condition
+
+ SYNOPSIS
+ bkah_skip_index_tuple()
+ rseq value returned by bka_range_seq_init()
+ range_info record chain for the next range/key returned by MRR
+
+ DESCRIPTION
+ This is a wrapper for the JOIN_CACHE_BKAH::skip_index_tuple method,
+ see comments there.
+
+ NOTE
+ This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
+
+ RETURN VALUE
+ 0 some records from the chain satisfy the index condition
+ 1 otherwise
*/
-bool JOIN_CACHE_BKA_UNIQUE::check_all_match_flags_for_key(uchar *key_chain_ptr)
+static
+bool bkah_skip_index_tuple(range_seq_t rseq, char *range_info)
{
- uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
- uchar *next_rec_ref_ptr= last_rec_ref_ptr;
- do
- {
- next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
- uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
- if (!get_match_flag_by_pos(rec_ptr))
- return FALSE;
- }
- while (next_rec_ref_ptr != last_rec_ref_ptr);
- return TRUE;
+ DBUG_ENTER("bka_unique_skip_index_tuple");
+ JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
+ DBUG_RETURN(cache->skip_index_tuple(range_info));
}
-
-/*
- Get the next key built for the records from BKA_UNIQUE join buffer
+
+/*
+ Prepare to read record from BKAH cache matching the current joined record
SYNOPSIS
- get_next_key()
- key pointer to the buffer where the key value is to be placed
+ prepare_look_for_matches()
+ skip_last <-> ignore the last record in the buffer (always unused here)
DESCRIPTION
- The function reads the next key value stored in the hash table of the
- join buffer. Depending on the value of the use_emb_key flag of the
- join cache the value is read either from the table itself or from
- the record field where it occurs.
+ The function prepares to iterate over records in the join cache buffer
+ matching the record loaded into the record buffer for join_tab when
+ performing join operation by BKAH join algorithm. With BKAH algorithm, if
+ association labels are used, then record loaded into the record buffer
+ for join_tab always has a direct reference to the chain of the matching
+ records from the join buffer. If association labels are not used then
+ the chain of the matching records is obtained by the call of the
+ get_key_chain_by_join_key function.
+
+ RETURN VALUE
+ TRUE there are no records in the buffer to iterate over
+ FALSE otherwise
+*/
+
+bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last)
+{
+ last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0;
+ if (no_association &&
+ (curr_matching_chain= get_matching_chain_by_join_key()))
+ return 1;
+ last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain);
+ return 0;
+}
- RETURN
- length of the key value - if the starting value of 'cur_key_entry' refers
- to the position after that referred by the the value of 'last_key_entry'
- 0 - otherwise.
+/*
+ Initialize the BKAH join cache
+
+ SYNOPSIS
+ init
+
+ DESCRIPTION
+ The function initializes the cache structure. It is supposed to be called
+ right after a constructor for the JOIN_CACHE_BKAH.
+
+ NOTES
+ The function first constructs a companion object of the type
+ JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
+
+ RETURN VALUE
+ 0 initialization with buffer allocations has succeeded
+ 1 otherwise
*/
-uint JOIN_CACHE_BKA_UNIQUE::get_next_key(uchar ** key)
-{
- if (curr_key_entry == last_key_entry)
- return 0;
+int JOIN_CACHE_BKAH::init()
+{
+ bool check_only_first_match= join_tab->check_only_first_match();
- curr_key_entry-= key_entry_length;
+ no_association= test(mrr_mode & HA_MRR_NO_ASSOCIATION);
- *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry;
+ RANGE_SEQ_IF rs_funcs= { bkah_range_seq_init,
+ bkah_range_seq_next,
+ check_only_first_match && !no_association ?
+ bkah_range_seq_skip_record : 0,
+ bkah_skip_index_tuple };
- DBUG_ASSERT(*key >= buff && *key < hash_table);
+ DBUG_ENTER("JOIN_CACHE_BKAH::init");
- return key_length;
+ if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab,
+ mrr_mode, rs_funcs)))
+ DBUG_RETURN(1);
+
+ DBUG_RETURN(JOIN_CACHE_HASHED::init());
}
-/****************************************************************************
- * Join cache module end
- ****************************************************************************/
+/*
+ Check the index condition of the joined table for a record from the BKA cache
+
+ SYNOPSIS
+ skip_index_tuple()
+ range_info record chain returned by MRR
+
+ DESCRIPTION
+ See JOIN_CACHE_BKA::skip_index_tuple().
+ This function is the variant for use with the class JOIN_CACHE_BKAH.
+ The difference from JOIN_CACHE_BKA case is that there may be multiple
+ previous table record combinations that share the same key(MRR range).
+ As a consequence, we need to loop through the chain of all table record
+ combinations that match the given MRR range key range_info until we find
+ one that satisfies the index condition.
+
+ NOTE
+ Possible optimization:
+ Before we unpack the record from a previous table
+ check if this table is used in the condition.
+ If so then unpack the record otherwise skip the unpacking.
+ This should be done by a special virtual method
+ get_partial_record_by_pos().
+
+ RETURN VALUE
+ 1 any record combination from the chain referred by range_info
+ does not satisfy the index condition
+ 0 otherwise
+
+
+*/
+
+bool JOIN_CACHE_BKAH::skip_index_tuple(char *range_info)
+{
+ uchar *last_rec_ref_ptr= get_next_rec_ref((uchar*) range_info);
+ uchar *next_rec_ref_ptr= last_rec_ref_ptr;
+ DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple");
+ do
+ {
+ next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
+ uchar *rec_ptr= next_rec_ref_ptr + rec_fields_offset;
+ get_record_by_pos(rec_ptr);
+ if (join_tab->cache_idx_cond->val_int())
+ DBUG_RETURN(FALSE);
+ } while(next_rec_ref_ptr != last_rec_ref_ptr);
+ DBUG_RETURN(TRUE);
+}
diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h
new file mode 100644
index 00000000000..ea84a50c885
--- /dev/null
+++ b/sql/sql_join_cache.h
@@ -0,0 +1,1376 @@
+/*
+ This file contains declarations for implementations
+ of block based join algorithms
+*/
+
+#define JOIN_CACHE_INCREMENTAL_BIT 1
+#define JOIN_CACHE_HASHED_BIT 2
+#define JOIN_CACHE_BKA_BIT 4
+
+/*
+ Categories of data fields of variable length written into join cache buffers.
+ The value of any of these fields is written into cache together with the
+ prepended length of the value.
+*/
+#define CACHE_BLOB 1 /* blob field */
+#define CACHE_STRIPPED 2 /* field stripped of trailing spaces */
+#define CACHE_VARSTR1 3 /* short string value (length takes 1 byte) */
+#define CACHE_VARSTR2 4 /* long string value (length takes 2 bytes) */
+
+/*
+ The CACHE_FIELD structure used to describe fields of records that
+ are written into a join cache buffer from record buffers and backward.
+*/
+typedef struct st_cache_field {
+ uchar *str; /**< buffer from/to where the field is to be copied */
+ uint length; /**< maximal number of bytes to be copied from/to str */
+ /*
+ Field object for the moved field
+ (0 - for a flag field, see JOIN_CACHE::create_flag_fields).
+ */
+ Field *field;
+ uint type; /**< category of the copied field (CACHE_BLOB et al.) */
+ /*
+ The number of the record offset value for the field in the sequence
+ of offsets placed after the last field of the record. These
+ offset values are used to access fields referred to from other caches.
+ If the value is 0 then no offset for the field is saved in the
+ trailing sequence of offsets.
+ */
+ uint referenced_field_no;
+ /* The remaining structure fields are used as containers for temp values */
+ uint blob_length; /**< length of the blob to be copied */
+ uint offset; /**< field offset to be saved in cache buffer */
+} CACHE_FIELD;
+
+
+class JOIN_TAB_SCAN;
+
+
+/*
+ JOIN_CACHE is the base class to support the implementations of
+ - Block Nested Loop (BNL) Join Algorithm,
+ - Block Nested Loop Hash (BNLH) Join Algorithm,
+ - Batched Key Access (BKA) Join Algorithm.
+ The first algorithm is supported by the derived class JOIN_CACHE_BNL,
+ the second algorithm is supported by the derived class JOIN_CACHE_BNLH,
+ while the third algorithm is implemented in two variants supported by
+ the classes JOIN_CACHE_BKA and JOIN_CACHE_BKAH.
+ These three algorithms have a lot in common. Each of them first accumulates
+ the records of the left join operand in a join buffer and then searches for
+ matching rows of the second operand for all accumulated records.
+ For the first two algorithms this strategy saves on logical I/O operations:
+ the entire set of records from the join buffer requires only one look-through
+ of the records provided by the second operand.
+ For the third algorithm the accumulation of records allows to optimize
+ fetching rows of the second operand from disk for some engines (MyISAM,
+ InnoDB), or to minimize the number of round-trips between the Server and
+ the engine nodes (NDB Cluster).
+*/
+
+class JOIN_CACHE :public Sql_alloc
+{
+
+private:
+
+ /* Size of the offset of a record from the cache */
+ uint size_of_rec_ofs;
+ /* Size of the length of a record in the cache */
+ uint size_of_rec_len;
+ /* Size of the offset of a field within a record in the cache */
+ uint size_of_fld_ofs;
+
+protected:
+
+ /* 3 functions below actually do not use the hidden parameter 'this' */
+
+ /* Calculate the number of bytes used to store an offset value */
+ uint offset_size(uint len)
+ { return (len < 256 ? 1 : len < 256*256 ? 2 : 4); }
+
+ /* Get the offset value that takes ofs_sz bytes at the position ptr */
+ ulong get_offset(uint ofs_sz, uchar *ptr)
+ {
+ switch (ofs_sz) {
+ case 1: return uint(*ptr);
+ case 2: return uint2korr(ptr);
+ case 4: return uint4korr(ptr);
+ }
+ return 0;
+ }
+
+ /* Set the offset value ofs that takes ofs_sz bytes at the position ptr */
+ void store_offset(uint ofs_sz, uchar *ptr, ulong ofs)
+ {
+ switch (ofs_sz) {
+ case 1: *ptr= (uchar) ofs; return;
+ case 2: int2store(ptr, (uint16) ofs); return;
+ case 4: int4store(ptr, (uint32) ofs); return;
+ }
+ }
+
+ /*
+ The maximum total length of the fields stored for a record in the cache.
+ For blob fields only the sizes of the blob lengths are taken into account.
+ */
+ uint length;
+
+ /*
+ Representation of the executed multi-way join through which all needed
+ context can be accessed.
+ */
+ JOIN *join;
+
+ /*
+ Cardinality of the range of join tables whose fields can be put into the
+ cache. A table from the range not necessarily contributes to the cache.
+ */
+ uint tables;
+
+ /*
+ The total number of flag and data fields that can appear in a record
+ written into the cache. Fields with null values are always skipped
+ to save space.
+ */
+ uint fields;
+
+ /*
+ The total number of flag fields in a record put into the cache. They are
+ used for table null bitmaps, table null row flags, and an optional match
+ flag. Flag fields go before other fields in a cache record with the match
+ flag field placed always at the very beginning of the record.
+ */
+ uint flag_fields;
+
+ /* The total number of blob fields that are written into the cache */
+ uint blobs;
+
+ /*
+ The total number of fields referenced from field descriptors for other join
+ caches. These fields are used to construct key values.
+ When BKA join algorithm is employed the constructed key values serve to
+ access matching rows with index lookups.
+ The key values are put into a hash table when the BNLH join algorithm
+ is employed and when BKAH is used for the join operation.
+ */
+ uint referenced_fields;
+
+ /*
+ The current number of already created data field descriptors.
+ This number can be useful for implementations of the init methods.
+ */
+ uint data_field_count;
+
+ /*
+ The current number of already created pointers to the data field
+ descriptors. This number can be useful for implementations of
+ the init methods.
+ */
+ uint data_field_ptr_count;
+
+ /*
+ Array of the descriptors of fields containing 'fields' elements.
+ These are all fields that are stored for a record in the cache.
+ */
+ CACHE_FIELD *field_descr;
+
+ /*
+ Array of pointers to the blob descriptors that contains 'blobs' elements.
+ */
+ CACHE_FIELD **blob_ptr;
+
+ /*
+ This flag indicates that records written into the join buffer contain
+ a match flag field. The flag must be set by the init method.
+ */
+ bool with_match_flag;
+ /*
+ This flag indicates that any record is prepended with the length of the
+ record which allows us to skip the record or part of it without reading.
+ */
+ bool with_length;
+
+ /*
+ The maximal number of bytes used for a record representation in
+ the cache excluding the space for blob data.
+ For future derived classes this representation may contain some
+ redundant info such as a key value associated with the record.
+ */
+ uint pack_length;
+ /*
+ The value of pack_length incremented by the total size of all
+ pointers of a record in the cache to the blob data.
+ */
+ uint pack_length_with_blob_ptrs;
+
+ /*
+ The total size of the record base prefix. The base prefix of record may
+ include the following components:
+ - the length of the record
+ - the link to a record in a previous buffer.
+ Each record in the buffer is supplied with the same set of the components.
+ */
+ uint base_prefix_length;
+
+ /*
+ The expected length of a record in the join buffer together with
+ all prefixes and postfixes
+ */
+ ulong avg_record_length;
+
+ /* The expected size of the space per record in the auxiliary buffer */
+ ulong avg_aux_buffer_incr;
+
+ /* Expected join buffer space used for one record */
+ ulong space_per_record;
+
+ /* Pointer to the beginning of the join buffer */
+ uchar *buff;
+ /*
+ Size of the entire memory allocated for the join buffer.
+ Part of this memory may be reserved for the auxiliary buffer.
+ */
+ ulong buff_size;
+ /* The minimal join buffer size when join buffer still makes sense to use */
+ ulong min_buff_size;
+ /* The maximum expected size of the join buffer to be used */
+ ulong max_buff_size;
+ /* Size of the auxiliary buffer */
+ ulong aux_buff_size;
+
+ /* The number of records put into the join buffer */
+ ulong records;
+ /*
+ The number of records in the fully refilled join buffer of
+ the minimal size equal to min_buff_size
+ */
+ ulong min_records;
+ /*
+ The maximum expected number of records to be put in the join buffer
+ at one refill
+ */
+ ulong max_records;
+
+ /*
+ Pointer to the current position in the join buffer.
+ This member is used both when writing to buffer and
+ when reading from it.
+ */
+ uchar *pos;
+ /*
+ Pointer to the first free position in the join buffer,
+ right after the last record into it.
+ */
+ uchar *end_pos;
+
+ /*
+ Pointer to the beginning of the first field of the current read/write
+ record from the join buffer. The value is adjusted by the
+ get_record/put_record functions.
+ */
+ uchar *curr_rec_pos;
+ /*
+ Pointer to the beginning of the first field of the last record
+ from the join buffer.
+ */
+ uchar *last_rec_pos;
+
+ /*
+ Flag is set if the blob data for the last record in the join buffer
+ is in record buffers rather than in the join cache.
+ */
+ bool last_rec_blob_data_is_in_rec_buff;
+
+ /*
+ Pointer to the position to the current record link.
+ Record links are used only with linked caches. Record links allow to set
+ connections between parts of one join record that are stored in different
+ join buffers.
+ In the simplest case a record link is just a pointer to the beginning of
+ the record stored in the buffer.
+ In a more general case a link could be a reference to an array of pointers
+ to records in the buffer.
+ */
+ uchar *curr_rec_link;
+
+ /*
+ This flag is set to TRUE if join_tab is the first inner table of an outer
+ join and the latest record written to the join buffer is detected to be
+ null complemented after checking on conditions over the outer tables for
+ this outer join operation
+ */
+ bool last_written_is_null_compl;
+
+ /*
+ The number of fields put in the join buffer of the join cache that are
+ used in building keys to access the table join_tab
+ */
+ uint local_key_arg_fields;
+ /*
+ The total number of the fields in the previous caches that are used
+ in building keys to access the table join_tab
+ */
+ uint external_key_arg_fields;
+
+ /*
+ This flag indicates that the key values will be read directly from the join
+ buffer. It will save us building key values in the key buffer.
+ */
+ bool use_emb_key;
+ /* The length of an embedded key value */
+ uint emb_key_length;
+
+ /*
+ This object provides the methods to iterate over records of
+ the joined table join_tab when looking for join matches between
+ records from join buffer and records from join_tab.
+ BNL and BNLH join algorithms retrieve all records from join_tab,
+ while BKA/BKAH algorithm iterates only over those records from
+ join_tab that can be accessed by look-ups with join keys built
+ from records in join buffer.
+ */
+ JOIN_TAB_SCAN *join_tab_scan;
+
+ void calc_record_fields();
+ void collect_info_on_key_args();
+ int alloc_fields();
+ void create_flag_fields();
+ void create_key_arg_fields();
+ void create_remaining_fields();
+ void set_constants();
+ int alloc_buffer();
+
+ /* Shall reallocate the join buffer */
+ virtual int realloc_buffer();
+
+ /* Check the possibility to read the access keys directly from join buffer */
+ bool check_emb_key_usage();
+
+ uint get_size_of_rec_offset() { return size_of_rec_ofs; }
+ uint get_size_of_rec_length() { return size_of_rec_len; }
+ uint get_size_of_fld_offset() { return size_of_fld_ofs; }
+
+ uchar *get_rec_ref(uchar *ptr)
+ {
+ return buff+get_offset(size_of_rec_ofs, ptr-size_of_rec_ofs);
+ }
+ ulong get_rec_length(uchar *ptr)
+ {
+ return (ulong) get_offset(size_of_rec_len, ptr);
+ }
+ ulong get_fld_offset(uchar *ptr)
+ {
+ return (ulong) get_offset(size_of_fld_ofs, ptr);
+ }
+
+ void store_rec_ref(uchar *ptr, uchar* ref)
+ {
+ store_offset(size_of_rec_ofs, ptr-size_of_rec_ofs, (ulong) (ref-buff));
+ }
+ void store_rec_length(uchar *ptr, ulong len)
+ {
+ store_offset(size_of_rec_len, ptr, len);
+ }
+ void store_fld_offset(uchar *ptr, ulong ofs)
+ {
+ store_offset(size_of_fld_ofs, ptr, ofs);
+ }
+
+ /* Write record fields and their required offsets into the join buffer */
+ uint write_record_data(uchar *link, bool *is_full);
+
+ /* Get the total length of all prefixes of a record in the join buffer */
+ virtual uint get_prefix_length() { return base_prefix_length; }
+ /* Get maximum total length of all affixes of a record in the join buffer */
+ virtual uint get_record_max_affix_length();
+
+ /*
+ Shall get maximum size of the additional space per record used for
+ record keys
+ */
+ virtual uint get_max_key_addon_space_per_record() { return 0; }
+
+ /*
+ This method must determine for how much the auxiliary buffer should be
+ incremented when a new record is added to the join buffer.
+ If no auxiliary buffer is needed the function should return 0.
+ */
+ virtual uint aux_buffer_incr(ulong recno);
+
+ /* Shall calculate how much space is remaining in the join buffer */
+ virtual ulong rem_space()
+ {
+ return max(buff_size-(end_pos-buff)-aux_buff_size,0);
+ }
+
+ /*
+ Shall calculate how much space is taken by allocation of the key
+ for a record in the join buffer
+ */
+ virtual uint extra_key_length() { return 0; }
+
+ /* Read all flag and data fields of a record from the join buffer */
+ uint read_all_record_fields();
+
+ /* Read all flag fields of a record from the join buffer */
+ uint read_flag_fields();
+
+ /* Read a data record field from the join buffer */
+ uint read_record_field(CACHE_FIELD *copy, bool last_record);
+
+ /* Read a referenced field from the join buffer */
+ bool read_referenced_field(CACHE_FIELD *copy, uchar *rec_ptr, uint *len);
+
+ /*
+ Shall skip record from the join buffer if its match flag
+ is set to MATCH_FOUND
+ */
+ virtual bool skip_if_matched();
+
+ /*
+ Shall skip record from the join buffer if its match flag
+ commands to do so
+ */
+ virtual bool skip_if_not_needed_match();
+
+ /*
+ True if rec_ptr points to the record whose blob data stay in
+ record buffers
+ */
+ bool blob_data_is_in_rec_buff(uchar *rec_ptr)
+ {
+ return rec_ptr == last_rec_pos && last_rec_blob_data_is_in_rec_buff;
+ }
+
+ /* Find matches from the next table for records from the join buffer */
+ virtual enum_nested_loop_state join_matching_records(bool skip_last);
+
+ /* Shall set an auxiliary buffer up (currently used only by BKA joins) */
+ virtual int setup_aux_buffer(HANDLER_BUFFER &aux_buff)
+ {
+ DBUG_ASSERT(0);
+ return 0;
+ }
+
+ /*
+ Shall get the number of ranges in the cache buffer passed
+ to the MRR interface
+ */
+ virtual uint get_number_of_ranges_for_mrr() { return 0; };
+
+ /*
+ Shall prepare to look for records from the join cache buffer that would
+ match the record of the joined table read into the record buffer
+ */
+ virtual bool prepare_look_for_matches(bool skip_last)= 0;
+ /*
+ Shall return a pointer to the record from join buffer that is checked
+ as the next candidate for a match with the current record from join_tab.
+    Each implementation of this virtual function should bear in mind
+ that the record position it returns shall be exactly the position
+ passed as the parameter to the implementations of the virtual functions
+ skip_next_candidate_for_match and read_next_candidate_for_match.
+ */
+ virtual uchar *get_next_candidate_for_match()= 0;
+ /*
+    Shall check whether the match flag setting of the given record from
+    the join buffer commands that the record be skipped.
+ */
+ virtual bool skip_next_candidate_for_match(uchar *rec_ptr)= 0;
+ /*
+ Shall read the given record from the join buffer into the
+ the corresponding record buffer
+ */
+ virtual void read_next_candidate_for_match(uchar *rec_ptr)= 0;
+
+ /*
+ Shall return the location of the association label returned by
+ the multi_read_range_next function for the current record loaded
+ into join_tab's record buffer
+ */
+ virtual uchar **get_curr_association_ptr() { return 0; };
+
+ /* Add null complements for unmatched outer records from the join buffer */
+ virtual enum_nested_loop_state join_null_complements(bool skip_last);
+
+ /* Restore the fields of the last record from the join buffer */
+ virtual void restore_last_record();
+
+ /* Set match flag for a record in join buffer if it has not been set yet */
+ bool set_match_flag_if_none(JOIN_TAB *first_inner, uchar *rec_ptr);
+
+ enum_nested_loop_state generate_full_extensions(uchar *rec_ptr);
+
+ /* Check matching to a partial join record from the join buffer */
+ bool check_match(uchar *rec_ptr);
+
+ /*
+ This constructor creates an unlinked join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter.
+ */
+ JOIN_CACHE(JOIN *j, JOIN_TAB *tab)
+ {
+ join= j;
+ join_tab= tab;
+ prev_cache= next_cache= 0;
+ buff= 0;
+ }
+
+ /*
+ This constructor creates a linked join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter. The parameter 'prev' specifies the previous
+ cache object to which this cache is linked.
+ */
+ JOIN_CACHE(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev)
+ {
+ join= j;
+ join_tab= tab;
+ next_cache= 0;
+ prev_cache= prev;
+ buff= 0;
+ if (prev)
+ prev->next_cache= this;
+ }
+
+public:
+
+ /*
+ The enumeration type Join_algorithm includes a mnemonic constant for
+ each join algorithm that employs join buffers
+ */
+
+ enum Join_algorithm
+ {
+ BNL_JOIN_ALG, /* Block Nested Loop Join algorithm */
+ BNLH_JOIN_ALG, /* Block Nested Loop Hash Join algorithm */
+ BKA_JOIN_ALG, /* Batched Key Access Join algorithm */
+ BKAH_JOIN_ALG, /* Batched Key Access with Hash Table Join Algorithm */
+ };
+
+ /*
+ The enumeration type Match_flag describes possible states of the match flag
+ field stored for the records of the first inner tables of outer joins and
+ semi-joins in the cases when the first match strategy is used for them.
+ When a record with match flag field is written into the join buffer the
+ state of the field usually is MATCH_NOT_FOUND unless this is a record of the
+ first inner table of the outer join for which the on precondition (the
+ condition from on expression over outer tables) has turned out not to be
+ true. In the last case the state of the match flag is MATCH_IMPOSSIBLE.
+ The state of the match flag field is changed to MATCH_FOUND as soon as
+ the first full matching combination of inner tables of the outer join or
+ the semi-join is discovered.
+ */
+ enum Match_flag { MATCH_NOT_FOUND, MATCH_FOUND, MATCH_IMPOSSIBLE };
+
+ /* Table to be joined with the partial join records from the cache */
+ JOIN_TAB *join_tab;
+
+ /* Pointer to the previous join cache if there is any */
+ JOIN_CACHE *prev_cache;
+ /* Pointer to the next join cache if there is any */
+ JOIN_CACHE *next_cache;
+
+ /* Shall initialize the join cache structure */
+ virtual int init();
+
+ /* Get the current size of the cache join buffer */
+ ulong get_join_buffer_size() { return buff_size; }
+ /* Set the size of the cache join buffer to a new value */
+ void set_join_buffer_size(ulong sz) { buff_size= sz; }
+
+ /* Get the minimum possible size of the cache join buffer */
+ virtual ulong get_min_join_buffer_size();
+ /* Get the maximum possible size of the cache join buffer */
+ virtual ulong get_max_join_buffer_size();
+
+  /* Shrink the size of the cache join buffer in a given ratio */
+ bool shrink_join_buffer_in_ratio(ulonglong n, ulonglong d);
+
+ /* Shall return the type of the employed join algorithm */
+ virtual enum Join_algorithm get_join_alg()= 0;
+
+ /*
+ The function shall return TRUE only when there is a key access
+ to the join table
+ */
+ virtual bool is_key_access()= 0;
+
+ /* Shall reset the join buffer for reading/writing */
+ virtual void reset(bool for_writing);
+
+ /*
+ This function shall add a record into the join buffer and return TRUE
+ if it has been decided that it should be the last record in the buffer.
+ */
+ virtual bool put_record();
+
+ /*
+ This function shall read the next record into the join buffer and return
+    TRUE if there are no more records.
+ */
+ virtual bool get_record();
+
+ /*
+ This function shall read the record at the position rec_ptr
+ in the join buffer
+ */
+ virtual void get_record_by_pos(uchar *rec_ptr);
+
+ /* Shall return the value of the match flag for the positioned record */
+ virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr);
+
+ /* Shall return the position of the current record */
+ virtual uchar *get_curr_rec() { return curr_rec_pos; }
+
+ /* Shall set the current record link */
+ virtual void set_curr_rec_link(uchar *link) { curr_rec_link= link; }
+
+ /* Shall return the current record link */
+ virtual uchar *get_curr_rec_link()
+ {
+ return (curr_rec_link ? curr_rec_link : get_curr_rec());
+ }
+
+ /* Join records from the join buffer with records from the next join table */
+ enum_nested_loop_state join_records(bool skip_last);
+
+ /* Add a comment on the join algorithm employed by the join cache */
+ void print_explain_comment(String *str);
+
+ virtual ~JOIN_CACHE() {}
+ void reset_join(JOIN *j) { join= j; }
+ void free()
+ {
+ x_free(buff);
+ buff= 0;
+ }
+
+ JOIN_TAB *get_next_table(JOIN_TAB *tab);
+
+ friend class JOIN_CACHE_HASHED;
+ friend class JOIN_CACHE_BNL;
+ friend class JOIN_CACHE_BKA;
+ friend class JOIN_TAB_SCAN;
+ friend class JOIN_TAB_SCAN_MRR;
+
+};
+
+
+/*
+ The class JOIN_CACHE_HASHED is the base class for the classes
+ JOIN_CACHE_HASHED_BNL and JOIN_CACHE_HASHED_BKA. The first of them supports
+ an implementation of Block Nested Loop Hash (BNLH) Join Algorithm,
+ while the second is used for a variant of the BKA Join algorithm that performs
+ only one lookup for any records from join buffer with the same key value.
+ For a join cache of this class the records from the join buffer that have
+ the same access key are linked into a chain attached to a key entry structure
+ that either itself contains the key value, or, in the case when the keys are
+ embedded, refers to its occurrence in one of the records from the chain.
+ To build the chains with the same keys a hash table is employed. It is placed
+ at the very end of the join buffer. The array of hash entries is allocated
+ first at the very bottom of the join buffer, while key entries are placed
+ before this array.
+ A hash entry contains a header of the list of the key entries with the same
+ hash value.
+ Each key entry is a structure of the following type:
+ struct st_join_cache_key_entry {
+ union {
+ uchar[] value;
+ cache_ref *value_ref; // offset from the beginning of the buffer
+ } hash_table_key;
+ key_ref next_key; // offset backward from the beginning of hash table
+ cache_ref *last_rec // offset from the beginning of the buffer
+ }
+ The references linking the records in a chain are always placed at the very
+ beginning of the record info stored in the join buffer. The records are
+ linked in a circular list. A new record is always added to the end of this
+ list.
+
+ The following picture represents a typical layout for the info stored in the
+ join buffer of a join cache object of the JOIN_CACHE_HASHED class.
+
+ buff
+ V
+ +----------------------------------------------------------------------------+
+ | |[*]record_1_1| |
+ | ^ | |
+ | | +--------------------------------------------------+ |
+ | | |[*]record_2_1| | |
+ | | ^ | V |
+ | | | +------------------+ |[*]record_1_2| |
+ | | +--------------------+-+ | |
+ |+--+ +---------------------+ | | +-------------+ |
+ || | | V | | |
+ |||[*]record_3_1| |[*]record_1_3| |[*]record_2_2| | |
+ ||^ ^ ^ | |
+ ||+----------+ | | | |
+ ||^ | |<---------------------------+-------------------+ |
+ |++ | | ... mrr | buffer ... ... | | |
+ | | | | |
+ | +-----+--------+ | +-----|-------+ |
+ | V | | | V | | |
+ ||key_3|[/]|[*]| | | |key_2|[/]|[*]| | |
+ | +-+---|-----------------------+ | |
+ | V | | | | |
+ | |key_1|[*]|[*]| | | ... |[*]| ... |[*]| ... | |
+ +----------------------------------------------------------------------------+
+ ^ ^ ^
+ | i-th entry j-th entry
+ hash table
+
+ i-th hash entry:
+ circular record chain for key_1:
+ record_1_1
+ record_1_2
+ record_1_3 (points to record_1_1)
+ circular record chain for key_3:
+ record_3_1 (points to itself)
+
+ j-th hash entry:
+ circular record chain for key_2:
+ record_2_1
+ record_2_2 (points to record_2_1)
+
+*/
+
+class JOIN_CACHE_HASHED: public JOIN_CACHE
+{
+
+private:
+
+ /* Size of the offset of a key entry in the hash table */
+ uint size_of_key_ofs;
+
+ /*
+ Length of the key entry in the hash table.
+ A key entry either contains the key value, or it contains a reference
+ to the key value if use_emb_key flag is set for the cache.
+ */
+ uint key_entry_length;
+
+ /* The beginning of the hash table in the join buffer */
+ uchar *hash_table;
+ /* Number of hash entries in the hash table */
+ uint hash_entries;
+
+
+ /* The position of the currently retrieved key entry in the hash table */
+ uchar *curr_key_entry;
+
+ /* The offset of the data fields from the beginning of the record fields */
+ uint data_fields_offset;
+
+ uint get_hash_idx(uchar* key, uint key_len);
+
+ int init_hash_table();
+ void cleanup_hash_table();
+
+protected:
+
+ /*
+ Length of a key value.
+ It is assumed that all key values have the same length.
+ */
+ uint key_length;
+ /* Buffer to store key values for probing */
+ uchar *key_buff;
+
+ /* Number of key entries in the hash table (number of distinct keys) */
+ uint key_entries;
+
+ /* The position of the last key entry in the hash table */
+ uchar *last_key_entry;
+
+ /*
+ The offset of the record fields from the beginning of the record
+ representation. The record representation starts with a reference to
+ the next record in the key record chain followed by the length of
+ the trailing record data followed by a reference to the record segment
+ in the previous cache, if any, followed by the record fields.
+ */
+ uint rec_fields_offset;
+
+ uint get_size_of_key_offset() { return size_of_key_ofs; }
+
+ /*
+ Get the position of the next_key_ptr field pointed to by
+ a linking reference stored at the position key_ref_ptr.
+ This reference is actually the offset backward from the
+ beginning of hash table.
+ */
+ uchar *get_next_key_ref(uchar *key_ref_ptr)
+ {
+ return hash_table-get_offset(size_of_key_ofs, key_ref_ptr);
+ }
+
+ /*
+ Store the linking reference to the next_key_ptr field at
+ the position key_ref_ptr. The position of the next_key_ptr
+ field is pointed to by ref. The stored reference is actually
+ the offset backward from the beginning of the hash table.
+ */
+ void store_next_key_ref(uchar *key_ref_ptr, uchar *ref)
+ {
+ store_offset(size_of_key_ofs, key_ref_ptr, (ulong) (hash_table-ref));
+ }
+
+ /*
+ Check whether the reference to the next_key_ptr field at the position
+ key_ref_ptr contains a nil value.
+ */
+ bool is_null_key_ref(uchar *key_ref_ptr)
+ {
+ ulong nil= 0;
+ return memcmp(key_ref_ptr, &nil, size_of_key_ofs ) == 0;
+ }
+
+ /*
+ Set the reference to the next_key_ptr field at the position
+ key_ref_ptr equal to nil.
+ */
+ void store_null_key_ref(uchar *key_ref_ptr)
+ {
+ ulong nil= 0;
+ store_offset(size_of_key_ofs, key_ref_ptr, nil);
+ }
+
+ uchar *get_next_rec_ref(uchar *ref_ptr)
+ {
+ return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
+ }
+
+ void store_next_rec_ref(uchar *ref_ptr, uchar *ref)
+ {
+ store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
+ }
+
+ /*
+ Get the position of the embedded key value for the current
+ record pointed to by get_curr_rec().
+ */
+ uchar *get_curr_emb_key()
+ {
+ return get_curr_rec()+data_fields_offset;
+ }
+
+ /*
+ Get the position of the embedded key value pointed to by a reference
+ stored at ref_ptr. The stored reference is actually the offset from
+ the beginning of the join buffer.
+ */
+ uchar *get_emb_key(uchar *ref_ptr)
+ {
+ return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
+ }
+
+ /*
+ Store the reference to an embedded key at the position key_ref_ptr.
+ The position of the embedded key is pointed to by ref. The stored
+ reference is actually the offset from the beginning of the join buffer.
+ */
+ void store_emb_key_ref(uchar *ref_ptr, uchar *ref)
+ {
+ store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
+ }
+
+ /* Get the total length of all prefixes of a record in hashed join buffer */
+ uint get_prefix_length()
+ {
+ return base_prefix_length + get_size_of_rec_offset();
+ }
+
+ /*
+ Get maximum size of the additional space per record used for
+ the hash table with record keys
+ */
+ uint get_max_key_addon_space_per_record();
+
+ /*
+ Calculate how much space in the buffer would not be occupied by
+    records, key entries and additional memory for the MRR buffer.
+ */
+ ulong rem_space()
+ {
+ return max(last_key_entry-end_pos-aux_buff_size,0);
+ }
+
+ /*
+ Calculate how much space is taken by allocation of the key
+ entry for a record in the join buffer
+ */
+ uint extra_key_length() { return key_entry_length; }
+
+ /*
+ Skip record from a hashed join buffer if its match flag
+ is set to MATCH_FOUND
+ */
+ bool skip_if_matched();
+
+ /*
+ Skip record from a hashed join buffer if its match flag setting
+ commands to do so
+ */
+ bool skip_if_not_needed_match();
+
+ /* Search for a key in the hash table of the join buffer */
+ bool key_search(uchar *key, uint key_len, uchar **key_ref_ptr);
+
+ /* Reallocate the join buffer of a hashed join cache */
+ int realloc_buffer();
+
+ /*
+ This constructor creates an unlinked hashed join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter.
+ */
+ JOIN_CACHE_HASHED(JOIN *j, JOIN_TAB *tab) :JOIN_CACHE(j, tab) {}
+
+ /*
+ This constructor creates a linked hashed join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter. The parameter 'prev' specifies the previous
+ cache object to which this cache is linked.
+ */
+ JOIN_CACHE_HASHED(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev)
+ :JOIN_CACHE(j, tab, prev) {}
+
+public:
+
+ /* Initialize a hashed join cache */
+ int init();
+
+ /* Reset the buffer of a hashed join cache for reading/writing */
+ void reset(bool for_writing);
+
+ /* Add a record into the buffer of a hashed join cache */
+ bool put_record();
+
+ /* Read the next record from the buffer of a hashed join cache */
+ bool get_record();
+
+ /*
+ Shall check whether all records in a key chain have
+ their match flags set on
+ */
+ virtual bool check_all_match_flags_for_key(uchar *key_chain_ptr);
+
+ uint get_next_key(uchar **key);
+
+ /* Get the head of the record chain attached to the current key entry */
+ uchar *get_curr_key_chain()
+ {
+ return get_next_rec_ref(curr_key_entry+key_entry_length-
+ get_size_of_rec_offset());
+ }
+
+};
+
+
+/*
+ The class JOIN_TAB_SCAN is a companion class for the classes JOIN_CACHE_BNL
+ and JOIN_CACHE_BNLH. Actually the class implements the iterator over the
+  table joined by BNL/BNLH join algorithm.
+ The virtual functions open, next and close are called for any iteration over
+ the table. The function open is called to initiate the process of the
+ iteration. The function next shall read the next record from the joined
+ table. The record is read into the record buffer of the joined table.
+ The record is to be matched with records from the join cache buffer.
+ The function close shall perform the finalizing actions for the iteration.
+*/
+
+class JOIN_TAB_SCAN: public Sql_alloc
+{
+
+private:
+ /* TRUE if this is the first record from the joined table to iterate over */
+ bool is_first_record;
+
+protected:
+
+ /* The joined table to be iterated over */
+ JOIN_TAB *join_tab;
+ /* The join cache used to join the table join_tab */
+ JOIN_CACHE *cache;
+ /*
+ Representation of the executed multi-way join through which
+ all needed context can be accessed.
+ */
+ JOIN *join;
+
+public:
+
+ JOIN_TAB_SCAN(JOIN *j, JOIN_TAB *tab)
+ {
+ join= j;
+ join_tab= tab;
+ cache= join_tab->cache;
+ }
+
+ virtual ~JOIN_TAB_SCAN() {}
+
+ /*
+ Shall calculate the increment of the auxiliary buffer for a record
+ write if such a buffer is used by the table scan object
+ */
+ virtual uint aux_buffer_incr(ulong recno) { return 0; }
+
+ /* Initiate the process of iteration over the joined table */
+ virtual int open();
+ /*
+ Shall read the next candidate for matches with records from
+ the join buffer.
+ */
+ virtual int next();
+ /*
+ Perform the finalizing actions for the process of iteration
+    over the joined table.
+ */
+ virtual void close();
+
+};
+
+/*
+ The class JOIN_CACHE_BNL is used when the BNL join algorithm is
+ employed to perform a join operation
+*/
+
+class JOIN_CACHE_BNL :public JOIN_CACHE
+{
+private:
+ /*
+ The number of the records in the join buffer that have to be
+ checked yet for a match with the current record of join_tab
+ read into the record buffer.
+ */
+ uint rem_records;
+
+protected:
+
+ bool prepare_look_for_matches(bool skip_last);
+
+ uchar *get_next_candidate_for_match();
+
+ bool skip_next_candidate_for_match(uchar *rec_ptr);
+
+ void read_next_candidate_for_match(uchar *rec_ptr);
+
+public:
+
+ /*
+ This constructor creates an unlinked BNL join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter.
+ */
+ JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab) :JOIN_CACHE(j, tab) {}
+
+ /*
+ This constructor creates a linked BNL join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter. The parameter 'prev' specifies the previous
+ cache object to which this cache is linked.
+ */
+ JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev)
+ :JOIN_CACHE(j, tab, prev) {}
+
+ /* Initialize the BNL cache */
+ int init();
+
+ enum Join_algorithm get_join_alg() { return BNL_JOIN_ALG; }
+
+ bool is_key_access() { return FALSE; }
+
+};
+
+
+/*
+ The class JOIN_CACHE_BNLH is used when the BNLH join algorithm is
+ employed to perform a join operation
+*/
+
+class JOIN_CACHE_BNLH :public JOIN_CACHE_HASHED
+{
+
+protected:
+
+ /*
+ The pointer to the last record from the circular list of the records
+ that match the join key built out of the record in the join buffer for
+ the join_tab table
+ */
+ uchar *last_matching_rec_ref_ptr;
+ /*
+ The pointer to the next current record from the circular list of the
+ records that match the join key built out of the record in the join buffer
+ for the join_tab table. This pointer is used by the class method
+ get_next_candidate_for_match to iterate over records from the circular
+ list.
+ */
+ uchar *next_matching_rec_ref_ptr;
+
+ /*
+ Get the chain of records from buffer matching the current candidate
+ record for join
+ */
+ uchar *get_matching_chain_by_join_key();
+
+ bool prepare_look_for_matches(bool skip_last);
+
+ uchar *get_next_candidate_for_match();
+
+ bool skip_next_candidate_for_match(uchar *rec_ptr);
+
+ void read_next_candidate_for_match(uchar *rec_ptr);
+
+public:
+
+ /*
+ This constructor creates an unlinked BNLH join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter.
+ */
+ JOIN_CACHE_BNLH(JOIN *j, JOIN_TAB *tab) : JOIN_CACHE_HASHED(j, tab) {}
+
+ /*
+ This constructor creates a linked BNLH join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter. The parameter 'prev' specifies the previous
+ cache object to which this cache is linked.
+ */
+ JOIN_CACHE_BNLH(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev)
+ : JOIN_CACHE_HASHED(j, tab, prev) {}
+
+ /* Initialize the BNLH cache */
+ int init();
+
+ enum Join_algorithm get_join_alg() { return BNLH_JOIN_ALG; }
+
+ bool is_key_access() { return TRUE; }
+
+};
+
+
+/*
+ The class JOIN_TAB_SCAN_MRR is a companion class for the classes
+ JOIN_CACHE_BKA and JOIN_CACHE_BKAH. Actually the class implements the
+ iterator over the records from join_tab selected by BKA/BKAH join
+ algorithm as the candidates to be joined.
+ The virtual functions open, next and close are called for any iteration over
+ join_tab record candidates. The function open is called to initiate the
+ process of the iteration. The function next shall read the next record from
+ the set of the record candidates. The record is read into the record buffer
+ of the joined table. The function close shall perform the finalizing actions
+ for the iteration.
+*/
+
+class JOIN_TAB_SCAN_MRR: public JOIN_TAB_SCAN
+{
+ /* Interface object to generate key ranges for MRR */
+ RANGE_SEQ_IF range_seq_funcs;
+
+ /* Number of ranges to be processed by the MRR interface */
+ uint ranges;
+
+  /* Flag to be passed to the MRR interface */
+ uint mrr_mode;
+
+  /* MRR buffer associated with this join cache */
+ HANDLER_BUFFER mrr_buff;
+
+ /* Shall initialize the MRR buffer */
+ virtual void init_mrr_buff()
+ {
+ cache->setup_aux_buffer(mrr_buff);
+ }
+
+public:
+
+ JOIN_TAB_SCAN_MRR(JOIN *j, JOIN_TAB *tab, uint flags, RANGE_SEQ_IF rs_funcs)
+ :JOIN_TAB_SCAN(j, tab), range_seq_funcs(rs_funcs), mrr_mode(flags) {}
+
+ uint aux_buffer_incr(ulong recno);
+
+ int open();
+
+ int next();
+
+};
+
+/*
+ The class JOIN_CACHE_BKA is used when the BKA join algorithm is
+ employed to perform a join operation
+*/
+
+class JOIN_CACHE_BKA :public JOIN_CACHE
+{
+private:
+
+  /* Flag to be passed to the companion JOIN_TAB_SCAN_MRR object */
+ uint mrr_mode;
+
+ /*
+ This value is set to 1 by the class prepare_look_for_matches method
+ and back to 0 by the class get_next_candidate_for_match method
+ */
+ uint rem_records;
+
+ /*
+ This field contains the current association label set by a call of
+ the multi_range_read_next handler function.
+ See the function JOIN_CACHE_BKA::get_curr_key_association()
+ */
+ uchar *curr_association;
+
+protected:
+
+ /*
+ Get the number of ranges in the cache buffer passed to the MRR
+ interface. For each record its own range is passed.
+ */
+ uint get_number_of_ranges_for_mrr() { return records; }
+
+ /*
+ Setup the MRR buffer as the space between the last record put
+ into the join buffer and the very end of the join buffer
+ */
+ int setup_aux_buffer(HANDLER_BUFFER &aux_buff)
+ {
+ aux_buff.buffer= end_pos;
+ aux_buff.buffer_end= buff+buff_size;
+ return 0;
+ }
+
+ bool prepare_look_for_matches(bool skip_last);
+
+ uchar *get_next_candidate_for_match();
+
+ bool skip_next_candidate_for_match(uchar *rec_ptr);
+
+ void read_next_candidate_for_match(uchar *rec_ptr);
+
+public:
+
+ /*
+ This constructor creates an unlinked BKA join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter.
+ The MRR mode initially is set to 'flags'.
+ */
+ JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags)
+ :JOIN_CACHE(j, tab), mrr_mode(flags) {}
+ /*
+ This constructor creates a linked BKA join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter. The parameter 'prev' specifies the previous
+ cache object to which this cache is linked.
+ The MRR mode initially is set to 'flags'.
+ */
+ JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE *prev)
+ :JOIN_CACHE(j, tab, prev), mrr_mode(flags) {}
+
+ uchar **get_curr_association_ptr() { return &curr_association; }
+
+ /* Initialize the BKA cache */
+ int init();
+
+ enum Join_algorithm get_join_alg() { return BKA_JOIN_ALG; }
+
+ bool is_key_access() { return TRUE; }
+
+ /* Get the key built over the next record from the join buffer */
+ uint get_next_key(uchar **key);
+
+ /* Check index condition of the joined table for a record from BKA cache */
+ bool skip_index_tuple(char *range_info);
+
+};
+
+
+
+/*
+ The class JOIN_CACHE_BKAH is used when the BKAH join algorithm is
+ employed to perform a join operation
+*/
+
+class JOIN_CACHE_BKAH :public JOIN_CACHE_BNLH
+{
+
+private:
+  /* Flag to be passed to the companion JOIN_TAB_SCAN_MRR object */
+ uint mrr_mode;
+
+ /*
+ This flag is set to TRUE if the implementation of the MRR interface cannot
+ handle range association labels and does not return them to the caller of
+ the multi_range_read_next handler function. E.g. the implementation of
+    the MRR interface for the Falcon engine could not return association
+ labels to the caller of multi_range_read_next.
+    The flag is set by JOIN_CACHE_BKAH::init() and is never changed.
+ */
+ bool no_association;
+
+ /*
+ This field contains the association label returned by the
+ multi_range_read_next function.
+ See the function JOIN_CACHE_BKAH::get_curr_key_association()
+ */
+ uchar *curr_matching_chain;
+
+protected:
+
+ uint get_number_of_ranges_for_mrr() { return key_entries; }
+
+ /*
+ Initialize the MRR buffer allocating some space within the join buffer.
+ The entire space between the last record put into the join buffer and the
+ last key entry added to the hash table is used for the MRR buffer.
+ */
+ int setup_aux_buffer(HANDLER_BUFFER &aux_buff)
+ {
+ aux_buff.buffer= end_pos;
+ aux_buff.buffer_end= last_key_entry;
+ return 0;
+ }
+
+ bool prepare_look_for_matches(bool skip_last);
+
+ /*
+ The implementations of the methods
+ - get_next_candidate_for_match
+ - skip_recurrent_candidate_for_match
+ - read_next_candidate_for_match
+ are inherited from the JOIN_CACHE_BNLH class
+ */
+
+public:
+
+ /*
+ This constructor creates an unlinked BKAH join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter.
+ The MRR mode initially is set to 'flags'.
+ */
+ JOIN_CACHE_BKAH(JOIN *j, JOIN_TAB *tab, uint flags)
+ :JOIN_CACHE_BNLH(j, tab), mrr_mode(flags) {}
+
+ /*
+ This constructor creates a linked BKAH join cache. The cache is to be
+ used to join table 'tab' to the result of joining the previous tables
+ specified by the 'j' parameter. The parameter 'prev' specifies the previous
+ cache object to which this cache is linked.
+ The MRR mode initially is set to 'flags'.
+ */
+ JOIN_CACHE_BKAH(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE *prev)
+ :JOIN_CACHE_BNLH(j, tab, prev), mrr_mode(flags) {}
+
+ uchar **get_curr_association_ptr() { return &curr_matching_chain; }
+
+ /* Initialize the BKAH cache */
+ int init();
+
+ enum Join_algorithm get_join_alg() { return BKAH_JOIN_ALG; }
+
+ /* Check index condition of the joined table for a record from BKAH cache */
+ bool skip_index_tuple(char *range_info);
+};
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 037d7ea5df8..f53ffb7f893 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -100,6 +100,7 @@ static void make_outerjoin_info(JOIN *join);
static Item*
make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, table_map sjm_tables);
static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item);
+static void revise_cache_usage(JOIN_TAB *join_tab);
static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after);
static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables);
static void update_depend_map(JOIN *join);
@@ -178,12 +179,14 @@ static int join_ft_read_next(READ_RECORD *info);
int join_read_always_key_or_null(JOIN_TAB *tab);
int join_read_next_same_or_null(READ_RECORD *info);
static COND *make_cond_for_table(Item *cond,table_map table,
- table_map used_table,
- bool exclude_expensive_cond);
+ table_map used_table,
+ bool exclude_expensive_cond,
+ bool retain_ref_cond);
static COND *make_cond_for_table_from_pred(Item *root_cond, Item *cond,
table_map tables,
table_map used_table,
- bool exclude_expensive_cond);
+ bool exclude_expensive_cond,
+ bool retain_ref_cond);
static Item* part_of_refkey(TABLE *form,Field *field);
uint find_shortest_key(TABLE *table, const key_map *usable_keys);
@@ -926,7 +929,7 @@ JOIN::optimize()
if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
{
COND *table_independent_conds=
- make_cond_for_table(conds, PSEUDO_TABLE_BITS, 0, FALSE);
+ make_cond_for_table(conds, PSEUDO_TABLE_BITS, 0, FALSE, FALSE);
DBUG_EXECUTE("where",
print_where(table_independent_conds,
"where after opt_sum_query()",
@@ -1640,6 +1643,62 @@ void JOIN::restore_tmp()
}
+/*
+ Shrink join buffers used for preceding tables to reduce the occupied space
+
+ SYNOPSIS
+ shrink_join_buffers()
+ jt table up to which the buffers are to be shrunk
+ curr_space the size of the space used by the buffers for tables 1..jt
+ needed_space the size of the space that has to be used by these buffers
+
+ DESCRIPTION
+ The function makes an attempt to shrink all join buffers used for the
+ tables starting from the first up to jt to reduce the total size of the
+ space occupied by the buffers used for tables 1,...,jt from curr_space
+ to needed_space.
+ The function assumes that the buffer for the table jt has not been
+ allocated yet.
+
+ RETURN
+    FALSE if all buffers have been successfully shrunk
+ TRUE otherwise
+*/
+
+bool JOIN::shrink_join_buffers(JOIN_TAB *jt,
+ ulonglong curr_space,
+ ulonglong needed_space)
+{
+ JOIN_CACHE *cache;
+ for (JOIN_TAB *tab= join_tab+const_tables; tab < jt; tab++)
+ {
+ cache= tab->cache;
+ if (cache)
+ {
+ ulong buff_size;
+ if (needed_space < cache->get_min_join_buffer_size())
+ return TRUE;
+ if (cache->shrink_join_buffer_in_ratio(curr_space, needed_space))
+ {
+ revise_cache_usage(tab);
+ return TRUE;
+ }
+ buff_size= cache->get_join_buffer_size();
+ curr_space-= buff_size;
+ needed_space-= buff_size;
+ }
+ }
+
+ cache= jt->cache;
+ DBUG_ASSERT(cache);
+ if (needed_space < cache->get_min_join_buffer_size())
+ return TRUE;
+ cache->set_join_buffer_size(needed_space);
+
+ return FALSE;
+}
+
+
int
JOIN::reinit()
{
@@ -2209,7 +2268,7 @@ JOIN::exec()
Item* sort_table_cond= make_cond_for_table(curr_join->tmp_having,
used_tables,
- (table_map)0, FALSE);
+ (table_map)0, FALSE, FALSE);
if (sort_table_cond)
{
if (!curr_table->select)
@@ -2240,7 +2299,7 @@ JOIN::exec()
QT_ORDINARY););
curr_join->tmp_having= make_cond_for_table(curr_join->tmp_having,
~ (table_map) 0,
- ~used_tables, FALSE);
+ ~used_tables, FALSE, FALSE);
DBUG_EXECUTE("where",print_where(curr_join->tmp_having,
"having after sort",
QT_ORDINARY););
@@ -5866,15 +5925,15 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
Find how much space the prevous read not const tables takes in cache.
*/
-void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
+void JOIN_TAB::calc_used_field_length(bool max_fl)
{
uint null_fields,blobs,fields,rec_length;
Field **f_ptr,*field;
uint uneven_bit_fields;
- MY_BITMAP *read_set= join_tab->table->read_set;
+ MY_BITMAP *read_set= table->read_set;
uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
- for (f_ptr=join_tab->table->field ; (field= *f_ptr) ; f_ptr++)
+ for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
{
if (bitmap_is_set(read_set, field->field_index))
{
@@ -5891,24 +5950,105 @@ void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
}
}
if (null_fields || uneven_bit_fields)
- rec_length+=(join_tab->table->s->null_fields+7)/8;
- if (join_tab->table->maybe_null)
+ rec_length+=(table->s->null_fields+7)/8;
+ if (table->maybe_null)
rec_length+=sizeof(my_bool);
- if (blobs)
+ if (max_fl)
{
- uint blob_length=(uint) (join_tab->table->file->stats.mean_rec_length-
- (join_tab->table->s->reclength-rec_length));
- rec_length+=(uint) max(4,blob_length);
- }
+ // TODO: to improve this estimate for max expected length
+ if (blobs)
+ {
+ uint blob_length=(uint) (table->file->stats.mean_rec_length-
+ (table->s->reclength-rec_length));
+ rec_length+=(uint) max(sizeof(void*) * blobs, blob_length);
+ }
+ max_used_fieldlength= rec_length;
+ }
+ else if (table->file->stats.mean_rec_length)
+ set_if_smaller(rec_length, table->file->stats.mean_rec_length);
+
/*
psergey-todo: why we don't count here rowid that we might need to store
when using DuplicateElimination?
*/
- join_tab->used_fields=fields;
- join_tab->used_fieldlength=rec_length;
- join_tab->used_blobs=blobs;
- join_tab->used_null_fields= null_fields;
- join_tab->used_uneven_bit_fields= uneven_bit_fields;
+ used_fields=fields;
+ used_fieldlength=rec_length;
+ used_blobs=blobs;
+ used_null_fields= null_fields;
+ used_uneven_bit_fields= uneven_bit_fields;
+}
+
+
+/**
+ @brief
+ Check whether hash join algorithm can be used to join this table
+
+ @details
+ This function finds out whether the ref items that have been chosen
+ by the planner to access this table can be used for hash join algorithms.
+ The answer depends on a certain property of the fields of the
+ joined tables on which the hash join key is built. If hash join is
+ allowed for all these fields the answer is positive.
+
+ @note
+ The function is supposed to be called now only after the function
+ get_best_combination has been called.
+
+ @retval TRUE it's possible to use hash join to join this table
+ @retval FALSE otherwise
+*/
+
+bool JOIN_TAB::hash_join_is_possible()
+{
+ if (type != JT_REF && type != JT_EQ_REF)
+ return FALSE;
+ KEY *keyinfo= &table->key_info[ref.key];
+ for (uint i= 0; i < ref.key_parts; i++)
+ {
+ if (!keyinfo->key_part[i].field->hash_join_is_possible())
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
+/*
+ @brief
+ Extract pushdown conditions for a table scan
+
+ @details
+ This function extracts pushdown conditions usable when this table is scanned.
+ The conditions are extracted either from WHERE or from ON expressions.
+ The conditions are attached to the field cache_select of this table.
+
+ @note
+ Currently the extracted conditions are used only by BNL and BNLH join
+ algorithms.
+
+ @retval 0 on success
+ 1 otherwise
+*/
+
+int JOIN_TAB::make_scan_filter()
+{
+ COND *tmp;
+ DBUG_ENTER("make_scan_filter");
+
+ Item *cond= is_inner_table_of_outer_join() ?
+ *get_first_inner_table()->on_expr_ref : join->conds;
+
+ if (cond &&
+ (tmp=make_cond_for_table(cond, join->const_table_map | table->map,
+ table->map, FALSE, TRUE)))
+ {
+ DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
+ if (!(cache_select=
+ (SQL_SELECT*) join->thd->memdup((uchar*) select, sizeof(SQL_SELECT))))
+ DBUG_RETURN(1);
+ cache_select->cond= tmp;
+ cache_select->read_tables=join->const_table_map;
+ }
+ DBUG_RETURN(0);
}
@@ -5917,16 +6057,13 @@ cache_record_length(JOIN *join,uint idx)
{
uint length=0;
JOIN_TAB **pos,**end;
- THD *thd=join->thd;
for (pos=join->best_ref+join->const_tables,end=join->best_ref+idx ;
pos != end ;
pos++)
{
JOIN_TAB *join_tab= *pos;
- if (!join_tab->used_fieldlength) /* Not calced yet */
- calc_used_field_length(thd, join_tab);
- length+=join_tab->used_fieldlength;
+ length+= join_tab->get_used_fieldlength();
}
return length;
}
@@ -6397,6 +6534,8 @@ inline void add_cond_and_fix(Item **e1, Item *e2)
{
if (*e1)
{
+ if (!e2)
+ return;
Item *res;
if ((res= new Item_cond_and(*e1, e2)))
{
@@ -6467,9 +6606,8 @@ static void add_not_null_conds(JOIN *join)
for (uint i=join->const_tables ; i < join->tables ; i++)
{
JOIN_TAB *tab=join->join_tab+i;
- if ((tab->type == JT_REF || tab->type == JT_EQ_REF ||
- tab->type == JT_REF_OR_NULL) &&
- !tab->table->maybe_null)
+ if (tab->type == JT_REF || tab->type == JT_EQ_REF ||
+ tab->type == JT_REF_OR_NULL)
{
for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++)
{
@@ -6501,9 +6639,14 @@ static void add_not_null_conds(JOIN *join)
DBUG_EXECUTE("where",print_where(notnull,
referred_tab->table->alias.c_ptr(),
QT_ORDINARY););
- COND *new_cond= referred_tab->select_cond;
- add_cond_and_fix(&new_cond, notnull);
- referred_tab->set_select_cond(new_cond, __LINE__);
+ if (!tab->first_inner)
+ {
+ COND *new_cond= referred_tab->select_cond;
+ add_cond_and_fix(&new_cond, notnull);
+ referred_tab->set_select_cond(new_cond, __LINE__);
+ }
+ else
+ add_cond_and_fix(tab->first_inner->on_expr_ref, notnull);
}
}
}
@@ -6682,7 +6825,11 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
COND *const_cond=
make_cond_for_table(cond,
join->const_table_map,
- (table_map) 0, TRUE);
+ (table_map) 0, TRUE, FALSE);
+ /* Add conditions added by add_not_null_conds(). */
+ for (uint i= 0 ; i < join->const_tables ; i++)
+ add_cond_and_fix(&const_cond, join->join_tab[i].select_cond);
+
DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY););
for (JOIN_TAB *tab= join->join_tab+join->const_tables;
tab < join->join_tab+join->tables ; tab++)
@@ -6692,7 +6839,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
JOIN_TAB *cond_tab= tab->first_inner;
COND *tmp= make_cond_for_table(*tab->on_expr_ref,
join->const_table_map,
- ( table_map) 0, FALSE);
+ (table_map) 0, FALSE, FALSE);
if (!tmp)
continue;
tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
@@ -6780,7 +6927,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
tmp= NULL;
if (cond)
- tmp= make_cond_for_table(cond, used_tables, current_map, FALSE);
+ tmp= make_cond_for_table(cond, used_tables, current_map, FALSE, FALSE);
+ /* Add conditions added by add_not_null_conds(). */
+ if (tab->select_cond)
+ add_cond_and_fix(&tmp, tab->select_cond);
if (cond && !tmp && tab->quick)
{ // Outer join
if (tab->type != JT_ALL)
@@ -6838,7 +6988,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
if (thd->variables.engine_condition_pushdown && !first_inner_tab)
{
COND *push_cond=
- make_cond_for_table(tmp, current_map, current_map, FALSE);
+ make_cond_for_table(tmp, current_map, current_map, FALSE, FALSE);
if (push_cond)
{
/* Push condition to handler */
@@ -6965,19 +7115,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
if (i != join->const_tables && tab->use_quick != 2 &&
!tab->first_inner)
{ /* Read with cache */
- if (cond &&
- (tmp=make_cond_for_table(cond,
- join->const_table_map |
- current_map,
- current_map, FALSE)))
- {
- DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
- tab->cache_select=(SQL_SELECT*)
- thd->memdup((uchar*) sel, sizeof(SQL_SELECT));
- tab->cache_select->cond=tmp;
- tab->cache_select->read_tables=join->const_table_map;
- }
- }
+ if (tab->make_scan_filter())
+ DBUG_RETURN(1);
+ }
}
}
@@ -6999,7 +7139,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
JOIN_TAB *cond_tab= join_tab->first_inner;
COND *tmp= make_cond_for_table(*join_tab->on_expr_ref,
join->const_table_map,
- (table_map) 0, FALSE);
+ (table_map) 0, FALSE, FALSE);
if (!tmp)
continue;
tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
@@ -7034,10 +7174,15 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
current_map= tab->table->map;
used_tables2|= current_map;
COND *tmp_cond= make_cond_for_table(on_expr, used_tables2,
- current_map, FALSE);
+ current_map, FALSE, FALSE);
+ if (tab == first_inner_tab && tab->on_precond)
+ add_cond_and_fix(&tmp_cond, tab->on_precond);
if (tmp_cond)
{
JOIN_TAB *cond_tab= tab < first_inner_tab ? first_inner_tab : tab;
+ Item **sel_cond_ref= tab < first_inner_tab ?
+ &first_inner_tab->on_precond :
+ &tab->select_cond;
/*
First add the guards for match variables of
all embedding outer join operations.
@@ -7060,15 +7205,15 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
tmp_cond->quick_fix_field();
/* Add the predicate to other pushed down predicates */
DBUG_PRINT("info", ("Item_cond_and"));
- cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond :
- new Item_cond_and(cond_tab->select_cond,
- tmp_cond);
+ *sel_cond_ref= !(*sel_cond_ref) ?
+ tmp_cond :
+ new Item_cond_and(*sel_cond_ref, tmp_cond);
DBUG_PRINT("info", ("Item_cond_and 0x%lx",
- (ulong)cond_tab->select_cond));
- if (!cond_tab->select_cond)
- DBUG_RETURN(1);
- cond_tab->select_cond->quick_fix_field();
- cond_tab->select_cond->update_used_tables();
+ (ulong)(*sel_cond_ref)));
+ if (!(*sel_cond_ref))
+ DBUG_RETURN(1);
+ (*sel_cond_ref)->quick_fix_field();
+ (*sel_cond_ref)->update_used_tables();
if (cond_tab->select)
cond_tab->select->cond= cond_tab->select_cond;
}
@@ -7160,6 +7305,19 @@ void set_join_cache_denial(JOIN_TAB *join_tab)
{
if (join_tab->cache)
{
+ /*
+ If there is a previous cache linked to this cache through the
+ next_cache pointer: remove the link.
+ */
+ if (join_tab->cache->prev_cache)
+ join_tab->cache->prev_cache->next_cache= 0;
+ /*
+ No need to do the same for next_cache since cache denial is done
+ backwards starting from the latest cache in the linked list (see
+ revise_cache_usage()).
+ */
+ DBUG_ASSERT(!join_tab->cache->next_cache);
+
join_tab->cache->free();
join_tab->cache= 0;
}
@@ -7352,8 +7510,11 @@ end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
join join for which the check is performed
options options of the join
no_jbuf_after don't use join buffering after table with this number
+ prev_tab previous join table
icp_other_tables_ok OUT TRUE if condition pushdown supports
other tables presence
+ idx_cond_fact_out OUT TRUE if condition pushed to the index is factored
+ out of the condition pushed to the table
DESCRIPTION
The function finds out whether the table 'tab' can be joined using a join
@@ -7365,24 +7526,66 @@ end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
depend on:
- the access method to access rows of the joined table
- whether the join table is an inner table of an outer join or semi-join
+ - whether the optimizer switches
+ outer_join_with_cache, semijoin_with_cache, join_cache_incremental,
+ join_cache_hashed, join_cache_bka,
+ are set on or off
- the join cache level set for the query
- the join 'options'.
+
In any case join buffer is not used if the number of the joined table is
greater than 'no_jbuf_after'. It's also never used if the value of
join_cache_level is equal to 0.
- The other valid settings of join_cache_level lay in the interval 1..8.
- If join_cache_level==1|2 then join buffer is used only for inner joins
- with 'JT_ALL' access method.
- If join_cache_level==3|4 then join buffer is used for any join operation
- (inner join, outer join, semi-join) with 'JT_ALL' access method.
- If 'JT_ALL' access method is used to read rows of the joined table then
- always a JOIN_CACHE_BNL object is employed.
+ If the optimizer switch outer_join_with_cache is off no join buffer is
+ used for outer join operations.
+ If the optimizer switch semijoin_with_cache is off no join buffer is used
+ for semi-join operations.
+ If the optimizer switch join_cache_incremental is off no incremental join
+ buffers are used.
+ If the optimizer switch join_cache_hashed is off then the optimizer does
+ not use either the BNLH algorithm or the BKAH algorithm to perform join
+ operations.
+
+ If the optimizer switch join_cache_bka is off then the optimizer does not
+ use either the BKA algorithm or the BKAH algorithm to perform join operations.
+ The valid settings for join_cache_level lay in the interval 0..8.
+ If it is set to 0 no join buffers are used to perform join operations.
+ Currently we differentiate between join caches of 8 levels:
+ 1 : non-incremental join cache used for BNL join algorithm
+ 2 : incremental join cache used for BNL join algorithm
+ 3 : non-incremental join cache used for BNLH join algorithm
+ 4 : incremental join cache used for BNLH join algorithm
+ 5 : non-incremental join cache used for BKA join algorithm
+ 6 : incremental join cache used for BKA join algorithm
+ 7 : non-incremental join cache used for BKAH join algorithm
+ 8 : incremental join cache used for BKAH join algorithm
+ If the value of join_cache_level is set to n then no join caches of
+ levels higher than n can be employed.
+
+ If the optimizer switches outer_join_with_cache, semijoin_with_cache,
+ join_cache_incremental, join_cache_hashed, join_cache_bka are all on
+ the following rules are applied.
+ If join_cache_level==1|2 then join buffer is used for inner joins, outer
+ joins and semi-joins with 'JT_ALL' access method. In this case a
+ JOIN_CACHE_BNL object is employed.
+ If join_cache_level==3|4 and join buffer is used for a join operation
+ (inner join, outer join, semi-join) with 'JT_REF'/'JT_EQ_REF' access method
+ then a JOIN_CACHE_BNLH object is employed.
If an index is used to access rows of the joined table and the value of
join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed.
If an index is used to access rows of the joined table and the value of
- join_cache_level==7|8 then a JOIN_CACHE_BKA_UNIQUE object is employed.
+ join_cache_level==7|8 then a JOIN_CACHE_BKAH object is employed.
If the value of join_cache_level is odd then creation of a non-linked
join cache is forced.
+
+ Currently for any join operation a join cache of the highest
+ allowed and applicable level is used.
+ For example, if join_cache_level is set to 6 and the optimizer switch
+ join_cache_bka is off, while the optimizer switch join_cache_hashed is
+ on then for any inner join operation with JT_REF/JT_EQREF access method
+ to the joined table the BNLH join algorithm will be used, while for
+ the table accessed by the JT_ALL methods the BNL algorithm will be used.
+
If the function decides that a join buffer can be used to join the table
'tab' then it sets the value of tab->use_join_buffer to TRUE and assigns
the selected join cache object to the field 'cache' of the previous
@@ -7399,10 +7602,13 @@ end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
For a nested outer join/semi-join, currently, we either use join buffers for
all inner tables or for none of them.
Some engines (e.g. Falcon) currently allow to use only a join cache
- of the type JOIN_CACHE_BKA_UNIQUE when the joined table is accessed through
+ of the type JOIN_CACHE_BKAH when the joined table is accessed through
an index. For these engines setting the value of join_cache_level to 5 or 6
results in that no join buffer is used to join the table.
+ RETURN VALUE
+ cache level if cache is used, otherwise returns 0
+
TODO
Support BKA inside SJ-Materialization nests. When doing this, we'll need
to only store sj-inner tables in the join buffer.
@@ -7426,17 +7632,15 @@ end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
first_tab= join->join_tab + first_sjm_table;
}
#endif
-
- RETURN
-
- cache level if cache is used, otherwise returns 0
*/
static
uint check_join_cache_usage(JOIN_TAB *tab,
JOIN *join, ulonglong options,
uint no_jbuf_after,
- bool *icp_other_tables_ok)
+ JOIN_TAB *prev_tab,
+ bool *icp_other_tables_ok,
+ bool *idx_cond_fact_out)
{
uint flags;
COST_VECT cost;
@@ -7444,13 +7648,22 @@ uint check_join_cache_usage(JOIN_TAB *tab,
uint bufsz= 4096;
JOIN_CACHE *prev_cache=0;
uint cache_level= join->thd->variables.join_cache_level;
- bool force_unlinked_cache= test(cache_level & 1);
+ bool force_unlinked_cache=
+ !optimizer_flag(join->thd, OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL);
+ bool no_hashed_cache=
+ !optimizer_flag(join->thd, OPTIMIZER_SWITCH_JOIN_CACHE_HASHED);
+ bool no_bka_cache=
+ !optimizer_flag(join->thd, OPTIMIZER_SWITCH_JOIN_CACHE_BKA);
uint i= tab - join->join_tab;
*icp_other_tables_ok= TRUE;
- if (cache_level == 0 || i == join->const_tables)
+ *idx_cond_fact_out= TRUE;
+ if (cache_level == 0 || i == join->const_tables || !prev_tab)
return 0;
+ if (force_unlinked_cache && (cache_level%2 == 0))
+ cache_level--;
+
if (options & SELECT_NO_JOIN_CACHE)
goto no_join_cache;
/*
@@ -7459,17 +7672,25 @@ uint check_join_cache_usage(JOIN_TAB *tab,
*/
if (tab->use_quick == 2)
goto no_join_cache;
+
+ if (tab->is_inner_table_of_semi_join_with_first_match() &&
+ !optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE))
+ goto no_join_cache;
+ if (tab->is_inner_table_of_outer_join() &&
+ !optimizer_flag(join->thd, OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE))
+ goto no_join_cache;
+
/*
Non-linked join buffers can't guarantee one match
*/
- if (force_unlinked_cache &&
- (!(tab->type == JT_ALL) || cache_level <= 4) &&
- ((tab->is_inner_table_of_semi_join_with_first_match() &&
- !tab->is_single_inner_of_semi_join_with_first_match()) ||
- (tab->is_inner_table_of_outer_join() &&
- !tab->is_single_inner_of_outer_join())))
- goto no_join_cache;
-
+ if (tab->is_nested_inner())
+ {
+ if (force_unlinked_cache || cache_level == 1)
+ goto no_join_cache;
+ if (cache_level & 1)
+ cache_level--;
+ }
+
/*
Don't use join buffering if we're dictated not to by no_jbuf_after (this
...)
@@ -7487,7 +7708,7 @@ uint check_join_cache_usage(JOIN_TAB *tab,
if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab &&
!tab->first_sj_inner_tab->use_join_cache)
goto no_join_cache;
- if (!tab[-1].use_join_cache)
+ if (!prev_tab->use_join_cache)
{
/*
Check whether table tab and the previous one belong to the same nest of
@@ -7509,47 +7730,89 @@ uint check_join_cache_usage(JOIN_TAB *tab,
}
if (!force_unlinked_cache)
- prev_cache= tab[-1].cache;
+ prev_cache= prev_tab->cache;
switch (tab->type) {
case JT_ALL:
- if (cache_level <= 2 && (tab->first_inner || tab->first_sj_inner_tab))
- goto no_join_cache;
- if ((options & SELECT_DESCRIBE) ||
- (((tab->cache= new JOIN_CACHE_BNL(join, tab, prev_cache))) &&
- !tab->cache->init()))
+ if (cache_level == 1)
+ prev_cache= 0;
+ if ((tab->cache= new JOIN_CACHE_BNL(join, tab, prev_cache)) &&
+ ((options & SELECT_DESCRIBE) || !tab->cache->init()))
{
*icp_other_tables_ok= FALSE;
- return cache_level;
+ return (2-test(!prev_cache));
}
goto no_join_cache;
case JT_SYSTEM:
case JT_CONST:
case JT_REF:
case JT_EQ_REF:
- if (cache_level <= 4)
- return 0;
+ if (cache_level <=2 || (no_hashed_cache && no_bka_cache))
+ goto no_join_cache;
+
flags= HA_MRR_NO_NULL_ENDPOINTS;
if (tab->table->covering_keys.is_set(tab->ref.key))
flags|= HA_MRR_INDEX_ONLY;
rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
&bufsz, &flags, &cost);
- if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL) &&
- (!(flags & HA_MRR_NO_ASSOCIATION) || cache_level > 6) &&
- ((options & SELECT_DESCRIBE) ||
- (((cache_level <= 6 &&
- (tab->cache= new JOIN_CACHE_BKA(join, tab, flags, prev_cache))) ||
- (cache_level > 6 &&
- (tab->cache= new JOIN_CACHE_BKA_UNIQUE(join, tab, flags, prev_cache)))
- ) && !tab->cache->init())))
- return cache_level;
+
+ if ((cache_level <=4 && !no_hashed_cache) || no_bka_cache ||
+ ((flags & HA_MRR_NO_ASSOCIATION) && cache_level <=6))
+ {
+ if (!tab->hash_join_is_possible() ||
+ tab->make_scan_filter())
+ goto no_join_cache;
+ if (cache_level == 3)
+ {
+ if (prev_tab->cache)
+ goto no_join_cache;
+ prev_cache= 0;
+ }
+ if ((tab->cache= new JOIN_CACHE_BNLH(join, tab, prev_cache)) &&
+ ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+ {
+ *icp_other_tables_ok= FALSE;
+ return (4-test(!prev_cache));
+ }
+ goto no_join_cache;
+ }
+ if (cache_level > 4 && no_bka_cache)
+ goto no_join_cache;
+
+ if ((flags & HA_MRR_NO_ASSOCIATION) &&
+ (cache_level <= 6 || no_hashed_cache))
+ goto no_join_cache;
+
+ if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL))
+ {
+ if (cache_level <= 6 || no_hashed_cache)
+ {
+ if (cache_level == 5)
+ prev_cache= 0;
+ if ((tab->cache= new JOIN_CACHE_BKA(join, tab, flags, prev_cache)) &&
+ ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+ return (6-test(!prev_cache));
+ goto no_join_cache;
+ }
+ else
+ {
+ if (cache_level == 7)
+ prev_cache= 0;
+ if ((tab->cache= new JOIN_CACHE_BKAH(join, tab, flags, prev_cache)) &&
+ ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+ {
+ *idx_cond_fact_out= FALSE;
+ return (8-test(!prev_cache));
+ }
+ goto no_join_cache;
+ }
+ }
goto no_join_cache;
default : ;
}
no_join_cache:
- if (cache_level>2)
- revise_cache_usage(tab);
+ revise_cache_usage(tab);
return 0;
}
@@ -7580,6 +7843,7 @@ static bool
make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
{
uint i;
+ uint jcl;
bool statistics= test(!(join->select_options & SELECT_DESCRIBE));
bool sorted= 1;
uint first_sjm_table= MAX_TABLES;
@@ -7591,17 +7855,31 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
setup_semijoin_dups_elimination(join, options, no_jbuf_after))
DBUG_RETURN(TRUE); /* purecov: inspected */
+ for (i= 0; i < join->const_tables; i++)
+ join->join_tab[i].partial_join_cardinality= 1;
+
for (i=join->const_tables ; i < join->tables ; i++)
{
JOIN_TAB *tab=join->join_tab+i;
TABLE *table=tab->table;
bool icp_other_tables_ok;
+ bool idx_cond_fact_out;
tab->read_record.table= table;
tab->read_record.file=table->file;
tab->read_record.unlock_row= rr_unlock_row;
tab->next_select=sub_select; /* normal select */
tab->sorted= sorted;
sorted= 0; // only first must be sorted
+
+ /*
+ The approximation below for partial join cardinality is not good because
+ - it does not take into account some pushdown predicates
+ - it does not differentiate between inner joins, outer joins and semi-joins.
+ Later it should be improved.
+ */
+ tab->partial_join_cardinality= join->best_positions[i].records_read *
+ (i ? (tab-1)->partial_join_cardinality : 1);
+
if (tab->loosescan_match_tab)
{
if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab->
@@ -7609,6 +7887,12 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
return TRUE; /* purecov: inspected */
tab->sorted= TRUE;
}
+
+ /*
+ SJ-Materialization
+ */
+ if (!(i >= first_sjm_table && i < last_sjm_table))
+ tab->first_sjm_sibling= NULL;
if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy))
{
/* This is a start of semi-join nest */
@@ -7621,49 +7905,74 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
if (setup_sj_materialization(tab))
return TRUE;
+ for (uint j= first_sjm_table; j != last_sjm_table; j++)
+ join->join_tab[j].first_sjm_sibling= join->join_tab + first_sjm_table;
}
table->status=STATUS_NO_RECORD;
pick_table_access_method (tab);
+ /*
+ This loop currently can be executed only once as the function
+ check_join_cache_usage does not change the value of tab->type.
+ It won't be true for the future code.
+ */
+ for ( ; ; )
+ {
+ enum join_type tab_type= tab->type;
+ switch (tab->type) {
+ case JT_SYSTEM:
+ case JT_CONST:
+ case JT_EQ_REF:
+ case JT_REF:
+ case JT_REF_OR_NULL:
+ case JT_ALL:
+ if ((jcl= check_join_cache_usage(tab, join, options,
+ no_jbuf_after,
+ i == last_sjm_table ?
+ join->join_tab+first_sjm_table :
+ tab-1,
+ &icp_other_tables_ok,
+ &idx_cond_fact_out)))
+ {
+ tab->use_join_cache= TRUE;
+ tab[-1].next_select=sub_select_cache;
+ }
+ break;
+ default:
+ ;
+ }
+ if (tab->type == tab_type)
+ break;
+ }
+
switch (tab->type) {
case JT_SYSTEM: // Only happens with left join
case JT_CONST: // Only happens with left join
/* Only happens with outer joins */
tab->read_first_record= tab->type == JT_SYSTEM ?
join_read_system :join_read_const;
- if (check_join_cache_usage(tab, join, options, no_jbuf_after,
- &icp_other_tables_ok))
- {
- tab->use_join_cache= TRUE;
- tab[-1].next_select=sub_select_cache;
- }
- else
if (table->covering_keys.is_set(tab->ref.key) &&
!table->no_keyread)
{
table->key_read=1;
table->file->extra(HA_EXTRA_KEYREAD);
}
- else
- push_index_cond(tab, tab->ref.key, icp_other_tables_ok);
+ else if (!jcl || jcl > 4)
+ push_index_cond(tab, tab->ref.key,
+ icp_other_tables_ok, idx_cond_fact_out);
break;
case JT_EQ_REF:
tab->read_record.unlock_row= join_read_key_unlock_row;
/* fall through */
- if (check_join_cache_usage(tab, join, options, no_jbuf_after,
- &icp_other_tables_ok))
- {
- tab->use_join_cache= TRUE;
- tab[-1].next_select=sub_select_cache;
- }
if (table->covering_keys.is_set(tab->ref.key) &&
!table->no_keyread)
{
table->key_read=1;
table->file->extra(HA_EXTRA_KEYREAD);
}
- else
- push_index_cond(tab, tab->ref.key, icp_other_tables_ok );
+ else if (!jcl || jcl > 4)
+ push_index_cond(tab, tab->ref.key,
+ icp_other_tables_ok, idx_cond_fact_out);
break;
case JT_REF_OR_NULL:
case JT_REF:
@@ -7674,17 +7983,12 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
}
delete tab->quick;
tab->quick=0;
- if (check_join_cache_usage(tab, join, options, no_jbuf_after,
- &icp_other_tables_ok))
- {
- tab->use_join_cache= TRUE;
- tab[-1].next_select=sub_select_cache;
- }
if (table->covering_keys.is_set(tab->ref.key) &&
!table->no_keyread)
table->enable_keyread();
- else
- push_index_cond(tab, tab->ref.key, icp_other_tables_ok);
+ else if (!jcl || jcl > 4)
+ push_index_cond(tab, tab->ref.key,
+ icp_other_tables_ok, idx_cond_fact_out);
break;
case JT_ALL:
/*
@@ -7693,12 +7997,6 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
Also don't use cache if this is the first table in semi-join
materialization nest.
*/
- if (check_join_cache_usage(tab, join, options, no_jbuf_after,
- &icp_other_tables_ok))
- {
- tab->use_join_cache= TRUE;
- tab[-1].next_select=sub_select_cache;
- }
/* These init changes read_record */
if (tab->use_quick == 2)
{
@@ -7771,7 +8069,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
}
if (tab->select && tab->select->quick &&
tab->select->quick->index != MAX_KEY && ! tab->table->key_read)
- push_index_cond(tab, tab->select->quick->index, icp_other_tables_ok);
+ push_index_cond(tab, tab->select->quick->index,
+ icp_other_tables_ok, idx_cond_fact_out);
}
break;
case JT_FT:
@@ -7795,8 +8094,11 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
JOIN_TAB *tab=join->join_tab+i;
if (tab->use_join_cache)
{
- JOIN_TAB *sort_by_tab= join->get_sort_by_join_tab();
- if (sort_by_tab && !join->need_tmp)
+ JOIN_TAB *sort_by_tab= join->group && join->simple_group &&
+ join->group_list ?
+ join->join_tab+join->const_tables :
+ join->get_sort_by_join_tab();
+ if (sort_by_tab)
{
join->need_tmp= 1;
join->simple_order= join->simple_group= 0;
@@ -9356,6 +9658,11 @@ Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
Item_equal *upper= item_field->find_item_equal(upper_levels);
Item_field *item= item_field;
TABLE_LIST *field_sjm= embedding_sjm(item_field);
+ if (!field_sjm)
+ {
+ current_sjm= NULL;
+ current_sjm_head= NULL;
+ }
/*
Check if "item_field=head" equality is already guaranteed to be true
@@ -10424,7 +10731,7 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
{
/* Find the best access method that would not use join buffering */
best_access_path(join, rs, reopt_remaining_tables, i,
- test(i < no_jbuf_before), rec_count,
+ TRUE, rec_count,
&pos, &loose_scan_pos);
}
else
@@ -13104,7 +13411,7 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
DBUG_RETURN(nls);
}
int error;
- enum_nested_loop_state rc;
+ enum_nested_loop_state rc= NESTED_LOOP_OK;
READ_RECORD *info= &join_tab->read_record;
if (join_tab->flush_weedout_table)
@@ -13137,18 +13444,21 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
/* Set first_unmatched for the last inner table of this group */
join_tab->last_inner->first_unmatched= join_tab;
- }
+ if (join_tab->on_precond && !join_tab->on_precond->val_int())
+ rc= NESTED_LOOP_NO_MORE_ROWS;
+ }
join->thd->row_count= 0;
if (join_tab->loosescan_match_tab)
join_tab->loosescan_match_tab->found_match= FALSE;
- error= (*join_tab->read_first_record)(join_tab);
-
- if (join_tab->keep_current_rowid)
- join_tab->table->file->position(join_tab->table->record[0]);
-
- rc= evaluate_join_record(join, join_tab, error);
+ if (rc != NESTED_LOOP_NO_MORE_ROWS)
+ {
+ error= (*join_tab->read_first_record)(join_tab);
+ if (join_tab->keep_current_rowid)
+ join_tab->table->file->position(join_tab->table->record[0]);
+ rc= evaluate_join_record(join, join_tab, error);
+ }
}
/*
@@ -13440,27 +13750,6 @@ evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab)
return (*join_tab->next_select)(join, join_tab+1, 0);
}
-#ifdef MERGE_JUNK
-//psergey3-merge: remove:
- SQL_SELECT *select;
- select= join_tab->select;
-
- int err= 0;
- (err= join_tab->cache.select->skip_record(join->thd)) != 0 ))
- {
- reset_cache_write(&join_tab->cache);
- return NESTED_LOOP_ERROR;
- }
-
- if (!select || (err= select->skip_record(join->thd)) != 0)
- if (err < 0)
- {
- reset_cache_write(&join_tab->cache);
- return NESTED_LOOP_ERROR;
- }
-
- rc= NESTED_LOOP_OK;
-#endif
/*****************************************************************************
The different ways to read a record
Returns -1 if row was not found, 0 if row was found and 1 on errors
@@ -13806,13 +14095,6 @@ join_read_always_key(JOIN_TAB *tab)
}
}
- /* Perform "Late NULLs Filtering" (see internals manual for explanations) */
- for (uint i= 0 ; i < tab->ref.key_parts ; i++)
- {
- if ((tab->ref.null_rejecting & 1 << i) && tab->ref.items[i]->is_null())
- return -1;
- }
-
if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
return -1;
if ((error= table->file->ha_index_read_map(table->record[0],
@@ -14667,6 +14949,7 @@ bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item)
used_table Table that we're extracting the condition for (may
also include PSEUDO_TABLE_BITS
exclude_expensive_cond Do not push expensive conditions
+ retain_ref_cond Retain ref conditions
DESCRIPTION
Extract the condition that can be checked after reading the table
@@ -14692,16 +14975,18 @@ bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item)
static Item *
make_cond_for_table(Item *cond, table_map tables, table_map used_table,
- bool exclude_expensive_cond)
+ bool exclude_expensive_cond, bool retain_ref_cond)
{
return make_cond_for_table_from_pred(cond, cond, tables, used_table,
- exclude_expensive_cond);
+ exclude_expensive_cond,
+ retain_ref_cond);
}
static Item *
make_cond_for_table_from_pred(Item *root_cond, Item *cond,
table_map tables, table_map used_table,
- bool exclude_expensive_cond)
+ bool exclude_expensive_cond,
+ bool retain_ref_cond)
{
if (used_table && !(cond->used_tables() & used_table) &&
@@ -14732,7 +15017,8 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond,
{
Item *fix=make_cond_for_table_from_pred(root_cond, item,
tables, used_table,
- exclude_expensive_cond);
+ exclude_expensive_cond,
+ retain_ref_cond);
if (fix)
new_cond->argument_list()->push_back(fix);
}
@@ -14764,7 +15050,8 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond,
{
Item *fix=make_cond_for_table_from_pred(root_cond, item,
tables, 0L,
- exclude_expensive_cond);
+ exclude_expensive_cond,
+ retain_ref_cond);
if (!fix)
return (COND*) 0; // Always true
new_cond->argument_list()->push_back(fix);
@@ -14785,7 +15072,8 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond,
table_count times, we mark each item that we have examined with the result
of the test
*/
- if (cond->marker == 3 || (cond->used_tables() & ~tables) ||
+ if ((cond->marker == 3 && !retain_ref_cond) ||
+ (cond->used_tables() & ~tables) ||
/*
When extracting constant conditions, treat expensive conditions as
non-constant, so that they are not evaluated at optimization time.
@@ -14800,13 +15088,13 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond,
{
Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
- if (left_item->type() == Item::FIELD_ITEM &&
+ if (left_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
test_if_ref(root_cond, (Item_field*) left_item,right_item))
{
cond->marker=3; // Checked when read
return (COND*) 0;
}
- if (right_item->type() == Item::FIELD_ITEM &&
+ if (right_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
test_if_ref(root_cond, (Item_field*) right_item,left_item))
{
cond->marker=3; // Checked when read
@@ -15991,7 +16279,7 @@ static bool fix_having(JOIN *join, Item **having)
DBUG_EXECUTE("where",print_where(*having,"having", QT_ORDINARY););
Item* sort_table_cond=make_cond_for_table(*having, used_tables, used_tables,
- FALSE);
+ FALSE, FALSE);
if (sort_table_cond)
{
if (!table->select)
@@ -16009,7 +16297,8 @@ static bool fix_having(JOIN *join, Item **having)
DBUG_EXECUTE("where",print_where(table->select_cond,
"select and having",
QT_ORDINARY););
- *having= make_cond_for_table(*having,~ (table_map) 0,~used_tables, FALSE);
+ *having= make_cond_for_table(*having,~ (table_map) 0,~used_tables,
+ FALSE, FALSE);
DBUG_EXECUTE("where",
print_where(*having,"having after make_cond", QT_ORDINARY););
}
@@ -18870,8 +19159,11 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
}
}
- if (i > 0 && tab[-1].next_select == sub_select_cache)
+ if (tab->cache)
+ {
extra.append(STRING_WITH_LEN("; Using join buffer"));
+ tab->cache->print_explain_comment(&extra);
+ }
/* Skip initial "; "*/
const char *str= extra.ptr();
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 1ec79796ebf..ecc19f763fa 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -160,7 +160,9 @@ typedef struct st_join_table {
TABLE *table;
KEYUSE *keyuse; /**< pointer to first used key */
SQL_SELECT *select;
- COND *select_cond;
+ COND *select_cond;
+ COND *on_precond; /**< part of on condition to check before
+ accessing the first inner table */
QUICK_SELECT_I *quick;
/*
The value of select_cond before we've attempted to do Index Condition
@@ -216,11 +218,16 @@ typedef struct st_join_table {
E(#records) is in found_records.
*/
ha_rows read_time;
-
+
+ double partial_join_cardinality;
+
table_map dependent,key_dependent;
uint use_quick,index;
uint status; ///< Save status for cache
- uint used_fields,used_fieldlength,used_blobs;
+ uint used_fields;
+ ulong used_fieldlength;
+ ulong max_used_fieldlength;
+ uint used_blobs;
uint used_null_fields;
uint used_rowid_fields;
uint used_uneven_bit_fields;
@@ -235,6 +242,7 @@ typedef struct st_join_table {
ha_rows limit;
TABLE_REF ref;
bool use_join_cache;
+ ulong join_buffer_size_limit;
JOIN_CACHE *cache;
/*
Index condition for BKA access join
@@ -298,6 +306,8 @@ typedef struct st_join_table {
*/
uint sj_strategy;
+ struct st_join_table *first_sjm_sibling;
+
void cleanup();
inline bool is_using_loose_index_scan()
{
@@ -349,6 +359,19 @@ typedef struct st_join_table {
return (first_inner && first_inner->last_inner == this) ||
last_sj_inner_tab == this;
}
+ /*
+ Check whether the table belongs to a nest of inner tables of an
+ outer join or to a nest of inner tables of a semi-join
+ */
+ bool is_nested_inner()
+ {
+ if (first_inner &&
+ (first_inner != first_inner->last_inner || first_inner->first_upper))
+ return TRUE;
+ if (first_sj_inner_tab && first_sj_inner_tab != last_sj_inner_tab)
+ return TRUE;
+ return FALSE;
+ }
struct st_join_table *get_first_inner_table()
{
if (first_inner)
@@ -369,850 +392,26 @@ typedef struct st_join_table {
select->cond= new_cond;
return tmp_select_cond;
}
-} JOIN_TAB;
-
-
-/*
- Categories of data fields of variable length written into join cache buffers.
- The value of any of these fields is written into cache together with the
- prepended length of the value.
-*/
-#define CACHE_BLOB 1 /* blob field */
-#define CACHE_STRIPPED 2 /* field stripped of trailing spaces */
-#define CACHE_VARSTR1 3 /* short string value (length takes 1 byte) */
-#define CACHE_VARSTR2 4 /* long string value (length takes 2 bytes) */
-
-/*
- The CACHE_FIELD structure used to describe fields of records that
- are written into a join cache buffer from record buffers and backward.
-*/
-typedef struct st_cache_field {
- uchar *str; /**< buffer from/to where the field is to be copied */
- uint length; /**< maximal number of bytes to be copied from/to str */
- /*
- Field object for the moved field
- (0 - for a flag field, see JOIN_CACHE::create_flag_fields).
- */
- Field *field;
- uint type; /**< category of the of the copied field (CACHE_BLOB et al.) */
- /*
- The number of the record offset value for the field in the sequence
- of offsets placed after the last field of the record. These
- offset values are used to access fields referred to from other caches.
- If the value is 0 then no offset for the field is saved in the
- trailing sequence of offsets.
- */
- uint referenced_field_no;
- /* The remaining structure fields are used as containers for temp values */
- uint blob_length; /**< length of the blob to be copied */
- uint offset; /**< field offset to be saved in cache buffer */
-} CACHE_FIELD;
-
-
-/*
- JOIN_CACHE is the base class to support the implementations of both
- Blocked-Based Nested Loops (BNL) Join Algorithm and Batched Key Access (BKA)
- Join Algorithm. The first algorithm is supported by the derived class
- JOIN_CACHE_BNL, while the second algorithm is supported by the derived
- class JOIN_CACHE_BKA.
- These two algorithms have a lot in common. Both algorithms first
- accumulate the records of the left join operand in a join buffer and
- then search for matching rows of the second operand for all accumulated
- records.
- For the first algorithm this strategy saves on logical I/O operations:
- the entire set of records from the join buffer requires only one look-through
- the records provided by the second operand.
- For the second algorithm the accumulation of records allows to optimize
- fetching rows of the second operand from disk for some engines (MyISAM,
- InnoDB), or to minimize the number of round-trips between the Server and
- the engine nodes (NDB Cluster).
-*/
-
-class JOIN_CACHE :public Sql_alloc
-{
-
-private:
-
- /* Size of the offset of a record from the cache */
- uint size_of_rec_ofs;
- /* Size of the length of a record in the cache */
- uint size_of_rec_len;
- /* Size of the offset of a field within a record in the cache */
- uint size_of_fld_ofs;
-
-protected:
-
- /* 3 functions below actually do not use the hidden parameter 'this' */
-
- /* Calculate the number of bytes used to store an offset value */
- uint offset_size(uint len)
- { return (len < 256 ? 1 : len < 256*256 ? 2 : 4); }
-
- /* Get the offset value that takes ofs_sz bytes at the position ptr */
- ulong get_offset(uint ofs_sz, uchar *ptr)
+ void calc_used_field_length(bool max_fl);
+ ulong get_used_fieldlength()
{
- switch (ofs_sz) {
- case 1: return uint(*ptr);
- case 2: return uint2korr(ptr);
- case 4: return uint4korr(ptr);
- }
- return 0;
+ if (!used_fieldlength)
+ calc_used_field_length(FALSE);
+ return used_fieldlength;
}
-
- /* Set the offset value ofs that takes ofs_sz bytes at the position ptr */
- void store_offset(uint ofs_sz, uchar *ptr, ulong ofs)
+ ulong get_max_used_fieldlength()
{
- switch (ofs_sz) {
- case 1: *ptr= (uchar) ofs; return;
- case 2: int2store(ptr, (uint16) ofs); return;
- case 4: int4store(ptr, (uint32) ofs); return;
- }
+ if (!max_used_fieldlength)
+ calc_used_field_length(TRUE);
+ return max_used_fieldlength;
}
-
- /*
- The total maximal length of the fields stored for a record in the cache.
- For blob fields only the sizes of the blob lengths are taken into account.
- */
- uint length;
-
- /*
- Representation of the executed multi-way join through which all needed
- context can be accessed.
- */
- JOIN *join;
-
- /*
- Cardinality of the range of join tables whose fields can be put into the
- cache. (A table from the range not necessarily contributes to the cache.)
- */
- uint tables;
-
- /*
- The total number of flag and data fields that can appear in a record
- written into the cache. Fields with null values are always skipped
- to save space.
- */
- uint fields;
-
- /*
- The total number of flag fields in a record put into the cache. They are
- used for table null bitmaps, table null row flags, and an optional match
- flag. Flag fields go before other fields in a cache record with the match
- flag field placed always at the very beginning of the record.
- */
- uint flag_fields;
-
- /* The total number of blob fields that are written into the cache */
- uint blobs;
-
- /*
- The total number of fields referenced from field descriptors for other join
- caches. These fields are used to construct key values to access matching
- rows with index lookups. Currently the fields can be referenced only from
- descriptors for bka caches. However they may belong to a cache of any type.
- */
- uint referenced_fields;
-
- /*
- The current number of already created data field descriptors.
- This number can be useful for implementations of the init methods.
- */
- uint data_field_count;
-
- /*
- The current number of already created pointers to the data field
- descriptors. This number can be useful for implementations of
- the init methods.
- */
- uint data_field_ptr_count;
- /*
- Array of the descriptors of fields containing 'fields' elements.
- These are all fields that are stored for a record in the cache.
- */
- CACHE_FIELD *field_descr;
-
- /*
- Array of pointers to the blob descriptors that contains 'blobs' elements.
- */
- CACHE_FIELD **blob_ptr;
-
- /*
- This flag indicates that records written into the join buffer contain
- a match flag field. The flag must be set by the init method.
- */
- bool with_match_flag;
- /*
- This flag indicates that any record is prepended with the length of the
- record which allows us to skip the record or part of it without reading.
- */
- bool with_length;
-
- /*
- The maximal number of bytes used for a record representation in
- the cache excluding the space for blob data.
- For future derived classes this representation may contains some
- redundant info such as a key value associated with the record.
- */
- uint pack_length;
- /*
- The value of pack_length incremented by the total size of all
- pointers of a record in the cache to the blob data.
- */
- uint pack_length_with_blob_ptrs;
-
- /* Pointer to the beginning of the join buffer */
- uchar *buff;
- /*
- Size of the entire memory allocated for the join buffer.
- Part of this memory may be reserved for the auxiliary buffer.
- */
- ulong buff_size;
- /* Size of the auxiliary buffer. */
- ulong aux_buff_size;
-
- /* The number of records put into the join buffer */
- uint records;
-
- /*
- Pointer to the current position in the join buffer.
- This member is used both when writing to buffer and
- when reading from it.
- */
- uchar *pos;
- /*
- Pointer to the first free position in the join buffer,
- right after the last record into it.
- */
- uchar *end_pos;
-
- /*
- Pointer to the beginning of first field of the current read/write record
- from the join buffer. The value is adjusted by the get_record/put_record
- functions.
- */
- uchar *curr_rec_pos;
- /*
- Pointer to the beginning of first field of the last record
- from the join buffer.
- */
- uchar *last_rec_pos;
-
- /*
- Flag is set if the blob data for the last record in the join buffer
- is in record buffers rather than in the join cache.
- */
- bool last_rec_blob_data_is_in_rec_buff;
-
- /*
- Pointer to the position to the current record link.
- Record links are used only with linked caches. Record links allow to set
- connections between parts of one join record that are stored in different
- join buffers.
- In the simplest case a record link is just a pointer to the beginning of
- the record stored in the buffer.
- In a more general case a link could be a reference to an array of pointers
- to records in the buffer. */
- uchar *curr_rec_link;
-
- void calc_record_fields();
- int alloc_fields(uint external_fields);
- void create_flag_fields();
- void create_remaining_fields(bool all_read_fields);
- void set_constants();
- int alloc_buffer();
-
- uint get_size_of_rec_offset() { return size_of_rec_ofs; }
- uint get_size_of_rec_length() { return size_of_rec_len; }
- uint get_size_of_fld_offset() { return size_of_fld_ofs; }
-
- uchar *get_rec_ref(uchar *ptr)
- {
- return buff+get_offset(size_of_rec_ofs, ptr-size_of_rec_ofs);
- }
- ulong get_rec_length(uchar *ptr)
- {
- return (ulong) get_offset(size_of_rec_len, ptr);
- }
- ulong get_fld_offset(uchar *ptr)
- {
- return (ulong) get_offset(size_of_fld_ofs, ptr);
- }
-
- void store_rec_ref(uchar *ptr, uchar* ref)
- {
- store_offset(size_of_rec_ofs, ptr-size_of_rec_ofs, (ulong) (ref-buff));
- }
-
- void store_rec_length(uchar *ptr, ulong len)
- {
- store_offset(size_of_rec_len, ptr, len);
- }
- void store_fld_offset(uchar *ptr, ulong ofs)
- {
- store_offset(size_of_fld_ofs, ptr, ofs);
- }
-
- /* Write record fields and their required offsets into the join buffer */
- uint write_record_data(uchar *link, bool *is_full);
-
- /*
- This method must determine for how much the auxiliary buffer should be
- incremented when a new record is added to the join buffer.
- If no auxiliary buffer is needed the function should return 0.
- */
- virtual uint aux_buffer_incr() { return 0; }
-
- /* Shall calculate how much space is remaining in the join buffer */
- virtual ulong rem_space()
- {
- return max(buff_size-(end_pos-buff)-aux_buff_size,0);
- }
-
- /* Shall skip record from the join buffer if its match flag is on */
- virtual bool skip_record_if_match();
-
- /* Read all flag and data fields of a record from the join buffer */
- uint read_all_record_fields();
-
- /* Read all flag fields of a record from the join buffer */
- uint read_flag_fields();
-
- /* Read a data record field from the join buffer */
- uint read_record_field(CACHE_FIELD *copy, bool last_record);
-
- /* Read a referenced field from the join buffer */
- bool read_referenced_field(CACHE_FIELD *copy, uchar *rec_ptr, uint *len);
-
- /*
- True if rec_ptr points to the record whose blob data stay in
- record buffers
- */
- bool blob_data_is_in_rec_buff(uchar *rec_ptr)
- {
- return rec_ptr == last_rec_pos && last_rec_blob_data_is_in_rec_buff;
- }
-
- /* Find matches from the next table for records from the join buffer */
- virtual enum_nested_loop_state join_matching_records(bool skip_last)=0;
-
- /* Add null complements for unmatched outer records from buffer */
- virtual enum_nested_loop_state join_null_complements(bool skip_last);
-
- /* Restore the fields of the last record from the join buffer */
- virtual void restore_last_record();
-
- /*Set match flag for a record in join buffer if it has not been set yet */
- bool set_match_flag_if_none(JOIN_TAB *first_inner, uchar *rec_ptr);
-
- enum_nested_loop_state generate_full_extensions(uchar *rec_ptr);
-
- /* Check matching to a partial join record from the join buffer */
- bool check_match(uchar *rec_ptr);
-
-public:
-
- /* Table to be joined with the partial join records from the cache */
- JOIN_TAB *join_tab;
-
- /* Pointer to the previous join cache if there is any */
- JOIN_CACHE *prev_cache;
- /* Pointer to the next join cache if there is any */
- JOIN_CACHE *next_cache;
-
- /* Shall initialize the join cache structure */
- virtual int init()=0;
-
- /* The function shall return TRUE only for BKA caches */
- virtual bool is_key_access() { return FALSE; }
-
- /* Shall reset the join buffer for reading/writing */
- virtual void reset(bool for_writing);
-
- /*
- This function shall add a record into the join buffer and return TRUE
- if it has been decided that it should be the last record in the buffer.
- */
- virtual bool put_record();
-
- /*
- This function shall read the next record into the join buffer and return
- TRUE if there is no more next records.
- */
- virtual bool get_record();
-
- /*
- This function shall read the record at the position rec_ptr
- in the join buffer
- */
- virtual void get_record_by_pos(uchar *rec_ptr);
-
- /* Shall return the value of the match flag for the positioned record */
- virtual bool get_match_flag_by_pos(uchar *rec_ptr);
-
- /* Shall return the position of the current record */
- virtual uchar *get_curr_rec() { return curr_rec_pos; }
-
- /* Shall set the current record link */
- virtual void set_curr_rec_link(uchar *link) { curr_rec_link= link; }
-
- /* Shall return the current record link */
- virtual uchar *get_curr_rec_link()
- {
- return (curr_rec_link ? curr_rec_link : get_curr_rec());
- }
-
- /* Join records from the join buffer with records from the next join table */
- enum_nested_loop_state join_records(bool skip_last);
-
- virtual ~JOIN_CACHE() {}
- void reset_join(JOIN *j) { join= j; }
- void free()
- {
- x_free(buff);
- buff= 0;
- }
-
- friend class JOIN_CACHE_BNL;
- friend class JOIN_CACHE_BKA;
- friend class JOIN_CACHE_BKA_UNIQUE;
-};
-
-
-class JOIN_CACHE_BNL :public JOIN_CACHE
-{
-
-protected:
-
- /* Using BNL find matches from the next table for records from join buffer */
- enum_nested_loop_state join_matching_records(bool skip_last);
-
-public:
-
- /*
- This constructor creates an unlinked BNL join cache. The cache is to be
- used to join table 'tab' to the result of joining the previous tables
- specified by the 'j' parameter.
- */
- JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab)
- {
- join= j;
- join_tab= tab;
- prev_cache= next_cache= 0;
- }
-
- /*
- This constructor creates a linked BNL join cache. The cache is to be
- used to join table 'tab' to the result of joining the previous tables
- specified by the 'j' parameter. The parameter 'prev' specifies the previous
- cache object to which this cache is linked.
- */
- JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev)
- {
- join= j;
- join_tab= tab;
- prev_cache= prev;
- next_cache= 0;
- if (prev)
- prev->next_cache= this;
- }
-
- /* Initialize the BNL cache */
- int init();
-
-};
-
-class JOIN_CACHE_BKA :public JOIN_CACHE
-{
-protected:
-
- /* Flag to to be passed to the MRR interface */
- uint mrr_mode;
-
- /* MRR buffer assotiated with this join cache */
- HANDLER_BUFFER mrr_buff;
-
- /* Shall initialize the MRR buffer */
- virtual void init_mrr_buff()
- {
- mrr_buff.buffer= end_pos;
- mrr_buff.buffer_end= buff+buff_size;
- }
-
- /*
- The number of the cache fields that are used in building keys to access
- the table join_tab
- */
- uint local_key_arg_fields;
- /*
- The total number of the fields in the previous caches that are used
- in building keys t access the table join_tab
- */
- uint external_key_arg_fields;
-
- /*
- This flag indicates that the key values will be read directly from the join
- buffer. It will save us building key values in the key buffer.
- */
- bool use_emb_key;
- /* The length of an embedded key value */
- uint emb_key_length;
-
- /* Check the possibility to read the access keys directly from join buffer */
- bool check_emb_key_usage();
-
- /* Calculate the increment of the MM buffer for a record write */
- uint aux_buffer_incr();
-
- /* Using BKA find matches from the next table for records from join buffer */
- enum_nested_loop_state join_matching_records(bool skip_last);
-
- /* Prepare to search for records that match records from the join buffer */
- enum_nested_loop_state init_join_matching_records(RANGE_SEQ_IF *seq_funcs,
- uint ranges);
-
- /* Finish searching for records that match records from the join buffer */
- enum_nested_loop_state end_join_matching_records(enum_nested_loop_state rc);
-
-public:
-
- /*
- This constructor creates an unlinked BKA join cache. The cache is to be
- used to join table 'tab' to the result of joining the previous tables
- specified by the 'j' parameter.
- The MRR mode initially is set to 'flags'.
- */
- JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags)
- {
- join= j;
- join_tab= tab;
- prev_cache= next_cache= 0;
- mrr_mode= flags;
- }
-
- /*
- This constructor creates a linked BKA join cache. The cache is to be
- used to join table 'tab' to the result of joining the previous tables
- specified by the 'j' parameter. The parameter 'prev' specifies the cache
- object to which this cache is linked.
- The MRR mode initially is set to 'flags'.
- */
- JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE* prev)
- {
- join= j;
- join_tab= tab;
- prev_cache= prev;
- next_cache= 0;
- if (prev)
- prev->next_cache= this;
- mrr_mode= flags;
- }
-
- /* Initialize the BKA cache */
- int init();
-
- bool is_key_access() { return TRUE; }
-
- /* Shall get the key built over the next record from the join buffer */
- virtual uint get_next_key(uchar **key);
-
- /* Check if the record combination matches the index condition */
- bool skip_index_tuple(range_seq_t rseq, char *range_info);
-};
-
-/*
- The class JOIN_CACHE_BKA_UNIQUE supports the variant of the BKA join algorithm
- that submits only distinct keys to the MRR interface. The records in the join
- buffer of a cache of this class that have the same access key are linked into
- a chain attached to a key entry structure that either itself contains the key
- value, or, in the case when the keys are embedded, refers to its occurance in
- one of the records from the chain.
- To build the chains with the same keys a hash table is employed. It is placed
- at the very end of the join buffer. The array of hash entries is allocated
- first at the very bottom of the join buffer, then go key entries. A hash entry
- contains a header of the list of the key entries with the same hash value.
- Each key entry is a structure of the following type:
- struct st_join_cache_key_entry {
- union {
- uchar[] value;
- cache_ref *value_ref; // offset from the beginning of the buffer
- } hash_table_key;
- key_ref next_key; // offset backward from the beginning of hash table
- cache_ref *last_rec // offset from the beginning of the buffer
- }
- The references linking the records in a chain are always placed at the very
- beginning of the record info stored in the join buffer. The records are
- linked in a circular list. A new record is always added to the end of this
- list. When a key is passed to the MRR interface it can be passed either with
- an association link containing a reference to the header of the record chain
- attached to the corresponding key entry in the hash table, or without any
- association link. When the next record is returned by a call to the MRR
- function multi_range_read_next without any association (because if was not
- passed together with the key) then the key value is extracted from the
- returned record and searched for it in the hash table. If there is any records
- with such key the chain of them will be yielded as the result of this search.
-
- The following picture represents a typical layout for the info stored in the
- join buffer of a join cache object of the JOIN_CACHE_BKA_UNIQUE class.
-
- buff
- V
- +----------------------------------------------------------------------------+
- | |[*]record_1_1| |
- | ^ | |
- | | +--------------------------------------------------+ |
- | | |[*]record_2_1| | |
- | | ^ | V |
- | | | +------------------+ |[*]record_1_2| |
- | | +--------------------+-+ | |
- |+--+ +---------------------+ | | +-------------+ |
- || | | V | | |
- |||[*]record_3_1| |[*]record_1_3| |[*]record_2_2| | |
- ||^ ^ ^ | |
- ||+----------+ | | | |
- ||^ | |<---------------------------+-------------------+ |
- |++ | | ... mrr | buffer ... ... | | |
- | | | | |
- | +-----+--------+ | +-----|-------+ |
- | V | | | V | | |
- ||key_3|[/]|[*]| | | |key_2|[/]|[*]| | |
- | +-+---|-----------------------+ | |
- | V | | | | |
- | |key_1|[*]|[*]| | | ... |[*]| ... |[*]| ... | |
- +----------------------------------------------------------------------------+
- ^ ^ ^
- | i-th entry j-th entry
- hash table
-
- i-th hash entry:
- circular record chain for key_1:
- record_1_1
- record_1_2
- record_1_3 (points to record_1_1)
- circular record chain for key_3:
- record_3_1 (points to itself)
-
- j-th hash entry:
- circular record chain for key_2:
- record_2_1
- record_2_2 (points to record_2_1)
-
-*/
-
-class JOIN_CACHE_BKA_UNIQUE :public JOIN_CACHE_BKA
-{
-
-private:
-
- /* Size of the offset of a key entry in the hash table */
- uint size_of_key_ofs;
-
- /*
- Length of a key value.
- It is assumed that all key values have the same length.
- */
- uint key_length;
- /*
- Length of the key entry in the hash table.
- A key entry either contains the key value, or it contains a reference
- to the key value if use_emb_key flag is set for the cache.
- */
- uint key_entry_length;
-
- /* The beginning of the hash table in the join buffer */
- uchar *hash_table;
- /* Number of hash entries in the hash table */
- uint hash_entries;
-
- /* Number of key entries in the hash table (number of distinct keys) */
- uint key_entries;
-
- /* The position of the last key entry in the hash table */
- uchar *last_key_entry;
-
- /* The position of the currently retrieved key entry in the hash table */
- uchar *curr_key_entry;
-
- /*
- The offset of the record fields from the beginning of the record
- representation. The record representation starts with a reference to
- the next record in the key record chain followed by the length of
- the trailing record data followed by a reference to the record segment
- in the previous cache, if any, followed by the record fields.
- */
- uint rec_fields_offset;
- /* The offset of the data fields from the beginning of the record fields */
- uint data_fields_offset;
-
- uint get_hash_idx(uchar* key, uint key_len);
-
- void cleanup_hash_table();
-
-protected:
-
- uint get_size_of_key_offset() { return size_of_key_ofs; }
-
- /*
- Get the position of the next_key_ptr field pointed to by
- a linking reference stored at the position key_ref_ptr.
- This reference is actually the offset backward from the
- beginning of hash table.
- */
- uchar *get_next_key_ref(uchar *key_ref_ptr)
- {
- return hash_table-get_offset(size_of_key_ofs, key_ref_ptr);
- }
-
- /*
- Store the linking reference to the next_key_ptr field at
- the position key_ref_ptr. The position of the next_key_ptr
- field is pointed to by ref. The stored reference is actually
- the offset backward from the beginning of the hash table.
- */
- void store_next_key_ref(uchar *key_ref_ptr, uchar *ref)
- {
- store_offset(size_of_key_ofs, key_ref_ptr, (ulong) (hash_table-ref));
- }
-
- /*
- Check whether the reference to the next_key_ptr field at the position
- key_ref_ptr contains a nil value.
- */
- bool is_null_key_ref(uchar *key_ref_ptr)
- {
- ulong nil= 0;
- return memcmp(key_ref_ptr, &nil, size_of_key_ofs ) == 0;
- }
-
- /*
- Set the reference to the next_key_ptr field at the position
- key_ref_ptr equal to nil.
- */
- void store_null_key_ref(uchar *key_ref_ptr)
- {
- ulong nil= 0;
- store_offset(size_of_key_ofs, key_ref_ptr, nil);
- }
-
- uchar *get_next_rec_ref(uchar *ref_ptr)
- {
- return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
- }
-
- void store_next_rec_ref(uchar *ref_ptr, uchar *ref)
- {
- store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
- }
-
- /*
- Get the position of the embedded key value for the current
- record pointed to by get_curr_rec().
- */
- uchar *get_curr_emb_key()
- {
- return get_curr_rec()+data_fields_offset;
- }
-
- /*
- Get the position of the embedded key value pointed to by a reference
- stored at ref_ptr. The stored reference is actually the offset from
- the beginning of the join buffer.
- */
- uchar *get_emb_key(uchar *ref_ptr)
- {
- return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
- }
-
- /*
- Store the reference to an embedded key at the position key_ref_ptr.
- The position of the embedded key is pointed to by ref. The stored
- reference is actually the offset from the beginning of the join buffer.
- */
- void store_emb_key_ref(uchar *ref_ptr, uchar *ref)
- {
- store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
- }
-
- /*
- Calculate how much space in the buffer would not be occupied by
- records, key entries and additional memory for the MMR buffer.
- */
- ulong rem_space()
- {
- return max(last_key_entry-end_pos-aux_buff_size,0);
- }
-
- /*
- Initialize the MRR buffer allocating some space within the join buffer.
- The entire space between the last record put into the join buffer and the
- last key entry added to the hash table is used for the MRR buffer.
- */
- void init_mrr_buff()
- {
- mrr_buff.buffer= end_pos;
- mrr_buff.buffer_end= last_key_entry;
- }
-
- /* Skip record from JOIN_CACHE_BKA_UNIQUE buffer if its match flag is on */
- bool skip_record_if_match();
-
- /* Using BKA_UNIQUE find matches for records from join buffer */
- enum_nested_loop_state join_matching_records(bool skip_last);
-
- /* Search for a key in the hash table of the join buffer */
- bool key_search(uchar *key, uint key_len, uchar **key_ref_ptr);
-
-public:
-
- /*
- This constructor creates an unlinked BKA_UNIQUE join cache. The cache is
- to be used to join table 'tab' to the result of joining the previous tables
- specified by the 'j' parameter.
- The MRR mode initially is set to 'flags'.
- */
- JOIN_CACHE_BKA_UNIQUE(JOIN *j, JOIN_TAB *tab, uint flags)
- :JOIN_CACHE_BKA(j, tab, flags) {}
-
- /*
- This constructor creates a linked BKA_UNIQUE join cache. The cache is
- to be used to join table 'tab' to the result of joining the previous tables
- specified by the 'j' parameter. The parameter 'prev' specifies the cache
- object to which this cache is linked.
- The MRR mode initially is set to 'flags'.
- */
- JOIN_CACHE_BKA_UNIQUE(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE* prev)
- :JOIN_CACHE_BKA(j, tab, flags, prev) {}
-
- /* Initialize the BKA_UNIQUE cache */
- int init();
-
- /* Reset the JOIN_CACHE_BKA_UNIQUE buffer for reading/writing */
- void reset(bool for_writing);
-
- /* Add a record into the JOIN_CACHE_BKA_UNIQUE buffer */
- bool put_record();
-
- /* Read the next record from the JOIN_CACHE_BKA_UNIQUE buffer */
- bool get_record();
-
- /*
- Shall check whether all records in a key chain have
- their match flags set on
- */
- virtual bool check_all_match_flags_for_key(uchar *key_chain_ptr);
+ double get_partial_join_cardinality() { return partial_join_cardinality; }
+ bool hash_join_is_possible();
+ int make_scan_filter();
+} JOIN_TAB;
- uint get_next_key(uchar **key);
-
- /* Get the head of the record chain attached to the current key entry */
- uchar *get_curr_key_chain()
- {
- return get_next_rec_ref(curr_key_entry+key_entry_length-
- get_size_of_rec_offset());
- }
-
- /* Check if the record combination matches the index condition */
- bool skip_index_tuple(range_seq_t rseq, char *range_info);
-};
+#include "sql_join_cache.h"
enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool
end_of_records);
@@ -1745,6 +944,10 @@ public:
NULL : join_tab+const_tables;
}
bool setup_subquery_caches();
+ bool shrink_join_buffers(JOIN_TAB *jt,
+ ulonglong curr_space,
+ ulonglong needed_space);
+
private:
/**
TRUE if the query contains an aggregate function but has no GROUP
@@ -1874,6 +1077,15 @@ class store_key_field: public store_key
TABLE *table= copy_field.to_field->table;
my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
table->write_set);
+
+ /*
+ It looks like the next statement is needed only for the simplified
+ hash function over key values currently used in BNLH join.
+ When the implementation of this function is replaced with a proper
+ full version, this statement can probably be removed.
+ */
+ bzero(copy_field.to_ptr,copy_field.to_length);
+
copy_field.do_copy(&copy_field);
dbug_tmp_restore_column_map(table->write_set, old_map);
null_key= to_field->is_null();
@@ -1907,6 +1119,15 @@ public:
my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
table->write_set);
int res= FALSE;
+
+ /*
+ It looks like the next statement is needed only for the simplified
+ hash function over key values currently used in BNLH join.
+ When the implementation of this function is replaced with a proper
+ full version, this statement can probably be removed.
+ */
+ to_field->reset();
+
if (use_value)
item->save_val(to_field);
else
@@ -1969,7 +1190,6 @@ int report_error(TABLE *table, int error);
int safe_index_read(JOIN_TAB *tab);
COND *remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value);
int test_if_item_cache_changed(List<Cached_item> &list);
-void calc_used_field_length(THD *thd, JOIN_TAB *join_tab);
int join_init_read_record(JOIN_TAB *tab);
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
inline Item * and_items(Item* cond, Item *item)
@@ -1997,7 +1217,8 @@ inline bool optimizer_flag(THD *thd, uint flag)
void eliminate_tables(JOIN *join);
/* Index Condition Pushdown entry point function */
-void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok);
+void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok,
+ bool factor_out);
/****************************************************************************
Temporary table support for SQL Runtime