summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author apostle <vladkakurin007@gmail.com> 2022-06-01 02:32:48 +0000
committer apostle <vladkakurin007@gmail.com> 2022-06-01 02:32:48 +0000
commit 2425c61c57906198af40758d471b019de5fcd157 (patch)
tree 0e2b5e18f477d771e9f0e021452419a8f54fa2dd
parent e88ce8c538e55739f3d31f1b7faca2913f3d02f7 (diff)
download mariadb-git-2425c61c57906198af40758d471b019de5fcd157.tar.gz
add awful but working sampling method selection
-rw-r--r-- sql/lex.h 1
-rw-r--r-- sql/records.cc 37
-rw-r--r-- sql/records.h 1
-rw-r--r-- sql/sql_yacc.yy 15
-rw-r--r-- sql/table.cc 2
-rw-r--r-- sql/table.h 2
6 files changed, 53 insertions, 5 deletions
diff --git a/sql/lex.h b/sql/lex.h
index 55589d888e6..b2ca572481d 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -571,6 +571,7 @@ SYMBOL symbols[] = {
/** sql_function and condition_property_name for GET DIAGNOSTICS */
{ "ROW_NUMBER", SYM(ROW_NUMBER_SYM)},
{ "RTREE", SYM(RTREE_SYM)},
+ {"SAMPLE_METHOD", SYM(SAMPLE_METHOD)},
{ "SAVEPOINT", SYM(SAVEPOINT_SYM)},
{ "SCHEDULE", SYM(SCHEDULE_SYM)},
{ "SCHEMA", SYM(DATABASE)},
diff --git a/sql/records.cc b/sql/records.cc
index 9eadf788585..896ecafe1c0 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -38,6 +38,7 @@
static int rr_quick(READ_RECORD *info);
int rr_sequential(READ_RECORD *info);
int rr_sequential_sample(READ_RECORD *info);
+int rr_full_scan_sample(READ_RECORD *info);
static int rr_from_tempfile(READ_RECORD *info);
template<bool> static int rr_unpack_from_tempfile(READ_RECORD *info);
template<bool,bool> static int rr_unpack_from_buffer(READ_RECORD *info);
@@ -325,10 +326,26 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
else if (table->tablesample)
{
double fract= table->tablesample->val_real() / 100.0;
+ info->sample_factor = fract;
info->sample_counter= (ha_rows)(table->file->records() * fract + 0.5);
- info->read_record_func= rr_sequential_sample;
- if (table->file->ha_sample_init())
- DBUG_RETURN(1);
+ if(table->sample_method_flag && table->sample_method_flag->val_real() > 1.0)
+ {
+ info->read_record_func= rr_full_scan_sample;
+ }
+ else
+ {
+ info->read_record_func= rr_sequential_sample;
+ }
+ if(table->sample_method_flag && table->sample_method_flag->val_real() > 1.0)
+ {
+ if (table->file->ha_rnd_init(TRUE))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ if (table->file->ha_sample_init())
+ DBUG_RETURN(1);
+ }
}
else
{
@@ -548,6 +565,20 @@ int rr_sequential_sample(READ_RECORD *info)
return tmp;
}
+/**
+  Read the next sampled row using a full table scan.
+
+  Rows are fetched one at a time with ha_rnd_next(). Each successfully
+  read row is returned when thd_rnd(info->thd) <= info->sample_factor
+  and skipped otherwise.
+
+  @param info  READ_RECORD whose table, thd and sample_factor are used
+
+  @return 0 when a row was read and selected; otherwise the value
+          returned by rr_handle_error() for the failing ha_rnd_next() call
+*/
+int rr_full_scan_sample(READ_RECORD *info)
+{
+  int tmp;
+  /*
+    Test the read result before sampling, so that no random number is
+    drawn for a failed read or for the end of the scan.
+  */
+  while (!(tmp= info->table->file->ha_rnd_next(info->record())))
+  {
+    if (thd_rnd(info->thd) <= info->sample_factor)
+      return 0;
+  }
+  return rr_handle_error(info, tmp);
+}
+
static int rr_from_tempfile(READ_RECORD *info)
{
diff --git a/sql/records.h b/sql/records.h
index 579e0e2bf3a..ed3b9a84b5a 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -62,6 +62,7 @@ struct READ_RECORD
uint ref_length, reclength, rec_cache_size, error_offset;
ha_rows sample_counter;
+ double sample_factor;
/**
Counting records when reading result from filesort().
Used when filesort leaves the result in the filesort buffer.
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 88df2bc3d7a..0399225ad5f 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -679,6 +679,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
%token <kwd> SYSDATE
%token <kwd> TABLE_REF_PRIORITY
%token <kwd> TABLESAMPLE /* SQL-2016-R */
+%token <kwd> SAMPLE_METHOD
%token <kwd> TABLE_SYM /* SQL-2003-R */
%token <kwd> TERMINATED
%token <kwd> THEN_SYM /* SQL-2003-R */
@@ -1499,7 +1500,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
%type <item_param> param_marker
%type <item_num>
- NUM_literal opt_table_sample
+ NUM_literal opt_table_sample opt_sample_method
%type <item_basic_constant> text_literal
@@ -11710,10 +11711,18 @@ opt_table_sample:
}
;
+opt_sample_method: /* optional SAMPLE_METHOD '(' number ')' clause of a table reference */
+ /* empty: clause omitted, the rule yields 0 */ { $$=0; }
+ | SAMPLE_METHOD '(' NUM_literal ')'
+ {
+ $$=$3; /* hand the numeric literal to the enclosing rule */
+ }
+ ;
+
table_primary_ident:
table_ident opt_use_partition opt_for_system_time_clause
opt_table_alias_clause opt_key_definition
- opt_table_sample
+ opt_table_sample opt_sample_method
{
if (!($$= Select->add_table_to_list(thd, $1, $4,
0,
@@ -11726,6 +11735,8 @@ table_primary_ident:
$$->vers_conditions= Lex->vers_conditions;
if ($6)
$$->tablesample= $6;
+ if ($7)
+ $$->sample_method_flag= $7;
}
;
diff --git a/sql/table.cc b/sql/table.cc
index 4504e37dd7f..795a9e0b754 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -5555,6 +5555,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
no_cache= false;
initialize_opt_range_structures();
tablesample= NULL;
+ sample_method_flag= NULL;
#ifdef HAVE_REPLICATION
/* used in RBR Triggers */
master_had_triggers= 0;
@@ -8546,6 +8547,7 @@ bool TABLE_LIST::process_table_sample(TABLE *tbl)
tbl->keys_in_use_for_order_by.clear_all();
tbl->covering_keys.clear_all();
tbl->tablesample= tablesample;
+ tbl->sample_method_flag= sample_method_flag;
}
return false;
}
diff --git a/sql/table.h b/sql/table.h
index 5c0e7f9b51a..266249fd9aa 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -1498,6 +1498,7 @@ public:
bool master_had_triggers;
#endif
Item *tablesample;
+ Item *sample_method_flag; /* rr_full_scan_sample if value > 1.0, rr_sequential_sample otherwise */
REGINFO reginfo; /* field connections */
MEM_ROOT mem_root;
/**
@@ -2223,6 +2224,7 @@ struct TABLE_LIST
for_insert_data= insert_data;
}
Item *tablesample;
+ Item *sample_method_flag;
/*
List of tables local to a subquery (used by SQL_I_List). Considers