diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2019-08-13 18:57:00 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2019-08-13 18:57:00 +0300 |
commit | 624dd71b9419555eca8baadc695e3376de72286f (patch) | |
tree | 31aaab8aeac43f921638407ab82190dd05a72793 /storage | |
parent | d4d865fcc8083782b6e4419c69bec372cd0b4142 (diff) | |
parent | e9c1701e11e2441435223cc7c00c467f58aaff19 (diff) | |
download | mariadb-git-624dd71b9419555eca8baadc695e3376de72286f.tar.gz |
Merge 10.4 into 10.5
Diffstat (limited to 'storage')
459 files changed, 25981 insertions, 8384 deletions
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index b62f78ad318..0b352589fe3 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -370,7 +370,7 @@ int Archive_share::write_v1_metafile() @return Length of packed row */ -unsigned int ha_archive::pack_row_v1(uchar *record) +unsigned int ha_archive::pack_row_v1(const uchar *record) { uint *blob, *end; uchar *pos; @@ -867,7 +867,7 @@ error: /* This is where the actual row is written out. */ -int ha_archive::real_write_row(uchar *buf, azio_stream *writer) +int ha_archive::real_write_row(const uchar *buf, azio_stream *writer) { my_off_t written; unsigned int r_pack_length; @@ -916,7 +916,7 @@ uint32 ha_archive::max_row_length(const uchar *record) } -unsigned int ha_archive::pack_row(uchar *record, azio_stream *writer) +unsigned int ha_archive::pack_row(const uchar *record, azio_stream *writer) { uchar *ptr; my_ptrdiff_t const rec_offset= record - table->record[0]; @@ -958,7 +958,7 @@ unsigned int ha_archive::pack_row(uchar *record, azio_stream *writer) for implementing start_bulk_insert() is that we could skip setting dirty to true each time. 
*/ -int ha_archive::write_row(uchar *buf) +int ha_archive::write_row(const uchar *buf) { int rc; uchar *read_buf= NULL; diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h index 043eab2670a..b9fcf10f96f 100644 --- a/storage/archive/ha_archive.h +++ b/storage/archive/ha_archive.h @@ -95,7 +95,7 @@ class ha_archive: public handler void destroy_record_buffer(archive_record_buffer *r); int frm_copy(azio_stream *src, azio_stream *dst); int frm_compare(azio_stream *src); - unsigned int pack_row_v1(uchar *record); + unsigned int pack_row_v1(const uchar *record); public: ha_archive(handlerton *hton, TABLE_SHARE *table_arg); @@ -131,8 +131,8 @@ public: int index_next(uchar * buf); int open(const char *name, int mode, uint test_if_locked); int close(void); - int write_row(uchar * buf); - int real_write_row(uchar *buf, azio_stream *writer); + int write_row(const uchar * buf); + int real_write_row(const uchar *buf, azio_stream *writer); int truncate(); int rnd_init(bool scan=1); int rnd_next(uchar *buf); @@ -168,7 +168,7 @@ public: uint32 max_row_length(const uchar *buf); bool fix_rec_buff(unsigned int length); int unpack_row(azio_stream *file_to_read, uchar *record); - unsigned int pack_row(uchar *record, azio_stream *writer); + unsigned int pack_row(const uchar *record, azio_stream *writer); bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); int external_lock(THD *thd, int lock_type); private: diff --git a/storage/blackhole/ha_blackhole.cc b/storage/blackhole/ha_blackhole.cc index a57c667c20a..c7803003398 100644 --- a/storage/blackhole/ha_blackhole.cc +++ b/storage/blackhole/ha_blackhole.cc @@ -117,7 +117,7 @@ const char *ha_blackhole::index_type(uint key_number) HA_KEY_ALG_RTREE) ? "RTREE" : "BTREE"); } -int ha_blackhole::write_row(uchar * buf) +int ha_blackhole::write_row(const uchar * buf) { DBUG_ENTER("ha_blackhole::write_row"); DBUG_RETURN(table->next_number_field ? 
update_auto_increment() : 0); diff --git a/storage/blackhole/ha_blackhole.h b/storage/blackhole/ha_blackhole.h index 0632c87c306..6ee30877b64 100644 --- a/storage/blackhole/ha_blackhole.h +++ b/storage/blackhole/ha_blackhole.h @@ -96,7 +96,7 @@ public: THR_LOCK_DATA **to, enum thr_lock_type lock_type); private: - virtual int write_row(uchar *buf); + virtual int write_row(const uchar *buf); virtual int update_row(const uchar *old_data, const uchar *new_data); virtual int delete_row(const uchar *buf); }; diff --git a/storage/cassandra/ha_cassandra.cc b/storage/cassandra/ha_cassandra.cc index e0127ba752a..410150b088f 100644 --- a/storage/cassandra/ha_cassandra.cc +++ b/storage/cassandra/ha_cassandra.cc @@ -1930,7 +1930,7 @@ void ha_cassandra::free_dynamic_row(DYNAMIC_COLUMN_VALUE **vals, *names= 0; } -int ha_cassandra::write_row(uchar *buf) +int ha_cassandra::write_row(const uchar *buf) { my_bitmap_map *old_map; int ires; diff --git a/storage/cassandra/ha_cassandra.h b/storage/cassandra/ha_cassandra.h index 9a44abc985f..a36d58fa4da 100644 --- a/storage/cassandra/ha_cassandra.h +++ b/storage/cassandra/ha_cassandra.h @@ -239,7 +239,7 @@ public: int open(const char *name, int mode, uint test_if_locked); int close(void); - int write_row(uchar *buf); + int write_row(const uchar *buf); int update_row(const uchar *old_data, const uchar *new_data); int delete_row(const uchar *buf); diff --git a/storage/connect/CMakeLists.txt b/storage/connect/CMakeLists.txt index 68ed0a4490c..feef8db70ec 100644 --- a/storage/connect/CMakeLists.txt +++ b/storage/connect/CMakeLists.txt @@ -118,7 +118,6 @@ IF(CONNECT_WITH_LIBXML2) FIND_PACKAGE(LibXml2) IF (LIBXML2_FOUND) INCLUDE_DIRECTORIES(${LIBXML2_INCLUDE_DIR}) - SET(ZLIB_LIBRARY "z") # see ZLIB_INCLUDE_DIR below SET(XML_LIBRARY ${LIBXML2_LIBRARIES}) SET(CONNECT_SOURCES ${CONNECT_SOURCES} libdoc.cpp libdoc.h) add_definitions(-DLIBXML2_SUPPORT) @@ -346,15 +345,6 @@ IF(MSVC AND (CMAKE_CXX_FLAGS MATCHES "/MP")) ENDIF() ENDIF() - -# Don't link 
with bundled zlib and systel libxml2 at the same time. -# System libxml2 uses system zlib, might conflict with the bundled one. -IF (XML_LIBRARY AND BUILD_BUNDLED_ZLIB) - GET_PROPERTY(INCS TARGET connect PROPERTY INCLUDE_DIRECTORIES) - LIST(REMOVE_ITEM INCS ${ZLIB_INCLUDE_DIR}) - SET_PROPERTY(TARGET connect PROPERTY INCLUDE_DIRECTORIES ${INCS}) -ENDIF() - IF(WIN32) IF (libmongoc-1.0_FOUND) SET_TARGET_PROPERTIES(connect PROPERTIES LINK_FLAGS diff --git a/storage/connect/array.cpp b/storage/connect/array.cpp index 0bf31fdb5fa..972a1e72403 100644 --- a/storage/connect/array.cpp +++ b/storage/connect/array.cpp @@ -457,7 +457,7 @@ char *ARRAY::GetStringValue(int n) /***********************************************************************/ bool ARRAY::Find(PVAL valp) { - register int n; + int n; PVAL vp; if (Type != valp->GetType()) { @@ -1067,7 +1067,7 @@ MULAR::MULAR(PGLOBAL g, int n) : CSORT(false) /***********************************************************************/ int MULAR::Qcompare(int *i1, int *i2) { - register int i, n = 0; + int i, n = 0; for (i = 0; i < Narray; i++) if ((n = Pars[i]->Qcompare(i1, i2))) diff --git a/storage/connect/csort.cpp b/storage/connect/csort.cpp index 670131b8fd2..1e4ba674e23 100644 --- a/storage/connect/csort.cpp +++ b/storage/connect/csort.cpp @@ -178,9 +178,9 @@ void CSORT::DebugSort(int ph, int n, int *base, int *mid, int *tmp) /***********************************************************************/ int CSORT::Qsortx(void) { - register int c; - register int lo, hi, min; - register int i, j, rc = 0; + int c; + int lo, hi, min; + int i, j, rc = 0; // To do: rc should be checked for being used uninitialized int *top; #ifdef DEBTRACE @@ -344,7 +344,7 @@ int CSORT::Qsortx(void) /***********************************************************************/ void CSORT::Qstx(int *base, int *max) { - register int *i, *j, *jj, *mid, *him, c; + int *i, *j, *jj, *mid, *him, c; int *tmp; int lo, hi, rc; size_t zlo, zhi, cnm; @@ -543,9 +543,9 @@ 
void CSORT::Qstx(int *base, int *max) /***********************************************************************/ int CSORT::Qsortc(void) { - register int c; - register int lo, hi, min; - register int i, j, k, m, rc = 0; + int c; + int lo, hi, min; + int i, j, k, m, rc = 0; // To do: rc should be checked for being used uninitialized int *max; #ifdef DEBTRACE @@ -720,7 +720,7 @@ int CSORT::Qsortc(void) /***********************************************************************/ void CSORT::Qstc(int *base, int *max) { - register int *i, *j, *jj, *lt, *eq, *gt, *mid; + int *i, *j, *jj, *lt, *eq, *gt, *mid; int c = 0, lo, hi, rc; size_t zlo, zhi, cnm; @@ -907,9 +907,9 @@ void CSORT::Qstc(int *base, int *max) /***********************************************************************/ void CSORT::Istc(int *base, int *hi, int *max) { - register int c = 0; - register int *lo; - register int *i, *j; + int c = 0; + int *lo; + int *i, *j; /*********************************************************************/ /* First put smallest element, which must be in the first THRESH, */ diff --git a/storage/connect/ha_connect.cc b/storage/connect/ha_connect.cc index 1916dd28422..4bf980044e3 100644 --- a/storage/connect/ha_connect.cc +++ b/storage/connect/ha_connect.cc @@ -1314,7 +1314,7 @@ char *ha_connect::GetRealString(PCSZ s) { char *sv; - if (IsPartitioned() && s && partname && *partname) { + if (IsPartitioned() && s && *partname) { sv= (char*)PlugSubAlloc(xp->g, NULL, 0); sprintf(sv, s, partname); PlugSubAlloc(xp->g, NULL, strlen(sv) + 1); @@ -3065,7 +3065,7 @@ PCFIL ha_connect::CheckCond(PGLOBAL g, PCFIL filp, const Item *cond) strncat(s, res->ptr(), res->length()); if (res->length() < 19) - strcat(s, "1970-01-01 00:00:00" + res->length()); + strcat(s, &"1970-01-01 00:00:00"[res->length()]); strcat(s, "'}"); break; @@ -3096,7 +3096,7 @@ PCFIL ha_connect::CheckCond(PGLOBAL g, PCFIL filp, const Item *cond) strncat(s, res->ptr(), res->length()); if (res->length() < 19) - strcat(s, 
"1970-01-01 00:00:00" + res->length()); + strcat(s, &"1970-01-01 00:00:00"[res->length()]); strcat(s, "'}"); break; @@ -3575,7 +3575,7 @@ int ha_connect::close(void) item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc, sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc and sql_update.cc */ -int ha_connect::write_row(uchar *buf) +int ha_connect::write_row(const uchar *buf) { int rc= 0; PGLOBAL& g= xp->g; diff --git a/storage/connect/ha_connect.h b/storage/connect/ha_connect.h index 5a1dcb1a88f..53e666d534d 100644 --- a/storage/connect/ha_connect.h +++ b/storage/connect/ha_connect.h @@ -388,7 +388,7 @@ virtual int check(THD* thd, HA_CHECK_OPT* check_opt); We implement this in ha_connect.cc. It's not an obligatory method; skip it and and MySQL will treat it as not implemented. */ - int write_row(uchar *buf); + int write_row(const uchar *buf); /** @brief We implement this in ha_connect.cc. It's not an obligatory method; diff --git a/storage/connect/odbconn.cpp b/storage/connect/odbconn.cpp index c8584224d7d..18d64677773 100644 --- a/storage/connect/odbconn.cpp +++ b/storage/connect/odbconn.cpp @@ -2277,7 +2277,7 @@ int ODBConn::GetCatInfo(CATPARM *cap) int i, irc; bool b; PCSZ fnc = "Unknown"; - UWORD n; + UWORD n = 0; SWORD ncol, len, tp; SQLULEN crow = 0; PQRYRES qrp = cap->Qrp; diff --git a/storage/connect/valblk.cpp b/storage/connect/valblk.cpp index 73ca135691c..5179fa654cf 100644 --- a/storage/connect/valblk.cpp +++ b/storage/connect/valblk.cpp @@ -511,7 +511,7 @@ void TYPBLK<TYPE>::SetValues(PVBLK pv, int k, int n) CheckType(pv) TYPE *lp = ((TYPBLK*)pv)->Typp; - for (register int i = k; i < n; i++) // TODO + for (int i = k; i < n; i++) // TODO Typp[i] = lp[i]; } // end of SetValues @@ -805,7 +805,7 @@ void CHRBLK::SetValue(const char *sp, uint len, int n) if (Blanks) { // Suppress eventual ending zero and right fill with blanks - for (register int i = len; i < Long; i++) + for (int i = len; i < Long; i++) p[i] = ' '; } else if ((signed)len < Long) diff --git 
a/storage/connect/xindex.cpp b/storage/connect/xindex.cpp index 0689304cc82..137e29ab9dd 100644 --- a/storage/connect/xindex.cpp +++ b/storage/connect/xindex.cpp @@ -272,8 +272,8 @@ void XINDEX::Close(void) /***********************************************************************/ int XINDEX::Qcompare(int *i1, int *i2) { - register int k; - register PXCOL kcp; + int k; + PXCOL kcp; for (kcp = To_KeyCol, k = 0; kcp; kcp = kcp->Next) if ((k = kcp->Compare(*i1, *i2))) @@ -746,7 +746,7 @@ int XINDEX::ColMaxSame(PXCOL kp) /***********************************************************************/ bool XINDEX::Reorder(PGLOBAL g __attribute__((unused))) { - register int i, j, k, n; + int i, j, k, n; bool sorted = true; PXCOL kcp; #if 0 @@ -1870,8 +1870,8 @@ int XINDEX::Fetch(PGLOBAL g) /***********************************************************************/ int XINDEX::FastFind(void) { - register int curk, sup, inf, i= 0, k, n = 2; - register PXCOL kp, kcp; + int curk, sup, inf, i= 0, k, n = 2; + PXCOL kp, kcp; //assert((int)nv == Nval); @@ -2209,8 +2209,8 @@ int XINDXS::Fetch(PGLOBAL g) /***********************************************************************/ int XINDXS::FastFind(void) { - register int sup, inf, i= 0, n = 2; - register PXCOL kcp = To_KeyCol; + int sup, inf, i= 0, n = 2; + PXCOL kcp = To_KeyCol; if (Nblk && Op == OP_EQ) { // Look in block values to find in which block to search @@ -3235,7 +3235,7 @@ void KXYCOL::FillValue(PVAL valp) int KXYCOL::Compare(int i1, int i2) { // Do the actual comparison between values. - register int k = Kblp->CompVal(i1, i2); + int k = Kblp->CompVal(i1, i2); if (trace(4)) htrc("Compare done result=%d\n", k); @@ -3250,7 +3250,7 @@ int KXYCOL::CompVal(int i) { // Do the actual comparison between numerical values. 
if (trace(4)) { - register int k = (int)Kblp->CompVal(Valp, (int)i); + int k = (int)Kblp->CompVal(Valp, (int)i); htrc("Compare done result=%d\n", k); return k; diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index 8a27ea19b3d..e67a43a3c1b 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -1003,7 +1003,7 @@ int ha_tina::close(void) of the file and appends the data. In an error case it really should just truncate to the original position (this is not done yet). */ -int ha_tina::write_row(uchar * buf) +int ha_tina::write_row(const uchar * buf) { int size; DBUG_ENTER("ha_tina::write_row"); diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h index efb161a714e..aae535c271e 100644 --- a/storage/csv/ha_tina.h +++ b/storage/csv/ha_tina.h @@ -136,7 +136,7 @@ public: int open(const char *name, int mode, uint open_options); int close(void); - int write_row(uchar * buf); + int write_row(const uchar * buf); int update_row(const uchar * old_data, const uchar * new_data); int delete_row(const uchar * buf); int rnd_init(bool scan=1); diff --git a/storage/example/ha_example.cc b/storage/example/ha_example.cc index 477aacd666e..818081518f5 100644 --- a/storage/example/ha_example.cc +++ b/storage/example/ha_example.cc @@ -397,7 +397,7 @@ int ha_example::close(void) sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc and sql_update.cc */ -int ha_example::write_row(uchar *buf) +int ha_example::write_row(const uchar *buf) { DBUG_ENTER("ha_example::write_row"); /* diff --git a/storage/example/ha_example.h b/storage/example/ha_example.h index 52fb4ff3c1b..0a08e871461 100644 --- a/storage/example/ha_example.h +++ b/storage/example/ha_example.h @@ -180,7 +180,7 @@ public: We implement this in ha_example.cc. It's not an obligatory method; skip it and and MySQL will treat it as not implemented. */ - int write_row(uchar *buf); + int write_row(const uchar *buf); /** @brief We implement this in ha_example.cc. 
It's not an obligatory method; diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc index 81367f0a80f..a479d14ab6c 100644 --- a/storage/federated/ha_federated.cc +++ b/storage/federated/ha_federated.cc @@ -1824,7 +1824,7 @@ bool ha_federated::append_stmt_insert(String *query) sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc. */ -int ha_federated::write_row(uchar *buf) +int ha_federated::write_row(const uchar *buf) { char values_buffer[FEDERATED_QUERY_BUFFER_SIZE]; char insert_field_value_buffer[STRING_BUFFER_USUAL_SIZE]; diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h index fc379707703..3beb2ca9570 100644 --- a/storage/federated/ha_federated.h +++ b/storage/federated/ha_federated.h @@ -209,7 +209,7 @@ public: void start_bulk_insert(ha_rows rows, uint flags); int end_bulk_insert(); - int write_row(uchar *buf); + int write_row(const uchar *buf); int update_row(const uchar *old_data, const uchar *new_data); int delete_row(const uchar *buf); int index_init(uint keynr, bool sorted); diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc index 7a83440cc19..1dd4aacee49 100644 --- a/storage/federatedx/ha_federatedx.cc +++ b/storage/federatedx/ha_federatedx.cc @@ -1988,7 +1988,7 @@ bool ha_federatedx::append_stmt_insert(String *query) sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc. 
*/ -int ha_federatedx::write_row(uchar *buf) +int ha_federatedx::write_row(const uchar *buf) { char values_buffer[FEDERATEDX_QUERY_BUFFER_SIZE]; char insert_field_value_buffer[STRING_BUFFER_USUAL_SIZE]; diff --git a/storage/federatedx/ha_federatedx.h b/storage/federatedx/ha_federatedx.h index 67aa83f7b33..1870a83d13d 100644 --- a/storage/federatedx/ha_federatedx.h +++ b/storage/federatedx/ha_federatedx.h @@ -397,7 +397,7 @@ public: void start_bulk_insert(ha_rows rows, uint flags); int end_bulk_insert(); - int write_row(uchar *buf); + int write_row(const uchar *buf); int update_row(const uchar *old_data, const uchar *new_data); int delete_row(const uchar *buf); int index_init(uint keynr, bool sorted); diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index 846c9322090..b03c9dfd002 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -227,7 +227,7 @@ void ha_heap::update_key_stats() } -int ha_heap::write_row(uchar * buf) +int ha_heap::write_row(const uchar * buf) { int res; if (table->next_number_field && buf == table->record[0]) diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h index 0d2a7a3a442..3440c8fd205 100644 --- a/storage/heap/ha_heap.h +++ b/storage/heap/ha_heap.h @@ -70,7 +70,7 @@ public: int open(const char *name, int mode, uint test_if_locked); int close(void); void set_keys_for_scanning(void); - int write_row(uchar * buf); + int write_row(const uchar * buf); int update_row(const uchar * old_data, const uchar * new_data); int delete_row(const uchar * buf); virtual void get_auto_increment(ulonglong offset, ulonglong increment, diff --git a/storage/innobase/.clang-format b/storage/innobase/.clang-format deleted file mode 100644 index f7a72f3cf24..00000000000 --- a/storage/innobase/.clang-format +++ /dev/null @@ -1,10 +0,0 @@ -UseTab: Always -TabWidth: 8 -IndentWidth: 8 -BreakBeforeBinaryOperators: All -PointerAlignment: Left -AlwaysBreakAfterReturnType: TopLevel -BreakBeforeBraces: Custom -BraceWrapping: - 
AfterFunction: true -AccessModifierOffset: -8 diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index dfa6e032176..cbd280af223 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -144,8 +144,7 @@ SET(INNOBASE_SOURCES ut/ut0rnd.cc ut/ut0ut.cc ut/ut0vec.cc - ut/ut0wqueue.cc - ut/ut0timer.cc) + ut/ut0wqueue.cc) MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE MODULE_OUTPUT_NAME ha_innodb diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 01a638ed764..67e0bfdd8b9 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -3491,7 +3491,8 @@ btr_lift_page_up( /* btr_page_empty() is supposed to zero-initialize the field. */ ut_ad(!page_get_instant(father_block->frame)); - if (page_level == 0 && index->is_instant()) { + if (index->is_instant() + && father_block->page.id.page_no() == root_page_no) { ut_ad(!father_page_zip); btr_set_instant(father_block, *index, mtr); } diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index c5a50f24b83..2c1cdb37a38 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -4904,6 +4904,7 @@ btr_cur_pessimistic_update( itself. Thus the following call is safe. 
*/ row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, entry_heap); + const ulint n = new_entry->n_fields; btr_cur_trim(new_entry, index, update, thr); /* We have to set appropriate extern storage bits in the new @@ -4911,7 +4912,10 @@ btr_cur_pessimistic_update( ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec)); ut_ad(rec_offs_validate(rec, index, *offsets)); - n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap); + if (index->is_primary()) { + n_ext += btr_push_update_extern_fields( + new_entry, n, update, entry_heap); + } if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) { @@ -4926,7 +4930,7 @@ btr_cur_pessimistic_update( ut_ad(dict_index_is_clust(index)); ut_ad(thr_get_trx(thr)->in_rollback); - DBUG_EXECUTE_IF("ib_blob_update_rollback", DBUG_SUICIDE();); + DEBUG_SYNC_C("blob_rollback_middle"); btr_rec_free_updated_extern_fields( index, rec, page_zip, *offsets, update, true, mtr); @@ -7246,29 +7250,33 @@ btr_cur_unmark_extern_fields( } } -/*******************************************************************//** -Flags the data tuple fields that are marked as extern storage in the +/** Flag the data tuple fields that are marked as extern storage in the update vector. We use this function to remember which fields we must mark as extern storage in a record inserted for an update. 
+@param[in,out] tuple clustered index record +@param[in] n number of fields in tuple, before any btr_cur_trim() +@param[in] update update vector +@param[in,out] heap memory heap @return number of flagged external columns */ ulint -btr_push_update_extern_fields( -/*==========================*/ - dtuple_t* tuple, /*!< in/out: data tuple */ - const upd_t* update, /*!< in: update vector */ - mem_heap_t* heap) /*!< in: memory heap */ +btr_push_update_extern_fields(dtuple_t* tuple, ulint n, const upd_t* update, + mem_heap_t* heap) { - ulint n_pushed = 0; - ulint n; - const upd_field_t* uf; + ulint n_pushed = 0; + const upd_field_t* uf = update->fields; + + ut_ad(n >= tuple->n_fields); + /* The clustered index record must always contain a + PRIMARY KEY and the system columns DB_TRX_ID,DB_ROLL_PTR. */ + ut_ad(tuple->n_fields > DATA_ROLL_PTR); + compile_time_assert(DATA_ROLL_PTR == 2); - uf = update->fields; - n = upd_get_n_fields(update); + for (ulint un = upd_get_n_fields(update); un--; uf++) { + ut_ad(uf->field_no < n); - for (; n--; uf++) { - if (dfield_is_ext(&uf->new_val)) { - dfield_t* field - = dtuple_get_nth_field(tuple, uf->field_no); + if (dfield_is_ext(&uf->new_val) + && uf->field_no < tuple->n_fields) { + dfield_t* field = &tuple->fields[uf->field_no]; if (!dfield_is_ext(field)) { dfield_set_ext(field); diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 369a0bf6181..6a1d23fb472 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved. +Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved. Copyright (C) 2014, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under @@ -36,7 +36,6 @@ Modified 30/07/2014 Jan Lindström jan.lindstrom@mariadb.com #include "ibuf0ibuf.h" #include "lock0lock.h" #include "srv0start.h" -#include "ut0timer.h" #include <list> @@ -100,8 +99,7 @@ Initialize defragmentation. */ void btr_defragment_init() { - srv_defragment_interval = ut_microseconds_to_timer( - (ulonglong) (1000000.0 / srv_defragment_frequency)); + srv_defragment_interval = 1000000000ULL / srv_defragment_frequency; mutex_create(LATCH_ID_BTR_DEFRAGMENT_MUTEX, &btr_defragment_mutex); } @@ -728,7 +726,7 @@ DECLARE_THREAD(btr_defragment_thread)(void*) } pcur = item->pcur; - ulonglong now = ut_timer_now(); + ulonglong now = my_interval_timer(); ulonglong elapsed = now - item->last_processed; if (elapsed < srv_defragment_interval) { @@ -738,11 +736,12 @@ DECLARE_THREAD(btr_defragment_thread)(void*) defragmentation of all indices queue up on a single thread, it's likely other indices that follow this one don't need to sleep again. */ - os_thread_sleep(((ulint)ut_timer_to_microseconds( - srv_defragment_interval - elapsed))); + os_thread_sleep(static_cast<ulint> + ((srv_defragment_interval - elapsed) + / 1000)); } - now = ut_timer_now(); + now = my_interval_timer(); mtr_start(&mtr); cursor = btr_pcur_get_btr_cur(pcur); index = btr_cur_get_index(cursor); diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index e8c3ab7f02c..7a7c3189add 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -635,7 +635,7 @@ buf_buddy_relocate( if (buf_page_can_relocate(bpage)) { /* Relocate the compressed page. 
*/ - uintmax_t usec = ut_time_us(NULL); + const ulonglong ns = my_interval_timer(); ut_a(bpage->zip.data == src); @@ -651,7 +651,7 @@ buf_buddy_relocate( buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i]; buddy_stat->relocated++; - buddy_stat->relocated_usec += ut_time_us(NULL) - usec; + buddy_stat->relocated_usec+= (my_interval_timer() - ns) / 1000; return(true); } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 152c7d13747..cff1cda47f1 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -57,6 +57,7 @@ Created 11/5/1995 Heikki Tuuri #include "dict0dict.h" #include "log0recv.h" #include "srv0mon.h" +#include "log0crypt.h" #include "fil0pagecompress.h" #include "fsp0pagecompress.h" #endif /* !UNIV_INNOCHECKSUM */ @@ -472,6 +473,45 @@ buf_pool_register_chunk( chunk->blocks->frame, chunk)); } +/** Decrypt a page for temporary tablespace. +@param[in,out] tmp_frame Temporary buffer +@param[in] src_frame Page to decrypt +@return true if temporary tablespace decrypted, false if not */ +static bool buf_tmp_page_decrypt(byte* tmp_frame, byte* src_frame) +{ + if (buf_page_is_zeroes(src_frame, srv_page_size)) { + return true; + } + + /* read space & lsn */ + uint header_len = FIL_PAGE_DATA; + + /* Copy FIL page header, it is not encrypted */ + memcpy(tmp_frame, src_frame, header_len); + + /* Calculate the offset where decryption starts */ + const byte* src = src_frame + header_len; + byte* dst = tmp_frame + header_len; + uint srclen = uint(srv_page_size) + - header_len - FIL_PAGE_DATA_END; + ulint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET); + + if (!log_tmp_block_decrypt(src, srclen, dst, + (offset * srv_page_size))) { + return false; + } + + memcpy(tmp_frame + srv_page_size - FIL_PAGE_DATA_END, + src_frame + srv_page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + memcpy(src_frame, tmp_frame, srv_page_size); + srv_stats.pages_decrypted.inc(); + 
srv_stats.n_temp_blocks_decrypted.inc(); + + return true; /* page was decrypted */ +} + /** Decrypt a page. @param[in,out] bpage Page control block @param[in,out] space tablespace @@ -492,6 +532,22 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) return (true); } + if (space->purpose == FIL_TYPE_TEMPORARY + && innodb_encrypt_temporary_tables) { + buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool); + buf_tmp_reserve_crypt_buf(slot); + + if (!buf_tmp_page_decrypt(slot->crypt_buf, dst_frame)) { + slot->release(); + ib::error() << "Encrypted page " << bpage->id + << " in file " << space->chain.start->name; + return false; + } + + slot->release(); + return true; + } + /* Page is encrypted if encryption information is found from tablespace and page contains used key_version. This is true also for pages first compressed and then encrypted. */ @@ -1076,25 +1132,30 @@ buf_page_is_corrupted( /* A page filled with NUL bytes is considered not corrupted. The FIL_PAGE_FILE_FLUSH_LSN field may be written nonzero for the first page of each file of the system tablespace. - Ignore it for the system tablespace. */ + We want to ignore it for the system tablespace, but because + we do not know the expected tablespace here, we ignore the + field for all data files, except for + innodb_checksum_algorithm=full_crc32 which we handled above. */ if (!checksum_field1 && !checksum_field2) { - ulint i = 0; - do { - if (read_buf[i]) { - return true; + /* Checksum fields can have valid value as zero. + If the page is not empty then do the checksum + calculation for the page. */ + bool all_zeroes = true; + for (size_t i = 0; i < srv_page_size; i++) { +#ifndef UNIV_INNOCHECKSUM + if (i == FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) { + i += 8; } - } while (++i < FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - /* Ignore FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION unless - innodb_checksum_algorithm=full_crc32. 
*/ - i += 8; - - do { +#endif if (read_buf[i]) { - return true; + all_zeroes = false; + break; } - } while (++i < srv_page_size); - return false; + } + + if (all_zeroes) { + return false; + } } switch (curr_algo) { @@ -1283,14 +1344,16 @@ buf_madvise_do_dump() @param[in] zip_size compressed page size, or 0 */ void buf_page_print(const byte* read_buf, ulint zip_size) { - const ulint size = zip_size ? zip_size : srv_page_size; dict_index_t* index; +#ifndef UNIV_DEBUG + const ulint size = zip_size ? zip_size : srv_page_size; ib::info() << "Page dump in ascii and hex (" << size << " bytes):"; ut_print_buf(stderr, read_buf, size); fputs("\nInnoDB: End of page dump\n", stderr); +#endif if (zip_size) { /* Print compressed page. */ @@ -1934,7 +1997,7 @@ buf_pool_init_instance( buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); - buf_pool->last_printout_time = ut_time(); + buf_pool->last_printout_time = time(NULL); } /* 2. Initialize flushing fields -------------------------------- */ @@ -2748,7 +2811,7 @@ buf_pool_resize() buf_resize_status("Withdrawing blocks to be shrunken."); - ib_time_t withdraw_started = ut_time(); + time_t withdraw_started = time(NULL); ulint message_interval = 60; ulint retry_interval = 1; @@ -2774,8 +2837,10 @@ withdraw_retry: /* abort buffer pool load */ buf_load_abort(); + const time_t current_time = time(NULL); + if (should_retry_withdraw - && ut_difftime(ut_time(), withdraw_started) >= message_interval) { + && difftime(current_time, withdraw_started) >= message_interval) { if (message_interval > 900) { message_interval = 1800; @@ -2791,8 +2856,7 @@ withdraw_retry: trx = UT_LIST_GET_NEXT(trx_list, trx)) { if (trx->state != TRX_STATE_NOT_STARTED && trx->mysql_thd != NULL - && ut_difftime(withdraw_started, - trx->start_time) > 0) { + && withdraw_started > trx->start_time) { if (!found) { ib::warn() << "The following trx might hold" @@ -2805,13 +2869,13 @@ withdraw_retry: } lock_trx_print_wait_and_mvcc_state( - stderr, trx); + stderr, trx, 
current_time); } } mutex_exit(&trx_sys.mutex); lock_mutex_exit(); - withdraw_started = ut_time(); + withdraw_started = current_time; } if (should_retry_withdraw) { @@ -6291,7 +6355,7 @@ void buf_refresh_io_stats( buf_pool_t* buf_pool) { - buf_pool->last_printout_time = ut_time(); + buf_pool->last_printout_time = time(NULL); buf_pool->old_stat = buf_pool->stat; } @@ -7354,6 +7418,44 @@ operator<<( return(out); } +/** Encrypt a buffer of temporary tablespace +@param[in] offset Page offset +@param[in] src_frame Page to encrypt +@param[in,out] dst_frame Output buffer +@return encrypted buffer or NULL */ +static byte* buf_tmp_page_encrypt( + ulint offset, + byte* src_frame, + byte* dst_frame) +{ + uint header_len = FIL_PAGE_DATA; + /* FIL page header is not encrypted */ + memcpy(dst_frame, src_frame, header_len); + + /* Calculate the start offset in a page */ + uint unencrypted_bytes = header_len + FIL_PAGE_DATA_END; + uint srclen = srv_page_size - unencrypted_bytes; + const byte* src = src_frame + header_len; + byte* dst = dst_frame + header_len; + + if (!log_tmp_block_encrypt(src, srclen, dst, (offset * srv_page_size), + true)) { + return NULL; + } + + memcpy(dst_frame + srv_page_size - FIL_PAGE_DATA_END, + src_frame + srv_page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + /* Handle post encryption checksum */ + mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, + buf_calc_page_crc32(dst_frame)); + + srv_stats.pages_encrypted.inc(); + srv_stats.n_temp_blocks_encrypted.inc(); + return dst_frame; +} + /** Encryption and page_compression hook that is called just before a page is written to disk. 
@param[in,out] space tablespace @@ -7387,13 +7489,20 @@ buf_page_encrypt( fil_space_crypt_t* crypt_data = space->crypt_data; - const bool encrypted = crypt_data - && !crypt_data->not_encrypted() - && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED - && (!crypt_data->is_default_encryption() - || srv_encrypt_tables); + bool encrypted, page_compressed; - bool page_compressed = space->is_compressed(); + if (space->purpose == FIL_TYPE_TEMPORARY) { + ut_ad(!crypt_data); + encrypted = innodb_encrypt_temporary_tables; + page_compressed = false; + } else { + encrypted = crypt_data + && !crypt_data->not_encrypted() + && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED + && (!crypt_data->is_default_encryption() + || srv_encrypt_tables); + page_compressed = space->is_compressed(); + } if (!encrypted && !page_compressed) { /* No need to encrypt or page compress the page. @@ -7433,18 +7542,25 @@ buf_page_encrypt( if (!page_compressed) { not_compressed: - /* Encrypt page content */ - byte* tmp = fil_space_encrypt(space, - bpage->id.page_no(), - bpage->newest_modification, - src_frame, - dst_frame); + byte* tmp; + if (space->purpose == FIL_TYPE_TEMPORARY) { + /* Encrypt temporary tablespace page content */ + tmp = buf_tmp_page_encrypt(bpage->id.page_no(), + src_frame, dst_frame); + } else { + /* Encrypt page content */ + tmp = fil_space_encrypt( + space, bpage->id.page_no(), + bpage->newest_modification, + src_frame, dst_frame); + } bpage->real_size = srv_page_size; slot->out_buf = dst_frame = tmp; ut_d(fil_page_type_validate(space, tmp)); } else { + ut_ad(space->purpose != FIL_TYPE_TEMPORARY); /* First we compress the page content */ buf_tmp_reserve_compression_buf(slot); byte* tmp = slot->comp_buf; diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index ac5b1edebfc..658d023c9c7 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -2405,7 +2405,7 @@ page_cleaner_flush_pages_recommendation( if (prev_lsn == 0) { /* First time 
around. */ prev_lsn = cur_lsn; - prev_time = ut_time(); + prev_time = time(NULL); return(0); } @@ -2415,7 +2415,7 @@ page_cleaner_flush_pages_recommendation( sum_pages += last_pages_in; - time_t curr_time = ut_time(); + time_t curr_time = time(NULL); double time_elapsed = difftime(curr_time, prev_time); /* We update our variables every srv_flushing_avg_loops diff --git a/storage/innobase/data/data0data.cc b/storage/innobase/data/data0data.cc index 02f921f716c..7cc9c2bc09e 100644 --- a/storage/innobase/data/data0data.cc +++ b/storage/innobase/data/data0data.cc @@ -601,7 +601,6 @@ dtuple_convert_big_rec( dfield_t* dfield; ulint size; ulint n_fields; - ulint local_len; ulint local_prefix_len; if (!dict_index_is_clust(index)) { @@ -612,6 +611,7 @@ dtuple_convert_big_rec( return NULL; } + ulint local_len = index->table->get_overflow_field_local_len(); const auto zip_size = index->table->space->zip_size(); ut_ad(index->n_uniq > 0); diff --git a/storage/innobase/dict/dict0defrag_bg.cc b/storage/innobase/dict/dict0defrag_bg.cc index 31fb05e59c6..7de50f19217 100644 --- a/storage/innobase/dict/dict0defrag_bg.cc +++ b/storage/innobase/dict/dict0defrag_bg.cc @@ -238,7 +238,6 @@ dict_stats_save_defrag_summary( dict_index_t* index) /*!< in: index */ { dberr_t ret=DB_SUCCESS; - lint now = (lint) ut_time(); if (dict_index_is_ibuf(index)) { return DB_SUCCESS; @@ -246,7 +245,7 @@ dict_stats_save_defrag_summary( dict_sys_lock(); - ret = dict_stats_save_index_stat(index, now, "n_pages_freed", + ret = dict_stats_save_index_stat(index, time(NULL), "n_pages_freed", index->stat_defrag_n_pages_freed, NULL, "Number of pages freed during" @@ -276,7 +275,7 @@ dict_stats_save_defrag_stats( return dict_stats_report_error(index->table, true); } - lint now = (lint) ut_time(); + const time_t now = time(NULL); mtr_t mtr; ulint n_leaf_pages; ulint n_leaf_reserved; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 9643584b167..b5bd73ed5ec 100644 --- 
a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -1768,20 +1768,10 @@ dict_col_name_is_reserved( return(FALSE); } -/****************************************************************//** -If a record of this index might not fit on a single B-tree page, -return TRUE. -@return TRUE if the index record could become too big */ -static -ibool -dict_index_too_big_for_tree( -/*========================*/ - const dict_table_t* table, /*!< in: table */ - const dict_index_t* new_index, /*!< in: index */ - bool strict) /*!< in: TRUE=report error if - records could be too big to - fit in an B-tree page */ +bool dict_index_t::rec_potentially_too_big(bool strict) const { + ut_ad(table); + ulint comp; ulint i; /* maximum possible storage size of a record */ @@ -1793,8 +1783,8 @@ dict_index_too_big_for_tree( /* FTS index consists of auxiliary tables, they shall be excluded from index row size check */ - if (new_index->type & DICT_FTS) { - return(false); + if (type & DICT_FTS) { + return false; } DBUG_EXECUTE_IF( @@ -1815,8 +1805,7 @@ dict_index_too_big_for_tree( an empty page, minus a byte for recoding the heap number in the page modification log. The maximum allowed node pointer size is half that. */ - page_rec_max = page_zip_empty_size(new_index->n_fields, - zip_size); + page_rec_max = page_zip_empty_size(n_fields, zip_size); if (page_rec_max) { page_rec_max--; } @@ -1844,25 +1833,24 @@ dict_index_too_big_for_tree( if (comp) { /* Include the "null" flags in the maximum possible record size. */ - rec_max_size += UT_BITS_IN_BYTES( - unsigned(new_index->n_nullable)); + rec_max_size += UT_BITS_IN_BYTES(unsigned(n_nullable)); } else { - /* For each column, include a 2-byte offset and a + /* For each column, include a 2-byte offset and a "null" flag. The 1-byte format is only used in short records that do not contain externally stored columns. Such records could never exceed the page limit, even when using the 2-byte format. 
*/ - rec_max_size += 2 * unsigned(new_index->n_fields); + rec_max_size += 2 * unsigned(n_fields); } - /* Compute the maximum possible record size. */ - for (i = 0; i < new_index->n_fields; i++) { + const ulint max_local_len = table->get_overflow_field_local_len(); + + /* Compute the maximum possible record size. */ + for (i = 0; i < n_fields; i++) { const dict_field_t* field - = dict_index_get_nth_field(new_index, i); + = dict_index_get_nth_field(this, i); const dict_col_t* col = dict_field_get_col(field); - ulint field_max_size; - ulint field_ext_max_size; /* In dtuple_convert_big_rec(), variable-length columns that are longer than BTR_EXTERN_LOCAL_STORED_MAX_SIZE @@ -1876,26 +1864,28 @@ dict_index_too_big_for_tree( case in rec_get_converted_size_comp() for REC_STATUS_ORDINARY records. */ - field_max_size = dict_col_get_fixed_size(col, comp); + size_t field_max_size = dict_col_get_fixed_size(col, comp); if (field_max_size && field->fixed_len != 0) { /* dict_index_add_col() should guarantee this */ ut_ad(!field->prefix_len || field->fixed_len == field->prefix_len); /* Fixed lengths are not encoded in ROW_FORMAT=COMPACT. */ - field_ext_max_size = 0; goto add_field_size; } field_max_size = dict_col_get_max_size(col); - field_ext_max_size = field_max_size < 256 ? 1 : 2; if (field->prefix_len) { if (field->prefix_len < field_max_size) { field_max_size = field->prefix_len; } - } else if (field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE - && dict_index_is_clust(new_index)) { + + // those conditions were copied from dtuple_convert_big_rec() + } else if (field_max_size > max_local_len + && field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE + && DATA_BIG_COL(col) + && dict_index_is_clust(this)) { /* In the worst case, we have a locally stored column of BTR_EXTERN_LOCAL_STORED_MAX_SIZE bytes. @@ -1903,21 +1893,26 @@ dict_index_too_big_for_tree( column were stored externally, the lengths in the clustered index page would be BTR_EXTERN_FIELD_REF_SIZE and 2. 
*/ - field_max_size = BTR_EXTERN_LOCAL_STORED_MAX_SIZE; - field_ext_max_size = 1; + field_max_size = max_local_len; } if (comp) { /* Add the extra size for ROW_FORMAT=COMPACT. For ROW_FORMAT=REDUNDANT, these bytes were added to rec_max_size before this loop. */ - rec_max_size += field_ext_max_size; + rec_max_size += field_max_size < 256 ? 1 : 2; } add_field_size: rec_max_size += field_max_size; /* Check the size limit on leaf pages. */ if (rec_max_size >= page_rec_max) { + // with 4k page size innodb_index_stats becomes too big + // this crutch allows server bootstrapping to continue + if (table->is_system_db) { + return false; + } + ib::error_or_warn(strict) << "Cannot add field " << field->name << " in table " << table->name @@ -1927,7 +1922,7 @@ add_field_size: " size (" << page_rec_max << ") for a record on index leaf page."; - return(TRUE); + return true; } /* Check the size limit on non-leaf pages. Records @@ -1936,14 +1931,14 @@ add_field_size: and a node pointer field. When we have processed the unique columns, rec_max_size equals the size of the node pointer record minus the node pointer column. 
*/ - if (i + 1 == dict_index_get_n_unique_in_tree(new_index) + if (i + 1 == dict_index_get_n_unique_in_tree(this) && rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) { - return(TRUE); + return true; } } - return(FALSE); + return false; } /** Adds an index to the dictionary cache, with possible indexing newly @@ -2013,7 +2008,7 @@ dict_index_add_to_cache( new_index->disable_ahi = index->disable_ahi; #endif - if (dict_index_too_big_for_tree(index->table, new_index, strict)) { + if (new_index->rec_potentially_too_big(strict)) { if (strict) { dict_mem_index_free(new_index); diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 67a9d2de5e1..bd4bb261320 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -956,7 +956,7 @@ dict_stats_update_transient( table->stat_sum_of_other_index_sizes = sum_of_index_sizes - index->stat_index_size; - table->stats_last_recalc = ut_time(); + table->stats_last_recalc = time(NULL); table->stat_modified_counter = 0; @@ -2267,7 +2267,7 @@ dict_stats_update_persistent( += index->stat_index_size; } - table->stats_last_recalc = ut_time(); + table->stats_last_recalc = time(NULL); table->stat_modified_counter = 0; @@ -2296,7 +2296,7 @@ rolled back only in the case of error, but not freed. 
dberr_t dict_stats_save_index_stat( dict_index_t* index, - ib_time_t last_update, + time_t last_update, const char* stat_name, ib_uint64_t stat_value, ib_uint64_t* sample_size, @@ -2424,7 +2424,6 @@ dict_stats_save( const index_id_t* only_for_index) { pars_info_t* pinfo; - ib_time_t now; dberr_t ret; dict_table_t* table; char db_utf8[MAX_DB_UTF8_LEN]; @@ -2443,7 +2442,7 @@ dict_stats_save( dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8), table_utf8, sizeof(table_utf8)); - now = ut_time(); + const time_t now = time(NULL); dict_sys_lock(); pinfo = pars_info_create(); diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc index 6702a884dcf..2985b6faf35 100644 --- a/storage/innobase/dict/dict0stats_bg.cc +++ b/storage/innobase/dict/dict0stats_bg.cc @@ -393,14 +393,14 @@ dict_stats_process_entry_from_recalc_pool() mutex_exit(&dict_sys.mutex); - /* ut_time() could be expensive, the current function + /* time() could be expensive, the current function is called once every time a table has been changed more than 10% and on a system with lots of small tables, this could become hot. If we find out that this is a problem, then the check below could eventually be replaced with something else, though a time interval is the natural approach. */ - if (ut_difftime(ut_time(), table->stats_last_recalc) + if (difftime(time(NULL), table->stats_last_recalc) < MIN_RECALC_INTERVAL) { /* Stats were (re)calculated not long ago. To avoid diff --git a/storage/innobase/eval/eval0eval.cc b/storage/innobase/eval/eval0eval.cc index fc16b9defb5..577157d2eb9 100644 --- a/storage/innobase/eval/eval0eval.cc +++ b/storage/innobase/eval/eval0eval.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -814,7 +815,7 @@ eval_predefined( dfield_get_data(que_node_get_val(arg1))); } else if (func == PARS_SYSDATE_TOKEN) { - int_val = (lint) ut_time(); + int_val = (lint) time(NULL); } else { eval_predefined_2(func_node); diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 44cc9daa508..50b73222607 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -1838,19 +1838,18 @@ fil_crypt_get_page_throttle_func( state->crypt_stat.pages_read_from_disk++; - uintmax_t start = ut_time_us(NULL); + const ulonglong start = my_interval_timer(); block = buf_page_get_gen(page_id, zip_size, RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, file, line, mtr, &err); - uintmax_t end = ut_time_us(NULL); - - if (end < start) { - end = start; // safety... - } + const ulonglong end = my_interval_timer(); state->cnt_waited++; - state->sum_waited_us += (end - start); + + if (end > start) { + state->sum_waited_us += (end - start) / 1000; + } /* average page load */ ulint add_sleeptime_ms = 0; @@ -2174,7 +2173,7 @@ fil_crypt_flush_space( bool success = false; ulint n_pages = 0; ulint sum_pages = 0; - uintmax_t start = ut_time_us(NULL); + const ulonglong start = my_interval_timer(); do { success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages); @@ -2182,11 +2181,11 @@ fil_crypt_flush_space( sum_pages += n_pages; } while (!success && !space->is_stopping()); - uintmax_t end = ut_time_us(NULL); + const ulonglong end = my_interval_timer(); if (sum_pages && end > start) { state->cnt_waited += sum_pages; - state->sum_waited_us += (end - start); + state->sum_waited_us += (end - start) / 1000; /* statistics */ state->crypt_stat.pages_flushed += sum_pages; @@ -2621,7 +2620,8 @@ fil_space_crypt_close_tablespace( { fil_space_crypt_t* crypt_data = space->crypt_data; - if (!crypt_data || 
srv_n_fil_crypt_threads == 0) { + if (!crypt_data || srv_n_fil_crypt_threads == 0 + || !fil_crypt_threads_inited) { return; } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index d395e21f79f..0c00671214b 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2858,9 +2858,15 @@ fil_rename_tablespace( space->n_pending_ops--; ut_ad(space->name == old_space_name); ut_ad(node->name == old_file_name); - - bool success = os_file_rename( - innodb_data_file_key, old_file_name, new_file_name); + bool success; + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", + goto skip_second_rename; ); + success = os_file_rename(innodb_data_file_key, + old_file_name, + new_file_name); + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", +skip_second_rename: + success = false; ); ut_ad(node->name == old_file_name); @@ -4295,6 +4301,11 @@ fil_io( req_type.set_fil_node(node); + ut_ad(!req_type.is_write() + || page_id.space() == SRV_LOG_SPACE_FIRST_ID + || !fil_is_user_tablespace_id(page_id.space()) + || offset == page_id.page_no() * zip_size); + /* Queue the aio request */ dberr_t err = os_aio( req_type, diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 37819e37426..246f1ce574c 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -74,8 +74,8 @@ ulong fts_min_token_size; // FIXME: testing -static ib_time_t elapsed_time = 0; -static ulint n_nodes = 0; +static time_t elapsed_time; +static ulint n_nodes; #ifdef FTS_CACHE_SIZE_DEBUG /** The cache size permissible lower limit (1K) */ @@ -194,15 +194,13 @@ FTS auxiliary INDEX table and clear the cache at the end. 
@param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress -@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait, - bool has_dict); + bool wait); /****************************************************************//** Release all resources help by the words rb tree e.g., the node ilist. */ @@ -3389,7 +3387,7 @@ fts_add_doc_from_tuple( if (cache->total_size > fts_max_cache_size / 5 || fts_need_sync) { - fts_sync(cache->sync, true, false, false); + fts_sync(cache->sync, true, false); } mtr_start(&mtr); @@ -3567,7 +3565,7 @@ fts_add_doc_by_id( DBUG_EXECUTE_IF( "fts_instrument_sync_debug", - fts_sync(cache->sync, true, true, false); + fts_sync(cache->sync, true, true); ); DEBUG_SYNC_C("fts_instrument_sync_request"); @@ -3826,7 +3824,7 @@ fts_write_node( pars_info_t* info; dberr_t error; ib_uint32_t doc_count; - ib_time_t start_time; + time_t start_time; doc_id_t last_doc_id; doc_id_t first_doc_id; char table_name[MAX_FULL_NAME_LEN]; @@ -3875,9 +3873,9 @@ fts_write_node( " :last_doc_id, :doc_count, :ilist);"); } - start_time = ut_time(); + start_time = time(NULL); error = fts_eval_sql(trx, *graph); - elapsed_time += ut_time() - start_time; + elapsed_time += time(NULL) - start_time; ++n_nodes; return(error); @@ -4054,7 +4052,7 @@ fts_sync_begin( n_nodes = 0; elapsed_time = 0; - sync->start_time = ut_time(); + sync->start_time = time(NULL); sync->trx = trx_create(); trx_start_internal(sync->trx); @@ -4193,7 +4191,7 @@ fts_sync_commit( if (fts_enable_diag_print && elapsed_time) { ib::info() << "SYNC for table " << sync->table->name << ": SYNC time: " - << (ut_time() - sync->start_time) + << (time(NULL) - sync->start_time) << " secs: elapsed " << (double) n_nodes / elapsed_time << " ins/sec"; @@ -4263,15 +4261,13 @@ FTS auxiliary INDEX table and clear the cache at the end. 
@param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress -@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait, - bool has_dict) + bool wait) { if (srv_read_only_mode) { return DB_READ_ONLY; @@ -4304,12 +4300,6 @@ fts_sync( DEBUG_SYNC_C("fts_sync_begin"); fts_sync_begin(sync); - /* When sync in background, we hold dict operation lock - to prevent DDL like DROP INDEX, etc. */ - if (has_dict) { - sync->trx->dict_operation_lock_mode = RW_S_LATCH; - } - begin_sync: if (cache->total_size > fts_max_cache_size) { /* Avoid the case: sync never finish when @@ -4400,16 +4390,9 @@ end_sync: /** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] table fts table -@param[in] unlock_cache whether unlock cache when write node @param[in] wait whether wait for existing sync to finish -@param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. 
*/ -dberr_t -fts_sync_table( - dict_table_t* table, - bool unlock_cache, - bool wait, - bool has_dict) +dberr_t fts_sync_table(dict_table_t* table, bool wait) { dberr_t err = DB_SUCCESS; @@ -4417,8 +4400,7 @@ fts_sync_table( if (table->space && table->fts->cache && !dict_table_is_corrupted(table)) { - err = fts_sync(table->fts->cache->sync, - unlock_cache, wait, has_dict); + err = fts_sync(table->fts->cache->sync, !wait, wait); } return(err); diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index f45ed70f374..6a3e52d8ace 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -58,7 +58,7 @@ static os_event_t fts_opt_shutdown_event = NULL; static const ulint FTS_WORD_NODES_INIT_SIZE = 64; /** Last time we did check whether system need a sync */ -static ib_time_t last_check_sync_time; +static time_t last_check_sync_time; /** FTS optimize thread message types. */ enum fts_msg_type_t { @@ -180,12 +180,11 @@ struct fts_slot_t { ulint deleted; /*!< Number of doc ids deleted since the last time this table was optimized */ - ib_time_t last_run; /*!< Time last run completed */ + /** time(NULL) of completing fts_optimize_table_bk() */ + time_t last_run; - ib_time_t completed; /*!< Optimize finish time */ - - ib_time_t interval_time; /*!< Minimum time to wait before - optimizing the table again. */ + /** time(NULL) of latest successful fts_optimize_table() */ + time_t completed; }; /** A table remove message for the FTS optimize thread. */ @@ -217,8 +216,8 @@ char fts_enable_diag_print; /** ZLib compressed block size.*/ static ulint FTS_ZIP_BLOCK_SIZE = 1024; -/** The amount of time optimizing in a single pass, in milliseconds. */ -static ib_time_t fts_optimize_time_limit = 0; +/** The amount of time optimizing in a single pass, in seconds. 
*/ +static ulint fts_optimize_time_limit; /** It's defined in fts0fts.cc */ extern const char* fts_common_tables[]; @@ -1530,7 +1529,7 @@ fts_optimize_compact( /*=================*/ fts_optimize_t* optim, /*!< in: optimize state data */ dict_index_t* index, /*!< in: current FTS being optimized */ - ib_time_t start_time) /*!< in: optimize start time */ + time_t start_time) /*!< in: optimize start time */ { ulint i; dberr_t error = DB_SUCCESS; @@ -1563,8 +1562,11 @@ fts_optimize_compact( /* Free the word that was optimized. */ fts_word_free(word); + ulint interval = ulint(time(NULL) - start_time); + if (fts_optimize_time_limit > 0 - && (ut_time() - start_time) > fts_optimize_time_limit) { + && (lint(interval) < 0 + || interval > fts_optimize_time_limit)) { optim->done = TRUE; } @@ -1624,7 +1626,7 @@ fts_optimize_get_index_start_time( /*==============================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: FTS index */ - ib_time_t* start_time) /*!< out: time in secs */ + time_t* start_time) /*!< out: time in secs */ { return(fts_config_get_index_ulint( trx, index, FTS_OPTIMIZE_START_TIME, @@ -1640,7 +1642,7 @@ fts_optimize_set_index_start_time( /*==============================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: FTS index */ - ib_time_t start_time) /*!< in: start time */ + time_t start_time) /*!< in: start time */ { return(fts_config_set_index_ulint( trx, index, FTS_OPTIMIZE_START_TIME, @@ -1656,7 +1658,7 @@ fts_optimize_get_index_end_time( /*============================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: FTS index */ - ib_time_t* end_time) /*!< out: time in secs */ + time_t* end_time) /*!< out: time in secs */ { return(fts_config_get_index_ulint( trx, index, FTS_OPTIMIZE_END_TIME, (ulint*) end_time)); @@ -1671,7 +1673,7 @@ fts_optimize_set_index_end_time( /*============================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: FTS index */ - 
ib_time_t end_time) /*!< in: end time */ + time_t end_time) /*!< in: end time */ { return(fts_config_set_index_ulint( trx, index, FTS_OPTIMIZE_END_TIME, (ulint) end_time)); @@ -1734,22 +1736,23 @@ fts_optimize_free( Get the max time optimize should run in millisecs. @return max optimize time limit in millisecs. */ static -ib_time_t +ulint fts_optimize_get_time_limit( /*========================*/ trx_t* trx, /*!< in: transaction */ fts_table_t* fts_table) /*!< in: aux table */ { - ib_time_t time_limit = 0; + ulint time_limit = 0; fts_config_get_ulint( trx, fts_table, - FTS_OPTIMIZE_LIMIT_IN_SECS, (ulint*) &time_limit); + FTS_OPTIMIZE_LIMIT_IN_SECS, &time_limit); + /* FIXME: This is returning milliseconds, while the variable + is being stored and interpreted as seconds! */ return(time_limit * 1000); } - /**********************************************************************//** Run OPTIMIZE on the given table. Note: this can take a very long time (hours). */ @@ -1762,7 +1765,6 @@ fts_optimize_words( fts_string_t* word) /*!< in: the starting word to optimize */ { fts_fetch_t fetch; - ib_time_t start_time; que_t* graph = NULL; CHARSET_INFO* charset = optim->fts_index_table.charset; @@ -1772,7 +1774,7 @@ fts_optimize_words( fts_optimize_time_limit = fts_optimize_get_time_limit( optim->trx, &optim->fts_common_table); - start_time = ut_time(); + const time_t start_time = time(NULL); /* Setup the callback to use for fetching the word ilist etc. */ fetch.read_arg = optim->words; @@ -1858,7 +1860,7 @@ fts_optimize_index_completed( dberr_t error; byte buf[sizeof(ulint)]; #ifdef FTS_OPTIMIZE_DEBUG - ib_time_t end_time = ut_time(); + time_t end_time = time(NULL); error = fts_optimize_set_index_end_time(optim->trx, index, end_time); #endif @@ -2249,8 +2251,8 @@ fts_optimize_indexes( dict_index_t* index; #ifdef FTS_OPTIMIZE_DEBUG - ib_time_t end_time; - ib_time_t start_time; + time_t end_time; + time_t start_time; /* Get the start and end optimize times for this index. 
*/ error = fts_optimize_get_index_start_time( @@ -2270,14 +2272,14 @@ fts_optimize_indexes( /* Start time will be 0 only for the first time or after completing the optimization of all FTS indexes. */ if (start_time == 0) { - start_time = ut_time(); + start_time = time(NULL); error = fts_optimize_set_index_start_time( optim->trx, index, start_time); } /* Check if this index needs to be optimized or not. */ - if (ut_difftime(end_time, start_time) < 0) { + if (difftime(end_time, start_time) < 0) { error = fts_optimize_index(optim, index); if (error != DB_SUCCESS) { @@ -2349,7 +2351,7 @@ fts_optimize_reset_start_time( for (uint i = 0; i < ib_vector_size(fts->indexes); ++i) { dict_index_t* index; - ib_time_t start_time = 0; + time_t start_time = 0; /* Reset the start time to 0 for this index. */ error = fts_optimize_set_index_start_time( @@ -2378,11 +2380,13 @@ fts_optimize_table_bk( /*==================*/ fts_slot_t* slot) /*!< in: table to optimiza */ { - dberr_t error; + const time_t now = time(NULL); + const ulint interval = ulint(now - slot->last_run); /* Avoid optimizing tables that were optimized recently. */ if (slot->last_run > 0 - && (ut_time() - slot->last_run) < slot->interval_time) { + && lint(interval) >= 0 + && interval < FTS_OPTIMIZE_INTERVAL_IN_SECS) { return(DB_SUCCESS); } @@ -2390,12 +2394,19 @@ fts_optimize_table_bk( dict_table_t* table = dict_table_open_on_id( slot->table_id, FALSE, DICT_TABLE_OP_NORMAL); - if (table && fil_table_accessible(table) + if (!table) { + slot->last_run = now; + return DB_SUCCESS; + } + + dberr_t error; + + if (fil_table_accessible(table) && table->fts && table->fts->cache && table->fts->cache->deleted >= FTS_OPTIMIZE_THRESHOLD) { error = fts_optimize_table(table); - slot->last_run = ut_time(); + slot->last_run = time(NULL); if (error == DB_SUCCESS) { slot->running = false; @@ -2403,7 +2414,7 @@ fts_optimize_table_bk( } } else { /* Note time this run completed. 
*/ - slot->last_run = ut_time(); + slot->last_run = now; error = DB_SUCCESS; } @@ -2653,7 +2664,6 @@ static bool fts_optimize_new_table(dict_table_t* table) slot->table_id = table->id; slot->running = false; - slot->interval_time = FTS_OPTIMIZE_INTERVAL_IN_SECS; return(TRUE); } @@ -2689,37 +2699,23 @@ Calculate how many tables in fts_slots need to be optimized. @return no. of tables to optimize */ static ulint fts_optimize_how_many() { - ulint i; - ib_time_t delta; - ulint n_tables = 0; - ib_time_t current_time; - - current_time = ut_time(); + ulint n_tables = 0; + const time_t current_time = time(NULL); - for (i = 0; i < ib_vector_size(fts_slots); ++i) { + for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) { const fts_slot_t* slot = static_cast<const fts_slot_t*>( ib_vector_get_const(fts_slots, i)); if (slot->table_id == 0) { continue; } - if (!slot->running) { - ut_a(slot->completed <= current_time); - - delta = current_time - slot->completed; + const time_t end = slot->running + ? slot->last_run : slot->completed; + ulint interval = ulint(current_time - end); - /* Skip slots that have been optimized recently. */ - if (delta >= slot->interval_time) { - ++n_tables; - } - } else { - ut_a(slot->last_run <= current_time); - - delta = current_time - slot->last_run; - - if (delta > slot->interval_time) { - ++n_tables; - } + if (lint(interval) < 0 + || interval >= FTS_OPTIMIZE_INTERVAL_IN_SECS) { + ++n_tables; } } @@ -2731,14 +2727,15 @@ Check if the total memory used by all FTS table exceeds the maximum limit. 
@return true if a sync is needed, false otherwise */ static bool fts_is_sync_needed() { - ulint total_memory = 0; - double time_diff = difftime(ut_time(), last_check_sync_time); + ulint total_memory = 0; + const time_t now = time(NULL); + double time_diff = difftime(now, last_check_sync_time); - if (fts_need_sync || time_diff < 5) { + if (fts_need_sync || (time_diff >= 0 && time_diff < 5)) { return(false); } - last_check_sync_time = ut_time(); + last_check_sync_time = now; for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) { const fts_slot_t* slot = static_cast<const fts_slot_t*>( @@ -2776,7 +2773,7 @@ static void fts_optimize_sync_table(table_id_t table_id) table_id, FALSE, DICT_TABLE_OP_NORMAL)) { if (fil_table_accessible(table) && table->fts && table->fts->cache) { - fts_sync_table(table, true, false, false); + fts_sync_table(table, false); } dict_table_close(table, FALSE, FALSE); @@ -2969,7 +2966,7 @@ fts_optimize_init(void) table_vector.clear(); fts_opt_shutdown_event = os_event_create(0); - last_check_sync_time = ut_time(); + last_check_sync_time = time(NULL); os_thread_create(fts_optimize_thread, fts_optimize_wq, NULL); } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index e77e5a99ca6..04341ba7a0b 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -100,7 +100,6 @@ this program; if not, write to the Free Software Foundation, Inc., #include "row0sel.h" #include "row0upd.h" #include "fil0crypt.h" -#include "ut0timer.h" #include "srv0mon.h" #include "srv0srv.h" #include "srv0start.h" @@ -1145,6 +1144,10 @@ static SHOW_VAR innodb_status_variables[]= { &export_vars.innodb_n_rowlog_blocks_encrypted, SHOW_LONGLONG}, {"encryption_n_rowlog_blocks_decrypted", &export_vars.innodb_n_rowlog_blocks_decrypted, SHOW_LONGLONG}, + {"encryption_n_temp_blocks_encrypted", + &export_vars.innodb_n_temp_blocks_encrypted, SHOW_LONGLONG}, + {"encryption_n_temp_blocks_decrypted", + 
&export_vars.innodb_n_temp_blocks_decrypted, SHOW_LONGLONG}, /* scrubing */ {"scrub_background_page_reorganizations", @@ -1639,18 +1642,6 @@ thd_trx_is_auto_commit( && thd_is_select(thd)); } -extern "C" time_t thd_start_time(const THD* thd); - -/******************************************************************//** -Get the thread start time. -@return the thread start time in seconds since the epoch. */ -ulint thd_start_time_in_secs(THD*) -{ - // FIXME: This function should be added to the server code. - //return(thd_start_time(thd)); - return(ulint(ut_time())); -} - /** Enter InnoDB engine after checking the max number of user threads allowed, else the thread is put into sleep. @param[in,out] prebuilt row prebuilt handler */ @@ -1678,12 +1669,13 @@ innobase_srv_conc_enter_innodb( } else if (trx->mysql_thd != NULL && thd_is_replication_slave_thread(trx->mysql_thd)) { - - UT_WAIT_FOR( - srv_conc_get_active_threads() - < srv_thread_concurrency, - srv_replication_delay * 1000); - + const ulonglong end = my_interval_timer() + + ulonglong(srv_replication_delay) * 1000000; + while (srv_conc_get_active_threads() + >= srv_thread_concurrency + || my_interval_timer() >= end) { + os_thread_sleep(2000 /* 2 ms */); + } } else { srv_conc_enter_innodb(prebuilt); } @@ -3659,7 +3651,8 @@ static int innodb_init_params() } #endif - if ((srv_encrypt_tables || srv_encrypt_log) + if ((srv_encrypt_tables || srv_encrypt_log + || innodb_encrypt_temporary_tables) && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) { sql_print_error("InnoDB: cannot enable encryption, " "encryption plugin is not available"); @@ -5103,17 +5096,6 @@ ha_innobase::index_type( } /****************************************************************//** -Returns the table file name extension. 
-@return file extension string */ - -const char** -ha_innobase::bas_ext() const -/*========================*/ -{ - return(ha_innobase_exts); -} - -/****************************************************************//** Returns the operations supported for indexes. @return flags of supported operations */ @@ -5658,6 +5640,7 @@ innobase_build_v_templ( ulint n_v_col = ib_table->n_v_cols; bool marker[REC_MAX_N_FIELDS]; + DBUG_ENTER("innobase_build_v_templ"); ut_ad(ncol < REC_MAX_N_FIELDS); if (add_v != NULL) { @@ -5674,7 +5657,7 @@ innobase_build_v_templ( if (!locked) { mutex_exit(&dict_sys.mutex); } - return; + DBUG_VOID_RETURN; } memset(marker, 0, sizeof(bool) * ncol); @@ -5685,7 +5668,8 @@ innobase_build_v_templ( s_templ->n_col = ncol; s_templ->n_v_col = n_v_col; s_templ->rec_len = table->s->reclength; - s_templ->default_rec = table->s->default_values; + s_templ->default_rec = UT_NEW_ARRAY_NOKEY(uchar, s_templ->rec_len); + memcpy(s_templ->default_rec, table->s->default_values, s_templ->rec_len); /* Mark those columns could be base columns */ for (ulint i = 0; i < ib_table->n_v_cols; i++) { @@ -5782,6 +5766,7 @@ innobase_build_v_templ( s_templ->db_name = table->s->db.str; s_templ->tb_name = table->s->table_name.str; + DBUG_VOID_RETURN; } /** Check consistency between .frm indexes and InnoDB indexes. 
@@ -5987,6 +5972,8 @@ ha_innobase::open(const char* name, int, uint) ib_table = open_dict_table(name, norm_name, is_part, ignore_err); + DEBUG_SYNC(thd, "ib_open_after_dict_open"); + if (NULL == ib_table) { if (is_part) { @@ -6053,7 +6040,7 @@ no_such_table: if (!thd_tablespace_op(thd)) { set_my_errno(ENOENT); - int ret_err = HA_ERR_NO_SUCH_TABLE; + int ret_err = HA_ERR_TABLESPACE_MISSING; if (space && space->crypt_data && space->crypt_data->is_encrypted()) { @@ -6092,14 +6079,6 @@ no_such_table: mutex_enter(&dict_sys.mutex); if (ib_table->vc_templ == NULL) { ib_table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t()); - } else if (ib_table->get_ref_count() == 1) { - /* Clean and refresh the template if no one else - get hold on it */ - dict_free_vc_templ(ib_table->vc_templ); - ib_table->vc_templ->vtempl = NULL; - } - - if (ib_table->vc_templ->vtempl == NULL) { innobase_build_v_templ( table, ib_table, ib_table->vc_templ, NULL, true); @@ -7859,7 +7838,7 @@ handle. int ha_innobase::write_row( /*===================*/ - uchar* record) /*!< in: a row in MySQL format */ + const uchar* record) /*!< in: a row in MySQL format */ { dberr_t error; #ifdef WITH_WSREP @@ -9221,7 +9200,7 @@ ha_innobase::index_read( table->s->table_name.str); table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; case DB_TABLESPACE_NOT_FOUND: @@ -9232,8 +9211,7 @@ ha_innobase::index_read( table->s->table_name.str); table->status = STATUS_NOT_FOUND; - //error = HA_ERR_TABLESPACE_MISSING; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; default: @@ -9374,15 +9352,16 @@ ha_innobase::change_active_index( /* Initialization of search_tuple is not needed for FT index since FT search returns rank only. In addition engine should be able to retrieve FTS_DOC_ID column value if necessary. 
*/ - if ((m_prebuilt->index->type & DICT_FTS)) { -#ifdef MYSQL_STORE_FTS_DOC_ID - if (table->fts_doc_id_field - && bitmap_is_set(table->read_set, - table->fts_doc_id_field->field_index - && m_prebuilt->read_just_key)) { - m_prebuilt->fts_doc_id_in_read_set = 1; + if (m_prebuilt->index->type & DICT_FTS) { + for (uint i = 0; i < table->s->fields; i++) { + if (m_prebuilt->read_just_key + && bitmap_is_set(table->read_set, i) + && !strcmp(table->s->field[i]->field_name.str, + FTS_DOC_ID_COL_NAME)) { + m_prebuilt->fts_doc_id_in_read_set = true; + break; + } } -#endif } else { ulint n_fields = dict_index_get_n_unique_in_tree( m_prebuilt->index); @@ -9395,13 +9374,10 @@ ha_innobase::change_active_index( /* If it's FTS query and FTS_DOC_ID exists FTS_DOC_ID field is always added to read_set. */ - -#ifdef MYSQL_STORE_FTS_DOC_ID - m_prebuilt->fts_doc_id_in_read_set = - (m_prebuilt->read_just_key && table->fts_doc_id_field - && m_prebuilt->in_fts_query); -#endif - + m_prebuilt->fts_doc_id_in_read_set = m_prebuilt->in_fts_query + && m_prebuilt->read_just_key + && m_prebuilt->index->contains_col_or_prefix( + m_prebuilt->table->fts->doc_col, false); } /* MySQL changes the active index for a handle also during some @@ -9480,7 +9456,7 @@ ha_innobase::general_fetch( table->s->table_name.str); table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; case DB_TABLESPACE_NOT_FOUND: @@ -9789,7 +9765,7 @@ ha_innobase::ft_init_ext( /* If tablespace is discarded, we should return here */ if (!ft_table->space) { - my_error(ER_NO_SUCH_TABLE, MYF(0), table->s->db.str, + my_error(ER_TABLESPACE_MISSING, MYF(0), table->s->db.str, table->s->table_name.str); return(NULL); } @@ -10006,7 +9982,7 @@ next_record: table->s->table_name.str); table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; case DB_TABLESPACE_NOT_FOUND: @@ -10987,6 +10963,17 @@ err_col: dict_table_add_system_columns(table, 
heap); if (table->is_temporary()) { + if ((options->encryption == 1 + && !innodb_encrypt_temporary_tables) + || (options->encryption == 2 + && innodb_encrypt_temporary_tables)) { + push_warning_printf(m_thd, + Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "Ignoring encryption parameter during " + "temporary table creation."); + } + m_trx->table_id = table->id = dict_sys.get_temporary_table_id(); ut_ad(dict_tf_get_rec_format(table->flags) @@ -12187,6 +12174,21 @@ int create_table_info_t::prepare_create_table(const char* name, bool strict) DBUG_RETURN(HA_ERR_UNSUPPORTED); } + for (uint i = 0; i < m_form->s->keys; i++) { + const size_t max_field_len + = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(m_flags); + const KEY& key = m_form->key_info[i]; + + if (key.algorithm == HA_KEY_ALG_FULLTEXT) { + continue; + } + + if (too_big_key_part_length(max_field_len, key)) { + DBUG_RETURN(convert_error_code_to_mysql( + DB_TOO_BIG_INDEX_COL, m_flags, NULL)); + } + } + DBUG_RETURN(parse_table_name(name)); } @@ -14286,7 +14288,7 @@ ha_innobase::optimize( if (innodb_optimize_fulltext_only) { if (m_prebuilt->table->fts && m_prebuilt->table->fts->cache && m_prebuilt->table->space) { - fts_sync_table(m_prebuilt->table, false, true, false); + fts_sync_table(m_prebuilt->table); fts_optimize_table(m_prebuilt->table); } try_alter = false; @@ -14898,144 +14900,6 @@ struct tablename_compare { } }; -/** Get the table name and database name for the given table. -@param[in,out] thd user thread handle -@param[out] f_key_info pointer to table_name_info object -@param[in] foreign foreign key constraint. 
*/ -static -void -get_table_name_info( - THD* thd, - st_handler_tablename* f_key_info, - const dict_foreign_t* foreign) -{ -#define FILENAME_CHARSET_MBMAXLEN 5 - char tmp_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1]; - char name_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1]; - const char* ptr; - - size_t len = dict_get_db_name_len( - foreign->referenced_table_name_lookup); - ut_memcpy(tmp_buff, foreign->referenced_table_name_lookup, len); - tmp_buff[len] = 0; - - ut_ad(len < sizeof(tmp_buff)); - - len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff)); - f_key_info->db = thd_strmake(thd, name_buff, len); - - ptr = dict_remove_db_name(foreign->referenced_table_name_lookup); - len = filename_to_tablename(ptr, name_buff, sizeof(name_buff)); - f_key_info->tablename = thd_strmake(thd, name_buff, len); -} - -/** Get the list of tables ordered by the dependency on the other tables using -the 'CASCADE' foreign key constraint. -@param[in,out] thd user thread handle -@param[out] fk_table_list set of tables name info for the - dependent table -@retval 0 for success. */ -int -ha_innobase::get_cascade_foreign_key_table_list( - THD* thd, - List<st_handler_tablename>* fk_table_list) -{ - m_prebuilt->trx->op_info = "getting cascading foreign keys"; - - std::forward_list<table_list_item, ut_allocator<table_list_item> > - table_list; - - typedef std::set<st_handler_tablename, tablename_compare, - ut_allocator<st_handler_tablename> > cascade_fk_set; - - cascade_fk_set fk_set; - - mutex_enter(&dict_sys.mutex); - - /* Initialize the table_list with prebuilt->table name. */ - struct table_list_item item = {m_prebuilt->table, - m_prebuilt->table->name.m_name}; - - table_list.push_front(item); - - /* Get the parent table, grand parent table info from the - table list by depth-first traversal. 
*/ - do { - const dict_table_t* parent_table; - dict_table_t* parent = NULL; - std::pair<cascade_fk_set::iterator,bool> ret; - - item = table_list.front(); - table_list.pop_front(); - parent_table = item.table; - - if (parent_table == NULL) { - - ut_ad(item.name != NULL); - - parent_table = parent = dict_table_open_on_name( - item.name, TRUE, FALSE, - DICT_ERR_IGNORE_NONE); - - if (parent_table == NULL) { - /* foreign_key_checks is or was probably - disabled; ignore the constraint */ - continue; - } - } - - for (dict_foreign_set::const_iterator it = - parent_table->foreign_set.begin(); - it != parent_table->foreign_set.end(); ++it) { - - const dict_foreign_t* foreign = *it; - st_handler_tablename f1; - - /* Skip the table if there is no - cascading operation. */ - if (0 == (foreign->type - & ~(DICT_FOREIGN_ON_DELETE_NO_ACTION - | DICT_FOREIGN_ON_UPDATE_NO_ACTION))) { - continue; - } - - if (foreign->referenced_table_name_lookup != NULL) { - get_table_name_info(thd, &f1, foreign); - ret = fk_set.insert(f1); - - /* Ignore the table if it is already - in the set. */ - if (!ret.second) { - continue; - } - - struct table_list_item item1 = { - foreign->referenced_table, - foreign->referenced_table_name_lookup}; - - table_list.push_front(item1); - - st_handler_tablename* fk_table = - (st_handler_tablename*) thd_memdup( - thd, &f1, sizeof(*fk_table)); - - fk_table_list->push_front(fk_table); - } - } - - if (parent != NULL) { - dict_table_close(parent, true, false); - } - - } while(!table_list.empty()); - - mutex_exit(&dict_sys.mutex); - - m_prebuilt->trx->op_info = ""; - - return(0); -} - /*****************************************************************//** Checks if ALTER TABLE may change the storage engine of the table. Changing storage engines is not allowed for tables for which there @@ -15179,12 +15043,9 @@ ha_innobase::extra( } /** -MySQL calls this method at the end of each statement. This method -exists for readability only. 
ha_innobase::reset() doesn't give any -clue about the method. */ - +MySQL calls this method at the end of each statement */ int -ha_innobase::end_stmt() +ha_innobase::reset() { if (m_prebuilt->blob_heap) { row_mysql_prebuilt_free_blob_heap(m_prebuilt); @@ -15203,15 +15064,6 @@ ha_innobase::end_stmt() return(0); } -/** -MySQL calls this method at the end of each statement */ - -int -ha_innobase::reset() -{ - return(end_stmt()); -} - /******************************************************************//** MySQL calls this function at the start of each SQL statement inside LOCK TABLES. Inside LOCK TABLES the ::external_lock method does not work to @@ -15441,7 +15293,7 @@ ha_innobase::external_lock( ER_TABLESPACE_DISCARDED, table->s->table_name.str); - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + DBUG_RETURN(HA_ERR_TABLESPACE_MISSING); } row_quiesce_table_start(m_prebuilt->table, trx); @@ -17938,8 +17790,7 @@ innodb_defragment_frequency_update(THD*, st_mysql_sys_var*, void*, const void* save) { srv_defragment_frequency = (*static_cast<const uint*>(save)); - srv_defragment_interval = ut_microseconds_to_timer( - (ulonglong) (1000000.0 / srv_defragment_frequency)); + srv_defragment_interval = 1000000000ULL / srv_defragment_frequency; } static inline char *my_strtok_r(char *str, const char *delim, char **saveptr) @@ -19739,6 +19590,11 @@ static MYSQL_SYSVAR_BOOL(debug_force_scrubbing, NULL, NULL, FALSE); #endif /* UNIV_DEBUG */ +static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tables, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enrypt the temporary table data.", + NULL, NULL, false); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), @@ -19941,6 +19797,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { #endif MYSQL_SYSVAR(buf_dump_status_frequency), MYSQL_SYSVAR(background_thread), + MYSQL_SYSVAR(encrypt_temporary_tables), NULL }; @@ -20289,6 +20146,7 @@ 
TABLE* innobase_init_vc_templ(dict_table_t* table) if (table->vc_templ != NULL) { return NULL; } + DBUG_ENTER("innobase_init_vc_templ"); table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t()); @@ -20296,13 +20154,13 @@ TABLE* innobase_init_vc_templ(dict_table_t* table) ut_ad(mysql_table); if (!mysql_table) { - return NULL; + DBUG_RETURN(NULL); } mutex_enter(&dict_sys.mutex); innobase_build_v_templ(mysql_table, table, table->vc_templ, NULL, true); mutex_exit(&dict_sys.mutex); - return mysql_table; + DBUG_RETURN(mysql_table); } /** Change dbname and table name in table->vc_templ. @@ -20347,7 +20205,7 @@ innobase_rename_vc_templ( given col_no. @param[in] foreign foreign key information @param[in] update updated parent vector. -@param[in] col_no column position of the table +@param[in] col_no base column position of the child table to check @return updated field from the parent update vector, else NULL */ static dfield_t* @@ -20363,6 +20221,10 @@ innobase_get_field_from_update_vector( ulint prefix_col_no; for (ulint i = 0; i < foreign->n_fields; i++) { + if (dict_index_get_nth_col_no(foreign->foreign_index, i) + != col_no) { + continue; + } parent_col_no = dict_index_get_nth_col_no(parent_index, i); parent_field_no = dict_table_get_nth_col_pos( @@ -20372,8 +20234,7 @@ innobase_get_field_from_update_vector( upd_field_t* parent_ufield = &update->fields[j]; - if (parent_ufield->field_no == parent_field_no - && parent_col_no == col_no) { + if (parent_ufield->field_no == parent_field_no) { return(&parent_ufield->new_val); } } @@ -20504,6 +20365,7 @@ innobase_get_computed_value( ut_ad(thd != NULL); ut_ad(mysql_table); + DBUG_ENTER("innobase_get_computed_value"); const mysql_row_templ_t* vctempl = index->table->vc_templ->vtempl[ index->table->vc_templ->n_col + col->v_pos]; @@ -20592,7 +20454,7 @@ innobase_get_computed_value( stderr); dtuple_print(stderr, row); #endif /* INNODB_VIRTUAL_DEBUG */ - return(NULL); + DBUG_RETURN(NULL); } if (vctempl->mysql_null_bit_mask @@ -20600,7 
+20462,7 @@ innobase_get_computed_value( & vctempl->mysql_null_bit_mask)) { dfield_set_null(field); field->type.prtype |= DATA_VIRTUAL; - return(field); + DBUG_RETURN(field); } row_mysql_store_col_in_innobase_format( @@ -20632,7 +20494,7 @@ innobase_get_computed_value( dfield_dup(field, heap); } - return(field); + DBUG_RETURN(field); } diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 60c560f9bc7..28e1a1e36f1 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -58,35 +58,33 @@ struct st_handler_tablename const char *tablename; }; /** The class defining a handle to an Innodb table */ -class ha_innobase: public handler +class ha_innobase final: public handler { public: ha_innobase(handlerton* hton, TABLE_SHARE* table_arg); - ~ha_innobase(); + ~ha_innobase() override; /** Get the row type from the storage engine. If this method returns ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used. */ - enum row_type get_row_type() const; + enum row_type get_row_type() const override; - const char* table_type() const; + const char* table_type() const; - const char* index_type(uint key_number); + const char* index_type(uint key_number) override; - const char** bas_ext() const; + Table_flags table_flags() const override; - Table_flags table_flags() const; + ulong index_flags(uint idx, uint part, bool all_parts) const override; - ulong index_flags(uint idx, uint part, bool all_parts) const; + uint max_supported_keys() const override; - uint max_supported_keys() const; + uint max_supported_key_length() const override; - uint max_supported_key_length() const; + uint max_supported_key_part_length() const override; - uint max_supported_key_part_length() const; + const key_map* keys_to_use_for_scanning() override; - const key_map* keys_to_use_for_scanning(); - - void column_bitmaps_signal(); + void column_bitmaps_signal() override; /** Opens dictionary table object using table name. 
For partition, we need to try alternative lower/upper case names to support moving data files across @@ -102,97 +100,97 @@ public: bool is_partition, dict_err_ignore_t ignore_err); - int open(const char *name, int mode, uint test_if_locked); + int open(const char *name, int mode, uint test_if_locked) override; - handler* clone(const char *name, MEM_ROOT *mem_root); + handler* clone(const char *name, MEM_ROOT *mem_root) override; - int close(void); + int close(void) override; - double scan_time(); + double scan_time() override; - double read_time(uint index, uint ranges, ha_rows rows); + double read_time(uint index, uint ranges, ha_rows rows) override; - int delete_all_rows(); + int delete_all_rows() override; - int write_row(uchar * buf); + int write_row(const uchar * buf) override; - int update_row(const uchar * old_data, const uchar * new_data); + int update_row(const uchar * old_data, const uchar * new_data) override; - int delete_row(const uchar * buf); + int delete_row(const uchar * buf) override; - bool was_semi_consistent_read(); + bool was_semi_consistent_read() override; - void try_semi_consistent_read(bool yes); + void try_semi_consistent_read(bool yes) override; - void unlock_row(); + void unlock_row() override; - int index_init(uint index, bool sorted); + int index_init(uint index, bool sorted) override; - int index_end(); + int index_end() override; int index_read( uchar* buf, const uchar* key, uint key_len, - ha_rkey_function find_flag); + ha_rkey_function find_flag) override; - int index_read_last(uchar * buf, const uchar * key, uint key_len); + int index_read_last(uchar * buf, const uchar * key, + uint key_len) override; - int index_next(uchar * buf); + int index_next(uchar * buf) override; - int index_next_same(uchar * buf, const uchar *key, uint keylen); + int index_next_same(uchar * buf, const uchar * key, + uint keylen) override; - int index_prev(uchar * buf); + int index_prev(uchar * buf) override; - int index_first(uchar * buf); + int 
index_first(uchar * buf) override; - int index_last(uchar * buf); + int index_last(uchar * buf) override; /* Copy a cached MySQL row. If requested, also avoids overwriting non-read columns. */ void copy_cached_row(uchar *to_rec, const uchar *from_rec, uint rec_length); - int rnd_init(bool scan); - - int rnd_end(); + int rnd_init(bool scan) override; - int rnd_next(uchar *buf); + int rnd_end() override; - int rnd_pos(uchar * buf, uchar *pos); + int rnd_next(uchar *buf) override; - int ft_init(); - void ft_end() { rnd_end(); } - FT_INFO *ft_init_ext(uint flags, uint inx, String* key); - int ft_read(uchar* buf); + int rnd_pos(uchar * buf, uchar *pos) override; - void position(const uchar *record); + int ft_init() override; + void ft_end() override { rnd_end(); } + FT_INFO *ft_init_ext(uint flags, uint inx, String* key) override; + int ft_read(uchar* buf) override; - int info(uint); + void position(const uchar *record) override; - int analyze(THD* thd,HA_CHECK_OPT* check_opt); + int info(uint) override; - int optimize(THD* thd,HA_CHECK_OPT* check_opt); + int analyze(THD* thd,HA_CHECK_OPT* check_opt) override; - int discard_or_import_tablespace(my_bool discard); + int optimize(THD* thd,HA_CHECK_OPT* check_opt) override; - int extra(ha_extra_function operation); + int discard_or_import_tablespace(my_bool discard) override; - int reset(); + int extra(ha_extra_function operation) override; - int external_lock(THD *thd, int lock_type); + int reset() override; - int start_stmt(THD *thd, thr_lock_type lock_type); + int external_lock(THD *thd, int lock_type) override; - void position(uchar *record); + int start_stmt(THD *thd, thr_lock_type lock_type) override; ha_rows records_in_range( uint inx, key_range* min_key, - key_range* max_key); + key_range* max_key) override; - ha_rows estimate_rows_upper_bound(); + ha_rows estimate_rows_upper_bound() override; - void update_create_info(HA_CREATE_INFO* create_info); + void update_create_info(HA_CREATE_INFO* create_info) override; 
inline int create( const char* name, @@ -204,63 +202,57 @@ public: int create( const char* name, TABLE* form, - HA_CREATE_INFO* create_info); - - const char* check_table_options(THD *thd, TABLE* table, - HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format); + HA_CREATE_INFO* create_info) override; inline int delete_table(const char* name, enum_sql_command sqlcom); - int truncate(); + int truncate() override; - int delete_table(const char *name); + int delete_table(const char *name) override; - int rename_table(const char* from, const char* to); + int rename_table(const char* from, const char* to) override; int defragment_table(const char* name, const char* index_name, bool async); - int check(THD* thd, HA_CHECK_OPT* check_opt); - char* update_table_comment(const char* comment); + int check(THD* thd, HA_CHECK_OPT* check_opt) override; + char* update_table_comment(const char* comment) override; - char* get_foreign_key_create_info(); + char* get_foreign_key_create_info() override; - int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list); + int get_foreign_key_list(THD *thd, + List<FOREIGN_KEY_INFO> *f_key_list) override; int get_parent_foreign_key_list( THD* thd, - List<FOREIGN_KEY_INFO>* f_key_list); - int get_cascade_foreign_key_table_list( - THD* thd, - List<st_handler_tablename>* fk_table_list); - + List<FOREIGN_KEY_INFO>* f_key_list) override; - bool can_switch_engines(); + bool can_switch_engines() override; - uint referenced_by_foreign_key(); + uint referenced_by_foreign_key() override; - void free_foreign_key_create_info(char* str); + void free_foreign_key_create_info(char* str) override; - uint lock_count(void) const; + uint lock_count(void) const override; THR_LOCK_DATA** store_lock( THD* thd, THR_LOCK_DATA** to, - thr_lock_type lock_type); + thr_lock_type lock_type) override; - void init_table_handle_for_HANDLER(); + void init_table_handle_for_HANDLER() override; - virtual void get_auto_increment( + void 
get_auto_increment( ulonglong offset, ulonglong increment, ulonglong nb_desired_values, ulonglong* first_value, - ulonglong* nb_reserved_values); - int reset_auto_increment(ulonglong value); + ulonglong* nb_reserved_values) override; + int reset_auto_increment(ulonglong value) override; - virtual bool get_error_message(int error, String *buf); + bool get_error_message(int error, String *buf) override; - virtual bool get_foreign_dup_key(char*, uint, char*, uint); + bool get_foreign_dup_key(char*, uint, char*, uint) override; - uint8 table_cache_type(); + uint8 table_cache_type() override; /** Ask handler about permission to cache table during query registration @@ -270,11 +262,11 @@ public: const char* table_key, uint key_length, qc_engine_callback* call_back, - ulonglong* engine_data); + ulonglong* engine_data) override; - bool primary_key_is_clustered(); + bool primary_key_is_clustered() override; - int cmp_ref(const uchar* ref1, const uchar* ref2); + int cmp_ref(const uchar* ref1, const uchar* ref2) override; /** On-line ALTER TABLE interface @see handler0alter.cc @{ */ @@ -304,7 +296,7 @@ public: enum_alter_inplace_result check_if_supported_inplace_alter( TABLE* altered_table, - Alter_inplace_info* ha_alter_info); + Alter_inplace_info* ha_alter_info) override; /** Allows InnoDB to update internal structures with concurrent writes blocked (provided that check_if_supported_inplace_alter() @@ -320,7 +312,7 @@ public: */ bool prepare_inplace_alter_table( TABLE* altered_table, - Alter_inplace_info* ha_alter_info); + Alter_inplace_info* ha_alter_info) override; /** Alter the table structure in-place with operations specified using HA_ALTER_FLAGS and Alter_inplace_information. 
@@ -336,7 +328,7 @@ public: */ bool inplace_alter_table( TABLE* altered_table, - Alter_inplace_info* ha_alter_info); + Alter_inplace_info* ha_alter_info) override; /** Commit or rollback the changes made during prepare_inplace_alter_table() and inplace_alter_table() inside @@ -355,12 +347,12 @@ public: bool commit_inplace_alter_table( TABLE* altered_table, Alter_inplace_info* ha_alter_info, - bool commit); + bool commit) override; /** @} */ bool check_if_incompatible_data( HA_CREATE_INFO* info, - uint table_changes); + uint table_changes) override; /** @name Multi Range Read interface @{ */ @@ -375,11 +367,11 @@ public: void* seq_init_param, uint n_ranges, uint mode, - HANDLER_BUFFER* buf); + HANDLER_BUFFER* buf) override; /** Process next multi range read @see DsMrr_impl::dsmrr_next @param range_info */ - int multi_range_read_next(range_id_t *range_info); + int multi_range_read_next(range_id_t *range_info) override; /** Initialize multi range read and get information. @see ha_myisam::multi_range_read_info_const @@ -398,7 +390,7 @@ public: uint n_ranges, uint* bufsz, uint* flags, - Cost_estimate* cost); + Cost_estimate* cost) override; /** Initialize multi range read and get information. @see DsMrr_impl::dsmrr_info @@ -411,16 +403,16 @@ public: @param cost */ ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, uint key_parts, uint* bufsz, uint* flags, - Cost_estimate* cost); + Cost_estimate* cost) override; int multi_range_read_explain_info(uint mrr_mode, - char *str, size_t size); + char *str, size_t size) override; /** Attempt to push down an index condition. @param[in] keyno MySQL key number @param[in] idx_cond Index condition to be checked @return idx_cond if pushed; NULL if not pushed */ - Item* idx_cond_push(uint keyno, Item* idx_cond); + Item* idx_cond_push(uint keyno, Item* idx_cond) override; /* @} */ /** Check if InnoDB is not storing virtual column metadata for a table. 
@@ -435,22 +427,19 @@ public: @param[in] pk_filter filter against which primary keys are to be checked @retval false if pushed (always) */ - bool rowid_filter_push(Rowid_filter *rowid_filter); - - bool can_convert_string(const Field_string* field, - const Column_definition& new_field) const; - bool can_convert_varstring(const Field_varstring* field, - const Column_definition& new_field) const; - bool can_convert_blob(const Field_blob* field, - const Column_definition& new_field) const; + bool rowid_filter_push(Rowid_filter *rowid_filter) override; + + bool + can_convert_string(const Field_string* field, + const Column_definition& new_field) const override; + bool can_convert_varstring( + const Field_varstring* field, + const Column_definition& new_field) const override; + bool + can_convert_blob(const Field_blob* field, + const Column_definition& new_field) const override; protected: - /** - MySQL calls this method at the end of each statement. This method - exists for readability only, called from reset(). The name reset() - doesn't give any clue that it is called at the end of a statement. */ - int end_stmt(); - dberr_t innobase_get_autoinc(ulonglong* value); dberr_t innobase_lock_autoinc(); ulonglong innobase_peek_autoinc(); @@ -483,7 +472,7 @@ protected: false if accessing individual fields is enough */ void build_template(bool whole_row); - virtual int info_low(uint, bool); + int info_low(uint, bool); /** The multi range read session object */ DsMrr_impl m_ds_mrr; @@ -955,3 +944,10 @@ ib_push_frm_error( TABLE* table, /*!< in: MySQL table */ ulint n_keys, /*!< in: InnoDB #keys */ bool push_warning); /*!< in: print warning ? 
*/ + +/** Check each index part length whether they not exceed the max limit +@param[in] max_field_len maximum allowed key part length +@param[in] key MariaDB key definition +@return true if index column length exceeds limit */ +MY_ATTRIBUTE((warn_unused_result)) +bool too_big_key_part_length(size_t max_field_len, const KEY& key); diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 08e549629fd..19c35c66885 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -108,7 +108,8 @@ static const alter_table_operations INNOBASE_INPLACE_IGNORE | ALTER_VIRTUAL_GCOL_EXPR | ALTER_DROP_CHECK_CONSTRAINT | ALTER_RENAME - | ALTER_COLUMN_INDEX_LENGTH; + | ALTER_COLUMN_INDEX_LENGTH + | ALTER_CHANGE_INDEX_COMMENT; /** Operations on foreign key definitions (changing the schema only) */ static const alter_table_operations INNOBASE_FOREIGN_OPERATIONS @@ -3989,22 +3990,15 @@ created_clustered: DBUG_RETURN(indexdefs); } -/*******************************************************************//** -Check each index column size, make sure they do not exceed the max limit -@return true if index column size exceeds limit */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -innobase_check_column_length( -/*=========================*/ - ulint max_col_len, /*!< in: maximum column length */ - const KEY* key_info) /*!< in: Indexes to be created */ +MY_ATTRIBUTE((warn_unused_result)) +bool too_big_key_part_length(size_t max_field_len, const KEY& key) { - for (ulint key_part = 0; key_part < key_info->user_defined_key_parts; key_part++) { - if (key_info->key_part[key_part].length > max_col_len) { - return(true); + for (ulint i = 0; i < key.user_defined_key_parts; i++) { + if (key.key_part[i].length > max_field_len) { + return true; } } - return(false); + return false; } /********************************************************************//** @@ -7664,7 +7658,7 @@ check_if_ok_to_rename: 
continue; } - if (innobase_check_column_length(max_col_len, key)) { + if (too_big_key_part_length(max_col_len, *key)) { my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), max_col_len); goto err_exit_no_heap; @@ -8798,13 +8792,13 @@ innobase_drop_foreign_try( } /** Rename a column in the data dictionary tables. -@param[in] user_table InnoDB table that was being altered -@param[in] trx data dictionary transaction -@param[in] table_name Table name in MySQL -@param[in] from old column name -@param[in] to new column name -@param[in] new_clustered whether the table has been rebuilt -@param[in] is_virtual whether it is a virtual column +@param[in] user_table InnoDB table that was being altered +@param[in] trx Data dictionary transaction +@param[in] table_name Table name in MySQL +@param[in] from old column name +@param[in] to new column name +@param[in] new_clustered whether the table has been rebuilt +@param[in] evict_fk_cache Evict the fk info from cache @retval true Failure @retval false Success */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) @@ -8815,7 +8809,8 @@ innobase_rename_column_try( const char* table_name, const char* from, const char* to, - bool new_clustered) + bool new_clustered, + bool evict_fk_cache) { dberr_t error; @@ -8977,7 +8972,8 @@ rename_foreign: } } - if (new_clustered) { + /* Reload the foreign key info for instant table too. */ + if (new_clustered || evict_fk_cache) { std::for_each(fk_evict.begin(), fk_evict.end(), dict_foreign_remove_from_cache); } @@ -9026,7 +9022,9 @@ innobase_rename_columns_try( if (innobase_rename_column_try( ctx->old_table, trx, table_name, cf->field->field_name.str, - cf->field_name.str, true)) { + cf->field_name.str, + ctx->need_rebuild(), + ctx->is_instant())) { return(true); } goto processed_field; @@ -9075,7 +9073,7 @@ static void get_type(const Field& f, ulint& prtype, ulint& mtype, ulint& len) } /** Enlarge a column in the data dictionary tables. 
-@param user_table InnoDB table that was being altered +@param ctx In-place ALTER TABLE context @param trx data dictionary transaction @param table_name Table name in MySQL @param pos 0-based index to user_table->cols[] or user_table->v_cols[] @@ -9086,7 +9084,7 @@ static void get_type(const Field& f, ulint& prtype, ulint& mtype, ulint& len) static MY_ATTRIBUTE((nonnull, warn_unused_result)) bool innobase_rename_or_enlarge_column_try( - const dict_table_t* user_table, + ha_innobase_inplace_ctx*ctx, trx_t* trx, const char* table_name, ulint pos, @@ -9094,8 +9092,10 @@ innobase_rename_or_enlarge_column_try( bool is_v) { dict_col_t* col; + dict_table_t* user_table = ctx->old_table; DBUG_ENTER("innobase_rename_or_enlarge_column_try"); + DBUG_ASSERT(!ctx->need_rebuild()); DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); @@ -9155,7 +9155,7 @@ innobase_rename_or_enlarge_column_try( if (!same_name && innobase_rename_column_try(user_table, trx, table_name, col_name, f.field_name.str, - false)) { + false, ctx->is_instant())) { DBUG_RETURN(true); } @@ -9213,7 +9213,7 @@ innobase_rename_or_enlarge_columns_try( while (Create_field* cf = cf_it++) { if (cf->field == *fp) { if (innobase_rename_or_enlarge_column_try( - ctx->old_table, trx, table_name, + ctx, trx, table_name, idx, **af, is_v)) { DBUG_RETURN(true); } @@ -9823,74 +9823,6 @@ commit_try_rebuild( index->to_be_dropped = 0; } - /* We copied the table. Any indexes that were requested to be - dropped were not created in the copy of the table. Apply any - last bit of the rebuild log and then rename the tables. 
*/ - - if (ctx->online) { - DEBUG_SYNC_C("row_log_table_apply2_before"); - - dict_vcol_templ_t* s_templ = NULL; - - if (ctx->new_table->n_v_cols > 0) { - s_templ = UT_NEW_NOKEY( - dict_vcol_templ_t()); - s_templ->vtempl = NULL; - - innobase_build_v_templ( - altered_table, ctx->new_table, s_templ, - NULL, true); - ctx->new_table->vc_templ = s_templ; - } - - error = row_log_table_apply( - ctx->thr, user_table, altered_table, - static_cast<ha_innobase_inplace_ctx*>( - ha_alter_info->handler_ctx)->m_stage, - ctx->new_table); - - if (s_templ) { - ut_ad(ctx->need_rebuild()); - dict_free_vc_templ(s_templ); - UT_DELETE(s_templ); - ctx->new_table->vc_templ = NULL; - } - - ulint err_key = thr_get_trx(ctx->thr)->error_key_num; - - switch (error) { - KEY* dup_key; - case DB_SUCCESS: - break; - case DB_DUPLICATE_KEY: - if (err_key == ULINT_UNDEFINED) { - /* This should be the hidden index on - FTS_DOC_ID. */ - dup_key = NULL; - } else { - DBUG_ASSERT(err_key < - ha_alter_info->key_count); - dup_key = &ha_alter_info - ->key_info_buffer[err_key]; - } - print_keydup_error(altered_table, dup_key, MYF(0)); - DBUG_RETURN(true); - case DB_ONLINE_LOG_TOO_BIG: - my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0), - get_error_key_name(err_key, ha_alter_info, - rebuilt_table)); - DBUG_RETURN(true); - case DB_INDEX_CORRUPT: - my_error(ER_INDEX_CORRUPT, MYF(0), - get_error_key_name(err_key, ha_alter_info, - rebuilt_table)); - DBUG_RETURN(true); - default: - my_error_innodb(error, table_name, user_table->flags); - DBUG_RETURN(true); - } - } - if ((ha_alter_info->handler_flags & ALTER_COLUMN_NAME) && innobase_rename_columns_try(ha_alter_info, ctx, old_table, @@ -10641,6 +10573,91 @@ do { \ # define DBUG_INJECT_CRASH(prefix, count) #endif +/** Apply the log for the table rebuild operation. +@param[in] ctx Inplace Alter table context +@param[in] altered_table MySQL table that is being altered +@return true Failure, else false. 
*/ +static bool alter_rebuild_apply_log( + ha_innobase_inplace_ctx* ctx, + Alter_inplace_info* ha_alter_info, + TABLE* altered_table) +{ + DBUG_ENTER("alter_rebuild_apply_log"); + + if (!ctx->online) { + DBUG_RETURN(false); + } + + /* We copied the table. Any indexes that were requested to be + dropped were not created in the copy of the table. Apply any + last bit of the rebuild log and then rename the tables. */ + dict_table_t* user_table = ctx->old_table; + dict_table_t* rebuilt_table = ctx->new_table; + + DEBUG_SYNC_C("row_log_table_apply2_before"); + + dict_vcol_templ_t* s_templ = NULL; + + if (ctx->new_table->n_v_cols > 0) { + s_templ = UT_NEW_NOKEY( + dict_vcol_templ_t()); + s_templ->vtempl = NULL; + + innobase_build_v_templ(altered_table, ctx->new_table, s_templ, + NULL, true); + ctx->new_table->vc_templ = s_templ; + } + + dberr_t error = row_log_table_apply( + ctx->thr, user_table, altered_table, + static_cast<ha_innobase_inplace_ctx*>( + ha_alter_info->handler_ctx)->m_stage, + ctx->new_table); + + if (s_templ) { + ut_ad(ctx->need_rebuild()); + dict_free_vc_templ(s_templ); + UT_DELETE(s_templ); + ctx->new_table->vc_templ = NULL; + } + + ulint err_key = thr_get_trx(ctx->thr)->error_key_num; + + switch (error) { + KEY* dup_key; + case DB_SUCCESS: + break; + case DB_DUPLICATE_KEY: + if (err_key == ULINT_UNDEFINED) { + /* This should be the hidden index on + FTS_DOC_ID. 
*/ + dup_key = NULL; + } else { + DBUG_ASSERT(err_key < ha_alter_info->key_count); + dup_key = &ha_alter_info->key_info_buffer[err_key]; + } + + print_keydup_error(altered_table, dup_key, MYF(0)); + DBUG_RETURN(true); + case DB_ONLINE_LOG_TOO_BIG: + my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0), + get_error_key_name(err_key, ha_alter_info, + rebuilt_table)); + DBUG_RETURN(true); + case DB_INDEX_CORRUPT: + my_error(ER_INDEX_CORRUPT, MYF(0), + get_error_key_name(err_key, ha_alter_info, + rebuilt_table)); + DBUG_RETURN(true); + default: + my_error_innodb(error, ctx->old_table->name.m_name, + user_table->flags); + DBUG_RETURN(true); + } + + DBUG_RETURN(false); +} + /** Commit or rollback the changes made during prepare_inplace_alter_table() and inplace_alter_table() inside the storage engine. Note that the allowed level of concurrency @@ -10785,6 +10802,19 @@ ha_innobase::commit_inplace_alter_table( ut_ad(!ctx->new_table->fts->add_wq); fts_optimize_remove_table(ctx->new_table); } + + /* Apply the online log of the table before acquiring + data dictionary latches. Here alter thread already acquired + MDL_EXCLUSIVE on the table. So there can't be anymore DDLs, DMLs + for the altered table. By applying the log here, InnoDB + makes sure that concurrent DDLs, purge thread or any other + background thread doesn't wait for the dict_operation_lock + for longer time. 
*/ + if (new_clustered && commit + && alter_rebuild_apply_log( + ctx, ha_alter_info, altered_table)) { + DBUG_RETURN(true); + } } if (!trx) { diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 75ba764ddea..a17113fb9f1 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -784,17 +784,17 @@ btr_rec_copy_externally_stored_field( ulint* len, mem_heap_t* heap); -/*******************************************************************//** -Flags the data tuple fields that are marked as extern storage in the +/** Flag the data tuple fields that are marked as extern storage in the update vector. We use this function to remember which fields we must mark as extern storage in a record inserted for an update. +@param[in,out] tuple clustered index record +@param[in] n number of fields in tuple, before any btr_cur_trim() +@param[in] update update vector +@param[in,out] heap memory heap @return number of flagged external columns */ ulint -btr_push_update_extern_fields( -/*==========================*/ - dtuple_t* tuple, /*!< in/out: data tuple */ - const upd_t* update, /*!< in: update vector */ - mem_heap_t* heap) /*!< in: memory heap */ +btr_push_update_extern_fields(dtuple_t* tuple, ulint n, const upd_t* update, + mem_heap_t* heap) MY_ATTRIBUTE((nonnull)); /***********************************************************//** Sets a secondary index record's delete mark to the given value. This diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 10f17537b08..7c36d5b8680 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1904,13 +1904,13 @@ public: HazardPointer(buf_pool, mutex) {} /** Destructor */ - virtual ~FlushHp() {} + ~FlushHp() override {} /** Adjust the value of hp. This happens when some other thread working on the same list attempts to remove the hp from the list. 
@param bpage buffer block to be compared */ - void adjust(const buf_page_t* bpage); + void adjust(const buf_page_t* bpage) override; }; /** Class implementing buf_pool->LRU hazard pointer */ @@ -1925,13 +1925,13 @@ public: HazardPointer(buf_pool, mutex) {} /** Destructor */ - virtual ~LRUHp() {} + ~LRUHp() override {} /** Adjust the value of hp. This happens when some other thread working on the same list attempts to remove the hp from the list. @param bpage buffer block to be compared */ - void adjust(const buf_page_t* bpage); + void adjust(const buf_page_t* bpage) override; }; /** Special purpose iterators to be used when scanning the LRU list. @@ -1949,7 +1949,7 @@ public: LRUHp(buf_pool, mutex) {} /** Destructor */ - virtual ~LRUItr() {} + ~LRUItr() override {} /** Selects from where to start a scan. If we have scanned too deep into the LRU list it resets the value to the tail diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index bf115c0dbeb..99b6971f603 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -826,6 +826,18 @@ dict_table_has_atomic_blobs(const dict_table_t* table) return(DICT_TF_HAS_ATOMIC_BLOBS(table->flags)); } +/** @return potential max length stored inline for externally stored fields */ +inline size_t dict_table_t::get_overflow_field_local_len() const +{ + if (dict_table_has_atomic_blobs(this)) { + /* ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED: do not + store any BLOB prefix locally */ + return BTR_EXTERN_FIELD_REF_SIZE; + } + /* up to MySQL 5.1: store a 768-byte prefix locally */ + return BTR_EXTERN_FIELD_REF_SIZE + DICT_ANTELOPE_MAX_INDEX_COL_LEN; +} + /** Set the various values in a dict_table_t::flags pointer. 
@param[in,out] flags, Pointer to a 4 byte Table Flags @param[in] format, File Format diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index ca2e5a5c52b..41c5c2220a4 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1274,6 +1274,9 @@ void dict_free_vc_templ( dict_vcol_templ_t* vc_templ) { + UT_DELETE_ARRAY(vc_templ->default_rec); + vc_templ->default_rec = NULL; + if (vc_templ->vtempl != NULL) { ut_ad(vc_templ->n_v_col > 0); for (ulint i = 0; i < vc_templ->n_col diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index f507007fee9..e72b24da9f5 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1214,6 +1214,12 @@ struct dict_index_t { bool vers_history_row(const rec_t* rec, bool &history_row); + /** If a record of this index might not fit on a single B-tree page, + return true. + @param[in] strict issue error or warning + @return true if the index record could become too big */ + bool rec_potentially_too_big(bool strict) const; + /** Reconstruct the clustered index fields. */ inline void reconstruct_fields(); @@ -1776,6 +1782,9 @@ struct dict_table_t { ut_ad(fk_checks > 0); } + /** For overflow fields returns potential max length stored inline */ + inline size_t get_overflow_field_local_len() const; + private: /** Initialize instant->field_map. @tparam replace_dropped whether to point clustered index fields @@ -1979,7 +1988,7 @@ public: unsigned stat_initialized:1; /** Timestamp of last recalc of the stats. */ - ib_time_t stats_last_recalc; + time_t stats_last_recalc; /** The two bits below are set in the 'stat_persistent' member. 
They have the following meaning: diff --git a/storage/innobase/include/dict0stats.h b/storage/innobase/include/dict0stats.h index 5bd921b1b8b..ab001130364 100644 --- a/storage/innobase/include/dict0stats.h +++ b/storage/innobase/include/dict0stats.h @@ -216,7 +216,7 @@ rolled back only in the case of error, but not freed. dberr_t dict_stats_save_index_stat( dict_index_t* index, - ib_time_t last_update, + time_t last_update, const char* stat_name, ib_uint64_t stat_value, ib_uint64_t* sample_size, diff --git a/storage/innobase/include/fsp0file.h b/storage/innobase/include/fsp0file.h index 8c5b24fbadb..15485769429 100644 --- a/storage/innobase/include/fsp0file.h +++ b/storage/innobase/include/fsp0file.h @@ -504,13 +504,13 @@ public: /* No op - base constructor is called. */ } - ~RemoteDatafile() + ~RemoteDatafile() override { shutdown(); } /** Release the resources. */ - void shutdown(); + void shutdown() override; /** Get the link filepath. @return m_link_filepath */ @@ -532,7 +532,7 @@ public: in read-only mode so that it can be validated. @param[in] strict whether to issue error messages @return DB_SUCCESS or error code */ - dberr_t open_read_only(bool strict); + dberr_t open_read_only(bool strict) override; /** Opens a handle to the file linked to in an InnoDB Symbolic Link file in read-write mode so that it can be restored from doublewrite @@ -540,7 +540,7 @@ public: @param[in] read_only_mode If true, then readonly mode checks are enforced. 
@return DB_SUCCESS or error code */ - dberr_t open_read_write(bool read_only_mode) + dberr_t open_read_write(bool read_only_mode) override MY_ATTRIBUTE((warn_unused_result)); /****************************************************************** diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h index d3a79ec23a9..bcb8dd5e5e9 100644 --- a/storage/innobase/include/fsp0sysspace.h +++ b/storage/innobase/include/fsp0sysspace.h @@ -49,7 +49,7 @@ public: /* No op */ } - ~SysTablespace() + ~SysTablespace() override { shutdown(); } diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index b5c81250c28..07be853efad 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -780,16 +780,9 @@ fts_drop_orphaned_tables(void); /** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] table fts table -@param[in] unlock_cache whether unlock cache when write node -@param[in] wait whether wait for existing sync to finish -@param[in] has_dict whether has dict operation lock +@param[in] wait whether to wait for existing sync to finish @return DB_SUCCESS on success, error code on failure. */ -dberr_t -fts_sync_table( - dict_table_t* table, - bool unlock_cache, - bool wait, - bool has_dict); +dberr_t fts_sync_table(dict_table_t* table, bool wait = true); /****************************************************************//** Free the query graph but check whether dict_sys.mutex is already diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h index 26f18cc3d1d..a08a60b9e95 100644 --- a/storage/innobase/include/fts0types.h +++ b/storage/innobase/include/fts0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. 
-Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -123,7 +123,8 @@ struct fts_sync_t { doc_id_t max_doc_id; /*!< The doc id at which the cache was noted as being full, we use this to set the upper_limit field */ - ib_time_t start_time; /*!< SYNC start time */ + time_t start_time; /*!< SYNC start time; only used if + fts_enable_diag_print */ bool in_progress; /*!< flag whether sync is in progress.*/ bool unlock_cache; /*!< flag whether unlock cache when write fts node */ diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index b529f37d76e..f37dff44b2f 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -361,14 +361,6 @@ thd_trx_is_auto_commit( /*===================*/ THD* thd); /*!< in: thread handle, or NULL */ -/******************************************************************//** -Get the thread start time. -@return the thread start time in seconds since the epoch. */ -ulint -thd_start_time_in_secs( -/*===================*/ - THD* thd); /*!< in: thread handle, or NULL */ - /*****************************************************************//** A wrapper function of innobase_convert_name(), convert a table name to the MySQL system_charset_info (UTF-8) and quote it if needed. diff --git a/storage/innobase/include/ib0mutex.h b/storage/innobase/include/ib0mutex.h index c121ada4bfd..960cafe5cdb 100644 --- a/storage/innobase/include/ib0mutex.h +++ b/storage/innobase/include/ib0mutex.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2017, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,8 +29,8 @@ Created 2013-03-26 Sunny Bains. #ifndef ib0mutex_h #define ib0mutex_h -#include "ut0ut.h" -#include "ut0rnd.h" +#include "my_atomic.h" +#include "my_cpu.h" #include "os0event.h" #include "sync0arr.h" diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 829e932172a..cf785e2ad21 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -566,11 +566,10 @@ lock_print_info_summary( /** Prints transaction lock wait and MVCC state. @param[in,out] file file where to print -@param[in] trx transaction */ +@param[in] trx transaction +@param[in] now current time */ void -lock_trx_print_wait_and_mvcc_state( - FILE* file, - const trx_t* trx); +lock_trx_print_wait_and_mvcc_state(FILE* file, const trx_t* trx, time_t now); /*********************************************************************//** Prints info of locks for each transaction. This function assumes that the diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h index bdd03c49554..cb04afdf9db 100644 --- a/storage/innobase/include/lock0types.h +++ b/storage/innobase/include/lock0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, MariaDB Corporation. +Copyright (c) 2018, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -190,10 +190,14 @@ struct ib_lock_t lock. The link node in a singly linked list, used during hashing. 
*/ - /* Statistics for how long lock has been held and time - how long this lock had to be waited before it was granted */ - time_t requested_time; /*!< Lock request time */ - ulint wait_time; /*!< Time waited this lock or 0 */ + /** time(NULL) of the lock request creation. + Used for computing wait_time and diagnostics only. + Note: bogus durations may be reported + when the system time is adjusted! */ + time_t requested_time; + /** Cumulated wait time in seconds. + Note: may be bogus when the system time is adjusted! */ + ulint wait_time; union { lock_table_t tab_lock;/*!< table lock */ diff --git a/storage/innobase/include/log0crypt.h b/storage/innobase/include/log0crypt.h index c54a369ff47..8d26ccb2ba3 100644 --- a/storage/innobase/include/log0crypt.h +++ b/storage/innobase/include/log0crypt.h @@ -96,7 +96,6 @@ bool log_crypt(byte* buf, lsn_t lsn, ulint size, log_crypt_t op = LOG_ENCRYPT); @param[in] size size of the block @param[out] dst destination block @param[in] offs offset to block -@param[in] space_id tablespace id @param[in] encrypt true=encrypt; false=decrypt @return whether the operation succeeded */ UNIV_INTERN @@ -106,7 +105,6 @@ log_tmp_block_encrypt( ulint size, byte* dst, uint64_t offs, - ulint space_id, bool encrypt = true) MY_ATTRIBUTE((warn_unused_result, nonnull)); @@ -115,7 +113,6 @@ log_tmp_block_encrypt( @param[in] size size of the block @param[out] dst destination block @param[in] offs offset to block -@param[in] space_id tablespace id @return whether the operation succeeded */ inline bool @@ -123,10 +120,9 @@ log_tmp_block_decrypt( const byte* src, ulint size, byte* dst, - uint64_t offs, - ulint space_id) + uint64_t offs) { - return(log_tmp_block_encrypt(src, size, dst, offs, space_id, false)); + return(log_tmp_block_encrypt(src, size, dst, offs, false)); } /** @return whether temporary files are encrypted */ diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 564931b6f3a..7cd05b2f755 100644 --- 
a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -232,7 +232,7 @@ struct recv_sys_t{ /*!< the LSN of a MLOG_CHECKPOINT record, or 0 if none was parsed */ /** the time when progress was last reported */ - ib_time_t progress_time; + time_t progress_time; mem_heap_t* heap; /*!< memory heap of log records and file addresses*/ @@ -311,7 +311,7 @@ struct recv_sys_t{ @param[in] time the current time @return whether progress should be reported (the last report was at least 15 seconds ago) */ - bool report(ib_time_t time) + bool report(time_t time) { if (time - progress_time < 15) { return false; diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h index fa4c2526ae3..6d0f95cba19 100644 --- a/storage/innobase/include/mem0mem.h +++ b/storage/innobase/include/mem0mem.h @@ -73,7 +73,7 @@ allocations of small buffers. */ /** If a memory heap is allowed to grow into the buffer pool, the following is the maximum size for a single allocated buffer: */ -#define MEM_MAX_ALLOC_IN_BUF (srv_page_size - 200) +#define MEM_MAX_ALLOC_IN_BUF (srv_page_size - 200 + REDZONE_SIZE) /** Space needed when allocating for a user a field of length N. The space is allocated only in multiples of UNIV_MEM_ALIGNMENT. */ diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic index e00e814571c..fae2aaf4d04 100644 --- a/storage/innobase/include/mem0mem.ic +++ b/storage/innobase/include/mem0mem.ic @@ -183,13 +183,15 @@ mem_heap_alloc( ulint n) { mem_block_t* block; - void* buf; + byte* buf; ulint free; ut_d(mem_block_validate(heap)); block = UT_LIST_GET_LAST(heap->base); + n += REDZONE_SIZE; + ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF)); /* Check if there is enough space in block. 
If not, create a new @@ -212,7 +214,8 @@ mem_heap_alloc( mem_block_set_free(block, free + MEM_SPACE_NEEDED(n)); - TRASH_ALLOC(buf, n); + buf = buf + REDZONE_SIZE; + UNIV_MEM_ALLOC(buf, n - REDZONE_SIZE); return(buf); } @@ -342,6 +345,8 @@ mem_heap_free_top( ut_d(mem_block_validate(heap)); + n += REDZONE_SIZE; + block = UT_LIST_GET_LAST(heap->base); /* Subtract the free field of block */ diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h index 0a596e50ac7..eae636d6b7d 100644 --- a/storage/innobase/include/rem0rec.h +++ b/storage/innobase/include/rem0rec.h @@ -1293,7 +1293,7 @@ public: } /** Destructor */ - virtual ~rec_printer() {} + ~rec_printer() override {} private: /** Copy constructor */ diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h index b2c5651c9c5..beb2f8c2bfb 100644 --- a/storage/innobase/include/row0ftsort.h +++ b/storage/innobase/include/row0ftsort.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -59,6 +59,8 @@ struct fts_psort_t; struct fts_psort_common_t { row_merge_dup_t* dup; /*!< descriptor of FTS index */ dict_table_t* new_table; /*!< source table */ + /** Old table page size */ + ulint old_zip_size; trx_t* trx; /*!< transaction */ fts_psort_t* all_info; /*!< all parallel sort info */ os_event_t sort_event; /*!< sort event */ @@ -190,26 +192,27 @@ row_merge_create_fts_sort_index( instead of 8 bytes integer to store Doc ID during sort */ -/********************************************************************//** -Initialize FTS parallel sort structures. 
-@return TRUE if all successful */ -ibool +/** Initialize FTS parallel sort structures. +@param[in] trx transaction +@param[in,out] dup descriptor of FTS index being created +@param[in] new_table table where indexes are created +@param[in] opt_doc_id_size whether to use 4 bytes instead of 8 bytes + integer to store Doc ID during sort +@param[in] old_zip_size page size of the old table during alter +@param[out] psort parallel sort info to be instantiated +@param[out] merge parallel merge info to be instantiated +@return true if all successful */ +bool row_fts_psort_info_init( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - row_merge_dup_t* dup, /*!< in,own: descriptor of - FTS index being created */ - const dict_table_t* new_table,/*!< in: table where indexes are - created */ - ibool opt_doc_id_size, - /*!< in: whether to use 4 bytes - instead of 8 bytes integer to - store Doc ID during sort */ - fts_psort_t** psort, /*!< out: parallel sort info to be - instantiated */ - fts_psort_t** merge) /*!< out: parallel merge info - to be instantiated */ + trx_t* trx, + row_merge_dup_t*dup, + dict_table_t* new_table, + bool opt_doc_id_size, + ulint old_zip_size, + fts_psort_t** psort, + fts_psort_t** merge) MY_ATTRIBUTE((nonnull)); + /********************************************************************//** Clean up and deallocate FTS parallel sort structures, and close temparary merge sort files */ diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index 5651c70a1ba..5a4d424981e 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -2,7 +2,7 @@ Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2018, MariaDB Corporation. +Copyright (c) 2013, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -64,9 +64,9 @@ create the internal counter ID in "monitor_id_t". */ /** Structure containing the actual values of a monitor counter. */ struct monitor_value_t { - ib_time_t mon_start_time; /*!< Start time of monitoring */ - ib_time_t mon_stop_time; /*!< Stop time of monitoring */ - ib_time_t mon_reset_time; /*!< Time counter resetted */ + time_t mon_start_time; /*!< Start time of monitoring */ + time_t mon_stop_time; /*!< Stop time of monitoring */ + time_t mon_reset_time; /*!< Time of resetting the counter */ mon_type_t mon_value; /*!< Current counter Value */ mon_type_t mon_max_value; /*!< Current Max value */ mon_type_t mon_min_value; /*!< Current Min value */ @@ -719,8 +719,8 @@ monitor counter #define MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value) \ MONITOR_CHECK_DEFINED(value); \ if (MONITOR_IS_ON(monitor)) { \ - uintmax_t old_time = (value); \ - value = ut_time_us(NULL); \ + uintmax_t old_time = value; \ + value = microsecond_interval_timer(); \ MONITOR_VALUE(monitor) += (mon_type_t) (value - old_time);\ } diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 8e4d8f4e835..d6188362e9a 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -189,6 +189,12 @@ struct srv_stats_t /** Number of spaces in keyrotation list */ ulint_ctr_64_t key_rotation_list_length; + /** Number of temporary tablespace blocks encrypted */ + ulint_ctr_64_t n_temp_blocks_encrypted; + + /** Number of temporary tablespace blocks decrypted */ + ulint_ctr_64_t n_temp_blocks_decrypted; + /** Number of lock deadlocks */ ulint_ctr_1_t lock_deadlock_count; }; @@ -472,6 +478,9 @@ extern ulong srv_max_purge_lag; extern ulong srv_max_purge_lag_delay; extern ulong srv_replication_delay; + +extern my_bool innodb_encrypt_temporary_tables; + 
/*-------------------------------------------*/ /** Modes of operation */ @@ -1038,6 +1047,12 @@ struct export_var_t{ /*!< Number of row log blocks decrypted */ ib_int64_t innodb_n_rowlog_blocks_decrypted; + /* Number of temporary tablespace pages encrypted */ + ib_int64_t innodb_n_temp_blocks_encrypted; + + /* Number of temporary tablespace pages decrypted */ + ib_int64_t innodb_n_temp_blocks_decrypted; + ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */ ulint innodb_sec_rec_cluster_reads_avoided;/*!< srv_sec_rec_cluster_reads_avoided */ @@ -1067,10 +1082,14 @@ struct srv_slot_t{ ibool suspended; /*!< TRUE if the thread is waiting for the event of this slot */ - ib_time_t suspend_time; /*!< time when the thread was - suspended. Initialized by - lock_wait_table_reserve_slot() - for lock wait */ + /** time(NULL) when the thread was suspended. + FIXME: Use my_interval_timer() or similar, to avoid bogus + timeouts in lock_wait_check_and_cancel() or lock_wait_suspend_thread() + when the system time is adjusted to the past! + + FIXME: This is duplicating trx_lock_t::wait_started, + which is being used for diagnostic purposes only. */ + time_t suspend_time; ulong wait_timeout; /*!< wait time that if exceeded the thread will be timed out. 
Initialized by diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h index 556c5d991d4..bf47cb8fe88 100644 --- a/storage/innobase/include/sync0rw.h +++ b/storage/innobase/include/sync0rw.h @@ -625,7 +625,7 @@ struct rw_lock_t #endif /* UNIV_PFS_RWLOCK */ #ifdef UNIV_DEBUG - virtual std::string to_string() const; + std::string to_string() const override; /** In the debug version: pointer to the debug info list of the lock */ UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list; diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h index 142a0d72e5b..e2b5354d2de 100644 --- a/storage/innobase/include/sync0types.h +++ b/storage/innobase/include/sync0types.h @@ -1038,7 +1038,7 @@ struct sync_checker : public sync_check_functor_t /** Check the latching constraints @param[in] level The level held by the thread @return whether a latch violation was detected */ - bool operator()(const latch_level_t level) const + bool operator()(const latch_level_t level) const override { if (some_allowed) { switch (level) { @@ -1082,7 +1082,7 @@ struct sync_allowed_latches : public sync_check_functor_t { @param[in] latch The latch level to check @return true if there is a latch violation */ - bool operator()(const latch_level_t level) const + bool operator()(const latch_level_t level) const override { return(std::find(begin, end, level) == end); } diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h index 47b7b740732..4eab97c0b02 100644 --- a/storage/innobase/include/trx0i_s.h +++ b/storage/innobase/include/trx0i_s.h @@ -117,12 +117,12 @@ struct i_s_trx_row_t { trx_id_t trx_id; /*!< transaction identifier */ const char* trx_state; /*!< transaction state from trx_get_que_state_str() */ - ib_time_t trx_started; /*!< trx_t::start_time */ + time_t trx_started; /*!< trx_t::start_time */ const i_s_locks_row_t* requested_lock_row; /*!< pointer to a row in innodb_locks if trx is waiting, or NULL */ - 
ib_time_t trx_wait_started; /*!< trx_t::wait_started */ + time_t trx_wait_started; /*!< trx_t->lock.wait_started */ uintmax_t trx_weight; /*!< TRX_WEIGHT() */ ulint trx_mysql_thread_id; /*!< thd_get_thread_id() */ const char* trx_query; /*!< MySQL statement being diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index f9d7cde29b1..d37b51036a4 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -927,10 +927,11 @@ public: on dict_sys.latch. Protected by dict_sys.latch. */ - time_t start_time; /*!< time the state last time became - TRX_STATE_ACTIVE */ - ib_uint64_t start_time_micro; /*!< start time of transaction in - microseconds */ + /** wall-clock time of the latest transition to TRX_STATE_ACTIVE; + used for diagnostic purposes only */ + time_t start_time; + /** microsecond_interval_timer() of transaction start */ + ulonglong start_time_micro; lsn_t commit_lsn; /*!< lsn at the time of the commit */ table_id_t table_id; /*!< Table to drop iff dict_operation == TRX_DICT_OP_TABLE, or 0. */ diff --git a/storage/innobase/include/ut0timer.h b/storage/innobase/include/ut0timer.h deleted file mode 100644 index 376af3cf0ef..00000000000 --- a/storage/innobase/include/ut0timer.h +++ /dev/null @@ -1,67 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved. -Copyright (c) 2014, 2018, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/ut0timer.h -Timer routines - -Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com -modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6 -*************************************************************************/ -#ifndef ut0timer_h -#define ut0timer_h - -#include "univ.i" - -/* Current timer stats */ -extern struct my_timer_unit_info ut_timer; - -/**************************************************************//** -Function pointer to point selected timer function. -@return timer current value */ -extern ulonglong (*ut_timer_now)(void); - -/**************************************************************//** -Sets up the data required for use of my_timer_* functions. -Selects the best timer by high frequency, and tight resolution. -Points my_timer_now() to the selected timer function. 
-Initializes my_timer struct to contain the info for selected timer.*/ -UNIV_INTERN -void ut_init_timer(void); - -/**************************************************************//** -Convert native timer units in a ulonglong into microseconds in a double -@return time in microseconds */ -UNIV_INLINE -double -ut_timer_to_microseconds( -/*=====================*/ - ulonglong when); /*!< in: time where to calculate */ -/**************************************************************//** -Convert microseconds in a double to native timer units in a ulonglong -@return time in microseconds */ -UNIV_INLINE -ulonglong -ut_microseconds_to_timer( -/*=====================*/ - ulonglong when); /*!< in: time where to calculate */ - -#include "ut0timer.ic" - -#endif diff --git a/storage/innobase/include/ut0timer.ic b/storage/innobase/include/ut0timer.ic deleted file mode 100644 index 26cf0bd2fbe..00000000000 --- a/storage/innobase/include/ut0timer.ic +++ /dev/null @@ -1,56 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved. -Copyright (c) 2014, 2018, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/ut0timer.ic -Timer routines - -Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com -modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6 -*************************************************************************/ - -/**************************************************************//** -Convert native timer units in a ulonglong into microseconds in a double -@return time in microseconds */ -UNIV_INLINE -double -ut_timer_to_microseconds( -/*=====================*/ - ulonglong when) /*!< in: time where to calculate */ -{ - double ret = (double)(when); - ret *= 1000000.0; - ret /= (double)(ut_timer.frequency); - return ret; -} - -/**************************************************************//** -Convert microseconds in a double to native timer units in a ulonglong -@return time in microseconds */ -UNIV_INLINE -ulonglong -ut_microseconds_to_timer( -/*=====================*/ - ulonglong when) /*!< in: time where to calculate */ -{ - double ret = (double)when; - ret *= (double)(ut_timer.frequency); - ret /= 1000000.0; - return (ulonglong)ret; -} diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index c53b08b64b8..410d2ead738 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -50,33 +50,6 @@ Created 1/20/1994 Heikki Tuuri /** Index name prefix in fast index creation, as a string constant */ #define TEMP_INDEX_PREFIX_STR "\377" -/** Time stamp */ -typedef time_t ib_time_t; - -#if defined (__GNUC__) -# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory") -#elif defined 
(_MSC_VER) -# define UT_COMPILER_BARRIER() _ReadWriteBarrier() -#else -# define UT_COMPILER_BARRIER() -#endif - -/*********************************************************************//** -Delays execution for at most max_wait_us microseconds or returns earlier -if cond becomes true. -@param cond in: condition to wait for; evaluated every 2 ms -@param max_wait_us in: maximum delay to wait, in microseconds */ -# define UT_WAIT_FOR(cond, max_wait_us) \ -do { \ - uintmax_t start_us; \ - start_us = ut_time_us(NULL); \ - while (!(cond) \ - && ut_time_us(NULL) - start_us < (max_wait_us)) {\ - \ - os_thread_sleep(2000 /* 2 ms */); \ - } \ -} while (0) - #define ut_max std::max #define ut_min std::min @@ -173,44 +146,6 @@ ut_2_power_up( MY_ATTRIBUTE((const)); /**********************************************************//** -Returns system time. We do not specify the format of the time returned: -the only way to manipulate it is to use the function ut_difftime. -@return system time */ -ib_time_t -ut_time(void); -/*=========*/ - -/**********************************************************//** -Returns system time. -Upon successful completion, the value 0 is returned; otherwise the -value -1 is returned and the global variable errno is set to indicate the -error. -@return 0 on success, -1 otherwise */ -int -ut_usectime( -/*========*/ - ulint* sec, /*!< out: seconds since the Epoch */ - ulint* ms); /*!< out: microseconds since the Epoch+*sec */ - -/**********************************************************//** -Returns the number of microseconds since epoch. Similar to -time(3), the return value is also stored in *tloc, provided -that tloc is non-NULL. -@return us since epoch */ -uintmax_t -ut_time_us( -/*=======*/ - uintmax_t* tloc); /*!< out: us since epoch, if non-NULL */ -/**********************************************************//** -Returns the number of milliseconds since some epoch. The -value may wrap around. It should only be used for heuristic -purposes. 
-@return ms since epoch */ -ulint -ut_time_ms(void); -/*============*/ - -/**********************************************************//** Returns the number of milliseconds since some epoch. The value may wrap around. It should only be used for heuristic purposes. @@ -218,16 +153,6 @@ purposes. ulint ut_time_ms(void); /*============*/ - -/**********************************************************//** -Returns the difference of two times in seconds. -@return time2 - time1 expressed in seconds */ -double -ut_difftime( -/*========*/ - ib_time_t time2, /*!< in: time */ - ib_time_t time1); /*!< in: time */ - #endif /* !UNIV_INNOCHECKSUM */ /** Determine how many bytes (groups of 8 bits) are needed to @@ -269,14 +194,7 @@ void ut_sprintf_timestamp( /*=================*/ char* buf); /*!< in: buffer where to sprintf */ -/*************************************************************//** -Runs an idle loop on CPU. The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. -@return dummy value */ -void -ut_delay( -/*=====*/ - ulint delay); /*!< in: delay in microseconds on 100 MHz Pentium */ + /*************************************************************//** Prints the contents of a memory buffer in hex and ascii. 
*/ void diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h index 008ea2a70dd..6a096a36894 100644 --- a/storage/innobase/include/ut0wqueue.h +++ b/storage/innobase/include/ut0wqueue.h @@ -84,7 +84,7 @@ ib_wqueue_timedwait( /*================*/ /* out: work item or NULL on timeout*/ ib_wqueue_t* wq, /* in: work queue */ - ib_time_t wait_in_usecs); /* in: wait time in micro seconds */ + ulint wait_in_usecs); /* in: wait time in micro seconds */ /******************************************************************** Return first item on work queue or NULL if queue is empty diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index b397735e997..7e7558aff32 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -72,44 +72,39 @@ extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); extern "C" int thd_need_wait_reports(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); -/** Print info of a table lock. +/** Pretty-print a table lock. @param[in,out] file output stream @param[in] lock table lock */ -static -void -lock_table_print(FILE* file, const lock_t* lock); +static void lock_table_print(FILE* file, const lock_t* lock); -/** Print info of a record lock. +/** Pretty-print a record lock. @param[in,out] file output stream -@param[in] lock record lock */ -static -void -lock_rec_print(FILE* file, const lock_t* lock); +@param[in] lock record lock +@param[in,out] mtr mini-transaction for accessing the record */ +static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr); /** Deadlock checker. */ class DeadlockChecker { public: - /** Checks if a joining lock request results in a deadlock. If - a deadlock is found this function will resolve the deadlock - by choosing a victim transaction and rolling it back. It - will attempt to resolve all deadlocks. 
The returned transaction - id will be the joining transaction id or 0 if some other - transaction was chosen as a victim and rolled back or no - deadlock found. - - @param lock lock the transaction is requesting - @param trx transaction requesting the lock - - @return id of transaction chosen as victim or 0 */ - static const trx_t* check_and_resolve( - const lock_t* lock, - trx_t* trx); + /** Check if a joining lock request results in a deadlock. + If a deadlock is found, we will resolve the deadlock by + choosing a victim transaction and rolling it back. + We will attempt to resolve all deadlocks. + + @param[in] lock the lock request + @param[in,out] trx transaction requesting the lock + + @return trx if it was chosen as victim + @retval NULL if another victim was chosen, + or there is no deadlock (any more) */ + static const trx_t* check_and_resolve(const lock_t* lock, trx_t* trx); private: /** Do a shallow copy. Default destructor OK. @param trx the start transaction (start node) @param wait_lock lock that a transaction wants - @param mark_start visited node counter */ + @param mark_start visited node counter + @param report_waiters whether to call thd_rpl_deadlock_check() */ DeadlockChecker( const trx_t* trx, const lock_t* wait_lock, @@ -751,11 +746,12 @@ lock_rec_has_to_wait( thread, we need to look at trx ordering and lock types */ if (wsrep_thd_is_BF(trx->mysql_thd, FALSE) && wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) { + mtr_t mtr; if (wsrep_debug) { ib::info() << "BF-BF lock conflict, locking: " << for_locking; - lock_rec_print(stderr, lock2); + lock_rec_print(stderr, lock2, mtr); ib::info() << " SQL1: " << wsrep_thd_query(trx->mysql_thd) << " SQL2: " @@ -777,7 +773,7 @@ lock_rec_has_to_wait( << " locked " << wsrep_thd_transaction_state_str( lock2->trx->mysql_thd); - lock_rec_print(stderr, lock2); + lock_rec_print(stderr, lock2, mtr); ib::info() << " SQL1: " << wsrep_thd_query(trx->mysql_thd) << " SQL2: " @@ -1100,6 +1096,7 @@ wsrep_kill_victim( } 
my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE); + mtr_t mtr; if ((!bf_other) || (wsrep_thd_order_before( @@ -1127,7 +1124,7 @@ wsrep_kill_victim( ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:"; if (lock_get_type(lock) == LOCK_REC) { - lock_rec_print(stderr, lock); + lock_rec_print(stderr, lock, mtr); } else { lock_table_print(stderr, lock); } @@ -1293,6 +1290,7 @@ wsrep_print_wait_locks( lock_t* c_lock) /* conflicting lock to print */ { if (wsrep_debug && c_lock->trx->lock.wait_lock != c_lock) { + mtr_t mtr; ib::info() << "WSREP: c_lock != wait lock"; ib::info() << " SQL: " << wsrep_thd_query(c_lock->trx->mysql_thd); @@ -1300,13 +1298,14 @@ wsrep_print_wait_locks( if (lock_get_type_low(c_lock) & LOCK_TABLE) { lock_table_print(stderr, c_lock); } else { - lock_rec_print(stderr, c_lock); + lock_rec_print(stderr, c_lock, mtr); } if (lock_get_type_low(c_lock->trx->lock.wait_lock) & LOCK_TABLE) { lock_table_print(stderr, c_lock->trx->lock.wait_lock); } else { - lock_rec_print(stderr, c_lock->trx->lock.wait_lock); + lock_rec_print(stderr, c_lock->trx->lock.wait_lock, + mtr); } } } @@ -1519,11 +1518,7 @@ If only one of them is a wait lock, it has lower priority. If either is a high priority transaction, the lock has higher priority. Otherwise, the one with an older transaction has higher priority. @returns true if lock1 has higher priority, false otherwise. 
*/ -static -bool -has_higher_priority( - lock_t *lock1, - lock_t *lock2) +static bool has_higher_priority(lock_t *lock1, lock_t *lock2) { if (lock1 == NULL) { return false; @@ -1730,10 +1725,7 @@ lock_rec_enqueue_waiting( lock_prdt_set_prdt(lock, prdt); } - if ( -#ifdef UNIV_DEBUG - const trx_t* victim = -#endif + if (ut_d(const trx_t* victim =) DeadlockChecker::check_and_resolve(lock, trx)) { ut_ad(victim == trx); lock_reset_lock_and_trx_wait(lock); @@ -1757,7 +1749,7 @@ lock_rec_enqueue_waiting( trx->lock.que_state = TRX_QUE_LOCK_WAIT; trx->lock.was_chosen_as_deadlock_victim = false; - trx->lock.wait_started = ut_time(); + trx->lock.wait_started = time(NULL); ut_a(que_thr_stop(thr)); @@ -2067,12 +2059,13 @@ lock_rec_has_to_wait_in_queue( if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) && wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) { if (wsrep_debug) { + mtr_t mtr; ib::info() << "WSREP: waiting BF trx: " << ib::hex(wait_lock->trx->id) << " query: " << wsrep_thd_query(wait_lock->trx->mysql_thd); - lock_rec_print(stderr, wait_lock); + lock_rec_print(stderr, wait_lock, mtr); ib::info() << "WSREP: do not wait another BF trx: " << ib::hex(lock->trx->id) << " query: " << wsrep_thd_query(lock->trx->mysql_thd); - lock_rec_print(stderr, lock); + lock_rec_print(stderr, lock, mtr); } /* don't wait for another BF lock */ continue; @@ -3768,7 +3761,7 @@ lock_table_enqueue_waiting( ); const trx_t* victim_trx = - DeadlockChecker::check_and_resolve(lock, trx); + DeadlockChecker::check_and_resolve(lock, trx); if (victim_trx != 0) { ut_ad(victim_trx == trx); @@ -3789,7 +3782,7 @@ lock_table_enqueue_waiting( trx->lock.que_state = TRX_QUE_LOCK_WAIT; - trx->lock.wait_started = ut_time(); + trx->lock.wait_started = time(NULL); trx->lock.was_chosen_as_deadlock_victim = false; ut_a(que_thr_stop(thr)); @@ -4427,20 +4420,14 @@ lock_table_print(FILE* file, const lock_t* lock) putc('\n', file); } -/** Print info of a record lock. +/** Pretty-print a record lock. 
@param[in,out] file output stream -@param[in] lock record lock */ -static -void -lock_rec_print(FILE* file, const lock_t* lock) +@param[in] lock record lock +@param[in,out] mtr mini-transaction for accessing the record */ +static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr) { ulint space; ulint page_no; - mtr_t mtr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); ut_ad(lock_mutex_own()); ut_a(lock_get_type_low(lock) == LOCK_REC); @@ -4480,13 +4467,16 @@ lock_rec_print(FILE* file, const lock_t* lock) fputs(" waiting", file); } - mtr_start(&mtr); - putc('\n', file); - const buf_block_t* block; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); - block = buf_page_try_get(page_id_t(space, page_no), &mtr); + mtr.start(); + const buf_block_t* block = buf_page_try_get(page_id_t(space, page_no), + &mtr); for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) { @@ -4515,9 +4505,9 @@ lock_rec_print(FILE* file, const lock_t* lock) putc('\n', file); } - mtr_commit(&mtr); + mtr.commit(); - if (heap) { + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } } @@ -4620,11 +4610,10 @@ lock_print_info_summary( /** Prints transaction lock wait and MVCC state. 
@param[in,out] file file where to print -@param[in] trx transaction */ +@param[in] trx transaction +@param[in] now current time */ void -lock_trx_print_wait_and_mvcc_state( - FILE* file, - const trx_t* trx) +lock_trx_print_wait_and_mvcc_state(FILE* file, const trx_t* trx, time_t now) { fprintf(file, "---"); @@ -4644,10 +4633,11 @@ lock_trx_print_wait_and_mvcc_state( fprintf(file, "------- TRX HAS BEEN WAITING %lu SEC" " FOR THIS LOCK TO BE GRANTED:\n", - (ulong) difftime(ut_time(), trx->lock.wait_started)); + (ulong) difftime(now, trx->lock.wait_started)); if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) { - lock_rec_print(file, trx->lock.wait_lock); + mtr_t mtr; + lock_rec_print(file, trx->lock.wait_lock, mtr); } else { lock_table_print(file, trx->lock.wait_lock); } @@ -4665,6 +4655,7 @@ lock_trx_print_locks( FILE* file, /*!< in/out: File to write */ const trx_t* trx) /*!< in: current transaction */ { + mtr_t mtr; uint32_t i= 0; /* Iterate over the transaction's locks. */ for (lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); @@ -4672,7 +4663,7 @@ lock_trx_print_locks( lock = UT_LIST_GET_NEXT(trx_locks, lock)) { if (lock_get_type_low(lock) == LOCK_REC) { - lock_rec_print(file, lock); + lock_rec_print(file, lock, mtr); } else { ut_ad(lock_get_type_low(lock) & LOCK_TABLE); @@ -4693,20 +4684,21 @@ lock_trx_print_locks( /** Functor to display all transactions */ struct lock_print_info { - lock_print_info(FILE* file) : file(file) {} + lock_print_info(FILE* file, time_t now) : file(file), now(now) {} void operator()(const trx_t* trx) const { ut_ad(mutex_own(&trx_sys.mutex)); if (trx == purge_sys.query->trx) return; - lock_trx_print_wait_and_mvcc_state(file, trx); + lock_trx_print_wait_and_mvcc_state(file, trx, now); if (trx->will_lock && srv_print_innodb_lock_monitor) lock_trx_print_locks(file, trx); } FILE* const file; + const time_t now; }; /*********************************************************************//** @@ -4721,9 +4713,10 @@ 
lock_print_info_all_transactions( ut_ad(lock_mutex_own()); fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n"); + const time_t now = time(NULL); mutex_enter(&trx_sys.mutex); - ut_list_map(trx_sys.trx_list, lock_print_info(file)); + ut_list_map(trx_sys.trx_list, lock_print_info(file, now)); mutex_exit(&trx_sys.mutex); lock_mutex_exit(); @@ -6549,10 +6542,11 @@ DeadlockChecker::print(const lock_t* lock) ut_ad(lock_mutex_own()); if (lock_get_type_low(lock) == LOCK_REC) { - lock_rec_print(lock_latest_err_file, lock); + mtr_t mtr; + lock_rec_print(lock_latest_err_file, lock, mtr); if (srv_print_all_deadlocks) { - lock_rec_print(stderr, lock); + lock_rec_print(stderr, lock, mtr); } } else { lock_table_print(lock_latest_err_file, lock); @@ -6847,7 +6841,7 @@ DeadlockChecker::search() @param trx transaction rolled back @param lock lock trx wants */ void -DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock) +DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock) { ut_ad(lock_mutex_own()); @@ -6892,16 +6886,17 @@ DeadlockChecker::trx_rollback() trx_mutex_exit(trx); } -/** Checks if a joining lock request results in a deadlock. If a deadlock is -found this function will resolve the deadlock by choosing a victim transaction -and rolling it back. It will attempt to resolve all deadlocks. The returned -transaction id will be the joining transaction instance or NULL if some other -transaction was chosen as a victim and rolled back or no deadlock found. +/** Check if a joining lock request results in a deadlock. +If a deadlock is found, we will resolve the deadlock by +choosing a victim transaction and rolling it back. +We will attempt to resolve all deadlocks. 
-@param[in] lock lock the transaction is requesting -@param[in,out] trx transaction requesting the lock +@param[in] lock the lock request +@param[in,out] trx transaction requesting the lock -@return transaction instanace chosen as victim or 0 */ +@return trx if it was chosen as victim +@retval NULL if another victim was chosen, +or there is no deadlock (any more) */ const trx_t* DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx) { diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc index ead9815ac02..94104172577 100644 --- a/storage/innobase/lock/lock0wait.cc +++ b/storage/innobase/lock/lock0wait.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2018, MariaDB Corporation. +Copyright (c) 2014, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -58,7 +58,7 @@ lock_wait_table_print(void) (ulong) slot->in_use, (ulong) slot->suspended, slot->wait_timeout, - (ulong) difftime(ut_time(), slot->suspend_time)); + (ulong) difftime(time(NULL), slot->suspend_time)); } } @@ -155,7 +155,7 @@ lock_wait_table_reserve_slot( os_event_reset(slot->event); slot->suspended = TRUE; - slot->suspend_time = ut_time(); + slot->suspend_time = time(NULL); slot->wait_timeout = wait_timeout; if (slot == lock_sys.last_slot) { @@ -231,13 +231,8 @@ lock_wait_suspend_thread( user OS thread */ { srv_slot_t* slot; - double wait_time; trx_t* trx; ibool was_declared_inside_innodb; - int64_t start_time = 0; - int64_t finish_time; - ulint sec; - ulint ms; ulong lock_wait_timeout; trx = thr_get_trx(thr); @@ -283,15 +278,12 @@ lock_wait_suspend_thread( lock_wait_mutex_exit(); trx_mutex_exit(trx); + ulonglong start_time = 0; + if (thr->lock_state == QUE_THR_LOCK_ROW) { 
srv_stats.n_lock_wait_count.inc(); srv_stats.n_lock_wait_current_count++; - - if (ut_usectime(&sec, &ms) == -1) { - start_time = -1; - } else { - start_time = int64_t(sec) * 1000000 + int64_t(ms); - } + start_time = my_interval_timer(); } ulint lock_type = ULINT_UNDEFINED; @@ -371,28 +363,23 @@ lock_wait_suspend_thread( row_mysql_freeze_data_dictionary(trx); } - wait_time = ut_difftime(ut_time(), slot->suspend_time); + double wait_time = difftime(time(NULL), slot->suspend_time); /* Release the slot for others to use */ lock_wait_table_release_slot(slot); if (thr->lock_state == QUE_THR_LOCK_ROW) { - int64_t diff_time; - if (start_time == -1 || ut_usectime(&sec, &ms) == -1) { - finish_time = -1; - diff_time = 0; - } else { - finish_time = int64_t(sec) * 1000000 + int64_t(ms); - diff_time = std::max<int64_t>( - 0, finish_time - start_time); - srv_stats.n_lock_wait_time.add(diff_time); + const ulonglong finish_time = my_interval_timer(); + if (finish_time >= start_time) { + const ulint diff_time = static_cast<ulint> + ((finish_time - start_time) / 1000); + srv_stats.n_lock_wait_time.add(diff_time); /* Only update the variable if we successfully retrieved the start and finish times. See Bug#36819. 
*/ - if (ulint(diff_time) > lock_sys.n_lock_max_wait_time) { - lock_sys.n_lock_max_wait_time - = ulint(diff_time); + if (diff_time > lock_sys.n_lock_max_wait_time) { + lock_sys.n_lock_max_wait_time = diff_time; } /* Record the lock wait time for this thread */ thd_storage_lock_wait(trx->mysql_thd, diff_time); @@ -468,19 +455,12 @@ lock_wait_check_and_cancel( const srv_slot_t* slot) /*!< in: slot reserved by a user thread when the wait started */ { - trx_t* trx; - double wait_time; - ib_time_t suspend_time = slot->suspend_time; - ut_ad(lock_wait_mutex_own()); - ut_ad(slot->in_use); - ut_ad(slot->suspended); - wait_time = ut_difftime(ut_time(), suspend_time); - - trx = thr_get_trx(slot->thr); + double wait_time = difftime(time(NULL), slot->suspend_time); + trx_t* trx = thr_get_trx(slot->thr); if (trx_is_interrupted(trx) || (slot->wait_timeout < 100000000 @@ -515,7 +495,6 @@ lock_wait_check_and_cancel( trx_mutex_exit(trx); } - } /*********************************************************************//** diff --git a/storage/innobase/log/log0crypt.cc b/storage/innobase/log/log0crypt.cc index b088c9af09d..c2ec46158c6 100644 --- a/storage/innobase/log/log0crypt.cc +++ b/storage/innobase/log/log0crypt.cc @@ -66,6 +66,9 @@ struct crypt_info_t { /** The crypt info */ static crypt_info_t info; +/** Initialization vector used for temporary files/tablespace */ +static byte tmp_iv[MY_AES_BLOCK_SIZE]; + /** Crypt info when upgrading from 10.1 */ static crypt_info_t infos[5 * 2]; /** First unused slot in infos[] */ @@ -243,9 +246,6 @@ UNIV_INTERN bool log_crypt_init() { - ut_ad(log_mutex_own()); - ut_ad(log_sys.is_encrypted()); - info.key_version = encryption_key_get_latest_version( LOG_DEFAULT_ENCRYPTION_KEY); @@ -255,7 +255,8 @@ log_crypt_init() return false; } - if (my_random_bytes(info.crypt_msg.bytes, MY_AES_BLOCK_SIZE) + if (my_random_bytes(tmp_iv, MY_AES_BLOCK_SIZE) != MY_AES_OK + || my_random_bytes(info.crypt_msg.bytes, sizeof info.crypt_msg) != MY_AES_OK || 
my_random_bytes(info.crypt_nonce.bytes, sizeof info.crypt_nonce) != MY_AES_OK) { @@ -422,7 +423,6 @@ log_crypt_read_checkpoint_buf(const byte* buf) @param[in] size size of the block @param[out] dst destination block @param[in] offs offset to block -@param[in] space_id tablespace id @param[in] encrypt true=encrypt; false=decrypt @return whether the operation succeeded */ UNIV_INTERN @@ -432,19 +432,17 @@ log_tmp_block_encrypt( ulint size, byte* dst, uint64_t offs, - ulint space_id, bool encrypt) { uint dst_len; - uint64_t aes_ctr_iv[MY_AES_BLOCK_SIZE / sizeof(uint64_t)]; - bzero(aes_ctr_iv, sizeof aes_ctr_iv); - aes_ctr_iv[0] = space_id; - aes_ctr_iv[1] = offs; + uint64_t iv[MY_AES_BLOCK_SIZE / sizeof(uint64_t)]; + iv[0] = offs; + memcpy(iv + 1, tmp_iv, sizeof iv - sizeof *iv); int rc = encryption_crypt( - src, (uint)size, dst, &dst_len, - info.crypt_key.bytes, MY_AES_BLOCK_SIZE, - reinterpret_cast<byte*>(aes_ctr_iv), (uint)(sizeof aes_ctr_iv), + src, uint(size), dst, &dst_len, + const_cast<byte*>(info.crypt_key.bytes), MY_AES_BLOCK_SIZE, + reinterpret_cast<byte*>(iv), uint(sizeof iv), encrypt ? 
ENCRYPTION_FLAG_ENCRYPT|ENCRYPTION_FLAG_NOPAD : ENCRYPTION_FLAG_DECRYPT|ENCRYPTION_FLAG_NOPAD, diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index d4d81a8aa9b..9af7dbdbdd9 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -58,7 +58,7 @@ Created 9/20/1997 Heikki Tuuri /** Log records are stored in the hash table in chunks at most of this size; this must be less than srv_page_size as it is stored in the buffer pool */ -#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t)) +#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t) - REDZONE_SIZE) /** Read-ahead area in applying log records to file pages */ #define RECV_READ_AHEAD_AREA 32U @@ -812,7 +812,7 @@ void recv_sys_t::create() found_corrupt_fs = false; mlog_checkpoint_lsn = 0; - progress_time = ut_time(); + progress_time = time(NULL); recv_max_page_lsn = 0; memset(truncated_undo_spaces, 0, sizeof truncated_undo_spaces); @@ -947,7 +947,7 @@ fail: } } - if (recv_sys.report(ut_time())) { + if (recv_sys.report(time(NULL))) { ib::info() << "Read redo log up to LSN=" << *start_lsn; service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, "Read redo log up to LSN=" LSN_PF, @@ -1949,7 +1949,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, mtr.discard_modifications(); mtr.commit(); - ib_time_t time = ut_time(); + time_t now = time(NULL); mutex_enter(&recv_sys.mutex); @@ -1961,7 +1961,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, ut_ad(!recv_sys.pages.empty()); recv_sys.pages.erase(p); - if (recv_sys.report(time)) { + if (recv_sys.report(now)) { const ulint n = recv_sys.pages.size(); ib::info() << "To recover: " << n << " pages from log"; service_manager_extend_timeout( diff --git a/storage/innobase/os/os0event.cc b/storage/innobase/os/os0event.cc index d6dd137f692..0676ba5f6c1 100644 --- a/storage/innobase/os/os0event.cc +++ b/storage/innobase/os/os0event.cc @@ -1,6 
+1,7 @@ /***************************************************************************** Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,13 +26,11 @@ Created 2012-09-23 Sunny Bains #include "os0event.h" #include "ut0mutex.h" +#include <my_sys.h> #ifdef _WIN32 #include <windows.h> #include <synchapi.h> -#endif /* _WIN32 */ - -#ifdef _WIN32 /** Native condition variable. */ typedef CONDITION_VARIABLE os_cond_t; #else @@ -358,21 +357,9 @@ os_event::wait_time_low( struct timespec abstime; if (time_in_usec != OS_SYNC_INFINITE_TIME) { - struct timeval tv; - int ret; - ulint sec; - ulint usec; - - ret = ut_usectime(&sec, &usec); - ut_a(ret == 0); - - tv.tv_sec = sec; - tv.tv_usec = usec; - - tv.tv_usec += time_in_usec; - - abstime.tv_sec = tv.tv_sec + tv.tv_usec / 1000000; - abstime.tv_nsec = tv.tv_usec % 1000000 * 1000; + ulonglong usec = ulonglong(time_in_usec) + my_hrtime().val; + abstime.tv_sec = usec / 1000000; + abstime.tv_nsec = (usec % 1000000) * 1000; } else { abstime.tv_nsec = 999999999; abstime.tv_sec = (time_t) ULINT_MAX; diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index fe7c0227bf3..2d04a7cddc9 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -5837,7 +5837,7 @@ AIO::start( os_aio_validate(); - os_last_printout = ut_time(); + os_last_printout = time(NULL); if (srv_use_native_aio) { return(true); @@ -6093,7 +6093,7 @@ AIO::reserve_slot( } slot->is_reserved = true; - slot->reservation_time = ut_time(); + slot->reservation_time = time(NULL); slot->m1 = m1; slot->m2 = m2; slot->file = file; @@ -6903,7 +6903,7 @@ private: { ulint age; - age = (ulint) difftime(ut_time(), slot->reservation_time); + age = (ulint) difftime(time(NULL), slot->reservation_time); if ((age >= 2 
&& age > m_oldest) || (age >= 2 @@ -7305,7 +7305,7 @@ os_aio_print(FILE* file) AIO::print_all(file); putc('\n', file); - current_time = ut_time(); + current_time = time(NULL); time_elapsed = 0.001 + difftime(current_time, os_last_printout); fprintf(file, @@ -7371,7 +7371,7 @@ os_aio_refresh_stats() os_bytes_read_since_printout = 0; - os_last_printout = ut_time(); + os_last_printout = time(NULL); } /** Checks that all slots in the system have been freed, that is, there are diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index 257b7a21050..4fdfafa5d0a 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -41,7 +41,7 @@ number between 0 and 2^64-1 inclusive. The formula and the constants being used are: X[n+1] = (a * X[n] + c) mod m where: -X[0] = ut_time_us(NULL) +X[0] = my_interval_timer() a = 1103515245 (3^5 * 5 * 7 * 129749) c = 12345 (3 * 5 * 823) m = 18446744073709551616 (2^64) @@ -54,12 +54,10 @@ page_cur_lcg_prng(void) { #define LCG_a 1103515245 #define LCG_c 12345 - static ib_uint64_t lcg_current = 0; - static ibool initialized = FALSE; + static uint64_t lcg_current; - if (!initialized) { - lcg_current = (ib_uint64_t) ut_time_us(NULL); - initialized = TRUE; + if (!lcg_current) { + lcg_current = my_interval_timer(); } /* no need to "% 2^64" explicitly because lcg_current is diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 705202a0ab4..c4c5d1727ee 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -2365,18 +2365,11 @@ page_validate( the page record type definition */ { const page_dir_slot_t* slot; - mem_heap_t* heap; - byte* buf; - ulint count; - ulint own_count; - ulint rec_own_count; - ulint slot_no; - ulint data_size; const rec_t* rec; const rec_t* old_rec = NULL; ulint offs; ulint n_slots; - ibool ret = FALSE; + ibool ret = TRUE; ulint i; ulint* offsets = NULL; ulint* old_offsets = NULL; @@ -2390,7 
+2383,13 @@ page_validate( if (UNIV_UNLIKELY((ibool) !!page_is_comp(page) != dict_table_is_comp(index->table))) { ib::error() << "'compact format' flag mismatch"; - goto func_exit2; +func_exit2: + ib::error() << "Apparent corruption in space " + << page_get_space_id(page) << " page " + << page_get_page_no(page) + << " of index " << index->name + << " of table " << index->table->name; + return FALSE; } if (page_is_comp(page)) { if (UNIV_UNLIKELY(!page_simple_validate_new(page))) { @@ -2415,19 +2414,12 @@ page_validate( if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) { ib::error() << "PAGE_MAX_TRX_ID out of bounds: " << max_trx_id << ", " << sys_max_trx_id; - goto func_exit2; + ret = FALSE; } } else { ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN); } - heap = mem_heap_create(srv_page_size + 200); - - /* The following buffer is used to check that the - records in the page record heap do not overlap */ - - buf = static_cast<byte*>(mem_heap_zalloc(heap, srv_page_size)); - /* Check first that the record heap and the directory do not overlap. 
*/ @@ -2436,20 +2428,45 @@ page_validate( if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP) <= page_dir_get_nth_slot(page, n_slots - 1)))) { - ib::warn() << "Record heap and dir overlap on space " - << page_get_space_id(page) << " page " - << page_get_page_no(page) << " index " << index->name - << ", " << page_header_get_ptr(page, PAGE_HEAP_TOP) - << ", " << page_dir_get_nth_slot(page, n_slots - 1); + ib::warn() << "Record heap and directory overlap"; + goto func_exit2; + } - goto func_exit; + switch (uint16_t type = fil_page_get_type(page)) { + case FIL_PAGE_RTREE: + if (!index->is_spatial()) { +wrong_page_type: + ib::warn() << "Wrong page type " << type; + ret = FALSE; + } + break; + case FIL_PAGE_TYPE_INSTANT: + if (index->is_instant() + && page_get_page_no(page) == index->page) { + break; + } + goto wrong_page_type; + case FIL_PAGE_INDEX: + if (index->is_spatial()) { + goto wrong_page_type; + } + if (index->is_instant() + && page_get_page_no(page) == index->page) { + goto wrong_page_type; + } + break; + default: + goto wrong_page_type; } + /* The following buffer is used to check that the + records in the page record heap do not overlap */ + mem_heap_t* heap = mem_heap_create(srv_page_size + 200);; + byte* buf = static_cast<byte*>(mem_heap_zalloc(heap, srv_page_size)); + /* Validate the record list in a loop checking also that it is consistent with the directory. 
*/ - count = 0; - data_size = 0; - own_count = 1; + ulint count = 0, data_size = 0, own_count = 1, slot_no = 0; slot_no = 0; slot = page_dir_get_nth_slot(page, slot_no); @@ -2464,11 +2481,13 @@ page_validate( && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec) == page_is_leaf(page))) { ib::error() << "'node_ptr' flag mismatch"; - goto func_exit; + ret = FALSE; + goto next_rec; } if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) { - goto func_exit; + ret = FALSE; + goto next_rec; } /* Check that the records are in the ascending order */ @@ -2480,16 +2499,10 @@ page_validate( /* For spatial index, on nonleaf leavel, we allow recs to be equal. */ - bool rtr_equal_nodeptrs = - (ret == 0 && dict_index_is_spatial(index) - && !page_is_leaf(page)); + if (ret <= 0 && !(ret == 0 && index->is_spatial() + && !page_is_leaf(page))) { - if (ret <= 0 && !rtr_equal_nodeptrs) { - - ib::error() << "Records in wrong order on" - " space " << page_get_space_id(page) - << " page " << page_get_page_no(page) - << " index " << index->name; + ib::error() << "Records in wrong order"; fputs("\nInnoDB: previous record ", stderr); /* For spatial index, print the mbr info.*/ @@ -2510,7 +2523,7 @@ page_validate( putc('\n', stderr); } - goto func_exit; + ret = FALSE; } } @@ -2530,41 +2543,41 @@ page_validate( offs = page_offset(rec_get_start(rec, offsets)); i = rec_offs_size(offsets); if (UNIV_UNLIKELY(offs + i >= srv_page_size)) { - ib::error() << "Record offset out of bounds"; - goto func_exit; + ib::error() << "Record offset out of bounds: " + << offs << '+' << i; + ret = FALSE; + goto next_rec; } - while (i--) { if (UNIV_UNLIKELY(buf[offs + i])) { - /* No other record may overlap this */ - ib::error() << "Record overlaps another"; - goto func_exit; + ib::error() << "Record overlaps another: " + << offs << '+' << i; + ret = FALSE; + break; } - buf[offs + i] = 1; } - if (page_is_comp(page)) { - rec_own_count = rec_get_n_owned_new(rec); - } else { - rec_own_count = rec_get_n_owned_old(rec); - } - - if 
(UNIV_UNLIKELY(rec_own_count != 0)) { + if (ulint rec_own_count = page_is_comp(page) + ? rec_get_n_owned_new(rec) + : rec_get_n_owned_old(rec)) { /* This is a record pointed to by a dir slot */ if (UNIV_UNLIKELY(rec_own_count != own_count)) { - ib::error() << "Wrong owned count " - << rec_own_count << ", " << own_count; - goto func_exit; + ib::error() << "Wrong owned count at " << offs + << ": " << rec_own_count + << ", " << own_count; + ret = FALSE; } if (page_dir_slot_get_rec(slot) != rec) { ib::error() << "Dir slot does not" - " point to right rec"; - goto func_exit; + " point to right rec at " << offs; + ret = FALSE; } - page_dir_slot_check(slot); + if (ret) { + page_dir_slot_check(slot); + } own_count = 0; if (!page_rec_is_supremum(rec)) { @@ -2573,6 +2586,7 @@ page_validate( } } +next_rec: if (page_rec_is_supremum(rec)) { break; } @@ -2597,14 +2611,14 @@ page_validate( } } else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) { n_owned_zero: - ib::error() << "n owned is zero"; - goto func_exit; + ib::error() << "n owned is zero at " << offs; + ret = FALSE; } if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { ib::error() << "n slots wrong " << slot_no << " " << (n_slots - 1); - goto func_exit; + ret = FALSE; } if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS)) @@ -2613,65 +2627,57 @@ n_owned_zero: ib::error() << "n recs wrong " << page_header_get_field(page, PAGE_N_RECS) + PAGE_HEAP_NO_USER_LOW << " " << (count + 1); - goto func_exit; + ret = FALSE; } if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) { ib::error() << "Summed data size " << data_size << ", returned by func " << page_get_data_size(page); - goto func_exit; + ret = FALSE; } /* Check then the free list */ - rec = page_header_get_ptr(page, PAGE_FREE); - - while (rec != NULL) { + for (rec = page_header_get_ptr(page, PAGE_FREE); + rec; + rec = page_rec_get_next_const(rec)) { offsets = rec_get_offsets(rec, index, offsets, page_is_leaf(page), ULINT_UNDEFINED, &heap); if 
(UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) { - - goto func_exit; + ret = FALSE; + continue; } count++; offs = page_offset(rec_get_start(rec, offsets)); i = rec_offs_size(offsets); if (UNIV_UNLIKELY(offs + i >= srv_page_size)) { - ib::error() << "Record offset out of bounds"; - goto func_exit; + ib::error() << "Free record offset out of bounds: " + << offs << '+' << i; + ret = FALSE; + continue; } - while (i--) { - if (UNIV_UNLIKELY(buf[offs + i])) { - ib::error() << "Record overlaps another" - " in free list"; - goto func_exit; + ib::error() << "Free record overlaps another: " + << offs << '+' << i; + ret = FALSE; + break; } - buf[offs + i] = 1; } - - rec = page_rec_get_next_const(rec); } if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { ib::error() << "N heap is wrong " << page_dir_get_n_heap(page) << " " << count + 1; - goto func_exit; + ret = FALSE; } - ret = TRUE; - -func_exit: mem_heap_free(heap); - if (UNIV_UNLIKELY(ret == FALSE)) { -func_exit2: - ib::error() << "Apparent corruption in space " - << page_get_space_id(page) << " page " - << page_get_page_no(page) << " index " << index->name; + if (UNIV_UNLIKELY(!ret)) { + goto func_exit2; } return(ret); diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index 9cfaf42db55..851caddf1da 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -1255,7 +1255,7 @@ page_zip_compress( ulint n_blobs = 0; byte* storage; /* storage of uncompressed columns */ - uintmax_t usec = ut_time_us(NULL); + const ulonglong ns = my_interval_timer(); #ifdef PAGE_ZIP_COMPRESS_DBG FILE* logfile = NULL; #endif @@ -1509,7 +1509,7 @@ err_exit: dict_index_zip_failure(index); } - uintmax_t time_diff = ut_time_us(NULL) - usec; + const uint64_t time_diff = (my_interval_timer() - ns) / 1000; page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff; if (cmp_per_index_enabled) { @@ -1575,7 +1575,7 @@ err_exit: fclose(logfile); } #endif /* 
PAGE_ZIP_COMPRESS_DBG */ - uintmax_t time_diff = ut_time_us(NULL) - usec; + const uint64_t time_diff = (my_interval_timer() - ns) / 1000; page_zip_stat[page_zip->ssize - 1].compressed_ok++; page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff; if (cmp_per_index_enabled) { @@ -3202,13 +3202,13 @@ page_zip_decompress( page header fields that should not change after page creation */ { - uintmax_t usec = ut_time_us(NULL); + const ulonglong ns = my_interval_timer(); if (!page_zip_decompress_low(page_zip, page, all)) { return(FALSE); } - uintmax_t time_diff = ut_time_us(NULL) - usec; + const uint64_t time_diff = (my_interval_timer() - ns) / 1000; page_zip_stat[page_zip->ssize - 1].decompressed++; page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff; diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc index ab83f928492..45a5fee59ec 100644 --- a/storage/innobase/row/row0ftsort.cc +++ b/storage/innobase/row/row0ftsort.cc @@ -159,25 +159,26 @@ row_merge_create_fts_sort_index( return(new_index); } -/*********************************************************************//** -Initialize FTS parallel sort structures. -@return TRUE if all successful */ -ibool + +/** Initialize FTS parallel sort structures. 
+@param[in] trx transaction +@param[in,out] dup descriptor of FTS index being created +@param[in,out] new_table table where indexes are created +@param[in] opt_doc_id_size whether to use 4 bytes instead of 8 bytes + integer to store Doc ID during sort +@param[in] old_zip_size page size of the old table during alter +@param[out] psort parallel sort info to be instantiated +@param[out] merge parallel merge info to be instantiated +@return true if all successful */ +bool row_fts_psort_info_init( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - row_merge_dup_t* dup, /*!< in,own: descriptor of - FTS index being created */ - const dict_table_t* new_table,/*!< in: table on which indexes are - created */ - ibool opt_doc_id_size, - /*!< in: whether to use 4 bytes - instead of 8 bytes integer to - store Doc ID during sort */ - fts_psort_t** psort, /*!< out: parallel sort info to be - instantiated */ - fts_psort_t** merge) /*!< out: parallel merge info - to be instantiated */ + trx_t* trx, + row_merge_dup_t*dup, + dict_table_t* new_table, + bool opt_doc_id_size, + ulint old_zip_size, + fts_psort_t** psort, + fts_psort_t** merge) { ulint i; ulint j; @@ -187,6 +188,7 @@ row_fts_psort_info_init( ulint block_size; ibool ret = TRUE; bool encrypted = false; + ut_ad(ut_is_2pow(old_zip_size)); block_size = 3 * srv_sort_buf_size; @@ -209,7 +211,8 @@ row_fts_psort_info_init( } common_info->dup = dup; - common_info->new_table = (dict_table_t*) new_table; + common_info->new_table = new_table; + common_info->old_zip_size = old_zip_size; common_info->trx = trx; common_info->all_info = psort_info; common_info->sort_event = os_event_create(0); @@ -803,7 +806,7 @@ DECLARE_THREAD(fts_parallel_tokenization)( block = psort_info->merge_block; crypt_block = psort_info->crypt_block; - const ulint zip_size = table->space->zip_size(); + const ulint zip_size = psort_info->psort_common->old_zip_size; row_merge_fts_get_next_doc_item(psort_info, &doc_item); diff --git 
a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 39dc8f4bba4..148d223a364 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -634,12 +634,12 @@ struct FetchIndexRootPages : public AbstractCallback { m_table(table) UNIV_NOTHROW { } /** Destructor */ - virtual ~FetchIndexRootPages() UNIV_NOTHROW { } + ~FetchIndexRootPages() UNIV_NOTHROW override { } /** Called for each block as it is read from the file. @param block block to convert, it is not from the buffer pool. @retval DB_SUCCESS or error code. */ - dberr_t operator()(buf_block_t* block) UNIV_NOTHROW; + dberr_t operator()(buf_block_t* block) UNIV_NOTHROW override; /** Update the import configuration that will be used to import the tablespace. */ @@ -812,7 +812,7 @@ public: rec_offs_init(m_offsets_); } - virtual ~PageConverter() UNIV_NOTHROW + ~PageConverter() UNIV_NOTHROW override { if (m_heap != 0) { mem_heap_free(m_heap); @@ -822,7 +822,7 @@ public: /** Called for each block as it is read from the file. @param block block to convert, it is not from the buffer pool. @retval DB_SUCCESS or error code. */ - dberr_t operator()(buf_block_t* block) UNIV_NOTHROW; + dberr_t operator()(buf_block_t* block) UNIV_NOTHROW override; private: /** Update the page, set the space id, max trx id and index id. diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index c965d51a6d1..bd894d06541 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1430,7 +1430,8 @@ row_ins_foreign_check_on_constraint( cascade->state = UPD_NODE_UPDATE_CLUSTERED; #ifdef WITH_WSREP - err = wsrep_append_foreign_key(trx, foreign, clust_rec, clust_index, + err = wsrep_append_foreign_key(trx, foreign, cascade->pcur->old_rec, + clust_index, FALSE, WSREP_SERVICE_KEY_EXCLUSIVE); if (err != DB_SUCCESS) { fprintf(stderr, @@ -1816,6 +1817,10 @@ row_ins_check_foreign_constraint( && wsrep_protocol_version < 4) ? 
WSREP_SERVICE_KEY_SHARED : WSREP_SERVICE_KEY_REFERENCE); + if (err != DB_SUCCESS) { + fprintf(stderr, + "WSREP: foreign key append failed: %d\n", err); + } #endif /* WITH_WSREP */ goto end_scan; } else if (foreign->type != 0) { diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 02c4d1b0d82..b6e31a2b017 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -444,8 +444,7 @@ row_log_online_op( if (log_tmp_is_encrypted()) { if (!log_tmp_block_encrypt( buf, srv_sort_buf_size, - log->crypt_tail, byte_offset, - index->table->space_id)) { + log->crypt_tail, byte_offset)) { log->error = DB_DECRYPTION_FAILED; goto write_failed; } @@ -2886,8 +2885,7 @@ all_done: if (log_tmp_is_encrypted()) { if (!log_tmp_block_decrypt( buf, srv_sort_buf_size, - index->online_log->crypt_head, - ofs, index->table->space_id)) { + index->online_log->crypt_head, ofs)) { error = DB_DECRYPTION_FAILED; goto func_exit; } @@ -3792,8 +3790,7 @@ all_done: if (log_tmp_is_encrypted()) { if (!log_tmp_block_decrypt( buf, srv_sort_buf_size, - index->online_log->crypt_head, - ofs, index->table->space_id)) { + index->online_log->crypt_head, ofs)) { error = DB_DECRYPTION_FAILED; goto func_exit; } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 43d06e983cf..31166bdb94f 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -1101,7 +1101,7 @@ row_merge_read( /* If encryption is enabled decrypt buffer */ if (success && log_tmp_is_encrypted()) { if (!log_tmp_block_decrypt(buf, srv_sort_buf_size, - crypt_buf, ofs, space)) { + crypt_buf, ofs)) { return (FALSE); } @@ -1150,7 +1150,7 @@ row_merge_write( if (!log_tmp_block_encrypt(static_cast<const byte*>(buf), buf_len, static_cast<byte*>(crypt_buf), - ofs, space)) { + ofs)) { return false; } @@ -2567,6 +2567,7 @@ write_buffers: BTR_SEARCH_LEAF, &pcur, &mtr); buf = row_merge_buf_empty(buf); + merge_buf[i] = buf; /* Restart the 
outer loop on the record. We did not insert it into any index yet. */ @@ -2692,6 +2693,7 @@ write_buffers: } } merge_buf[i] = row_merge_buf_empty(buf); + buf = merge_buf[i]; if (UNIV_LIKELY(row != NULL)) { /* Try writing the record again, now @@ -2869,8 +2871,7 @@ wait_again: if (max_doc_id && err == DB_SUCCESS) { /* Sync fts cache for other fts indexes to keep all fts indexes consistent in sync_doc_id. */ - err = fts_sync_table(const_cast<dict_table_t*>(new_table), - false, true, false); + err = fts_sync_table(const_cast<dict_table_t*>(new_table)); if (err == DB_SUCCESS) { fts_update_next_doc_id(NULL, new_table, max_doc_id); @@ -4693,6 +4694,7 @@ row_merge_build_indexes( created */ if (!row_fts_psort_info_init( trx, dup, new_table, opt_doc_id_size, + old_table->space->zip_size(), &psort_info, &merge_info)) { error = DB_CORRUPTION; goto func_exit; diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 2bcd799a32c..3d3944e6a6b 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -3403,7 +3403,9 @@ row_drop_table_for_mysql( calling btr_search_drop_page_hash_index() while we hold the InnoDB dictionary lock, we will drop any adaptive hash index entries upfront. 
*/ - bool immune = is_temp_name + const bool immune = is_temp_name + || create_failed + || sqlcom == SQLCOM_CREATE_TABLE || strstr(table->name.m_name, "/FTS"); while (buf_LRU_drop_page_hash_for_tablespace(table)) { diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc index ac3c5af7d8e..f327dce121b 100644 --- a/storage/innobase/row/row0vers.cc +++ b/storage/innobase/row/row0vers.cc @@ -897,7 +897,8 @@ row_vers_old_has_index_entry( ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_S_FIX)); - ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S)); + ut_ad(!rw_lock_own(&purge_sys.latch, RW_LOCK_S)); + ut_ad(also_curr || !vcol_info); clust_index = dict_table_get_first_index(index->table); @@ -964,7 +965,7 @@ row_vers_old_has_index_entry( entry = row_build_index_entry( row, ext, index, heap); if (entry && !dtuple_coll_cmp(ientry, entry)) { - goto safe_to_purge; + goto unsafe_to_purge; } } else { /* Build index entry out of row */ @@ -985,7 +986,7 @@ row_vers_old_has_index_entry( clust_index, clust_offsets, index, ientry, roll_ptr, trx_id, NULL, &vrow, mtr)) { - goto safe_to_purge; + goto unsafe_to_purge; } } clust_offsets = rec_get_offsets(rec, clust_index, NULL, @@ -1018,7 +1019,7 @@ row_vers_old_has_index_entry( a different binary value in a char field, but the collation identifies the old and new value anyway! */ if (entry && !dtuple_coll_cmp(ientry, entry)) { -safe_to_purge: +unsafe_to_purge: mem_heap_free(heap); if (v_heap) { @@ -1058,7 +1059,6 @@ safe_to_purge: if (!prev_version) { /* Versions end here */ -unsafe_to_purge: mem_heap_free(heap); if (v_heap) { @@ -1120,7 +1120,7 @@ unsafe_to_purge: and new value anyway! 
*/ if (entry && !dtuple_coll_cmp(ientry, entry)) { - goto safe_to_purge; + goto unsafe_to_purge; } } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 52885cdade4..7557e3ec897 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -417,6 +417,9 @@ my_bool srv_print_innodb_lock_monitor; PRIMARY KEY */ my_bool srv_force_primary_key; +/** Key version to encrypt the temporary tablespace */ +my_bool innodb_encrypt_temporary_tables; + /* Array of English strings describing the current state of an i/o handler thread */ @@ -1607,6 +1610,12 @@ srv_export_innodb_status(void) export_vars.innodb_n_rowlog_blocks_encrypted = srv_stats.n_rowlog_blocks_encrypted; export_vars.innodb_n_rowlog_blocks_decrypted = srv_stats.n_rowlog_blocks_decrypted; + export_vars.innodb_n_temp_blocks_encrypted = + srv_stats.n_temp_blocks_encrypted; + + export_vars.innodb_n_temp_blocks_decrypted = + srv_stats.n_temp_blocks_decrypted; + export_vars.innodb_defragment_compression_failures = btr_defragment_compression_failures; export_vars.innodb_defragment_failures = btr_defragment_failures; @@ -1692,8 +1701,9 @@ DECLARE_THREAD(srv_monitor_thread)(void*) pfs_register_thread(srv_monitor_thread_key); #endif /* UNIV_PFS_THREAD */ - srv_last_monitor_time = ut_time(); - last_monitor_time = ut_time(); + current_time = time(NULL); + srv_last_monitor_time = current_time; + last_monitor_time = current_time; mutex_skipped = 0; last_srv_print_monitor = srv_print_innodb_monitor; loop: @@ -1704,12 +1714,12 @@ loop: os_event_wait_time_low(srv_monitor_event, 5000000, sig_count); - current_time = ut_time(); + current_time = time(NULL); time_elapsed = difftime(current_time, last_monitor_time); if (time_elapsed > 15) { - last_monitor_time = ut_time(); + last_monitor_time = current_time; if (srv_print_innodb_monitor) { /* Reset mutex_skipped counter everytime @@ -2046,20 +2056,16 @@ static void srv_shutdown_print_master_pending( 
/*==============================*/ - ib_time_t* last_print_time, /*!< last time the function + time_t* last_print_time, /*!< last time the function print the message */ ulint n_tables_to_drop, /*!< number of tables to be dropped */ ulint n_bytes_merged) /*!< number of change buffer just merged */ { - ib_time_t current_time; - double time_elapsed; - - current_time = ut_time(); - time_elapsed = ut_difftime(current_time, *last_print_time); + time_t current_time = time(NULL); - if (time_elapsed > 60) { + if (difftime(current_time, *last_print_time) > 60) { *last_print_time = current_time; if (n_tables_to_drop) { @@ -2137,8 +2143,8 @@ void srv_master_do_active_tasks(void) /*============================*/ { - ib_time_t cur_time = ut_time(); - uintmax_t counter_time = ut_time_us(NULL); + time_t cur_time = time(NULL); + ulonglong counter_time = microsecond_interval_timer(); /* First do the tasks that we are suppose to do at each invocation of this function. */ @@ -2168,7 +2174,7 @@ srv_master_do_active_tasks(void) /* Do an ibuf merge */ srv_main_thread_op_info = "doing insert buffer merge"; - counter_time = ut_time_us(NULL); + counter_time = microsecond_interval_timer(); ibuf_merge_in_background(false); MONITOR_INC_TIME_IN_MICRO_SECS( MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time); @@ -2233,8 +2239,6 @@ void srv_master_do_idle_tasks(void) /*==========================*/ { - uintmax_t counter_time; - ++srv_main_idle_loops; MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS); @@ -2243,7 +2247,7 @@ srv_master_do_idle_tasks(void) /* ALTER TABLE in MySQL requires on Unix that the table handler can drop tables lazily after there no longer are SELECT queries to them. 
*/ - counter_time = ut_time_us(NULL); + ulonglong counter_time = microsecond_interval_timer(); srv_main_thread_op_info = "doing background drop tables"; row_drop_tables_for_mysql_in_background(); MONITOR_INC_TIME_IN_MICRO_SECS( @@ -2262,7 +2266,7 @@ srv_master_do_idle_tasks(void) log_free_check(); /* Do an ibuf merge */ - counter_time = ut_time_us(NULL); + counter_time = microsecond_interval_timer(); srv_main_thread_op_info = "doing insert buffer merge"; ibuf_merge_in_background(true); MONITOR_INC_TIME_IN_MICRO_SECS( @@ -2315,7 +2319,7 @@ srv_shutdown(bool ibuf_merge) { ulint n_bytes_merged = 0; ulint n_tables_to_drop; - ib_time_t now = ut_time(); + time_t now = time(NULL); do { ut_ad(!srv_read_only_mode); @@ -2453,10 +2457,10 @@ static bool srv_purge_should_exit() uint32_t history_size = trx_sys.rseg_history_len; if (history_size) { #if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY - static ib_time_t progress_time; - ib_time_t time = ut_time(); - if (time - progress_time >= 15) { - progress_time = time; + static time_t progress_time; + time_t now = time(NULL); + if (now - progress_time >= 15) { + progress_time = now; service_manager_extend_timeout( INNODB_EXTEND_TIMEOUT_INTERVAL, "InnoDB: to purge %u transactions", diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 8b1b466e9ae..04bb3163a84 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -47,7 +47,6 @@ Created 2/16/1996 Heikki Tuuri #include "row0ftsort.h" #include "ut0mem.h" -#include "ut0timer.h" #include "mem0mem.h" #include "data0data.h" #include "data0type.h" @@ -484,7 +483,7 @@ create_log_files( /* Create a log checkpoint. 
*/ log_mutex_enter(); if (log_sys.is_encrypted() && !log_crypt_init()) { - return(DB_ERROR); + return DB_ERROR; } ut_d(recv_no_log_write = false); log_sys.lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE); @@ -1615,6 +1614,10 @@ dberr_t srv_start(bool create_new_db) srv_log_file_size_requested = srv_log_file_size; + if (innodb_encrypt_temporary_tables && !log_crypt_init()) { + return srv_init_abort(DB_ERROR); + } + if (create_new_db) { buf_flush_sync_all_buf_pools(); diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc index 51ae8bc4fd2..b9578289504 100644 --- a/storage/innobase/sync/sync0arr.cc +++ b/storage/innobase/sync/sync0arr.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2013, 2018, MariaDB Corporation. +Copyright (c) 2013, 2019, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -119,8 +119,10 @@ struct sync_cell_t { has not been signalled in the period between the reset and wait call. */ - time_t reservation_time;/*!< time when the thread reserved - the wait cell */ + /** time(NULL) when the wait cell was reserved. + FIXME: sync_array_print_long_waits_low() may display bogus + warnings when the system time is adjusted to the past! */ + time_t reservation_time; }; /* NOTE: It is allowed for a thread to wait for an event allocated for @@ -375,7 +377,7 @@ sync_array_reserve_cell( cell->thread_id = os_thread_get_curr_id(); - cell->reservation_time = ut_time(); + cell->reservation_time = time(NULL); /* Make sure the event is reset and also store the value of signal_count at which the event was reset. 
*/ diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc index a0697364378..ed348357556 100644 --- a/storage/innobase/trx/trx0i_s.cc +++ b/storage/innobase/trx/trx0i_s.cc @@ -140,9 +140,8 @@ struct i_s_table_cache_t { struct trx_i_s_cache_t { rw_lock_t rw_lock; /*!< read-write lock protecting the rest of this structure */ - uintmax_t last_read; /*!< last time the cache was read; - measured in microseconds since - epoch */ + ulonglong last_read; /*!< last time the cache was read; + measured in nanoseconds */ ib_mutex_t last_read_mutex;/*!< mutex protecting the last_read member - it is updated inside a shared lock of the @@ -434,7 +433,7 @@ fill_trx_row( ut_ad(lock_mutex_own()); row->trx_id = trx_get_id_for_print(trx); - row->trx_started = (ib_time_t) trx->start_time; + row->trx_started = trx->start_time; row->trx_state = trx_get_que_state_str(trx); row->requested_lock_row = requested_lock_row; ut_ad(requested_lock_row == NULL @@ -443,7 +442,7 @@ fill_trx_row( if (trx->lock.wait_lock != NULL) { ut_a(requested_lock_row != NULL); - row->trx_wait_started = (ib_time_t) trx->lock.wait_started; + row->trx_wait_started = trx->lock.wait_started; } else { ut_a(requested_lock_row == NULL); row->trx_wait_started = 0; @@ -1142,22 +1141,16 @@ add_trx_relevant_locks_to_cache( } /** The minimum time that a cache must not be updated after it has been -read for the last time; measured in microseconds. We use this technique +read for the last time; measured in nanoseconds. We use this technique to ensure that SELECTs which join several INFORMATION SCHEMA tables read the same version of the cache. */ -#define CACHE_MIN_IDLE_TIME_US 100000 /* 0.1 sec */ +#define CACHE_MIN_IDLE_TIME_NS 100000000 /* 0.1 sec */ /*******************************************************************//** Checks if the cache can safely be updated. 
-@return TRUE if can be updated */ -static -ibool -can_cache_be_updated( -/*=================*/ - trx_i_s_cache_t* cache) /*!< in: cache */ +@return whether the cache can be updated */ +static bool can_cache_be_updated(trx_i_s_cache_t* cache) { - uintmax_t now; - /* Here we read cache->last_read without acquiring its mutex because last_read is only updated when a shared rw lock on the whole cache is being held (see trx_i_s_cache_end_read()) and @@ -1167,13 +1160,7 @@ can_cache_be_updated( ut_ad(rw_lock_own(&cache->rw_lock, RW_LOCK_X)); - now = ut_time_us(NULL); - if (now - cache->last_read > CACHE_MIN_IDLE_TIME_US) { - - return(TRUE); - } - - return(FALSE); + return my_interval_timer() - cache->last_read > CACHE_MIN_IDLE_TIME_NS; } /*******************************************************************//** @@ -1273,8 +1260,7 @@ trx_i_s_possibly_fetch_data_into_cache( lock_mutex_exit(); /* update cache last read time */ - time_t now = ut_time_us(NULL); - cache->last_read = now; + cache->last_read = my_interval_timer(); return(0); } @@ -1364,12 +1350,10 @@ trx_i_s_cache_end_read( /*===================*/ trx_i_s_cache_t* cache) /*!< in: cache */ { - uintmax_t now; - ut_ad(rw_lock_own(&cache->rw_lock, RW_LOCK_S)); /* update cache last read time */ - now = ut_time_us(NULL); + const ulonglong now = my_interval_timer(); mutex_enter(&cache->last_read_mutex); cache->last_read = now; mutex_exit(&cache->last_read_mutex); diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 7fb1d481126..fd1128083ba 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -137,7 +137,8 @@ purge_graph_build() trx_t* trx = trx_create(); ut_ad(!trx->id); - trx->start_time = ut_time(); + trx->start_time = time(NULL); + trx->start_time_micro = microsecond_interval_timer(); trx->state = TRX_STATE_ACTIVE; trx->op_info = "purge trx"; diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index 
3be07e0d39d..4136dd5e327 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -2473,7 +2473,10 @@ trx_undo_prev_version_build( entry = row_rec_to_index_entry( rec, index, offsets, &n_ext, heap); - n_ext += btr_push_update_extern_fields(entry, update, heap); + if (index->is_primary()) { + n_ext += btr_push_update_extern_fields( + entry, entry->n_fields, update, heap); + } /* The page containing the clustered index record corresponding to entry is latched in mtr. Thus the following call is safe. */ diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 5104ed378a8..e36c7d9e5b9 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -730,9 +730,9 @@ static my_bool trx_roll_count_callback(rw_trx_hash_element_t *element, /** Report progress when rolling back a row of a recovered transaction. */ void trx_roll_report_progress() { - ib_time_t time = ut_time(); + time_t now = time(NULL); mutex_enter(&recv_sys.mutex); - bool report = recv_sys.report(time); + bool report = recv_sys.report(now); mutex_exit(&recv_sys.mutex); if (report) { diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index de5af9764fa..1ed5a8dff47 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -610,7 +610,8 @@ trx_resurrect_table_locks( */ static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, - ib_time_t start_time, uint64_t *rows_to_undo, + time_t start_time, ulonglong start_time_micro, + uint64_t *rows_to_undo, bool is_old_insert) { trx_state_t state; @@ -662,6 +663,7 @@ static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, trx->id= undo->trx_id; trx->is_recovered= true; trx->start_time= start_time; + trx->start_time_micro= start_time_micro; if (undo->dict_operation) { @@ -702,7 +704,8 @@ trx_lists_init_at_db_start() /* Look from the rollback segments if there exist undo logs for transactions. 
*/ - const ib_time_t start_time = ut_time(); + const time_t start_time = time(NULL); + const ulonglong start_time_micro= microsecond_interval_timer(); uint64_t rows_to_undo = 0; for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { @@ -721,8 +724,8 @@ trx_lists_init_at_db_start() undo = UT_LIST_GET_FIRST(rseg->old_insert_list); while (undo) { trx_undo_t* next = UT_LIST_GET_NEXT(undo_list, undo); - trx_resurrect(undo, rseg, start_time, &rows_to_undo, - true); + trx_resurrect(undo, rseg, start_time, start_time_micro, + &rows_to_undo, true); undo = next; } @@ -733,6 +736,7 @@ trx_lists_init_at_db_start() trx_t *trx = trx_sys.find(0, undo->trx_id, false); if (!trx) { trx_resurrect(undo, rseg, start_time, + start_time_micro, &rows_to_undo, false); } else { ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) || @@ -994,14 +998,10 @@ trx_start_low( } } - if (trx->mysql_thd != NULL) { - trx->start_time = thd_start_time_in_secs(trx->mysql_thd); - trx->start_time_micro = thd_query_start_micro(trx->mysql_thd); - - } else { - trx->start_time = ut_time(); - trx->start_time_micro = 0; - } + trx->start_time = time(NULL); + trx->start_time_micro = trx->mysql_thd + ? 
thd_query_start_micro(trx->mysql_thd) + : microsecond_interval_timer(); ut_a(trx->error_state == DB_SUCCESS); @@ -1236,7 +1236,7 @@ trx_update_mod_tables_timestamp( { /* consider using trx->start_time if calling time() is too expensive here */ - time_t now = ut_time(); + const time_t now = time(NULL); trx_mod_tables_t::const_iterator end = trx->mod_tables.end(); diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc index 0403131f274..4a6447c1dcf 100644 --- a/storage/innobase/ut/ut0crc32.cc +++ b/storage/innobase/ut/ut0crc32.cc @@ -561,23 +561,6 @@ ut_crc32_init() ut_cpuid(vend, &model, &family, &stepping, &features_ecx, &features_edx); - /* Valgrind does not understand the CRC32 instructions: - - vex amd64->IR: unhandled instruction bytes: 0xF2 0x48 0xF 0x38 0xF0 0xA - valgrind: Unrecognised instruction at address 0xad3db5. - Your program just tried to execute an instruction that Valgrind - did not recognise. There are two possible reasons for this. - 1. Your program has a bug and erroneously jumped to a non-code - location. If you are running Memcheck and you just saw a - warning about a bad jump, it's probably your program's fault. - 2. The instruction is legitimate but Valgrind doesn't handle it, - i.e. it's Valgrind's fault. If you think this is the case or - you are not sure, please let us know and we'll try to fix it. - Either way, Valgrind will now raise a SIGILL signal which will - probably kill your program. - - */ - if (features_ecx & 1 << 20) { ut_crc32 = ut_crc32_hw; ut_crc32_implementation = "Using SSE2 crc32 instructions"; diff --git a/storage/innobase/ut/ut0timer.cc b/storage/innobase/ut/ut0timer.cc deleted file mode 100644 index 9aefcafebc6..00000000000 --- a/storage/innobase/ut/ut0timer.cc +++ /dev/null @@ -1,90 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved. -Copyright (c) 2014, SkySQL Ab. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file ut/ut0timer.cc -Timer rountines - -Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com -modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6 -*************************************************************************/ - -#include "data0type.h" -#include <my_rdtsc.h> -#include <ut0timer.h> - -/**************************************************************//** -Initial timer definition -@return 0 */ -static -ulonglong -ut_timer_none(void) -/*===============*/ -{ - return 0; -} - -/**************************************************************//** -Function pointer to point selected timer function. -@return timer current value */ -ulonglong (*ut_timer_now)(void) = &ut_timer_none; - -struct my_timer_unit_info ut_timer; -extern MYSQL_PLUGIN_IMPORT MY_TIMER_INFO sys_timer_info; - -/**************************************************************//** -Sets up the data required for use of my_timer_* functions. -Selects the best timer by high frequency, and tight resolution. -Points my_timer_now() to the selected timer function. 
-Initializes my_timer struct to contain the info for selected timer.*/ -UNIV_INTERN -void -ut_init_timer(void) -/*===============*/ -{ - if (sys_timer_info.cycles.frequency > 1000000 && - sys_timer_info.cycles.resolution == 1) { - ut_timer = sys_timer_info.cycles; - ut_timer_now = &my_timer_cycles; - } else if (sys_timer_info.nanoseconds.frequency > 1000000 && - sys_timer_info.nanoseconds.resolution == 1) { - ut_timer = sys_timer_info.nanoseconds; - ut_timer_now = &my_timer_nanoseconds; - } else if (sys_timer_info.microseconds.frequency >= 1000000 && - sys_timer_info.microseconds.resolution == 1) { - ut_timer = sys_timer_info.microseconds; - ut_timer_now = &my_timer_microseconds; - - } else if (sys_timer_info.milliseconds.frequency >= 1000 && - sys_timer_info.milliseconds.resolution == 1) { - ut_timer = sys_timer_info.milliseconds; - ut_timer_now = &my_timer_milliseconds; - } else if (sys_timer_info.ticks.frequency >= 1000 && - /* Will probably be false */ - sys_timer_info.ticks.resolution == 1) { - ut_timer = sys_timer_info.ticks; - ut_timer_now = &my_timer_ticks; - } else { - /* None are acceptable, so leave it as "None", and fill in struct */ - ut_timer.frequency = 1; /* Avoid div-by-zero */ - ut_timer.overhead = 0; /* Since it doesn't do anything */ - ut_timer.resolution = 10; /* Another sign it's bad */ - ut_timer.routine = 0; /* None */ - } -} diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc index 8ee18005d3b..5c19fcb6825 100644 --- a/storage/innobase/ut/ut0ut.cc +++ b/storage/innobase/ut/ut0ut.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -39,136 +39,6 @@ Created 5/11/1994 Heikki Tuuri #include "log.h" #include "my_cpu.h" -#ifdef _WIN32 -typedef VOID(WINAPI *time_fn)(LPFILETIME); -static time_fn ut_get_system_time_as_file_time = GetSystemTimeAsFileTime; - -/*****************************************************************//** -NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix -epoch starts from 1970/1/1. For selection of constant see: -http://support.microsoft.com/kb/167296/ */ -#define WIN_TO_UNIX_DELTA_USEC 11644473600000000LL - - -/*****************************************************************//** -This is the Windows version of gettimeofday(2). -@return 0 if all OK else -1 */ -static -int -ut_gettimeofday( -/*============*/ - struct timeval* tv, /*!< out: Values are relative to Unix epoch */ - void* tz) /*!< in: not used */ -{ - FILETIME ft; - int64_t tm; - - if (!tv) { - errno = EINVAL; - return(-1); - } - - ut_get_system_time_as_file_time(&ft); - - tm = (int64_t) ft.dwHighDateTime << 32; - tm |= ft.dwLowDateTime; - - ut_a(tm >= 0); /* If tm wraps over to negative, the quotient / 10 - does not work */ - - tm /= 10; /* Convert from 100 nsec periods to usec */ - - /* If we don't convert to the Unix epoch the value for - struct timeval::tv_sec will overflow.*/ - tm -= WIN_TO_UNIX_DELTA_USEC; - - tv->tv_sec = (long) (tm / 1000000L); - tv->tv_usec = (long) (tm % 1000000L); - - return(0); -} -#else -/** An alias for gettimeofday(2). On Microsoft Windows, we have to -reimplement this function. */ -#define ut_gettimeofday gettimeofday -#endif - -/**********************************************************//** -Returns system time. We do not specify the format of the time returned: -the only way to manipulate it is to use the function ut_difftime. 
-@return system time */ -ib_time_t -ut_time(void) -/*=========*/ -{ - return(time(NULL)); -} - - -/**********************************************************//** -Returns system time. -Upon successful completion, the value 0 is returned; otherwise the -value -1 is returned and the global variable errno is set to indicate the -error. -@return 0 on success, -1 otherwise */ -int -ut_usectime( -/*========*/ - ulint* sec, /*!< out: seconds since the Epoch */ - ulint* ms) /*!< out: microseconds since the Epoch+*sec */ -{ - struct timeval tv; - int ret; - int errno_gettimeofday; - int i; - - for (i = 0; i < 10; i++) { - - ret = ut_gettimeofday(&tv, NULL); - - if (ret == -1) { - errno_gettimeofday = errno; - ib::error() << "gettimeofday(): " - << strerror(errno_gettimeofday); - os_thread_sleep(100000); /* 0.1 sec */ - errno = errno_gettimeofday; - } else { - break; - } - } - - if (ret != -1) { - *sec = (ulint) tv.tv_sec; - *ms = (ulint) tv.tv_usec; - } - - return(ret); -} - -/**********************************************************//** -Returns the number of microseconds since epoch. Similar to -time(3), the return value is also stored in *tloc, provided -that tloc is non-NULL. -@return us since epoch */ -uintmax_t -ut_time_us( -/*=======*/ - uintmax_t* tloc) /*!< out: us since epoch, if non-NULL */ -{ - struct timeval tv; - uintmax_t us; - - ut_gettimeofday(&tv, NULL); - - us = uintmax_t(tv.tv_sec) * 1000000 + uintmax_t(tv.tv_usec); - - if (tloc != NULL) { - *tloc = us; - } - - return(us); -} - /**********************************************************//** Returns the number of milliseconds since some epoch. The value may wrap around. It should only be used for heuristic @@ -178,25 +48,8 @@ ulint ut_time_ms(void) /*============*/ { - struct timeval tv; - - ut_gettimeofday(&tv, NULL); - - return(ulint(tv.tv_sec) * 1000 + ulint(tv.tv_usec / 1000)); -} - -/**********************************************************//** -Returns the difference of two times in seconds. 
-@return time2 - time1 expressed in seconds */ -double -ut_difftime( -/*========*/ - ib_time_t time2, /*!< in: time */ - ib_time_t time1) /*!< in: time */ -{ - return(difftime(time2, time1)); + return static_cast<ulint>(my_interval_timer() / 1000000); } - #endif /* !UNIV_INNOCHECKSUM */ /**********************************************************//** @@ -284,27 +137,6 @@ ut_sprintf_timestamp( } /*************************************************************//** -Runs an idle loop on CPU. The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. -@return dummy value */ -void -ut_delay( -/*=====*/ - ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */ -{ - ulint i; - - HMT_low(); - - for (i = 0; i < delay * 50; i++) { - MY_RELAX_CPU(); - UT_COMPILER_BARRIER(); - } - - HMT_medium(); -} - -/*************************************************************//** Prints the contents of a memory buffer in hex and ascii. */ void ut_print_buf( diff --git a/storage/innobase/ut/ut0wqueue.cc b/storage/innobase/ut/ut0wqueue.cc index 4697aa2fc46..026431695ed 100644 --- a/storage/innobase/ut/ut0wqueue.cc +++ b/storage/innobase/ut/ut0wqueue.cc @@ -135,7 +135,7 @@ ib_wqueue_timedwait( /*================*/ /* out: work item or NULL on timeout*/ ib_wqueue_t* wq, /* in: work queue */ - ib_time_t wait_in_usecs) /* in: wait time in micro seconds */ + ulint wait_in_usecs) /* in: wait time in micro seconds */ { ib_list_node_t* node = NULL; diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 8fc10008ef9..d07d30330a0 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -356,10 +356,12 @@ static PSI_file_info all_aria_files[]= { &key_file_control, "control", PSI_FLAG_GLOBAL} }; +# ifdef HAVE_PSI_STAGE_INTERFACE static PSI_stage_info *all_aria_stages[]= { & stage_waiting_for_a_resource }; +# endif /* HAVE_PSI_STAGE_INTERFACE */ static void init_aria_psi_keys(void) { @@ -380,9 +382,10 @@ static void 
init_aria_psi_keys(void) count= array_elements(all_aria_files); mysql_file_register(category, all_aria_files, count); - +# ifdef HAVE_PSI_STAGE_INTERFACE count= array_elements(all_aria_stages); mysql_stage_register(category, all_aria_stages, count); +# endif /* HAVE_PSI_STAGE_INTERFACE */ } #else #define init_aria_psi_keys() /* no-op */ @@ -1228,7 +1231,7 @@ int ha_maria::close(void) } -int ha_maria::write_row(uchar * buf) +int ha_maria::write_row(const uchar * buf) { /* If we have an auto_increment column and we are writing a changed row @@ -2778,7 +2781,7 @@ int ha_maria::external_lock(THD *thd, int lock_type) changes to commit (rollback shouldn't be tested). */ DBUG_ASSERT(!thd->get_stmt_da()->is_sent() || - thd->killed == KILL_CONNECTION); + thd->killed); /* autocommit ? rollback a transaction */ #ifdef MARIA_CANNOT_ROLLBACK if (ma_commit(trn)) diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index d7043296605..691d1c9747b 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -73,7 +73,7 @@ public: int open(const char *name, int mode, uint test_if_locked); int close(void); - int write_row(uchar * buf); + int write_row(const uchar * buf); int update_row(const uchar * old_data, const uchar * new_data); int delete_row(const uchar * buf); int index_read_map(uchar * buf, const uchar * key, key_part_map keypart_map, diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 8873f191cb0..a267a482074 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -7553,7 +7553,7 @@ void _ma_print_block_info(MARIA_SHARE *share, uchar *buff) { LSN lsn= lsn_korr(buff); - printf("LSN:" LSN_FMT " type: %u dir_entries: %u dir_free: %u empty_space: %u\n", + printf("LSN: " LSN_FMT " type: %u dir_entries: %u dir_free: %u empty_space: %u\n", LSN_IN_PARTS(lsn), (uint)buff[PAGE_TYPE_OFFSET], (uint)buff[DIR_COUNT_OFFSET], diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 7404c2062fc..e2c0fa79a3d 
100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -5432,7 +5432,12 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) info->cur_row.checksum= (*share->calc_check_checksum)(info, sort_param-> record); - reclength= _ma_rec_pack(info,from,sort_param->record); + if (!(reclength= _ma_rec_pack(info,from,sort_param->record))) + { + _ma_check_print_error(param,"Got error %d when packing record", + my_errno); + DBUG_RETURN(1); + } flag=0; do diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 5d6b66e752c..e525602f7a1 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -170,7 +170,7 @@ static int really_execute_checkpoint(void) "Horizon" is a lower bound of the LSN of the next log record. */ checkpoint_start_log_horizon= translog_get_horizon(); - DBUG_PRINT("info",("checkpoint_start_log_horizon " LSN_FMT, + DBUG_PRINT("info",("checkpoint_start_log_horizon " LSN_FMT "", LSN_IN_PARTS(checkpoint_start_log_horizon))); lsn_store(checkpoint_start_log_horizon_char, checkpoint_start_log_horizon); diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index b345315013b..54186613ba9 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -117,8 +117,10 @@ int maria_close(register MARIA_HA *info) share->deleting ? 
FLUSH_IGNORE_CHANGED : FLUSH_RELEASE)) error= my_errno; unmap_file(info); - if (((share->changed && share->base.born_transactional) || - maria_is_crashed(info) || (share->temporary && !share->deleting))) + if (!internal_table && + (((share->changed && share->base.born_transactional) || + maria_is_crashed(info) || + (share->temporary && !share->deleting)))) { if (save_global_changed) { diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index 5273c1bddb7..ae6fc57c114 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -224,6 +224,8 @@ my_bool _ma_write_dynamic_record(MARIA_HA *info, const uchar *record) { ulong reclength= _ma_rec_pack(info,info->rec_buff + MARIA_REC_BUFF_OFFSET, record); + if (!reclength) + return 1; return (write_dynamic_record(info,info->rec_buff + MARIA_REC_BUFF_OFFSET, reclength)); } @@ -234,6 +236,8 @@ my_bool _ma_update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS pos, { uint length= _ma_rec_pack(info, info->rec_buff + MARIA_REC_BUFF_OFFSET, record); + if (!length) + return 1; return (update_dynamic_record(info, pos, info->rec_buff + MARIA_REC_BUFF_OFFSET, length)); @@ -258,12 +262,19 @@ my_bool _ma_write_blob_record(MARIA_HA *info, const uchar *record) reclength2= _ma_rec_pack(info, rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), record); + if (!reclength2) + { + error= 1; + goto err; + } + DBUG_PRINT("info",("reclength: %lu reclength2: %lu", reclength, reclength2)); DBUG_ASSERT(reclength2 <= reclength); error= write_dynamic_record(info, rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), reclength2); +err: my_safe_afree(rec_buff, reclength); return(error != 0); } @@ -293,12 +304,19 @@ my_bool _ma_update_blob_record(MARIA_HA *info, MARIA_RECORD_POS pos, my_errno= HA_ERR_OUT_OF_MEM; /* purecov: inspected */ return(1); } - reclength2= _ma_rec_pack(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), - record); + reclength2= _ma_rec_pack(info, rec_buff+ + ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + record); + 
if (!reclength2) + { + error= 1; + goto err; + } DBUG_ASSERT(reclength2 <= reclength); error=update_dynamic_record(info,pos, rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), reclength2); +err: my_safe_afree(rec_buff, reclength); return(error != 0); } @@ -938,7 +956,12 @@ err: } - /* Pack a record. Return new reclength */ +/** + Pack a record. + + @return new reclength + @return 0 in case of wrong data in record +*/ uint _ma_rec_pack(MARIA_HA *info, register uchar *to, register const uchar *from) @@ -1042,6 +1065,11 @@ uint _ma_rec_pack(MARIA_HA *info, register uchar *to, tmp_length= uint2korr(from); store_key_length_inc(to,tmp_length); } + if (tmp_length > column->length) + { + my_errno= HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(0); + } memcpy(to, from+pack_length,tmp_length); to+= tmp_length; continue; @@ -1613,7 +1641,9 @@ my_bool _ma_cmp_dynamic_record(register MARIA_HA *info, if (!(buffer=(uchar*) my_safe_alloca(buffer_length))) DBUG_RETURN(1); } - reclength= _ma_rec_pack(info,buffer,record); + if (!(reclength= _ma_rec_pack(info,buffer,record))) + goto err; + record= buffer; filepos= info->cur_row.lastpos; diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index bc176eb2878..12cb8f838ef 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -286,7 +286,6 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, We however do a flush here for additional safety. 
*/ /** @todo consider porting these flush-es to MyISAM */ - DBUG_ASSERT(share->reopen == 1); error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE); if (!error && share->changed) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index d0fb480d955..8f3c9d4bfac 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -3859,7 +3859,14 @@ my_bool translog_init_with_table(const char *directory, my_bool pageok; DBUG_PRINT("info", ("The log is really present")); - DBUG_ASSERT(sure_page <= last_page); + if (sure_page > last_page) + { + my_printf_error(HA_ERR_GENERIC, "Aria engine: log data error\n" + "last_log_page: " LSN_FMT " is less than\n" + "checkpoint page: " LSN_FMT, MYF(0), + LSN_IN_PARTS(last_page), LSN_IN_PARTS(sure_page)); + goto err; + } /* TODO: check page size */ @@ -4007,7 +4014,7 @@ my_bool translog_init_with_table(const char *directory, if (!logs_found) { TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(sizeof(TRANSLOG_FILE), - MYF(0)); + MYF(MY_WME)); DBUG_PRINT("info", ("The log is not found => we will create new log")); if (file == NULL) goto err; @@ -5329,7 +5336,7 @@ static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst) { uint64 diff; DBUG_ENTER("translog_put_LSN_diff"); - DBUG_PRINT("enter", ("Base: " LSN_FMT " val: " LSN_FMT " dst:%p", + DBUG_PRINT("enter", ("Base: " LSN_FMT " val: " LSN_FMT " dst: %p", LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn), dst)); DBUG_ASSERT(base_lsn > lsn); @@ -5375,7 +5382,7 @@ static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst) dst[1]= 1; lsn_store(dst + 2, lsn); } - DBUG_PRINT("info", ("new dst:%p", dst)); + DBUG_PRINT("info", ("new dst: %p", dst)); DBUG_RETURN(dst); } @@ -7994,7 +8001,7 @@ void translog_flush_buffers(TRANSLOG_ADDRESS *lsn, { struct st_translog_buffer *buffer= log_descriptor.buffers + i; translog_buffer_lock(buffer); - DBUG_PRINT("info", ("Check buffer:%p #: %u " + 
DBUG_PRINT("info", ("Check buffer: %p #: %u " "prev last LSN: " LSN_FMT " " "last LSN: " LSN_FMT " status: %s", buffer, diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index e83b16e0404..cf17d88f6e9 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -1,4 +1,5 @@ /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + Copyright (c) 2009, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1502,7 +1503,7 @@ uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite) is too new). Recovery does it by itself. */ share->state.is_of_horizon= translog_get_horizon(); - DBUG_PRINT("info", ("is_of_horizon set to LSN " LSN_FMT, + DBUG_PRINT("info", ("is_of_horizon set to LSN " LSN_FMT "", LSN_IN_PARTS(share->state.is_of_horizon))); } res= _ma_state_info_write_sub(share->kfile.file, &share->state, pWrite); diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 6b37b9f1bb2..ae86de4b731 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -538,8 +538,6 @@ end: if (error && !abort_message_printed) { - if (!trace_file) - fputc('\n', stderr); my_message(HA_ERR_INITIALIZATION, "Aria recovery failed. 
Please run aria_chk -r on all Aria " "tables and delete all aria_log.######## files", MYF(0)); @@ -674,13 +672,16 @@ prototype_redo_exec_hook(INCOMPLETE_LOG) { MARIA_HA *info; + /* We try to get table first, so that we get the table in in the trace log */ + info= get_MARIA_HA_from_REDO_record(rec); + if (skip_DDLs) { tprint(tracef, "we skip DDLs\n"); return 0; } - if ((info= get_MARIA_HA_from_REDO_record(rec)) == NULL) + if (!info) { /* no such table, don't need to warn */ return 0; @@ -1151,6 +1152,9 @@ prototype_redo_exec_hook(REDO_REPAIR_TABLE) my_bool quick_repair; DBUG_ENTER("exec_REDO_LOGREC_REDO_REPAIR_TABLE"); + /* We try to get table first, so that we get the table in in the trace log */ + info= get_MARIA_HA_from_REDO_record(rec); + if (skip_DDLs) { /* @@ -1160,8 +1164,13 @@ prototype_redo_exec_hook(REDO_REPAIR_TABLE) tprint(tracef, "we skip DDLs\n"); DBUG_RETURN(0); } - if ((info= get_MARIA_HA_from_REDO_record(rec)) == NULL) - DBUG_RETURN(0); + + if (!info) + { + /* no such table, don't need to warn */ + return 0; + } + if (maria_is_crashed(info)) { tprint(tracef, "we skip repairing crashed table\n"); @@ -1463,17 +1472,21 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) } if (share->state.state.data_file_length != dfile_len) { - tprint(tracef, ", has wrong state.data_file_length (fixing it)"); + tprint(tracef, ", has wrong state.data_file_length " + "(fixing it from %llu to %llu)", + (ulonglong) share->state.state.data_file_length, (ulonglong) dfile_len); share->state.state.data_file_length= dfile_len; } if (share->state.state.key_file_length != kfile_len) { - tprint(tracef, ", has wrong state.key_file_length (fixing it)"); + tprint(tracef, ", has wrong state.key_file_length " + "(fixing it from %llu to %llu)", + (ulonglong) share->state.state.key_file_length, (ulonglong) kfile_len); share->state.state.key_file_length= kfile_len; } if ((dfile_len % share->block_size) || (kfile_len % share->block_size)) { - tprint(tracef, ", has 
too short last page\n"); + tprint(tracef, ", has too short last page"); /* Recovery will fix this, no error */ ALERT_USER(); } @@ -2784,7 +2797,7 @@ static int run_redo_phase(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply) { fprintf(stderr, " 100%%"); fflush(stderr); - procent_printed= 1; + procent_printed= 1; /* Will be follwed by time */ } DBUG_RETURN(0); @@ -2934,7 +2947,6 @@ static int run_undo_phase(uint uncommitted) recovery_message_printed= REC_MSG_UNDO; } tprint(tracef, "%u transactions will be rolled back\n", uncommitted); - procent_printed= 1; for( ; ; ) { char llbuf[22]; @@ -2987,7 +2999,6 @@ static int run_undo_phase(uint uncommitted) /* In the future, we want to have this phase *online* */ } } - procent_printed= 0; DBUG_RETURN(0); } @@ -3487,6 +3498,11 @@ static int close_all_tables(void) } } end: + if (recovery_message_printed == REC_MSG_FLUSH) + { + fputc('\n', stderr); + fflush(stderr); + } mysql_mutex_unlock(&THR_LOCK_maria); DBUG_RETURN(error); } diff --git a/storage/maria/ma_recovery_util.c b/storage/maria/ma_recovery_util.c index 54d7420c96e..3b617f625f0 100644 --- a/storage/maria/ma_recovery_util.c +++ b/storage/maria/ma_recovery_util.c @@ -70,14 +70,7 @@ void tprint(FILE *trace_file __attribute__ ((unused)), #endif va_start(args, format); if (trace_file != NULL) - { - if (procent_printed) - { - procent_printed= 0; - fputc('\n', trace_file); - } vfprintf(trace_file, format, args); - } va_end(args); } @@ -93,9 +86,10 @@ void eprint(FILE *trace_file __attribute__ ((unused)), if (procent_printed) { - /* In silent mode, print on another line than the 0% 10% 20% line */ procent_printed= 0; - fputc('\n', trace_file); + /* In silent mode, print on another line than the 0% 10% 20% line */ + fputc('\n', stderr); + fflush(stderr); } vfprintf(trace_file , format, args); fputc('\n', trace_file); diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index faf47929973..07be8333794 100644 --- a/storage/maria/ma_write.c +++ 
b/storage/maria/ma_write.c @@ -83,7 +83,7 @@ my_bool _ma_write_abort_default(MARIA_HA *info __attribute__((unused))) /* Write new record to a table */ -int maria_write(MARIA_HA *info, uchar *record) +int maria_write(MARIA_HA *info, const uchar *record) { MARIA_SHARE *share= info->s; uint i; diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp index 810762e53a0..085ccf55b09 100644 --- a/storage/mroonga/ha_mroonga.cpp +++ b/storage/mroonga/ha_mroonga.cpp @@ -5828,7 +5828,7 @@ bool ha_mroonga::wrapper_have_target_index() DBUG_RETURN(have_target_index); } -int ha_mroonga::wrapper_write_row(uchar *buf) +int ha_mroonga::wrapper_write_row(const uchar *buf) { int error = 0; THD *thd = ha_thd(); @@ -5857,7 +5857,7 @@ int ha_mroonga::wrapper_write_row(uchar *buf) DBUG_RETURN(error); } -int ha_mroonga::wrapper_write_row_index(uchar *buf) +int ha_mroonga::wrapper_write_row_index(const uchar *buf) { MRN_DBUG_ENTER_METHOD(); @@ -5944,7 +5944,7 @@ err: DBUG_RETURN(error); } -int ha_mroonga::storage_write_row(uchar *buf) +int ha_mroonga::storage_write_row(const uchar *buf) { MRN_DBUG_ENTER_METHOD(); int error = 0; @@ -6207,7 +6207,7 @@ err: DBUG_RETURN(error); } -int ha_mroonga::storage_write_row_multiple_column_index(uchar *buf, +int ha_mroonga::storage_write_row_multiple_column_index(const uchar *buf, grn_id record_id, KEY *key_info, grn_obj *index_column) @@ -6244,7 +6244,7 @@ int ha_mroonga::storage_write_row_multiple_column_index(uchar *buf, DBUG_RETURN(error); } -int ha_mroonga::storage_write_row_multiple_column_indexes(uchar *buf, +int ha_mroonga::storage_write_row_multiple_column_indexes(const uchar *buf, grn_id record_id) { MRN_DBUG_ENTER_METHOD(); @@ -6356,7 +6356,7 @@ int ha_mroonga::storage_write_row_unique_index(const uchar *buf, DBUG_RETURN(0); } -int ha_mroonga::storage_write_row_unique_indexes(uchar *buf) +int ha_mroonga::storage_write_row_unique_indexes(const uchar *buf) { int error = 0; uint i; @@ -6419,7 +6419,7 @@ err: 
DBUG_RETURN(error); } -int ha_mroonga::write_row(uchar *buf) +int ha_mroonga::write_row(const uchar *buf) { MRN_DBUG_ENTER_METHOD(); int error = 0; diff --git a/storage/mroonga/ha_mroonga.hpp b/storage/mroonga/ha_mroonga.hpp index 72377c7b955..d105a0738ca 100644 --- a/storage/mroonga/ha_mroonga.hpp +++ b/storage/mroonga/ha_mroonga.hpp @@ -407,11 +407,11 @@ public: ha_mroonga(handlerton *hton, TABLE_SHARE *share_arg); ~ha_mroonga(); const char *table_type() const; // required - const char *index_type(uint inx); + const char *index_type(uint inx) mrn_override; const char **bas_ext() const; // required - ulonglong table_flags() const; // required - ulong index_flags(uint idx, uint part, bool all_parts) const; // required + ulonglong table_flags() const mrn_override; // required + ulong index_flags(uint idx, uint part, bool all_parts) const mrn_override; // required // required int create(const char *name, TABLE *form, HA_CREATE_INFO *info @@ -430,40 +430,40 @@ public: #ifndef MRN_HANDLER_HAVE_HA_CLOSE int close(); // required #endif - int info(uint flag); // required + int info(uint flag) mrn_override; // required - uint lock_count() const; + uint lock_count() const mrn_override; THR_LOCK_DATA **store_lock(THD *thd, // required THR_LOCK_DATA **to, - enum thr_lock_type lock_type); - int external_lock(THD *thd, int lock_type); + enum thr_lock_type lock_type) mrn_override; + int external_lock(THD *thd, int lock_type) mrn_override; - int rnd_init(bool scan); // required - int rnd_end(); + int rnd_init(bool scan) mrn_override; // required + int rnd_end() mrn_override; #ifndef MRN_HANDLER_HAVE_HA_RND_NEXT int rnd_next(uchar *buf); // required #endif #ifndef MRN_HANDLER_HAVE_HA_RND_POS int rnd_pos(uchar *buf, uchar *pos); // required #endif - void position(const uchar *record); // required - int extra(enum ha_extra_function operation); - int extra_opt(enum ha_extra_function operation, ulong cache_size); + void position(const uchar *record) mrn_override; // required + int 
extra(enum ha_extra_function operation) mrn_override; + int extra_opt(enum ha_extra_function operation, ulong cache_size) mrn_override; - int delete_table(const char *name); - int write_row(uchar *buf); - int update_row(const uchar *old_data, const uchar *new_data); - int delete_row(const uchar *buf); + int delete_table(const char *name) mrn_override; + int write_row(const uchar *buf) mrn_override; + int update_row(const uchar *old_data, const uchar *new_data) mrn_override; + int delete_row(const uchar *buf) mrn_override; - uint max_supported_record_length() const; - uint max_supported_keys() const; - uint max_supported_key_parts() const; - uint max_supported_key_length() const; - uint max_supported_key_part_length() const; + uint max_supported_record_length() const mrn_override; + uint max_supported_keys() const mrn_override; + uint max_supported_key_parts() const mrn_override; + uint max_supported_key_length() const mrn_override; + uint max_supported_key_part_length() const mrn_override; - ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); - int index_init(uint idx, bool sorted); - int index_end(); + ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key) mrn_override; + int index_init(uint idx, bool sorted) mrn_override; + int index_end() mrn_override; #ifndef MRN_HANDLER_HAVE_HA_INDEX_READ_MAP int index_read_map(uchar * buf, const uchar * key, key_part_map keypart_map, @@ -485,35 +485,35 @@ public: #ifndef MRN_HANDLER_HAVE_HA_INDEX_LAST int index_last(uchar *buf); #endif - int index_next_same(uchar *buf, const uchar *key, uint keylen); + int index_next_same(uchar *buf, const uchar *key, uint keylen) mrn_override; - int ft_init(); - FT_INFO *ft_init_ext(uint flags, uint inx, String *key); - int ft_read(uchar *buf); + int ft_init() mrn_override; + FT_INFO *ft_init_ext(uint flags, uint inx, String *key) mrn_override; + int ft_read(uchar *buf) mrn_override; - const Item *cond_push(const Item *cond); - void cond_pop(); + 
const Item *cond_push(const Item *cond) mrn_override; + void cond_pop() mrn_override; - bool get_error_message(int error, String *buf); + bool get_error_message(int error, String *buf) mrn_override; - int reset(); + int reset() mrn_override; - handler *clone(const char *name, MEM_ROOT *mem_root); - uint8 table_cache_type(); + handler *clone(const char *name, MEM_ROOT *mem_root) mrn_override; + uint8 table_cache_type() mrn_override; #ifdef MRN_HANDLER_HAVE_MULTI_RANGE_READ ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint *bufsz, - uint *flags, Cost_estimate *cost); + uint *flags, Cost_estimate *cost) mrn_override; ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, #ifdef MRN_HANDLER_HAVE_MULTI_RANGE_READ_INFO_KEY_PARTS uint key_parts, #endif - uint *bufsz, uint *flags, Cost_estimate *cost); + uint *bufsz, uint *flags, Cost_estimate *cost) mrn_override; int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint mode, - HANDLER_BUFFER *buf); - int multi_range_read_next(range_id_t *range_info); + HANDLER_BUFFER *buf) mrn_override; + int multi_range_read_next(range_id_t *range_info) mrn_override; #else // MRN_HANDLER_HAVE_MULTI_RANGE_READ int read_multi_range_first(KEY_MULTI_RANGE **found_range_p, KEY_MULTI_RANGE *ranges, @@ -523,38 +523,38 @@ public: int read_multi_range_next(KEY_MULTI_RANGE **found_range_p); #endif // MRN_HANDLER_HAVE_MULTI_RANGE_READ #ifdef MRN_HANDLER_START_BULK_INSERT_HAS_FLAGS - void start_bulk_insert(ha_rows rows, uint flags); + void start_bulk_insert(ha_rows rows, uint flags) mrn_override; #else void start_bulk_insert(ha_rows rows); #endif - int end_bulk_insert(); - int delete_all_rows(); - int truncate(); - double scan_time(); - double read_time(uint index, uint ranges, ha_rows rows); + int end_bulk_insert() mrn_override; + int delete_all_rows() mrn_override; + int truncate() mrn_override; + double scan_time() mrn_override; + double 
read_time(uint index, uint ranges, ha_rows rows) mrn_override; #ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING - const key_map *keys_to_use_for_scanning(); + const key_map *keys_to_use_for_scanning() mrn_override; #endif - ha_rows estimate_rows_upper_bound(); - void update_create_info(HA_CREATE_INFO* create_info); - int rename_table(const char *from, const char *to); - bool is_crashed() const; - bool auto_repair(int error) const; + ha_rows estimate_rows_upper_bound() mrn_override; + void update_create_info(HA_CREATE_INFO* create_info) mrn_override; + int rename_table(const char *from, const char *to) mrn_override; + bool is_crashed() const mrn_override; + bool auto_repair(int error) const mrn_override; bool auto_repair() const; - int disable_indexes(uint mode); - int enable_indexes(uint mode); - int check(THD* thd, HA_CHECK_OPT* check_opt); - int repair(THD* thd, HA_CHECK_OPT* check_opt); - bool check_and_repair(THD *thd); - int analyze(THD* thd, HA_CHECK_OPT* check_opt); - int optimize(THD* thd, HA_CHECK_OPT* check_opt); - bool is_fatal_error(int error_num, uint flags=0); + int disable_indexes(uint mode) mrn_override; + int enable_indexes(uint mode) mrn_override; + int check(THD* thd, HA_CHECK_OPT* check_opt) mrn_override; + int repair(THD* thd, HA_CHECK_OPT* check_opt) mrn_override; + bool check_and_repair(THD *thd) mrn_override; + int analyze(THD* thd, HA_CHECK_OPT* check_opt) mrn_override; + int optimize(THD* thd, HA_CHECK_OPT* check_opt) mrn_override; + bool is_fatal_error(int error_num, uint flags=0) mrn_override; bool check_if_incompatible_data(HA_CREATE_INFO *create_info, - uint table_changes); + uint table_changes) mrn_override; #ifdef MRN_HANDLER_HAVE_CHECK_IF_SUPPORTED_INPLACE_ALTER enum_alter_inplace_result check_if_supported_inplace_alter(TABLE *altered_table, - Alter_inplace_info *ha_alter_info); + Alter_inplace_info *ha_alter_info) mrn_override; #else alter_table_operations alter_table_flags(alter_table_operations flags); # ifdef 
MRN_HANDLER_HAVE_FINAL_ADD_INDEX @@ -570,77 +570,77 @@ public: int update_auto_increment(); void set_next_insert_id(ulonglong id); void get_auto_increment(ulonglong offset, ulonglong increment, ulonglong nb_desired_values, - ulonglong *first_value, ulonglong *nb_reserved_values); + ulonglong *first_value, ulonglong *nb_reserved_values) mrn_override; void restore_auto_increment(ulonglong prev_insert_id); - void release_auto_increment(); - int check_for_upgrade(HA_CHECK_OPT *check_opt); + void release_auto_increment() mrn_override; + int check_for_upgrade(HA_CHECK_OPT *check_opt) mrn_override; #ifdef MRN_HANDLER_HAVE_RESET_AUTO_INCREMENT - int reset_auto_increment(ulonglong value); + int reset_auto_increment(ulonglong value) mrn_override; #endif - bool was_semi_consistent_read(); - void try_semi_consistent_read(bool yes); - void unlock_row(); - int start_stmt(THD *thd, thr_lock_type lock_type); + bool was_semi_consistent_read() mrn_override; + void try_semi_consistent_read(bool yes) mrn_override; + void unlock_row() mrn_override; + int start_stmt(THD *thd, thr_lock_type lock_type) mrn_override; protected: #ifdef MRN_HANDLER_RECORDS_RETURN_ERROR int records(ha_rows *num_rows); #else - ha_rows records(); + ha_rows records() mrn_override; #endif #ifdef MRN_HANDLER_HAVE_HA_RND_NEXT - int rnd_next(uchar *buf); + int rnd_next(uchar *buf) mrn_override; #endif #ifdef MRN_HANDLER_HAVE_HA_RND_POS - int rnd_pos(uchar *buf, uchar *pos); + int rnd_pos(uchar *buf, uchar *pos) mrn_override; #endif #ifdef MRN_HANDLER_HAVE_HA_INDEX_READ_MAP int index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, - enum ha_rkey_function find_flag); + enum ha_rkey_function find_flag) mrn_override; #endif #ifdef MRN_HANDLER_HAVE_HA_INDEX_NEXT - int index_next(uchar *buf); + int index_next(uchar *buf) mrn_override; #endif #ifdef MRN_HANDLER_HAVE_HA_INDEX_PREV - int index_prev(uchar *buf); + int index_prev(uchar *buf) mrn_override; #endif #ifdef MRN_HANDLER_HAVE_HA_INDEX_FIRST - int 
index_first(uchar *buf); + int index_first(uchar *buf) mrn_override; #endif #ifdef MRN_HANDLER_HAVE_HA_INDEX_LAST - int index_last(uchar *buf); + int index_last(uchar *buf) mrn_override; #endif - void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share_arg); - bool primary_key_is_clustered(); - bool is_fk_defined_on_table_or_index(uint index); - char *get_foreign_key_create_info(); + void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share_arg) mrn_override; + bool primary_key_is_clustered() mrn_override; + bool is_fk_defined_on_table_or_index(uint index) mrn_override; + char *get_foreign_key_create_info() mrn_override; #ifdef MRN_HANDLER_HAVE_GET_TABLESPACE_NAME char *get_tablespace_name(THD *thd, char *name, uint name_len); #endif - bool can_switch_engines(); - int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list); - int get_parent_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list); - uint referenced_by_foreign_key(); - void init_table_handle_for_HANDLER(); - void free_foreign_key_create_info(char* str); + bool can_switch_engines() mrn_override; + int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list) mrn_override; + int get_parent_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list) mrn_override; + uint referenced_by_foreign_key() mrn_override; + void init_table_handle_for_HANDLER() mrn_override; + void free_foreign_key_create_info(char* str) mrn_override; #ifdef MRN_HAVE_HA_REBIND_PSI - void unbind_psi(); - void rebind_psi(); + void unbind_psi() mrn_override; + void rebind_psi() mrn_override; #endif my_bool register_query_cache_table(THD *thd, const char *table_key, uint key_length, qc_engine_callback *engine_callback, - ulonglong *engine_data); + ulonglong *engine_data) mrn_override; #ifdef MRN_HANDLER_HAVE_CHECK_IF_SUPPORTED_INPLACE_ALTER bool prepare_inplace_alter_table(TABLE *altered_table, - Alter_inplace_info *ha_alter_info); + Alter_inplace_info *ha_alter_info) mrn_override; bool 
inplace_alter_table(TABLE *altered_table, - Alter_inplace_info *ha_alter_info); + Alter_inplace_info *ha_alter_info) mrn_override; bool commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, - bool commit); + bool commit) mrn_override; #endif private: @@ -691,7 +691,7 @@ private: int generic_geo_open_cursor(const uchar *key, enum ha_rkey_function find_flag); #ifdef MRN_HANDLER_HAVE_HA_CLOSE - int close(); + int close() mrn_override; #endif bool is_dry_write(); bool is_enable_optimization(); @@ -907,20 +907,20 @@ private: int storage_end_bulk_insert(); bool wrapper_is_target_index(KEY *key_info); bool wrapper_have_target_index(); - int wrapper_write_row(uchar *buf); - int wrapper_write_row_index(uchar *buf); - int storage_write_row(uchar *buf); - int storage_write_row_multiple_column_index(uchar *buf, + int wrapper_write_row(const uchar *buf); + int wrapper_write_row_index(const uchar *buf); + int storage_write_row(const uchar *buf); + int storage_write_row_multiple_column_index(const uchar *buf, grn_id record_id, KEY *key_info, grn_obj *index_column); - int storage_write_row_multiple_column_indexes(uchar *buf, grn_id record_id); + int storage_write_row_multiple_column_indexes(const uchar *buf, grn_id record_id); int storage_write_row_unique_index(const uchar *buf, KEY *key_info, grn_obj *index_table, grn_obj *index_column, grn_id *key_id); - int storage_write_row_unique_indexes(uchar *buf); + int storage_write_row_unique_indexes(const uchar *buf); int wrapper_get_record_id(uchar *data, grn_id *record_id, const char *context); int wrapper_update_row(const uchar *old_data, const uchar *new_data); diff --git a/storage/mroonga/vendor/groonga/CMakeLists.txt b/storage/mroonga/vendor/groonga/CMakeLists.txt index e1f45b7fda9..564e859d146 100644 --- a/storage/mroonga/vendor/groonga/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/CMakeLists.txt @@ -77,9 +77,8 @@ if(DEFINED GRN_EMBED) else() set(GRN_EMBED_DEFAULT OFF) endif() -option(GRN_EMBED 
- "Build as a static library to embed into an application" - ${GRN_EMBED_DEFAULT}) +set(GRN_EMBED ${GRN_EMBED_DEFAULT} CACHE BOOL + "Build as a static library to embed into an application") set(BIN_DIR "bin") set(SBIN_DIR "sbin") diff --git a/storage/mroonga/vendor/groonga/lib/hash.c b/storage/mroonga/vendor/groonga/lib/hash.c index 8fe180481c9..3fb372ee222 100644 --- a/storage/mroonga/vendor/groonga/lib/hash.c +++ b/storage/mroonga/vendor/groonga/lib/hash.c @@ -89,12 +89,6 @@ grn_tiny_array_at_inline(grn_tiny_array *array, grn_id id) return id ? grn_tiny_array_put(array, id) : NULL; } -inline static void * -grn_tiny_array_next(grn_tiny_array *array) -{ - return grn_tiny_array_put(array, array->max + 1); -} - void grn_tiny_array_init(grn_ctx *ctx, grn_tiny_array *array, uint16_t element_size, uint16_t flags) @@ -204,15 +198,6 @@ grn_tiny_bitmap_put_byte(grn_tiny_bitmap *bitmap, grn_id bit_id) { /* Requirements: bit_id != GRN_ID_NIL. */ /* Return value: 1/0 on success, -1 on failure. */ -inline static int -grn_tiny_bitmap_get(grn_tiny_bitmap *bitmap, grn_id bit_id) -{ - uint8_t * const ptr = grn_tiny_bitmap_get_byte(bitmap, bit_id); - return ptr ? ((*ptr >> (bit_id & 7)) & 1) : -1; -} - -/* Requirements: bit_id != GRN_ID_NIL. */ -/* Return value: 1/0 on success, -1 on failure. */ /* Note: A bitmap is extended if needed. 
*/ inline static int grn_tiny_bitmap_put(grn_tiny_bitmap *bitmap, grn_id bit_id) @@ -309,18 +294,6 @@ grn_io_array_bit_off(grn_ctx *ctx, grn_io *io, return ptr; } -inline static void * -grn_io_array_bit_flip(grn_ctx *ctx, grn_io *io, - uint32_t segment_id, uint32_t offset) -{ - uint8_t * const ptr = (uint8_t *)grn_io_array_at_inline( - ctx, io, segment_id, (offset >> 3) + 1, GRN_TABLE_ADD); - if (ptr) { - *ptr ^= 1 << (offset & 7); - } - return ptr; -} - /* grn_table_queue */ static void @@ -1738,13 +1711,6 @@ get_value(grn_ctx *ctx, grn_hash *hash, entry_str *n) return grn_hash_entry_get_value(ctx, hash, (grn_hash_entry *)n); } -inline static grn_rc -put_key(grn_ctx *ctx, grn_hash *hash, entry_str *n, uint32_t h, - const char *key, unsigned int len) -{ - return grn_hash_entry_put_key(ctx, hash, (grn_hash_entry *)n, h, key, len); -} - inline static int match_key(grn_ctx *ctx, grn_hash *hash, entry_str *ee, uint32_t h, const char *key, unsigned int len) diff --git a/storage/mroonga/vendor/groonga/lib/ii.c b/storage/mroonga/vendor/groonga/lib/ii.c index 58c789616d9..cd5559e6958 100644 --- a/storage/mroonga/vendor/groonga/lib/ii.c +++ b/storage/mroonga/vendor/groonga/lib/ii.c @@ -2194,23 +2194,6 @@ buffer_close(grn_ctx *ctx, grn_ii *ii, uint32_t pseg) return GRN_SUCCESS; } -inline static uint32_t -buffer_open_if_capable(grn_ctx *ctx, grn_ii *ii, int32_t seg, int size, buffer **b) -{ - uint32_t pseg, pos = SEG2POS(seg, 0); - if ((pseg = buffer_open(ctx, ii, pos, NULL, b)) != GRN_II_PSEG_NOT_ASSIGNED) { - uint16_t nterms = (*b)->header.nterms - (*b)->header.nterms_void; - if (!((nterms < 4096 || - (ii->header->total_chunk_size >> ((nterms >> 8) - 6)) - > (*b)->header.chunk_size) && - ((*b)->header.buffer_free >= size + sizeof(buffer_term)))) { - buffer_close(ctx, ii, pseg); - return GRN_II_PSEG_NOT_ASSIGNED; - } - } - return pseg; -} - typedef struct { uint32_t rid; uint32_t sid; diff --git a/storage/mroonga/vendor/groonga/lib/pat.c 
b/storage/mroonga/vendor/groonga/lib/pat.c index e2f98fba0d2..642173e2fdc 100644 --- a/storage/mroonga/vendor/groonga/lib/pat.c +++ b/storage/mroonga/vendor/groonga/lib/pat.c @@ -142,20 +142,6 @@ pat_get(grn_ctx *ctx, grn_pat *pat, grn_id id) return res; } -inline static pat_node * -pat_node_new(grn_ctx *ctx, grn_pat *pat, grn_id *id) -{ - uint32_t n = pat->header->curr_rec + 1; - pat_node *res; - if (n > GRN_ID_MAX) { return NULL; } - if ((res = pat_get(ctx, pat, n))) { - pat->header->curr_rec = n; - pat->header->n_entries++; - } - if (id) { *id = n; } - return res; -} - /* sis operation */ inline static sis_node * diff --git a/storage/mroonga/vendor/groonga/lib/ts/ts_expr_node.c b/storage/mroonga/vendor/groonga/lib/ts/ts_expr_node.c index dc64e802fbb..ddd69714b1e 100644 --- a/storage/mroonga/vendor/groonga/lib/ts/ts_expr_node.c +++ b/storage/mroonga/vendor/groonga/lib/ts/ts_expr_node.c @@ -187,55 +187,6 @@ grn_ts_ref_zero(void) return (grn_ts_ref){ 0, 0.0 }; } -/* grn_ts_bool_vector_zero() returns a zero. */ -inline static grn_ts_bool_vector -grn_ts_bool_vector_zero(void) -{ - return (grn_ts_bool_vector){ NULL, 0 }; -} - -/* grn_ts_int_vector_zero() returns a zero. */ -inline static grn_ts_int_vector -grn_ts_int_vector_zero(void) -{ - return (grn_ts_int_vector){ NULL, 0 }; -} - -/* grn_ts_float_vector_zero() returns a zero. */ -inline static grn_ts_float_vector -grn_ts_float_vector_zero(void) -{ - return (grn_ts_float_vector){ NULL, 0 }; -} - -/* grn_ts_time_vector_zero() returns a zero. */ -inline static grn_ts_time_vector -grn_ts_time_vector_zero(void) -{ - return (grn_ts_time_vector){ NULL, 0 }; -} - -/* grn_ts_text_vector_zero() returns a zero. */ -inline static grn_ts_text_vector -grn_ts_text_vector_zero(void) -{ - return (grn_ts_text_vector){ NULL, 0 }; -} - -/* grn_ts_geo_vector_zero() returns a zero. 
*/ -inline static grn_ts_geo_vector -grn_ts_geo_vector_zero(void) -{ - return (grn_ts_geo_vector){ NULL, 0 }; -} - -/* grn_ts_ref_vector_zero() returns a zero. */ -inline static grn_ts_ref_vector -grn_ts_ref_vector_zero(void) -{ - return (grn_ts_ref_vector){ NULL, 0 }; -} - /* grn_ts_data_type_to_kind() returns a kind associated with a type. */ static grn_ts_data_kind grn_ts_data_type_to_kind(grn_ts_data_type type) diff --git a/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/CMakeLists.txt b/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/CMakeLists.txt index 80e531e5319..d1ca2a8a0d8 100644 --- a/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/CMakeLists.txt @@ -25,9 +25,8 @@ if(DEFINED GROONGA_NORMALIZER_MYSQL_EMBED) else() set(GROONGA_NORMALIZER_MYSQL_EMBED_DEFAULT OFF) endif() -option(GROONGA_NORMALIZER_MYSQL_EMBED - "Build as a static library to embed into an application" - ${GROONGA_NORMALIZER_MYSQL_EMBED_DEFAULT}) +set(GROONGA_NORMALIZER_MYSQL_EMBED ${GROONGA_NORMALIZER_MYSQL_EMBED_DEFAULT} + CACHE BOOL "Build as a static library to embed into an application") file(READ "${CMAKE_CURRENT_SOURCE_DIR}/version" VERSION) diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index 218033edc4f..bc3fbf2dbd5 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -933,7 +933,7 @@ int ha_myisam::close(void) return mi_close(tmp); } -int ha_myisam::write_row(uchar *buf) +int ha_myisam::write_row(const uchar *buf) { /* If we have an auto_increment column and we are writing a changed row diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index 3e191ecfa3f..0af4215e8f9 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -69,7 +69,7 @@ class ha_myisam: public handler void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share); int 
open(const char *name, int mode, uint test_if_locked); int close(void); - int write_row(uchar * buf); + int write_row(const uchar * buf); int update_row(const uchar * old_data, const uchar * new_data); int delete_row(const uchar * buf); int index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c index f08dea77354..7345ab1604d 100644 --- a/storage/myisam/mi_write.c +++ b/storage/myisam/mi_write.c @@ -40,7 +40,7 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr,uchar *key, /* Write new record to database */ -int mi_write(MI_INFO *info, uchar *record) +int mi_write(MI_INFO *info, const uchar *record) { MYISAM_SHARE *share=info->s; uint i; diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc index 8f5e65084ce..14036a31b8c 100644 --- a/storage/myisammrg/ha_myisammrg.cc +++ b/storage/myisammrg/ha_myisammrg.cc @@ -1088,7 +1088,7 @@ int ha_myisammrg::close(void) DBUG_RETURN(rc); } -int ha_myisammrg::write_row(uchar * buf) +int ha_myisammrg::write_row(const uchar * buf) { DBUG_ENTER("ha_myisammrg::write_row"); DBUG_ASSERT(this->file->children_attached); diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h index 7cb2b0f1993..b7cbd6c7d12 100644 --- a/storage/myisammrg/ha_myisammrg.h +++ b/storage/myisammrg/ha_myisammrg.h @@ -111,7 +111,7 @@ public: int detach_children(void); virtual handler *clone(const char *name, MEM_ROOT *mem_root); int close(void); - int write_row(uchar * buf); + int write_row(const uchar * buf); int update_row(const uchar * old_data, const uchar * new_data); int delete_row(const uchar * buf); int index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, diff --git a/storage/myisammrg/myrg_write.c b/storage/myisammrg/myrg_write.c index eb496911a52..e511d60d634 100644 --- a/storage/myisammrg/myrg_write.c +++ b/storage/myisammrg/myrg_write.c @@ -18,7 +18,7 @@ #include "myrg_def.h" -int 
myrg_write(register MYRG_INFO *info, uchar *rec) +int myrg_write(register MYRG_INFO *info, const uchar *rec) { /* [phi] MERGE_WRITE_DISABLED is handled by the else case */ if (info->merge_insert_method == MERGE_INSERT_TO_FIRST) diff --git a/storage/oqgraph/ha_oqgraph.cc b/storage/oqgraph/ha_oqgraph.cc index 2770fa97777..20ebd49bd5d 100644 --- a/storage/oqgraph/ha_oqgraph.cc +++ b/storage/oqgraph/ha_oqgraph.cc @@ -798,7 +798,7 @@ void ha_oqgraph::update_key_stats() } -int ha_oqgraph::write_row(byte * buf) +int ha_oqgraph::write_row(const byte * buf) { return HA_ERR_TABLE_READONLY; } diff --git a/storage/oqgraph/ha_oqgraph.h b/storage/oqgraph/ha_oqgraph.h index d0ac8d7e18c..0c0af6def97 100644 --- a/storage/oqgraph/ha_oqgraph.h +++ b/storage/oqgraph/ha_oqgraph.h @@ -83,7 +83,7 @@ public: int open(const char *name, int mode, uint test_if_locked); int close(void); - int write_row(byte * buf); + int write_row(const byte * buf); int update_row(const uchar * old_data, const uchar * new_data); int delete_row(const byte * buf); int index_read(byte * buf, const byte * key, diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc index 8f6346fcf15..51047561748 100644 --- a/storage/perfschema/ha_perfschema.cc +++ b/storage/perfschema/ha_perfschema.cc @@ -250,7 +250,7 @@ int ha_perfschema::close(void) DBUG_RETURN(0); } -int ha_perfschema::write_row(uchar *buf) +int ha_perfschema::write_row(const uchar *buf) { int result; diff --git a/storage/perfschema/ha_perfschema.h b/storage/perfschema/ha_perfschema.h index 4aad6a3beef..31b4e84e87d 100644 --- a/storage/perfschema/ha_perfschema.h +++ b/storage/perfschema/ha_perfschema.h @@ -120,7 +120,7 @@ public: @param buf the row to write @return 0 on success */ - int write_row(uchar *buf); + int write_row(const uchar *buf); void use_hidden_primary_key(); diff --git a/storage/perfschema/pfs_engine_table.cc b/storage/perfschema/pfs_engine_table.cc index 75ba41e9dd4..ed378de5b06 100644 --- 
a/storage/perfschema/pfs_engine_table.cc +++ b/storage/perfschema/pfs_engine_table.cc @@ -178,7 +178,7 @@ ha_rows PFS_engine_table_share::get_row_count(void) const return m_records; } -int PFS_engine_table_share::write_row(TABLE *table, unsigned char *buf, +int PFS_engine_table_share::write_row(TABLE *table, const unsigned char *buf, Field **fields) const { my_bitmap_map *org_bitmap; diff --git a/storage/perfschema/pfs_engine_table.h b/storage/perfschema/pfs_engine_table.h index b29a9c54754..d58ac4390f9 100644 --- a/storage/perfschema/pfs_engine_table.h +++ b/storage/perfschema/pfs_engine_table.h @@ -200,7 +200,7 @@ protected: typedef PFS_engine_table* (*pfs_open_table_t)(void); /** Callback to write a row. */ typedef int (*pfs_write_row_t)(TABLE *table, - unsigned char *buf, Field **fields); + const unsigned char *buf, Field **fields); /** Callback to delete all rows. */ typedef int (*pfs_delete_all_rows_t)(void); /** Callback to get a row count. */ @@ -217,7 +217,7 @@ struct PFS_engine_table_share /** Get the row count. */ ha_rows get_row_count(void) const; /** Write a row. */ - int write_row(TABLE *table, unsigned char *buf, Field **fields) const; + int write_row(TABLE *table, const unsigned char *buf, Field **fields) const; /** Table name. 
*/ LEX_STRING m_name; diff --git a/storage/perfschema/table_setup_actors.cc b/storage/perfschema/table_setup_actors.cc index e7c20341e94..ada69942651 100644 --- a/storage/perfschema/table_setup_actors.cc +++ b/storage/perfschema/table_setup_actors.cc @@ -52,7 +52,7 @@ PFS_engine_table* table_setup_actors::create() return new table_setup_actors(); } -int table_setup_actors::write_row(TABLE *table, unsigned char *buf, +int table_setup_actors::write_row(TABLE *table, const unsigned char *buf, Field **fields) { Field *f; diff --git a/storage/perfschema/table_setup_actors.h b/storage/perfschema/table_setup_actors.h index aa403b4909d..2e274a0cf15 100644 --- a/storage/perfschema/table_setup_actors.h +++ b/storage/perfschema/table_setup_actors.h @@ -55,7 +55,7 @@ public: static PFS_engine_table_share m_share; /** Table builder. */ static PFS_engine_table* create(); - static int write_row(TABLE *table, unsigned char *buf, Field **fields); + static int write_row(TABLE *table, const unsigned char *buf, Field **fields); static int delete_all_rows(); static ha_rows get_row_count(); diff --git a/storage/perfschema/table_setup_objects.cc b/storage/perfschema/table_setup_objects.cc index 98de9b22416..60ea72d5724 100644 --- a/storage/perfschema/table_setup_objects.cc +++ b/storage/perfschema/table_setup_objects.cc @@ -66,7 +66,7 @@ PFS_engine_table* table_setup_objects::create(void) return new table_setup_objects(); } -int table_setup_objects::write_row(TABLE *table, unsigned char *buf, +int table_setup_objects::write_row(TABLE *table, const unsigned char *buf, Field **fields) { int result; diff --git a/storage/perfschema/table_setup_objects.h b/storage/perfschema/table_setup_objects.h index 8e883126298..d8fd041bd57 100644 --- a/storage/perfschema/table_setup_objects.h +++ b/storage/perfschema/table_setup_objects.h @@ -58,7 +58,7 @@ public: static PFS_engine_table_share m_share; /** Table builder. 
*/ static PFS_engine_table* create(); - static int write_row(TABLE *table, unsigned char *buf, Field **fields); + static int write_row(TABLE *table, const unsigned char *buf, Field **fields); static int delete_all_rows(); static ha_rows get_row_count(); diff --git a/storage/rocksdb/.clang-format b/storage/rocksdb/.clang-format index d80b012dd4b..b1df76bdf2d 100644 --- a/storage/rocksdb/.clang-format +++ b/storage/rocksdb/.clang-format @@ -1,23 +1,49 @@ ---- +# Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2.0, +# as published by the Free Software Foundation. +# +# This program is also distributed with certain software (including +# but not limited to OpenSSL) that is licensed under separate terms, +# as designated in a particular file or component or in included license +# documentation. The authors of MySQL hereby grant you an additional +# permission to link the program and your derivative works with the +# separately licensed software that they have included with MySQL. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License, version 2.0, for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +# This is the output of clang-format-5.0 --style=google --dump-config, +# except for changes mentioned below. We lock the style so that any newer +# version of clang-format will give the same result; as time goes, we may +# update this list, requiring newer versions of clang-format. 
+ Language: Cpp -# BasedOnStyle: LLVM -AccessModifierOffset: -2 +# BasedOnStyle: Google +AccessModifierOffset: -1 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: false +AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: All -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: false +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true BinPackArguments: true BinPackParameters: true BraceWrapping: @@ -32,62 +58,80 @@ BraceWrapping: BeforeCatch: false BeforeElse: false IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true BreakBeforeBinaryOperators: None BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true ColumnLimit: 80 CommentPragmas: '^ IWYU pragma:' -ConstructorInitializerAllOnOneLineOrOnePerLine: false +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true -DerivePointerAlignment: false DisableFormat: false ExperimentalAutoDetectBinPacking: false -ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH IncludeCategories: - - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + - Regex: '^<.*\.h>' + 
Priority: 1 + - Regex: '^<.*' Priority: 2 - - Regex: '^(<|"(gtest|isl|json)/)' - Priority: 3 - Regex: '.*' - Priority: 1 -IncludeIsMainRegex: '$' -IndentCaseLabels: false + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: true IndentWidth: 2 IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: true +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: true -PenaltyBreakBeforeFirstCallParameter: 19 +ObjCSpaceBeforeProtocolList: false +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 60 -PointerAlignment: Right +PenaltyReturnTypeOnItsOwnLine: 200 ReflowComments: true SortIncludes: true +SortUsingDeclarations: true SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 +SpacesBeforeTrailingComments: 2 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false -Standard: Cpp11 TabWidth: 8 UseTab: Never -JavaScriptQuotes: Leave -... + +# We declare one specific pointer style since right alignment is dominant in +# the MySQL code base (default --style=google has DerivePointerAlignment true). +DerivePointerAlignment: false +PointerAlignment: Right + +# MySQL source code is allowed to use C++11 features. 
+Standard: Cpp11 diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt index 1566f4627fb..a285cd78d3b 100644 --- a/storage/rocksdb/CMakeLists.txt +++ b/storage/rocksdb/CMakeLists.txt @@ -104,6 +104,8 @@ SET(ROCKSDB_SE_SOURCES rdb_psi.cc rdb_sst_info.cc rdb_sst_info.h + rdb_converter.cc + rdb_converter.h ) # MariaDB: the following is added in build_rocksdb.cmake, when appropriate: @@ -153,6 +155,7 @@ ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib rdb_perf_context.h rdb_buff.h rdb_mariadb_port.h + nosql_access.cc nosql_access.h ) ADD_DEPENDENCIES(rocksdb_aux_lib GenError) @@ -163,6 +166,27 @@ if (UNIX AND NOT APPLE) TARGET_LINK_LIBRARIES(rocksdb_aux_lib -lrt) endif() +# IF (WITH_JEMALLOC) +# FIND_LIBRARY(JEMALLOC_LIBRARY +# NAMES libjemalloc${PIC_EXT}.a jemalloc +# HINTS ${WITH_JEMALLOC}/lib) +# SET(rocksdb_static_libs ${rocksdb_static_libs} +# ${JEMALLOC_LIBRARY}) +# ADD_DEFINITIONS(-DROCKSDB_JEMALLOC) +# ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE) +# ENDIF() + +# MariaDB: Q: why does the upstream add libunwind for a particular +# storage engine? 
+#IF (WITH_UNWIND) +# FIND_LIBRARY(UNWIND_LIBRARY +# NAMES libunwind${PIC_EXT}.a unwind +# HINTS ${WITH_UNWIND}/lib) +# SET(rocksdb_static_libs ${rocksdb_static_libs} +# ${UNWIND_LIBRARY}) +#ENDIF() + + TARGET_LINK_LIBRARIES(rocksdb rocksdb_aux_lib) FIND_LIBRARY(LZ4_LIBRARY NAMES liblz4${PIC_EXT}.a lz4 @@ -171,6 +195,8 @@ TARGET_LINK_LIBRARIES(rocksdb rocksdb_aux_lib) CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU) IF(HAVE_SCHED_GETCPU) ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1) +# MariaDB: don't do this: +# ADD_DEFINITIONS(-DZSTD_STATIC_LINKING_ONLY) ENDIF() IF (WITH_TBB) diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake index 3d17f27a3e6..96da9737dbc 100644 --- a/storage/rocksdb/build_rocksdb.cmake +++ b/storage/rocksdb/build_rocksdb.cmake @@ -21,11 +21,13 @@ else() if(WITH_ROCKSDB_JEMALLOC) find_package(JeMalloc REQUIRED) add_definitions(-DROCKSDB_JEMALLOC) + ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE) include_directories(${JEMALLOC_INCLUDE_DIR}) endif() if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") # FreeBSD has jemaloc as default malloc add_definitions(-DROCKSDB_JEMALLOC) + ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE) set(WITH_JEMALLOC ON) endif() endif() @@ -160,7 +162,7 @@ find_package(Threads REQUIRED) if(WIN32) set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib) else() - set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} ${LIBRT}) + set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} ${LIBRT} ${LIBDL}) endif() set(ROCKSDB_LIBS rocksdblib}) @@ -169,7 +171,20 @@ set(LIBS ${ROCKSDB_LIBS} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) #add_subdirectory(${ROCKSDB_SOURCE_DIR}/tools) # Main library source code - +# Note : RocksDB has a lot of unittests. We should not include these files +# in the build, because 1. they are not needed and 2. gtest causes warnings +# in windows build, which are treated as errors and cause the build to fail. 
+# +# Unit tests themselves: +# - *_test.cc +# - *_bench.cc +# +# - table/mock_table.cc +# - utilities/cassandra/cassandra_compaction_filter.cc +# - utilities/cassandra/format.cc +# - utilities/cassandra/merge_operator.cc +# - utilities/cassandra/test_utils.cc +# set(ROCKSDB_SOURCES cache/clock_cache.cc cache/lru_cache.cc @@ -178,24 +193,27 @@ set(ROCKSDB_SOURCES db/c.cc db/column_family.cc db/compacted_db_impl.cc - db/compaction.cc - db/compaction_iterator.cc - db/compaction_job.cc - db/compaction_picker.cc - db/compaction_picker_universal.cc + db/compaction/compaction.cc + db/compaction/compaction_iterator.cc + db/compaction/compaction_job.cc + db/compaction/compaction_picker.cc + db/compaction/compaction_picker_fifo.cc + db/compaction/compaction_picker_level.cc + db/compaction/compaction_picker_universal.cc db/convenience.cc db/db_filesnapshot.cc - db/db_impl.cc - db/db_impl_compaction_flush.cc - db/db_impl_debug.cc - db/db_impl_experimental.cc - db/db_impl_files.cc - db/db_impl_open.cc - db/db_impl_readonly.cc - db/db_impl_write.cc + db/dbformat.cc + db/db_impl/db_impl.cc + db/db_impl/db_impl_compaction_flush.cc + db/db_impl/db_impl_debug.cc + db/db_impl/db_impl_experimental.cc + db/db_impl/db_impl_files.cc + db/db_impl/db_impl_open.cc + db/db_impl/db_impl_readonly.cc + db/db_impl/db_impl_secondary.cc + db/db_impl/db_impl_write.cc db/db_info_dumper.cc db/db_iter.cc - db/dbformat.cc db/error_handler.cc db/event_helpers.cc db/experimental.cc @@ -206,15 +224,15 @@ set(ROCKSDB_SOURCES db/forward_iterator.cc db/internal_stats.cc db/log_reader.cc - db/log_writer.cc db/logs_with_prep_tracker.cc + db/log_writer.cc db/malloc_stats.cc - db/managed_iterator.cc db/memtable.cc db/memtable_list.cc db/merge_helper.cc db/merge_operator.cc db/range_del_aggregator.cc + db/range_tombstone_fragmenter.cc db/repair.cc db/snapshot_impl.cc db/table_cache.cc @@ -224,35 +242,42 @@ set(ROCKSDB_SOURCES db/version_edit.cc db/version_set.cc db/wal_manager.cc - db/write_batch.cc 
db/write_batch_base.cc + db/write_batch.cc db/write_controller.cc db/write_thread.cc env/env.cc env/env_chroot.cc env/env_hdfs.cc env/mock_env.cc + file/delete_scheduler.cc + file/filename.cc + file/file_util.cc + file/sst_file_manager_impl.cc + logging/auto_roll_logger.cc + logging/event_logger.cc + logging/log_buffer.cc + memory/arena.cc + memory/concurrent_arena.cc + memory/jemalloc_nodump_allocator.cc memtable/alloc_tracker.cc - memtable/hash_cuckoo_rep.cc - memtable/hash_cuckoo_rep.cc - memtable/hash_linklist_rep.cc memtable/hash_linklist_rep.cc memtable/hash_skiplist_rep.cc - memtable/hash_skiplist_rep.cc - memtable/skiplistrep.cc memtable/skiplistrep.cc memtable/vectorrep.cc - memtable/vectorrep.cc memtable/write_buffer_manager.cc monitoring/histogram.cc monitoring/histogram_windowing.cc monitoring/instrumented_mutex.cc + monitoring/in_memory_stats_history.cc monitoring/iostats_context.cc monitoring/perf_context.cc monitoring/perf_level.cc + monitoring/persistent_stats_history.cc monitoring/statistics.cc monitoring/thread_status_impl.cc monitoring/thread_status_updater.cc + monitoring/thread_status_updater_debug.cc monitoring/thread_status_util.cc monitoring/thread_status_util_debug.cc options/cf_options.cc @@ -262,96 +287,82 @@ set(ROCKSDB_SOURCES options/options_parser.cc options/options_sanity_check.cc port/stack_trace.cc - table/adaptive_table_factory.cc - table/block.cc - table/block_based_filter_block.cc - table/block_based_table_builder.cc - table/block_based_table_factory.cc - table/block_based_table_reader.cc - table/block_builder.cc + table/adaptive/adaptive_table_factory.cc + table/block_based/block_based_filter_block.cc + table/block_based/block_based_table_builder.cc + table/block_based/block_based_table_factory.cc + table/block_based/block_based_table_reader.cc + table/block_based/block_builder.cc + table/block_based/block.cc + table/block_based/block_prefix_index.cc + table/block_based/data_block_footer.cc + 
table/block_based/data_block_hash_index.cc + table/block_based/flush_block_policy.cc + table/block_based/full_filter_block.cc + table/block_based/index_builder.cc + table/block_based/partitioned_filter_block.cc table/block_fetcher.cc - table/block_prefix_index.cc table/bloom_block.cc - table/cuckoo_table_builder.cc - table/cuckoo_table_factory.cc - table/cuckoo_table_reader.cc - table/flush_block_policy.cc + table/cuckoo/cuckoo_table_builder.cc + table/cuckoo/cuckoo_table_factory.cc + table/cuckoo/cuckoo_table_reader.cc table/format.cc - table/full_filter_block.cc table/get_context.cc - table/index_builder.cc table/iterator.cc table/merging_iterator.cc table/meta_blocks.cc - table/partitioned_filter_block.cc table/persistent_cache_helper.cc - table/plain_table_builder.cc - table/plain_table_factory.cc - table/plain_table_index.cc - table/plain_table_key_coding.cc - table/plain_table_reader.cc + table/plain/plain_table_builder.cc + table/plain/plain_table_factory.cc + table/plain/plain_table_index.cc + table/plain/plain_table_key_coding.cc + table/plain/plain_table_reader.cc + table/sst_file_reader.cc table/sst_file_writer.cc table/table_properties.cc table/two_level_iterator.cc - tools/db_bench_tool.cc - tools/dump/db_dump_tool.cc + test_util/sync_point.cc + test_util/sync_point_impl.cc tools/ldb_cmd.cc tools/ldb_tool.cc tools/sst_dump_tool.cc - util/arena.cc - util/auto_roll_logger.cc + trace_replay/block_cache_tracer.cc + trace_replay/trace_replay.cc util/bloom.cc util/coding.cc util/compaction_job_stats_impl.cc util/comparator.cc util/compression_context_cache.cc - util/concurrent_arena.cc + util/concurrent_task_limiter_impl.cc + util/crc32c_arm64.cc util/crc32c.cc - util/delete_scheduler.cc util/dynamic_bloom.cc - util/event_logger.cc util/file_reader_writer.cc - util/file_util.cc - util/filename.cc util/filter_policy.cc util/hash.cc - util/log_buffer.cc - util/murmurhash.cc - util/random.cc - util/rate_limiter.cc - util/slice.cc - util/sst_file_manager_impl.cc 
- util/status.cc - util/status_message.cc - util/string_util.cc - util/sync_point.cc - util/sync_point_impl.cc - util/testutil.cc - util/thread_local.cc - util/threadpool_imp.cc - util/transaction_test_util.cc - util/xxhash.cc utilities/backupable/backupable_db.cc + utilities/blob_db/blob_compaction_filter.cc utilities/blob_db/blob_db.cc + utilities/blob_db/blob_db_impl.cc + utilities/blob_db/blob_db_impl_filesnapshot.cc + utilities/blob_db/blob_dump_tool.cc + utilities/blob_db/blob_file.cc + utilities/blob_db/blob_log_format.cc + utilities/blob_db/blob_log_reader.cc + utilities/blob_db/blob_log_writer.cc utilities/checkpoint/checkpoint_impl.cc - utilities/col_buf_decoder.cc - utilities/col_buf_encoder.cc - utilities/column_aware_encoding_util.cc utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc - utilities/date_tiered/date_tiered_db_impl.cc + utilities/convenience/info_log_finder.cc utilities/debug.cc - utilities/document/document_db.cc - utilities/document/json_document.cc - utilities/document/json_document_builder.cc utilities/env_mirror.cc - utilities/geodb/geodb_impl.cc + utilities/env_timed.cc utilities/leveldb_options/leveldb_options.cc - utilities/lua/rocks_lua_compaction_filter.cc utilities/memory/memory_util.cc + utilities/merge_operators/bytesxor.cc utilities/merge_operators/max.cc utilities/merge_operators/put.cc - utilities/merge_operators/string_append/stringappend.cc utilities/merge_operators/string_append/stringappend2.cc + utilities/merge_operators/string_append/stringappend.cc utilities/merge_operators/uint64add.cc utilities/option_change_migration/option_change_migration.cc utilities/options/options_util.cc @@ -360,10 +371,10 @@ set(ROCKSDB_SOURCES utilities/persistent_cache/block_cache_tier_metadata.cc utilities/persistent_cache/persistent_cache_tier.cc utilities/persistent_cache/volatile_tier_impl.cc - utilities/redis/redis_lists.cc utilities/simulator_cache/sim_cache.cc - utilities/spatialdb/spatial_db.cc 
utilities/table_properties_collectors/compact_on_deletion_collector.cc + utilities/trace/file_trace_reader_writer.cc + utilities/transactions/optimistic_transaction.cc utilities/transactions/optimistic_transaction_db_impl.cc utilities/transactions/pessimistic_transaction.cc utilities/transactions/pessimistic_transaction_db.cc @@ -379,8 +390,18 @@ set(ROCKSDB_SOURCES utilities/ttl/db_ttl_impl.cc utilities/write_batch_with_index/write_batch_with_index.cc utilities/write_batch_with_index/write_batch_with_index_internal.cc + util/murmurhash.cc + util/random.cc + util/rate_limiter.cc + util/slice.cc + util/status.cc + util/string_util.cc + util/thread_local.cc + util/threadpool_imp.cc + util/xxhash.cc ) + if(WIN32) list(APPEND ROCKSDB_SOURCES port/win/io_win.cc diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc index 941f00cac60..4995e407e22 100644 --- a/storage/rocksdb/event_listener.cc +++ b/storage/rocksdb/event_listener.cc @@ -33,9 +33,9 @@ namespace myrocks { -static std::vector<Rdb_index_stats> -extract_index_stats(const std::vector<std::string> &files, - const rocksdb::TablePropertiesCollection &props) { +static std::vector<Rdb_index_stats> extract_index_stats( + const std::vector<std::string> &files, + const rocksdb::TablePropertiesCollection &props) { std::vector<Rdb_index_stats> ret; for (auto fn : files) { const auto it = props.find(fn); @@ -86,10 +86,11 @@ void Rdb_event_listener::OnExternalFileIngested( void Rdb_event_listener::OnBackgroundError( rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) { rdb_log_status_error(*status, "Error detected in background"); + // NO_LINT_DEBUG sql_print_error("RocksDB: BackgroundErrorReason: %d", (int)reason); if (status->IsCorruption()) { rdb_persist_corruption_marker(); abort(); } } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h index f666c876eac..737973eb9da 100644 --- 
a/storage/rocksdb/event_listener.h +++ b/storage/rocksdb/event_listener.h @@ -22,7 +22,7 @@ namespace myrocks { class Rdb_ddl_manager; class Rdb_event_listener : public rocksdb::EventListener { -public: + public: Rdb_event_listener(const Rdb_event_listener &) = delete; Rdb_event_listener &operator=(const Rdb_event_listener &) = delete; @@ -40,10 +40,10 @@ public: void OnBackgroundError(rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) override; -private: + private: Rdb_ddl_manager *m_ddl_manager; void update_index_stats(const rocksdb::TableProperties &props); }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index df700b70804..2e0b09624e2 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -15,7 +15,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif #define MYSQL_SERVER 1 @@ -31,8 +31,8 @@ #include "./ha_rocksdb.h" /* C++ standard header files */ -#include <algorithm> #include <inttypes.h> +#include <algorithm> #include <limits> #include <map> #include <queue> @@ -41,6 +41,9 @@ #include <vector> /* MySQL includes */ +#include <mysql/psi/mysql_table.h> +// #include <thread_pool_priv.h> +#include <mysys_err.h> #include "./debug_sync.h" #include "./my_bit.h" #include "./my_stacktrace.h" @@ -48,20 +51,14 @@ #include "./sql_audit.h" #include "./sql_table.h" #include "./sql_hset.h" -#include <mysql/psi/mysql_table.h> #ifdef MARIAROCKS_NOT_YET -#include <mysql/thread_pool_priv.h> #endif -#include <mysys_err.h> - -// Both MySQL and RocksDB define the same constant. To avoid compilation errors -// till we make the fix in RocksDB, we'll temporary undefine it here. 
-#undef CACHE_LINE_SIZE /* RocksDB includes */ #include "monitoring/histogram.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/env.h" +#include "rocksdb/memory_allocator.h" #include "rocksdb/persistent_cache.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/slice_transform.h" @@ -70,15 +67,21 @@ #include "rocksdb/utilities/convenience.h" #include "rocksdb/utilities/memory_util.h" #include "rocksdb/utilities/sim_cache.h" +#include "rocksdb/utilities/write_batch_with_index.h" #include "util/stop_watch.h" #include "./rdb_source_revision.h" +// MariaRocks: this is needed to access RocksDB debug syncpoints: +#include "test_util/sync_point.h" + /* MyRocks includes */ #include "./event_listener.h" #include "./ha_rocksdb_proto.h" #include "./logger.h" +#include "./nosql_access.h" #include "./rdb_cf_manager.h" #include "./rdb_cf_options.h" +#include "./rdb_converter.h" #include "./rdb_datadic.h" #include "./rdb_i_s.h" #include "./rdb_index_merge.h" @@ -100,18 +103,19 @@ void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all); * Get the user thread's binary logging format * @param thd user thread * @return Value to be used as index into the binlog_format_names array -*/ + */ int thd_binlog_format(const MYSQL_THD thd); /** * Check if binary logging is filtered for thread's current db. * @param thd Thread handle * @retval 1 the query is not filtered, 0 otherwise. 
-*/ + */ bool thd_binlog_filter_ok(const MYSQL_THD thd); } MYSQL_PLUGIN_IMPORT bool my_disable_leak_check; +extern my_bool opt_core_file; // Needed in rocksdb_init_func void ignore_db_dirs_append(const char *dirname_arg); @@ -128,22 +132,14 @@ const std::string DEFAULT_CF_NAME("default"); const std::string DEFAULT_SYSTEM_CF_NAME("__system__"); const std::string PER_INDEX_CF_NAME("$per_index_cf"); -class Rdb_explicit_snapshot; - -std::mutex explicit_snapshot_mutex; -ulonglong explicit_snapshot_counter = 0; -std::unordered_map<ulonglong, std::weak_ptr<Rdb_explicit_snapshot>> - explicit_snapshots; static std::vector<GL_INDEX_ID> rdb_indexes_to_recalc; #ifdef MARIADB_NOT_YET class Rdb_explicit_snapshot : public explicit_snapshot { - std::unique_ptr<rocksdb::ManagedSnapshot> snapshot; - public: - static std::shared_ptr<Rdb_explicit_snapshot> - create(snapshot_info_st *ss_info, rocksdb::DB *db, - const rocksdb::Snapshot *snapshot) { + static std::shared_ptr<Rdb_explicit_snapshot> create( + snapshot_info_st *ss_info, rocksdb::DB *db, + const rocksdb::Snapshot *snapshot) { std::lock_guard<std::mutex> lock(explicit_snapshot_mutex); auto s = std::unique_ptr<rocksdb::ManagedSnapshot>( new rocksdb::ManagedSnapshot(db, snapshot)); @@ -159,8 +155,24 @@ class Rdb_explicit_snapshot : public explicit_snapshot { return ret; } - static std::shared_ptr<Rdb_explicit_snapshot> - get(const ulonglong snapshot_id) { + static std::string dump_snapshots() { + std::string str; + std::lock_guard<std::mutex> lock(explicit_snapshot_mutex); + for (const auto &elem : explicit_snapshots) { + const auto &ss = elem.second.lock(); + DBUG_ASSERT(ss != nullptr); + const auto &info = ss->ss_info; + str += "\nSnapshot ID: " + std::to_string(info.snapshot_id) + + "\nBinlog File: " + info.binlog_file + + "\nBinlog Pos: " + std::to_string(info.binlog_pos) + + "\nGtid Executed: " + info.gtid_executed + "\n"; + } + + return str; + } + + static std::shared_ptr<Rdb_explicit_snapshot> get( + const ulonglong 
snapshot_id) { std::lock_guard<std::mutex> lock(explicit_snapshot_mutex); auto elem = explicit_snapshots.find(snapshot_id); if (elem == explicit_snapshots.end()) { @@ -172,14 +184,27 @@ class Rdb_explicit_snapshot : public explicit_snapshot { rocksdb::ManagedSnapshot *get_snapshot() { return snapshot.get(); } Rdb_explicit_snapshot(snapshot_info_st ss_info, - std::unique_ptr<rocksdb::ManagedSnapshot> snapshot) + std::unique_ptr<rocksdb::ManagedSnapshot> &&snapshot) : explicit_snapshot(ss_info), snapshot(std::move(snapshot)) {} virtual ~Rdb_explicit_snapshot() { std::lock_guard<std::mutex> lock(explicit_snapshot_mutex); explicit_snapshots.erase(ss_info.snapshot_id); } + + private: + std::unique_ptr<rocksdb::ManagedSnapshot> snapshot; + + static std::mutex explicit_snapshot_mutex; + static ulonglong explicit_snapshot_counter; + static std::unordered_map<ulonglong, std::weak_ptr<Rdb_explicit_snapshot>> + explicit_snapshots; }; + +std::mutex Rdb_explicit_snapshot::explicit_snapshot_mutex; +ulonglong Rdb_explicit_snapshot::explicit_snapshot_counter = 0; +std::unordered_map<ulonglong, std::weak_ptr<Rdb_explicit_snapshot>> + Rdb_explicit_snapshot::explicit_snapshots; #endif /** @@ -188,10 +213,11 @@ class Rdb_explicit_snapshot : public explicit_snapshot { void ha_rocksdb::update_row_stats(const operation_type &type) { DBUG_ASSERT(type < ROWS_MAX); // Find if we are modifying system databases. 
- if (table->s && m_tbl_def->m_is_mysql_system_table) + if (table->s && m_tbl_def->m_is_mysql_system_table) { global_stats.system_rows[type].inc(); - else + } else { global_stats.rows[type].inc(); + } } void dbug_dump_database(rocksdb::DB *db); @@ -199,8 +225,8 @@ static handler *rocksdb_create_handler(my_core::handlerton *hton, my_core::TABLE_SHARE *table_arg, my_core::MEM_ROOT *mem_root); -static rocksdb::CompactRangeOptions -getCompactRangeOptions(int concurrency = 0) { +static rocksdb::CompactRangeOptions getCompactRangeOptions( + int concurrency = 0) { rocksdb::CompactRangeOptions compact_range_options; compact_range_options.bottommost_level_compaction = rocksdb::BottommostLevelCompaction::kForce; @@ -261,37 +287,76 @@ static void rocksdb_flush_all_memtables() { } } +static void rocksdb_delete_column_family_stub( + THD *const /* thd */, struct st_mysql_sys_var *const /* var */, + void *const /* var_ptr */, const void *const /* save */) {} + +static int rocksdb_delete_column_family( + THD *const /* thd */, struct st_mysql_sys_var *const /* var */, + void *const /* var_ptr */, struct st_mysql_value *const value) { + // Return failure for now until the race condition between creating + // CF and deleting CF is resolved + return HA_EXIT_FAILURE; + + char buff[STRING_BUFFER_USUAL_SIZE]; + int len = sizeof(buff); + + DBUG_ASSERT(value != nullptr); + + if (const char *const cf = value->val_str(value, buff, &len)) { + auto &cf_manager = rdb_get_cf_manager(); + auto ret = cf_manager.drop_cf(cf); + if (ret == HA_EXIT_SUCCESS) { + // NO_LINT_DEBUG + sql_print_information("RocksDB: Dropped column family: %s\n", cf); + } else { + // NO_LINT_DEBUG + sql_print_error("RocksDB: Failed to drop column family: %s, error: %d\n", + cf, ret); + } + + return ret; + } + + return HA_EXIT_SUCCESS; +} + /////////////////////////////////////////////////////////// // Hash map: table name => open table handler /////////////////////////////////////////////////////////// -namespace // 
anonymous namespace = not visible outside this source file +namespace // anonymous namespace = not visible outside this source file { -const ulong TABLE_HASH_SIZE = 32; typedef Hash_set<Rdb_table_handler> Rdb_table_set; -struct Rdb_open_tables_map { +class Rdb_open_tables_map { + private: /* Hash table used to track the handlers of open tables */ - Rdb_table_set m_hash; + std::unordered_map<std::string, Rdb_table_handler *> m_table_map; + /* The mutex used to protect the hash table */ mutable mysql_mutex_t m_mutex; - static uchar *get_hash_key(const Rdb_table_handler *const table_handler, - size_t *const length, - my_bool not_used MY_ATTRIBUTE((__unused__))); + public: + void init() { + m_table_map.clear(); + mysql_mutex_init(rdb_psi_open_tbls_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST); + } + + void free() { + m_table_map.clear(); + mysql_mutex_destroy(&m_mutex); + } + size_t count() { return m_table_map.size(); } Rdb_table_handler *get_table_handler(const char *const table_name); void release_table_handler(Rdb_table_handler *const table_handler); - Rdb_open_tables_map() : m_hash(get_hash_key, system_charset_info) { } - - void free_hash(void) { m_hash.~Rdb_table_set(); } - std::vector<std::string> get_table_names(void) const; }; -} // anonymous namespace +} // anonymous namespace static Rdb_open_tables_map rdb_open_tables; @@ -326,6 +391,7 @@ static int rocksdb_create_checkpoint( status = checkpoint->CreateCheckpoint(checkpoint_dir.c_str()); delete checkpoint; if (status.ok()) { + // NO_LINT_DEBUG sql_print_information( "RocksDB: created checkpoint in directory : %s\n", checkpoint_dir.c_str()); @@ -355,6 +421,7 @@ static void rocksdb_force_flush_memtable_now_stub( static int rocksdb_force_flush_memtable_now( THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, struct st_mysql_value *const value) { + // NO_LINT_DEBUG sql_print_information("RocksDB: Manual memtable flush."); rocksdb_flush_all_memtables(); return HA_EXIT_SUCCESS; @@ -367,6 +434,7 @@ 
static void rocksdb_force_flush_memtable_and_lzero_now_stub( static int rocksdb_force_flush_memtable_and_lzero_now( THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, struct st_mysql_value *const value) { + // NO_LINT_DEBUG sql_print_information("RocksDB: Manual memtable and L0 flush."); rocksdb_flush_all_memtables(); @@ -375,29 +443,46 @@ static int rocksdb_force_flush_memtable_and_lzero_now( rocksdb::ColumnFamilyMetaData metadata; rocksdb::ColumnFamilyDescriptor cf_descr; + int i, max_attempts = 3, num_errors = 0; + for (const auto &cf_handle : cf_manager.get_all_cf()) { - rdb->GetColumnFamilyMetaData(cf_handle, &metadata); - cf_handle->GetDescriptor(&cf_descr); - c_options.output_file_size_limit = cf_descr.options.target_file_size_base; + for (i = 0; i < max_attempts; i++) { + rdb->GetColumnFamilyMetaData(cf_handle, &metadata); + cf_handle->GetDescriptor(&cf_descr); + c_options.output_file_size_limit = cf_descr.options.target_file_size_base; + + DBUG_ASSERT(metadata.levels[0].level == 0); + std::vector<std::string> file_names; + for (auto &file : metadata.levels[0].files) { + file_names.emplace_back(file.db_path + file.name); + } - DBUG_ASSERT(metadata.levels[0].level == 0); - std::vector<std::string> file_names; - for (auto &file : metadata.levels[0].files) { - file_names.emplace_back(file.db_path + file.name); - } + if (file_names.empty()) { + break; + } - if (!file_names.empty()) { rocksdb::Status s; s = rdb->CompactFiles(c_options, cf_handle, file_names, 1); + // Due to a race, it's possible for CompactFiles to collide + // with auto compaction, causing an error to return + // regarding file not found. In that case, retry. + if (s.IsInvalidArgument()) { + continue; + } + if (!s.ok() && !s.IsAborted()) { rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL); return HA_EXIT_FAILURE; } + break; + } + if (i == max_attempts) { + num_errors++; } } - return HA_EXIT_SUCCESS; + return num_errors == 0 ? 
HA_EXIT_SUCCESS : HA_EXIT_FAILURE; } static void rocksdb_drop_index_wakeup_thread( @@ -468,11 +553,9 @@ static void rocksdb_set_update_cf_options(THD *thd, struct st_mysql_sys_var *var, void *var_ptr, const void *save); -static int rocksdb_check_bulk_load(THD *const thd, - struct st_mysql_sys_var *var - MY_ATTRIBUTE((__unused__)), - void *save, - struct st_mysql_value *value); +static int rocksdb_check_bulk_load( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value); static int rocksdb_check_bulk_load_allow_unsorted( THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), @@ -499,6 +582,8 @@ static int rocksdb_validate_set_block_cache_size( static long long rocksdb_block_cache_size; static long long rocksdb_sim_cache_size; static my_bool rocksdb_use_clock_cache; +static double rocksdb_cache_high_pri_pool_ratio; +static my_bool rocksdb_cache_dump; /* Use unsigned long long instead of uint64_t because of MySQL compatibility */ static unsigned long long // NOLINT(runtime/int) rocksdb_rate_limiter_bytes_per_sec; @@ -518,8 +603,10 @@ static my_bool rocksdb_force_compute_memtable_stats; static uint32_t rocksdb_force_compute_memtable_stats_cachetime; static my_bool rocksdb_debug_optimizer_no_zero_cardinality; static uint32_t rocksdb_wal_recovery_mode; +static uint32_t rocksdb_stats_level; static uint32_t rocksdb_access_hint_on_compaction_start; static char *rocksdb_compact_cf_name; +static char *rocksdb_delete_cf_name; static char *rocksdb_checkpoint_name; static my_bool rocksdb_signal_drop_index_thread; static my_bool rocksdb_signal_remove_mariabackup_checkpoint; @@ -555,10 +642,25 @@ char *compression_types_val= const_cast<char*>(get_rocksdb_supported_compression_types()); static unsigned long rocksdb_write_policy = rocksdb::TxnDBWritePolicy::WRITE_COMMITTED; + +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported +char *rocksdb_read_free_rpl_tables; +std::mutex 
rocksdb_read_free_rpl_tables_mutex; +#if defined(HAVE_PSI_INTERFACE) +Regex_list_handler rdb_read_free_regex_handler(key_rwlock_read_free_rpl_tables); +#else +Regex_list_handler rdb_read_free_regex_handler; +#endif +enum read_free_rpl_type { OFF = 0, PK_ONLY, PK_SK }; +static unsigned long rocksdb_read_free_rpl = read_free_rpl_type::OFF; +#endif + static my_bool rocksdb_error_on_suboptimal_collation = 1; static uint32_t rocksdb_stats_recalc_rate = 0; static uint32_t rocksdb_debug_manual_compaction_delay = 0; static uint32_t rocksdb_max_manual_compactions = 0; +static my_bool rocksdb_rollback_on_timeout = FALSE; +static my_bool rocksdb_enable_insert_with_update_caching = TRUE; std::atomic<uint64_t> rocksdb_row_lock_deadlocks(0); std::atomic<uint64_t> rocksdb_row_lock_wait_timeouts(0); @@ -566,6 +668,9 @@ std::atomic<uint64_t> rocksdb_snapshot_conflict_errors(0); std::atomic<uint64_t> rocksdb_wal_group_syncs(0); std::atomic<uint64_t> rocksdb_manual_compactions_processed(0); std::atomic<uint64_t> rocksdb_manual_compactions_running(0); +#ifndef DBUG_OFF +std::atomic<uint64_t> rocksdb_num_get_for_update_calls(0); +#endif @@ -635,7 +740,7 @@ static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) { o->listeners.push_back(std::make_shared<Rdb_event_listener>(&ddl_manager)); o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL; o->max_subcompactions = DEFAULT_SUBCOMPACTIONS; - o->max_open_files = -2; // auto-tune to 50% open_files_limit + o->max_open_files = -2; // auto-tune to 50% open_files_limit o->two_write_queues = true; o->manual_wal_flush = true; @@ -659,6 +764,15 @@ static TYPELIB write_policy_typelib = {array_elements(write_policy_names) - 1, "write_policy_typelib", write_policy_names, nullptr}; +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported +/* This array needs to be kept up to date with myrocks::read_free_rpl_type */ +static const char *read_free_rpl_names[] = {"OFF", "PK_ONLY", "PK_SK", NullS}; + +static TYPELIB 
read_free_rpl_typelib = {array_elements(read_free_rpl_names) - 1, + "read_free_rpl_typelib", + read_free_rpl_names, nullptr}; +#endif + /* This enum needs to be kept up to date with rocksdb::InfoLogLevel */ static const char *info_log_level_names[] = {"debug_level", "info_level", "warn_level", "error_level", @@ -680,6 +794,23 @@ static void rocksdb_set_rocksdb_info_log_level( RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); } +static void rocksdb_set_rocksdb_stats_level(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save) { + DBUG_ASSERT(save != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + rocksdb_db_options->statistics->set_stats_level( + static_cast<rocksdb::StatsLevel>( + *static_cast<const uint64_t *>(save))); + // Actual stats level is defined at rocksdb dbopt::statistics::stats_level_ + // so adjusting rocksdb_stats_level here to make sure it points to + // the correct stats level. + rocksdb_stats_level = rocksdb_db_options->statistics->get_stats_level(); + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + static void rocksdb_set_reset_stats( my_core::THD *const /* unused */, my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), @@ -804,7 +935,7 @@ static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG, static MYSQL_THDVAR_BOOL( commit_time_batch_for_recovery, PLUGIN_VAR_RQCMDARG, "TransactionOptions::commit_time_batch_for_recovery for RocksDB", nullptr, - nullptr, FALSE); + nullptr, TRUE); static MYSQL_THDVAR_BOOL( trace_sst_api, PLUGIN_VAR_RQCMDARG, @@ -844,10 +975,11 @@ static MYSQL_THDVAR_STR(tmpdir, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC, "Directory for temporary files during DDL operations.", nullptr, nullptr, ""); +#define DEFAULT_SKIP_UNIQUE_CHECK_TABLES ".*" static MYSQL_THDVAR_STR( skip_unique_check_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, "Skip unique constraint checking for the specified tables", nullptr, - nullptr, ".*"); + nullptr, 
DEFAULT_SKIP_UNIQUE_CHECK_TABLES); static MYSQL_THDVAR_BOOL( commit_in_the_middle, PLUGIN_VAR_RQCMDARG, @@ -861,11 +993,83 @@ static MYSQL_THDVAR_BOOL( " Blind delete is disabled if the table has secondary key", nullptr, nullptr, FALSE); -static MYSQL_THDVAR_STR( - read_free_rpl_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + +static const char *DEFAULT_READ_FREE_RPL_TABLES = ".*"; + +static int rocksdb_validate_read_free_rpl_tables( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *save, + struct st_mysql_value *value) { + char buff[STRING_BUFFER_USUAL_SIZE]; + int length = sizeof(buff); + const char *wlist_buf = value->val_str(value, buff, &length); + const auto wlist = wlist_buf ? wlist_buf : DEFAULT_READ_FREE_RPL_TABLES; + +#if defined(HAVE_PSI_INTERFACE) + Regex_list_handler regex_handler(key_rwlock_read_free_rpl_tables); +#else + Regex_list_handler regex_handler; +#endif + + if (!regex_handler.set_patterns(wlist)) { + warn_about_bad_patterns(®ex_handler, "rocksdb_read_free_rpl_tables"); + return HA_EXIT_FAILURE; + } + + *static_cast<const char **>(save) = my_strdup(wlist, MYF(MY_WME)); + return HA_EXIT_SUCCESS; +} + +static void rocksdb_update_read_free_rpl_tables( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *var_ptr, + const void *save) { + const auto wlist = *static_cast<const char *const *>(save); + DBUG_ASSERT(wlist != nullptr); + + // This is bound to succeed since we've already checked for bad patterns in + // rocksdb_validate_read_free_rpl_tables + rdb_read_free_regex_handler.set_patterns(wlist); + + // update all table defs + struct Rdb_read_free_rpl_updater : public Rdb_tables_scanner { + int add_table(Rdb_tbl_def *tdef) override { + tdef->check_and_set_read_free_rpl_table(); + return HA_EXIT_SUCCESS; + } + } updater; + ddl_manager.scan_for_tables(&updater); + + if 
(wlist == DEFAULT_READ_FREE_RPL_TABLES) { + // If running SET var = DEFAULT, then rocksdb_validate_read_free_rpl_tables + // isn't called, and memory is never allocated for the value. Allocate it + // here. + *static_cast<const char **>(var_ptr) = my_strdup(wlist, MYF(MY_WME)); + } else { + // Otherwise, we just reuse the value allocated from + // rocksdb_validate_read_free_rpl_tables. + *static_cast<const char **>(var_ptr) = wlist; + } +} + +static MYSQL_SYSVAR_STR( + read_free_rpl_tables, rocksdb_read_free_rpl_tables, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC /*| PLUGIN_VAR_ALLOCATED*/, "List of tables that will use read-free replication on the slave " "(i.e. not lookup a row during replication)", - nullptr, nullptr, ""); + rocksdb_validate_read_free_rpl_tables, rocksdb_update_read_free_rpl_tables, + DEFAULT_READ_FREE_RPL_TABLES); + +static MYSQL_SYSVAR_ENUM( + read_free_rpl, rocksdb_read_free_rpl, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, + "Use read-free replication on the slave (i.e. no row lookup during " + "replication). Default is OFF, PK_SK will enable it on all tables with " + "primary key. PK_ONLY will enable it on tables where the only key is the " + "primary key (i.e. no secondary keys).", + nullptr, nullptr, read_free_rpl_type::OFF, &read_free_rpl_typelib); +#endif static MYSQL_THDVAR_BOOL(skip_bloom_filter_on_read, PLUGIN_VAR_RQCMDARG, "Skip using bloom filter for reads", nullptr, nullptr, @@ -1033,6 +1237,14 @@ static MYSQL_SYSVAR_UINT( /* min */ (uint)rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords, /* max */ (uint)rocksdb::WALRecoveryMode::kSkipAnyCorruptedRecords, 0); +static MYSQL_SYSVAR_UINT( + stats_level, rocksdb_stats_level, PLUGIN_VAR_RQCMDARG, + "Statistics Level for RocksDB. 
Default is 0 (kExceptHistogramOrTimers)", + nullptr, rocksdb_set_rocksdb_stats_level, + /* default */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers, + /* min */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers, + /* max */ (uint)rocksdb::StatsLevel::kAll, 0); + static MYSQL_SYSVAR_SIZE_T(compaction_readahead_size, rocksdb_db_options->compaction_readahead_size, PLUGIN_VAR_RQCMDARG, @@ -1107,7 +1319,8 @@ static MYSQL_SYSVAR_ULONG( persistent_cache_size_mb, rocksdb_persistent_cache_size_mb, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Size of cache in MB for BlockBasedTableOptions::persistent_cache " - "for RocksDB", nullptr, nullptr, rocksdb_persistent_cache_size_mb, + "for RocksDB", + nullptr, nullptr, rocksdb_persistent_cache_size_mb, /* min */ 0L, /* max */ ULONG_MAX, 0); static MYSQL_SYSVAR_UINT64_T( @@ -1286,7 +1499,7 @@ static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size, rocksdb_validate_set_block_cache_size, nullptr, /* default */ RDB_DEFAULT_BLOCK_CACHE_SIZE, /* min */ RDB_MIN_BLOCK_CACHE_SIZE, - /* max */ LONGLONG_MAX, + /* max */ LLONG_MAX, /* Block size */ RDB_MIN_BLOCK_CACHE_SIZE); static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size, @@ -1295,15 +1508,26 @@ static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size, nullptr, /* default */ 0, /* min */ 0, - /* max */ LONGLONG_MAX, + /* max */ LLONG_MAX, /* Block size */ 0); static MYSQL_SYSVAR_BOOL( - use_clock_cache, - rocksdb_use_clock_cache, + use_clock_cache, rocksdb_use_clock_cache, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Use ClockCache instead of default LRUCache for RocksDB", - nullptr, nullptr, false); + "Use ClockCache instead of default LRUCache for RocksDB", nullptr, nullptr, + false); + +static MYSQL_SYSVAR_BOOL(cache_dump, rocksdb_cache_dump, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Include RocksDB block cache content in core dump.", + nullptr, nullptr, true); + +static MYSQL_SYSVAR_DOUBLE(cache_high_pri_pool_ratio, + 
rocksdb_cache_high_pri_pool_ratio, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Specify the size of block cache high-pri pool", + nullptr, nullptr, /* default */ 0.0, /* min */ 0.0, + /* max */ 1.0, 0); static MYSQL_SYSVAR_BOOL( cache_index_and_filter_blocks, @@ -1313,6 +1537,14 @@ static MYSQL_SYSVAR_BOOL( "BlockBasedTableOptions::cache_index_and_filter_blocks for RocksDB", nullptr, nullptr, true); +static MYSQL_SYSVAR_BOOL( + cache_index_and_filter_with_high_priority, + *reinterpret_cast<my_bool *>( + &rocksdb_tbl_options->cache_index_and_filter_blocks_with_high_priority), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "cache_index_and_filter_blocks_with_high_priority for RocksDB", nullptr, + nullptr, true); + // When pin_l0_filter_and_index_blocks_in_cache is true, RocksDB will use the // LRU cache, but will always keep the filter & idndex block's handle checked // out (=won't call ShardedLRUCache::Release), plus the parsed out objects @@ -1441,10 +1673,10 @@ static MYSQL_SYSVAR_UINT( nullptr, nullptr, 0, /* min */ 0, /* max */ INT_MAX, 0); static MYSQL_SYSVAR_BOOL(force_compute_memtable_stats, - rocksdb_force_compute_memtable_stats, - PLUGIN_VAR_RQCMDARG, - "Force to always compute memtable stats", - nullptr, nullptr, TRUE); + rocksdb_force_compute_memtable_stats, + PLUGIN_VAR_RQCMDARG, + "Force to always compute memtable stats", nullptr, + nullptr, TRUE); static MYSQL_SYSVAR_UINT(force_compute_memtable_stats_cachetime, rocksdb_force_compute_memtable_stats_cachetime, @@ -1464,6 +1696,10 @@ static MYSQL_SYSVAR_STR(compact_cf, rocksdb_compact_cf_name, rocksdb_compact_column_family, rocksdb_compact_column_family_stub, ""); +static MYSQL_SYSVAR_STR(delete_cf, rocksdb_delete_cf_name, PLUGIN_VAR_RQCMDARG, + "Delete column family", rocksdb_delete_column_family, + rocksdb_delete_column_family_stub, ""); + static MYSQL_SYSVAR_STR(create_checkpoint, rocksdb_checkpoint_name, PLUGIN_VAR_RQCMDARG, "Checkpoint directory", rocksdb_create_checkpoint, @@ -1535,6 +1771,12 @@ 
static MYSQL_SYSVAR_UINT( "Maximum number of pending + ongoing number of manual compactions.", nullptr, nullptr, /* default */ 10, /* min */ 0, /* max */ UINT_MAX, 0); +static MYSQL_SYSVAR_BOOL( + rollback_on_timeout, rocksdb_rollback_on_timeout, PLUGIN_VAR_OPCMDARG, + "Whether to roll back the complete transaction or a single statement on " + "lock wait timeout (a single statement by default)", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_UINT( debug_manual_compaction_delay, rocksdb_debug_manual_compaction_delay, PLUGIN_VAR_RQCMDARG, @@ -1626,7 +1868,7 @@ static MYSQL_SYSVAR_LONGLONG( rocksdb_compaction_sequential_deletes_file_size, PLUGIN_VAR_RQCMDARG, "Minimum file size required for compaction_sequential_deletes", nullptr, rocksdb_set_compaction_options, 0L, - /* min */ -1L, /* max */ LONGLONG_MAX, 0); + /* min */ -1L, /* max */ LLONG_MAX, 0); static MYSQL_SYSVAR_BOOL( compaction_sequential_deletes_count_sd, @@ -1731,6 +1973,13 @@ static MYSQL_SYSVAR_BOOL(error_on_suboptimal_collation, "collation is used", nullptr, nullptr, TRUE); +static MYSQL_SYSVAR_BOOL( + enable_insert_with_update_caching, + rocksdb_enable_insert_with_update_caching, PLUGIN_VAR_OPCMDARG, + "Whether to enable optimization where we cache the read from a failed " + "insertion attempt in INSERT ON DUPLICATE KEY UPDATE", + nullptr, nullptr, TRUE); + static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100; static struct st_mysql_sys_var *rocksdb_system_variables[] = { @@ -1748,7 +1997,10 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(trace_sst_api), MYSQL_SYSVAR(commit_in_the_middle), MYSQL_SYSVAR(blind_delete_primary_key), +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported MYSQL_SYSVAR(read_free_rpl_tables), + MYSQL_SYSVAR(read_free_rpl), +#endif MYSQL_SYSVAR(bulk_load_size), MYSQL_SYSVAR(merge_buf_size), MYSQL_SYSVAR(enable_bulk_load_api), @@ -1800,6 +2052,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { 
MYSQL_SYSVAR(enable_thread_tracking), MYSQL_SYSVAR(perf_context_level), MYSQL_SYSVAR(wal_recovery_mode), + MYSQL_SYSVAR(stats_level), MYSQL_SYSVAR(access_hint_on_compaction_start), MYSQL_SYSVAR(new_table_reader_for_compaction_inputs), MYSQL_SYSVAR(compaction_readahead_size), @@ -1809,7 +2062,10 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(block_cache_size), MYSQL_SYSVAR(sim_cache_size), MYSQL_SYSVAR(use_clock_cache), + MYSQL_SYSVAR(cache_high_pri_pool_ratio), + MYSQL_SYSVAR(cache_dump), MYSQL_SYSVAR(cache_index_and_filter_blocks), + MYSQL_SYSVAR(cache_index_and_filter_with_high_priority), MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache), MYSQL_SYSVAR(index_type), MYSQL_SYSVAR(hash_index_allow_collision), @@ -1838,6 +2094,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality), MYSQL_SYSVAR(compact_cf), + MYSQL_SYSVAR(delete_cf), MYSQL_SYSVAR(signal_drop_index_thread), MYSQL_SYSVAR(pause_background_work), MYSQL_SYSVAR(enable_2pc), @@ -1883,10 +2140,13 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(debug_manual_compaction_delay), MYSQL_SYSVAR(max_manual_compactions), MYSQL_SYSVAR(manual_compaction_threads), + MYSQL_SYSVAR(rollback_on_timeout), + + MYSQL_SYSVAR(enable_insert_with_update_caching), nullptr}; -static rocksdb::WriteOptions -rdb_get_rocksdb_write_options(my_core::THD *const thd) { +static rocksdb::WriteOptions rdb_get_rocksdb_write_options( + my_core::THD *const thd) { rocksdb::WriteOptions opt; opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); @@ -1943,19 +2203,6 @@ static int rocksdb_compact_column_family(THD *const thd, /////////////////////////////////////////////////////////////////////////////////////////// -/** - @brief - Function we use in the creation of our hash to get key. 
-*/ - -uchar * -Rdb_open_tables_map::get_hash_key(const Rdb_table_handler *const table_handler, - size_t *const length, - my_bool not_used MY_ATTRIBUTE((__unused__))) { - *length = table_handler->m_table_name_length; - return reinterpret_cast<uchar *>(table_handler->m_table_name); -} - /* Drop index thread's control */ @@ -2012,7 +2259,7 @@ class Rdb_snapshot_notifier : public rocksdb::TransactionNotifier { void SnapshotCreated(const rocksdb::Snapshot *snapshot) override; -public: + public: Rdb_snapshot_notifier(const Rdb_snapshot_notifier &) = delete; Rdb_snapshot_notifier &operator=(const Rdb_snapshot_notifier &) = delete; @@ -2046,9 +2293,9 @@ String timeout_message(const char *command, const char *name1, /* This is the base class for transactions when interacting with rocksdb. -*/ + */ class Rdb_transaction { -protected: + protected: ulonglong m_write_count = 0; ulonglong m_insert_count = 0; ulonglong m_update_count = 0; @@ -2059,7 +2306,7 @@ protected: bool m_is_delayed_snapshot = false; bool m_is_two_phase = false; -private: + private: /* Number of write operations this transaction had when we took the last savepoint (the idea is not to take another savepoint if we haven't made @@ -2067,7 +2314,7 @@ private: */ ulonglong m_writes_at_last_savepoint; -protected: + protected: protected: THD *m_thd = nullptr; @@ -2092,9 +2339,9 @@ protected: // This should be used only when updating binlog information. 
virtual rocksdb::WriteBatchBase *get_write_batch() = 0; virtual bool commit_no_binlog() = 0; - virtual rocksdb::Iterator * - get_iterator(const rocksdb::ReadOptions &options, - rocksdb::ColumnFamilyHandle *column_family) = 0; + virtual rocksdb::Iterator *get_iterator( + const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *column_family) = 0; protected: /* @@ -2139,7 +2386,9 @@ protected: String m_detailed_error; int64_t m_snapshot_timestamp = 0; bool m_ddl_transaction; +#ifdef MARIAROCKS_NOT_YET std::shared_ptr<Rdb_explicit_snapshot> m_explicit_snapshot; +#endif /* Tracks the number of tables in use through external_lock. @@ -2173,8 +2422,9 @@ protected: RDB_MUTEX_LOCK_CHECK(s_tx_list_mutex); - for (auto it : s_tx_list) + for (auto it : s_tx_list) { walker->process_tran(it); + } RDB_MUTEX_UNLOCK_CHECK(s_tx_list_mutex); } @@ -2194,7 +2444,8 @@ protected: convert_error_code_to_mysql() does: force a statement rollback before returning HA_ERR_LOCK_WAIT_TIMEOUT: */ - my_core::thd_mark_transaction_to_rollback(thd, false /*just statement*/); + my_core::thd_mark_transaction_to_rollback( + thd, static_cast<bool>(rocksdb_rollback_on_timeout)); m_detailed_error.copy(timeout_message( "index", tbl_def->full_tablename().c_str(), kd.get_name().c_str())); table_handler->m_lock_wait_timeout_counter.inc(); @@ -2216,9 +2467,10 @@ protected: char user_host_buff[MAX_USER_HOST_SIZE + 1]; make_user_name(thd, user_host_buff); // NO_LINT_DEBUG - sql_print_warning("Got snapshot conflict errors: User: %s " - "Query: %s", - user_host_buff, thd->query()); + sql_print_warning( + "Got snapshot conflict errors: User: %s " + "Query: %s", + user_host_buff, thd->query()); } m_detailed_error = String(" (snapshot conflict)", system_charset_info); table_handler->m_deadlock_counter.inc(); @@ -2315,8 +2567,9 @@ protected: if (m_is_tx_failed) { rollback(); res = false; - } else + } else { res = commit(); + } return res; } @@ -2367,7 +2620,7 @@ protected: bool has_snapshot() const { return 
m_read_opts.snapshot != nullptr; } -private: + private: // The Rdb_sst_info structures we are currently loading. In a partitioned // table this can have more than one entry std::vector<std::shared_ptr<Rdb_sst_info>> m_curr_bulk_load; @@ -2376,7 +2629,7 @@ private: /* External merge sorts for bulk load: key ID -> merge sort instance */ std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge; -public: + public: int get_key_merge(GL_INDEX_ID kd_gl_id, rocksdb::ColumnFamilyHandle *cf, Rdb_index_merge **key_merge) { int res; @@ -2397,22 +2650,62 @@ public: return HA_EXIT_SUCCESS; } - int finish_bulk_load(int print_client_error = true) { - int rc = 0, rc2; + /* Finish bulk loading for all table handlers belongs to one connection */ + int finish_bulk_load(bool *is_critical_error = nullptr, + int print_client_error = true) { + Ensure_cleanup cleanup([&]() { + // Always clear everything regardless of success/failure + m_curr_bulk_load.clear(); + m_curr_bulk_load_tablename.clear(); + m_key_merge.clear(); + }); + + int rc = 0; + if (is_critical_error) { + *is_critical_error = true; + } + + // PREPARE phase: finish all on-going bulk loading Rdb_sst_info and + // collect all Rdb_sst_commit_info containing (SST files, cf) + int rc2 = 0; + std::vector<Rdb_sst_info::Rdb_sst_commit_info> sst_commit_list; + sst_commit_list.reserve(m_curr_bulk_load.size()); + + for (auto &sst_info : m_curr_bulk_load) { + Rdb_sst_info::Rdb_sst_commit_info commit_info; - std::vector<std::shared_ptr<Rdb_sst_info>>::iterator it; - for (it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); it++) { - rc2 = (*it)->commit(print_client_error); - if (rc2 != 0 && rc == 0) { + // Commit the list of SST files and move it to the end of + // sst_commit_list, effectively transfer the ownership over + rc2 = sst_info->finish(&commit_info, print_client_error); + if (rc2 && rc == 0) { + // Don't return yet - make sure we finish all the SST infos rc = rc2; } + + // Make sure we have work to do - we might be 
losing the race + if (rc2 == 0 && commit_info.has_work()) { + sst_commit_list.emplace_back(std::move(commit_info)); + DBUG_ASSERT(!commit_info.has_work()); + } + } + + if (rc) { + return rc; } - m_curr_bulk_load.clear(); - m_curr_bulk_load_tablename.clear(); - DBUG_ASSERT(m_curr_bulk_load.size() == 0); - // Flush the index_merge sort buffers + // MERGING Phase: Flush the index_merge sort buffers into SST files in + // Rdb_sst_info and collect all Rdb_sst_commit_info containing + // (SST files, cf) if (!m_key_merge.empty()) { + Ensure_cleanup malloc_cleanup([]() { + /* + Explicitly tell jemalloc to clean up any unused dirty pages at this + point. + See https://reviews.facebook.net/D63723 for more details. + */ + purge_all_jemalloc_arenas(); + }); + rocksdb::Slice merge_key; rocksdb::Slice merge_val; for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) { @@ -2429,9 +2722,20 @@ public: // be missed by the compaction filter and not be marked for // removal. It is unclear how to lock the sql table from the storage // engine to prevent modifications to it while bulk load is occurring. - if (keydef == nullptr || table_name.empty()) { - rc2 = HA_ERR_ROCKSDB_BULK_LOAD; - break; + if (keydef == nullptr) { + if (is_critical_error) { + // We used to set the error but simply ignores it. This follows + // current behavior and we should revisit this later + *is_critical_error = false; + } + return HA_ERR_KEY_NOT_FOUND; + } else if (table_name.empty()) { + if (is_critical_error) { + // We used to set the error but simply ignores it. 
This follows + // current behavior and we should revisit this later + *is_critical_error = false; + } + return HA_ERR_NO_SUCH_TABLE; } const std::string &index_name = keydef->get_name(); Rdb_index_merge &rdb_merge = it->second; @@ -2440,38 +2744,112 @@ public: // "./database/table" std::replace(table_name.begin(), table_name.end(), '.', '/'); table_name = "./" + table_name; - Rdb_sst_info sst_info(rdb, table_name, index_name, rdb_merge.get_cf(), - *rocksdb_db_options, - THDVAR(get_thd(), trace_sst_api)); + auto sst_info = std::make_shared<Rdb_sst_info>( + rdb, table_name, index_name, rdb_merge.get_cf(), + *rocksdb_db_options, THDVAR(get_thd(), trace_sst_api)); while ((rc2 = rdb_merge.next(&merge_key, &merge_val)) == 0) { - if ((rc2 = sst_info.put(merge_key, merge_val)) != 0) { + if ((rc2 = sst_info->put(merge_key, merge_val)) != 0) { + rc = rc2; + + // Don't return yet - make sure we finish the sst_info break; } } - // rc2 == -1 => finished ok; rc2 > 0 => error - if (rc2 > 0 || (rc2 = sst_info.commit(print_client_error)) != 0) { - if (rc == 0) { - rc = rc2; - } - break; + // -1 => no more items + if (rc2 != -1 && rc != 0) { + rc = rc2; + } + + Rdb_sst_info::Rdb_sst_commit_info commit_info; + rc2 = sst_info->finish(&commit_info, print_client_error); + if (rc2 != 0 && rc == 0) { + // Only set the error from sst_info->finish if finish failed and we + // didn't fail before. In other words, we don't have finish's + // success mask earlier failures + rc = rc2; + } + + if (rc) { + return rc; + } + + if (commit_info.has_work()) { + sst_commit_list.emplace_back(std::move(commit_info)); + DBUG_ASSERT(!commit_info.has_work()); } } - m_key_merge.clear(); + } - /* - Explicitly tell jemalloc to clean up any unused dirty pages at this - point. - See https://reviews.facebook.net/D63723 for more details. 
- */ - purge_all_jemalloc_arenas(); + // Early return in case we lost the race completely and end up with no + // work at all + if (sst_commit_list.size() == 0) { + return rc; } + + // INGEST phase: Group all Rdb_sst_commit_info by cf (as they might + // have the same cf across different indexes) and call out to RocksDB + // to ingest all SST files in one atomic operation + rocksdb::IngestExternalFileOptions options; + options.move_files = true; + options.snapshot_consistency = false; + options.allow_global_seqno = false; + options.allow_blocking_flush = false; + + std::map<rocksdb::ColumnFamilyHandle *, rocksdb::IngestExternalFileArg> + arg_map; + + // Group by column_family + for (auto &commit_info : sst_commit_list) { + if (arg_map.find(commit_info.get_cf()) == arg_map.end()) { + rocksdb::IngestExternalFileArg arg; + arg.column_family = commit_info.get_cf(), + arg.external_files = commit_info.get_committed_files(), + arg.options = options; + + arg_map.emplace(commit_info.get_cf(), arg); + } else { + auto &files = arg_map[commit_info.get_cf()].external_files; + files.insert(files.end(), commit_info.get_committed_files().begin(), + commit_info.get_committed_files().end()); + } + } + + std::vector<rocksdb::IngestExternalFileArg> args; + size_t file_count = 0; + for (auto &cf_files_pair : arg_map) { + args.push_back(cf_files_pair.second); + file_count += cf_files_pair.second.external_files.size(); + } + + const rocksdb::Status s = rdb->IngestExternalFiles(args); + if (THDVAR(m_thd, trace_sst_api)) { + // NO_LINT_DEBUG + sql_print_information( + "SST Tracing: IngestExternalFile '%zu' files returned %s", file_count, + s.ok() ? "ok" : "not ok"); + } + + if (!s.ok()) { + if (print_client_error) { + Rdb_sst_info::report_error_msg(s, nullptr); + } + return HA_ERR_ROCKSDB_BULK_LOAD; + } + + // COMMIT phase: mark everything as completed. This avoids SST file + // deletion kicking in. 
Otherwise SST files would get deleted if this + // entire operation is aborted + for (auto &commit_info : sst_commit_list) { + commit_info.commit(); + } + return rc; } int start_bulk_load(ha_rocksdb *const bulk_load, - std::shared_ptr<Rdb_sst_info> sst_info) { + std::shared_ptr<Rdb_sst_info> sst_info) { /* If we already have an open bulk load of a table and the name doesn't match the current one, close out the currently running one. This allows @@ -2484,8 +2862,6 @@ public: bulk_load->get_table_basename() != m_curr_bulk_load_tablename) { const auto res = finish_bulk_load(); if (res != HA_EXIT_SUCCESS) { - m_curr_bulk_load.clear(); - m_curr_bulk_load_tablename.clear(); return res; } } @@ -2535,12 +2911,10 @@ public: inserts while inside a multi-statement transaction. */ bool flush_batch() { - if (get_write_count() == 0) - return false; + if (get_write_count() == 0) return false; /* Commit the current transaction */ - if (commit_no_binlog()) - return true; + if (commit_no_binlog()) return true; /* Start another one */ start_tx(); @@ -2552,7 +2926,7 @@ public: std::max(m_auto_incr_map[gl_index_id], curr_id); } -#ifndef NDEBUG +#ifndef DBUG_OFF ulonglong get_auto_incr(const GL_INDEX_ID &gl_index_id) { if (m_auto_incr_map.count(gl_index_id) > 0) { return m_auto_incr_map[gl_index_id]; @@ -2563,13 +2937,14 @@ public: virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, - const rocksdb::Slice &value) = 0; - virtual rocksdb::Status - delete_key(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) = 0; - virtual rocksdb::Status - single_delete(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) = 0; + const rocksdb::Slice &value, + const bool assume_tracked) = 0; + virtual rocksdb::Status delete_key( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool assume_tracked) = 0; + virtual rocksdb::Status single_delete( + 
rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool assume_tracked) = 0; virtual bool has_modifications() const = 0; @@ -2585,25 +2960,23 @@ public: virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, rocksdb::PinnableSlice *const value) const = 0; - virtual rocksdb::Status - get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, - bool exclusive) = 0; - - rocksdb::Iterator * - get_iterator(rocksdb::ColumnFamilyHandle *const column_family, - bool skip_bloom_filter, bool fill_cache, - const rocksdb::Slice &eq_cond_lower_bound, - const rocksdb::Slice &eq_cond_upper_bound, - bool read_current = false, bool create_snapshot = true) { + virtual rocksdb::Status get_for_update( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, + bool exclusive, const bool do_validate) = 0; + + rocksdb::Iterator *get_iterator( + rocksdb::ColumnFamilyHandle *const column_family, bool skip_bloom_filter, + bool fill_cache, const rocksdb::Slice &eq_cond_lower_bound, + const rocksdb::Slice &eq_cond_upper_bound, bool read_current = false, + bool create_snapshot = true) { // Make sure we are not doing both read_current (which implies we don't // want a snapshot) and create_snapshot which makes sure we create // a snapshot DBUG_ASSERT(column_family != nullptr); DBUG_ASSERT(!read_current || !create_snapshot); - if (create_snapshot) - acquire_snapshot(true); + if (create_snapshot) acquire_snapshot(true); rocksdb::ReadOptions options = m_read_opts; @@ -2635,25 +3008,33 @@ public: entire transaction. */ do_set_savepoint(); - m_writes_at_last_savepoint= m_write_count; + m_writes_at_last_savepoint = m_write_count; } /* Called when a "top-level" statement inside a transaction completes successfully and its changes become part of the transaction's changes. 
*/ - void make_stmt_savepoint_permanent() { - + int make_stmt_savepoint_permanent() { // Take another RocksDB savepoint only if we had changes since the last // one. This is very important for long transactions doing lots of // SELECTs. - if (m_writes_at_last_savepoint != m_write_count) - { + if (m_writes_at_last_savepoint != m_write_count) { + rocksdb::WriteBatchBase *batch = get_write_batch(); + rocksdb::Status status = rocksdb::Status::NotFound(); + while ((status = batch->PopSavePoint()) == rocksdb::Status::OK()) { + } + + if (status != rocksdb::Status::NotFound()) { + return HA_EXIT_FAILURE; + } + do_set_savepoint(); - m_writes_at_last_savepoint= m_write_count; + m_writes_at_last_savepoint = m_write_count; } - } + return HA_EXIT_SUCCESS; + } /* Rollback to the savepoint we've set before the last statement @@ -2669,7 +3050,7 @@ public: statement start) because setting a savepoint is cheap. */ do_set_savepoint(); - m_writes_at_last_savepoint= m_write_count; + m_writes_at_last_savepoint = m_write_count; } } @@ -2733,10 +3114,11 @@ class Rdb_transaction_impl : public Rdb_transaction { rocksdb::Transaction *m_rocksdb_tx = nullptr; rocksdb::Transaction *m_rocksdb_reuse_tx = nullptr; -public: + public: void set_lock_timeout(int timeout_sec_arg) override { - if (m_rocksdb_tx) + if (m_rocksdb_tx) { m_rocksdb_tx->SetLockTimeout(rdb_convert_sec_to_ms(m_timeout_sec)); + } } void set_sync(bool sync) override { @@ -2753,7 +3135,7 @@ public: virtual bool is_writebatch_trx() const override { return false; } -private: + private: void release_tx(void) { // We are done with the current active transaction object. Preserve it // for later reuse. 
@@ -2803,7 +3185,7 @@ private: goto error; } -error: + error: /* Save the transaction object to be reused */ release_tx(); @@ -2817,7 +3199,7 @@ error: return res; } -public: + public: void rollback() override { m_write_count = 0; m_insert_count = 0; @@ -2884,39 +3266,42 @@ public: m_read_opts.snapshot = nullptr; } - if (need_clear && m_rocksdb_tx != nullptr) - m_rocksdb_tx->ClearSnapshot(); + if (need_clear && m_rocksdb_tx != nullptr) m_rocksdb_tx->ClearSnapshot(); } bool has_snapshot() { return m_read_opts.snapshot != nullptr; } rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, - const rocksdb::Slice &value) override { + const rocksdb::Slice &key, const rocksdb::Slice &value, + const bool assume_tracked) override { ++m_write_count; ++m_lock_count; - if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) + if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) { return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); - return m_rocksdb_tx->Put(column_family, key, value); + } + return m_rocksdb_tx->Put(column_family, key, value, assume_tracked); } rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) override { + const rocksdb::Slice &key, + const bool assume_tracked) override { ++m_write_count; ++m_lock_count; - if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) + if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) { return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); - return m_rocksdb_tx->Delete(column_family, key); + } + return m_rocksdb_tx->Delete(column_family, key, assume_tracked); } - rocksdb::Status - single_delete(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) override { + rocksdb::Status single_delete( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool assume_tracked) override { ++m_write_count; 
++m_lock_count; - if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) + if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) { return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); - return m_rocksdb_tx->SingleDelete(column_family, key); + } + return m_rocksdb_tx->SingleDelete(column_family, key, assume_tracked); } bool has_modifications() const override { @@ -2952,23 +3337,39 @@ public: return m_rocksdb_tx->Get(m_read_opts, column_family, key, value); } - rocksdb::Status - get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, - bool exclusive) override { - if (++m_lock_count > m_max_row_locks) + rocksdb::Status get_for_update( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, + bool exclusive, const bool do_validate) override { + if (++m_lock_count > m_max_row_locks) { return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); + } if (value != nullptr) { value->Reset(); } - return m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value, - exclusive); + rocksdb::Status s; + // If snapshot is null, pass it to GetForUpdate and snapshot is + // initialized there. Snapshot validation is skipped in that case. + if (m_read_opts.snapshot == nullptr || do_validate) { + s = m_rocksdb_tx->GetForUpdate( + m_read_opts, column_family, key, value, exclusive, + m_read_opts.snapshot ? 
do_validate : false); + } else { + // If snapshot is set, and if skipping validation, + // call GetForUpdate without validation and set back old snapshot + auto saved_snapshot = m_read_opts.snapshot; + m_read_opts.snapshot = nullptr; + s = m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value, + exclusive, false); + m_read_opts.snapshot = saved_snapshot; + } + return s; } - rocksdb::Iterator * - get_iterator(const rocksdb::ReadOptions &options, - rocksdb::ColumnFamilyHandle *const column_family) override { + rocksdb::Iterator *get_iterator( + const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *const column_family) override { global_stats.queries[QUERIES_RANGE].inc(); return m_rocksdb_tx->GetIterator(options, column_family); } @@ -3013,10 +3414,9 @@ public: m_ddl_transaction = false; } - /* Implementations of do_*savepoint based on rocksdB::Transaction savepoints */ - void do_set_savepoint() override { - m_rocksdb_tx->SetSavePoint(); - } + /* Implementations of do_*savepoint based on rocksdB::Transaction savepoints + */ + void do_set_savepoint() override { m_rocksdb_tx->SetSavePoint(); } void do_rollback_to_savepoint() override { m_rocksdb_tx->RollbackToSavePoint(); @@ -3048,14 +3448,14 @@ public: const rocksdb::Snapshot *const cur_snapshot = m_rocksdb_tx->GetSnapshot(); if (org_snapshot != cur_snapshot) { - if (org_snapshot != nullptr) - m_snapshot_timestamp = 0; + if (org_snapshot != nullptr) m_snapshot_timestamp = 0; m_read_opts.snapshot = cur_snapshot; - if (cur_snapshot != nullptr) + if (cur_snapshot != nullptr) { rdb->GetEnv()->GetCurrentTime(&m_snapshot_timestamp); - else + } else { m_is_delayed_snapshot = true; + } } } } @@ -3066,7 +3466,7 @@ public: m_notifier = std::make_shared<Rdb_snapshot_notifier>(this); } - virtual ~Rdb_transaction_impl() { + virtual ~Rdb_transaction_impl() override { rollback(); // Theoretically the notifier could outlive the Rdb_transaction_impl @@ -3098,7 +3498,7 @@ class Rdb_writebatch_impl : public 
Rdb_transaction { m_ddl_transaction = false; } -private: + private: bool prepare(const rocksdb::TransactionName &name) override { return true; } bool commit_no_binlog() override { @@ -3122,7 +3522,7 @@ private: res = true; goto error; } -error: + error: reset(); m_write_count = 0; @@ -3135,16 +3535,12 @@ error: } /* Implementations of do_*savepoint based on rocksdB::WriteBatch savepoints */ - void do_set_savepoint() override { - m_batch->SetSavePoint(); - } + void do_set_savepoint() override { m_batch->SetSavePoint(); } - void do_rollback_to_savepoint() override { - m_batch->RollbackToSavePoint(); - } + void do_rollback_to_savepoint() override { m_batch->RollbackToSavePoint(); } -public: + public: bool is_writebatch_trx() const override { return true; } void set_lock_timeout(int timeout_sec_arg) override { @@ -3172,8 +3568,7 @@ public: } void acquire_snapshot(bool acquire_now) override { - if (m_read_opts.snapshot == nullptr) - snapshot_created(rdb->GetSnapshot()); + if (m_read_opts.snapshot == nullptr) snapshot_created(rdb->GetSnapshot()); } void release_snapshot() override { @@ -3184,8 +3579,8 @@ public: } rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, - const rocksdb::Slice &value) override { + const rocksdb::Slice &key, const rocksdb::Slice &value, + const bool assume_tracked) override { ++m_write_count; m_batch->Put(column_family, key, value); // Note Put/Delete in write batch doesn't return any error code. 
We simply @@ -3194,15 +3589,16 @@ public: } rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) override { + const rocksdb::Slice &key, + const bool assume_tracked) override { ++m_write_count; m_batch->Delete(column_family, key); return rocksdb::Status::OK(); } - rocksdb::Status - single_delete(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) override { + rocksdb::Status single_delete( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool /* assume_tracked */) override { ++m_write_count; m_batch->SingleDelete(column_family, key); return rocksdb::Status::OK(); @@ -3227,10 +3623,10 @@ public: value); } - rocksdb::Status - get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, - bool exclusive) override { + rocksdb::Status get_for_update( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, + bool /* exclusive */, const bool /* do_validate */) override { if (value == nullptr) { rocksdb::PinnableSlice pin_val; rocksdb::Status s = get(column_family, key, &pin_val); @@ -3241,9 +3637,9 @@ public: return get(column_family, key, value); } - rocksdb::Iterator * - get_iterator(const rocksdb::ReadOptions &options, - rocksdb::ColumnFamilyHandle *const column_family) override { + rocksdb::Iterator *get_iterator( + const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *const /* column_family */) override { const auto it = rdb->NewIterator(options); return m_batch->NewIteratorWithBase(it); } @@ -3264,8 +3660,7 @@ public: void start_stmt() override {} void rollback_stmt() override { - if (m_batch) - rollback_to_stmt_savepoint(); + if (m_batch) rollback_to_stmt_savepoint(); } explicit Rdb_writebatch_impl(THD *const thd) @@ -3274,7 +3669,7 @@ public: true); } - virtual ~Rdb_writebatch_impl() { + virtual 
~Rdb_writebatch_impl() override { rollback(); delete m_batch; } @@ -3332,7 +3727,7 @@ class Rdb_perf_context_guard { } }; -} // anonymous namespace +} // anonymous namespace /* TODO: maybe, call this in external_lock() and store in ha_rocksdb.. @@ -3344,12 +3739,10 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) { if (tx == nullptr) { bool rpl_skip_tx_api= false; // MARIAROCKS_NOT_YET. if ((rpl_skip_tx_api && thd->rgi_slave) || - false /* MARIAROCKS_NOT_YET: THDVAR(thd, master_skip_tx_api) && !thd->rgi_slave)*/) + (THDVAR(thd, master_skip_tx_api) && !thd->rgi_slave)) { tx = new Rdb_writebatch_impl(thd); - } - else - { + } else { tx = new Rdb_transaction_impl(thd); } tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks)); @@ -3368,12 +3761,14 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) { static int rocksdb_close_connection(handlerton *const hton, THD *const thd) { Rdb_transaction *tx = get_tx_from_thd(thd); if (tx != nullptr) { - int rc = tx->finish_bulk_load(false); - if (rc != 0) { + bool is_critical_error; + int rc = tx->finish_bulk_load(&is_critical_error, false); + if (rc != 0 && is_critical_error) { // NO_LINT_DEBUG - sql_print_error("RocksDB: Error %d finalizing last SST file while " - "disconnecting", - rc); + sql_print_error( + "RocksDB: Error %d finalizing last SST file while " + "disconnecting", + rc); } delete tx; @@ -3514,9 +3909,9 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx) } DEBUG_SYNC(thd, "rocksdb.prepared"); - } - else + } else { tx->make_stmt_savepoint_permanent(); + } return HA_EXIT_SUCCESS; } @@ -3557,9 +3952,8 @@ static int rocksdb_commit_by_xid(handlerton *const hton, XID *const xid) { DBUG_RETURN(HA_EXIT_SUCCESS); } -static int -rocksdb_rollback_by_xid(handlerton *const hton MY_ATTRIBUTE((__unused__)), - XID *const xid) { +static int rocksdb_rollback_by_xid( + handlerton *const hton MY_ATTRIBUTE((__unused__)), XID *const xid) { DBUG_ENTER_FUNC(); DBUG_ASSERT(hton 
!= nullptr); @@ -3605,6 +3999,7 @@ static void rdb_xid_from_string(const std::string &src, XID *const dst) { DBUG_ASSERT(dst->gtrid_length >= 0 && dst->gtrid_length <= MAXGTRIDSIZE); DBUG_ASSERT(dst->bqual_length >= 0 && dst->bqual_length <= MAXBQUALSIZE); + memset(dst->data, 0, XIDDATASIZE); src.copy(dst->data, (dst->gtrid_length) + (dst->bqual_length), RDB_XIDHDR_LEN); } @@ -3629,13 +4024,16 @@ static int rocksdb_recover(handlerton* hton, XID* xid_list, uint len) if (is_binlog_advanced(binlog_file, *binlog_pos, file_buf, pos)) { memcpy(binlog_file, file_buf, FN_REFLEN + 1); *binlog_pos = pos; - fprintf(stderr, "RocksDB: Last binlog file position %llu," - " file name %s\n", + // NO_LINT_DEBUG + fprintf(stderr, + "RocksDB: Last binlog file position %llu," + " file name %s\n", pos, file_buf); if (*gtid_buf) { global_sid_lock->rdlock(); binlog_max_gtid->parse(global_sid_map, gtid_buf); global_sid_lock->unlock(); + // NO_LINT_DEBUG fprintf(stderr, "RocksDB: Last MySQL Gtid %s\n", gtid_buf); } } @@ -3733,8 +4131,8 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx) Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd)); if (tx != nullptr) { - if (commit_tx || (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | - OPTION_BEGIN))) { + if (commit_tx || (!my_core::thd_test_options( + thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { /* This will not add anything to commit_latency_stats, and this is correct right? @@ -3865,7 +4263,7 @@ static std::string format_string(const char *const format, ...) { char *buff = static_buff; std::unique_ptr<char[]> dynamic_buff = nullptr; - len++; // Add one for null terminator + len++; // Add one for null terminator // for longer output use an allocated buffer if (static_cast<uint>(len) > sizeof(static_buff)) { @@ -3890,7 +4288,7 @@ static std::string format_string(const char *const format, ...) 
{ } class Rdb_snapshot_status : public Rdb_tx_list_walker { -private: + private: std::string m_data; static std::string current_timestamp(void) { @@ -3924,9 +4322,8 @@ private: "=========================================\n"; } - static Rdb_deadlock_info::Rdb_dl_trx_info - get_dl_txn_info(const rocksdb::DeadlockInfo &txn, - const GL_INDEX_ID &gl_index_id) { + static Rdb_deadlock_info::Rdb_dl_trx_info get_dl_txn_info( + const rocksdb::DeadlockInfo &txn, const GL_INDEX_ID &gl_index_id) { Rdb_deadlock_info::Rdb_dl_trx_info txn_data; txn_data.trx_id = txn.m_txn_id; @@ -3953,13 +4350,12 @@ private: return txn_data; } - static Rdb_deadlock_info - get_dl_path_trx_info(const rocksdb::DeadlockPath &path_entry) { + static Rdb_deadlock_info get_dl_path_trx_info( + const rocksdb::DeadlockPath &path_entry) { Rdb_deadlock_info deadlock_info; - for (auto it = path_entry.path.begin(); it != path_entry.path.end(); - it++) { - auto txn = *it; + for (auto it = path_entry.path.begin(); it != path_entry.path.end(); it++) { + const auto &txn = *it; const GL_INDEX_ID gl_index_id = { txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>( txn.m_waiting_key.c_str()))}; @@ -3968,7 +4364,7 @@ private: DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty()); /* print the first txn in the path to display the full deadlock cycle */ if (!path_entry.path.empty() && !path_entry.limit_exceeded) { - auto deadlocking_txn = *(path_entry.path.end() - 1); + const auto &deadlocking_txn = *(path_entry.path.end() - 1); deadlock_info.victim_trx_id = deadlocking_txn.m_txn_id; deadlock_info.deadlock_time = path_entry.deadlock_time; } @@ -3997,7 +4393,7 @@ private: #endif m_data += format_string( "---SNAPSHOT, ACTIVE %lld sec\n" - "%s\n" + "%s\n" "lock count %llu, write count %llu\n" "insert count %llu, update count %llu, delete count %llu\n", (longlong)(curr_time - snapshot_timestamp), buffer, tx->get_lock_count(), @@ -4010,19 +4406,21 @@ private: auto dlock_buffer = 
rdb->GetDeadlockInfoBuffer(); m_data += "----------LATEST DETECTED DEADLOCKS----------\n"; - for (auto path_entry : dlock_buffer) { + for (const auto &path_entry : dlock_buffer) { std::string path_data; if (path_entry.limit_exceeded) { path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n"; } else { - path_data += "\n*** DEADLOCK PATH\n" - "=========================================\n"; + path_data += + "\n*** DEADLOCK PATH\n" + "=========================================\n"; const auto dl_info = get_dl_path_trx_info(path_entry); const auto deadlock_time = dl_info.deadlock_time; for (auto it = dl_info.path.begin(); it != dl_info.path.end(); it++) { - const auto trx_info = *it; + const auto &trx_info = *it; path_data += format_string( - "TIMESTAMP: %" PRId64 "\n" + "TIMESTAMP: %" PRId64 + "\n" "TRANSACTION ID: %u\n" "COLUMN FAMILY NAME: %s\n" "WAITING KEY: %s\n" @@ -4037,9 +4435,9 @@ private: path_data += "---------------WAITING FOR---------------\n"; } } - path_data += - format_string("\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n", - dl_info.victim_trx_id); + path_data += format_string( + "\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n", + dl_info.victim_trx_id); } m_data += path_data; } @@ -4048,7 +4446,7 @@ private: std::vector<Rdb_deadlock_info> get_deadlock_info() { std::vector<Rdb_deadlock_info> deadlock_info; auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); - for (auto path_entry : dlock_buffer) { + for (const auto &path_entry : dlock_buffer) { if (!path_entry.limit_exceeded) { deadlock_info.push_back(get_dl_path_trx_info(path_entry)); } @@ -4063,10 +4461,10 @@ private: * out relevant information for information_schema.rocksdb_trx */ class Rdb_trx_info_aggregator : public Rdb_tx_list_walker { -private: + private: std::vector<Rdb_trx_info> *m_trx_info; -public: + public: explicit Rdb_trx_info_aggregator(std::vector<Rdb_trx_info> *const trx_info) : m_trx_info(trx_info) {} @@ -4197,9 +4595,10 @@ static bool rocksdb_show_status(handlerton 
*const hton, THD *const thd, // sure that output will look unified. DBUG_ASSERT(commit_latency_stats != nullptr); - snprintf(buf, sizeof(buf), "rocksdb.commit_latency statistics " - "Percentiles :=> 50 : %.2f 95 : %.2f " - "99 : %.2f 100 : %.2f\n", + snprintf(buf, sizeof(buf), + "rocksdb.commit_latency statistics " + "Percentiles :=> 50 : %.2f 95 : %.2f " + "99 : %.2f 100 : %.2f\n", commit_latency_stats->Percentile(50), commit_latency_stats->Percentile(95), commit_latency_stats->Percentile(99), @@ -4221,7 +4620,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, } if (rdb->GetIntProperty("rocksdb.actual-delayed-write-rate", &v)) { - snprintf(buf, sizeof(buf), "rocksdb.actual_delayed_write_rate " + snprintf(buf, sizeof(buf), "COUNT : %llu\n", (ulonglong)v); str.append(buf); @@ -4309,6 +4708,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, rocksdb::Status s = rdb->GetEnv()->GetThreadList(&thread_list); if (!s.ok()) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Returned error (%s) from GetThreadList.\n", s.ToString().c_str()); res |= true; @@ -4325,37 +4725,23 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, "\noperation_type: " + it.GetOperationName(it.operation_type) + "\noperation_stage: " + it.GetOperationStageName(it.operation_stage) + - "\nelapsed_time_ms: " + - it.MicrosToString(it.op_elapsed_micros); + "\nelapsed_time_ms: " + it.MicrosToString(it.op_elapsed_micros); - for (auto &it_props : - it.InterpretOperationProperties(it.operation_type, - it.op_properties)) { + for (auto &it_props : it.InterpretOperationProperties( + it.operation_type, it.op_properties)) { str += "\n" + it_props.first + ": " + std::to_string(it_props.second); } str += "\nstate_type: " + it.GetStateName(it.state_type); - res |= print_stats(thd, "BG_THREADS", std::to_string(it.thread_id), - str, stat_print); + res |= print_stats(thd, "BG_THREADS", std::to_string(it.thread_id), str, + stat_print); } } #ifdef 
MARIAROCKS_NOT_YET /* Explicit snapshot information */ - str.clear(); - { - std::lock_guard<std::mutex> lock(explicit_snapshot_mutex); - for (const auto &elem : explicit_snapshots) { - const auto &ss = elem.second.lock(); - DBUG_ASSERT(ss != nullptr); - const auto &info = ss->ss_info; - str += "\nSnapshot ID: " + std::to_string(info.snapshot_id) + - "\nBinlog File: " + info.binlog_file + - "\nBinlog Pos: " + std::to_string(info.binlog_pos) + - "\nGtid Executed: " + info.gtid_executed + "\n"; - } - } + str = Rdb_explicit_snapshot::dump_snapshots(); #endif if (!str.empty()) { @@ -4390,38 +4776,38 @@ static bool rocksdb_explicit_snapshot( snapshot_info_st *ss_info) /*!< out: Snapshot information */ { switch (ss_info->op) { - case snapshot_operation::SNAPSHOT_CREATE: { - if (mysql_bin_log_is_open()) { - mysql_bin_log_lock_commits(ss_info); + case snapshot_operation::SNAPSHOT_CREATE: { + if (mysql_bin_log_is_open()) { + mysql_bin_log_lock_commits(ss_info); + } + auto s = Rdb_explicit_snapshot::create(ss_info, rdb, rdb->GetSnapshot()); + if (mysql_bin_log_is_open()) { + mysql_bin_log_unlock_commits(ss_info); + } + + thd->set_explicit_snapshot(s); + return s == nullptr; } - auto s = Rdb_explicit_snapshot::create(ss_info, rdb, rdb->GetSnapshot()); - if (mysql_bin_log_is_open()) { - mysql_bin_log_unlock_commits(ss_info); + case snapshot_operation::SNAPSHOT_ATTACH: { + auto s = Rdb_explicit_snapshot::get(ss_info->snapshot_id); + if (!s) { + return true; + } + *ss_info = s->ss_info; + thd->set_explicit_snapshot(s); + return false; } - - thd->set_explicit_snapshot(s); - return s == nullptr; - } - case snapshot_operation::SNAPSHOT_ATTACH: { - auto s = Rdb_explicit_snapshot::get(ss_info->snapshot_id); - if (!s) { - return true; + case snapshot_operation::SNAPSHOT_RELEASE: { + if (!thd->get_explicit_snapshot()) { + return true; + } + *ss_info = thd->get_explicit_snapshot()->ss_info; + thd->set_explicit_snapshot(nullptr); + return false; } - *ss_info = s->ss_info; - 
thd->set_explicit_snapshot(s); - return false; - } - case snapshot_operation::SNAPSHOT_RELEASE: { - if (!thd->get_explicit_snapshot()) { + default: + DBUG_ASSERT(false); return true; - } - *ss_info = thd->get_explicit_snapshot()->ss_info; - thd->set_explicit_snapshot(nullptr); - return false; - } - default: - DBUG_ASSERT(false); - return true; } return true; } @@ -4567,7 +4953,7 @@ static int rocksdb_start_tx_with_shared_read_view( // case: an explicit snapshot was not assigned to this transaction if (!tx->m_explicit_snapshot) { tx->m_explicit_snapshot = - Rdb_explicit_snapshot::create(ss_info, rdb, tx->m_read_opts.snapshot); + Rdb_explicit_snapshot::create(ss_info, rdb, tx->m_read_opts.snapshot); if (!tx->m_explicit_snapshot) { my_printf_error(ER_UNKNOWN_ERROR, "Could not create snapshot", MYF(0)); error = HA_EXIT_FAILURE; @@ -4611,9 +4997,8 @@ static int rocksdb_rollback_to_savepoint(handlerton *const hton, THD *const thd, return tx->rollback_to_savepoint(savepoint); } -static bool -rocksdb_rollback_to_savepoint_can_release_mdl(handlerton *const hton, - THD *const thd) { +static bool rocksdb_rollback_to_savepoint_can_release_mdl( + handlerton *const /* hton */, THD *const /* thd */) { return true; } @@ -4661,7 +5046,7 @@ static void rocksdb_update_table_stats( /* Function needs to return void because of the interface and we've * detected an error which shouldn't happen. There's no way to let * caller know that something failed. 
- */ + */ SHIP_ASSERT(false); return; } @@ -4741,8 +5126,9 @@ static rocksdb::Status check_rocksdb_options_compatibility( } if (loaded_cf_descs.size() != cf_descr.size()) { - return rocksdb::Status::NotSupported("Mismatched size of column family " - "descriptors."); + return rocksdb::Status::NotSupported( + "Mismatched size of column family " + "descriptors."); } // Please see RocksDB documentation for more context about why we need to set @@ -4792,17 +5178,22 @@ static int rocksdb_init_func(void *const p) { } if (rdb_check_rocksdb_corruption()) { - sql_print_error("RocksDB: There was a corruption detected in RockDB files. " - "Check error log emitted earlier for more details."); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: There was a corruption detected in RockDB files. " + "Check error log emitted earlier for more details."); if (rocksdb_allow_to_start_after_corruption) { + // NO_LINT_DEBUG sql_print_information( "RocksDB: Remove rocksdb_allow_to_start_after_corruption to prevent " "server operating if RocksDB corruption is detected."); } else { - sql_print_error("RocksDB: The server will exit normally and stop restart " - "attempts. Remove %s file from data directory and " - "start mysqld manually.", - rdb_corruption_marker_file_name().c_str()); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: The server will exit normally and stop restart " + "attempts. 
Remove %s file from data directory and " + "start mysqld manually.", + rdb_corruption_marker_file_name().c_str()); exit(0); } } @@ -4813,8 +5204,10 @@ static int rocksdb_init_func(void *const p) { init_rocksdb_psi_keys(); rocksdb_hton = (handlerton *)p; - mysql_mutex_init(rdb_psi_open_tbls_mutex_key, &rdb_open_tables.m_mutex, - MY_MUTEX_INIT_FAST); + + rdb_open_tables.init(); + Ensure_cleanup rdb_open_tables_cleanup([]() { rdb_open_tables.free(); }); + #ifdef HAVE_PSI_INTERFACE rdb_bg_thread.init(rdb_signal_bg_psi_mutex_key, rdb_signal_bg_psi_cond_key); rdb_drop_idx_thread.init(rdb_signal_drop_idx_psi_mutex_key, @@ -4884,6 +5277,8 @@ static int rocksdb_init_func(void *const p) { /* Not needed in MariaDB: rocksdb_hton->flush_logs = rocksdb_flush_wal; + rocksdb_hton->handle_single_table_select = rocksdb_handle_single_table_select; + */ rocksdb_hton->flags = HTON_TEMPORARY_NOT_SUPPORTED | @@ -4893,16 +5288,25 @@ static int rocksdb_init_func(void *const p) { DBUG_ASSERT(!mysqld_embedded); if (rocksdb_db_options->max_open_files > (long)open_files_limit) { - sql_print_information("RocksDB: rocksdb_max_open_files should not be " - "greater than the open_files_limit, effective value " - "of rocksdb_max_open_files is being set to " - "open_files_limit / 2."); + // NO_LINT_DEBUG + sql_print_information( + "RocksDB: rocksdb_max_open_files should not be " + "greater than the open_files_limit, effective value " + "of rocksdb_max_open_files is being set to " + "open_files_limit / 2."); rocksdb_db_options->max_open_files = open_files_limit / 2; } else if (rocksdb_db_options->max_open_files == -2) { rocksdb_db_options->max_open_files = open_files_limit / 2; } +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + rdb_read_free_regex_handler.set_patterns(DEFAULT_READ_FREE_RPL_TABLES); +#endif + rocksdb_stats = rocksdb::CreateDBStatistics(); + rocksdb_stats->set_stats_level( + static_cast<rocksdb::StatsLevel>(rocksdb_stats_level)); + rocksdb_stats_level = 
rocksdb_stats->get_stats_level(); rocksdb_db_options->statistics = rocksdb_stats; if (rocksdb_rate_limiter_bytes_per_sec != 0) { @@ -4936,13 +5340,15 @@ static int rocksdb_init_func(void *const p) { rocksdb_db_options->use_direct_reads) { // allow_mmap_reads implies !use_direct_reads and RocksDB will not open if // mmap_reads and direct_reads are both on. (NO_LINT_DEBUG) - sql_print_error("RocksDB: Can't enable both use_direct_reads " - "and allow_mmap_reads\n"); + sql_print_error( + "RocksDB: Can't enable both use_direct_reads " + "and allow_mmap_reads\n"); DBUG_RETURN(HA_EXIT_FAILURE); } // Check whether the filesystem backing rocksdb_datadir allows O_DIRECT - if (rocksdb_db_options->use_direct_reads) { + if (rocksdb_db_options->use_direct_reads || + rocksdb_db_options->use_direct_io_for_flush_and_compaction) { rocksdb::EnvOptions soptions; rocksdb::Status check_status; rocksdb::Env *const env = rocksdb_db_options->env; @@ -4963,9 +5369,11 @@ static int rocksdb_init_func(void *const p) { } if (!check_status.ok()) { - sql_print_error("RocksDB: Unable to use direct io in rocksdb-datadir:" - "(%s)", check_status.getState()); - rdb_open_tables.free_hash(); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Unable to use direct io in rocksdb-datadir:" + "(%s)", + check_status.getState()); DBUG_RETURN(HA_EXIT_FAILURE); } } @@ -4973,17 +5381,19 @@ static int rocksdb_init_func(void *const p) { if (rocksdb_db_options->allow_mmap_writes && rocksdb_db_options->use_direct_io_for_flush_and_compaction) { // See above comment for allow_mmap_reads. 
(NO_LINT_DEBUG) - sql_print_error("RocksDB: Can't enable both " - "use_direct_io_for_flush_and_compaction and " - "allow_mmap_writes\n"); + sql_print_error( + "RocksDB: Can't enable both " + "use_direct_io_for_flush_and_compaction and " + "allow_mmap_writes\n"); DBUG_RETURN(HA_EXIT_FAILURE); } if (rocksdb_db_options->allow_mmap_writes && rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { // NO_LINT_DEBUG - sql_print_error("RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 " - "to use allow_mmap_writes"); + sql_print_error( + "RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 " + "to use allow_mmap_writes"); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -5010,15 +5420,19 @@ static int rocksdb_init_func(void *const p) { #endif ) { sql_print_information("RocksDB: Got ENOENT when listing column families"); + + // NO_LINT_DEBUG sql_print_information( "RocksDB: assuming that we're creating a new database"); } else { rdb_log_status_error(status, "Error listing column families"); DBUG_RETURN(HA_EXIT_FAILURE); } - } else + } else { + // NO_LINT_DEBUG sql_print_information("RocksDB: %ld column families found", cf_names.size()); + } std::vector<rocksdb::ColumnFamilyDescriptor> cf_descr; std::vector<rocksdb::ColumnFamilyHandle *> cf_handles; @@ -5027,9 +5441,33 @@ static int rocksdb_init_func(void *const p) { (rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type; if (!rocksdb_tbl_options->no_block_cache) { - std::shared_ptr<rocksdb::Cache> block_cache = rocksdb_use_clock_cache - ? rocksdb::NewClockCache(rocksdb_block_cache_size) - : rocksdb::NewLRUCache(rocksdb_block_cache_size); + std::shared_ptr<rocksdb::MemoryAllocator> memory_allocator; + if (!rocksdb_cache_dump) { + size_t block_size = rocksdb_tbl_options->block_size; + rocksdb::JemallocAllocatorOptions alloc_opt; + // Limit jemalloc tcache memory usage. The range + // [block_size/4, block_size] should be enough to cover most of + // block cache allocation sizes. 
+ alloc_opt.limit_tcache_size = true; + alloc_opt.tcache_size_lower_bound = block_size / 4; + alloc_opt.tcache_size_upper_bound = block_size; + rocksdb::Status new_alloc_status = + rocksdb::NewJemallocNodumpAllocator(alloc_opt, &memory_allocator); + if (!new_alloc_status.ok()) { + // Fallback to use default malloc/free. + rdb_log_status_error(new_alloc_status, + "Error excluding block cache from core dump"); + memory_allocator = nullptr; + DBUG_RETURN(HA_EXIT_FAILURE); + } + } + std::shared_ptr<rocksdb::Cache> block_cache = + rocksdb_use_clock_cache + ? rocksdb::NewClockCache(rocksdb_block_cache_size) + : rocksdb::NewLRUCache( + rocksdb_block_cache_size, -1 /*num_shard_bits*/, + false /*strict_capcity_limit*/, + rocksdb_cache_high_pri_pool_ratio, memory_allocator); if (rocksdb_sim_cache_size > 0) { // Simulated cache enabled // Wrap block cache inside a simulated cache and pass it to RocksDB @@ -5064,7 +5502,7 @@ static int rocksdb_init_func(void *const p) { if (rocksdb_persistent_cache_size_mb > 0) { std::shared_ptr<rocksdb::PersistentCache> pcache; - uint64_t cache_size_bytes= rocksdb_persistent_cache_size_mb * 1024 * 1024; + uint64_t cache_size_bytes = rocksdb_persistent_cache_size_mb * 1024 * 1024; status = rocksdb::NewPersistentCache( rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path), cache_size_bytes, myrocks_logger, true, &pcache); @@ -5076,6 +5514,7 @@ static int rocksdb_init_func(void *const p) { } rocksdb_tbl_options->persistent_cache = pcache; } else if (strlen(rocksdb_persistent_cache_path)) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb"); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -5093,17 +5532,23 @@ static int rocksdb_init_func(void *const p) { If there are no column families, we're creating the new database. Create one column family named "default". 
*/ - if (cf_names.size() == 0) - cf_names.push_back(DEFAULT_CF_NAME); + if (cf_names.size() == 0) cf_names.push_back(DEFAULT_CF_NAME); std::vector<int> compaction_enabled_cf_indices; + + // NO_LINT_DEBUG sql_print_information("RocksDB: Column Families at start:"); for (size_t i = 0; i < cf_names.size(); ++i) { rocksdb::ColumnFamilyOptions opts; cf_options_map->get_cf_options(cf_names[i], &opts); + // NO_LINT_DEBUG sql_print_information(" cf=%s", cf_names[i].c_str()); + + // NO_LINT_DEBUG sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size); + + // NO_LINT_DEBUG sql_print_information(" target_file_size_base=%" PRIu64, opts.target_file_size_base); @@ -5184,25 +5629,27 @@ static int rocksdb_init_func(void *const p) { DBUG_RETURN(HA_EXIT_FAILURE); } - auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME -#ifdef HAVE_PSI_INTERFACE - , - rdb_background_psi_thread_key +#ifndef HAVE_PSI_INTERFACE + auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME); +#else + auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME, + rdb_background_psi_thread_key); #endif - ); if (err != 0) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Couldn't start the background thread: (errno=%d)", err); DBUG_RETURN(HA_EXIT_FAILURE); } - err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME -#ifdef HAVE_PSI_INTERFACE - , - rdb_drop_idx_psi_thread_key +#ifndef HAVE_PSI_INTERFACE + err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME); +#else + err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME, + rdb_drop_idx_psi_thread_key); #endif - ); if (err != 0) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Couldn't start the drop index thread: (errno=%d)", err); DBUG_RETURN(HA_EXIT_FAILURE); @@ -5219,7 +5666,6 @@ static int rocksdb_init_func(void *const p) { sql_print_error( "RocksDB: Couldn't start the manual compaction thread: (errno=%d)", err); - rdb_open_tables.free_hash(); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -5253,7 +5699,6 @@ static int rocksdb_init_func(void *const p) 
{ if (err != 0) { // NO_LINT_DEBUG sql_print_error("RocksDB: Couldn't initialize error messages"); - rdb_open_tables.m_hash.~Rdb_table_set(); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -5276,13 +5721,17 @@ static int rocksdb_init_func(void *const p) { } #if !defined(_WIN32) && !defined(__APPLE__) - io_watchdog = new Rdb_io_watchdog(directories); + io_watchdog = new Rdb_io_watchdog(std::move(directories)); io_watchdog->reset_timeout(rocksdb_io_write_timeout_secs); #endif // NO_LINT_DEBUG - sql_print_information("MyRocks storage engine plugin has been successfully " - "initialized."); + sql_print_information( + "MyRocks storage engine plugin has been successfully " + "initialized."); + + // Skip cleaning up rdb_open_tables as we've succeeded + rdb_open_tables_cleanup.skip(); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -5339,18 +5788,18 @@ static int rocksdb_done_func(void *const p) { "RocksDB: Couldn't stop the manual compaction thread: (errno=%d)", err); } - if (rdb_open_tables.m_hash.size()) { + if (rdb_open_tables.count()) { // Looks like we are getting unloaded and yet we have some open tables // left behind. error = 1; } + rdb_open_tables.free(); /* destructors for static objects can be called at _exit(), but we want to free the memory at dlclose() */ - rdb_open_tables.m_hash.~Rdb_table_set(); - mysql_mutex_destroy(&rdb_open_tables.m_mutex); + // MARIADB_MERGE_2019: rdb_open_tables.m_hash.~Rdb_table_set(); mysql_mutex_destroy(&rdb_sysvars_mutex); mysql_mutex_destroy(&rdb_block_cache_resize_mutex); @@ -5438,7 +5887,7 @@ static inline void rocksdb_smart_next(bool seek_backward, } } -#ifndef NDEBUG +#ifndef DBUG_OFF // simulate that RocksDB has reported corrupted data static void dbug_change_status_to_corrupted(rocksdb::Status *status) { *status = rocksdb::Status::Corruption(); @@ -5473,39 +5922,39 @@ static inline bool is_valid(rocksdb::Iterator *scan_it) { they are needed to function. 
*/ -Rdb_table_handler * -Rdb_open_tables_map::get_table_handler(const char *const table_name) { +Rdb_table_handler *Rdb_open_tables_map::get_table_handler( + const char *const table_name) { + DBUG_ASSERT(table_name != nullptr); + Rdb_table_handler *table_handler; - uint length; - char *tmp_name; - DBUG_ASSERT(table_name != nullptr); - length = (uint)strlen(table_name); + std::string table_name_str(table_name); // First, look up the table in the hash map. RDB_MUTEX_LOCK_CHECK(m_mutex); - if (!m_hash.size() || !(table_handler = m_hash.find(table_name, length))) { + const auto it = m_table_map.find(table_name_str); + if (it != m_table_map.end()) { + // Found it + table_handler = it->second; + } else { + char *tmp_name; + // Since we did not find it in the hash map, attempt to create and add it // to the hash map. if (!(table_handler = reinterpret_cast<Rdb_table_handler *>(my_multi_malloc( MYF(MY_WME | MY_ZEROFILL), &table_handler, sizeof(*table_handler), - &tmp_name, length + 1, NullS)))) { + &tmp_name, table_name_str.length() + 1, NullS)))) { // Allocating a new Rdb_table_handler and a new table name failed. RDB_MUTEX_UNLOCK_CHECK(m_mutex); return nullptr; } table_handler->m_ref_count = 0; - table_handler->m_table_name_length = length; + table_handler->m_table_name_length = table_name_str.length(); table_handler->m_table_name = tmp_name; strmov(table_handler->m_table_name, table_name); - if (m_hash.insert(table_handler)) { - // Inserting into the hash map failed. 
- RDB_MUTEX_UNLOCK_CHECK(m_mutex); - my_free(table_handler); - return nullptr; - } + m_table_map.emplace(table_name_str, table_handler); thr_lock_init(&table_handler->m_thr_lock); #ifdef MARIAROCKS_NOT_YET @@ -5526,16 +5975,15 @@ std::vector<std::string> rdb_get_open_table_names(void) { } std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const { - size_t i; const Rdb_table_handler *table_handler; std::vector<std::string> names; RDB_MUTEX_LOCK_CHECK(m_mutex); - for (i = 0; (table_handler = m_hash.at(i)); i++) { + for (const auto &kv : m_table_map) { + table_handler = kv.second; DBUG_ASSERT(table_handler != nullptr); names.push_back(table_handler->m_table_name); } - DBUG_ASSERT(i == m_hash.size()); RDB_MUTEX_UNLOCK_CHECK(m_mutex); return names; @@ -5548,44 +5996,44 @@ std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const { static ulonglong rdb_get_int_col_max_value(const Field *field) { ulonglong max_value = 0; switch (field->key_type()) { - case HA_KEYTYPE_BINARY: - max_value = 0xFFULL; - break; - case HA_KEYTYPE_INT8: - max_value = 0x7FULL; - break; - case HA_KEYTYPE_USHORT_INT: - max_value = 0xFFFFULL; - break; - case HA_KEYTYPE_SHORT_INT: - max_value = 0x7FFFULL; - break; - case HA_KEYTYPE_UINT24: - max_value = 0xFFFFFFULL; - break; - case HA_KEYTYPE_INT24: - max_value = 0x7FFFFFULL; - break; - case HA_KEYTYPE_ULONG_INT: - max_value = 0xFFFFFFFFULL; - break; - case HA_KEYTYPE_LONG_INT: - max_value = 0x7FFFFFFFULL; - break; - case HA_KEYTYPE_ULONGLONG: - max_value = 0xFFFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_LONGLONG: - max_value = 0x7FFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_FLOAT: - max_value = 0x1000000ULL; - break; - case HA_KEYTYPE_DOUBLE: - max_value = 0x20000000000000ULL; - break; - default: - abort(); + case HA_KEYTYPE_BINARY: + max_value = 0xFFULL; + break; + case HA_KEYTYPE_INT8: + max_value = 0x7FULL; + break; + case HA_KEYTYPE_USHORT_INT: + max_value = 0xFFFFULL; + break; + case HA_KEYTYPE_SHORT_INT: + 
max_value = 0x7FFFULL; + break; + case HA_KEYTYPE_UINT24: + max_value = 0xFFFFFFULL; + break; + case HA_KEYTYPE_INT24: + max_value = 0x7FFFFFULL; + break; + case HA_KEYTYPE_ULONG_INT: + max_value = 0xFFFFFFFFULL; + break; + case HA_KEYTYPE_LONG_INT: + max_value = 0x7FFFFFFFULL; + break; + case HA_KEYTYPE_ULONGLONG: + max_value = 0xFFFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_LONGLONG: + max_value = 0x7FFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_FLOAT: + max_value = 0x1000000ULL; + break; + case HA_KEYTYPE_DOUBLE: + max_value = 0x20000000000000ULL; + break; + default: + abort(); } return max_value; @@ -5594,7 +6042,7 @@ static ulonglong rdb_get_int_col_max_value(const Field *field) { void ha_rocksdb::load_auto_incr_value() { ulonglong auto_incr = 0; bool validate_last = false, use_datadic = true; -#ifndef NDEBUG +#ifndef DBUG_OFF DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", use_datadic = false;); validate_last = true; #endif @@ -5641,7 +6089,7 @@ ulonglong ha_rocksdb::load_auto_incr_value_from_index() { // appropriately and non-index-only lookup will not read the value) const bool save_keyread_only = m_keyread_only; m_keyread_only = true; - m_key_requested = true; + m_converter->set_is_key_requested(true); if (!index_last(table->record[0])) { Field *field = @@ -5653,7 +6101,7 @@ ulonglong ha_rocksdb::load_auto_incr_value_from_index() { if (last_val != max_val) { last_val++; } -#ifndef NDEBUG +#ifndef DBUG_OFF ulonglong dd_val; if (last_val <= max_val) { const auto &gl_index_id = m_tbl_def->get_autoincr_gl_index_id(); @@ -5778,8 +6226,9 @@ int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) { // Get hidden primary key from old key slice Rdb_string_reader reader(&rowkey_slice); - if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE))) + if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE))) { return HA_ERR_ROCKSDB_CORRUPT_DATA; + } const int length= 8; /* was Field_longlong::PACK_LENGTH in FB MySQL tree */ const uchar *from = 
reinterpret_cast<const uchar *>(reader.read(length)); @@ -5807,8 +6256,9 @@ void Rdb_open_tables_map::release_table_handler( DBUG_ASSERT(table_handler->m_ref_count > 0); if (!--table_handler->m_ref_count) { // Last reference was released. Tear down the hash entry. - const auto ret MY_ATTRIBUTE((__unused__)) = m_hash.remove(table_handler); - DBUG_ASSERT(!ret); // the hash entry must actually be found and deleted + const auto ret MY_ATTRIBUTE((__unused__)) = + m_table_map.erase(std::string(table_handler->m_table_name)); + DBUG_ASSERT(ret == 1); // the hash entry must actually be found and deleted my_core::thr_lock_delete(&table_handler->m_thr_lock); my_free(table_handler); } @@ -5824,19 +6274,34 @@ static handler *rocksdb_create_handler(my_core::handlerton *const hton, ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton, my_core::TABLE_SHARE *const table_arg) - : handler(hton, table_arg), m_table_handler(nullptr), m_scan_it(nullptr), - m_scan_it_skips_bloom(false), m_scan_it_snapshot(nullptr), - m_scan_it_lower_bound(nullptr), m_scan_it_upper_bound(nullptr), - m_tbl_def(nullptr), m_pk_descr(nullptr), m_key_descr_arr(nullptr), - m_pk_can_be_decoded(false), m_maybe_unpack_info(false), - m_pk_tuple(nullptr), m_pk_packed_tuple(nullptr), - m_sk_packed_tuple(nullptr), m_end_key_packed_tuple(nullptr), - m_sk_match_prefix(nullptr), m_sk_match_prefix_buf(nullptr), - m_sk_packed_tuple_old(nullptr), m_dup_sk_packed_tuple(nullptr), - m_dup_sk_packed_tuple_old(nullptr), m_pack_buffer(nullptr), - m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), m_encoder_arr(nullptr), - m_row_checksums_checked(0), m_in_rpl_delete_rows(false), - m_in_rpl_update_rows(false), m_force_skip_unique_check(false) {} + : handler(hton, table_arg), + m_table_handler(nullptr), + m_scan_it(nullptr), + m_scan_it_skips_bloom(false), + m_scan_it_snapshot(nullptr), + m_scan_it_lower_bound(nullptr), + m_scan_it_upper_bound(nullptr), + m_tbl_def(nullptr), + m_pk_descr(nullptr), + m_key_descr_arr(nullptr), + 
m_pk_can_be_decoded(false), + m_pk_tuple(nullptr), + m_pk_packed_tuple(nullptr), + m_sk_packed_tuple(nullptr), + m_end_key_packed_tuple(nullptr), + m_sk_match_prefix(nullptr), + m_sk_match_prefix_buf(nullptr), + m_sk_packed_tuple_old(nullptr), + m_dup_sk_packed_tuple(nullptr), + m_dup_sk_packed_tuple_old(nullptr), + m_pack_buffer(nullptr), + m_lock_rows(RDB_LOCK_NONE), + m_keyread_only(false), + m_insert_with_update(false), + m_dup_pk_found(false), + m_in_rpl_delete_rows(false), + m_in_rpl_update_rows(false), + m_force_skip_unique_check(false) {} const std::string &ha_rocksdb::get_table_basename() const { @@ -5855,9 +6320,9 @@ bool ha_rocksdb::init_with_fields() { if (pk != MAX_KEY) { const uint key_parts = table_share->key_info[pk].user_defined_key_parts; check_keyread_allowed(pk /*PK*/, key_parts - 1, true); - } else + } else { m_pk_can_be_decoded = false; - + } cached_table_flags = table_flags(); DBUG_RETURN(false); /* Ok */ @@ -5914,298 +6379,52 @@ bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd, RDB_MAX_HEXDUMP_LEN); const GL_INDEX_ID gl_index_id = kd.get_gl_index_id(); // NO_LINT_DEBUG - sql_print_error("Decoding ttl from PK value failed, " - "for index (%u,%u), val: %s", - gl_index_id.cf_id, gl_index_id.index_id, buf.c_str()); + sql_print_error( + "Decoding ttl from PK value failed, " + "for index (%u,%u), val: %s", + gl_index_id.cf_id, gl_index_id.index_id, buf.c_str()); DBUG_ASSERT(0); return false; } /* Hide record if it has expired before the current snapshot time. 
*/ uint64 read_filter_ts = 0; -#ifndef NDEBUG +#ifndef DBUG_OFF read_filter_ts += rdb_dbug_set_ttl_read_filter_ts(); #endif bool is_hide_ttl = ts + kd.m_ttl_duration + read_filter_ts <= static_cast<uint64>(curr_ts); if (is_hide_ttl) { update_row_stats(ROWS_FILTERED); + + /* increment examined row count when rows are skipped */ + THD *thd = ha_thd(); + thd->inc_examined_row_count(1); + DEBUG_SYNC(thd, "rocksdb.ttl_rows_examined"); } return is_hide_ttl; } -void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd, - rocksdb::Iterator *const iter, - bool seek_backward) { +int ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd, + rocksdb::Iterator *const iter, + bool seek_backward) { if (kd.has_ttl()) { + THD *thd = ha_thd(); while (iter->Valid() && should_hide_ttl_rec( kd, iter->value(), get_or_create_tx(table->in_use)->m_snapshot_timestamp)) { - rocksdb_smart_next(seek_backward, iter); - } - } -} - -/** - Convert record from table->record[0] form into a form that can be written - into rocksdb. - - @param pk_packed_slice Packed PK tuple. We need it in order to compute - and store its CRC. - @param packed_rec OUT Data slice with record data. -*/ - -int ha_rocksdb::convert_record_to_storage_format( - const struct update_row_info &row_info, rocksdb::Slice *const packed_rec) { - DBUG_ASSERT_IMP(m_maybe_unpack_info, row_info.new_pk_unpack_info); - DBUG_ASSERT(m_pk_descr != nullptr); - - const rocksdb::Slice &pk_packed_slice = row_info.new_pk_slice; - Rdb_string_writer *const pk_unpack_info = row_info.new_pk_unpack_info; - bool has_ttl = m_pk_descr->has_ttl(); - bool has_ttl_column = !m_pk_descr->m_ttl_column.empty(); - bool ttl_in_pk = has_ttl_column && (row_info.ttl_pk_offset != UINT_MAX); - - m_storage_record.length(0); - - if (has_ttl) { - /* If it's a TTL record, reserve space for 8 byte TTL value in front. 
*/ - m_storage_record.fill(ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_in_rec, 0); - m_ttl_bytes_updated = false; - - /* - If the TTL is contained within the key, we use the offset to find the - TTL value and place it in the beginning of the value record. - */ - if (ttl_in_pk) { - Rdb_string_reader reader(&pk_packed_slice); - const char *ts; - if (!reader.read(row_info.ttl_pk_offset) || - !(ts = reader.read(ROCKSDB_SIZEOF_TTL_RECORD))) { - std::string buf; - buf = rdb_hexdump(pk_packed_slice.data(), pk_packed_slice.size(), - RDB_MAX_HEXDUMP_LEN); - const GL_INDEX_ID gl_index_id = m_pk_descr->get_gl_index_id(); - // NO_LINT_DEBUG - sql_print_error("Decoding ttl from PK failed during insert, " - "for index (%u,%u), key: %s", - gl_index_id.cf_id, gl_index_id.index_id, buf.c_str()); - return HA_EXIT_FAILURE; - } - - char *const data = const_cast<char *>(m_storage_record.ptr()); - memcpy(data, ts, ROCKSDB_SIZEOF_TTL_RECORD); -#ifndef NDEBUG - // Adjust for test case if needed - rdb_netbuf_store_uint64( - reinterpret_cast<uchar *>(data), - rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(data)) + - rdb_dbug_set_ttl_rec_ts()); -#endif - // Also store in m_ttl_bytes to propagate to update_sk - memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); - } else if (!has_ttl_column) { - /* - For implicitly generated TTL records we need to copy over the old - TTL value from the old record in the event of an update. It was stored - in m_ttl_bytes. - - Otherwise, generate a timestamp using the current time. 
- */ - if (!row_info.old_pk_slice.empty()) { - char *const data = const_cast<char *>(m_storage_record.ptr()); - memcpy(data, m_ttl_bytes, sizeof(uint64)); - } else { - uint64 ts = static_cast<uint64>(std::time(nullptr)); -#ifndef NDEBUG - ts += rdb_dbug_set_ttl_rec_ts(); -#endif - char *const data = const_cast<char *>(m_storage_record.ptr()); - rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts); - // Also store in m_ttl_bytes to propagate to update_sk - memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); - } - } - } else { - /* All NULL bits are initially 0 */ - m_storage_record.fill(m_null_bytes_in_rec, 0); - } - - // If a primary key may have non-empty unpack_info for certain values, - // (m_maybe_unpack_info=TRUE), we write the unpack_info block. The block - // itself was prepared in Rdb_key_def::pack_record. - if (m_maybe_unpack_info) { - m_storage_record.append(reinterpret_cast<char *>(pk_unpack_info->ptr()), - pk_unpack_info->get_current_pos()); - } - - for (uint i = 0; i < table->s->fields; i++) { - /* Don't pack decodable PK key parts */ - if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { - continue; - } - - Field *const field = table->field[i]; - if (m_encoder_arr[i].maybe_null()) { - char *data = const_cast<char *>(m_storage_record.ptr()); - if (has_ttl) { - data += ROCKSDB_SIZEOF_TTL_RECORD; - } - - if (field->is_null()) { - data[m_encoder_arr[i].m_null_offset] |= m_encoder_arr[i].m_null_mask; - /* Don't write anything for NULL values */ - continue; - } - } - - if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_BLOB) { - my_core::Field_blob *blob = (my_core::Field_blob *)field; - /* Get the number of bytes needed to store length*/ - const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr; - - /* Store the length of the value */ - m_storage_record.append(reinterpret_cast<char *>(blob->ptr), - length_bytes); - - /* Store the blob value itself */ - char *data_ptr; - memcpy(&data_ptr, blob->ptr + length_bytes, 
sizeof(uchar **)); - m_storage_record.append(data_ptr, blob->get_length()); - } else if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_VARCHAR) { - Field_varstring *const field_var = (Field_varstring *)field; - uint data_len; - /* field_var->length_bytes is 1 or 2 */ - if (field_var->length_bytes == 1) { - data_len = field_var->ptr[0]; - } else { - DBUG_ASSERT(field_var->length_bytes == 2); - data_len = uint2korr(field_var->ptr); - } - m_storage_record.append(reinterpret_cast<char *>(field_var->ptr), - field_var->length_bytes + data_len); - } else { - /* Copy the field data */ - const uint len = field->pack_length_in_rec(); - m_storage_record.append(reinterpret_cast<char *>(field->ptr), len); - - /* - Check if this is the TTL field within the table, if so store the TTL - in the front of the record as well here. - */ - if (has_ttl && has_ttl_column && - i == m_pk_descr->get_ttl_field_offset()) { - DBUG_ASSERT(len == ROCKSDB_SIZEOF_TTL_RECORD); - DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG); - DBUG_ASSERT(m_pk_descr->get_ttl_field_offset() != UINT_MAX); - - char *const data = const_cast<char *>(m_storage_record.ptr()); - uint64 ts = uint8korr(field->ptr); -#ifndef NDEBUG - ts += rdb_dbug_set_ttl_rec_ts(); -#endif - rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts); - - // If this is an update and the timestamp has been updated, take note - // so we can avoid updating SKs unnecessarily. 
- if (!row_info.old_pk_slice.empty()) { - m_ttl_bytes_updated = - memcmp(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); - } - // Store timestamp in m_ttl_bytes to propagate to update_sk - memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); + DEBUG_SYNC(thd, "rocksdb.check_flags_ser"); + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; } + rocksdb_smart_next(seek_backward, iter); } } - - if (should_store_row_debug_checksums()) { - const uint32_t key_crc32 = my_core::crc32( - 0, rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size()); - const uint32_t val_crc32 = - my_core::crc32(0, rdb_mysql_str_to_uchar_str(&m_storage_record), - m_storage_record.length()); - uchar key_crc_buf[RDB_CHECKSUM_SIZE]; - uchar val_crc_buf[RDB_CHECKSUM_SIZE]; - rdb_netbuf_store_uint32(key_crc_buf, key_crc32); - rdb_netbuf_store_uint32(val_crc_buf, val_crc32); - m_storage_record.append((const char *)&RDB_CHECKSUM_DATA_TAG, 1); - m_storage_record.append((const char *)key_crc_buf, RDB_CHECKSUM_SIZE); - m_storage_record.append((const char *)val_crc_buf, RDB_CHECKSUM_SIZE); - } - - *packed_rec = - rocksdb::Slice(m_storage_record.ptr(), m_storage_record.length()); - return HA_EXIT_SUCCESS; } -/* - @brief - Setup which fields will be unpacked when reading rows - - @detail - Three special cases when we still unpack all fields: - - When this table is being updated (m_lock_rows==RDB_LOCK_WRITE). - - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to - read all fields to find whether there is a row checksum at the end. We could - skip the fields instead of decoding them, but currently we do decoding.) 
- - On index merge as bitmap is cleared during that operation - - @seealso - ha_rocksdb::setup_field_converters() - ha_rocksdb::convert_record_from_storage_format() -*/ -void ha_rocksdb::setup_read_decoders() { - m_decoders_vect.clear(); - m_key_requested = false; - - int last_useful = 0; - int skip_size = 0; - - for (uint i = 0; i < table->s->fields; i++) { - // bitmap is cleared on index merge, but it still needs to decode columns - const bool field_requested = - m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums || - bitmap_is_clear_all(table->read_set) || - bitmap_is_set(table->read_set, table->field[i]->field_index); - - // We only need the decoder if the whole record is stored. - if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { - // the field potentially needs unpacking - if (field_requested) { - // the field is in the read set - m_key_requested = true; - } - continue; - } - - if (field_requested) { - // We will need to decode this field - m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size}); - last_useful = m_decoders_vect.size(); - skip_size = 0; - } else { - if (m_encoder_arr[i].uses_variable_len_encoding() || - m_encoder_arr[i].maybe_null()) { - // For variable-length field, we need to read the data and skip it - m_decoders_vect.push_back({&m_encoder_arr[i], false, skip_size}); - skip_size = 0; - } else { - // Fixed-width field can be skipped without looking at it. - // Add appropriate skip_size to the next field. - skip_size += m_encoder_arr[i].m_pack_length_in_rec; - } - } - } - - // It could be that the last few elements are varchars that just do - // skipping. Remove them. 
- m_decoders_vect.erase(m_decoders_vect.begin() + last_useful, - m_decoders_vect.end()); -} - -#ifndef NDEBUG +#ifndef DBUG_OFF void dbug_append_garbage_at_end(rocksdb::PinnableSlice *on_disk_rec) { std::string str(on_disk_rec->data(), on_disk_rec->size()); on_disk_rec->Reset(); @@ -6230,17 +6449,6 @@ void dbug_modify_rec_varchar12(rocksdb::PinnableSlice *on_disk_rec) { on_disk_rec->PinSelf(rocksdb::Slice(res)); } -void dbug_modify_key_varchar8(String &on_disk_rec) { - std::string res; - // The key starts with index number - res.append(on_disk_rec.ptr(), Rdb_key_def::INDEX_NUMBER_SIZE); - - // Then, a mem-comparable form of a varchar(8) value. - res.append("ABCDE\0\0\0\xFC", 9); - on_disk_rec.length(0); - on_disk_rec.append(res.data(), res.size()); -} - void dbug_create_err_inplace_alter() { my_printf_error(ER_UNKNOWN_ERROR, "Intentional failure in inplace alter occurred.", MYF(0)); @@ -6249,7 +6457,6 @@ void dbug_create_err_inplace_alter() { int ha_rocksdb::convert_record_from_storage_format( const rocksdb::Slice *const key, uchar *const buf) { - DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1", dbug_append_garbage_at_end(&m_retrieved_record);); DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2", @@ -6260,91 +6467,6 @@ int ha_rocksdb::convert_record_from_storage_format( return convert_record_from_storage_format(key, &m_retrieved_record, buf); } -int ha_rocksdb::convert_blob_from_storage_format( - my_core::Field_blob *const blob, - Rdb_string_reader *const reader, - bool decode) -{ - /* Get the number of bytes needed to store length*/ - const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr; - - const char *data_len_str; - if (!(data_len_str = reader->read(length_bytes))) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - memcpy(blob->ptr, data_len_str, length_bytes); - - const uint32 data_len = blob->get_length( - reinterpret_cast<const uchar*>(data_len_str), length_bytes); - const char *blob_ptr; - if (!(blob_ptr = reader->read(data_len))) { - return 
HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (decode) { - // set 8-byte pointer to 0, like innodb does (relevant for 32-bit - // platforms) - memset(blob->ptr + length_bytes, 0, 8); - memcpy(blob->ptr + length_bytes, &blob_ptr, sizeof(uchar **)); - } - - return HA_EXIT_SUCCESS; -} - -int ha_rocksdb::convert_varchar_from_storage_format( - my_core::Field_varstring *const field_var, - Rdb_string_reader *const reader, - bool decode) -{ - const char *data_len_str; - if (!(data_len_str = reader->read(field_var->length_bytes))) - return HA_ERR_ROCKSDB_CORRUPT_DATA; - - uint data_len; - /* field_var->length_bytes is 1 or 2 */ - if (field_var->length_bytes == 1) { - data_len = (uchar)data_len_str[0]; - } else { - DBUG_ASSERT(field_var->length_bytes == 2); - data_len = uint2korr(data_len_str); - } - - if (data_len > field_var->field_length) { - /* The data on disk is longer than table DDL allows? */ - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (!reader->read(data_len)) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (decode) { - memcpy(field_var->ptr, data_len_str, field_var->length_bytes + data_len); - } - - return HA_EXIT_SUCCESS; -} - -int ha_rocksdb::convert_field_from_storage_format( - my_core::Field *const field, - Rdb_string_reader *const reader, - bool decode, - uint len) -{ - const char *data_bytes; - if (len > 0) { - if ((data_bytes = reader->read(len)) == nullptr) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (decode) - memcpy(field->ptr, data_bytes, len); - } - - return HA_EXIT_SUCCESS; -} - /* @brief Unpack the record in this->m_retrieved_record and this->m_last_rowkey from @@ -6361,8 +6483,8 @@ int ha_rocksdb::convert_field_from_storage_format( m_retrieved_record). @seealso - ha_rocksdb::setup_read_decoders() Sets up data structures which tell which - columns to decode. + rdb_converter::setup_read_decoders() Sets up data structures which tell + which columns to decode. 
@return 0 OK @@ -6372,241 +6494,7 @@ int ha_rocksdb::convert_field_from_storage_format( int ha_rocksdb::convert_record_from_storage_format( const rocksdb::Slice *const key, const rocksdb::Slice *const value, uchar *const buf) { - Rdb_string_reader reader(value); - - /* - Decode PK fields from the key - */ - DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_read1", - dbug_modify_key_varchar8(m_last_rowkey);); - - const rocksdb::Slice rowkey_slice(m_last_rowkey.ptr(), - m_last_rowkey.length()); - const char *unpack_info = nullptr; - uint16 unpack_info_len = 0; - rocksdb::Slice unpack_slice; - - /* If it's a TTL record, skip the 8 byte TTL value */ - const char *ttl_bytes; - if (m_pk_descr->has_ttl()) { - if ((ttl_bytes = reader.read(ROCKSDB_SIZEOF_TTL_RECORD))) { - memcpy(m_ttl_bytes, ttl_bytes, ROCKSDB_SIZEOF_TTL_RECORD); - } else { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - } - - /* Other fields are decoded from the value */ - const char *null_bytes = nullptr; - if (m_null_bytes_in_rec && !(null_bytes = reader.read(m_null_bytes_in_rec))) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (m_maybe_unpack_info) { - unpack_info = reader.get_current_ptr(); - if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) || - !reader.read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - unpack_info_len = - rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(unpack_info + 1)); - unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len); - - reader.read(unpack_info_len - - Rdb_key_def::get_unpack_header_size(unpack_info[0])); - } - - int err = HA_EXIT_SUCCESS; - if (m_key_requested) { - err = m_pk_descr->unpack_record(table, buf, &rowkey_slice, - unpack_info ? 
&unpack_slice : nullptr, - false /* verify_checksum */); - } - - if (err != HA_EXIT_SUCCESS) { - return err; - } - - for (auto it = m_decoders_vect.begin(); it != m_decoders_vect.end(); it++) { - const Rdb_field_encoder *const field_dec = it->m_field_enc; - const bool decode = it->m_decode; - const bool isNull = - field_dec->maybe_null() && - ((null_bytes[field_dec->m_null_offset] & field_dec->m_null_mask) != 0); - - Field *const field = table->field[field_dec->m_field_index]; - - /* Skip the bytes we need to skip */ - if (it->m_skip && !reader.read(it->m_skip)) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - uint field_offset = field->ptr - table->record[0]; - uint null_offset = field->null_offset(); - bool maybe_null = field->real_maybe_null(); - field->move_field(buf + field_offset, - maybe_null ? buf + null_offset : nullptr, - field->null_bit); - // WARNING! - Don't return before restoring field->ptr and field->null_ptr! - - if (isNull) { - if (decode) { - /* This sets the NULL-bit of this record */ - field->set_null(); - /* - Besides that, set the field value to default value. CHECKSUM TABLE - depends on this. - */ - memcpy(field->ptr, table->s->default_values + field_offset, - field->pack_length()); - } - } else { - if (decode) { - field->set_notnull(); - } - - if (field_dec->m_field_type == MYSQL_TYPE_BLOB) { - err = convert_blob_from_storage_format( - (my_core::Field_blob *) field, &reader, decode); - } else if (field_dec->m_field_type == MYSQL_TYPE_VARCHAR) { - err = convert_varchar_from_storage_format( - (my_core::Field_varstring *) field, &reader, decode); - } else { - err = convert_field_from_storage_format( - field, &reader, decode, field_dec->m_pack_length_in_rec); - } - } - - // Restore field->ptr and field->null_ptr - field->move_field(table->record[0] + field_offset, - maybe_null ? 
table->record[0] + null_offset : nullptr, - field->null_bit); - - if (err != HA_EXIT_SUCCESS) { - return err; - } - } - - if (m_verify_row_debug_checksums) { - if (reader.remaining_bytes() == RDB_CHECKSUM_CHUNK_SIZE && - reader.read(1)[0] == RDB_CHECKSUM_DATA_TAG) { - uint32_t stored_key_chksum = - rdb_netbuf_to_uint32((const uchar *)reader.read(RDB_CHECKSUM_SIZE)); - uint32_t stored_val_chksum = - rdb_netbuf_to_uint32((const uchar *)reader.read(RDB_CHECKSUM_SIZE)); - - const uint32_t computed_key_chksum = - my_core::crc32(0, rdb_slice_to_uchar_ptr(key), key->size()); - const uint32_t computed_val_chksum = - my_core::crc32(0, rdb_slice_to_uchar_ptr(value), - value->size() - RDB_CHECKSUM_CHUNK_SIZE); - - DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1", - stored_key_chksum++;); - - if (stored_key_chksum != computed_key_chksum) { - m_pk_descr->report_checksum_mismatch(true, key->data(), key->size()); - return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH; - } - - DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum2", - stored_val_chksum++;); - if (stored_val_chksum != computed_val_chksum) { - m_pk_descr->report_checksum_mismatch(false, value->data(), - value->size()); - return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH; - } - - m_row_checksums_checked++; - } - if (reader.remaining_bytes()) - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - return HA_EXIT_SUCCESS; -} - -void ha_rocksdb::get_storage_type(Rdb_field_encoder *const encoder, - const uint &kp) { - // STORE_SOME uses unpack_info. - if (m_pk_descr->has_unpack_info(kp)) { - DBUG_ASSERT(m_pk_descr->can_unpack(kp)); - encoder->m_storage_type = Rdb_field_encoder::STORE_SOME; - m_maybe_unpack_info = true; - } else if (m_pk_descr->can_unpack(kp)) { - encoder->m_storage_type = Rdb_field_encoder::STORE_NONE; - } -} - -/* - Setup data needed to convert table->record[] to and from record storage - format. 
- - @seealso - ha_rocksdb::convert_record_to_storage_format, - ha_rocksdb::convert_record_from_storage_format -*/ - -void ha_rocksdb::setup_field_converters() { - uint i; - uint null_bytes = 0; - uchar cur_null_mask = 0x1; - - DBUG_ASSERT(m_encoder_arr == nullptr); - m_encoder_arr = static_cast<Rdb_field_encoder *>( - my_malloc(table->s->fields * sizeof(Rdb_field_encoder), MYF(0))); - if (m_encoder_arr == nullptr) { - return; - } - - for (i = 0; i < table->s->fields; i++) { - Field *const field = table->field[i]; - m_encoder_arr[i].m_storage_type = Rdb_field_encoder::STORE_ALL; - - /* - Check if this field is - - a part of primary key, and - - it can be decoded back from its key image. - If both hold, we don't need to store this field in the value part of - RocksDB's key-value pair. - - If hidden pk exists, we skip this check since the field will never be - part of the hidden pk. - */ - if (!has_hidden_pk(table)) { - KEY *const pk_info = &table->key_info[table->s->primary_key]; - for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) { - /* key_part->fieldnr is counted from 1 */ - if (field->field_index + 1 == pk_info->key_part[kp].fieldnr) { - get_storage_type(&m_encoder_arr[i], kp); - break; - } - } - } - - m_encoder_arr[i].m_field_type = field->real_type(); - m_encoder_arr[i].m_field_index = i; - m_encoder_arr[i].m_pack_length_in_rec = field->pack_length_in_rec(); - - if (field->real_maybe_null()) { - m_encoder_arr[i].m_null_mask = cur_null_mask; - m_encoder_arr[i].m_null_offset = null_bytes; - if (cur_null_mask == 0x80) { - cur_null_mask = 0x1; - null_bytes++; - } else - cur_null_mask = cur_null_mask << 1; - } else { - m_encoder_arr[i].m_null_mask = 0; - } - } - - /* Count the last, unfinished NULL-bits byte */ - if (cur_null_mask != 0x1) - null_bytes++; - - m_null_bytes_in_rec = null_bytes; + return m_converter->decode(m_pk_descr, buf, key, value); } int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg, @@ -6643,8 +6531,8 @@ int 
ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg, /* Sometimes, we may use m_sk_packed_tuple for storing packed PK */ max_packed_sk_len = pack_key_len; for (uint i = 0; i < table_arg->s->keys; i++) { - if (i == table_arg->s->primary_key) /* Primary key was processed above */ - continue; + /* Primary key was processed above */ + if (i == table_arg->s->primary_key) continue; // TODO: move this into get_table_handler() ?? kd_arr[i]->setup(table_arg, tbl_def_arg); @@ -6733,25 +6621,10 @@ void ha_rocksdb::free_key_buffers() { } #ifdef MARIAROCKS_NOT_YET -void ha_rocksdb::set_use_read_free_rpl(const char *const whitelist) { - DBUG_ASSERT(whitelist != nullptr); - -#if defined(HAVE_PSI_INTERFACE) - Regex_list_handler regex_handler(key_rwlock_read_free_rpl_tables); -#else - Regex_list_handler regex_handler; -#endif - - if (!regex_handler.set_patterns(whitelist)) { - warn_about_bad_patterns(®ex_handler, "read_free_rpl_tables"); - } - - m_use_read_free_rpl = regex_handler.matches(m_tbl_def->base_tablename()); -} #endif - void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) { - DBUG_ASSERT(whitelist != nullptr); + const char *const wl = + whitelist ? 
whitelist : DEFAULT_SKIP_UNIQUE_CHECK_TABLES; #if defined(HAVE_PSI_INTERFACE) Regex_list_handler regex_handler(key_rwlock_skip_unique_check_tables); @@ -6759,7 +6632,7 @@ void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) { Regex_list_handler regex_handler; #endif - if (!regex_handler.set_patterns(whitelist)) { + if (!regex_handler.set_patterns(wl)) { warn_about_bad_patterns(®ex_handler, "skip_unique_check_tables"); } @@ -6806,8 +6679,8 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { "dictionary"); DBUG_RETURN(HA_ERR_ROCKSDB_INVALID_TABLE); } - m_lock_rows = RDB_LOCK_NONE; + m_lock_rows = RDB_LOCK_NONE; m_key_descr_arr = m_tbl_def->m_key_descr_arr; /* @@ -6836,7 +6709,15 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { */ init_with_fields(); - setup_field_converters(); + /* Initialize decoder */ + m_converter = std::make_shared<Rdb_converter>(ha_thd(), m_tbl_def, table); + + /* + Update m_ttl_bytes address to same as Rdb_converter's m_ttl_bytes. + Remove this code after moving convert_record_to_storage_format() into + Rdb_converter class. + */ + m_ttl_bytes = m_converter->get_ttl_bytes_buffer(); /* MariaDB: adjust field->part_of_key for PK columns. We can only do it here @@ -6879,11 +6760,10 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { The following load_XXX code calls row decode functions, and they do that without having done ::external_lock() or index_init()/rnd_init(). (Note: this also means we're doing a read when there was no - setup_field_converters() call) + rdb_converter::setup_field_encoders() call) Initialize the necessary variables for them: */ - m_verify_row_debug_checksums = false; /* Load auto_increment value only once on first use. 
*/ if (table->found_next_number_field && m_tbl_def->m_auto_incr_val == 0) { @@ -6901,10 +6781,7 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { stats.block_size = rocksdb_tbl_options->block_size; #ifdef MARIAROCKS_NOT_YET // MDEV-10976 - /* Determine at open whether we can use Read Free Replication or not */ - set_use_read_free_rpl(THDVAR(ha_thd(), read_free_rpl_tables)); #endif - /* Determine at open whether we should skip unique checks for this table */ set_skip_unique_check_tables(THDVAR(ha_thd(), skip_unique_check_tables)); @@ -6916,19 +6793,15 @@ int ha_rocksdb::close(void) { m_pk_descr = nullptr; m_key_descr_arr = nullptr; - + m_converter = nullptr; free_key_buffers(); - my_free(m_encoder_arr); - m_encoder_arr = nullptr; - if (m_table_handler != nullptr) { rdb_open_tables.release_table_handler(m_table_handler); m_table_handler = nullptr; } // These are needed to suppress valgrind errors in rocksdb.partition - m_storage_record.free(); m_last_rowkey.free(); m_sk_tails.free(); m_sk_tails_old.free(); @@ -7019,61 +6892,72 @@ int ha_rocksdb::rdb_error_to_mysql(const rocksdb::Status &s, int err; switch (s.code()) { - case rocksdb::Status::Code::kOk: - err = HA_EXIT_SUCCESS; - break; - case rocksdb::Status::Code::kNotFound: - err = HA_ERR_ROCKSDB_STATUS_NOT_FOUND; - break; - case rocksdb::Status::Code::kCorruption: - err = HA_ERR_ROCKSDB_STATUS_CORRUPTION; - break; - case rocksdb::Status::Code::kNotSupported: - err = HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED; - break; - case rocksdb::Status::Code::kInvalidArgument: - err = HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT; - break; - case rocksdb::Status::Code::kIOError: - err = (s.IsNoSpace()) ? 
HA_ERR_ROCKSDB_STATUS_NO_SPACE - : HA_ERR_ROCKSDB_STATUS_IO_ERROR; - break; - case rocksdb::Status::Code::kMergeInProgress: - err = HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS; - break; - case rocksdb::Status::Code::kIncomplete: - err = HA_ERR_ROCKSDB_STATUS_INCOMPLETE; - break; - case rocksdb::Status::Code::kShutdownInProgress: - err = HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS; - break; - case rocksdb::Status::Code::kTimedOut: - err = HA_ERR_ROCKSDB_STATUS_TIMED_OUT; - break; - case rocksdb::Status::Code::kAborted: - err = (s.IsLockLimit()) ? HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT - : HA_ERR_ROCKSDB_STATUS_ABORTED; - break; - case rocksdb::Status::Code::kBusy: - err = (s.IsDeadlock()) ? HA_ERR_ROCKSDB_STATUS_DEADLOCK - : HA_ERR_ROCKSDB_STATUS_BUSY; - break; - case rocksdb::Status::Code::kExpired: - err = HA_ERR_ROCKSDB_STATUS_EXPIRED; - break; - case rocksdb::Status::Code::kTryAgain: - err = HA_ERR_ROCKSDB_STATUS_TRY_AGAIN; - break; - default: - DBUG_ASSERT(0); - return -1; + case rocksdb::Status::Code::kOk: + err = HA_EXIT_SUCCESS; + break; + case rocksdb::Status::Code::kNotFound: + err = HA_ERR_ROCKSDB_STATUS_NOT_FOUND; + break; + case rocksdb::Status::Code::kCorruption: + err = HA_ERR_ROCKSDB_STATUS_CORRUPTION; + break; + case rocksdb::Status::Code::kNotSupported: + err = HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED; + break; + case rocksdb::Status::Code::kInvalidArgument: + err = HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT; + break; + case rocksdb::Status::Code::kIOError: + err = (s.IsNoSpace()) ? 
HA_ERR_ROCKSDB_STATUS_NO_SPACE + : HA_ERR_ROCKSDB_STATUS_IO_ERROR; + break; + case rocksdb::Status::Code::kMergeInProgress: + err = HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS; + break; + case rocksdb::Status::Code::kIncomplete: + err = HA_ERR_ROCKSDB_STATUS_INCOMPLETE; + break; + case rocksdb::Status::Code::kShutdownInProgress: + err = HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS; + break; + case rocksdb::Status::Code::kTimedOut: + err = HA_ERR_ROCKSDB_STATUS_TIMED_OUT; + break; + case rocksdb::Status::Code::kAborted: + err = (s.IsLockLimit()) ? HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT + : HA_ERR_ROCKSDB_STATUS_ABORTED; + break; + case rocksdb::Status::Code::kBusy: + err = (s.IsDeadlock()) ? HA_ERR_ROCKSDB_STATUS_DEADLOCK + : HA_ERR_ROCKSDB_STATUS_BUSY; + break; + case rocksdb::Status::Code::kExpired: + err = HA_ERR_ROCKSDB_STATUS_EXPIRED; + break; + case rocksdb::Status::Code::kTryAgain: + err = HA_ERR_ROCKSDB_STATUS_TRY_AGAIN; + break; + default: + DBUG_ASSERT(0); + return -1; + } + + std::string errMsg; + if (s.IsLockLimit()) { + errMsg = + "Operation aborted: Failed to acquire lock due to " + "rocksdb_max_row_locks limit"; + } else { + errMsg = s.ToString(); } if (opt_msg) { - std::string concatenated_error = s.ToString() + " (" + std::string(opt_msg) + ")"; - my_error(ER_GET_ERRMSG, MYF(0), s.code(), concatenated_error.c_str(), rocksdb_hton_name); + std::string concatenated_error = errMsg + " (" + std::string(opt_msg) + ")"; + my_error(ER_GET_ERRMSG, MYF(0), s.code(), concatenated_error.c_str(), + rocksdb_hton_name); } else { - my_error(ER_GET_ERRMSG, MYF(0), s.code(), s.ToString().c_str(), rocksdb_hton_name); + my_error(ER_GET_ERRMSG, MYF(0), s.code(), errMsg.c_str(), + rocksdb_hton_name); } return err; @@ -7083,8 +6967,8 @@ int ha_rocksdb::rdb_error_to_mysql(const rocksdb::Status &s, static const std::set<uint> RDB_INDEX_COLLATIONS = { COLLATION_BINARY, COLLATION_UTF8_BIN, COLLATION_LATIN1_BIN}; -static bool -rdb_is_index_collation_supported(const my_core::Field *const 
field) { +static bool rdb_is_index_collation_supported( + const my_core::Field *const field) { const my_core::enum_field_types type = field->real_type(); /* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */ if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING || @@ -7144,8 +7028,6 @@ int ha_rocksdb::create_key_defs( DBUG_ASSERT(table_arg->s != nullptr); - uint i; - /* These need to be one greater than MAX_INDEXES since the user can create MAX_INDEXES secondary keys and no primary key which would cause us @@ -7162,6 +7044,36 @@ int ha_rocksdb::create_key_defs( DBUG_RETURN(HA_EXIT_FAILURE); } + uint64 ttl_duration = 0; + std::string ttl_column; + uint ttl_field_offset; + + uint err; + if ((err = Rdb_key_def::extract_ttl_duration(table_arg, tbl_def_arg, + &ttl_duration))) { + DBUG_RETURN(err); + } + + if ((err = Rdb_key_def::extract_ttl_col(table_arg, tbl_def_arg, &ttl_column, + &ttl_field_offset))) { + DBUG_RETURN(err); + } + + /* We don't currently support TTL on tables with hidden primary keys. */ + if (ttl_duration > 0 && has_hidden_pk(table_arg)) { + my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0)); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + /* + If TTL duration is not specified but TTL column was specified, throw an + error because TTL column requires duration. + */ + if (ttl_duration == 0 && !ttl_column.empty()) { + my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_column.c_str()); + DBUG_RETURN(HA_EXIT_FAILURE); + } + if (!old_tbl_def_arg) { /* old_tbl_def doesn't exist. this means we are in the process of creating @@ -7170,9 +7082,9 @@ int ha_rocksdb::create_key_defs( Get the index numbers (this will update the next_index_number) and create Rdb_key_def structures. 
*/ - for (i = 0; i < tbl_def_arg->m_key_count; i++) { - if (create_key_def(table_arg, i, tbl_def_arg, &m_key_descr_arr[i], - cfs[i])) { + for (uint i = 0; i < tbl_def_arg->m_key_count; i++) { + if (create_key_def(table_arg, i, tbl_def_arg, &m_key_descr_arr[i], cfs[i], + ttl_duration, ttl_column)) { DBUG_RETURN(HA_EXIT_FAILURE); } } @@ -7183,7 +7095,8 @@ int ha_rocksdb::create_key_defs( generate the necessary new key definitions if any. */ if (create_inplace_key_defs(table_arg, tbl_def_arg, old_table_arg, - old_tbl_def_arg, cfs)) { + old_tbl_def_arg, cfs, ttl_duration, + ttl_column)) { DBUG_RETURN(HA_EXIT_FAILURE); } } @@ -7269,8 +7182,8 @@ int ha_rocksdb::create_cfs( // Generate the name for the column family to use. bool per_part_match_found = false; - std::string cf_name = generate_cf_name(i, table_arg, tbl_def_arg, - &per_part_match_found); + std::string cf_name = + generate_cf_name(i, table_arg, tbl_def_arg, &per_part_match_found); // Prevent create from using the system column family. 
if (cf_name == DEFAULT_SYSTEM_CF_NAME) { @@ -7315,7 +7228,8 @@ int ha_rocksdb::create_cfs( int ha_rocksdb::create_inplace_key_defs( const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, const TABLE *const old_table_arg, const Rdb_tbl_def *const old_tbl_def_arg, - const std::array<key_def_cf_info, MAX_INDEXES + 1> &cfs) const { + const std::array<key_def_cf_info, MAX_INDEXES + 1> &cfs, + uint64 ttl_duration, const std::string &ttl_column) const { DBUG_ENTER_FUNC(); std::shared_ptr<Rdb_key_def> *const old_key_descr = @@ -7341,10 +7255,11 @@ int ha_rocksdb::create_inplace_key_defs( struct Rdb_index_info index_info; if (!dict_manager.get_index_info(gl_index_id, &index_info)) { // NO_LINT_DEBUG - sql_print_error("RocksDB: Could not get index information " - "for Index Number (%u,%u), table %s", - gl_index_id.cf_id, gl_index_id.index_id, - old_tbl_def_arg->full_tablename().c_str()); + sql_print_error( + "RocksDB: Could not get index information " + "for Index Number (%u,%u), table %s", + gl_index_id.cf_id, gl_index_id.index_id, + old_tbl_def_arg->full_tablename().c_str()); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -7368,7 +7283,7 @@ int ha_rocksdb::create_inplace_key_defs( dict_manager.get_stats(gl_index_id), index_info.m_index_flags, ttl_rec_offset, index_info.m_ttl_duration); } else if (create_key_def(table_arg, i, tbl_def_arg, &new_key_descr[i], - cfs[i])) { + cfs[i], ttl_duration, ttl_column)) { DBUG_RETURN(HA_EXIT_FAILURE); } @@ -7518,44 +7433,16 @@ int ha_rocksdb::compare_key_parts(const KEY *const old_key, 0 - Ok other - error, either given table ddl is not supported by rocksdb or OOM. 
*/ -int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i, +int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint i, const Rdb_tbl_def *const tbl_def_arg, std::shared_ptr<Rdb_key_def> *const new_key_def, - const struct key_def_cf_info &cf_info) const { + const struct key_def_cf_info &cf_info, + uint64 ttl_duration, + const std::string &ttl_column) const { DBUG_ENTER_FUNC(); DBUG_ASSERT(*new_key_def == nullptr); - uint64 ttl_duration = 0; - std::string ttl_column; - uint ttl_field_offset; - - uint err; - if ((err = Rdb_key_def::extract_ttl_duration(table_arg, tbl_def_arg, - &ttl_duration))) { - DBUG_RETURN(err); - } - - if ((err = Rdb_key_def::extract_ttl_col(table_arg, tbl_def_arg, &ttl_column, - &ttl_field_offset))) { - DBUG_RETURN(err); - } - - /* We don't currently support TTL on tables with hidden primary keys. */ - if (ttl_duration > 0 && is_hidden_pk(i, table_arg, tbl_def_arg)) { - my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0)); - DBUG_RETURN(HA_EXIT_FAILURE); - } - - /* - If TTL duration is not specified but TTL column was specified, throw an - error because TTL column requires duration. - */ - if (ttl_duration == 0 && !ttl_column.empty()) { - my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_column.c_str()); - DBUG_RETURN(HA_EXIT_FAILURE); - } - const uint index_id = ddl_manager.get_and_update_next_number(&dict_manager); const uint16_t index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST; uchar index_type; @@ -7604,7 +7491,8 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i, if (!ttl_column.empty()) { (*new_key_def)->m_ttl_column = ttl_column; } - + // initialize key_def + (*new_key_def)->setup(table_arg, tbl_def_arg); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -7612,7 +7500,7 @@ int rdb_normalize_tablename(const std::string &tablename, std::string *const strbuf) { if (tablename.size() < 2 || tablename[0] != '.' 
|| (tablename[1] != FN_LIBCHAR && tablename[1] != FN_LIBCHAR2)) { - DBUG_ASSERT(0); // We were not passed table name? + DBUG_ASSERT(0); // We were not passed table name? return HA_ERR_ROCKSDB_INVALID_TABLE; } @@ -7622,7 +7510,7 @@ int rdb_normalize_tablename(const std::string &tablename, } if (pos == std::string::npos) { - DBUG_ASSERT(0); // We were not passed table name? + DBUG_ASSERT(0); // We were not passed table name? return HA_ERR_ROCKSDB_INVALID_TABLE; } @@ -7736,95 +7624,34 @@ int rdb_split_normalized_tablename(const std::string &fullname, return HA_EXIT_SUCCESS; } -/** - @brief - create() is called to create a database. The variable name will have the name - of the table. - - @details - When create() is called you do not need to worry about - opening the table. Also, the .frm file will have already been - created so adjusting create_info is not necessary. You can overwrite - the .frm file at this point if you wish to change the table - definition, but there are no methods currently provided for doing - so. +/* + Create a table's Rdb_tbl_def and its Rdb_key_defs and store table information + into MyRocks Data Dictionary + The method is called during create table/partition, truncate table/partition - Called from handle.cc by ha_create_table(). 
+ @param table_name IN table's name formated as + 'dbname.tablename' + @param table_arg IN sql table + @param auto_increment_value IN specified table's auto increment value @return HA_EXIT_SUCCESS OK other HA_ERR error code (can be SE-specific) - - @see - ha_create_table() in handle.cc */ - -int ha_rocksdb::create(const char *const name, TABLE *const table_arg, - HA_CREATE_INFO *const create_info) { +int ha_rocksdb::create_table(const std::string &table_name, + const TABLE *table_arg, + ulonglong auto_increment_value) { DBUG_ENTER_FUNC(); - DBUG_ASSERT(table_arg != nullptr); - DBUG_ASSERT(create_info != nullptr); - - if (create_info->data_file_name) { - // DATA DIRECTORY is used to create tables under a specific location - // outside the MySQL data directory. We don't support this for MyRocks. - // The `rocksdb_datadir` setting should be used to configure RocksDB data - // directory. - print_error(HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED, - MYF(ME_WARNING)); - DBUG_RETURN(HA_WRONG_CREATE_OPTION); - } - if (create_info->index_file_name) { - // Similar check for INDEX DIRECTORY as well. - print_error(HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED, - MYF(ME_WARNING)); - DBUG_RETURN(HA_WRONG_CREATE_OPTION); - } - std::string str; int err; - THD *const thd = my_core::thd_get_current_thd(); - - if (get_table_if_exists(name)) { - if (thd->lex->sql_command == SQLCOM_TRUNCATE) { - err = delete_table(name); - if (err != HA_EXIT_SUCCESS) { - DBUG_RETURN(err); - } - } else { - err = rdb_normalize_tablename(name, &str); - if (err != HA_EXIT_SUCCESS) { - DBUG_RETURN(err); - } - my_error(ER_METADATA_INCONSISTENCY, MYF(0), str.c_str(), name); - DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA); - } - } - - /* - Construct dbname.tablename ourselves, because parititioning - passes strings like "./test/t14#P#p0" for individual partitions, - while table_arg->s->table_name has none of that. 
- */ - err = rdb_normalize_tablename(name, &str); - if (err != HA_EXIT_SUCCESS) { - DBUG_RETURN(err); - } - - if (contains_foreign_key(thd)) { - my_error(ER_NOT_SUPPORTED_YET, MYF(0), - "FOREIGN KEY for the RocksDB storage engine"); - DBUG_RETURN(HA_ERR_UNSUPPORTED); - } - const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin(); rocksdb::WriteBatch *const batch = wb.get(); /* Create table/key descriptions and put them into the data dictionary */ - m_tbl_def = new Rdb_tbl_def(str); + m_tbl_def = new Rdb_tbl_def(table_name); uint n_keys = table_arg->s->keys; @@ -7834,6 +7661,9 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, */ if (has_hidden_pk(table_arg)) { n_keys += 1; + // reset hidden pk id + // the starting valid value for hidden pk is 1 + m_tbl_def->m_hidden_pk_val = 1; } m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[n_keys]; @@ -7847,9 +7677,9 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)]; - if (create_info->auto_increment_value) { + if (auto_increment_value) { bool autoinc_upgrade_test = false; - m_tbl_def->m_auto_incr_val = create_info->auto_increment_value; + m_tbl_def->m_auto_incr_val = auto_increment_value; DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", autoinc_upgrade_test = true;); if (!autoinc_upgrade_test) { auto s = dict_manager.put_auto_incr_val( @@ -7888,6 +7718,108 @@ error: } /** + @brief + create() is called to create a table. The variable name will have the name + of the table. + + @details + When create() is called you do not need to worry about + opening the table. Also, the .frm file will have already been + created so adjusting create_info is not necessary. You can overwrite + the .frm file at this point if you wish to change the table + definition, but there are no methods currently provided for doing + so. + + Called from handle.cc by ha_create_table(). 
+ + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) + + @see + ha_create_table() in handle.cc +*/ + +int ha_rocksdb::create(const char *const name, TABLE *const table_arg, + HA_CREATE_INFO *const create_info) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(table_arg != nullptr); + DBUG_ASSERT(create_info != nullptr); + + if (create_info->data_file_name) { + // DATA DIRECTORY is used to create tables under a specific location + // outside the MySQL data directory. We don't support this for MyRocks. + // The `rocksdb_datadir` setting should be used to configure RocksDB data + // directory. + DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED); + } + + if (create_info->index_file_name) { + // Similar check for INDEX DIRECTORY as well. + DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED); + } + + int err; + /* + Construct dbname.tablename ourselves, because parititioning + passes strings like "./test/t14#P#p0" for individual partitions, + while table_arg->s->table_name has none of that. + */ + std::string str; + err = rdb_normalize_tablename(name, &str); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } + + // FOREIGN KEY isn't supported yet + THD *const thd = my_core::thd_get_current_thd(); + if (contains_foreign_key(thd)) { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "FOREIGN KEY for the RocksDB storage engine"); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + + // Check whether Data Dictionary contain information + Rdb_tbl_def *tbl = ddl_manager.find(str); + if (tbl != nullptr) { + if (thd->lex->sql_command == SQLCOM_TRUNCATE) { + err = delete_table(tbl); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } + } else { + my_error(ER_METADATA_INCONSISTENCY, MYF(0), str.c_str(), name); + DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA); + } + } + + // The below adds/clears hooks in RocksDB sync points. 
There's no reason for + // this code to be in ::create() but it needs to be somewhere where it is + // away from any tight loops and where one can invoke it from mtr: + DBUG_EXECUTE_IF("rocksdb_enable_delay_commits", + { + auto syncpoint= rocksdb::SyncPoint::GetInstance(); + syncpoint->SetCallBack("DBImpl::WriteImpl:BeforeLeaderEnters", + [&](void* /*arg*/) {my_sleep(500);} ); + syncpoint->EnableProcessing(); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS, + "enable_delay_commits_mode ON"); + + }); + DBUG_EXECUTE_IF("rocksdb_disable_delay_commits", + { + auto syncpoint= rocksdb::SyncPoint::GetInstance(); + syncpoint->ClearCallBack("DBImpl::WriteImpl:BeforeLeaderEnters"); + syncpoint->DisableProcessing(); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS, + "enable_delay_commits_mode OFF"); + }); + + DBUG_RETURN(create_table(str, table_arg, create_info->auto_increment_value)); +} + +/** @note This function is used only when the table has not yet been opened, and keyread_allowed bitmap doesn't have the correct values yet. @@ -7930,9 +7862,10 @@ bool ha_rocksdb::check_keyread_allowed(uint inx, uint part, int ha_rocksdb::read_key_exact(const Rdb_key_def &kd, rocksdb::Iterator *const iter, - const bool &full_key_match, + const bool /* unused */, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) { + THD *thd = ha_thd(); /* We are looking for the first record such that index_tuple= lookup_tuple. @@ -7941,6 +7874,9 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd, rocksdb_smart_seek(kd.m_is_reverse_cf, iter, key_slice); while (iter->Valid() && kd.value_matches_prefix(iter->key(), key_slice)) { + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } /* If TTL is enabled we need to check if the given key has already expired from the POV of the current transaction. 
If it has, try going to the next @@ -7962,9 +7898,10 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd, } int ha_rocksdb::read_before_key(const Rdb_key_def &kd, - const bool &full_key_match, + const bool full_key_match, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) { + THD *thd = ha_thd(); /* We are looking for record with the biggest t.key such that t.key < lookup_tuple. @@ -7972,6 +7909,9 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd, rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice); while (is_valid(m_scan_it)) { + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } /* We are using full key and we've hit an exact match, or... @@ -7996,6 +7936,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd, int ha_rocksdb::read_after_key(const Rdb_key_def &kd, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) { + THD *thd = ha_thd(); /* We are looking for the first record such that @@ -8013,6 +7954,9 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd, */ while (is_valid(m_scan_it) && kd.has_ttl() && should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) { + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it); } @@ -8021,7 +7965,7 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd, int ha_rocksdb::position_to_correct_key( const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, - const bool &full_key_match, const uchar *const key, + const bool full_key_match, const uchar *const key, const key_part_map &keypart_map, const rocksdb::Slice &key_slice, bool *const move_forward, const int64_t ttl_filter_ts) { int rc = 0; @@ -8029,65 +7973,66 @@ int ha_rocksdb::position_to_correct_key( *move_forward = true; switch (find_flag) { - case HA_READ_KEY_EXACT: - rc = - read_key_exact(kd, m_scan_it, full_key_match, key_slice, ttl_filter_ts); - break; - case HA_READ_BEFORE_KEY: - *move_forward = false; - rc = read_before_key(kd, 
full_key_match, key_slice, ttl_filter_ts); - if (rc == 0 && !kd.covers_key(m_scan_it->key())) { - /* The record we've got is not from this index */ - rc = HA_ERR_KEY_NOT_FOUND; - } - break; - case HA_READ_AFTER_KEY: - case HA_READ_KEY_OR_NEXT: - rc = read_after_key(kd, key_slice, ttl_filter_ts); - if (rc == 0 && !kd.covers_key(m_scan_it->key())) { - /* The record we've got is not from this index */ - rc = HA_ERR_KEY_NOT_FOUND; - } - break; - case HA_READ_KEY_OR_PREV: - case HA_READ_PREFIX: - /* This flag is not used by the SQL layer, so we don't support it yet. */ - rc = HA_ERR_UNSUPPORTED; - break; - case HA_READ_PREFIX_LAST: - case HA_READ_PREFIX_LAST_OR_PREV: - *move_forward = false; - /* - Find the last record with the specified index prefix lookup. - - HA_READ_PREFIX_LAST requires that the record has the - prefix=lookup (if there are no such records, - HA_ERR_KEY_NOT_FOUND should be returned). - - HA_READ_PREFIX_LAST_OR_PREV has no such requirement. If there are no - records with prefix=lookup, we should return the last record - before that. - */ - rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts); - if (rc == 0) { - const rocksdb::Slice &rkey = m_scan_it->key(); - if (!kd.covers_key(rkey)) { + case HA_READ_KEY_EXACT: + rc = read_key_exact(kd, m_scan_it, full_key_match, key_slice, + ttl_filter_ts); + break; + case HA_READ_BEFORE_KEY: + *move_forward = false; + rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts); + if (rc == 0 && !kd.covers_key(m_scan_it->key())) { /* The record we've got is not from this index */ rc = HA_ERR_KEY_NOT_FOUND; - } else if (find_flag == HA_READ_PREFIX_LAST) { - uint size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, - key, keypart_map); - rocksdb::Slice lookup_tuple(reinterpret_cast<char *>(m_sk_packed_tuple), - size); - - // We need to compare the key we've got with the original search prefix. 
- if (!kd.value_matches_prefix(rkey, lookup_tuple)) { + } + break; + case HA_READ_AFTER_KEY: + case HA_READ_KEY_OR_NEXT: + rc = read_after_key(kd, key_slice, ttl_filter_ts); + if (rc == 0 && !kd.covers_key(m_scan_it->key())) { + /* The record we've got is not from this index */ + rc = HA_ERR_KEY_NOT_FOUND; + } + break; + case HA_READ_KEY_OR_PREV: + case HA_READ_PREFIX: + /* This flag is not used by the SQL layer, so we don't support it yet. */ + rc = HA_ERR_UNSUPPORTED; + break; + case HA_READ_PREFIX_LAST: + case HA_READ_PREFIX_LAST_OR_PREV: + *move_forward = false; + /* + Find the last record with the specified index prefix lookup. + - HA_READ_PREFIX_LAST requires that the record has the + prefix=lookup (if there are no such records, + HA_ERR_KEY_NOT_FOUND should be returned). + - HA_READ_PREFIX_LAST_OR_PREV has no such requirement. If there are no + records with prefix=lookup, we should return the last record + before that. + */ + rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts); + if (rc == 0) { + const rocksdb::Slice &rkey = m_scan_it->key(); + if (!kd.covers_key(rkey)) { + /* The record we've got is not from this index */ rc = HA_ERR_KEY_NOT_FOUND; + } else if (find_flag == HA_READ_PREFIX_LAST) { + uint size = kd.pack_index_tuple(table, m_pack_buffer, + m_sk_packed_tuple, key, keypart_map); + rocksdb::Slice lookup_tuple( + reinterpret_cast<char *>(m_sk_packed_tuple), size); + + // We need to compare the key we've got with the original search + // prefix. 
+ if (!kd.value_matches_prefix(rkey, lookup_tuple)) { + rc = HA_ERR_KEY_NOT_FOUND; + } } } - } - break; - default: - DBUG_ASSERT(0); - break; + break; + default: + DBUG_ASSERT(0); + break; } return rc; @@ -8096,11 +8041,10 @@ int ha_rocksdb::position_to_correct_key( int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, const rocksdb::Slice &slice, - const int &bytes_changed_by_succ, + const int bytes_changed_by_succ, const key_range *const end_key, uint *const end_key_packed_size) { - if (find_flag == HA_READ_KEY_EXACT) - return slice.size(); + if (find_flag == HA_READ_KEY_EXACT) return slice.size(); if (find_flag == HA_READ_PREFIX_LAST) { /* @@ -8170,8 +8114,18 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, const rocksdb::Slice &rkey = m_scan_it->key(); const rocksdb::Slice &value = m_scan_it->value(); - bool covered_lookup = - m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap); +#ifndef DBUG_OFF + bool save_keyread_only = m_keyread_only; +#endif + DBUG_EXECUTE_IF("dbug.rocksdb.HA_EXTRA_KEYREAD", { m_keyread_only = true; }); + + bool covered_lookup = (m_keyread_only && kd.can_cover_lookup()) || + kd.covers_lookup(&value, &m_lookup_bitmap); + +#ifndef DBUG_OFF + m_keyread_only = save_keyread_only; +#endif + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { pk_size = kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple); @@ -8179,12 +8133,11 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, rc = HA_ERR_ROCKSDB_CORRUPT_DATA; } else { rc = kd.unpack_record(table, buf, &rkey, &value, - m_verify_row_debug_checksums); + m_converter->get_verify_row_debug_checksums()); global_stats.covered_secondary_key_lookups.inc(); } } else { - if (kd.m_is_reverse_cf) - move_forward = !move_forward; + if (kd.m_is_reverse_cf) move_forward = !move_forward; rc = find_icp_matching_index_rec(move_forward, buf); if (!rc) { @@ -8231,11 +8184,12 @@ int 
ha_rocksdb::read_row_from_secondary_key(uchar *const buf, ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const { DBUG_ENTER_FUNC(); - ulong base_flags = HA_READ_NEXT | // doesn't seem to be used + ulong base_flags = HA_READ_NEXT | // doesn't seem to be used HA_READ_ORDER | HA_READ_RANGE | HA_READ_PREV; - if (check_keyread_allowed(inx, part, all_parts)) + if (check_keyread_allowed(inx, part, all_parts)) { base_flags |= HA_KEYREAD_ONLY; + } if (inx == table_share->primary_key) { /* @@ -8291,13 +8245,15 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { rocksdb::Slice value = m_scan_it->value(); bool covered_lookup = - m_keyread_only || m_key_descr_arr[keyno]->covers_lookup( - table, &value, &m_lookup_bitmap); + (m_keyread_only && m_key_descr_arr[keyno]->can_cover_lookup()) || + m_key_descr_arr[keyno]->covers_lookup(&value, &m_lookup_bitmap); if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { rc = m_key_descr_arr[keyno]->unpack_record( - table, buf, &key, &value, m_verify_row_debug_checksums); + table, buf, &key, &value, + m_converter->get_verify_row_debug_checksums()); global_stats.covered_secondary_key_lookups.inc(); } else { + DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete_sk"); rc = get_row_by_rowid(buf, m_pk_packed_tuple, size); } @@ -8347,9 +8303,10 @@ int ha_rocksdb::read_range_first(const key_range *const start_key, range_key_part = table->key_info[active_index].key_part; - if (!start_key) // Read first record + if (!start_key) { + // Read first record result = ha_index_first(table->record[0]); - else { + } else { #ifdef MARIAROCKS_NOT_YET if (is_using_prohibited_gap_locks( is_using_full_unique_key(active_index, start_key->keypart_map, @@ -8365,8 +8322,9 @@ int ha_rocksdb::read_range_first(const key_range *const start_key, start_key->keypart_map, start_key->flag, end_key); }) } - if (result) + if (result) { DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND) ? 
HA_ERR_END_OF_FILE : result); + } if (compare_key(end_range) <= 0) { DBUG_RETURN(HA_EXIT_SUCCESS); @@ -8443,14 +8401,23 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, const key_range *end_key) { DBUG_ENTER_FUNC(); + DBUG_EXECUTE_IF("myrocks_busy_loop_on_row_read", int debug_i = 0; + while (1) { debug_i++; }); + int rc = 0; + THD *thd = ha_thd(); + DEBUG_SYNC(thd, "rocksdb.check_flags_rmi"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + DBUG_RETURN(rc); + } + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; const uint actual_key_parts = kd.get_key_parts(); bool using_full_key = is_using_full_key(keypart_map, actual_key_parts); - if (!end_key) - end_key = end_range; + if (!end_key) end_key = end_range; /* By default, we don't need the retrieved records to match the prefix */ m_sk_match_prefix = nullptr; @@ -8467,8 +8434,7 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, m_pk_packed_tuple, key, keypart_map); bool skip_lookup = is_blind_delete_enabled(); - rc = get_row_by_rowid(buf, m_pk_packed_tuple, size, - skip_lookup, false); + rc = get_row_by_rowid(buf, m_pk_packed_tuple, size, skip_lookup, false); if (!rc && !skip_lookup) { #ifdef MARIAROCKS_NOT_YET @@ -8493,8 +8459,9 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, key, tmp_map); if (table->key_info[active_index].user_defined_key_parts != - kd.get_key_parts()) + kd.get_key_parts()) { using_full_key = false; + } } else { packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, key, keypart_map); @@ -8548,14 +8515,20 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, bool use_all_keys = false; if (find_flag == HA_READ_KEY_EXACT && - my_count_bits(keypart_map) == kd.get_key_parts()) + my_count_bits(keypart_map) == kd.get_key_parts()) { use_all_keys = true; + } 
Rdb_transaction *const tx = get_or_create_tx(table->in_use); const bool is_new_snapshot = !tx->has_snapshot(); // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. it did not exist prior to this) for (;;) { + DEBUG_SYNC(thd, "rocksdb.check_flags_rmi_scan"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + break; + } /* This will open the iterator and position it at a record that's equal or greater than the lookup tuple. @@ -8572,9 +8545,7 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, slice, &move_forward, tx->m_snapshot_timestamp); if (rc) { - /* This status is returned on any error */ - table->status = STATUS_NOT_FOUND; - DBUG_RETURN(rc); + break; } m_skip_scan_it_next_call = false; @@ -8584,13 +8555,15 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, then we have all the rows we need. For a secondary key we now need to lookup the primary key. */ - if (active_index == table->s->primary_key) + if (active_index == table->s->primary_key) { rc = read_row_from_primary_key(buf); - else + } else { rc = read_row_from_secondary_key(buf, kd, move_forward); + } - if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !is_new_snapshot) + if (!should_recreate_snapshot(rc, is_new_snapshot)) { break; /* Exit the loop */ + } // release the snapshot and iterator so they will be regenerated tx->release_snapshot(); @@ -8598,7 +8571,10 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, } if (rc) { - /* the only possible error condition is record-not-found */ + /* + This status is returned on any error + the only possible error condition is record-not-found + */ table->status = STATUS_NOT_FOUND; } else { table->status = 0; @@ -8632,13 +8608,21 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, other - error code */ -int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, +int 
ha_rocksdb::find_icp_matching_index_rec(const bool move_forward, uchar *const buf) { if (pushed_idx_cond && pushed_idx_cond_keyno == active_index) { const Rdb_key_def &kd = *m_key_descr_arr[active_index]; + THD *thd = ha_thd(); while (1) { - rocksdb_skip_expired_records(kd, m_scan_it, !move_forward); + int rc = rocksdb_skip_expired_records(kd, m_scan_it, !move_forward); + if (rc != HA_EXIT_SUCCESS) { + return rc; + } + + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } if (!is_valid(m_scan_it)) { table->status = STATUS_NOT_FOUND; @@ -8662,7 +8646,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, const rocksdb::Slice value = m_scan_it->value(); int err = kd.unpack_record(table, buf, &rkey, &value, - m_verify_row_debug_checksums); + m_converter->get_verify_row_debug_checksums()); if (err != HA_EXIT_SUCCESS) { return err; } @@ -8683,14 +8667,6 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, * row. */ break; } - /* - TODO: should we have this here, or RockDB handles this internally? 
- if (my_core::thd_killed(current_thd)) - { - rc= HA_ERR_INTERNAL_ERROR; // doesn't matter - break; - } - */ } } return HA_EXIT_SUCCESS; @@ -8722,14 +8698,14 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { // Rdb_transaction *tx= get_or_create_tx(table->in_use); // tx->snapshot= nullptr; - const bool save_verify_row_debug_checksums = m_verify_row_debug_checksums; - m_verify_row_debug_checksums = true; - + bool save_verify_row_debug_checksums = + m_converter->get_verify_row_debug_checksums(); + m_converter->set_verify_row_debug_checksums(true); /* For each secondary index, check that we can get a PK value from it */ // NO_LINT_DEBUG sql_print_verbose_info("CHECKTABLE %s: Checking table %s", table_name, table_name); - ha_rows UNINIT_VAR(row_checksums_at_start); // set/used iff first_index==true + ha_rows UNINIT_VAR(row_checksums_at_start); // set/used iff first_index==true ha_rows row_checksums = ha_rows(-1); bool first_index = true; @@ -8739,20 +8715,21 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { ha_index_init(keyno, true); ha_rows rows = 0; ha_rows checksums = 0; - if (first_index) - row_checksums_at_start = m_row_checksums_checked; + if (first_index) { + row_checksums_at_start = m_converter->get_row_checksums_checked(); + } int res; // NO_LINT_DEBUG sql_print_verbose_info("CHECKTABLE %s: Checking index %s", table_name, table->key_info[keyno].name.str); while (1) { - if (!rows) + if (!rows) { res = index_first(table->record[0]); - else + } else { res = index_next(table->record[0]); + } - if (res == HA_ERR_END_OF_FILE) - break; + if (res == HA_ERR_END_OF_FILE) break; if (res) { // error // NO_LINT_DEBUG @@ -8773,16 +8750,18 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { if ((res = get_row_by_rowid(table->record[0], rowkey_copy.ptr(), rowkey_copy.length()))) { // NO_LINT_DEBUG - sql_print_error("CHECKTABLE %s: .. 
row %lld: " - "failed to fetch row by rowid", - table_name, rows); + sql_print_error( + "CHECKTABLE %s: .. row %lld: " + "failed to fetch row by rowid", + table_name, rows); goto error; } longlong hidden_pk_id = 0; if (has_hidden_pk(table) && - read_hidden_pk_id_from_rowkey(&hidden_pk_id)) + read_hidden_pk_id_from_rowkey(&hidden_pk_id)) { goto error; + } /* Check if we get the same PK value */ uint packed_size = m_pk_descr->pack_record( @@ -8803,9 +8782,10 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { if (packed_size != sec_key_copy.length() || memcmp(m_sk_packed_tuple, sec_key_copy.ptr(), packed_size)) { // NO_LINT_DEBUG - sql_print_error("CHECKTABLE %s: .. row %lld: " - "secondary index value mismatch", - table_name, rows); + sql_print_error( + "CHECKTABLE %s: .. row %lld: " + "secondary index value mismatch", + table_name, rows); goto print_and_error; } rows++; @@ -8837,7 +8817,8 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { table_name, rows, checksums); if (first_index) { - row_checksums = m_row_checksums_checked - row_checksums_at_start; + row_checksums = + m_converter->get_row_checksums_checked() - row_checksums_at_start; first_index = false; } ha_index_end(); @@ -8850,16 +8831,16 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { } extra(HA_EXTRA_NO_KEYREAD); - m_verify_row_debug_checksums = save_verify_row_debug_checksums; + m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums); /* - TODO: we should check also for PK records that are missing in the secondary - indexes. + TODO(sergiy): we should check also for PK records that are missing in + the secondary indexes. For that, need to walk through the PK and check that every PK record has a proper counterpart in each secondary index. 
*/ DBUG_RETURN(HA_ADMIN_OK); error: - m_verify_row_debug_checksums = save_verify_row_debug_checksums; + m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums); ha_index_or_rnd_end(); extra(HA_EXTRA_NO_KEYREAD); @@ -8869,10 +8850,11 @@ error: static void dbug_dump_str(FILE *const out, const char *const str, int len) { fprintf(out, "\""); for (int i = 0; i < len; i++) { - if (str[i] > 32) + if (str[i] > 32) { fprintf(out, "%c", str[i]); - else + } else { fprintf(out, "\\%d", str[i]); + } } fprintf(out, "\""); } @@ -8885,8 +8867,7 @@ static void dbug_dump_str(FILE *const out, const char *const str, int len) { void dbug_dump_database(rocksdb::DB *const db) { FILE *const out = fopen("/tmp/rocksdb.dump", "wt"); - if (!out) - return; + if (!out) return; rocksdb::Iterator *it = db->NewIterator(rocksdb::ReadOptions()); for (it->SeekToFirst(); it->Valid(); it->Next()) { @@ -8908,30 +8889,28 @@ rocksdb::Status ha_rocksdb::get_for_update( DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE); const bool exclusive = m_lock_rows != RDB_LOCK_READ; - rocksdb::Status s = tx->get_for_update(column_family, key, value, exclusive); - - // If we have a lock conflict and we are running in READ COMMITTTED mode - // release and reacquire the snapshot and then retry the get_for_update(). 
- if (s.IsBusy() && !s.IsDeadlock() && - my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) { - tx->release_snapshot(); - tx->acquire_snapshot(false); - - s = tx->get_for_update(column_family, key, value, exclusive); - } + const bool do_validate = + my_core::thd_tx_isolation(ha_thd()) > ISO_READ_COMMITTED; + rocksdb::Status s = + tx->get_for_update(column_family, key, value, exclusive, do_validate); +#ifndef DBUG_OFF + ++rocksdb_num_get_for_update_calls; +#endif return s; } -bool ha_rocksdb::is_blind_delete_enabled() -{ +bool ha_rocksdb::is_blind_delete_enabled() { THD *thd = ha_thd(); + /* + Note: in MariaDB, thd->lex->table_count is only set for multi-table DELETE, + not for single-table DELETE. So we check thd->lex->query_tables instead. + */ return (THDVAR(thd, blind_delete_primary_key) && thd->lex->sql_command == SQLCOM_DELETE && - thd->lex->table_count == 1 && + thd->lex->query_tables && !thd->lex->query_tables->next_global && table->s->keys == 1 && - !has_hidden_pk(table) && - !thd->rgi_slave); + !has_hidden_pk(table) && !thd->rgi_slave); } /* @@ -8959,8 +8938,9 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, DEBUG_SYNC(ha_thd(), "rocksdb.get_row_by_rowid"); DBUG_EXECUTE_IF("dbug.rocksdb.get_row_by_rowid", { THD *thd = ha_thd(); - const char act[] = "now signal Reached " - "wait_for signal.rocksdb.get_row_by_rowid_let_running"; + const char act[] = + "now signal Reached " + "wait_for signal.rocksdb.get_row_by_rowid_let_running"; DBUG_ASSERT(opt_debug_sync_timeout > 0); DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); };); @@ -8969,8 +8949,7 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, rocksdb::Status s; /* Pretend row found without looking up */ - if (skip_lookup) - { + if (skip_lookup) { #ifdef MARIAROCKS_NOT_YET stats.rows_deleted_blind++; #endif @@ -8983,6 +8962,17 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, if (m_lock_rows == 
RDB_LOCK_NONE) { tx->acquire_snapshot(true); s = tx->get(m_pk_descr->get_cf(), key_slice, &m_retrieved_record); + } else if (m_insert_with_update && m_dup_pk_found) { + DBUG_ASSERT(m_pk_descr->get_keyno() == m_dupp_errkey); + DBUG_ASSERT(m_dup_pk_retrieved_record.length() == + m_retrieved_record.size()); + DBUG_ASSERT(memcmp(m_dup_pk_retrieved_record.ptr(), + m_retrieved_record.data(), + m_retrieved_record.size()) == 0); + + // do nothing - we already have the result in m_retrieved_record and + // already taken the lock + s = rocksdb::Status::OK(); } else { s = get_for_update(tx, m_pk_descr->get_cf(), key_slice, &m_retrieved_record); @@ -9037,8 +9027,7 @@ int ha_rocksdb::index_next(uchar *const buf) { } int rc = index_next_with_direction(buf, moves_forward); - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -9057,8 +9046,7 @@ int ha_rocksdb::index_prev(uchar *const buf) { } int rc = index_next_with_direction(buf, moves_forward); - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -9071,19 +9059,33 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) { if (active_index == pk_index(table, m_tbl_def)) { rc = rnd_next_with_direction(buf, move_forward); } else { - if (m_skip_scan_it_next_call) { - m_skip_scan_it_next_call = false; - } else { - if (move_forward) - m_scan_it->Next(); /* this call cannot fail */ - else - m_scan_it->Prev(); + THD *thd = ha_thd(); + for (;;) { + DEBUG_SYNC(thd, "rocksdb.check_flags_inwd"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + break; + } + if (m_skip_scan_it_next_call) { + m_skip_scan_it_next_call = false; + } else { + if (move_forward) { + m_scan_it->Next(); /* this call cannot fail */ + } else { + m_scan_it->Prev(); + } + } + rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index], + m_scan_it, 
!move_forward); + if (rc != HA_EXIT_SUCCESS) { + break; + } + rc = find_icp_matching_index_rec(move_forward, buf); + if (!rc) rc = secondary_index_read(active_index, buf); + if (!should_skip_invalidated_record(rc)) { + break; + } } - rocksdb_skip_expired_records(*m_key_descr_arr[active_index], m_scan_it, - !move_forward); - rc = find_icp_matching_index_rec(move_forward, buf); - if (!rc) - rc = secondary_index_read(active_index, buf); } DBUG_RETURN(rc); @@ -9101,8 +9103,7 @@ int ha_rocksdb::index_first(uchar *const buf) { int rc = m_key_descr_arr[active_index]->m_is_reverse_cf ? index_last_intern(buf) : index_first_intern(buf); - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -9119,8 +9120,7 @@ int ha_rocksdb::index_last(uchar *const buf) { int rc = m_key_descr_arr[active_index]->m_is_reverse_cf ? index_first_intern(buf) : index_last_intern(buf); - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -9188,8 +9188,9 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { m_skip_scan_it_next_call = true; rc = index_next_with_direction(buf, true); - if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !is_new_snapshot) - break; // exit the loop + if (!should_recreate_snapshot(rc, is_new_snapshot)) { + break; /* exit the loop */ + } // release the snapshot and iterator so they will be regenerated tx->release_snapshot(); @@ -9263,11 +9264,7 @@ int ha_rocksdb::index_last_intern(uchar *const buf) { DBUG_ASSERT(key != nullptr); const Rdb_key_def &kd = *m_key_descr_arr[active_index]; - if (kd.m_is_reverse_cf) { - kd.get_infimum_key(key, &key_size); - } else { - kd.get_supremum_key(key, &key_size); - } + int key_end_matching_bytes = kd.get_last_key(key, &key_size); rocksdb::Slice index_key((const char *)key, key_size); @@ -9278,7 +9275,7 @@ int ha_rocksdb::index_last_intern(uchar *const buf) { // Loop as long 
as we get a deadlock error AND we end up creating the // snapshot here (i.e. it did not exist prior to this) for (;;) { - setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE); + setup_scan_iterator(kd, &index_key, false, key_end_matching_bytes); m_scan_it->SeekForPrev(index_key); m_skip_scan_it_next_call = false; @@ -9287,12 +9284,12 @@ int ha_rocksdb::index_last_intern(uchar *const buf) { rc = rnd_next_with_direction(buf, false); } else { rc = find_icp_matching_index_rec(false /*move_forward*/, buf); - if (!rc) - rc = secondary_index_read(active_index, buf); + if (!rc) rc = secondary_index_read(active_index, buf); } - if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !is_new_snapshot) + if (!should_recreate_snapshot(rc, is_new_snapshot)) { break; /* exit the loop */ + } // release the snapshot and iterator so they will be regenerated tx->release_snapshot(); @@ -9331,7 +9328,7 @@ void ha_rocksdb::unlock_row() { - If the index is PRIMARY KEY, and if all of the columns of the table are covered by the PRIMARY KEY, SingleDelete can be used. */ -bool ha_rocksdb::can_use_single_delete(const uint &index) const { +bool ha_rocksdb::can_use_single_delete(const uint index) const { return (index != pk_index(table, m_tbl_def) || (!has_hidden_pk(table) && table->key_info[index].ext_key_parts == table->s->fields)); @@ -9347,12 +9344,18 @@ bool ha_rocksdb::skip_unique_check() const { 3) the user set unique_checks option to 0, and the table does not have any indexes. 
If the table has secondary keys, then those might becomes inconsisted/corrupted + 4) We're using read-free replication */ return THDVAR(table->in_use, bulk_load) || (m_force_skip_unique_check && m_skip_unique_check) || (my_core::thd_test_options(table->in_use, OPTION_RELAXED_UNIQUE_CHECKS) && - m_tbl_def->m_key_count == 1); + m_tbl_def->m_key_count == 1) || +#ifdef MARIAROCKS_NOT_YET + use_read_free_rpl(); +#else + FALSE; +#endif } #ifdef MARIAROCKS_NOT_YET // MDEV-10975 @@ -9452,10 +9455,9 @@ const char *ha_rocksdb::get_key_comment(const uint index, return table_arg->key_info[index].comment.str; } -const std::string ha_rocksdb::generate_cf_name(const uint index, - const TABLE *const table_arg, - const Rdb_tbl_def *const tbl_def_arg, - bool *per_part_match_found) { +const std::string ha_rocksdb::generate_cf_name( + const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found) { DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(tbl_def_arg != nullptr); DBUG_ASSERT(per_part_match_found != nullptr); @@ -9500,11 +9502,14 @@ const std::string ha_rocksdb::get_table_comment(const TABLE *const table_arg) { } /** + Write a new row + + @param[in] buf new row data to write @return HA_EXIT_SUCCESS OK other HA_ERR error code (can be SE-specific) */ -int ha_rocksdb::write_row(uchar *const buf) { +int ha_rocksdb::write_row(const uchar *const buf) { DBUG_ENTER_FUNC(); DBUG_ASSERT(buf != nullptr); @@ -9522,6 +9527,11 @@ int ha_rocksdb::write_row(uchar *const buf) { } } + // clear cache at beginning of write for INSERT ON DUPLICATE + // we may get multiple write->fail->read->update if there are multiple + // values from INSERT + m_dup_pk_found = false; + const int rv = update_write_row(nullptr, buf, skip_unique_check()); if (rv == 0) { @@ -9554,6 +9564,15 @@ void ha_rocksdb::set_last_rowkey(const uchar *const old_data) { #endif } +/** + Collect update data for primary key + + @param[in, out] row_info hold all data for update row, 
such as + new row data/old row data + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) { int size; @@ -9566,10 +9585,9 @@ int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) { row_info->new_pk_unpack_info = &m_pk_unpack_info; - size = - m_pk_descr->pack_record(table, m_pack_buffer, row_info->new_data, - m_pk_packed_tuple, row_info->new_pk_unpack_info, - false, 0, 0, nullptr, &row_info->ttl_pk_offset); + size = m_pk_descr->pack_record( + table, m_pack_buffer, row_info->new_data, m_pk_packed_tuple, + row_info->new_pk_unpack_info, false, 0, 0, nullptr); } else if (row_info->old_data == nullptr) { row_info->hidden_pk_id = update_hidden_pk_val(); size = @@ -9593,30 +9611,32 @@ int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) { return HA_EXIT_SUCCESS; } -int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id, +/** + Check the specified primary key value is unique and also lock the row + + @param[in] key_id key index + @param[in] row_info hold all data for update row, such as old row + data and new row data + @param[out] found whether the primary key exists before. + @param[out] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::check_and_lock_unique_pk(const uint key_id, const struct update_row_info &row_info, - bool *const found, - bool *const pk_changed) { + bool *const found) { DBUG_ASSERT(found != nullptr); - DBUG_ASSERT(pk_changed != nullptr); - *pk_changed = false; - - /* - For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs - always require locking. 
- */ - if (row_info.old_pk_slice.size() > 0) { - /* - If the keys are the same, then no lock is needed - */ - if (!row_info.new_pk_slice.compare(row_info.old_pk_slice)) { - *found = false; - return HA_EXIT_SUCCESS; - } + DBUG_ASSERT(row_info.old_pk_slice.size() == 0 || + row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0); - *pk_changed = true; - } + /* Ignore PK violations if this is a optimized 'replace into' */ +#ifdef MARIAROCKS_NOT_YET + const bool ignore_pk_unique_check = ha_thd()->lex->blind_replace_into; +#else + const bool ignore_pk_unique_check= false; +#endif /* Perform a read to determine if a duplicate entry exists. For primary @@ -9639,17 +9659,56 @@ int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id, */ const rocksdb::Status s = get_for_update(row_info.tx, m_pk_descr->get_cf(), row_info.new_pk_slice, - &m_retrieved_record); + ignore_pk_unique_check ? nullptr : &m_retrieved_record); if (!s.ok() && !s.IsNotFound()) { return row_info.tx->set_status_error( table->in_use, s, *m_key_descr_arr[key_id], m_tbl_def, m_table_handler); } - *found = !s.IsNotFound(); + bool key_found = ignore_pk_unique_check ? false : !s.IsNotFound(); + + /* + If the pk key has ttl, we may need to pretend the row wasn't + found if it is already expired. + */ + if (key_found && m_pk_descr->has_ttl() && + should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, + (row_info.tx->m_snapshot_timestamp + ? row_info.tx->m_snapshot_timestamp + : static_cast<int64_t>(std::time(nullptr))))) { + key_found = false; + } + + if (key_found && row_info.old_data == nullptr && m_insert_with_update) { + // In INSERT ON DUPLICATE KEY UPDATE ... 
case, if the insert failed + // due to a duplicate key, remember the last key and skip the check + // next time + m_dup_pk_found = true; + +#ifndef DBUG_OFF + // save it for sanity checking later + m_dup_pk_retrieved_record.copy(m_retrieved_record.data(), + m_retrieved_record.size(), &my_charset_bin); +#endif + } + + *found = key_found; + return HA_EXIT_SUCCESS; } -int ha_rocksdb::check_and_lock_sk(const uint &key_id, +/** + Check the specified secondary key value is unique and also lock the row + + @param[in] key_id key index + @param[in] row_info hold all data for update row, such as old row + data and new row data + @param[out] found whether specified key value exists before. + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::check_and_lock_sk(const uint key_id, const struct update_row_info &row_info, bool *const found) { DBUG_ASSERT(found != nullptr); @@ -9779,8 +9838,18 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id, return HA_EXIT_SUCCESS; } +/** + Enumerate all keys to check their uniquess and also lock it + + @param[in] row_info hold all data for update row, such as old row + data and new row data + @param[out] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ int ha_rocksdb::check_uniqueness_and_lock( - const struct update_row_info &row_info, bool *const pk_changed) { + const struct update_row_info &row_info, bool pk_changed) { /* Go through each index and determine if the index has uniqueness requirements. If it does, then try to obtain a row lock on the new values. 
@@ -9792,7 +9861,12 @@ int ha_rocksdb::check_uniqueness_and_lock( int rc; if (is_pk(key_id, table, m_tbl_def)) { - rc = check_and_lock_unique_pk(key_id, row_info, &found, pk_changed); + if (row_info.old_pk_slice.size() > 0 && !pk_changed) { + found = false; + rc = HA_EXIT_SUCCESS; + } else { + rc = check_and_lock_unique_pk(key_id, row_info, &found); + } } else { rc = check_and_lock_sk(key_id, row_info, &found); } @@ -9801,23 +9875,11 @@ int ha_rocksdb::check_uniqueness_and_lock( return rc; } - /* - If the pk key has ttl, we may need to pretend the row wasn't - found if it is already expired. The pk record is read into - m_retrieved_record by check_and_lock_unique_pk(). - */ - if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() && - should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, - (row_info.tx->m_snapshot_timestamp - ? row_info.tx->m_snapshot_timestamp - : static_cast<int64_t>(std::time(nullptr))))) { - found = false; - } - if (found) { /* There is a row with this key already, so error out. */ errkey = key_id; m_dupp_errkey = errkey; + return HA_ERR_FOUND_DUPP_KEY; } } @@ -9825,19 +9887,31 @@ int ha_rocksdb::check_uniqueness_and_lock( return HA_EXIT_SUCCESS; } +/** + Check whether secondary key value is duplicate or not + + @param[in] table_arg the table currently working on + @param[in key_def the key_def is being checked + @param[in] key secondary key storage data + @param[out] sk_info hold secondary key memcmp datas(new/old) + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ + int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg, - const Rdb_key_def &index, + const Rdb_key_def &key_def, const rocksdb::Slice *key, struct unique_sk_buf_info *sk_info) { uint n_null_fields = 0; - const rocksdb::Comparator *index_comp = index.get_cf()->GetComparator(); + const rocksdb::Comparator *index_comp = key_def.get_cf()->GetComparator(); /* Get proper SK buffer. 
*/ uchar *sk_buf = sk_info->swap_and_get_sk_buf(); /* Get memcmp form of sk without extended pk tail */ uint sk_memcmp_size = - index.get_memcmp_sk_parts(table_arg, *key, sk_buf, &n_null_fields); + key_def.get_memcmp_sk_parts(table_arg, *key, sk_buf, &n_null_fields); sk_info->sk_memcmp_key = rocksdb::Slice(reinterpret_cast<char *>(sk_buf), sk_memcmp_size); @@ -9857,12 +9931,17 @@ int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd, const rocksdb::Slice &value, bool sort) { DBUG_ENTER_FUNC(); int res; + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + rocksdb::ColumnFamilyHandle *cf = kd.get_cf(); // In the case of unsorted inserts, m_sst_info allocated here is not // used to store the keys. It is still used to indicate when tables // are switched. - if (m_sst_info == nullptr || m_sst_info->is_committed()) { + if (m_sst_info == nullptr || m_sst_info->is_done()) { m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name, kd.get_name(), cf, *rocksdb_db_options, THDVAR(ha_thd(), trace_sst_api))); @@ -9895,17 +9974,61 @@ int ha_rocksdb::finalize_bulk_load(bool print_client_error) { /* Skip if there are no possible ongoing bulk loads */ if (m_sst_info) { - res = m_sst_info->commit(print_client_error); + if (m_sst_info->is_done()) { + m_sst_info.reset(); + DBUG_RETURN(res); + } + + Rdb_sst_info::Rdb_sst_commit_info commit_info; + + // Wrap up the current work in m_sst_info and get ready to commit + // This transfer the responsibility of commit over to commit_info + res = m_sst_info->finish(&commit_info, print_client_error); + if (res == 0) { + // Make sure we have work to do - under race condition we could lose + // to another thread and end up with no work + if (commit_info.has_work()) { + rocksdb::IngestExternalFileOptions opts; + opts.move_files = true; + opts.snapshot_consistency = false; + opts.allow_global_seqno = false; + opts.allow_blocking_flush = false; + + const 
rocksdb::Status s = rdb->IngestExternalFile( + commit_info.get_cf(), commit_info.get_committed_files(), opts); + if (!s.ok()) { + if (print_client_error) { + Rdb_sst_info::report_error_msg(s, nullptr); + } + res = HA_ERR_ROCKSDB_BULK_LOAD; + } else { + // Mark the list of SST files as committed, otherwise they'll get + // cleaned up when commit_info destructs + commit_info.commit(); + } + } + } m_sst_info.reset(); } DBUG_RETURN(res); } -int ha_rocksdb::update_pk(const Rdb_key_def &kd, - const struct update_row_info &row_info, - const bool &pk_changed) { - const uint key_id = kd.get_keyno(); - const bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def); +/** + Update an existing primary key record or write a new primary key record + + @param[in] kd the primary key is being update/write + @param[in] update_row_info hold all row data, such as old row data and + new row data + @param[in] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_pk(const Rdb_key_def &kd, + const struct update_row_info &row_info, + bool pk_changed) { + uint key_id = kd.get_keyno(); + bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def); ulonglong bytes_written = 0; /* @@ -9933,7 +10056,10 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, int rc = HA_EXIT_SUCCESS; rocksdb::Slice value_slice; /* Prepare the new record to be written into RocksDB */ - if ((rc = convert_record_to_storage_format(row_info, &value_slice))) { + if ((rc = m_converter->encode_value_slice( + m_pk_descr, row_info.new_pk_slice, row_info.new_pk_unpack_info, + !row_info.old_pk_slice.empty(), should_store_row_debug_checksums(), + m_ttl_bytes, &m_ttl_bytes_updated, &value_slice))) { return rc; } @@ -9953,7 +10079,9 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, row_info.tx->get_indexed_write_batch()->Put(cf, row_info.new_pk_slice, value_slice); } else { - const auto s = row_info.tx->put(cf, row_info.new_pk_slice, 
value_slice); + const bool assume_tracked = can_assume_tracked(ha_thd()); + const auto s = row_info.tx->put(cf, row_info.new_pk_slice, value_slice, + assume_tracked); if (!s.ok()) { if (s.IsBusy()) { errkey = table->s->primary_key; @@ -9973,9 +10101,22 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, return rc; } -int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, - const struct update_row_info &row_info, - const bool bulk_load_sk) { +/** + update an existing secondary key record or write a new secondary key record + + @param[in] table_arg Table we're working on + @param[in] kd The secondary key being update/write + @param[in] row_info data structure contains old row data and new row data + @param[in] bulk_load_sk whether support bulk load. Currently it is only + support for write + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_sk(const TABLE *const table_arg, + const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool bulk_load_sk) { int new_packed_size; int old_packed_size; int rc = HA_EXIT_SUCCESS; @@ -9997,19 +10138,18 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, return HA_EXIT_SUCCESS; } - const bool store_row_debug_checksums = should_store_row_debug_checksums(); - + bool store_row_debug_checksums = should_store_row_debug_checksums(); new_packed_size = kd.pack_record(table_arg, m_pack_buffer, row_info.new_data, m_sk_packed_tuple, &m_sk_tails, store_row_debug_checksums, - row_info.hidden_pk_id, 0, nullptr, nullptr, m_ttl_bytes); + row_info.hidden_pk_id, 0, nullptr, m_ttl_bytes); if (row_info.old_data != nullptr) { // The old value old_packed_size = kd.pack_record( table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old, &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0, - nullptr, nullptr, m_ttl_bytes); + nullptr, m_ttl_bytes); /* Check if we are going to write the same value. 
This can happen when @@ -10069,13 +10209,22 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, return rc; } -int ha_rocksdb::update_indexes(const struct update_row_info &row_info, - const bool &pk_changed) { +/** + Update existing indexes(PK/SKs) or write new indexes(PK/SKs) + + @param[in] row_info hold all row data, such as old key/new key + @param[in] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_indexes(const struct update_row_info &row_info, + const bool pk_changed) { int rc; bool bulk_load_sk; // The PK must be updated first to pull out the TTL value. - rc = update_pk(*m_pk_descr, row_info, pk_changed); + rc = update_write_pk(*m_pk_descr, row_info, pk_changed); if (rc != HA_EXIT_SUCCESS) { return rc; } @@ -10090,7 +10239,8 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info, continue; } - rc = update_sk(table, *m_key_descr_arr[key_id], row_info, bulk_load_sk); + rc = update_write_sk(table, *m_key_descr_arr[key_id], row_info, + bulk_load_sk); if (rc != HA_EXIT_SUCCESS) { return rc; } @@ -10099,11 +10249,26 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info, return HA_EXIT_SUCCESS; } +/** + Update an existing row or write a new row + + @param[in] old_data nullptr for write, non-null for update + @param[in] new_data non-null for write/update + @param[in] skip_unique_check whether to check uniqueness + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ int ha_rocksdb::update_write_row(const uchar *const old_data, const uchar *const new_data, const bool skip_unique_check) { DBUG_ENTER_FUNC(); + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + bool pk_changed = false; struct update_row_info row_info; @@ -10111,7 +10276,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, row_info.new_data = 
new_data; row_info.skip_unique_check = skip_unique_check; row_info.new_pk_unpack_info = nullptr; - set_last_rowkey(old_data); row_info.tx = get_or_create_tx(table->in_use); @@ -10132,12 +10296,20 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, DBUG_RETURN(rc); } + /* + For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs + always require locking. + */ + if (row_info.old_pk_slice.size() > 0) { + pk_changed = row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0; + } + if (!skip_unique_check) { /* Check to see if we are going to have failures because of unique keys. Also lock the appropriate key values. */ - rc = check_uniqueness_and_lock(row_info, &pk_changed); + rc = check_uniqueness_and_lock(row_info, pk_changed); if (rc != HA_EXIT_SUCCESS) { DBUG_RETURN(rc); } @@ -10151,7 +10323,7 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, here because updates to the transaction will be made and those updates cannot be easily removed without rolling back the entire transaction. */ - rc = update_indexes(row_info, pk_changed); + rc = update_write_indexes(row_info, pk_changed); if (rc != HA_EXIT_SUCCESS) { DBUG_RETURN(rc); } @@ -10192,18 +10364,29 @@ void ha_rocksdb::setup_iterator_bounds( const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, size_t bound_len, uchar *const lower_bound, uchar *const upper_bound, rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice) { - uint min_len = std::min(eq_cond.size(), bound_len); - memcpy(upper_bound, eq_cond.data(), min_len); - kd.successor(upper_bound, min_len); - memcpy(lower_bound, eq_cond.data(), min_len); - kd.predecessor(lower_bound, min_len); + // If eq_cond is shorter than Rdb_key_def::INDEX_NUMBER_SIZE, we should be + // able to get better bounds just by using index id directly. 
+ if (eq_cond.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) { + DBUG_ASSERT(bound_len == Rdb_key_def::INDEX_NUMBER_SIZE); + uint size; + kd.get_infimum_key(lower_bound, &size); + DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE); + kd.get_supremum_key(upper_bound, &size); + DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE); + } else { + DBUG_ASSERT(bound_len <= eq_cond.size()); + memcpy(upper_bound, eq_cond.data(), bound_len); + kd.successor(upper_bound, bound_len); + memcpy(lower_bound, eq_cond.data(), bound_len); + kd.predecessor(lower_bound, bound_len); + } if (kd.m_is_reverse_cf) { - *upper_bound_slice = rocksdb::Slice((const char *)lower_bound, min_len); - *lower_bound_slice = rocksdb::Slice((const char *)upper_bound, min_len); + *upper_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len); + *lower_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len); } else { - *upper_bound_slice = rocksdb::Slice((const char *)upper_bound, min_len); - *lower_bound_slice = rocksdb::Slice((const char *)lower_bound, min_len); + *upper_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len); + *lower_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len); } } @@ -10222,8 +10405,17 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, bool skip_bloom = true; const rocksdb::Slice eq_cond(slice->data(), eq_cond_len); + // The size of m_scan_it_lower_bound (and upper) is technically + // max_packed_sk_len as calculated in ha_rocksdb::alloc_key_buffers. Rather + // than recalculating that number, we pass in the max of eq_cond_len and + // Rdb_key_def::INDEX_NUMBER_SIZE which is guaranteed to be smaller than + // max_packed_sk_len, hence ensuring no buffer overrun. + // + // See ha_rocksdb::setup_iterator_bounds on how the bound_len parameter is + // used. 
if (check_bloom_and_set_bounds( - ha_thd(), kd, eq_cond, use_all_keys, eq_cond_len, + ha_thd(), kd, eq_cond, use_all_keys, + std::max(eq_cond_len, (uint)Rdb_key_def::INDEX_NUMBER_SIZE), m_scan_it_lower_bound, m_scan_it_upper_bound, &m_scan_it_lower_bound_slice, &m_scan_it_upper_bound_slice)) { skip_bloom = false; @@ -10261,7 +10453,8 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, m_scan_it_snapshot = rdb->GetSnapshot(); auto read_opts = rocksdb::ReadOptions(); - read_opts.total_order_seek = true; // TODO: set based on WHERE conditions + // TODO(mung): set based on WHERE conditions + read_opts.total_order_seek = true; read_opts.snapshot = m_scan_it_snapshot; m_scan_it = rdb->NewIterator(read_opts, kd.get_cf()); } else { @@ -10286,12 +10479,12 @@ void ha_rocksdb::release_scan_iterator() { void ha_rocksdb::setup_iterator_for_rnd_scan() { uint key_size; - int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); + int key_start_matching_bytes = + m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size); - setup_scan_iterator(*m_pk_descr, &table_key, false, - key_start_matching_bytes); + setup_scan_iterator(*m_pk_descr, &table_key, false, key_start_matching_bytes); m_scan_it->Seek(table_key); m_skip_scan_it_next_call = true; } @@ -10304,8 +10497,16 @@ void ha_rocksdb::setup_iterator_for_rnd_scan() { int ha_rocksdb::rnd_init(bool scan) { DBUG_ENTER_FUNC(); + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + Rdb_transaction *const tx = get_or_create_tx(table->in_use); - setup_read_decoders(); + + // when this table is being updated, decode all fields + m_converter->setup_field_decoders(table->read_set, + m_lock_rows == RDB_LOCK_WRITE); if (scan) { m_rnd_scan_is_new_snapshot = !tx->has_snapshot(); @@ -10333,9 +10534,9 @@ int ha_rocksdb::rnd_next(uchar *const buf) { int rc; for (;;) { rc = 
rnd_next_with_direction(buf, true); - if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !m_rnd_scan_is_new_snapshot) + if (!should_recreate_snapshot(rc, m_rnd_scan_is_new_snapshot)) { break; /* exit the loop */ - + } // release the snapshot and iterator and then regenerate them Rdb_transaction *tx = get_or_create_tx(table->in_use); tx->release_snapshot(); @@ -10345,8 +10546,7 @@ int ha_rocksdb::rnd_next(uchar *const buf) { m_rnd_scan_is_new_snapshot = false; - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -10358,6 +10558,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { DBUG_ENTER_FUNC(); int rc; + THD *thd = ha_thd(); table->status = STATUS_NOT_FOUND; #ifdef MARIAROCKS_NOT_YET @@ -10376,13 +10577,20 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { } for (;;) { + DEBUG_SYNC(thd, "rocksdb.check_flags_rnwd"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + break; + } + if (m_skip_scan_it_next_call) { m_skip_scan_it_next_call = false; } else { - if (move_forward) + if (move_forward) { m_scan_it->Next(); /* this call cannot fail */ - else + } else { m_scan_it->Prev(); /* this call cannot fail */ + } } if (!is_valid(m_scan_it)) { @@ -10416,11 +10624,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { const rocksdb::Status s = get_for_update(tx, m_pk_descr->get_cf(), key, &m_retrieved_record); if (s.IsNotFound() && - my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) { - // This occurs if we accessed a row, tried to lock it, failed, - // released and reacquired the snapshot (because of READ COMMITTED - // mode) and the row was deleted by someone else in the meantime. - // If so, we just want to move on to the next row. 
+ should_skip_invalidated_record(HA_ERR_KEY_NOT_FOUND)) { continue; } @@ -10480,11 +10684,18 @@ int ha_rocksdb::rnd_end() { int ha_rocksdb::index_init(uint idx, bool sorted) { DBUG_ENTER_FUNC(); + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + Rdb_transaction *const tx = get_or_create_tx(table->in_use); DBUG_ASSERT(tx != nullptr); - setup_read_decoders(); - m_start_range= NULL; + m_start_range= NULL; + // when this table is being updated, decode all fields + m_converter->setup_field_decoders(table->read_set, + m_lock_rows == RDB_LOCK_WRITE); if (!m_keyread_only) { m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap); @@ -10528,10 +10739,20 @@ int ha_rocksdb::truncate() { DBUG_ASSERT(m_tbl_def != nullptr); - m_tbl_def->m_auto_incr_val = 1; - m_tbl_def->m_hidden_pk_val = 1; + // Save table name to use later + std::string table_name = m_tbl_def->full_tablename(); + + // Delete current table + int err = delete_table(m_tbl_def); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } - DBUG_RETURN(remove_rows(m_tbl_def)); + // Reset auto_increment_value to 1 if auto-increment feature is enabled + // By default, the starting valid value for auto_increment_value is 1 + DBUG_RETURN(create_table( + table_name, table, + table->found_next_number_field ? 
1 : 0 /* auto_increment_value */)); } /* @@ -10609,9 +10830,11 @@ rocksdb::Status ha_rocksdb::delete_or_singledelete( uint index, Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key) { - if (can_use_single_delete(index)) - return tx->single_delete(column_family, key); - return tx->delete_key(column_family, key); + const bool assume_tracked = can_assume_tracked(ha_thd()); + if (can_use_single_delete(index)) { + return tx->single_delete(column_family, key, assume_tracked); + } + return tx->delete_key(column_family, key, assume_tracked); } void ha_rocksdb::update_stats(void) { @@ -10642,8 +10865,9 @@ void ha_rocksdb::update_stats(void) { int ha_rocksdb::info(uint flag) { DBUG_ENTER_FUNC(); - if (!table) + if (!table) { DBUG_RETURN(HA_EXIT_FAILURE); + } if (flag & HA_STATUS_VARIABLE) { /* @@ -10671,10 +10895,8 @@ int ha_rocksdb::info(uint flag) { // if number of records is hardcoded, we do not want to force computation // of memtable cardinalities - if (stats.records == 0 || - (rocksdb_force_compute_memtable_stats && - rocksdb_debug_optimizer_n_rows == 0)) - { + if (stats.records == 0 || (rocksdb_force_compute_memtable_stats && + rocksdb_debug_optimizer_n_rows == 0)) { // First, compute SST files stats uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; auto r = get_range(pk_index(table, m_tbl_def), buf); @@ -10684,8 +10906,8 @@ int ha_rocksdb::info(uint flag) { if (stats.records == 0) { rdb->GetApproximateSizes(m_pk_descr->get_cf(), &r, 1, &sz, include_flags); - stats.records+= sz/ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; - stats.data_file_length+= sz; + stats.records += sz / ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; + stats.data_file_length += sz; } // Second, compute memtable stats. This call is expensive, so cache // values computed for some time. 
@@ -10727,14 +10949,16 @@ int ha_rocksdb::info(uint flag) { if (stats.records == 0 && !(flag & (HA_STATUS_TIME | HA_STATUS_OPEN))) { stats.records++; } - - if (rocksdb_debug_optimizer_n_rows > 0) - stats.records = rocksdb_debug_optimizer_n_rows; } - if (stats.records != 0) + if (rocksdb_debug_optimizer_n_rows > 0) + stats.records = rocksdb_debug_optimizer_n_rows; + + if (stats.records != 0) { stats.mean_rec_length = stats.data_file_length / stats.records; + } } + if (flag & HA_STATUS_CONST) { ref_length = m_pk_descr->max_storage_fmt_length(); @@ -10760,8 +10984,7 @@ int ha_rocksdb::info(uint flag) { } else { x = 0; } - if (x > stats.records) - x = stats.records; + if (x > stats.records) x = stats.records; if ((x == 0 && rocksdb_debug_optimizer_no_zero_cardinality) || rocksdb_debug_optimizer_n_rows > 0) { // Fake cardinality implementation. For example, (idx1, idx2, idx3) @@ -10796,7 +11019,7 @@ int ha_rocksdb::info(uint flag) { uniqueness violation. */ errkey = m_dupp_errkey; - dup_ref = m_pk_tuple; // TODO(?): this should store packed PK. + dup_ref = m_pk_tuple; // TODO(?): this should store packed PK. } if (flag & HA_STATUS_AUTO) { @@ -10810,8 +11033,9 @@ void ha_rocksdb::position(const uchar *const record) { DBUG_ENTER_FUNC(); longlong hidden_pk_id = 0; - if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) - DBUG_ASSERT(false); // should never reach here + if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) { + DBUG_ASSERT(false); // should never reach here + } /* Get packed primary key value from the record. @@ -10833,8 +11057,9 @@ void ha_rocksdb::position(const uchar *const record) { It could be that mem-comparable form of PK occupies less than ref_length bytes. Fill the remainder with zeros. 
*/ - if (ref_length > packed_size) + if (ref_length > packed_size) { memset(ref + packed_size, 0, ref_length - packed_size); + } DBUG_VOID_RETURN; } @@ -10892,8 +11117,7 @@ void ha_rocksdb::calc_updated_indexes() { /* Walk over all key parts, including the "extended key" suffix */ const uint key_parts = kd.get_key_parts(); for (uint kp = 0; kp < key_parts; kp++) { - if (has_hidden_pk(table) && kp + 1 == key_parts) - break; + if (has_hidden_pk(table) && kp + 1 == key_parts) break; Field *const field = kd.get_table_field_for_part_no(table, kp); if (bitmap_is_set(table->write_set, field->field_index)) { @@ -10906,6 +11130,9 @@ void ha_rocksdb::calc_updated_indexes() { } /** + Update an existing row + @param[in] old_data nullptr for write, non-null for update + @param[in] new_data non-null for write/update @return HA_EXIT_SUCCESS OK other HA_ERR error code (can be SE-specific) @@ -10924,7 +11151,7 @@ int ha_rocksdb::update_row(const uchar *const old_data, */ DBUG_ASSERT(new_data == table->record[0]); - const int rv = update_write_row(old_data, new_data, false); + const int rv = update_write_row(old_data, new_data, skip_unique_check()); if (rv == 0) { #ifdef MARIAROCKS_NOT_YET @@ -11029,7 +11256,8 @@ THR_LOCK_DATA **ha_rocksdb::store_lock(THD *const thd, THR_LOCK_DATA **to, void ha_rocksdb::read_thd_vars(THD *const thd) { m_store_row_debug_checksums = THDVAR(thd, store_row_debug_checksums); - m_verify_row_debug_checksums = THDVAR(thd, verify_row_debug_checksums); + m_converter->set_verify_row_debug_checksums( + THDVAR(thd, verify_row_debug_checksums)); m_checksums_pct = THDVAR(thd, checksums_pct); } @@ -11120,7 +11348,8 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) { tx->io_perf_end_and_record(&m_io_perf); tx->m_n_mysql_tables_in_use--; if (tx->m_n_mysql_tables_in_use == 0 && - !my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { + !my_core::thd_test_options(thd, + OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { /* Do like InnoDB: when we 
get here, it's time to commit a single-statement transaction. @@ -11252,9 +11481,8 @@ rocksdb::Range get_range(const Rdb_key_def &kd, } } -rocksdb::Range -ha_rocksdb::get_range(const int &i, - uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) const { +rocksdb::Range ha_rocksdb::get_range( + const int i, uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) const { return myrocks::get_range(*m_key_descr_arr[i], buf); } @@ -11265,11 +11493,10 @@ ha_rocksdb::get_range(const int &i, but in drop_index_thread's case, it means index is marked as removed, so no further seek will happen for the index id. */ -static bool is_myrocks_index_empty( - rocksdb::ColumnFamilyHandle *cfh, const bool is_reverse_cf, - const rocksdb::ReadOptions &read_opts, - const uint index_id) -{ +static bool is_myrocks_index_empty(rocksdb::ColumnFamilyHandle *cfh, + const bool is_reverse_cf, + const rocksdb::ReadOptions &read_opts, + const uint index_id) { bool index_removed = false; uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0}; rdb_netbuf_store_uint32(key_buf, index_id); @@ -11280,8 +11507,7 @@ static bool is_myrocks_index_empty( if (!it->Valid()) { index_removed = true; } else { - if (memcmp(it->key().data(), key_buf, - Rdb_key_def::INDEX_NUMBER_SIZE)) { + if (memcmp(it->key().data(), key_buf, Rdb_key_def::INDEX_NUMBER_SIZE)) { // Key does not have same prefix index_removed = true; } @@ -11308,8 +11534,8 @@ void Rdb_drop_index_thread::run() { timespec ts; int sec= dict_manager.is_drop_index_empty() - ? 24 * 60 * 60 // no filtering - : 60; // filtering + ? 
24 * 60 * 60 // no filtering + : 60; // filtering set_timespec(ts,sec); const auto ret MY_ATTRIBUTE((__unused__)) = @@ -11326,26 +11552,23 @@ void Rdb_drop_index_thread::run() { if (!indices.empty()) { std::unordered_set<GL_INDEX_ID> finished; rocksdb::ReadOptions read_opts; - read_opts.total_order_seek = true; // disable bloom filter + read_opts.total_order_seek = true; // disable bloom filter for (const auto d : indices) { uint32 cf_flags = 0; if (!dict_manager.get_cf_flags(d.cf_id, &cf_flags)) { - sql_print_error("RocksDB: Failed to get column family flags " - "from cf id %u. MyRocks data dictionary may " - "get corrupted.", - d.cf_id); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Failed to get column family flags " + "from cf id %u. MyRocks data dictionary may " + "get corrupted.", + d.cf_id); abort(); } rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id); DBUG_ASSERT(cfh); const bool is_reverse_cf = cf_flags & Rdb_key_def::REVERSE_CF_FLAG; - if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id)) - { - finished.insert(d); - continue; - } uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf ? 1 : 0, is_reverse_cf ? 0 : 1); @@ -11365,8 +11588,7 @@ void Rdb_drop_index_thread::run() { } rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD); } - if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id)) - { + if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id)) { finished.insert(d); } } @@ -11393,28 +11615,24 @@ Rdb_tbl_def *ha_rocksdb::get_table_if_exists(const char *const tablename) { } /* - Note: the following function is called when the table is not open. That is, - this->table==nullptr, pk_key_descr==nullptr, etc. + Overload func for delete table ---it deletes table meta data in data + dictionary immediately and delete real data in background thread(async) - tablename points to line in form "./dbname/tablename". 
+ @param tbl IN MyRocks table definition @return HA_EXIT_SUCCESS OK other HA_ERR error code (can be SE-specific) */ - -int ha_rocksdb::delete_table(const char *const tablename) { +int ha_rocksdb::delete_table(Rdb_tbl_def *const tbl) { DBUG_ENTER_FUNC(); - DBUG_ASSERT(tablename != nullptr); + DBUG_ASSERT(tbl != nullptr); + DBUG_ASSERT(m_tbl_def == nullptr || m_tbl_def == tbl); const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin(); rocksdb::WriteBatch *const batch = wb.get(); - /* Find the table in the hash */ - Rdb_tbl_def *const tbl = get_table_if_exists(tablename); - if (!tbl) - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); dict_manager.add_drop_table(tbl->m_key_descr_arr, tbl->m_key_count, batch); /* @@ -11429,10 +11647,36 @@ int ha_rocksdb::delete_table(const char *const tablename) { } rdb_drop_idx_thread.signal(); - + // avoid dangling pointer + m_tbl_def = nullptr; DBUG_RETURN(HA_EXIT_SUCCESS); } +/* + Note: the following function is called when the table is not open. That is, + this->table==nullptr, pk_key_descr==nullptr, etc. + + tablename points to line in form "./dbname/tablename". + + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ + +int ha_rocksdb::delete_table(const char *const tablename) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(tablename != nullptr); + + /* Find the table in the hash */ + Rdb_tbl_def *const tbl = get_table_if_exists(tablename); + if (!tbl) { + DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + } + + DBUG_RETURN(delete_table(tbl)); +} + int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) { const rocksdb::WriteOptions wo = rdb_get_rocksdb_write_options(handler::ha_thd()); @@ -11583,21 +11827,33 @@ int ha_rocksdb::extra(enum ha_extra_function operation) { DBUG_ENTER_FUNC(); switch (operation) { - case HA_EXTRA_KEYREAD: - m_keyread_only = true; - break; - case HA_EXTRA_NO_KEYREAD: - m_keyread_only = false; - break; - case HA_EXTRA_FLUSH: - /* - If the table has blobs, then they are part of m_retrieved_record. 
- This call invalidates them. - */ - m_retrieved_record.Reset(); - break; - default: - break; + case HA_EXTRA_KEYREAD: + m_keyread_only = true; + break; + case HA_EXTRA_NO_KEYREAD: + m_keyread_only = false; + break; + case HA_EXTRA_FLUSH: + /* + If the table has blobs, then they are part of m_retrieved_record. + This call invalidates them. + */ + m_retrieved_record.Reset(); + break; + case HA_EXTRA_INSERT_WITH_UPDATE: + // INSERT ON DUPLICATE KEY UPDATE + if (rocksdb_enable_insert_with_update_caching) { + m_insert_with_update = true; + } + break; + case HA_EXTRA_NO_IGNORE_DUP_KEY: + // PAIRED with HA_EXTRA_INSERT_WITH_UPDATE or HA_EXTRA_WRITE_CAN_REPLACE + // that indicates the end of REPLACE / INSERT ON DUPLICATE KEY + m_insert_with_update = false; + break; + + default: + break; } DBUG_RETURN(HA_EXIT_SUCCESS); @@ -11668,8 +11924,7 @@ ha_rows ha_rocksdb::records_in_range(uint inx, key_range *const min_key, uint64_t sz = 0; auto disk_size = kd.m_stats.m_actual_disk_size; - if (disk_size == 0) - disk_size = kd.m_stats.m_data_size; + if (disk_size == 0) disk_size = kd.m_stats.m_data_size; auto rows = kd.m_stats.m_rows; if (rows == 0 || disk_size == 0) { rows = 1; @@ -11780,7 +12035,7 @@ static int calculate_stats( // get RocksDB table properties for these ranges rocksdb::TablePropertiesCollection props; - for (auto it : ranges) { + for (const auto &it : ranges) { const auto old_size MY_ATTRIBUTE((__unused__)) = props.size(); const auto status = rdb->GetPropertiesOfTablesInRange( it.first, &it.second[0], it.second.size(), &props); @@ -11930,6 +12185,7 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, an actual reserve of some values might be a better solution. 
*/ DEBUG_SYNC(ha_thd(), "rocksdb.autoinc_vars"); + DEBUG_SYNC(ha_thd(), "rocksdb.autoinc_vars2"); if (off > inc) { off = 1; @@ -11970,73 +12226,77 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, // The next value can be more complicated if either 'inc' or 'off' is not 1 ulonglong last_val = auto_incr; - // Loop until we can correctly update the atomic value - do { - DBUG_ASSERT(last_val > 0); - // Calculate the next value in the auto increment series: offset - // + N * increment where N is 0, 1, 2, ... - // - // For further information please visit: - // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html - // - // The following is confusing so here is an explanation: - // To get the next number in the sequence above you subtract out the - // offset, calculate the next sequence (N * increment) and then add the - // offset back in. - // - // The additions are rearranged to avoid overflow. The following is - // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact - // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why: - // - // (a+b)/c - // = (a - a%c + a%c + b - b%c + b%c) / c - // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c - // = a/c + b/c + (a%c + b%c) / c - // - // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the - // following statement. - ulonglong n = - (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc; - - // Check if n * inc + off will overflow. This can only happen if we have - // an UNSIGNED BIGINT field. - if (n > (std::numeric_limits<ulonglong>::max() - off) / inc) { - DBUG_ASSERT(max_val == std::numeric_limits<ulonglong>::max()); - // The 'last_val' value is already equal to or larger than the largest - // value in the sequence. Continuing would wrap around (technically - // the behavior would be undefined). What should we do? - // We could: - // 1) set the new value to the last possible number in our sequence - // as described above. 
The problem with this is that this - // number could be smaller than a value in an existing row. - // 2) set the new value to the largest possible number. This number - // may not be in our sequence, but it is guaranteed to be equal - // to or larger than any other value already inserted. + if (last_val > max_val) { + new_val = std::numeric_limits<ulonglong>::max(); + } else { + // Loop until we can correctly update the atomic value + do { + DBUG_ASSERT(last_val > 0); + // Calculate the next value in the auto increment series: offset + // + N * increment where N is 0, 1, 2, ... // - // For now I'm going to take option 2. + // For further information please visit: + // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html // - // Returning ULLONG_MAX from get_auto_increment will cause the SQL - // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to - // the SE API for get_auto_increment, inserts will fail with - // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but - // inserts will fail with ER_DUP_ENTRY for other types (or no failure - // if the column is in a non-unique SK). - new_val = std::numeric_limits<ulonglong>::max(); - auto_incr = new_val; // Store the largest value into auto_incr - break; - } + // The following is confusing so here is an explanation: + // To get the next number in the sequence above you subtract out the + // offset, calculate the next sequence (N * increment) and then add the + // offset back in. + // + // The additions are rearranged to avoid overflow. The following is + // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact + // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why: + // + // (a+b)/c + // = (a - a%c + a%c + b - b%c + b%c) / c + // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c + // = a/c + b/c + (a%c + b%c) / c + // + // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the + // following statement. 
+ ulonglong n = + (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc; + + // Check if n * inc + off will overflow. This can only happen if we have + // an UNSIGNED BIGINT field. + if (n > (std::numeric_limits<ulonglong>::max() - off) / inc) { + DBUG_ASSERT(max_val == std::numeric_limits<ulonglong>::max()); + // The 'last_val' value is already equal to or larger than the largest + // value in the sequence. Continuing would wrap around (technically + // the behavior would be undefined). What should we do? + // We could: + // 1) set the new value to the last possible number in our sequence + // as described above. The problem with this is that this + // number could be smaller than a value in an existing row. + // 2) set the new value to the largest possible number. This number + // may not be in our sequence, but it is guaranteed to be equal + // to or larger than any other value already inserted. + // + // For now I'm going to take option 2. + // + // Returning ULLONG_MAX from get_auto_increment will cause the SQL + // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to + // the SE API for get_auto_increment, inserts will fail with + // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but + // inserts will fail with ER_DUP_ENTRY for other types (or no failure + // if the column is in a non-unique SK). + new_val = std::numeric_limits<ulonglong>::max(); + auto_incr = new_val; // Store the largest value into auto_incr + break; + } - new_val = n * inc + off; + new_val = n * inc + off; - // Attempt to store the new value (plus 1 since m_auto_incr_val contains - // the next available value) into the atomic value. If the current - // value no longer matches what we have in 'last_val' this will fail and - // we will repeat the loop (`last_val` will automatically get updated - // with the current value). - // - // See above explanation for inc == 1 for why we use std::min. 
- } while (!auto_incr.compare_exchange_weak(last_val, - std::min(new_val + 1, max_val))); + // Attempt to store the new value (plus 1 since m_auto_incr_val contains + // the next available value) into the atomic value. If the current + // value no longer matches what we have in 'last_val' this will fail and + // we will repeat the loop (`last_val` will automatically get updated + // with the current value). + // + // See above explanation for inc == 1 for why we use std::min. + } while (!auto_incr.compare_exchange_weak( + last_val, std::min(new_val + 1, max_val))); + } } *first_value = new_val; @@ -12052,13 +12312,13 @@ const char *dbug_print_item(Item *const item) { char *const buf = dbug_item_print_buf; String str(buf, sizeof(dbug_item_print_buf), &my_charset_bin); str.length(0); - if (!item) - return "(Item*)nullptr"; + if (!item) return "(Item*)nullptr"; item->print(&str, QT_ORDINARY); - if (str.c_ptr() == buf) + if (str.c_ptr() == buf) { return buf; - else + } else { return "Couldn't fit into buffer"; + } } #endif /*DBUG_OFF*/ @@ -12187,7 +12447,6 @@ bool ha_rocksdb::prepare_inplace_alter_table( ALTER_DROP_UNIQUE_INDEX | ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX | ALTER_ADD_UNIQUE_INDEX)) { - if (has_hidden_pk(altered_table)) { new_n_keys += 1; } @@ -12473,7 +12732,7 @@ int ha_rocksdb::inplace_populate_sk( const int new_packed_size = index->pack_record( new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple, &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id, 0, - nullptr, nullptr, m_ttl_bytes); + nullptr, m_ttl_bytes); const rocksdb::Slice key = rocksdb::Slice( reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size); @@ -12521,9 +12780,9 @@ int ha_rocksdb::inplace_populate_sk( is used inside print_keydup_error so that the error message shows the duplicate record. 
*/ - if (index->unpack_record(new_table_arg, new_table_arg->record[0], - &merge_key, nullptr, - m_verify_row_debug_checksums)) { + if (index->unpack_record( + new_table_arg, new_table_arg->record[0], &merge_key, + &merge_val, m_converter->get_verify_row_debug_checksums())) { /* Should never reach here */ DBUG_ASSERT(0); } @@ -12553,7 +12812,9 @@ int ha_rocksdb::inplace_populate_sk( DBUG_RETURN(res); } - if ((res = tx->finish_bulk_load())) { + bool is_critical_error; + res = tx->finish_bulk_load(&is_critical_error); + if (res && is_critical_error) { // NO_LINT_DEBUG sql_print_error("Error finishing bulk load."); DBUG_RETURN(res); @@ -12762,22 +13023,22 @@ bool ha_rocksdb::commit_inplace_alter_table( #define SHOW_FNAME(name) rocksdb_show_##name -#define DEF_SHOW_FUNC(name, key) \ - static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR * var, char *buff) { \ - rocksdb_status_counters.name = \ - rocksdb_stats->getTickerCount(rocksdb::key); \ - var->type = SHOW_LONGLONG; \ - var->value = (char *)&rocksdb_status_counters.name; \ - return HA_EXIT_SUCCESS; \ +#define DEF_SHOW_FUNC(name, key) \ + static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR * var, char *buff) { \ + rocksdb_status_counters.name = \ + rocksdb_stats->getTickerCount(rocksdb::key); \ + var->type = SHOW_LONGLONG; \ + var->value = reinterpret_cast<char *>(&rocksdb_status_counters.name); \ + return HA_EXIT_SUCCESS; \ } -#define DEF_STATUS_VAR(name) \ +#define DEF_STATUS_VAR(name) \ { "rocksdb_" #name, (char *)&SHOW_FNAME(name), SHOW_FUNC } -#define DEF_STATUS_VAR_PTR(name, ptr, option) \ +#define DEF_STATUS_VAR_PTR(name, ptr, option) \ { "rocksdb_" name, (char *)ptr, option } -#define DEF_STATUS_VAR_FUNC(name, ptr, option) \ +#define DEF_STATUS_VAR_FUNC(name, ptr, option) \ { name, reinterpret_cast<char *>(ptr), option } struct rocksdb_status_counters_t { @@ -13007,9 +13268,8 @@ static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) { var->value = reinterpret_cast<char 
*>(&myrocks_status_variables); } -static ulonglong -io_stall_prop_value(const std::map<std::string, std::string> &props, - const std::string &key) { +static ulonglong io_stall_prop_value( + const std::map<std::string, std::string> &props, const std::string &key) { std::map<std::string, std::string>::const_iterator iter = props.find("io_stalls." + key); if (iter != props.end()) { @@ -13187,6 +13447,10 @@ static SHOW_VAR rocksdb_status_vars[] = { SHOW_LONGLONG), DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other, SHOW_LONGLONG), +#ifndef DBUG_OFF + DEF_STATUS_VAR_PTR("num_get_for_update_calls", + &rocksdb_num_get_for_update_calls, SHOW_LONGLONG), +#endif // the variables generated by SHOW_FUNC are sorted only by prefix (first // arg in the tuple below), so make sure it is unique to make sorting // deterministic as quick sort is not stable @@ -13428,6 +13692,49 @@ bool Rdb_manual_compaction_thread::is_manual_compaction_finished(int mc_id) { return finished; } +/** + * Locking read + Not Found + Read Committed occurs if we accessed + * a row by Seek, tried to lock it, failed, released and reacquired the + * snapshot (because of READ COMMITTED mode) and the row was deleted by + * someone else in the meantime. + * If so, we either just skipping the row, or re-creating a snapshot + * and seek again. In both cases, Read Committed constraint is not broken. + */ +bool ha_rocksdb::should_skip_invalidated_record(const int rc) { + if ((m_lock_rows != RDB_LOCK_NONE && rc == HA_ERR_KEY_NOT_FOUND && + my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED)) { + return true; + } + return false; +} +/** + * Indicating snapshot needs to be re-created and retrying seek again, + * instead of returning errors or empty set. This is normally applicable + * when hitting kBusy when locking the first row of the transaction, + * with Repeatable Read isolation level. 
+ */ +bool ha_rocksdb::should_recreate_snapshot(const int rc, + const bool is_new_snapshot) { + if (should_skip_invalidated_record(rc) || + (rc == HA_ERR_ROCKSDB_STATUS_BUSY && is_new_snapshot)) { + return true; + } + return false; +} + +/** + * If calling put/delete/singledelete without locking the row, + * it is necessary to pass assume_tracked=false to RocksDB TX API. + * Read Free Replication and Blind Deletes are the cases when + * using TX API and skipping row locking. + */ +bool ha_rocksdb::can_assume_tracked(THD *thd) { + if (/* MARIAROCKS_NOT_YET use_read_free_rpl() ||*/ (THDVAR(thd, blind_delete_primary_key))) { + return false; + } + return true; +} + bool ha_rocksdb::check_bloom_and_set_bounds( THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, const bool use_all_keys, size_t bound_len, uchar *const lower_bound, @@ -13488,20 +13795,22 @@ bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, shorter require all parts of the key to be available for the short key match. */ - if ((use_all_keys && prefix_extractor->InRange(eq_cond)) - || prefix_extractor->SameResultWhenAppended(eq_cond)) + if ((use_all_keys && prefix_extractor->InRange(eq_cond)) || + prefix_extractor->SameResultWhenAppended(eq_cond)) { can_use = true; - else + } else { can_use = false; + } } else { /* if prefix extractor is not defined, all key parts have to be used by eq_cond. 
*/ - if (use_all_keys) + if (use_all_keys) { can_use = true; - else + } else { can_use = false; + } } return can_use; @@ -13520,7 +13829,7 @@ bool rdb_is_ttl_enabled() { return rocksdb_enable_ttl; } bool rdb_is_ttl_read_filtering_enabled() { return rocksdb_enable_ttl_read_filtering; } -#ifndef NDEBUG +#ifndef DBUG_OFF int rdb_dbug_set_ttl_rec_ts() { return rocksdb_debug_ttl_rec_ts; } int rdb_dbug_set_ttl_snapshot_ts() { return rocksdb_debug_ttl_snapshot_ts; } int rdb_dbug_set_ttl_read_filter_ts() { @@ -13567,17 +13876,17 @@ const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type) { static_assert(RDB_IO_ERROR_LAST == 4, "Please handle all the error types."); switch (err_type) { - case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_TX_COMMIT: - return "RDB_IO_ERROR_TX_COMMIT"; - case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_DICT_COMMIT: - return "RDB_IO_ERROR_DICT_COMMIT"; - case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_BG_THREAD: - return "RDB_IO_ERROR_BG_THREAD"; - case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_GENERAL: - return "RDB_IO_ERROR_GENERAL"; - default: - DBUG_ASSERT(false); - return "(unknown)"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_TX_COMMIT: + return "RDB_IO_ERROR_TX_COMMIT"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_DICT_COMMIT: + return "RDB_IO_ERROR_DICT_COMMIT"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_BG_THREAD: + return "RDB_IO_ERROR_BG_THREAD"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_GENERAL: + return "RDB_IO_ERROR_GENERAL"; + default: + DBUG_ASSERT(false); + return "(unknown)"; } } @@ -13589,32 +13898,38 @@ const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type) { void rdb_handle_io_error(const rocksdb::Status status, const RDB_IO_ERROR_TYPE err_type) { if (status.IsIOError()) { - switch (err_type) { - case RDB_IO_ERROR_TX_COMMIT: - case RDB_IO_ERROR_DICT_COMMIT: { - rdb_log_status_error(status, "failed to write to WAL"); - /* NO_LINT_DEBUG */ - sql_print_error("MyRocks: aborting on WAL write error."); - abort(); - break; - } - case RDB_IO_ERROR_BG_THREAD: { - 
rdb_log_status_error(status, "BG thread failed to write to RocksDB"); - /* NO_LINT_DEBUG */ - sql_print_error("MyRocks: aborting on BG write error."); - abort(); - break; - } - case RDB_IO_ERROR_GENERAL: { - rdb_log_status_error(status, "failed on I/O"); - /* NO_LINT_DEBUG */ - sql_print_error("MyRocks: aborting on I/O error."); - abort(); - break; + /* skip dumping core if write failed and we are allowed to do so */ +#ifdef MARIAROCKS_NOT_YET + if (skip_core_dump_on_error) { + opt_core_file = false; } - default: - DBUG_ASSERT(0); - break; +#endif + switch (err_type) { + case RDB_IO_ERROR_TX_COMMIT: + case RDB_IO_ERROR_DICT_COMMIT: { + rdb_log_status_error(status, "failed to write to WAL"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on WAL write error."); + abort(); + break; + } + case RDB_IO_ERROR_BG_THREAD: { + rdb_log_status_error(status, "BG thread failed to write to RocksDB"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on BG write error."); + abort(); + break; + } + case RDB_IO_ERROR_GENERAL: { + rdb_log_status_error(status, "failed on I/O"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on I/O error."); + abort(); + break; + } + default: + DBUG_ASSERT(0); + break; } } else if (status.IsCorruption()) { rdb_log_status_error(status, "data corruption detected!"); @@ -13624,16 +13939,16 @@ void rdb_handle_io_error(const rocksdb::Status status, abort(); } else if (!status.ok()) { switch (err_type) { - case RDB_IO_ERROR_DICT_COMMIT: { - rdb_log_status_error(status, "Failed to write to WAL (dictionary)"); - /* NO_LINT_DEBUG */ - sql_print_error("MyRocks: aborting on WAL write error."); - abort(); - break; - } - default: - rdb_log_status_error(status, "Failed to read/write in RocksDB"); - break; + case RDB_IO_ERROR_DICT_COMMIT: { + rdb_log_status_error(status, "Failed to write to WAL (dictionary)"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on WAL write error."); + abort(); + break; + } + default: + 
rdb_log_status_error(status, "Failed to read/write in RocksDB"); + break; } } } @@ -13741,9 +14056,10 @@ void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var, if (!s.ok()) { /* NO_LINT_DEBUG */ - sql_print_warning("MyRocks: failed to update delayed_write_rate. " - "status code = %d, status = %s", - s.code(), s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to update delayed_write_rate. " + "status code = %d, status = %s", + s.code(), s.ToString().c_str()); } } RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); @@ -13801,8 +14117,7 @@ int mysql_value_to_bool(struct st_mysql_value *value, my_bool *return_value) { } else if (new_value_type == MYSQL_VALUE_TYPE_INT) { long long intbuf; value->val_int(value, &intbuf); - if (intbuf > 1) - return 1; + if (intbuf > 1) return 1; *return_value = intbuf > 0 ? TRUE : FALSE; } else { return 1; @@ -13821,12 +14136,14 @@ int rocksdb_check_bulk_load( Rdb_transaction *tx = get_tx_from_thd(thd); if (tx != nullptr) { - const int rc = tx->finish_bulk_load(); - if (rc != 0) { + bool is_critical_error; + const int rc = tx->finish_bulk_load(&is_critical_error); + if (rc != 0 && is_critical_error) { // NO_LINT_DEBUG - sql_print_error("RocksDB: Error %d finalizing last SST file while " - "setting bulk loading variable", - rc); + sql_print_error( + "RocksDB: Error %d finalizing last SST file while " + "setting bulk loading variable", + rc); THDVAR(thd, bulk_load) = 0; return 1; } @@ -13874,9 +14191,10 @@ static void rocksdb_set_max_background_jobs(THD *thd, if (!s.ok()) { /* NO_LINT_DEBUG */ - sql_print_warning("MyRocks: failed to update max_background_jobs. " - "Status code = %d, status = %s.", - s.code(), s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to update max_background_jobs. 
" + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); } } @@ -13902,9 +14220,10 @@ static void rocksdb_set_bytes_per_sync( if (!s.ok()) { /* NO_LINT_DEBUG */ - sql_print_warning("MyRocks: failed to update max_background_jobs. " - "Status code = %d, status = %s.", - s.code(), s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to update max_background_jobs. " + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); } } @@ -13930,9 +14249,10 @@ static void rocksdb_set_wal_bytes_per_sync( if (!s.ok()) { /* NO_LINT_DEBUG */ - sql_print_warning("MyRocks: failed to update max_background_jobs. " - "Status code = %d, status = %s.", - s.code(), s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to update max_background_jobs. " + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); } } @@ -13959,7 +14279,7 @@ static int rocksdb_validate_set_block_cache_size( } if (new_value < RDB_MIN_BLOCK_CACHE_SIZE || - (uint64_t)new_value > (uint64_t)LONGLONG_MAX) { + (uint64_t)new_value > (uint64_t)LLONG_MAX) { return HA_EXIT_FAILURE; } @@ -13975,17 +14295,19 @@ static int rocksdb_validate_set_block_cache_size( return HA_EXIT_SUCCESS; } -static int -rocksdb_validate_update_cf_options(THD * /* unused */, - struct st_mysql_sys_var * /*unused*/, - void *save, struct st_mysql_value *value) { - +static int rocksdb_validate_update_cf_options( + THD * /* unused */, struct st_mysql_sys_var * /*unused*/, void *save, + struct st_mysql_value *value) { char buff[STRING_BUFFER_USUAL_SIZE]; const char *str; int length; length = sizeof(buff); str = value->val_str(value, buff, &length); - *(const char **)save = str; + // In some cases, str can point to buff in the stack. + // This can cause invalid memory access after validation is finished. + // To avoid this kind case, let's alway duplicate the str if str is not + // nullptr + *(const char **)save = (str == nullptr) ? 
nullptr : my_strdup(str, MYF(0)); if (str == nullptr) { return HA_EXIT_SUCCESS; @@ -13999,13 +14321,17 @@ rocksdb_validate_update_cf_options(THD * /* unused */, my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "rocksdb_update_cf_options", str); return HA_EXIT_FAILURE; } + // Loop through option_map and create missing column families + for (Rdb_cf_options::Name_to_config_t::iterator it = option_map.begin(); + it != option_map.end(); ++it) { + cf_manager.get_or_create_cf(rdb, it->first); + } return HA_EXIT_SUCCESS; } -static void -rocksdb_set_update_cf_options(THD *const /* unused */, - struct st_mysql_sys_var *const /* unused */, - void *const var_ptr, const void *const save) { +static void rocksdb_set_update_cf_options( + THD *const /* unused */, struct st_mysql_sys_var *const /* unused */, + void *const var_ptr, const void *const save) { const char *const val = *static_cast<const char *const *>(save); RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); @@ -14023,7 +14349,7 @@ rocksdb_set_update_cf_options(THD *const /* unused */, // Reset the pointers regardless of how much success we had with updating // the CF options. This will results in consistent behavior and avoids // dealing with cases when only a subset of CF-s was successfully updated. - *reinterpret_cast<char **>(var_ptr) = my_strdup(val, MYF(0)); + *reinterpret_cast<const char **>(var_ptr) = val; // Do the real work of applying the changes. Rdb_cf_options::Name_to_config_t option_map; @@ -14051,9 +14377,10 @@ rocksdb_set_update_cf_options(THD *const /* unused */, if (s != rocksdb::Status::OK()) { // NO_LINT_DEBUG - sql_print_warning("MyRocks: failed to convert the options for column " - "family '%s' to a map. %s", cf_name.c_str(), - s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to convert the options for column " + "family '%s' to a map. 
%s", + cf_name.c_str(), s.ToString().c_str()); } else { DBUG_ASSERT(rdb != nullptr); @@ -14062,14 +14389,16 @@ rocksdb_set_update_cf_options(THD *const /* unused */, if (s != rocksdb::Status::OK()) { // NO_LINT_DEBUG - sql_print_warning("MyRocks: failed to apply the options for column " - "family '%s'. %s", cf_name.c_str(), - s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to apply the options for column " + "family '%s'. %s", + cf_name.c_str(), s.ToString().c_str()); } else { // NO_LINT_DEBUG - sql_print_information("MyRocks: options for column family '%s' " - "have been successfully updated.", - cf_name.c_str()); + sql_print_information( + "MyRocks: options for column family '%s' " + "have been successfully updated.", + cf_name.c_str()); // Make sure that data is internally consistent as well and update // the CF options. This is necessary also to make sure that the CF @@ -14132,18 +14461,39 @@ void ha_rocksdb::rpl_after_update_rows() { DBUG_VOID_RETURN; } +#if 0 +bool ha_rocksdb::is_read_free_rpl_table() const { + return table->s && m_tbl_def->m_is_read_free_rpl_table; +} +#endif + /** @brief - Read Free Replication can be used or not. Returning False means - Read Free Replication can be used. Read Free Replication can be used - on UPDATE or DELETE row events, and table must have user defined - primary key. + Read Free Replication can be used or not. Returning true means + Read Free Replication can be used. 
*/ -bool ha_rocksdb::use_read_free_rpl() { +bool ha_rocksdb::use_read_free_rpl() const { DBUG_ENTER_FUNC(); - DBUG_RETURN((m_in_rpl_delete_rows || m_in_rpl_update_rows) && - !has_hidden_pk(table) && m_use_read_free_rpl); + if (!ha_thd()->rli_slave || table->triggers || /* !is_read_free_rpl_table()*/ ) { + DBUG_RETURN(false); + } + +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + switch (rocksdb_read_free_rpl) { + case read_free_rpl_type::OFF: + DBUG_RETURN(false); + case read_free_rpl_type::PK_ONLY: + DBUG_RETURN(!has_hidden_pk(table) && table->s->keys == 1); + case read_free_rpl_type::PK_SK: + DBUG_RETURN(!has_hidden_pk(table)); + } +#else + DBUG_RETURN(false); +#endif + + DBUG_ASSERT(false); + DBUG_RETURN(false); } #endif // MARIAROCKS_NOT_YET @@ -14182,7 +14532,7 @@ void sql_print_verbose_info(const char *format, ...) } } -} // namespace myrocks +} // namespace myrocks /** diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index c5a0afef275..1abdc345527 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -50,7 +50,9 @@ #include "rocksdb/utilities/write_batch_with_index.h" /* MyRocks header files */ +#include "./rdb_buff.h" #include "./rdb_comparator.h" +#include "./rdb_global.h" #include "./rdb_index_merge.h" #include "./rdb_io_watchdog.h" #include "./rdb_perf_context.h" @@ -58,6 +60,8 @@ #include "./rdb_utils.h" #include "./ut0counter.h" +#include "rdb_mariadb_server_port.h" + class Field_blob; class Field_varstring; /** @@ -69,181 +73,13 @@ class Field_varstring; namespace myrocks { -/* - * class for exporting transaction information for - * information_schema.rocksdb_trx - */ -struct Rdb_trx_info { - std::string name; - ulonglong trx_id; - ulonglong write_count; - ulonglong lock_count; - int timeout_sec; - std::string state; - std::string waiting_key; - ulonglong waiting_cf_id; - int is_replication; - int skip_trx_api; - int read_only; - int deadlock_detect; - int num_ongoing_bulk_load; 
- ulong thread_id; - std::string query_str; -}; - -std::vector<Rdb_trx_info> rdb_get_all_trx_info(); - -/* - * class for exporting deadlock transaction information for - * information_schema.rocksdb_deadlock - */ -struct Rdb_deadlock_info { - struct Rdb_dl_trx_info { - ulonglong trx_id; - std::string cf_name; - std::string waiting_key; - bool exclusive_lock; - std::string index_name; - std::string table_name; - }; - std::vector <Rdb_dl_trx_info> path; - int64_t deadlock_time; - ulonglong victim_trx_id; -}; - -std::vector<Rdb_deadlock_info> rdb_get_deadlock_info(); - -/* - This is - - the name of the default Column Family (the CF which stores indexes which - didn't explicitly specify which CF they are in) - - the name used to set the default column family parameter for per-cf - arguments. -*/ -extern const std::string DEFAULT_CF_NAME; - -/* - This is the name of the Column Family used for storing the data dictionary. -*/ -extern const std::string DEFAULT_SYSTEM_CF_NAME; - -/* - This is the name of the hidden primary key for tables with no pk. -*/ -const char *const HIDDEN_PK_NAME = "HIDDEN_PK_ID"; - -/* - Column family name which means "put this index into its own column family". - DEPRECATED!!! -*/ -extern const std::string PER_INDEX_CF_NAME; - -/* - Name for the background thread. -*/ -const char *const BG_THREAD_NAME = "myrocks-bg"; - -/* - Name for the drop index thread. -*/ -const char *const INDEX_THREAD_NAME = "myrocks-index"; - -/* - Name for the manual compaction thread. -*/ -const char *const MANUAL_COMPACTION_THREAD_NAME = "myrocks-mc"; - -/* - Separator between partition name and the qualifier. Sample usage: - - - p0_cfname=foo - - p3_tts_col=bar -*/ -const char RDB_PER_PARTITION_QUALIFIER_NAME_SEP = '_'; - -/* - Separator between qualifier name and value. Sample usage: - - - p0_cfname=foo - - p3_tts_col=bar -*/ -const char RDB_QUALIFIER_VALUE_SEP = '='; - -/* - Separator between multiple qualifier assignments. 
Sample usage: - - - p0_cfname=foo;p1_cfname=bar;p2_cfname=baz -*/ -const char RDB_QUALIFIER_SEP = ';'; - -/* - Qualifier name for a custom per partition column family. -*/ -const char *const RDB_CF_NAME_QUALIFIER = "cfname"; - -/* - Qualifier name for a custom per partition ttl duration. -*/ -const char *const RDB_TTL_DURATION_QUALIFIER = "ttl_duration"; - -/* - Qualifier name for a custom per partition ttl duration. -*/ -const char *const RDB_TTL_COL_QUALIFIER = "ttl_col"; - -/* - Default, minimal valid, and maximum valid sampling rate values when collecting - statistics about table. -*/ -#define RDB_DEFAULT_TBL_STATS_SAMPLE_PCT 10 -#define RDB_TBL_STATS_SAMPLE_PCT_MIN 1 -#define RDB_TBL_STATS_SAMPLE_PCT_MAX 100 - -/* - Default and maximum values for rocksdb-compaction-sequential-deletes and - rocksdb-compaction-sequential-deletes-window to add basic boundary checking. -*/ -#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES 0 -#define MAX_COMPACTION_SEQUENTIAL_DELETES 2000000 - -#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW 0 -#define MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW 2000000 - -/* - Default and maximum values for various compaction and flushing related - options. Numbers are based on the hardware we currently use and our internal - benchmarks which indicate that parallelization helps with the speed of - compactions. - - Ideally of course we'll use heuristic technique to determine the number of - CPU-s and derive the values from there. This however has its own set of - problems and we'll choose simplicity for now. -*/ -#define MAX_BACKGROUND_JOBS 64 - -#define DEFAULT_SUBCOMPACTIONS 1 -#define MAX_SUBCOMPACTIONS 64 - -/* - Default value for rocksdb_sst_mgr_rate_bytes_per_sec = 0 (disabled). -*/ -#define DEFAULT_SST_MGR_RATE_BYTES_PER_SEC 0 - -/* - Defines the field sizes for serializing XID object to a string representation. - string byte format: [field_size: field_value, ...] 
- [ - 8: XID.formatID, - 1: XID.gtrid_length, - 1: XID.bqual_length, - XID.gtrid_length + XID.bqual_length: XID.data - ] -*/ -#define RDB_FORMATID_SZ 8 -#define RDB_GTRID_SZ 1 -#define RDB_BQUAL_SZ 1 -#define RDB_XIDHDR_LEN (RDB_FORMATID_SZ + RDB_GTRID_SZ + RDB_BQUAL_SZ) - +class Rdb_converter; +class Rdb_key_def; +class Rdb_tbl_def; +class Rdb_transaction; +class Rdb_transaction_impl; +class Rdb_writebatch_impl; +class Rdb_field_encoder; /* collations, used in MariaRocks */ enum collations_used { COLLATION_UTF8MB4_BIN = 46, @@ -255,74 +91,13 @@ enum collations_used { COLLATION_UTF8_BIN = 83 }; -/* - To fix an unhandled exception we specify the upper bound as LONGLONGMAX - instead of ULONGLONGMAX because the latter is -1 and causes an exception when - cast to jlong (signed) of JNI - - The reason behind the cast issue is the lack of unsigned int support in Java. -*/ -#define MAX_RATE_LIMITER_BYTES_PER_SEC static_cast<uint64_t>(LONGLONG_MAX) - -/* - Hidden PK column (for tables with no primary key) is a longlong (aka 8 bytes). - static_assert() in code will validate this assumption. -*/ -#define ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN sizeof(longlong) - -/* - Bytes used to store TTL, in the beginning of all records for tables with TTL - enabled. -*/ -#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong) - -#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong) - -/* - Maximum index prefix length in bytes. -*/ -#define MAX_INDEX_COL_LEN_LARGE 3072 -#define MAX_INDEX_COL_LEN_SMALL 767 - -/* - MyRocks specific error codes. NB! Please make sure that you will update - HA_ERR_ROCKSDB_LAST when adding new ones. Also update the strings in - rdb_error_messages to include any new error messages. 
-*/ -#define HA_ERR_ROCKSDB_FIRST (HA_ERR_LAST + 1) -#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_ROCKSDB_FIRST + 0) -#define HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED \ - (HA_ERR_ROCKSDB_FIRST + 1) -#define HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED \ - (HA_ERR_ROCKSDB_FIRST + 2) -#define HA_ERR_ROCKSDB_COMMIT_FAILED (HA_ERR_ROCKSDB_FIRST + 3) -#define HA_ERR_ROCKSDB_BULK_LOAD (HA_ERR_ROCKSDB_FIRST + 4) -#define HA_ERR_ROCKSDB_CORRUPT_DATA (HA_ERR_ROCKSDB_FIRST + 5) -#define HA_ERR_ROCKSDB_CHECKSUM_MISMATCH (HA_ERR_ROCKSDB_FIRST + 6) -#define HA_ERR_ROCKSDB_INVALID_TABLE (HA_ERR_ROCKSDB_FIRST + 7) -#define HA_ERR_ROCKSDB_PROPERTIES (HA_ERR_ROCKSDB_FIRST + 8) -#define HA_ERR_ROCKSDB_MERGE_FILE_ERR (HA_ERR_ROCKSDB_FIRST + 9) -/* - Each error code below maps to a RocksDB status code found in: - rocksdb/include/rocksdb/status.h -*/ -#define HA_ERR_ROCKSDB_STATUS_NOT_FOUND (HA_ERR_LAST + 10) -#define HA_ERR_ROCKSDB_STATUS_CORRUPTION (HA_ERR_LAST + 11) -#define HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED (HA_ERR_LAST + 12) -#define HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT (HA_ERR_LAST + 13) -#define HA_ERR_ROCKSDB_STATUS_IO_ERROR (HA_ERR_LAST + 14) -#define HA_ERR_ROCKSDB_STATUS_NO_SPACE (HA_ERR_LAST + 15) -#define HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS (HA_ERR_LAST + 16) -#define HA_ERR_ROCKSDB_STATUS_INCOMPLETE (HA_ERR_LAST + 17) -#define HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS (HA_ERR_LAST + 18) -#define HA_ERR_ROCKSDB_STATUS_TIMED_OUT (HA_ERR_LAST + 19) -#define HA_ERR_ROCKSDB_STATUS_ABORTED (HA_ERR_LAST + 20) -#define HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT (HA_ERR_LAST + 21) -#define HA_ERR_ROCKSDB_STATUS_BUSY (HA_ERR_LAST + 22) -#define HA_ERR_ROCKSDB_STATUS_DEADLOCK (HA_ERR_LAST + 23) -#define HA_ERR_ROCKSDB_STATUS_EXPIRED (HA_ERR_LAST + 24) -#define HA_ERR_ROCKSDB_STATUS_TRY_AGAIN (HA_ERR_LAST + 25) -#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_STATUS_TRY_AGAIN +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported +extern char 
*rocksdb_read_free_rpl_tables; +#if defined(HAVE_PSI_INTERFACE) +extern PSI_rwlock_key key_rwlock_read_free_rpl_tables; +#endif +extern Regex_list_handler rdb_read_free_regex_handler; +#endif /** @brief @@ -338,7 +113,7 @@ struct Rdb_table_handler { atomic_stat<int> m_lock_wait_timeout_counter; atomic_stat<int> m_deadlock_counter; - my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock + my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock /* Stores cumulative table statistics */ my_io_perf_atomic_t m_io_perf_read; @@ -352,138 +127,19 @@ struct Rdb_table_handler { uint64_t m_mtcache_last_update; }; -class Rdb_key_def; -class Rdb_tbl_def; -class Rdb_transaction; -class Rdb_transaction_impl; -class Rdb_writebatch_impl; -class Rdb_field_encoder; - -const char *const rocksdb_hton_name = "ROCKSDB"; - -typedef struct _gl_index_id_s { - uint32_t cf_id; - uint32_t index_id; - bool operator==(const struct _gl_index_id_s &other) const { - return cf_id == other.cf_id && index_id == other.index_id; - } - bool operator!=(const struct _gl_index_id_s &other) const { - return cf_id != other.cf_id || index_id != other.index_id; - } - bool operator<(const struct _gl_index_id_s &other) const { - return cf_id < other.cf_id || - (cf_id == other.cf_id && index_id < other.index_id); - } - bool operator<=(const struct _gl_index_id_s &other) const { - return cf_id < other.cf_id || - (cf_id == other.cf_id && index_id <= other.index_id); - } - bool operator>(const struct _gl_index_id_s &other) const { - return cf_id > other.cf_id || - (cf_id == other.cf_id && index_id > other.index_id); - } - bool operator>=(const struct _gl_index_id_s &other) const { - return cf_id > other.cf_id || - (cf_id == other.cf_id && index_id >= other.index_id); - } -} GL_INDEX_ID; - -enum operation_type : int { - ROWS_DELETED = 0, - ROWS_INSERTED, - ROWS_READ, - ROWS_UPDATED, - ROWS_DELETED_BLIND, - ROWS_EXPIRED, - ROWS_FILTERED, - ROWS_HIDDEN_NO_SNAPSHOT, - ROWS_MAX -}; - -enum 
query_type : int { QUERIES_POINT = 0, QUERIES_RANGE, QUERIES_MAX }; - -#if defined(HAVE_SCHED_GETCPU) -#define RDB_INDEXER get_sched_indexer_t -#else -#define RDB_INDEXER thread_id_indexer_t -#endif - -/* Global statistics struct used inside MyRocks */ -struct st_global_stats { - ib_counter_t<ulonglong, 64, RDB_INDEXER> rows[ROWS_MAX]; - - // system_rows_ stats are only for system - // tables. They are not counted in rows_* stats. - ib_counter_t<ulonglong, 64, RDB_INDEXER> system_rows[ROWS_MAX]; - - ib_counter_t<ulonglong, 64, RDB_INDEXER> queries[QUERIES_MAX]; - - ib_counter_t<ulonglong, 64, RDB_INDEXER> covered_secondary_key_lookups; -}; - -/* Struct used for exporting status to MySQL */ -struct st_export_stats { - ulonglong rows_deleted; - ulonglong rows_inserted; - ulonglong rows_read; - ulonglong rows_updated; - ulonglong rows_deleted_blind; - ulonglong rows_expired; - ulonglong rows_filtered; - ulonglong rows_hidden_no_snapshot; - - ulonglong system_rows_deleted; - ulonglong system_rows_inserted; - ulonglong system_rows_read; - ulonglong system_rows_updated; - - ulonglong queries_point; - ulonglong queries_range; - - ulonglong covered_secondary_key_lookups; -}; - -/* Struct used for exporting RocksDB memory status */ -struct st_memory_stats { - ulonglong memtable_total; - ulonglong memtable_unflushed; -}; - -/* Struct used for exporting RocksDB IO stalls stats */ -struct st_io_stall_stats { - ulonglong level0_slowdown; - ulonglong level0_slowdown_with_compaction; - ulonglong level0_numfiles; - ulonglong level0_numfiles_with_compaction; - ulonglong stop_for_pending_compaction_bytes; - ulonglong slowdown_for_pending_compaction_bytes; - ulonglong memtable_compaction; - ulonglong memtable_slowdown; - ulonglong total_stop; - ulonglong total_slowdown; - - st_io_stall_stats() - : level0_slowdown(0), level0_slowdown_with_compaction(0), - level0_numfiles(0), level0_numfiles_with_compaction(0), - stop_for_pending_compaction_bytes(0), - 
slowdown_for_pending_compaction_bytes(0), memtable_compaction(0), - memtable_slowdown(0), total_stop(0), total_slowdown(0) {} -}; - -} // namespace myrocks - -#include "./rdb_buff.h" +} // namespace myrocks /* Provide hash function for GL_INDEX_ID so we can include it in sets */ namespace std { -template <> struct hash<myrocks::GL_INDEX_ID> { +template <> +struct hash<myrocks::GL_INDEX_ID> { std::size_t operator()(const myrocks::GL_INDEX_ID &gl_index_id) const { const uint64_t val = ((uint64_t)gl_index_id.cf_id << 32 | (uint64_t)gl_index_id.index_id); return std::hash<uint64_t>()(val); } }; -} // namespace std +} // namespace std namespace myrocks { @@ -493,9 +149,9 @@ namespace myrocks { */ class ha_rocksdb : public my_core::handler { - my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock + my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock - Rdb_table_handler *m_table_handler; ///< Open table handler + Rdb_table_handler *m_table_handler; ///< Open table handler /* Iterator used for range scans and for full table/index scans */ rocksdb::Iterator *m_scan_it; @@ -536,11 +192,6 @@ class ha_rocksdb : public my_core::handler { */ mutable bool m_pk_can_be_decoded; - /* - TRUE <=> Some fields in the PK may require unpack_info. - */ - bool m_maybe_unpack_info; - uchar *m_pk_tuple; /* Buffer for storing PK in KeyTupleFormat */ uchar *m_pk_packed_tuple; /* Buffer for storing PK in StorageFormat */ // ^^ todo: change it to 'char*'? TODO: ^ can we join this with last_rowkey? @@ -584,10 +235,13 @@ class ha_rocksdb : public my_core::handler { */ uchar *m_pack_buffer; + /* class to convert between Mysql format and RocksDB format*/ + std::shared_ptr<Rdb_converter> m_converter; + /* Pointer to the original TTL timestamp value (8 bytes) during UPDATE. */ - char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD]; + char *m_ttl_bytes; /* The TTL timestamp value can change if the explicit TTL column is updated. 
If we detect this when updating the PK, we indicate it here so @@ -598,9 +252,6 @@ class ha_rocksdb : public my_core::handler { /* rowkey of the last record we've read, in StorageFormat. */ String m_last_rowkey; - /* Buffer used by convert_record_to_storage_format() */ - String m_storage_record; - /* Last retrieved record, in table->record[0] data format. @@ -620,15 +271,28 @@ class ha_rocksdb : public my_core::handler { /* TRUE means we are accessing the first row after a snapshot was created */ bool m_rnd_scan_is_new_snapshot; - /* TRUE means the replication slave will use Read Free Replication */ - bool m_use_read_free_rpl; - /* TRUE means we should skip unique key checks for this table if the replication lag gets too large */ bool m_skip_unique_check; + /* + TRUE means INSERT ON DUPLICATE KEY UPDATE. In such case we can optimize by + remember the failed attempt (if there is one that violates uniqueness check) + in write_row and in the following index_read to skip the lock check and read + entirely + */ + bool m_insert_with_update; + + /* TRUE if last time the insertion failed due to duplicated PK */ + bool m_dup_pk_found; + +#ifndef DBUG_OFF + /* Last retreived record for sanity checking */ + String m_dup_pk_retrieved_record; +#endif + /** @brief This is a bitmap of indexes (i.e. 
a set) whose keys (in future, values) may @@ -645,7 +309,7 @@ class ha_rocksdb : public my_core::handler { /* MySQL index number for duplicate key error */ - int m_dupp_errkey; + uint m_dupp_errkey; int create_key_defs(const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, @@ -678,11 +342,10 @@ class ha_rocksdb : public my_core::handler { MY_ATTRIBUTE((__nonnull__)); void release_scan_iterator(void); - rocksdb::Status - get_for_update(Rdb_transaction *const tx, - rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, - rocksdb::PinnableSlice *value) const; + rocksdb::Status get_for_update( + Rdb_transaction *const tx, + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *value) const; int get_row_by_rowid(uchar *const buf, const char *const rowid, const uint rowid_size, const bool skip_lookup = false, @@ -707,7 +370,7 @@ class ha_rocksdb : public my_core::handler { int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__)); int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - bool can_use_single_delete(const uint &index) const + bool can_use_single_delete(const uint index) const MY_ATTRIBUTE((__warn_unused_result__)); bool is_blind_delete_enabled(); bool skip_unique_check() const MY_ATTRIBUTE((__warn_unused_result__)); @@ -725,53 +388,12 @@ class ha_rocksdb : public my_core::handler { void set_last_rowkey(const uchar *const old_data); /* - Array of table->s->fields elements telling how to store fields in the - record. 
- */ - Rdb_field_encoder *m_encoder_arr; - - /* Describes instructions on how to decode the field */ - class READ_FIELD { - public: - /* Points to Rdb_field_encoder describing the field */ - Rdb_field_encoder *m_field_enc; - /* if true, decode the field, otherwise skip it */ - bool m_decode; - /* Skip this many bytes before reading (or skipping) this field */ - int m_skip; - }; - - /* - This tells which table fields should be decoded (or skipped) when - decoding table row from (pk, encoded_row) pair. (Secondary keys are - just always decoded in full currently) - */ - std::vector<READ_FIELD> m_decoders_vect; - - /* - This tells if any field which is part of the key needs to be unpacked and - decoded. - */ - bool m_key_requested = false; - - /* Setup field_decoders based on type of scan and table->read_set */ - void setup_read_decoders(); - - /* For the active index, indicates which columns must be covered for the current lookup to be covered. If the bitmap field is null, that means this index does not cover the current lookup for any record. */ MY_BITMAP m_lookup_bitmap = {nullptr, nullptr, nullptr, 0, 0}; - /* - Number of bytes in on-disk (storage) record format that are used for - storing SQL NULL flags. - */ - uint m_null_bytes_in_rec; - - void get_storage_type(Rdb_field_encoder *const encoder, const uint &kp); - void setup_field_converters(); int alloc_key_buffers(const TABLE *const table_arg, const Rdb_tbl_def *const tbl_def_arg, bool alloc_alter_buffers = false) @@ -779,7 +401,7 @@ class ha_rocksdb : public my_core::handler { void free_key_buffers(); // the buffer size should be at least 2*Rdb_key_def::INDEX_NUMBER_SIZE - rocksdb::Range get_range(const int &i, uchar buf[]) const; + rocksdb::Range get_range(const int i, uchar buf[]) const; /* Perf timers for data reads @@ -787,17 +409,11 @@ class ha_rocksdb : public my_core::handler { Rdb_io_perf m_io_perf; /* - A counter of how many row checksums were checked for this table. 
Note that - this does not include checksums for secondary index entries. - */ - my_core::ha_rows m_row_checksums_checked; - - /* Update stats */ void update_stats(void); -public: + public: /* The following two are currently only used for getting the range bounds from QUICK_SELECT_DESC. @@ -816,19 +432,19 @@ public: */ bool m_store_row_debug_checksums; - /* Same as above but for verifying checksums when reading */ - bool m_verify_row_debug_checksums; int m_checksums_pct; ha_rocksdb(my_core::handlerton *const hton, my_core::TABLE_SHARE *const table_arg); - ~ha_rocksdb() { + virtual ~ha_rocksdb() override { int err MY_ATTRIBUTE((__unused__)); err = finalize_bulk_load(false); if (err != 0) { - sql_print_error("RocksDB: Error %d finalizing bulk load while closing " - "handler.", - err); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Error %d finalizing bulk load while closing " + "handler.", + err); } } @@ -898,21 +514,6 @@ public: int rename_table(const char *const from, const char *const to) override MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int convert_blob_from_storage_format(my_core::Field_blob *const blob, - Rdb_string_reader *const reader, - bool decode) - MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - - int convert_varchar_from_storage_format( - my_core::Field_varstring *const field_var, - Rdb_string_reader *const reader, bool decode) - MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - - int convert_field_from_storage_format(my_core::Field *const field, - Rdb_string_reader *const reader, - bool decode, uint len) - MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int convert_record_from_storage_format(const rocksdb::Slice *const key, const rocksdb::Slice *const value, uchar *const buf) @@ -925,10 +526,9 @@ public: static const std::vector<std::string> parse_into_tokens(const std::string &s, const char delim); - static const std::string generate_cf_name(const uint index, - const TABLE *const table_arg, - const Rdb_tbl_def *const 
tbl_def_arg, - bool *per_part_match_found); + static const std::string generate_cf_name( + const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found); static const char *get_key_name(const uint index, const TABLE *const table_arg, @@ -1040,7 +640,7 @@ public: MY_ATTRIBUTE((__warn_unused_result__)); int close(void) override MY_ATTRIBUTE((__warn_unused_result__)); - int write_row(uchar *const buf) override + int write_row(const uchar *const buf) override MY_ATTRIBUTE((__warn_unused_result__)); int update_row(const uchar *const old_data, const uchar *const new_data) override MY_ATTRIBUTE((__warn_unused_result__)); @@ -1067,7 +667,7 @@ public: /* Default implementation from cancel_pushed_idx_cond() suits us */ -private: + private: struct key_def_cf_info { rocksdb::ColumnFamilyHandle *cf_handle; bool is_reverse_cf; @@ -1087,16 +687,6 @@ private: longlong hidden_pk_id; bool skip_unique_check; - - // In certain cases, TTL is enabled on a table, as well as an explicit TTL - // column. The TTL column can be part of either the key or the value part - // of the record. If it is part of the key, we store the offset here. - // - // Later on, we use this offset to store the TTL in the value part of the - // record, which we can then access in the compaction filter. - // - // Set to UINT_MAX by default to indicate that the TTL is not in key. 
- uint ttl_pk_offset = UINT_MAX; }; /* @@ -1129,23 +719,24 @@ private: std::array<struct key_def_cf_info, MAX_INDEXES + 1> *const cfs) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int create_key_def(const TABLE *const table_arg, const uint &i, + int create_key_def(const TABLE *const table_arg, const uint i, const Rdb_tbl_def *const tbl_def_arg, std::shared_ptr<Rdb_key_def> *const new_key_def, - const struct key_def_cf_info &cf_info) const + const struct key_def_cf_info &cf_info, uint64 ttl_duration, + const std::string &ttl_column) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int create_inplace_key_defs( const TABLE *const table_arg, Rdb_tbl_def *vtbl_def_arg, const TABLE *const old_table_arg, const Rdb_tbl_def *const old_tbl_def_arg, - const std::array<key_def_cf_info, MAX_INDEXES + 1> &cfs) const + const std::array<key_def_cf_info, MAX_INDEXES + 1> &cf, + uint64 ttl_duration, const std::string &ttl_column) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - std::unordered_map<std::string, uint> - get_old_key_positions(const TABLE *table_arg, const Rdb_tbl_def *tbl_def_arg, - const TABLE *old_table_arg, - const Rdb_tbl_def *old_tbl_def_arg) const + std::unordered_map<std::string, uint> get_old_key_positions( + const TABLE *table_arg, const Rdb_tbl_def *tbl_def_arg, + const TABLE *old_table_arg, const Rdb_tbl_def *old_tbl_def_arg) const MY_ATTRIBUTE((__nonnull__)); int compare_key_parts(const KEY *const old_key, @@ -1155,24 +746,20 @@ private: int compare_keys(const KEY *const old_key, const KEY *const new_key) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int convert_record_to_storage_format(const struct update_row_info &row_info, - rocksdb::Slice *const packed_rec) - MY_ATTRIBUTE((__nonnull__)); - bool should_hide_ttl_rec(const Rdb_key_def &kd, const rocksdb::Slice &ttl_rec_val, const int64_t curr_ts) MY_ATTRIBUTE((__warn_unused_result__)); - void rocksdb_skip_expired_records(const Rdb_key_def &kd, - 
rocksdb::Iterator *const iter, - bool seek_backward); + int rocksdb_skip_expired_records(const Rdb_key_def &kd, + rocksdb::Iterator *const iter, + bool seek_backward); int index_first_intern(uchar *buf) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int index_last_intern(uchar *buf) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int find_icp_matching_index_rec(const bool &move_forward, uchar *const buf) + int find_icp_matching_index_rec(const bool move_forward, uchar *const buf) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void calc_updated_indexes(); @@ -1180,20 +767,20 @@ private: const bool skip_unique_check) MY_ATTRIBUTE((__warn_unused_result__)); int get_pk_for_update(struct update_row_info *const row_info); - int check_and_lock_unique_pk(const uint &key_id, + int check_and_lock_unique_pk(const uint key_id, const struct update_row_info &row_info, - bool *const found, bool *const pk_changed) + bool *const found) MY_ATTRIBUTE((__warn_unused_result__)); - int check_and_lock_sk(const uint &key_id, + int check_and_lock_sk(const uint key_id, const struct update_row_info &row_info, bool *const found) MY_ATTRIBUTE((__warn_unused_result__)); int check_uniqueness_and_lock(const struct update_row_info &row_info, - bool *const pk_changed) + bool pk_changed) MY_ATTRIBUTE((__warn_unused_result__)); bool over_bulk_load_threshold(int *err) MY_ATTRIBUTE((__warn_unused_result__)); - int check_duplicate_sk(const TABLE *table_arg, const Rdb_key_def &index, + int check_duplicate_sk(const TABLE *table_arg, const Rdb_key_def &key_def, const rocksdb::Slice *key, struct unique_sk_buf_info *sk_info) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); @@ -1202,32 +789,36 @@ private: bool sort) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void update_bytes_written(ulonglong bytes_written); - int update_pk(const Rdb_key_def &kd, const struct update_row_info &row_info, - const bool &pk_changed) MY_ATTRIBUTE((__warn_unused_result__)); - int update_sk(const 
TABLE *const table_arg, const Rdb_key_def &kd, - const struct update_row_info &row_info, const bool bulk_load_sk) + int update_write_pk(const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool pk_changed) + MY_ATTRIBUTE((__warn_unused_result__)); + int update_write_sk(const TABLE *const table_arg, const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool bulk_load_sk) MY_ATTRIBUTE((__warn_unused_result__)); - int update_indexes(const struct update_row_info &row_info, - const bool &pk_changed) + int update_write_indexes(const struct update_row_info &row_info, + const bool pk_changed) MY_ATTRIBUTE((__warn_unused_result__)); int read_key_exact(const Rdb_key_def &kd, rocksdb::Iterator *const iter, - const bool &using_full_key, - const rocksdb::Slice &key_slice, + const bool using_full_key, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int read_before_key(const Rdb_key_def &kd, const bool &using_full_key, + int read_before_key(const Rdb_key_def &kd, const bool using_full_key, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int read_after_key(const Rdb_key_def &kd, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int position_to_correct_key( - const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, - const bool &full_key_match, const uchar *const key, - const key_part_map &keypart_map, const rocksdb::Slice &key_slice, - bool *const move_forward, const int64_t ttl_filter_ts) + int position_to_correct_key(const Rdb_key_def &kd, + const enum ha_rkey_function &find_flag, + const bool full_key_match, const uchar *const key, + const key_part_map &keypart_map, + const rocksdb::Slice &key_slice, + bool *const move_forward, + const int64_t ttl_filter_ts) MY_ATTRIBUTE((__warn_unused_result__)); int read_row_from_primary_key(uchar 
*const buf) @@ -1239,7 +830,7 @@ private: int calc_eq_cond_len(const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, const rocksdb::Slice &slice, - const int &bytes_changed_by_succ, + const int bytes_changed_by_succ, const key_range *const end_key, uint *const end_key_packed_size) MY_ATTRIBUTE((__warn_unused_result__)); @@ -1261,6 +852,10 @@ private: int calculate_stats_for_table() MY_ATTRIBUTE((__warn_unused_result__)); + bool should_skip_invalidated_record(const int rc); + bool should_recreate_snapshot(const int rc, const bool is_new_snapshot); + bool can_assume_tracked(THD *thd); + public: int index_init(uint idx, bool sorted) override MY_ATTRIBUTE((__warn_unused_result__)); @@ -1315,11 +910,15 @@ private: ha_rows records_in_range(uint inx, key_range *const min_key, key_range *const max_key) override MY_ATTRIBUTE((__warn_unused_result__)); + + int delete_table(Rdb_tbl_def *const tbl); int delete_table(const char *const from) override MY_ATTRIBUTE((__warn_unused_result__)); int create(const char *const name, TABLE *const form, HA_CREATE_INFO *const create_info) override MY_ATTRIBUTE((__warn_unused_result__)); + int create_table(const std::string &table_name, const TABLE *table_arg, + ulonglong auto_increment_value); bool check_if_incompatible_data(HA_CREATE_INFO *const info, uint table_changes) override MY_ATTRIBUTE((__warn_unused_result__)); @@ -1367,26 +966,25 @@ private: TABLE *const altered_table, my_core::Alter_inplace_info *const ha_alter_info) override; - bool - commit_inplace_alter_table(TABLE *const altered_table, - my_core::Alter_inplace_info *const ha_alter_info, - bool commit) override; + bool commit_inplace_alter_table( + TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info, bool commit) override; + void set_skip_unique_check_tables(const char *const whitelist); #ifdef MARIAROCKS_NOT_YET // MDEV-10976 - void set_use_read_free_rpl(const char *const whitelist); + bool is_read_free_rpl_table() const; #endif - void 
set_skip_unique_check_tables(const char *const whitelist); #ifdef MARIAROCKS_NOT_YET // MDEV-10976 -public: + public: virtual void rpl_before_delete_rows() override; virtual void rpl_after_delete_rows() override; virtual void rpl_before_update_rows() override; virtual void rpl_after_update_rows() override; - virtual bool use_read_free_rpl() override; + virtual bool use_read_free_rpl() const override; #endif // MARIAROCKS_NOT_YET -private: + private: /* Flags tracking if we are inside different replication operation */ bool m_in_rpl_delete_rows; bool m_in_rpl_update_rows; @@ -1436,16 +1034,21 @@ struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx { std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes, std::unordered_set<GL_INDEX_ID> dropped_index_ids, uint n_added_keys, uint n_dropped_keys, ulonglong max_auto_incr) - : my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef), - m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr), - m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys), - m_added_indexes(added_indexes), m_dropped_index_ids(dropped_index_ids), - m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys), + : my_core::inplace_alter_handler_ctx(), + m_new_tdef(new_tdef), + m_old_key_descr(old_key_descr), + m_new_key_descr(new_key_descr), + m_old_n_keys(old_n_keys), + m_new_n_keys(new_n_keys), + m_added_indexes(added_indexes), + m_dropped_index_ids(dropped_index_ids), + m_n_added_keys(n_added_keys), + m_n_dropped_keys(n_dropped_keys), m_max_auto_incr(max_auto_incr) {} ~Rdb_inplace_alter_ctx() {} -private: + private: /* Disable Copying */ Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx &); Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &); @@ -1460,5 +1063,5 @@ extern bool prevent_myrocks_loading; void sql_print_verbose_info(const char *format, ...); -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h index 
26417328194..03d24957a23 100644 --- a/storage/rocksdb/ha_rocksdb_proto.h +++ b/storage/rocksdb/ha_rocksdb_proto.h @@ -78,7 +78,7 @@ Rdb_cf_manager &rdb_get_cf_manager(); const rocksdb::BlockBasedTableOptions &rdb_get_table_options(); bool rdb_is_ttl_enabled(); bool rdb_is_ttl_read_filtering_enabled(); -#ifndef NDEBUG +#ifndef DBUG_OFF int rdb_dbug_set_ttl_rec_ts(); int rdb_dbug_set_ttl_snapshot_ts(); int rdb_dbug_set_ttl_read_filter_ts(); @@ -100,4 +100,4 @@ Rdb_ddl_manager *rdb_get_ddl_manager(void) class Rdb_binlog_manager; Rdb_binlog_manager *rdb_get_binlog_manager(void) MY_ATTRIBUTE((__warn_unused_result__)); -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/logger.h b/storage/rocksdb/logger.h index b2820127711..8902bc18893 100644 --- a/storage/rocksdb/logger.h +++ b/storage/rocksdb/logger.h @@ -22,7 +22,7 @@ namespace myrocks { class Rdb_logger : public rocksdb::Logger { -public: + public: explicit Rdb_logger(const rocksdb::InfoLogLevel log_level = rocksdb::InfoLogLevel::ERROR_LEVEL) : m_mysql_log_level(log_level) {} @@ -77,9 +77,9 @@ public: m_mysql_log_level = log_level; } -private: + private: std::shared_ptr<rocksdb::Logger> m_logger; rocksdb::InfoLogLevel m_mysql_log_level; }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/myrocks_hotbackup.py b/storage/rocksdb/myrocks_hotbackup.py index 906ba814776..42c25c95c1c 100755 --- a/storage/rocksdb/myrocks_hotbackup.py +++ b/storage/rocksdb/myrocks_hotbackup.py @@ -44,12 +44,14 @@ class Writer(object): class StreamWriter(Writer): stream_cmd= '' - def __init__(self, stream_option): + def __init__(self, stream_option, direct = 0): super(StreamWriter, self).__init__() if stream_option == 'tar': self.stream_cmd= 'tar chf -' elif stream_option == 'xbstream': self.stream_cmd= 'xbstream -c' + if direct: + self.stream_cmd = self.stream_cmd + ' -d' else: raise Exception("Only tar or xbstream is supported as streaming option.") @@ -341,6 +343,13 @@ class MySQLUtil: 
row = cur.fetchone() return row[0] + @staticmethod + def is_directio_enabled(dbh): + sql = "SELECT @@global.rocksdb_use_direct_reads" + cur = dbh.cursor() + cur.execute(sql) + row = cur.fetchone() + return row[0] class BackupRunner: datadir = None @@ -362,9 +371,7 @@ class BackupRunner: try: signal.signal(signal.SIGINT, signal_handler) w = None - if opts.output_stream: - w = StreamWriter(opts.output_stream) - else: + if not opts.output_stream: raise Exception("Currently only streaming backup is supported.") snapshot_dir = opts.checkpoint_directory + '/' + str(backup_round) @@ -372,6 +379,11 @@ class BackupRunner: opts.mysql_password, opts.mysql_port, opts.mysql_socket) + direct = MySQLUtil.is_directio_enabled(dbh) + logger.info("Direct I/O: %d", direct) + + w = StreamWriter(opts.output_stream, direct) + if not self.datadir: self.datadir = MySQLUtil.get_datadir(dbh) logger.info("Set datadir: %s", self.datadir) diff --git a/storage/rocksdb/mysql-test/rocksdb/combinations b/storage/rocksdb/mysql-test/rocksdb/combinations index d49de3d38b3..be8080d4b9b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/combinations +++ b/storage/rocksdb/mysql-test/rocksdb/combinations @@ -3,4 +3,3 @@ loose-rocksdb_write_policy=write_committed [write_prepared] loose-rocksdb_write_policy=write_prepared -loose-rocksdb_commit_time_batch_for_recovery=on diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc index 8ec97510dbd..29ec94188a2 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc @@ -89,20 +89,32 @@ EOF # Make sure a snapshot held by another user doesn't block the bulk load connect (other,localhost,root,,); set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; start transaction with consistent snapshot; -select * from 
information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; + +# Assert that there is a pending snapshot +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; connection default; + +# Update CF to smaller value to create multiple SST in ingestion +eval SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= + '$pk_cf_name={write_buffer_size=8m;target_file_size_base=1m};'; + set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; --disable_query_log --echo LOAD DATA INFILE <input_file> INTO TABLE t1; eval LOAD DATA INFILE '$file' INTO TABLE t1; +# There should be no SST being ingested +select * from t1; --echo LOAD DATA INFILE <input_file> INTO TABLE t2; eval LOAD DATA INFILE '$file' INTO TABLE t2; +# There should be no SST being ingested +select * from t2; --echo LOAD DATA INFILE <input_file> INTO TABLE t3; eval LOAD DATA INFILE '$file' INTO TABLE t3; +# There should be no SST being ingested +select * from t3; --enable_query_log set rocksdb_bulk_load=0; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc index 5f808087e3e..46aea8f23b5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc @@ -100,9 +100,10 @@ EOF # Make sure a snapshot held by another user doesn't block the bulk load connect (other,localhost,root,,); set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; + +# Assert that there is a pending snapshot +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; connection default; set rocksdb_bulk_load=1; diff --git 
a/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc new file mode 100644 index 00000000000..233635b369e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc @@ -0,0 +1,298 @@ +CREATE TABLE `link_table` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id1_type` int(10) unsigned NOT NULL DEFAULT '0' , + `id2` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id2_type` int(10) unsigned NOT NULL DEFAULT '0' , + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , + `visibility` tinyint(3) NOT NULL DEFAULT '0' , + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , + `time` int(10) unsigned NOT NULL DEFAULT '0' , + `version` bigint(20) unsigned NOT NULL DEFAULT '0' , + PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' , + KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , + `version` , `data`) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +CREATE TABLE `link_table2` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id1_type` int(10) unsigned NOT NULL DEFAULT '0' , + `id2` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id2_type` int(10) unsigned NOT NULL DEFAULT '0' , + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , + `visibility` tinyint(3) NOT NULL DEFAULT '0' , + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , + `time` int(10) unsigned NOT NULL DEFAULT '0' , + `version` bigint(20) unsigned NOT NULL DEFAULT '0' , + PRIMARY KEY (`link_type` , `id1` , `id2`) + COMMENT 'cf_link' , + KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , + `version` , `data`) COMMENT 'cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9; + +insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (1, 
1, 2, 2, 3, 3, 'a10', 10, 125); +insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125); +insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125); +insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125); +insert into 
link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125); +insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125); +insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125); +insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125); +insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125); +insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125); +insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125); +insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125); +insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125); +insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125); + +insert into link_table2 select * from link_table; + +CREATE TABLE `id_table` ( + `id` bigint(20) NOT NULL DEFAULT '0', + `type` int(11) NOT NULL DEFAULT '0', + `row_created_time` int(11) NOT NULL DEFAULT '0', + `hash_key` varchar(255) NOT NULL DEFAULT '', + `is_deleted` tinyint(4) DEFAULT '0', + PRIMARY KEY (`id`), + KEY `type_id` (`type`,`id`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED +KEY_BLOCK_SIZE=8; + +insert into id_table values (1, 1, 10, '111', 0); +insert into id_table values (2, 1, 10, '111', 1); +insert into id_table values (3, 1, 10, '111', 0); +insert into id_table values (4, 1, 10, '111', 1); +insert into id_table values (5, 1, 10, '111', 0); +insert into 
id_table values (6, 1, 10, '111', 1); +insert into id_table values (7, 1, 10, '111', 0); +insert into id_table values (8, 1, 10, '111', 1); +insert into id_table values (9, 1, 10, '111', 0); +insert into id_table values (10, 1, 10, '111', 1); + +CREATE TABLE `node_table` ( + `id` bigint(20) unsigned NOT NULL DEFAULT '0', + `type` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + `update_time` int(10) unsigned NOT NULL DEFAULT '0', + `data` mediumtext COLLATE latin1_bin NOT NULL, + PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id', + KEY `id` (`id`) COMMENT 'cf_node' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +insert into node_table values (1, 1, 1, 10, 'data'); + +insert into node_table values (2, 1, 1, 10, 'data'); + +insert into node_table values (3, 1, 1, 10, 'data'); + +insert into node_table values (4, 1, 1, 10, 'data'); + +insert into node_table values (5, 1, 1, 10, 'data'); + +insert into node_table values (6, 1, 1, 10, 'data'); + +insert into node_table values (7, 1, 1, 10, 'data'); + +insert into node_table values (8, 1, 1, 10, 'data'); + +insert into node_table values (9, 1, 1, 10, 'data'); + +insert into node_table values (10, 1, 1, 10, 'data'); + +CREATE TABLE `count_table` ( + `id` bigint(20) unsigned NOT NULL DEFAULT '0', + `type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `count` int(10) unsigned NOT NULL DEFAULT '0', + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + + +insert into count_table values (2, 1, 1, 1, 10, 20); + +insert into count_table values (3, 1, 1, 1, 10, 20); + +insert into count_table values (4, 1, 1, 1, 10, 20); + +insert into count_table values (5, 1, 1, 1, 10, 
20); + +insert into count_table values (6, 1, 1, 1, 10, 20); + +insert into count_table values (7, 1, 1, 1, 10, 20); + +insert into count_table values (8, 1, 1, 1, 10, 20); + +insert into count_table values (9, 1, 1, 1, 10, 20); + +insert into count_table values (10, 1, 1, 1, 10, 20); + +CREATE TABLE `link_table5` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0', + `id1_type` int(10) unsigned NOT NULL DEFAULT '0', + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(3) NOT NULL DEFAULT '0', + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); + + +CREATE TABLE `link_table3` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0', + `id1_type` int(10) unsigned NOT NULL DEFAULT '0', + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(4) NOT NULL DEFAULT '0', + `data` text COLLATE latin1_bin NOT NULL, + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', + KEY `id1_type` 
(`id1`,`link_type`,`visibility`,`time`,`id2`,`version`) + COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; + +insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); + +CREATE TABLE `link_table6` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0', + `id1_type` int(10) unsigned NOT NULL DEFAULT '0', + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(4) NOT NULL DEFAULT '0', + `data` text COLLATE latin1_bin NOT NULL, + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', + KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`, + `data`(255)) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; + +insert into link_table6 values (1, 1, 2, 2, 1, 1, + 'data12_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 3, 2, 1, 2, + 'data13_12345678901234567890123456789012345678901234567890123456789' + 
'0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 4, 2, 1, 2, + 'data14_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 5, 2, 1, 1, + 'data15_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 1, 2, 1, 1, + 'data21_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 2, 2, 1, 1, + 'data22_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 3, 2, 1, 1, + 'data32_12345678901234567890123456789012345678901234567890123456789' + 
'0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); + +CREATE TABLE `link_table4` ( + `id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', + `raw_key` text COLLATE latin1_bin, + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(3) NOT NULL DEFAULT '0', + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', + KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) + COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1); diff --git a/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc b/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc new file mode 100644 index 00000000000..79ac367a73b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc @@ -0,0 +1,1425 @@ +# +# Test file for WL#1724 (Min/Max Optimization for Queries with Group By Clause). 
+# The queries in this file test query execution via QUICK_GROUP_MIN_MAX_SELECT. +# + +# +# TODO: +# Add queries with: +# - C != const +# - C IS NOT NULL +# - HAVING clause + +--disable_warnings +drop table if exists t1; +--enable_warnings + +eval create table t1 ( + a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(248) default ' ' +) engine=$engine; + +insert into t1 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'), +('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'), +('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'), 
+('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'), +('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'), +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'), +('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'), +('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'), +('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'), +('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'); + +create index 
idx_t1_0 on t1 (a1); +create index idx_t1_1 on t1 (a1,a2,b,c); +create index idx_t1_2 on t1 (a1,a2,b); +analyze table t1; + +# t2 is the same as t1, but with some NULLs in the MIN/MAX column, and +# one more nullable attribute + +--disable_warnings +drop table if exists t2; +--enable_warnings + +eval create table t2 ( + a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(248) default ' ' +) engine=$engine; +insert into t2 select * from t1; +# add few rows with NULL's in the MIN/MAX column +insert into t2 (a1, a2, b, c, d) values +('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'), +('a','a','a',NULL,'xyz'), +('a','a','b',NULL,'xyz'), +('a','b','a',NULL,'xyz'), +('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'), +('d','b','b',NULL,'xyz'), +('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'), +('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'), +('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'), +('a','a','a',NULL,'xyz'), +('a','a','b',NULL,'xyz'), +('a','b','a',NULL,'xyz'), +('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'), +('d','b','b',NULL,'xyz'), +('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'), +('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'); + +create index idx_t2_0 on t2 (a1); +create index idx_t2_1 on t2 (a1,a2,b,c); +create index idx_t2_2 on t2 (a1,a2,b); +analyze table t2; + +# Table t3 is the same as t1, but with smaller column lenghts. +# This allows to test different branches of the cost computation procedure +# when the number of keys per block are less than the number of keys in the +# sub-groups formed by predicates over non-group attributes. 
+ +--disable_warnings +drop table if exists t3; +--enable_warnings + +eval create table t3 ( + a1 char(1), a2 char(1), b char(1), c char(4) not null, d char(3), dummy char(1) default ' ' +) engine=$engine; + +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), 
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), 
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); + +create index idx_t3_0 on t3 (a1); +create index idx_t3_1 on t3 (a1,a2,b,c); +create index idx_t3_2 on t3 (a1,a2,b); +analyze table t3; + + +# +# Queries without a WHERE clause. 
These queries do not use ranges. +# + +# plans +explain select a1, min(a2) from t1 group by a1; +explain select a1, max(a2) from t1 group by a1; +explain select a1, min(a2), max(a2) from t1 group by a1; +explain select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b; +explain select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b; +--replace_column 7 # 9 # +explain select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b; +# Select fields in different order +explain select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1; +explain select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b; +explain select min(a2) from t1 group by a1; +explain select a2, min(c), max(c) from t1 group by a1,a2,b; + +# queries +select a1, min(a2) from t1 group by a1; +select a1, max(a2) from t1 group by a1; +select a1, min(a2), max(a2) from t1 group by a1; +select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b; +select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b; +select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b; +# Select fields in different order +select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1; +select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b; +select min(a2) from t1 group by a1; +select a2, min(c), max(c) from t1 group by a1,a2,b; + +# +# Queries with a where clause +# + +# A) Preds only over the group 'A' attributes +# plans +explain select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +explain select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or 
a1 = 'b' or a1 = 'd' group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +explain select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +explain select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b; + +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +--replace_column 9 # +explain select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b; + +# queries +select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +select a1,a2,b, max(c) 
from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b; + +select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b; + +# B) Equalities only over the non-group 'B' attributes +# plans +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 
'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1; + +explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +explain select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2; +explain select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2; +explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2; +explain select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +explain select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2; +explain select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2; +explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2; +explain 
select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +# these queries test case 2) in TRP_GROUP_MIN_MAX::update_cost() +explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +# queries +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1; + +select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2; +select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2; +select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2; +select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from 
t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2; +select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2; +select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2; +select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +# these queries test case 2) in TRP_GROUP_MIN_MAX::update_cost() +select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + + +# IS NULL (makes sense for t2 only) +# plans +explain select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1; +explain select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; + +explain select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1; +explain select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; + +explain select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2; +explain select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2; +explain select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; + +# queries +select 
a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1; +select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; +select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1; +select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; +select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2; +select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2; +select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; +select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; + +# C) Range predicates for the MIN/MAX attribute +# plans +--replace_column 9 # +explain select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 
'i110')) group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b; + +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; + +# queries +select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b; 
+select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b; + +select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (c 
> 'b1') or (c <= 'g1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; + +# analyze the sub-select +explain select a1,a2,b,min(c),max(c) from t1 +where exists ( select * from t2 where t2.c = t1.c ) +group by a1,a2,b; + +# the sub-select is unrelated to MIN/MAX +explain select a1,a2,b,min(c),max(c) from t1 +where exists ( select * from t2 where t2.c > 'b1' ) +group by a1,a2,b; + + +# A,B,C) Predicates referencing mixed classes of attributes +# plans +explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +explain select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +explain select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b; + +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; 
+--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +# queries +select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b; + +select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 
where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + + +# +# GROUP BY queries without MIN/MAX +# + +# plans +explain select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +explain select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +explain select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +--replace_column 9 # +explain select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +# queries +select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +select a1,a2,b from t1 where (a2 >= 'b') and (b = 
'a') group by a1,a2,b; +select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +# +# DISTINCT queries +# + +# plans +explain select distinct a1,a2,b from t1; +explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a'); +explain extended select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +explain select distinct b from t1 where (a2 >= 'b') and (b = 'a'); +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b'; +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e'; + +--replace_column 9 # +explain select distinct a1,a2,b from t2; +--replace_column 9 # +explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a'); +explain extended select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +--replace_column 9 # +explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +explain select distinct b from t2 where (a2 >= 'b') and (b = 'a'); +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b'; +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e'; + +# queries +select distinct a1,a2,b from t1; +select distinct a1,a2,b from t1 where (a2 
>= 'b') and (b = 'a'); +select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +select distinct b from t1 where (a2 >= 'b') and (b = 'a'); +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b'; +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e'; + +select distinct a1,a2,b from t2; +select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a'); +select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +select distinct b from t2 where (a2 >= 'b') and (b = 'a'); +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b'; +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e'; + +# BUG #6303 +select distinct t_00.a1 +from t1 t_00 +where exists ( select * from t2 where a1 = t_00.a1 ); + +# BUG #8532 - SELECT DISTINCT a, a causes server to crash +select distinct a1,a1 from t1; +select distinct a2,a1,a2,a1 from t1; +select distinct t1.a1,t2.a1 from t1,t2; + + +# +# DISTINCT queries with GROUP-BY +# + +# plans +explain select distinct a1,a2,b from t1; +explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +explain select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +explain select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1; +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1; + +--replace_column 9 # +explain select distinct a1,a2,b from t2; +--replace_column 9 # +explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +--replace_column 9 # +explain select distinct 
a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +--replace_column 9 # +explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +--replace_column 9 # +explain select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +--replace_column 9 # +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1; +--replace_column 9 # +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1; + +# queries +select distinct a1,a2,b from t1; +select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1; +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1; + +select distinct a1,a2,b from t2; +select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1; +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1; + + +# +# COUNT (DISTINCT cols) queries +# + +explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a'); +explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +explain extended select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +explain select 
count(distinct b) from t1 where (a2 >= 'b') and (b = 'a'); +explain extended select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a'); + +select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a'); +select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a'); +select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a'); + +# +# Queries with expressions in the select clause +# + +explain select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b; +explain select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b; +explain select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b; +explain select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +explain select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2; + +select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b; +select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b; +select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b; +select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2; + + +# +# Negative examples: queries that should NOT be treated as optimizable by +# QUICK_GROUP_MIN_MAX_SELECT +# + +# select a non-indexed attribute +explain select a1,a2,b,d,min(c),max(c) from t1 group by a1,a2,b; + +explain select a1,a2,b,d from t1 group by a1,a2,b; + +# predicate that references an attribute that is after the MIN/MAX argument +# in the index +explain extended select a1,a2,min(b),max(b) from t1 +where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2; + +# predicate that references a non-indexed attribute 
+explain extended select a1,a2,b,min(c),max(c) from t1 +where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b; + +explain extended select a1,a2,b,c from t1 +where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b,c; + +# non-equality predicate for a non-group select attribute +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b < 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b' and b >= 'a') group by a1; +explain extended select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b; + +# non-group field with an equality predicate that references a keypart after the +# MIN/MAX argument +explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; +select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; + +# disjunction for a non-group select attribute +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1; + +# non-range predicate for the MIN/MAX attribute +explain select a1,a2,b,min(c),max(c) from t2 +where (c > 'a000') and (c <= 'd999') and (c like '_8__') group by a1,a2,b; + +# not all attributes are indexed by one index +explain select a1, a2, b, c, min(d), max(d) from t1 group by a1,a2,b,c; + +# other aggregate functions than MIN/MAX +explain select a1,a2,count(a2) from t1 group by a1,a2,b; +explain extended select a1,a2,count(a2) from t1 where (a1 > 'a') group by a1,a2,b; +explain extended select sum(ord(a1)) from t1 where (a1 > 'a') group by a1,a2,b; + +# test multi_range_groupby flag +set optimizer_switch = 'multi_range_groupby=off'; +explain select 
a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1; +set optimizer_switch = 'default'; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1; + + +# +# Bug #16710: select distinct doesn't return all it should +# + +explain select distinct(a1) from t1 where ord(a2) = 98; +select distinct(a1) from t1 where ord(a2) = 98; + +# +# BUG#11044: DISTINCT or GROUP BY queries with equality predicates instead of MIN/MAX. +# + +explain select a1 from t1 where a2 = 'b' group by a1; +select a1 from t1 where a2 = 'b' group by a1; + +explain select distinct a1 from t1 where a2 = 'b'; +select distinct a1 from t1 where a2 = 'b'; + +# +# Bug #12672: primary key implcitly included in every innodb index +# +# Test case moved to group_min_max_innodb + + +# +# Bug #6142: a problem with the empty innodb table +# +# Test case moved to group_min_max_innodb + + +# +# Bug #9798: group by with rollup +# +# Test case moved to group_min_max_innodb + + +# +# Bug #13293 Wrongly used index results in endless loop. 
+# +# Test case moved to group_min_max_innodb + + +drop table t1,t2,t3; + +# +# Bug #14920 Ordering aggregated result sets with composite primary keys +# corrupts resultset +# +eval create table t1 (c1 int not null,c2 int not null, primary key(c1,c2)) engine=$engine; +insert into t1 (c1,c2) values +(10,1),(10,2),(10,3),(20,4),(20,5),(20,6),(30,7),(30,8),(30,9); +select distinct c1, c2 from t1 order by c2; +select c1,min(c2) as c2 from t1 group by c1 order by c2; +select c1,c2 from t1 group by c1,c2 order by c2; +drop table t1; + +# +# Bug #16203: Analysis for possible min/max optimization erroneously +# returns impossible range +# + +eval CREATE TABLE t1 (a varchar(5), b int(11), PRIMARY KEY (a,b)) engine=$engine; +INSERT INTO t1 VALUES ('AA',1), ('AA',2), ('AA',3), ('BB',1), ('AA',4); +OPTIMIZE TABLE t1; + +SELECT a FROM t1 WHERE a='AA' GROUP BY a; +SELECT a FROM t1 WHERE a='BB' GROUP BY a; + +EXPLAIN SELECT a FROM t1 WHERE a='AA' GROUP BY a; +EXPLAIN SELECT a FROM t1 WHERE a='BB' GROUP BY a; + +SELECT DISTINCT a FROM t1 WHERE a='BB'; +SELECT DISTINCT a FROM t1 WHERE a LIKE 'B%'; +SELECT a FROM t1 WHERE a LIKE 'B%' GROUP BY a; + +DROP TABLE t1; + + +# +# Bug #15102: select distinct returns empty result, select count +# distinct > 0 (correct) +# + +CREATE TABLE t1 ( + a int(11) NOT NULL DEFAULT '0', + b varchar(16) COLLATE latin1_general_ci NOT NULL DEFAULT '', + PRIMARY KEY (a,b) + ) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci; + +delimiter |; + +CREATE PROCEDURE a(x INT) +BEGIN + DECLARE rnd INT; + DECLARE cnt INT; + + WHILE x > 0 DO + SET rnd= x % 100; + SET cnt = (SELECT COUNT(*) FROM t1 WHERE a = rnd); + INSERT INTO t1(a,b) VALUES (rnd, CAST(cnt AS CHAR)); + SET x= x - 1; + END WHILE; +END| + +DELIMITER ;| + +CALL a(1000); + +SELECT a FROM t1 WHERE a=0; +SELECT DISTINCT a FROM t1 WHERE a=0; +SELECT COUNT(DISTINCT a) FROM t1 WHERE a=0; + +DROP TABLE t1; +DROP PROCEDURE a; + +# +# Bug #18068: SELECT DISTINCT +# + +eval CREATE TABLE t1 (a 
varchar(64) NOT NULL default '', PRIMARY KEY(a)) engine=$engine; + +INSERT INTO t1 (a) VALUES + (''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'), + ('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'), + ('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN'); + +EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a; +SELECT DISTINCT a,a FROM t1 ORDER BY a; + +DROP TABLE t1; + +# +# Bug #21007: NATURAL JOIN (any JOIN (2 x NATURAL JOIN)) crashes the server +# + +eval CREATE TABLE t1 (id1 INT, id2 INT) engine=$engine; +eval CREATE TABLE t2 (id2 INT, id3 INT, id5 INT) engine=$engine; +eval CREATE TABLE t3 (id3 INT, id4 INT) engine=$engine; +eval CREATE TABLE t4 (id4 INT) engine=$engine; +eval CREATE TABLE t5 (id5 INT, id6 INT) engine=$engine; +eval CREATE TABLE t6 (id6 INT) engine=$engine; + +INSERT INTO t1 VALUES(1,1); +INSERT INTO t2 VALUES(1,1,1); +INSERT INTO t3 VALUES(1,1); +INSERT INTO t4 VALUES(1); +INSERT INTO t5 VALUES(1,1); +INSERT INTO t6 VALUES(1); + +# original bug query +SELECT * FROM +t1 + NATURAL JOIN +(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6) + ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5)); + +# inner join swapped +SELECT * FROM +t1 + NATURAL JOIN +(((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6) on t3.id4 = t5.id5) JOIN t2 + ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5)); + +# one join less, no ON cond +SELECT * FROM t1 NATURAL JOIN ((t3 join (t5 NATURAL JOIN t6)) JOIN t2); + +# wrong error message: 'id2' - ambiguous column +SELECT * FROM +(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6) + ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5)) + NATURAL JOIN +t1; +SELECT * FROM +(t2 JOIN ((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6))) + NATURAL JOIN +t1; + +DROP TABLE t1,t2,t3,t4,t5,t6; + +# +# Bug#22342: No results returned for query using max and group by +# +eval CREATE TABLE t1 (a int, b int, PRIMARY KEY (a,b), KEY b (b)) engine=$engine; +INSERT INTO t1 VALUES (1,1),(1,2),(1,0),(1,3); +ANALYZE TABLE t1; + +explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 
GROUP BY a; +SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; +SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a; +eval CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)) engine=$engine; +INSERT INTO t2 SELECT a,b,b FROM t1; +ANALYZE TABLE t2; +explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a; +SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a; + +DROP TABLE t1,t2; + +# +# Bug#24156: Loose index scan not used with CREATE TABLE ...SELECT and similar statements +# + +eval CREATE TABLE t1 (a INT, b INT, INDEX (a,b)) engine=$engine; +INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3), (1,4), (1,5), + (2,2), (2,3), (2,1), (3,1), (4,1), (4,2), (4,3), (4,4), (4,5), (4,6); +ANALYZE TABLE t1; +EXPLAIN SELECT max(b), a FROM t1 GROUP BY a; +FLUSH STATUS; +SELECT max(b), a FROM t1 GROUP BY a; +SHOW STATUS LIKE 'handler_read__e%'; +EXPLAIN SELECT max(b), a FROM t1 GROUP BY a; +FLUSH STATUS; +eval CREATE TABLE t2 engine=$engine SELECT max(b), a FROM t1 GROUP BY a; +SHOW STATUS LIKE 'handler_read__e%'; +FLUSH STATUS; +SELECT * FROM (SELECT max(b), a FROM t1 GROUP BY a) b; +SHOW STATUS LIKE 'handler_read__e%'; +FLUSH STATUS; +(SELECT max(b), a FROM t1 GROUP BY a) UNION + (SELECT max(b), a FROM t1 GROUP BY a); +SHOW STATUS LIKE 'handler_read__e%'; +EXPLAIN (SELECT max(b), a FROM t1 GROUP BY a) UNION + (SELECT max(b), a FROM t1 GROUP BY a); + +EXPLAIN SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x + FROM t1 AS t1_outer; +EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE EXISTS + (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); +EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE + (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) > 12; +EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE + a IN (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); +EXPLAIN SELECT 1 FROM t1 AS t1_outer GROUP BY a HAVING + a > (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); +EXPLAIN SELECT 1 FROM t1 AS t1_outer1 JOIN t1 AS t1_outer2 + ON t1_outer1.a = (SELECT 
max(b) FROM t1 GROUP BY a HAVING a < 2) + AND t1_outer1.b = t1_outer2.b; +EXPLAIN SELECT (SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x + FROM t1 AS t1_outer) x2 FROM t1 AS t1_outer2; + +CREATE TABLE t3 LIKE t1; +FLUSH STATUS; +INSERT INTO t3 SELECT a,MAX(b) FROM t1 GROUP BY a; +SHOW STATUS LIKE 'handler_read__e%'; +DELETE FROM t3; +FLUSH STATUS; +INSERT INTO t3 SELECT 1, (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) + FROM t1 LIMIT 1; +SHOW STATUS LIKE 'handler_read__e%'; +FLUSH STATUS; +DELETE FROM t3 WHERE (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) > 10000; +SHOW STATUS LIKE 'handler_read__e%'; +FLUSH STATUS; +--error ER_SUBQUERY_NO_1_ROW +DELETE FROM t3 WHERE (SELECT (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) x + FROM t1) > 10000; +SHOW STATUS LIKE 'handler_read__e%'; + +DROP TABLE t1,t2,t3; + +# +# Bug#25602: queries with DISTINCT and SQL_BIG_RESULT hint +# for which loose scan optimization is applied +# + +eval CREATE TABLE t1 (a int, INDEX idx(a)) engine=$engine; +INSERT INTO t1 VALUES + (4), (2), (1), (2), (4), (2), (1), (4), + (4), (2), (1), (2), (2), (4), (1), (4); +ANALYZE TABLE t1; + +EXPLAIN SELECT DISTINCT(a) FROM t1; +SELECT DISTINCT(a) FROM t1; +EXPLAIN SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1; +SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1; + +DROP TABLE t1; + +# +# Bug #32268: Indexed queries give bogus MIN and MAX results +# + +eval CREATE TABLE t1 (a INT, b INT) engine=$engine; +INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3); +INSERT INTO t1 SELECT a + 1, b FROM t1; +INSERT INTO t1 SELECT a + 2, b FROM t1; +ANALYZE TABLE t1; + +EXPLAIN +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; + +CREATE INDEX break_it ON t1 (a, b); + +EXPLAIN +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a; +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a; + +EXPLAIN +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; +SELECT a, MIN(b), MAX(b) FROM t1 
GROUP BY a ORDER BY a DESC; + +EXPLAIN +SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC; +SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC; + +DROP TABLE t1; + +# +# Bug#38195: Incorrect handling of aggregate functions when loose index scan is +# used causes server crash. +# +create table t1 (a int, b int, primary key (a,b), key `index` (a,b)) engine=MyISAM; +insert into t1 (a,b) values +(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6), + (0,7),(0,8),(0,9),(0,10),(0,11),(0,12),(0,13), +(1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6), + (1,7),(1,8),(1,9),(1,10),(1,11),(1,12),(1,13), +(2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6), + (2,7),(2,8),(2,9),(2,10),(2,11),(2,12),(2,13), +(3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6), + (3,7),(3,8),(3,9),(3,10),(3,11),(3,12),(3,13); +insert into t1 (a,b) select a, max(b)+1 from t1 where a = 0 group by a; +ANALYZE TABLE t1; +select * from t1; +explain extended select sql_buffer_result a, max(b)+1 from t1 where a = 0 group by a; +drop table t1; + + +# +# Bug #41610: key_infix_len can be overwritten causing some group by queries +# to return no rows +# + +eval CREATE TABLE t1 (a int, b int, c int, d int, + KEY foo (c,d,a,b), KEY bar (c,a,b,d)) engine=$engine; + +INSERT INTO t1 VALUES (1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 1, 3), (1, 1, 1, 4); +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT a,b,c+1,d FROM t1; +ANALYZE TABLE t1; + +#Should be non-empty +EXPLAIN SELECT DISTINCT c FROM t1 WHERE d=4; +SELECT DISTINCT c FROM t1 WHERE d=4; + +DROP TABLE t1; + +--echo # +--echo # Bug #45386: Wrong query result with MIN function in field list, +--echo # WHERE and GROUP BY clause +--echo # + +eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine; +INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1); +INSERT INTO t SELECT * FROM t; +INSERT INTO t SELECT * FROM t; +ANALYZE TABLE t; + +--echo # test MIN +--echo #should use range with index for group by +EXPLAIN +SELECT a, MIN(b) 
FROM t WHERE b <> 0 GROUP BY a; +--echo #should return 1 row +SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a; + +--echo # test MAX +--echo #should use range with index for group by +EXPLAIN +SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a; +--echo #should return 1 row +SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a; + +--echo # test 3 ranges and use the middle one +INSERT INTO t SELECT a, 2 FROM t; + +--echo #should use range with index for group by +EXPLAIN +SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a; +--echo #should return 1 row +SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a; + +DROP TABLE t; + +--echo # +--echo # Bug #48472: Loose index scan inappropriately chosen for some WHERE +--echo # conditions +--echo # + +eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine; +INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1); +INSERT INTO t SELECT * FROM t; +ANALYZE TABLE t; + +SELECT a, MAX(b) FROM t WHERE 0=b+0 GROUP BY a; + +DROP TABLE t; + +--echo End of 5.0 tests + +--echo # +--echo # Bug #46607: Assertion failed: (cond_type == Item::FUNC_ITEM) results in +--echo # server crash +--echo # + +eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine; +INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1); +INSERT INTO t SELECT * FROM t; + +SELECT a, MAX(b) FROM t WHERE b GROUP BY a; + +DROP TABLE t; + +# +# BUG#49902 - SELECT returns incorrect results +# +eval CREATE TABLE t1(a INT NOT NULL, b INT NOT NULL, KEY (b)) engine=$engine; +INSERT INTO t1 VALUES(1,1),(2,1); +ANALYZE TABLE t1; +SELECT 1 AS c, b FROM t1 WHERE b IN (1,2) GROUP BY c, b; +SELECT a FROM t1 WHERE b=1; +DROP TABLE t1; + +--echo # +--echo # Bug#47762: Incorrect result from MIN() when WHERE tests NOT NULL column +--echo # for NULL +--echo # + +--echo ## Test for NULLs allowed +eval CREATE TABLE t1 ( a INT, KEY (a) ) engine=$engine; +INSERT INTO t1 VALUES (1), (2), (3); +ANALYZE TABLE t1; +--source include/min_null_cond.inc +INSERT INTO t1 VALUES (NULL), (NULL); +ANALYZE 
TABLE t1; +--source include/min_null_cond.inc +DROP TABLE t1; + +--echo ## Test for NOT NULLs +eval CREATE TABLE t1 ( a INT NOT NULL PRIMARY KEY) engine=$engine; +INSERT INTO t1 VALUES (1), (2), (3); +ANALYZE TABLE t1; +--echo # +--echo # NULL-safe operator test disabled for non-NULL indexed columns. +--echo # +--echo # See bugs +--echo # +--echo # - Bug#52173: Reading NULL value from non-NULL index gives +--echo # wrong result in embedded server +--echo # +--echo # - Bug#52174: Sometimes wrong plan when reading a MAX value from +--echo # non-NULL index +--echo # +--let $skip_null_safe_test= 1 +--source include/min_null_cond.inc +DROP TABLE t1; + +--echo # +--echo # Bug#53859: Valgrind: opt_sum_query(TABLE_LIST*, List<Item>&, Item*) at +--echo # opt_sum.cc:305 +--echo # +eval CREATE TABLE t1 ( a INT, KEY (a) ) engine=$engine; +INSERT INTO t1 VALUES (1), (2), (3); + +SELECT MIN( a ) AS min_a +FROM t1 +WHERE a > 1 AND a IS NULL +ORDER BY min_a; + +DROP TABLE t1; + + +--echo End of 5.1 tests + + +--echo # +--echo # WL#3220 (Loose index scan for COUNT DISTINCT) +--echo # + +eval CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b)) engine=$engine; +INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1); +INSERT INTO t1 SELECT a, b + 4, 1 FROM t1; +INSERT INTO t1 SELECT a + 1, b, 1 FROM t1; +ANALYZE TABLE t1; +eval CREATE TABLE t2 (a INT, b INT, c INT, d INT, e INT, f INT, KEY (a,b,c)) engine=$engine; +INSERT INTO t2 VALUES (1,1,1,1,1,1), (1,2,1,1,1,1), (1,3,1,1,1,1), + (1,4,1,1,1,1); +INSERT INTO t2 SELECT a, b + 4, c,d,e,f FROM t2; +INSERT INTO t2 SELECT a + 1, b, c,d,e,f FROM t2; +ANALYZE TABLE t2; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1; +SELECT COUNT(DISTINCT a) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1; +SELECT COUNT(DISTINCT a,b) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1; +SELECT COUNT(DISTINCT b,a) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT b) FROM t1; +SELECT COUNT(DISTINCT b) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP 
BY a; +SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a; + +EXPLAIN SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a; +SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b; +SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b; + +EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1; +SELECT DISTINCT COUNT(DISTINCT a) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1; +SELECT COUNT(DISTINCT a, b + 0) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10; +SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10; +SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10; + +EXPLAIN SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10; +SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10; + +EXPLAIN SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1; +SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1; + +EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a; +SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a; + +EXPLAIN SELECT COUNT(DISTINCT a), 12 FROM t1; +SELECT COUNT(DISTINCT a), 12 FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a, b, c) FROM t2; +SELECT COUNT(DISTINCT a, b, c) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2; +SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2; +SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2; +SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2; +SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2; +SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2; + +EXPLAIN SELECT 
a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c; +SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c; + +EXPLAIN SELECT COUNT(DISTINCT c, a, b) FROM t2 + WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c; +SELECT COUNT(DISTINCT c, a, b) FROM t2 + WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c; + +EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5 + GROUP BY b; +SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5 + GROUP BY b; + +EXPLAIN SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; +SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; + +EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; +SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; + +EXPLAIN SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42; +SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42; + +EXPLAIN SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2 + WHERE b = 13 AND c = 42 GROUP BY a; +SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2 + WHERE b = 13 AND c = 42 GROUP BY a; + +--echo # This query could have been resolved using loose index scan since +--echo # the second part of count(..) 
is defined by a constant predicate +EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; +SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; + +EXPLAIN SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a; +SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a; + +EXPLAIN SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c; +SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c; + +EXPLAIN SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a; +SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a; + +DROP TABLE t1,t2; + +--echo # end of WL#3220 tests + +--echo # +--echo # Bug#50539: Wrong result when loose index scan is used for an aggregate +--echo # function with distinct +--echo # +eval CREATE TABLE t1 ( + f1 int(11) NOT NULL DEFAULT '0', + f2 char(1) NOT NULL DEFAULT '', + PRIMARY KEY (f1,f2) +) engine=$engine; +insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'), +(3, 'A'), (3, 'B'), (3, 'C'), (3, 'D'); +ANALYZE TABLE t1; + +SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; +explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; + +drop table t1; +--echo # End of test#50539. + +--echo # +--echo # Bug#17217128 - BAD INTERACTION BETWEEN MIN/MAX AND +--echo # "HAVING SUM(DISTINCT)": WRONG RESULTS. 
+--echo # + +eval CREATE TABLE t (a INT, b INT, KEY(a,b)) engine=$engine; +INSERT INTO t VALUES (1,1), (2,2), (3,3), (4,4), (1,0), (3,2), (4,5); +ANALYZE TABLE t; +let $DEFAULT_TRACE_MEM_SIZE=1048576; # 1MB +eval set optimizer_trace_max_mem_size=$DEFAULT_TRACE_MEM_SIZE; +set @@session.optimizer_trace='enabled=on'; +set end_markers_in_json=on; + +ANALYZE TABLE t; + +SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a; +EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a; +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a; +EXPLAIN SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a; +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a); +EXPLAIN SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a); +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t; +EXPLAIN SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t; +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a; +EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a; +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SET optimizer_trace_max_mem_size=DEFAULT; +SET optimizer_trace=DEFAULT; +SET end_markers_in_json=DEFAULT; + +DROP TABLE t; + +--echo # +--echo # Bug#18109609: LOOSE INDEX SCAN IS NOT USED WHEN IT SHOULD +--echo # + +eval CREATE TABLE t1 ( +id INT AUTO_INCREMENT PRIMARY KEY, +c1 INT, +c2 INT, +KEY(c1,c2)) engine=$engine; + +INSERT INTO t1(c1,c2) VALUES +(1, 1), (1,2), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), (4,2), (4,3), +(4,4), (4,5), (4,6), (4,7), (4,8), (4,9), (4,10), (4,11), (4,12), 
(4,13), +(4,14), (4,15), (4,16), (4,17), (4,18), (4,19), (4,20),(5,5); +ANALYZE TABLE t1; + +EXPLAIN SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1; +FLUSH STATUS; +SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1; +SHOW SESSION STATUS LIKE 'Handler_read%'; + +DROP TABLE t1; + +--echo # End of test for Bug#18109609 diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc new file mode 100644 index 00000000000..d9b4b46b25a --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc @@ -0,0 +1,23 @@ +# Common test pattern for options that control direct i/o +# +# Required input: +# $io_option - name and assignment to enable on server command line + +--perl +use Cwd 'abs_path'; + +open(FILE, ">", "$ENV{MYSQL_TMP_DIR}/data_in_shm.inc") or die; +my $real_path= abs_path($ENV{'MYSQLTEST_VARDIR'}); +my $in_shm= (index($real_path, "/dev/shm") != -1) || + (index($real_path, "/run/shm") != -1); +print FILE "let \$DATA_IN_SHM= $in_shm;\n"; +close FILE; +EOF + +--source $MYSQL_TMP_DIR/data_in_shm.inc +--remove_file $MYSQL_TMP_DIR/data_in_shm.inc + +if ($DATA_IN_SHM) +{ + --skip DATADIR is in /{dev|run}/shm, possibly due to --mem +} diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_write_prepared.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_write_prepared.inc new file mode 100644 index 00000000000..df088ea047d --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/have_write_prepared.inc @@ -0,0 +1,3 @@ +if (`select count(*) = 0 from information_schema.session_variables where variable_name = 'rocksdb_write_policy' and variable_value = 'write_prepared';`) { + --skip Test requires write_prepared policy +} diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc index c23717c4fda..34947cb0ecb 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc @@ -52,8 +52,9 @@ let $ID = `SELECT connection_id()`; send SELECT * FROM t0 WHERE value > 0 FOR UPDATE; connection con2; -let $wait_condition = SELECT 1 FROM information_schema.processlist - WHERE id = $ID AND state = "Sending data"; +let $wait_condition = + SELECT 1 FROM information_schema.processlist + WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data"; --source include/wait_condition.inc eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; UPDATE t0 SET VALUE=VALUE+1 WHERE id=190000; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc index da80f796750..8140b81a95e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc @@ -52,8 +52,9 @@ let $ID = `SELECT connection_id()`; send SELECT * FROM t0 WHERE value > 0 FOR UPDATE; connection con2; -let $wait_condition = SELECT 1 FROM information_schema.processlist - WHERE id = $ID AND state = "Sending data"; +let $wait_condition = + SELECT 1 FROM information_schema.processlist + WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data"; --source include/wait_condition.inc eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; INSERT INTO t0 VALUES(200001,1), (-1,1); diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc index b77a54e4360..e28f1c90b3a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc @@ -55,8 +55,9 @@ let $ID = `SELECT connection_id()`; send SELECT * FROM t0 WHERE value > 0 FOR UPDATE; connection con2; -let $wait_condition = SELECT 1 
FROM information_schema.processlist - WHERE id = $ID AND state = "Sending data"; +let $wait_condition = + SELECT 1 FROM information_schema.processlist + WHERE (id = $ID /* OR srv_id = $ID*/) AND state = "Sending data"; --source include/wait_condition.inc eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; BEGIN; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc index 9494146ba5c..13ceca07913 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc @@ -55,8 +55,9 @@ let $ID = `SELECT connection_id()`; send SELECT * FROM t0 WHERE value > 0 FOR UPDATE; connection con2; -let $wait_condition = SELECT 1 FROM information_schema.processlist - WHERE id = $ID AND state = "Sending data"; +let $wait_condition = + SELECT 1 FROM information_schema.processlist + WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data"; --source include/wait_condition.inc eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; BEGIN; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc b/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc deleted file mode 100644 index 71e713226d7..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc +++ /dev/null @@ -1,53 +0,0 @@ -# Usage: -# -# let $order = ASC; # or DESC -# let $comment = "rev:cf2"; # or "" -# --source suite/rocksdb/include/rocksdb_concurrent_delete.inc - -let $first_row = -1; # Error this should never happen -if ($order == 'ASC') -{ - let $first_row = 1; -} -if ($order == 'DESC') -{ - let $first_row = 3; -} - -connect (con, localhost, root,,); -connection default; - ---disable_warnings -SET debug_sync='RESET'; -DROP TABLE IF EXISTS t1; ---enable_warnings - -eval CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT $comment, a INT); -INSERT 
INTO t1 VALUES(1,1), (2,2), (3,3); - -# This will cause the SELECT to block after finding the first row, but -# before locking and reading it. -connection con; -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE; - -# While that connection is waiting, delete the first row (the one con -# is about to lock and read -connection default; -SET debug_sync='now WAIT_FOR parked'; -eval DELETE FROM t1 WHERE pk = $first_row; - -# Signal the waiting select to continue -SET debug_sync='now SIGNAL go'; - -# Now get the results from the select. The first entry (1,1) (or (3,3) when -# using reverse ordering) should be missing. Prior to the fix the SELECT -# would have returned: "1815: Internal error: NotFound:" -connection con; -reap; - -# Cleanup -connection default; -disconnect con; -set debug_sync='RESET'; -drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc new file mode 100644 index 00000000000..6e427f26fd6 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc @@ -0,0 +1,24 @@ +# Common test pattern for options that control direct i/o +# +# Required input: +# $io_option - name and assignment to enable on server command line + +--source include/have_direct_io.inc + +--echo Checking direct reads +--let $restart_noprint=2 +--let $_mysqld_option=$io_option +--source include/restart_mysqld_with_option.inc + +CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb; +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (1, 1,'a'); +INSERT INTO t1 (a,b) VALUES (2,'b'); +set global rocksdb_force_flush_memtable_now=1; +--sorted_result +SELECT a,b FROM t1; +DROP TABLE t1; + +# cleanup +--let _$mysqld_option= +--source include/restart_mysqld.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result 
b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result index 722edadb4da..a6bb9eb64c7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result @@ -27,6 +27,12 @@ select IF(variable_value - @b3 between 1000 and 1500, 'OK', variable_value - @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced'; Rocksdb_wal_synced OK +set debug_dbug='+d,rocksdb_enable_delay_commits'; +create table dummy10(a int) engine=rocksdb; +Warnings: +Warning 1210 enable_delay_commits_mode ON +drop table dummy10; +set debug_dbug='-d,rocksdb_enable_delay_commits'; ## ## 2PC + durability + group commit ## @@ -45,6 +51,12 @@ select IF(variable_value - @b3 between 1 and 9000, 'OK', variable_value - @b3) from information_schema.global_status where variable_name='Rocksdb_wal_synced'; IF(variable_value - @b3 between 1 and 9000, 'OK', variable_value - @b3) OK +set debug_dbug='+d,rocksdb_disable_delay_commits'; +create table dummy10(a int) engine=rocksdb; +Warnings: +Warning 1210 enable_delay_commits_mode OFF +drop table dummy10; +set debug_dbug='-d,rocksdb_disable_delay_commits'; ## # 2PC enabled, MyRocks durability disabled, single thread ## diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result index 0617232f1e3..e7883f7e03e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result @@ -17,7 +17,7 @@ ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; set @tmp= @@rocksdb_max_row_locks; set session rocksdb_max_row_locks=1000; ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; -ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to max_num_locks limit' from ROCKSDB +ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to 
rocksdb_max_row_locks limit' from ROCKSDB set session rocksdb_bulk_load=1; ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; set session rocksdb_bulk_load=0; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result index f59b841a595..cc47ceff7ca 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result @@ -176,3 +176,24 @@ INSERT INTO t1 (a) VALUES (1); UPDATE t1 SET pk = 3; ALTER TABLE t1 AUTO_INCREMENT 2; DROP TABLE t1; +#---------------------------------- +# Issue #902 Debug assert in autoincrement with small field type +#---------------------------------- +SET auto_increment_increment=100, auto_increment_offset=10; +CREATE TABLE t1(i INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615; +INSERT INTO t1 VALUES (NULL); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +i +ALTER TABLE t1 AUTO_INCREMENT=1; +INSERT INTO t1 VALUES (NULL); +SELECT * FROM t1; +i +10 +ALTER TABLE t1 AUTO_INCREMENT=18446744073709551615; +INSERT INTO t1 VALUES (NULL); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +i +10 +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread.result index db64778d345..9331b4e22a5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread.result @@ -12,17 +12,18 @@ SET auto_increment_increment = 2; SET auto_increment_offset = 1; connect con3, localhost, root,,; connection con1; +SET debug_sync='rocksdb.autoinc_vars2 SIGNAL go2'; SET debug_sync='rocksdb.autoinc_vars SIGNAL parked1 WAIT_FOR go1'; INSERT INTO t1 VALUES(NULL); +connection default; +SET debug_sync='now WAIT_FOR parked1'; connection con2; SET 
debug_sync='rocksdb.autoinc_vars SIGNAL parked2 WAIT_FOR go2'; INSERT INTO t1 VALUES(NULL); connection default; -SET debug_sync='now WAIT_FOR parked1'; SET debug_sync='now WAIT_FOR parked2'; SET debug_sync='now SIGNAL go1'; connection con3; -SET debug_sync='now SIGNAL go2'; connection default; connection con1; connection con2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_without_tx_api.result b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result index a3fc25cc81b..973d1876fa0 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_without_tx_api.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result @@ -3,6 +3,7 @@ Warnings: Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
[connection master] +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key; set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api; DROP TABLE IF EXISTS t1,t2; @@ -17,6 +18,7 @@ SELECT count(*) FROM t1; count(*) 9000 include/sync_slave_sql_with_master.inc +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT count(*) FROM t1; count(*) 9000 @@ -71,14 +73,14 @@ count(*) call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.*Error_code.*"); call mtr.add_suppression("Slave: Can't find record in 't1'.*"); include/wait_for_slave_sql_error.inc [errno=1032] -set @save_rocksdb_read_free_rpl_tables=@@global.rocksdb_read_free_rpl_tables; -set global rocksdb_read_free_rpl_tables="t.*"; +set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl; +set global rocksdb_read_free_rpl=PK_SK; START SLAVE; include/sync_slave_sql_with_master.inc SELECT count(*) FROM t1; count(*) 7000 -set global rocksdb_read_free_rpl_tables=@save_rocksdb_read_free_rpl_tables; +set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl; SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key; SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api; DROP TABLE t1, t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result new file mode 100644 index 00000000000..683b672e360 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result @@ -0,0 +1,87 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. 
Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key; +set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api; +DROP TABLE IF EXISTS t1,t2; +create table t1 (id int primary key, value int, value2 varchar(200)) engine=rocksdb; +create table t2 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb; +SET session rocksdb_blind_delete_primary_key=1; +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +variable_value-@c +1000 +SELECT count(*) FROM t1; +count(*) +9000 +include/sync_slave_sql_with_master.inc +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SELECT count(*) FROM t1; +count(*) +9000 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +variable_value-@c +0 +SELECT count(*) FROM t2; +count(*) +9000 +SET session rocksdb_master_skip_tx_api=1; +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +variable_value-@c +1000 +SELECT count(*) FROM t1; +count(*) +8000 +SELECT count(*) FROM t2; +count(*) +8000 +include/sync_slave_sql_with_master.inc +SELECT count(*) FROM t1; +count(*) +8000 +SELECT count(*) FROM t2; +count(*) +8000 +select variable_value into @c from information_schema.global_status where 
variable_name='rocksdb_rows_deleted_blind'; +DELETE FROM t1 WHERE id BETWEEN 3001 AND 4000; +DELETE FROM t2 WHERE id BETWEEN 3001 AND 4000; +select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +variable_value-@c +0 +SELECT count(*) FROM t1; +count(*) +7000 +SELECT count(*) FROM t2; +count(*) +7000 +include/sync_slave_sql_with_master.inc +SELECT count(*) FROM t1; +count(*) +7000 +SELECT count(*) FROM t2; +count(*) +7000 +DELETE FROM t1 WHERE id = 10; +SELECT count(*) FROM t1; +count(*) +7000 +call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.*Error_code.*"); +call mtr.add_suppression("Slave: Can't find record in 't1'.*"); +include/wait_for_slave_sql_error.inc [errno=1032] +set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl; +set global rocksdb_read_free_rpl=PK_SK; +START SLAVE; +include/sync_slave_sql_with_master.inc +SELECT count(*) FROM t1; +count(*) +7000 +set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl; +SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key; +SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api; +DROP TABLE t1, t2; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result index 773fb68e07e..d447b480f44 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result @@ -21,6 +21,24 @@ id1 id2 link_type visibility data time version select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked'; case when variable_value-@c > 0 then 'true' else 'false' end true +# MariaDB: we don't have optimizer_force_index_for_range, but we can use EITS +# to get the query plan we want. 
+set @tmp_use_stat_tables= @@use_stat_tables; +set use_stat_tables='preferably'; +analyze table linktable persistent for all; +Table Op Msg_type Msg_text +test.linktable analyze status Engine-independent statistics collected +test.linktable analyze status OK +flush tables; +explain select * from linktable; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE linktable ALL NULL NULL NULL NULL 10000 +# This must use range(id1_type2), key_len=24 +explain +select id1, id2, link_type, visibility, data, time, version from linktable +FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE linktable range id1_type2 id1_type2 24 NULL 1000 Using where; Using index select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked'; select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc; id1 id2 link_type visibility data time version diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result index 4f6702b85a7..daf4f5e30ba 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result @@ -59,4 +59,27 @@ insert into t4 values (1, 0xFFFF, 0xFFF, 12345); # This must not fail an assert: select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; pk kp1 kp2 col1 -drop table t1,t2,t3,t4; +# +# Issue #881: Issue #809 still occurs for reverse scans on forward cfs +# +create table t5 ( +id1 bigint not null, +id2 bigint not null, +id3 varchar(100) not null, +id4 int not null, +id5 int not null, +value bigint, +value2 varchar(100), +primary key (id1, 
id2, id3, id4) COMMENT 'bf5_1' +) engine=ROCKSDB; +insert into t5 select * from t1; +set global rocksdb_force_flush_memtable_now=1; +# An index scan starting from the end of the table: +explain +select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t5 index NULL PRIMARY 122 NULL 1 +select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +drop table t1,t2,t3,t4,t5; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result index 2a5f63f7bf7..08863a078ae 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result @@ -22,19 +22,20 @@ KEY(a) ) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 -connection default; +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= +'cf1={write_buffer_size=8m;target_file_size_base=1m};'; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; LOAD DATA INFILE <input_file> INTO TABLE t1; +pk a b LOAD DATA INFILE <input_file> INTO TABLE t2; +pk a b LOAD DATA INFILE <input_file> INTO TABLE t3; +pk a b set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time 
Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result index e5b3612d6a4..be01a4f154f 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result @@ -22,19 +22,20 @@ KEY(a) ) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 -connection default; +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= +'cf1={write_buffer_size=8m;target_file_size_base=1m};'; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; LOAD DATA INFILE <input_file> INTO TABLE t1; +pk a b LOAD DATA INFILE <input_file> INTO TABLE t2; +pk a b LOAD DATA INFILE <input_file> INTO TABLE t3; +pk a b set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result index 36c81b7eb21..bb602210e09 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result @@ 
-22,19 +22,20 @@ KEY(a) ) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 -connection default; +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= +'cf1={write_buffer_size=8m;target_file_size_base=1m};'; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; LOAD DATA INFILE <input_file> INTO TABLE t1; +pk a b LOAD DATA INFILE <input_file> INTO TABLE t2; +pk a b LOAD DATA INFILE <input_file> INTO TABLE t3; +pk a b set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result index 685d3d25f19..948b9562d20 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result @@ -22,19 +22,20 @@ KEY(a) ) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; 
-STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 -connection default; +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= +'cf1={write_buffer_size=8m;target_file_size_base=1m};'; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; LOAD DATA INFILE <input_file> INTO TABLE t1; +pk a b LOAD DATA INFILE <input_file> INTO TABLE t2; +pk a b LOAD DATA INFILE <input_file> INTO TABLE t3; +pk a b set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result index 632b3b47eb5..2c380a430e7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result @@ -59,13 +59,10 @@ CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 connection default; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result 
b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result index b5a56b21f5e..24c7909b06d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result @@ -59,13 +59,10 @@ CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 connection default; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result new file mode 100644 index 00000000000..1f687dfec53 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result @@ -0,0 +1,693 @@ +CREATE TABLE `link_table` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id1_type` int(10) unsigned NOT NULL DEFAULT '0' , +`id2` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id2_type` int(10) unsigned NOT NULL DEFAULT '0' , +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , +`visibility` tinyint(3) NOT NULL DEFAULT '0' , +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , +`time` int(10) unsigned NOT NULL DEFAULT '0' , +`version` bigint(20) unsigned NOT NULL DEFAULT '0' , +PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' , +KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , +`version` , `data`) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT 
CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +CREATE TABLE `link_table2` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id1_type` int(10) unsigned NOT NULL DEFAULT '0' , +`id2` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id2_type` int(10) unsigned NOT NULL DEFAULT '0' , +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , +`visibility` tinyint(3) NOT NULL DEFAULT '0' , +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , +`time` int(10) unsigned NOT NULL DEFAULT '0' , +`version` bigint(20) unsigned NOT NULL DEFAULT '0' , +PRIMARY KEY (`link_type` , `id1` , `id2`) +COMMENT 'cf_link' , +KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , +`version` , `data`) COMMENT 'cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9; +insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125); +insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125); +insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 
125); +insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125); +insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125); +insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125); +insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125); +insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125); +insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125); +insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125); +insert into link_table values (9, 1, 
3, 2, 5, 6, '0123456 ', 13, 125); +insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125); +insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125); +insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125); +insert into link_table2 select * from link_table; +CREATE TABLE `id_table` ( +`id` bigint(20) NOT NULL DEFAULT '0', +`type` int(11) NOT NULL DEFAULT '0', +`row_created_time` int(11) NOT NULL DEFAULT '0', +`hash_key` varchar(255) NOT NULL DEFAULT '', +`is_deleted` tinyint(4) DEFAULT '0', +PRIMARY KEY (`id`), +KEY `type_id` (`type`,`id`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED +KEY_BLOCK_SIZE=8; +insert into id_table values (1, 1, 10, '111', 0); +insert into id_table values (2, 1, 10, '111', 1); +insert into id_table values (3, 1, 10, '111', 0); +insert into id_table values (4, 1, 10, '111', 1); +insert into id_table values (5, 1, 10, '111', 0); +insert into id_table values (6, 1, 10, '111', 1); +insert into id_table values (7, 1, 10, '111', 0); +insert into id_table values (8, 1, 10, '111', 1); +insert into id_table values (9, 1, 10, '111', 0); +insert into id_table values (10, 1, 10, '111', 1); +CREATE TABLE `node_table` ( +`id` bigint(20) unsigned NOT NULL DEFAULT '0', +`type` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +`update_time` int(10) unsigned NOT NULL DEFAULT '0', +`data` mediumtext COLLATE latin1_bin NOT NULL, +PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id', +KEY `id` (`id`) COMMENT 'cf_node' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into node_table values (1, 1, 1, 10, 'data'); +insert into node_table values (2, 1, 1, 10, 'data'); +insert into node_table values (3, 1, 1, 10, 'data'); +insert into node_table values (4, 1, 1, 10, 'data'); +insert into node_table values (5, 1, 1, 10, 'data'); +insert into node_table values (6, 1, 1, 10, 'data'); +insert into 
node_table values (7, 1, 1, 10, 'data'); +insert into node_table values (8, 1, 1, 10, 'data'); +insert into node_table values (9, 1, 1, 10, 'data'); +insert into node_table values (10, 1, 1, 10, 'data'); +CREATE TABLE `count_table` ( +`id` bigint(20) unsigned NOT NULL DEFAULT '0', +`type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`count` int(10) unsigned NOT NULL DEFAULT '0', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into count_table values (2, 1, 1, 1, 10, 20); +insert into count_table values (3, 1, 1, 1, 10, 20); +insert into count_table values (4, 1, 1, 1, 10, 20); +insert into count_table values (5, 1, 1, 1, 10, 20); +insert into count_table values (6, 1, 1, 1, 10, 20); +insert into count_table values (7, 1, 1, 1, 10, 20); +insert into count_table values (8, 1, 1, 1, 10, 20); +insert into count_table values (9, 1, 1, 1, 10, 20); +insert into count_table values (10, 1, 1, 1, 10, 20); +CREATE TABLE `link_table5` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(3) NOT NULL DEFAULT '0', +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table5 values (1, 
1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); +CREATE TABLE `link_table3` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(4) NOT NULL DEFAULT '0', +`data` text COLLATE latin1_bin NOT NULL, +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`) +COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); +CREATE TABLE `link_table6` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(4) NOT NULL DEFAULT '0', +`data` text COLLATE latin1_bin NOT NULL, +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY 
KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`, +`data`(255)) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into link_table6 values (1, 1, 2, 2, 1, 1, +'data12_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 3, 2, 1, 2, +'data13_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 4, 2, 1, 2, +'data14_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 5, 2, 1, 1, +'data15_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 1, 2, 1, 1, +'data21_12345678901234567890123456789012345678901234567890123456789' + 
'0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 2, 2, 1, 1, +'data22_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 3, 2, 1, 1, +'data32_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +CREATE TABLE `link_table4` ( +`id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', +`raw_key` text COLLATE latin1_bin, +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(3) NOT NULL DEFAULT '0', +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) +COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table4 values 
('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1); +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version +from link_table WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and 
link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +# Point query +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +1 3 1 2 a11 125 +1 4 1 2 a11 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1) and id2 IN (2) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +1 3 1 2 a11 125 +1 4 1 2 a11 125 +2 2 1 2 a10 125 +2 3 1 2 a11 125 +2 4 1 2 a11 125 +# Prefix range query +# Prefix range query with SK +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME DESC LIMIT 10; +id1 id2 link_type visibility data time version +1 2 3 3 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, 
visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME ASC LIMIT 10; +id1 id2 link_type visibility data time version +1 2 3 3 a10 10 125 +# Prefix range query with SK with limits +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0; +id1 id2 link_type visibility data 
time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,10; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,5; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,1; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,10; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,5; +id1 id2 link_type visibility data time 
version +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,1; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,10; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,5; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,1; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 
4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,10; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,5; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,1; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,0; +id1 id2 link_type visibility data time version +# Prefix range query with PK +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 DESC; +id1 id2 link_type +1 10 3 +1 9 3 +1 8 3 +1 7 3 +1 6 3 +1 5 3 +1 4 3 +1 3 3 +1 2 3 +1 1 3 +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; +id1 id2 link_type +1 1 3 +1 2 3 +1 3 3 +1 4 3 +1 5 3 +1 6 3 +1 7 3 +1 8 3 +1 9 3 +1 10 3 +# Prefix range query with PK + value +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 DESC; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 5 3 3 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 2 3 3 a10 10 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; +id1 id2 link_type visibility data time 
version +1 1 3 4 a10 10 125 +1 2 3 3 a10 10 125 +1 3 3 4 a11 11 125 +1 4 3 4 a11 11 125 +1 5 3 3 a12 12 125 +1 6 3 4 a12 12 125 +1 7 3 4 a12 12 125 +1 8 3 4 a13 13 125 +1 9 3 4 a14 14 125 +1 10 3 4 a15 15 125 +# Transaction +BEGIN; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +COMMIT; +BEGIN; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 cde 125 +ROLLBACK; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +# Data types +SELECT /*+ bypass */ id1 FROM link_table where link_type="3"; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1"; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1'; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01'; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL; +id1 +DROP TABLE 
count_table; +DROP TABLE link_table; +DROP TABLE link_table3; +DROP TABLE link_table2; +DROP TABLE id_table; +DROP TABLE node_table; +DROP TABLE link_table5; +DROP TABLE link_table6; +DROP TABLE link_table4; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result new file mode 100644 index 00000000000..1f687dfec53 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result @@ -0,0 +1,693 @@ +CREATE TABLE `link_table` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id1_type` int(10) unsigned NOT NULL DEFAULT '0' , +`id2` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id2_type` int(10) unsigned NOT NULL DEFAULT '0' , +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , +`visibility` tinyint(3) NOT NULL DEFAULT '0' , +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , +`time` int(10) unsigned NOT NULL DEFAULT '0' , +`version` bigint(20) unsigned NOT NULL DEFAULT '0' , +PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' , +KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , +`version` , `data`) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +CREATE TABLE `link_table2` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id1_type` int(10) unsigned NOT NULL DEFAULT '0' , +`id2` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id2_type` int(10) unsigned NOT NULL DEFAULT '0' , +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , +`visibility` tinyint(3) NOT NULL DEFAULT '0' , +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , +`time` int(10) unsigned NOT NULL DEFAULT '0' , +`version` bigint(20) unsigned NOT NULL DEFAULT '0' , +PRIMARY KEY (`link_type` , `id1` , `id2`) +COMMENT 'cf_link' , +KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , +`version` , `data`) COMMENT 'cf_link_id1_type' +) 
ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9; +insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125); +insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125); +insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125); +insert into link_table 
values (2, 1, 9, 2, 4, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125); +insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125); +insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125); +insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125); +insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125); +insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125); +insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125); +insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125); +insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125); +insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125); +insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125); +insert into link_table2 select * from link_table; +CREATE TABLE `id_table` ( +`id` bigint(20) NOT NULL DEFAULT '0', +`type` int(11) NOT NULL DEFAULT '0', +`row_created_time` int(11) NOT NULL DEFAULT '0', +`hash_key` varchar(255) NOT NULL DEFAULT '', +`is_deleted` tinyint(4) DEFAULT '0', +PRIMARY KEY (`id`), +KEY `type_id` (`type`,`id`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED +KEY_BLOCK_SIZE=8; +insert into id_table values (1, 1, 10, '111', 0); +insert into id_table values (2, 1, 10, 
'111', 1); +insert into id_table values (3, 1, 10, '111', 0); +insert into id_table values (4, 1, 10, '111', 1); +insert into id_table values (5, 1, 10, '111', 0); +insert into id_table values (6, 1, 10, '111', 1); +insert into id_table values (7, 1, 10, '111', 0); +insert into id_table values (8, 1, 10, '111', 1); +insert into id_table values (9, 1, 10, '111', 0); +insert into id_table values (10, 1, 10, '111', 1); +CREATE TABLE `node_table` ( +`id` bigint(20) unsigned NOT NULL DEFAULT '0', +`type` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +`update_time` int(10) unsigned NOT NULL DEFAULT '0', +`data` mediumtext COLLATE latin1_bin NOT NULL, +PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id', +KEY `id` (`id`) COMMENT 'cf_node' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into node_table values (1, 1, 1, 10, 'data'); +insert into node_table values (2, 1, 1, 10, 'data'); +insert into node_table values (3, 1, 1, 10, 'data'); +insert into node_table values (4, 1, 1, 10, 'data'); +insert into node_table values (5, 1, 1, 10, 'data'); +insert into node_table values (6, 1, 1, 10, 'data'); +insert into node_table values (7, 1, 1, 10, 'data'); +insert into node_table values (8, 1, 1, 10, 'data'); +insert into node_table values (9, 1, 1, 10, 'data'); +insert into node_table values (10, 1, 1, 10, 'data'); +CREATE TABLE `count_table` ( +`id` bigint(20) unsigned NOT NULL DEFAULT '0', +`type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`count` int(10) unsigned NOT NULL DEFAULT '0', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into count_table values (2, 1, 1, 1, 10, 20); +insert into count_table 
values (3, 1, 1, 1, 10, 20); +insert into count_table values (4, 1, 1, 1, 10, 20); +insert into count_table values (5, 1, 1, 1, 10, 20); +insert into count_table values (6, 1, 1, 1, 10, 20); +insert into count_table values (7, 1, 1, 1, 10, 20); +insert into count_table values (8, 1, 1, 1, 10, 20); +insert into count_table values (9, 1, 1, 1, 10, 20); +insert into count_table values (10, 1, 1, 1, 10, 20); +CREATE TABLE `link_table5` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(3) NOT NULL DEFAULT '0', +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); +CREATE TABLE `link_table3` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(4) NOT NULL DEFAULT '0', +`data` text COLLATE latin1_bin NOT NULL, +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL 
DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`) +COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); +CREATE TABLE `link_table6` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(4) NOT NULL DEFAULT '0', +`data` text COLLATE latin1_bin NOT NULL, +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`, +`data`(255)) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into link_table6 values (1, 1, 2, 2, 1, 1, +'data12_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 3, 2, 1, 2, 
+'data13_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 4, 2, 1, 2, +'data14_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 5, 2, 1, 1, +'data15_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 1, 2, 1, 1, +'data21_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 2, 2, 1, 1, +'data22_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 3, 2, 1, 1, 
+'data32_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +CREATE TABLE `link_table4` ( +`id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', +`raw_key` text COLLATE latin1_bin, +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(3) NOT NULL DEFAULT '0', +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) +COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1); +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT 
id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version +from link_table WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +# Point query +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 
id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +1 3 1 2 a11 125 +1 4 1 2 a11 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1) and id2 IN (2) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +1 3 1 2 a11 125 +1 4 1 2 a11 125 +2 2 1 2 a10 125 +2 3 1 2 a11 125 +2 4 1 2 a11 125 +# Prefix range query +# Prefix range query with SK +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME DESC LIMIT 10; +id1 id2 link_type visibility data time version +1 2 3 3 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME ASC LIMIT 10; +id1 id2 link_type visibility data time version +1 2 3 3 a10 10 125 +# Prefix range query with SK with limits +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ 
id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,10; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,5; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 
125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,1; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,10; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,5; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,1; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, 
version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,10; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,5; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,1; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,10; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,5; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,1; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table 
FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,0; +id1 id2 link_type visibility data time version +# Prefix range query with PK +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 DESC; +id1 id2 link_type +1 10 3 +1 9 3 +1 8 3 +1 7 3 +1 6 3 +1 5 3 +1 4 3 +1 3 3 +1 2 3 +1 1 3 +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; +id1 id2 link_type +1 1 3 +1 2 3 +1 3 3 +1 4 3 +1 5 3 +1 6 3 +1 7 3 +1 8 3 +1 9 3 +1 10 3 +# Prefix range query with PK + value +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 DESC; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 5 3 3 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 2 3 3 a10 10 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; +id1 id2 link_type visibility data time version +1 1 3 4 a10 10 125 +1 2 3 3 a10 10 125 +1 3 3 4 a11 11 125 +1 4 3 4 a11 11 125 +1 5 3 3 a12 12 125 +1 6 3 4 a12 12 125 +1 7 3 4 a12 12 125 +1 8 3 4 a13 13 125 +1 9 3 4 a14 14 125 +1 10 3 4 a15 15 125 +# Transaction +BEGIN; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +COMMIT; +BEGIN; +SELECT /*+ bypass */ 
id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 cde 125 +ROLLBACK; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +# Data types +SELECT /*+ bypass */ id1 FROM link_table where link_type="3"; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1"; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1'; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01'; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL; +id1 +DROP TABLE count_table; +DROP TABLE link_table; +DROP TABLE link_table3; +DROP TABLE link_table2; +DROP TABLE id_table; +DROP TABLE node_table; +DROP TABLE link_table5; +DROP TABLE link_table6; +DROP TABLE link_table4; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result b/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result new file mode 100644 index 00000000000..12c5bc4f85c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result @@ -0,0 +1,66 @@ +set debug_sync='RESET'; +set global rocksdb_debug_ttl_read_filter_ts = -10; +connect conn1, localhost, root,,; +connection default; +CREATE TABLE t1 (id INT, value INT, KEY (id), KEY (value)) ENGINE=ROCKSDB; +CREATE TABLE 
t2 (id INT, value INT) ENGINE=ROCKSDB; +CREATE TABLE t3 (id INT, kp1 INT, PRIMARY KEY (id), KEY(kp1)) ENGINE=ROCKSDB COMMENT='ttl_duration=1'; +INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4), (5,5); +INSERT INTO t2 SELECT * FROM t1; +INSERT INTO t3 SELECT * FROM t1; +connection conn1; +set debug_sync='rocksdb.check_flags_rmi SIGNAL parked WAIT_FOR go'; +SELECT value FROM t1 WHERE value = 3; +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query execution was interrupted +set debug_sync='RESET'; +connection conn1; +set debug_sync='rocksdb.check_flags_rmi_scan SIGNAL parked WAIT_FOR go'; +SELECT DISTINCT(id) FROM t1 WHERE value = 5 AND id IN (1, 3, 5); +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query execution was interrupted +set debug_sync='RESET'; +connection conn1; +set debug_sync='rocksdb.check_flags_inwd SIGNAL parked WAIT_FOR go'; +SELECT value FROM t1 WHERE value > 3; +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query execution was interrupted +set debug_sync='RESET'; +connection conn1; +set debug_sync='rocksdb.check_flags_rnwd SIGNAL parked WAIT_FOR go'; +SELECT id FROM t2; +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query execution was interrupted +set debug_sync='RESET'; +connection conn1; +set debug_sync='rocksdb.check_flags_ser SIGNAL parked WAIT_FOR go'; +SELECT kp1 FROM t3 ORDER BY kp1; +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query execution was interrupted +connection default; +disconnect conn1; +set debug_sync='RESET'; +set global 
rocksdb_debug_ttl_read_filter_ts = DEFAULT; +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result b/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result index e5aeb57ebdf..1c45cfd09fe 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result @@ -8,7 +8,7 @@ ERROR HY000: Table without primary key cannot be created outside mysql schema. CREATE TABLE IF NOT EXISTS mysql_table_2 (a INT) ENGINE=ROCKSDB; ERROR HY000: Table without primary key cannot be created outside mysql schema. CREATE TABLE mysql_table_no_cols ENGINE=ROCKSDB; -ERROR HY000: Table without primary key cannot be created outside mysql schema. +ERROR 42000: A table must have at least 1 column CREATE TABLE mysql.mysql_table_2 (a INT) ENGINE=ROCKSDB; CREATE TABLE mysql_primkey (a INT PRIMARY KEY, b INT, c INT, d INT, INDEX (c)) ENGINE=ROCKSDB; ALTER TABLE mysql_primkey DROP b, DROP a, ADD (f INT PRIMARY KEY); @@ -29,10 +29,24 @@ DROP INDEX `PRIMARY` ON mysql_primkey4; ERROR HY000: Table without primary key cannot be created outside mysql schema. ALTER TABLE mysql.mysql_table ADD PRIMARY KEY (a); ALTER TABLE mysql.mysql_table DROP PRIMARY KEY; +SET default_storage_engine=ROCKSDB; +CREATE TABLE mysql_noeng(a INT, b INT); +ERROR HY000: Table without primary key cannot be created outside mysql schema. +SET sql_mode=""; +CREATE TABLE mysql_noeng_sub(a INT, b INT) ENGINE=BOGUS_ENGINE; +ERROR HY000: Table without primary key cannot be created outside mysql schema. +CREATE TABLE mysql_primkey5 LIKE mysql_primkey; +SET @@global.block_create_no_primary_key = false; +CREATE TABLE mysql_no_primkey (a INT) ENGINE=ROCKSDB; +SET @@global.block_create_no_primary_key = true; +CREATE TABLE mysql_block_no_primkey LIKE mysql_no_primkey; +ERROR HY000: Table without primary key cannot be created outside mysql schema. 
DROP TABLE mysql_primkey; DROP TABLE mysql_primkey2; DROP TABLE mysql_primkey3; DROP TABLE mysql_primkey4; +DROP TABLE mysql_primkey5; +DROP TABLE mysql_no_primkey; USE mysql; DROP TABLE mysql_table; DROP TABLE mysql_table_2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result index 50733f81598..1e2636c873a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result @@ -36,8 +36,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default alter table t1 modify i bigint;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -52,7 +52,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -89,8 +89,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> 
<State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default alter table t1 rename t1_new;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -105,7 +105,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> select * from t1_new; i 1 @@ -143,8 +143,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default drop table t1;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -160,8 +160,8 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -193,8 +193,8 @@ connection: default (for show processlist) # both con1 and default exist show 
processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default drop table t1;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -209,7 +209,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -241,9 +241,9 @@ connection: default (for show processlist) # both con1 and con2 exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: con2 alter table t1 modify i bigint;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -259,9 +259,9 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> 
root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -293,8 +293,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default create index idx1 on t1 (i);; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -309,7 +309,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -333,8 +333,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> 
<Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default drop index idx1 on t1;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -349,7 +349,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -381,8 +381,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default truncate t1;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -397,7 +397,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -429,8 +429,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> 
test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -445,7 +445,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -469,8 +469,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default drop trigger ins_sum;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -485,7 +485,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -517,8 +517,8 @@ connection: default (for show processlist) # both con1 and default 
exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default optimize table t1;; Table Op Msg_type Msg_text @@ -537,7 +537,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 1; @@ -569,8 +569,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default lock tables t1 write;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -585,7 +585,7 @@ set high_priority_ddl = 0; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> unlock tables; drop user test_user1@localhost; drop user test_user2@localhost; @@ 
-628,8 +628,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default alter table t1 modify i bigint;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -637,7 +637,7 @@ alter high_priority table t1 modify i bigint;; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 0; @@ -674,8 +674,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default alter table t1 rename t1_new;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -683,7 +683,7 @@ alter high_priority table t1 rename t1_new;; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test 
<Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> select * from t1_new; i 1 @@ -721,8 +721,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default drop table t1;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -731,8 +731,8 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 0; @@ -764,8 +764,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default drop 
table t1;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -773,7 +773,7 @@ drop high_priority table t1;; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 0; @@ -805,9 +805,9 @@ connection: default (for show processlist) # both con1 and con2 exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: con2 alter table t1 modify i bigint;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -816,9 +816,9 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> 
<RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 0; @@ -850,8 +850,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default create index idx1 on t1 (i);; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -859,7 +859,7 @@ create high_priority index idx1 on t1 (i);; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 0; @@ -883,8 +883,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default drop index idx1 on t1;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -892,7 +892,7 @@ drop high_priority index idx1 on t1;; connection: default (for show 
processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 0; @@ -924,8 +924,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default truncate t1;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -933,7 +933,7 @@ truncate high_priority t1;; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 0; @@ -965,8 +965,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;; ERROR 
HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -974,7 +974,7 @@ create high_priority trigger ins_sum before insert on t1 for each row set @sum = connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 0; @@ -998,8 +998,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default drop trigger ins_sum;; ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1 @@ -1007,7 +1007,7 @@ drop high_priority trigger ins_sum;; connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> ## Test parameters: ## use_sys_var = 0; @@ -1039,8 +1039,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 -<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> 
<Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> +<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> connection: default optimize table t1;; Table Op Msg_type Msg_text @@ -1052,7 +1052,7 @@ test.t1 optimize status OK connection: default (for show processlist) show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id -<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0 +<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID> drop user test_user1@localhost; drop user test_user2@localhost; drop table if exists t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/delete_before_lock.result b/storage/rocksdb/mysql-test/rocksdb/r/delete_before_lock.result deleted file mode 100644 index a8ea5e1677f..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/r/delete_before_lock.result +++ /dev/null @@ -1,22 +0,0 @@ -connect con, localhost, root,,; -connection default; -set debug_sync='RESET'; -drop table if exists t1; -create table t1 (id1 int, id2 int, value int, primary key (id1, id2)) engine=rocksdb; -insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1), (2, 2, 2); -connection con; -set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; -update t1 set value=100 where id1=1; -connection default; -set debug_sync='now WAIT_FOR parked'; -delete from t1 where id1=1 and id2=1; -set debug_sync='now SIGNAL go'; -connection con; -select * from t1 where id1=1 for update; -id1 id2 value -1 2 100 -1 3 100 -connection default; -disconnect con; -set debug_sync='RESET'; -drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result b/storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result new file mode 100644 index 00000000000..4386ad590ae --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result @@ -0,0 +1,38 @@ +create table t1 ( +pk int not null primary key, +col1 varchar(10) +) engine=rocksdb; 
+insert into t1 values (1,1),(2,2),(3,3); +set session debug= "+d,myrocks_busy_loop_on_row_read"; +select * from t1 where pk=1; +# testing unclean shutdown on stuck instance +# Run shutdown sql command with forcing kill (exit code 127) +shutdown 1; +Got one of the listed errors +# verifying exit code is printed +# restart the server +shutdown 230; +Got one of the listed errors +# restart the server +# verifying SHUTDOWN is refused if exit code > 255 +SHUTDOWN 256; +ERROR HY000: exit code must be 0..255 +SHUTDOWN 10000; +ERROR HY000: exit code must be 0..255 +# verifying SHUTDOWN is refused if instances are not read only +SHUTDOWN 0 read_only; +ERROR HY000: Only read_only instance can be killed. +SHUTDOWN 127 read_only; +ERROR HY000: Only read_only instance can be killed. +SHUTDOWN 127; +Got one of the listed errors +# restart the server +set session debug= "+d,myrocks_busy_loop_on_row_read"; +select * from t1 where pk=1; +SET GLOBAL read_only=1; +# verifying SHUTDOWN read_only works with read_only instance +# Run shutdown sql command with forcing kill (exit code 127) +shutdown 255 read_only; +Got one of the listed errors +# restart the server +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result b/storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result new file mode 100644 index 00000000000..7fede0ac603 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result @@ -0,0 +1,3503 @@ +set global debug="+d,force_group_by"; +drop table if exists t1; +create table t1 ( +a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(248) default ' ' +) engine=RocksDB; +insert into t1 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), 
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'), +('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'), +('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'), +('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'), +('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'), +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), 
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'), +('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'), +('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'), +('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'), +('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'); +create index idx_t1_0 on t1 (a1); +create index idx_t1_1 on t1 (a1,a2,b,c); +create index idx_t1_2 on t1 (a1,a2,b); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +drop table if exists t2; +create table t2 ( +a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(248) default ' ' +) engine=RocksDB; +insert into t2 select * from t1; +insert into t2 (a1, a2, b, c, d) values +('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'), +('a','a','a',NULL,'xyz'), +('a','a','b',NULL,'xyz'), +('a','b','a',NULL,'xyz'), 
+('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'), +('d','b','b',NULL,'xyz'), +('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'), +('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'), +('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'), +('a','a','a',NULL,'xyz'), +('a','a','b',NULL,'xyz'), +('a','b','a',NULL,'xyz'), +('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'), +('d','b','b',NULL,'xyz'), +('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'), +('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'); +create index idx_t2_0 on t2 (a1); +create index idx_t2_1 on t2 (a1,a2,b,c); +create index idx_t2_2 on t2 (a1,a2,b); +analyze table t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +drop table if exists t3; +create table t3 ( +a1 char(1), a2 char(1), b char(1), c char(4) not null, d char(3), dummy char(1) default ' ' +) engine=RocksDB; +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), 
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +insert into t3 (a1, a2, b, c, d) values 
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), 
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +create index idx_t3_0 on t3 (a1); +create index idx_t3_1 on t3 (a1,a2,b,c); +create index idx_t3_2 on t3 (a1,a2,b); +analyze table t3; +Table Op Msg_type Msg_text +test.t3 analyze status OK +explain select a1, min(a2) from t1 group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using index for group-by +explain select a1, max(a2) from t1 group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 65 NULL 126 Using index for group-by +explain select a1, min(a2), max(a2) from t1 group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using index for group-by +explain select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by +explain select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 
147 NULL 501 Using index for group-by +explain select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 # NULL # Using index for group-by +explain select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using index for group-by +explain select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by +explain select min(a2) from t1 group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using index for group-by +explain select a2, min(c), max(c) from t1 group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by +select a1, min(a2) from t1 group by a1; +a1 min(a2) +a a +b a +c a +d a +select a1, max(a2) from t1 group by a1; +a1 max(a2) +a b +b b +c b +d b +select a1, min(a2), max(a2) from t1 group by a1; +a1 min(a2) max(a2) +a a b +b a b +c a b +d a b +select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a a111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a a211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a a a311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a a411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b; +a1 a2 b max(c) min(c) +a a a d111 a111 +a a b h112 e112 +a b a l121 i121 +a b b p122 m122 +b a a d211 a211 +b a b h212 e212 +b b a l221 i221 +b b b p222 m222 +c a a d311 a311 +c a b h312 e312 +c b 
a l321 i321 +c b b p322 m322 +d a a d411 a411 +d a b h412 e412 +d b a l421 i421 +d b b p422 m422 +select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b; +a1 a2 b max(c) min(c) +a a NULL a999 a777 +a a a d111 a111 +a a b h112 e112 +a b a l121 i121 +a b b p122 m122 +b a a d211 a211 +b a b h212 e212 +b b a l221 i221 +b b b p222 m222 +c a NULL c999 c777 +c a a d311 a311 +c a b h312 e312 +c b a l321 i321 +c b b p322 m322 +d a a d411 a411 +d a b h412 e412 +d b a l421 i421 +d b b p422 m422 +e a a NULL NULL +e a b NULL NULL +select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1; +min(a2) a1 max(a2) min(a2) a1 +a a b a a +a b b a b +a c b a c +a d b a d +select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b; +a1 b min(c) a1 max(c) b a2 max(c) max(c) +a a a111 a d111 a a d111 d111 +a b e112 a h112 b a h112 h112 +a a i121 a l121 a b l121 l121 +a b m122 a p122 b b p122 p122 +b a a211 b d211 a a d211 d211 +b b e212 b h212 b a h212 h212 +b a i221 b l221 a b l221 l221 +b b m222 b p222 b b p222 p222 +c a a311 c d311 a a d311 d311 +c b e312 c h312 b a h312 h312 +c a i321 c l321 a b l321 l321 +c b m322 c p322 b b p322 p322 +d a a411 d d411 a a d411 d411 +d b e412 d h412 b a h412 h412 +d a i421 d l421 a b l421 l421 +d b m422 d p422 b b p422 p422 +select min(a2) from t1 group by a1; +min(a2) +a +a +a +a +select a2, min(c), max(c) from t1 group by a1,a2,b; +a2 min(c) max(c) +a a111 d111 +a e112 h112 +b i121 l121 +b m122 p122 +a a211 d211 +a e212 h212 +b i221 l221 +b m222 p222 +a a311 d311 +a e312 h312 +b i321 l321 +b m322 p322 +a a411 d411 +a e412 h412 +b i421 l421 +b m422 p422 +explain select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +id select_type table type possible_keys key 
key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1002 Using where; Using index for group-by +explain select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1002 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1503 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1503 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 2004 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range 
idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 2004 Using where; Using index for group-by +explain select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1503 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +id select_type table type possible_keys key key_len ref 
rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a a111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a a211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a a 
a311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +a1 a2 b min(c) max(c) +b a a a211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a a a311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a a411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +a1 a2 b max(c) +a a a d111 +a a b h112 +a b a l121 +a b b p122 +c a a d311 +c a b h312 +c b a l321 +c b b p322 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +a1 max(c) +a d111 +a h112 +a l121 +a p122 +c d311 +c h312 +c l321 +c p322 +d d411 +d h412 +d l421 +d p422 +select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a a111 d111 +a a b e112 h112 +b a a a211 d211 +b a b e212 h212 +c a a a311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a a411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +a1 a2 b max(c) +b a a d211 +b a b h212 +b b a l221 +b b b p222 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +a1 a2 b min(c) max(c) +b a a a211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +d a a a411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +a1 a2 b max(c) +a b a l121 +a b b p122 +b b a l221 +b b b p222 +c b a l321 +c b b p322 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +a1 a2 b min(c) max(c) +a b a i121 l121 +a b b m122 p122 +b b a i221 l221 +b b b m222 p222 +c b a 
i321 l321 +c b b m322 p322 +d b a i421 l421 +d b b m422 p422 +select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +a1 min(c) max(c) +b a211 d211 +b e212 h212 +b i221 l221 +b m222 p222 +c a311 d311 +c e312 h312 +c i321 l321 +c m322 p322 +d a411 d411 +d e412 h412 +d i421 l421 +d m422 p422 +select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b; +a1 max(c) +a d111 +a h112 +a l121 +a p122 +b d211 +b h212 +b l221 +b p222 +d d411 +d h412 +d l421 +d p422 +select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b; +a1 a2 b max(c) +a a NULL a999 +a a a d111 +a a b h112 +a b a l121 +a b b p122 +b a a d211 +b a b h212 +b b a l221 +b b b p222 +c a NULL c999 +c a a d311 +c a b h312 +c b a l321 +c b b p322 +select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b; +a1 a2 b min(c) max(c) +a a NULL a777 a999 +a a a a111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a a211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a NULL c777 c999 +c a a a311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +a1 a2 b min(c) max(c) +b a a a211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a NULL c777 c999 +c a a a311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a a411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +e a a NULL NULL +e a b NULL NULL +select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +a1 a2 b max(c) +a a NULL a999 +a a a d111 +a a b h112 +a b a l121 +a b b p122 +c a NULL c999 +c a a d311 +c a b h312 +c b a l321 +c b b p322 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +e a a NULL +e a b NULL +select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +a1 max(c) +a a999 +a d111 +a h112 +a l121 +a p122 +c c999 +c d311 +c h312 +c l321 +c p322 +d d411 +d h412 +d l421 +d p422 +e NULL +e NULL +select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' 
group by a1,a2,b; +a1 a2 b min(c) max(c) +a a NULL a777 a999 +a a a a111 d111 +a a b e112 h112 +b a a a211 d211 +b a b e212 h212 +c a NULL c777 c999 +c a a a311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a a411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +e a a NULL NULL +e a b NULL NULL +select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +a1 a2 b max(c) +b a a d211 +b a b h212 +b b a l221 +b b b p222 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +a1 a2 b min(c) max(c) +b a a a211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +d a a a411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +a1 a2 b max(c) +a b a l121 +a b b p122 +b b a l221 +b b b p222 +c b a l321 +c b b p322 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +a1 a2 b min(c) max(c) +a b a i121 l121 +a b b m122 p122 +b b a i221 l221 +b b b m222 p222 +c b a i321 l321 +c b b m322 p322 +d b a i421 l421 +d b b m422 p422 +select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +a1 min(c) max(c) +b a211 d211 +b e212 h212 +b i221 l221 +b m222 p222 +c c777 c999 +c a311 d311 +c e312 h312 +c i321 l321 +c m322 p322 +d a411 d411 +d e412 h412 +d i421 l421 +d m422 p422 +e NULL NULL +e NULL NULL +select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b; +a1 max(c) +a a999 +a d111 +a h112 +a l121 +a p122 +b d211 +b h212 +b l221 +b p222 +d d411 +d h412 +d l421 +d p422 +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using 
index for group-by +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by +explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by +explain select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by +explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; +id select_type table 
type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by +explain select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by +explain select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by +explain select a1,max(c),min(c) from t2 where (a2 = 'a') and 
(b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by +explain select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by +explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 251 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 251 Using where; Using index for group-by +explain select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by +explain select a1,a2, max(c) from t2 where 
(b = 'b' or b = 'a') group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by +explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by +explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by +explain select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by +explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +a1 a2 b max(c) min(c) +a a b h112 e112 +b a b h212 e212 +c a b h312 e312 +d a b h412 e412 +select 
a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +a1 a2 b max(c) min(c) +a b b p122 e112 +b b b p222 e212 +c b b p322 e312 +d b b p422 e412 +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +a1 a2 b max(c) min(c) +a a b h112 a111 +b a b h212 a211 +c a b h312 a311 +d a b h412 a411 +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1; +a1 a2 b max(c) min(c) +a b b p122 a111 +b b b p222 a211 +c b b p322 a311 +d b b p422 a411 +select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +a1 max(c) min(c) +a h112 e112 +b h212 e212 +c h312 e312 +d h412 e412 +select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +a1 max(c) min(c) +a p122 e112 +b p222 e212 +c p322 e312 +d p422 e412 +select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +a1 max(c) min(c) +a h112 a111 +b h212 a211 +c h312 a311 +d h412 a411 +select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2; +a1 a2 b max(c) +a a b h112 +a b b p122 +b a b h212 +b b b p222 +c a b h312 +c b b p322 +d a b h412 +d b b p422 +select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; +a1 a2 b max(c) +a a b h112 +a b b p122 +b a b h212 +b b b p222 +c a b h312 +c b b p322 +d a b h412 +d b b p422 +select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2; +a1 a2 b min(c) max(c) +a a b e112 h112 +a b b m122 p122 +b a b e212 h212 +b b b m222 p222 +c a b e312 h312 +c b b m322 p322 +d a b e412 h412 +d b b m422 p422 +select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; +a1 a2 b min(c) max(c) +a a b a111 h112 +a b b i121 p122 +b a b a211 h212 +b b b i221 p222 +c a b a311 h312 +c b b i321 p322 +d a b a411 h412 +d b b i421 p422 +select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2; +a1 a2 max(c) +a a h112 +a b p122 +b a h212 +b b p222 +c a h312 +c b p322 +d a h412 
+d b p422 +select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; +a1 a2 max(c) +a a h112 +a b p122 +b a h212 +b b p222 +c a h312 +c b p322 +d a h412 +d b p422 +select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +a1 a2 b max(c) min(c) +a a b h112 e112 +b a b h212 e212 +c a b h312 e312 +d a b h412 e412 +e a b NULL NULL +select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +a1 a2 b max(c) min(c) +a b b p122 e112 +b b b p222 e212 +c b b p322 e312 +d b b p422 e412 +select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +a1 a2 b max(c) min(c) +a a b h112 a111 +b a b h212 a211 +c a b h312 a311 +d a b h412 a411 +e a b NULL NULL +select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +a1 max(c) min(c) +a h112 e112 +b h212 e212 +c h312 e312 +d h412 e412 +e NULL NULL +select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +a1 max(c) min(c) +a p122 e112 +b p222 e212 +c p322 e312 +d p422 e412 +select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +a1 max(c) min(c) +a h112 a111 +b h212 a211 +c h312 a311 +d h412 a411 +e NULL NULL +select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2; +a1 a2 b max(c) +a a b h112 +a b b p122 +b a b h212 +b b b p222 +c a b h312 +c b b p322 +d a b h412 +d b b p422 +e a b NULL +select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; +a1 a2 b max(c) +a a b h112 +a b b p122 +b a b h212 +b b b p222 +c a b h312 +c b b p322 +d a b h412 +d b b p422 +e a b NULL +select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2; +a1 a2 b min(c) max(c) +a a b e112 h112 +a b b m122 p122 +b a b e212 h212 +b b b m222 p222 +c a b e312 h312 +c b b m322 p322 +d a b e412 h412 +d b b m422 p422 +e a b NULL NULL +select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; +a1 a2 b min(c) max(c) +a a b a111 h112 +a 
b b i121 p122 +b a b a211 h212 +b b b i221 p222 +c a b a311 h312 +c b b i321 p322 +d a b a411 h412 +d b b i421 p422 +e a b NULL NULL +select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2; +a1 a2 max(c) +a a h112 +a b p122 +b a h212 +b b p222 +c a h312 +c b p322 +d a h412 +d b p422 +e a NULL +select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; +a1 a2 max(c) +a a h112 +a b p122 +b a h212 +b b p222 +c a h312 +c b p322 +d a h412 +d b p422 +e a NULL +select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +a1 a2 b max(c) min(c) +a a b h112 e112 +b a b h212 e212 +c a b h312 e312 +select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +a1 a2 b max(c) min(c) +a b b p122 e112 +b b b p222 e212 +c b b p322 e312 +select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +a1 a2 b max(c) min(c) +a a b h112 a111 +b a b h212 a211 +c a b h312 a311 +select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +a1 max(c) min(c) +a h112 e112 +b h212 e212 +c h312 e312 +select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +a1 max(c) min(c) +a p122 e112 +b p222 e212 +c p322 e312 +select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +a1 max(c) min(c) +a h112 a111 +b h212 a211 +c h312 a311 +explain select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by 
a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 126 Using where; Using index for group-by +explain select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 251 Using where; Using index for group-by +explain select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 251 Using where; Using index for group-by +select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1; +a1 a2 b min(c) +a a NULL a777 +c a NULL c777 +select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; +a1 a2 b min(c) +select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1; +a1 a2 b max(c) +a a NULL a999 +c a NULL c999 +select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; +a1 a2 b max(c) +select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2; +a1 a2 b min(c) +a a NULL a777 +c a NULL c777 +select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2; +a1 a2 b max(c) +a a NULL a999 +c a NULL c999 +select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; +a1 a2 b min(c) max(c) +a a NULL a777 a999 +c a NULL c777 c999 +select a1,a2,b,min(c),max(c) from t2 
where b is NULL group by a1,a2; +a1 a2 b min(c) max(c) +a a NULL a777 a999 +c a NULL c777 c999 +explain select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 
NULL 501 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 
'g112') or (c between 'd000' and 'i110') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b; 
+id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using 
where; Using index for group-by +select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b; +a1 a2 b max(c) +a a a d111 +a a b h112 +a b a l121 +a b b p122 +b a a d211 +b a b h212 +b b a l221 +b b b p222 +c a a d311 +c a b h312 +c b a l321 +c b b p322 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a b111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a b211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a a b311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a b411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b; +a1 a2 b max(c) +a a b h112 +a b a l121 +a b b p122 +b a b h212 +b b a l221 +b b b p222 +c a b h312 +c b a l321 +c b b p322 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a b g112 h112 +a b a i121 l121 +a b b m122 p122 +b a b f212 h212 +b b a i221 l221 +b b b m222 p222 +c a b f312 h312 +c b a i321 l321 +c b b m322 p322 +d a b f412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b; +a1 a2 b max(c) +select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b; +a1 a2 b min(c) max(c) +select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b; +a1 a2 b max(c) +a a a d111 +a a b h112 +a b a k121 +b a a d211 +b a b h212 +b b a k221 +c a a d311 +c a b h312 +c b a j321 +d a a d411 +d a b h412 +d b a j421 +select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a a111 d111 +a a b e112 h112 +a b a i121 k121 +b a a a211 d211 +b a b e212 h212 +b b a i221 k221 +c a a a311 d311 +c a b e312 h312 +c b a i321 j321 +d a a a411 d411 +d a b e412 h412 +d b a i421 j421 +select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; 
+a1 a2 b max(c) +a a a d111 +a a b h112 +a b a l121 +a b b p122 +b a a d211 +b a b h212 +b b a l221 +b b b p222 +c a a d311 +c a b h312 +c b a l321 +c b b p322 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a b111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a b211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a a b311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a b411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +a1 a2 b max(c) +a a a d111 +a a b h112 +a b a l121 +a b b p122 +b a a d211 +b a b h212 +b b a l221 +b b b p222 +c a a d311 +c a b h312 +c b a l321 +c b b p322 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a a111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a a211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a a a311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a a411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a c111 d111 +a a b e112 g112 +b a a b211 d211 +b a b e212 f212 +c a a b311 d311 +c a b e312 f312 +d a a b411 d411 +d a b e412 f412 +select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a a111 c111 +b a a a211 c211 +c a a a311 c311 +d a a a411 c411 +d a b g412 g412 +d b a k421 k421 +select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a c111 d111 +a a b e112 h112 +b a a b211 d211 +b a b e212 h212 +c a a b311 d311 +c a b e312 
h312 +d a a b411 d411 +d a b e412 h412 +select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a b111 d111 +a a b e112 h112 +b a a b211 d211 +b a b e212 h212 +c a a b311 d311 +c a b e312 h312 +d a a b411 d411 +d a b e412 h412 +select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b; +a1 a2 b max(c) +a a a d111 +a a b h112 +a b a l121 +a b b p122 +b a a d211 +b a b h212 +b b a l221 +b b b p222 +c a NULL c999 +c a a d311 +c a b h312 +c b a l321 +c b b p322 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a b111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a b211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a NULL c777 c999 +c a a b311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a b411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b; +a1 a2 b max(c) +a a b h112 +a b a l121 +a b b p122 +b a b h212 +b b a l221 +b b b p222 +c a b h312 +c b a l321 +c b b p322 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a b g112 h112 +a b a i121 l121 +a b b m122 p122 +b a b f212 h212 +b b a i221 l221 +b b b m222 p222 +c a b f312 h312 +c b a i321 l321 +c b b m322 p322 +d a b f412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b; +a1 a2 b max(c) +select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b; +a1 a2 b min(c) max(c) +select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b; +a1 a2 b max(c) +a a NULL a999 +a a a d111 +a a b h112 +a b a k121 +b a a d211 +b a b h212 +b b a k221 +c a NULL c999 +c a a d311 +c a b h312 +c b a j321 +d a a d411 +d a b h412 +d b a j421 +select a1,a2,b,min(c),max(c) 
from t2 where (c < 'k321') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a NULL a777 a999 +a a a a111 d111 +a a b e112 h112 +a b a i121 k121 +b a a a211 d211 +b a b e212 h212 +b b a i221 k221 +c a NULL c777 c999 +c a a a311 d311 +c a b e312 h312 +c b a i321 j321 +d a a a411 d411 +d a b e412 h412 +d b a i421 j421 +select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +a1 a2 b max(c) +a a a d111 +a a b h112 +a b a l121 +a b b p122 +b a a d211 +b a b h212 +b b a l221 +b b b p222 +c a NULL c999 +c a a d311 +c a b h312 +c b a l321 +c b b p322 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a b111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a b211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a NULL c777 c999 +c a a b311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a b411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +a1 a2 b max(c) +a a NULL a999 +a a a d111 +a a b h112 +a b a l121 +a b b p122 +b a a d211 +b a b h212 +b b a l221 +b b b p222 +c a NULL c999 +c a a d311 +c a b h312 +c b a l321 +c b b p322 +d a a d411 +d a b h412 +d b a l421 +d b b p422 +select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a NULL a777 a999 +a a a a111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a a211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a NULL c777 c999 +c a a a311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a a411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a c111 d111 +a a b e112 g112 +b a a b211 d211 +b a b e212 f212 +c a NULL c777 c999 +c a a b311 d311 +c a b e312 f312 
+d a a b411 d411 +d a b e412 f412 +select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a NULL a777 a999 +a a a a111 c111 +b a a a211 c211 +c a a a311 c311 +d a a a411 c411 +d a b g412 g412 +d b a k421 k421 +select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a c111 d111 +a a b e112 h112 +b a a b211 d211 +b a b e212 h212 +c a NULL c777 c999 +c a a b311 d311 +c a b e312 h312 +d a a b411 d411 +d a b e412 h412 +explain select a1,a2,b,min(c),max(c) from t1 +where exists ( select * from t2 where t2.c = t1.c ) +group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 index idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1000 Using where; Using index +2 DEPENDENT SUBQUERY t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index +explain select a1,a2,b,min(c),max(c) from t1 +where exists ( select * from t2 where t2.c > 'b1' ) +group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by +2 SUBQUERY t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index +explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +id select_type table 
type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1002 Using where; Using index for group-by +explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1002 Using where; Using index for group-by +explain select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range 
idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a b e112 h112 +b a b e212 h212 +c a b e312 h312 +c b b m322 p322 +d a b e412 h412 +d b b m422 p422 +select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a c111 d111 +a a b e112 h112 +b a a b211 d211 +b a b e212 h212 +c a a b311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a b411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select 
a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +a1 a2 b min(c) max(c) +a b a i121 l121 +b b a i221 l221 +c b a i321 l321 +d b a i421 l421 +select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +a1 a2 b min(c) +b b a k221 +c b a k321 +d b a k421 +select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +a1 a2 b min(c) +b b a k221 +c b a k321 +d b a k421 +select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +a1 a2 b min(c) +select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b; +a1 a2 b min(c) +select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a b e112 h112 +b a b e212 h212 +c a b e312 h312 +c b b m322 p322 +d a b e412 h412 +d b b m422 p422 +e a b NULL NULL +select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +a1 a2 b min(c) max(c) +a a a c111 d111 +a a b e112 h112 +b a a b211 d211 +b a b e212 h212 +c a NULL c777 c999 +c a a b311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a b411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +a1 a2 b min(c) max(c) +a b a i121 l121 +b b a i221 l221 +c b a i321 l321 +d b a i421 l421 +select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +a1 a2 b min(c) +b b a k221 
+c b a k321 +d b a k421 +select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +a1 a2 b min(c) +b b a k221 +c b a k321 +d b a k421 +select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +a1 a2 b min(c) +explain select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index 
for group-by +explain select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +a1 a2 b +a a b +b a b +c a b +c b b +d a b +d b b +select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +a1 a2 b +a b a +b b a +c b a +d b a +select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +a1 a2 b c +a b a i121 +select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +a1 a2 b c +a b a i121 +select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +a1 a2 b +select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +a1 a2 b +a a b +b a b +c a b +c b b +d a b +d b b +e a b +select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +a1 a2 b +a b a +b b a +c b a +d b a +select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by 
a1,a2,b; +a1 a2 b c +a b a i121 +select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +a1 a2 b c +a b a i121 +select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +a1 a2 b +explain select distinct a1,a2,b from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by +explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain extended select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 range idx_t1_1 idx_t1_1 163 NULL 1001 99.90 Using where; Using index for group-by +Warnings: +Note 1003 /* select#1 */ select distinct `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where ((`test`.`t1`.`c` = 'i121') and (`test`.`t1`.`b` = 'a') and (`test`.`t1`.`a2` >= 'b')) +explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select distinct b from t1 where (a2 >= 'b') and (b = 'a'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 Using where; Using index +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 252 Using where; Using index for group-by +explain select distinct a1 from t1 where a1 
in ('a', 'd') and a2 = 'e'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 252 Using where; Using index for group-by +explain select distinct a1,a2,b from t2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using index for group-by +explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain extended select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 range idx_t2_1 idx_t2_1 163 NULL 1001 99.90 Using where; Using index for group-by +Warnings: +Note 1003 /* select#1 */ select distinct `test`.`t2`.`a1` AS `a1`,`test`.`t2`.`a2` AS `a2`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c` from `test`.`t2` where ((`test`.`t2`.`c` = 'i121') and (`test`.`t2`.`b` = 'a') and (`test`.`t2`.`a2` >= 'b')) +explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select distinct b from t2 where (a2 >= 'b') and (b = 'a'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index idx_t2_1,idx_t2_2 idx_t2_2 146 NULL 1000 Using where; Using index +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL 252 Using where; Using index for group-by +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e'; +id select_type table type 
possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL 252 Using where; Using index for group-by +select distinct a1,a2,b from t1; +a1 a2 b +a a a +a a b +a b a +a b b +b a a +b a b +b b a +b b b +c a a +c a b +c b a +c b b +d a a +d a b +d b a +d b b +select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a'); +a1 a2 b +a b a +b b a +c b a +d b a +select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +a1 a2 b c +a b a i121 +select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +a1 a2 b +select distinct b from t1 where (a2 >= 'b') and (b = 'a'); +b +a +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b'; +a1 +a +d +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e'; +a1 +select distinct a1,a2,b from t2; +a1 a2 b +a a NULL +a a a +a a b +a b a +a b b +b a a +b a b +b b a +b b b +c a NULL +c a a +c a b +c b a +c b b +d a a +d a b +d b a +d b b +e a a +e a b +select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a'); +a1 a2 b +a b a +b b a +c b a +d b a +select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +a1 a2 b c +a b a i121 +select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +a1 a2 b +select distinct b from t2 where (a2 >= 'b') and (b = 'a'); +b +a +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b'; +a1 +a +d +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e'; +a1 +select distinct t_00.a1 +from t1 t_00 +where exists ( select * from t2 where a1 = t_00.a1 ); +a1 +a +b +c +d +select distinct a1,a1 from t1; +a1 a1 +a a +b b +c c +d d +select distinct a2,a1,a2,a1 from t1; +a2 a1 a2 a1 +a a a a +b a b a +a b a b +b b b b +a c a c +b c b c +a d a d +b d b d +select distinct t1.a1,t2.a1 from t1,t2; +a1 a1 +a a +b a +c a +d a +a b +b b +c b +d b +a c +b c +c c +d c +a d +b d +c d +d d +a e +b e +c e +d e +explain select distinct a1,a2,b from t1; +id 
select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by +explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by +explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by; Using temporary; Using filesort +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 252 Using where; Using index for group-by +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 252 Using where; Using index for group-by +explain select distinct a1,a2,b from t2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using index for group-by +explain select distinct a1,a2,b from t2 where 
(a2 >= 'b') and (b = 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by +explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by +explain select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by; Using temporary; Using filesort +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL # Using where; Using index for group-by +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL # Using where; Using index for group-by +select distinct a1,a2,b from t1; +a1 a2 b +a a a +a a b +a b a +a b b +b a a +b a b +b b a +b b b +c a a +c a b +c b a +c b b +d a a +d a b +d b a +d b b +select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +a1 a2 b +a b a +b b a +c b a +d b a +select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +a1 a2 b c +a b a i121 +select distinct a1,a2,b from t1 where (a1 > 
'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +a1 a2 b +select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +b +a +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1; +a1 +a +d +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1; +a1 +select distinct a1,a2,b from t2; +a1 a2 b +a a NULL +a a a +a a b +a b a +a b b +b a a +b a b +b b a +b b b +c a NULL +c a a +c a b +c b a +c b b +d a a +d a b +d b a +d b b +e a a +e a b +select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +a1 a2 b +a b a +b b a +c b a +d b a +select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +a1 a2 b c +a b a i121 +select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +a1 a2 b +select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +b +a +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1; +a1 +a +d +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1; +a1 +explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by (scanning) +explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1 idx_t1_1 163 NULL 1001 Using where; Using index for group-by (scanning) +explain extended select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 100.00 Using where; Using index for group-by (scanning) +Warnings: +Note 1003 /* select#1 */ select count(distinct 
`test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`) AS `count(distinct a1,a2,b)` from `test`.`t1` where ((`test`.`t1`.`b` = 'c') and (`test`.`t1`.`a1` > 'a') and (`test`.`t1`.`a2` > 'a')) +explain select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 Using where; Using index +explain extended select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a'); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 100.00 Using where; Using index for group-by (scanning) +Warnings: +Note 1003 /* select#1 */ select (98 + count(distinct `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`)) AS `98 + count(distinct a1,a2,b)` from `test`.`t1` where ((`test`.`t1`.`a1` > 'a') and (`test`.`t1`.`a2` > 'a')) +select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a'); +count(distinct a1,a2,b) +4 +select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +count(distinct a1,a2,b,c) +1 +select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +count(distinct a1,a2,b) +0 +select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a'); +count(distinct b) +1 +select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a'); +98 + count(distinct a1,a2,b) +104 +explain select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by 
+explain select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by +explain select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using index for group-by +select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b; +a1 a2 b concat(min(c), max(c)) +a a a a111d111 +a a b e112h112 +a b a i121l121 +a b b m122p122 +b a a a211d211 +b a b e212h212 +b b a i221l221 +b b b m222p222 +c a a a311d311 +c a b e312h312 +c b a i321l321 +c b b m322p322 +select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b; +concat(a1,min(c)) b +aa111 a +ae112 b +ai121 a +am122 b +ba211 a +be212 b +bi221 a +bm222 b +ca311 a +ce312 b +ci321 a +cm322 b +select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b; +concat(a1,min(c)) b max(c) +aa111 a d111 +ae112 b h112 +ai121 a l121 +am122 b p122 +ba211 a d211 +be212 b h212 +bi221 a l221 +bm222 b p222 +ca311 a d311 +ce312 b h312 +ci321 a l321 +cm322 b p322 +select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +concat(a1,a2) b min(c) max(c) +aa a a111 d111 +aa b e112 h112 +ab a i121 l121 +ab b m122 p122 +ba a a211 d211 +ba b e212 h212 +bb a i221 l221 +bb b m222 p222 +ca a a311 d311 +ca b e312 h312 +cb a i321 l321 +cb b m322 p322 +select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2; +concat(ord(min(b)),ord(max(b))) min(b) max(b) +9798 a b +9798 a b +9798 a b +9798 a b 
+9798 a b +9798 a b +9798 a b +9798 a b +explain select a1,a2,b,d,min(c),max(c) from t1 group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 NULL +explain select a1,a2,b,d from t1 group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 NULL +explain extended select a1,a2,min(b),max(b) from t1 +where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1000 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,min(`test`.`t1`.`b`) AS `min(b)`,max(`test`.`t1`.`b`) AS `max(b)` from `test`.`t1` where (((`test`.`t1`.`a1` = 'b') or (`test`.`t1`.`a1` = 'd') or (`test`.`t1`.`a1` = 'a') or (`test`.`t1`.`a1` = 'c')) and (`test`.`t1`.`a2` > 'a') and (`test`.`t1`.`c` > 'a111')) group by `test`.`t1`.`a1`,`test`.`t1`.`a2` +explain extended select a1,a2,b,min(c),max(c) from t1 +where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,min(`test`.`t1`.`c`) AS `min(c)`,max(`test`.`t1`.`c`) AS `max(c)` from `test`.`t1` where (((`test`.`t1`.`a1` = 'b') or (`test`.`t1`.`a1` = 'd') or (`test`.`t1`.`a1` = 'a') or (`test`.`t1`.`a1` = 'c')) and (`test`.`t1`.`a2` > 'a') and (`test`.`t1`.`d` > 'xy2')) group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b` +explain extended select a1,a2,b,c from t1 +where (a1 = 'b' or a1 = 'd' or a1 = 'a' 
or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b,c; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1000 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where (((`test`.`t1`.`a1` = 'b') or (`test`.`t1`.`a1` = 'd') or (`test`.`t1`.`a1` = 'a') or (`test`.`t1`.`a1` = 'c')) and (`test`.`t1`.`a2` > 'a') and (`test`.`t1`.`d` > 'xy2')) group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`,`test`.`t1`.`c` +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b < 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b' and b >= 'a') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where +explain extended select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1000 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select 
`test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b` from `test`.`t1` where (((`test`.`t1`.`a1` = 'b') or (`test`.`t1`.`a1` = 'd') or (`test`.`t1`.`a1` = 'a') or (`test`.`t1`.`a1` = 'c')) and (`test`.`t1`.`a2` > 'a') and (`test`.`t1`.`c` > 'a111')) group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b` +explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where +select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; +a1 a2 min(b) c +a a a a111 +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where +explain select a1,a2,b,min(c),max(c) from t2 +where (c > 'a000') and (c <= 'd999') and (c like '_8__') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 1000 Using where; Using index +explain select a1, a2, b, c, min(d), max(d) from t1 group by a1,a2,b,c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index idx_t1_1 idx_t1_1 163 NULL 1000 NULL +explain select a1,a2,count(a2) from t1 group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 Using index +explain extended select a1,a2,count(a2) from t1 where (a1 > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,count(`test`.`t1`.`a2`) AS `count(a2)` from 
`test`.`t1` where (`test`.`t1`.`a1` > 'a') group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b` +explain extended select sum(ord(a1)) from t1 where (a1 > 'a') group by a1,a2,b; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select sum(ord(`test`.`t1`.`a1`)) AS `sum(ord(a1))` from `test`.`t1` where (`test`.`t1`.`a1` > 'a') group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b` +set optimizer_switch = 'multi_range_groupby=off'; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where +set optimizer_switch = 'default'; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by +explain select distinct(a1) from t1 where ord(a2) = 98; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_0 65 NULL 1000 Using where +select distinct(a1) from t1 where ord(a2) = 98; +a1 +a +b +c +d +explain select a1 from t1 where a2 = 'b' group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using where; Using index for group-by +select a1 from t1 where a2 = 'b' group by a1; +a1 +a +b +c +d +explain select distinct a1 from t1 where a2 = 'b'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using where; Using index for group-by +select distinct a1 from t1 where a2 = 'b'; 
+a1 +a +b +c +d +drop table t1,t2,t3; +create table t1 (c1 int not null,c2 int not null, primary key(c1,c2)) engine=RocksDB; +insert into t1 (c1,c2) values +(10,1),(10,2),(10,3),(20,4),(20,5),(20,6),(30,7),(30,8),(30,9); +select distinct c1, c2 from t1 order by c2; +c1 c2 +10 1 +10 2 +10 3 +20 4 +20 5 +20 6 +30 7 +30 8 +30 9 +select c1,min(c2) as c2 from t1 group by c1 order by c2; +c1 c2 +10 1 +20 4 +30 7 +select c1,c2 from t1 group by c1,c2 order by c2; +c1 c2 +10 1 +10 2 +10 3 +20 4 +20 5 +20 6 +30 7 +30 8 +30 9 +drop table t1; +CREATE TABLE t1 (a varchar(5), b int(11), PRIMARY KEY (a,b)) engine=RocksDB; +INSERT INTO t1 VALUES ('AA',1), ('AA',2), ('AA',3), ('BB',1), ('AA',4); +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +SELECT a FROM t1 WHERE a='AA' GROUP BY a; +a +AA +SELECT a FROM t1 WHERE a='BB' GROUP BY a; +a +BB +EXPLAIN SELECT a FROM t1 WHERE a='AA' GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref PRIMARY PRIMARY 7 const 1000 Using where; Using index +EXPLAIN SELECT a FROM t1 WHERE a='BB' GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref PRIMARY PRIMARY 7 const 1000 Using where; Using index +SELECT DISTINCT a FROM t1 WHERE a='BB'; +a +BB +SELECT DISTINCT a FROM t1 WHERE a LIKE 'B%'; +a +BB +SELECT a FROM t1 WHERE a LIKE 'B%' GROUP BY a; +a +BB +DROP TABLE t1; +CREATE TABLE t1 ( +a int(11) NOT NULL DEFAULT '0', +b varchar(16) COLLATE latin1_general_ci NOT NULL DEFAULT '', +PRIMARY KEY (a,b) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci; +CREATE PROCEDURE a(x INT) +BEGIN +DECLARE rnd INT; +DECLARE cnt INT; +WHILE x > 0 DO +SET rnd= x % 100; +SET cnt = (SELECT COUNT(*) FROM t1 WHERE a = rnd); +INSERT INTO t1(a,b) VALUES (rnd, CAST(cnt AS CHAR)); +SET x= x - 1; +END WHILE; +END| +CALL a(1000); +SELECT a FROM t1 WHERE a=0; +a +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +SELECT DISTINCT a FROM t1 WHERE a=0; +a +0 +SELECT 
COUNT(DISTINCT a) FROM t1 WHERE a=0; +COUNT(DISTINCT a) +1 +DROP TABLE t1; +DROP PROCEDURE a; +CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a)) engine=RocksDB; +INSERT INTO t1 (a) VALUES +(''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'), +('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'), +('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN'); +EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range PRIMARY PRIMARY 66 NULL 1001 Using index for group-by +SELECT DISTINCT a,a FROM t1 ORDER BY a; +a a + +CENTRAL CENTRAL +EASTERN EASTERN +GREATER LONDON GREATER LONDON +NORTH CENTRAL NORTH CENTRAL +NORTH EAST NORTH EAST +NORTH WEST NORTH WEST +SCOTLAND SCOTLAND +SOUTH EAST SOUTH EAST +SOUTH WEST SOUTH WEST +WESTERN WESTERN +DROP TABLE t1; +CREATE TABLE t1 (id1 INT, id2 INT) engine=RocksDB; +CREATE TABLE t2 (id2 INT, id3 INT, id5 INT) engine=RocksDB; +CREATE TABLE t3 (id3 INT, id4 INT) engine=RocksDB; +CREATE TABLE t4 (id4 INT) engine=RocksDB; +CREATE TABLE t5 (id5 INT, id6 INT) engine=RocksDB; +CREATE TABLE t6 (id6 INT) engine=RocksDB; +INSERT INTO t1 VALUES(1,1); +INSERT INTO t2 VALUES(1,1,1); +INSERT INTO t3 VALUES(1,1); +INSERT INTO t4 VALUES(1); +INSERT INTO t5 VALUES(1,1); +INSERT INTO t6 VALUES(1); +SELECT * FROM +t1 +NATURAL JOIN +(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6) +ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5)); +id2 id1 id3 id5 id4 id3 id6 id5 +1 1 1 1 1 1 1 1 +SELECT * FROM +t1 +NATURAL JOIN +(((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6) on t3.id4 = t5.id5) JOIN t2 +ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5)); +id2 id1 id4 id3 id6 id5 id3 id5 +1 1 1 1 1 1 1 1 +SELECT * FROM t1 NATURAL JOIN ((t3 join (t5 NATURAL JOIN t6)) JOIN t2); +id2 id1 id3 id4 id6 id5 id3 id5 +1 1 1 1 1 1 1 1 +SELECT * FROM +(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6) +ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5)) +NATURAL JOIN +t1; +id2 id3 id5 id4 id3 id6 id5 id1 +1 1 1 1 1 
1 1 1 +SELECT * FROM +(t2 JOIN ((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6))) +NATURAL JOIN +t1; +id2 id3 id5 id4 id3 id6 id5 id1 +1 1 1 1 1 1 1 1 +DROP TABLE t1,t2,t3,t4,t5,t6; +CREATE TABLE t1 (a int, b int, PRIMARY KEY (a,b), KEY b (b)) engine=RocksDB; +INSERT INTO t1 VALUES (1,1),(1,2),(1,0),(1,3); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range PRIMARY,b PRIMARY 8 NULL 501 Using where; Using index for group-by +SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; +MAX(b) a +1 1 +SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a; +MIN(b) a +2 1 +CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)) engine=RocksDB; +INSERT INTO t2 SELECT a,b,b FROM t1; +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range PRIMARY PRIMARY 12 NULL 251 Using where; Using index for group-by +SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a; +MIN(c) +2 +DROP TABLE t1,t2; +CREATE TABLE t1 (a INT, b INT, INDEX (a,b)) engine=RocksDB; +INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3), (1,4), (1,5), +(2,2), (2,3), (2,1), (3,1), (4,1), (4,2), (4,3), (4,4), (4,5), (4,6); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +EXPLAIN SELECT max(b), a FROM t1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by +FLUSH STATUS; +SELECT max(b), a FROM t1 GROUP BY a; +max(b) a +5 1 +3 2 +1 3 +6 4 +SHOW STATUS LIKE 'handler_read__e%'; +Variable_name Value +Handler_read_key 8 +Handler_read_next 0 +EXPLAIN SELECT max(b), a FROM t1 GROUP BY a; +id select_type table type possible_keys key key_len ref 
rows Extra +1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by +FLUSH STATUS; +CREATE TABLE t2 engine=RocksDB SELECT max(b), a FROM t1 GROUP BY a; +SHOW STATUS LIKE 'handler_read__e%'; +Variable_name Value +Handler_read_key 8 +Handler_read_next 0 +FLUSH STATUS; +SELECT * FROM (SELECT max(b), a FROM t1 GROUP BY a) b; +max(b) a +5 1 +3 2 +1 3 +6 4 +SHOW STATUS LIKE 'handler_read__e%'; +Variable_name Value +Handler_read_key 8 +Handler_read_next 0 +FLUSH STATUS; +(SELECT max(b), a FROM t1 GROUP BY a) UNION +(SELECT max(b), a FROM t1 GROUP BY a); +max(b) a +5 1 +3 2 +1 3 +6 4 +SHOW STATUS LIKE 'handler_read__e%'; +Variable_name Value +Handler_read_key 16 +Handler_read_next 0 +EXPLAIN (SELECT max(b), a FROM t1 GROUP BY a) UNION +(SELECT max(b), a FROM t1 GROUP BY a); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 range a a 5 NULL 501 Using index for group-by +2 UNION t1 range a a 5 NULL 501 Using index for group-by +NULL UNION RESULT <union1,2> ALL NULL NULL NULL NULL NULL Using temporary +EXPLAIN SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x +FROM t1 AS t1_outer; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1_outer index NULL a 10 NULL 1000 Using index +2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by +EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE EXISTS +(SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1_outer index NULL a 10 NULL 1000 Using index +2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by +EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE +(SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) > 12; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE +2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by +EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE +a IN (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); +id 
select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1_outer index NULL a 10 NULL 1000 Using where; Using index +2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by +EXPLAIN SELECT 1 FROM t1 AS t1_outer GROUP BY a HAVING +a > (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1_outer range a a 5 NULL 501 Using index for group-by +2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by +EXPLAIN SELECT 1 FROM t1 AS t1_outer1 JOIN t1 AS t1_outer2 +ON t1_outer1.a = (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) +AND t1_outer1.b = t1_outer2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1_outer2 index NULL a 10 NULL 1000 Using where; Using index +1 PRIMARY t1_outer1 ref a a 10 const,test.t1_outer2.b 1 Using where; Using index +2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by +EXPLAIN SELECT (SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x +FROM t1 AS t1_outer) x2 FROM t1 AS t1_outer2; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1_outer2 index NULL a 10 NULL 1000 Using index +2 SUBQUERY t1_outer index NULL a 10 NULL 1000 Using index +3 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by +CREATE TABLE t3 LIKE t1; +FLUSH STATUS; +INSERT INTO t3 SELECT a,MAX(b) FROM t1 GROUP BY a; +SHOW STATUS LIKE 'handler_read__e%'; +Variable_name Value +Handler_read_key 8 +Handler_read_next 0 +DELETE FROM t3; +FLUSH STATUS; +INSERT INTO t3 SELECT 1, (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) +FROM t1 LIMIT 1; +SHOW STATUS LIKE 'handler_read__e%'; +Variable_name Value +Handler_read_key 8 +Handler_read_next 0 +FLUSH STATUS; +DELETE FROM t3 WHERE (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) > 10000; +SHOW STATUS LIKE 'handler_read__e%'; +Variable_name Value +Handler_read_key 8 +Handler_read_next 0 +FLUSH STATUS; +DELETE FROM t3 WHERE (SELECT (SELECT MAX(b) FROM t1 GROUP BY a 
HAVING a < 2) x +FROM t1) > 10000; +ERROR 21000: Subquery returns more than 1 row +SHOW STATUS LIKE 'handler_read__e%'; +Variable_name Value +Handler_read_key 8 +Handler_read_next 1 +DROP TABLE t1,t2,t3; +CREATE TABLE t1 (a int, INDEX idx(a)) engine=RocksDB; +INSERT INTO t1 VALUES +(4), (2), (1), (2), (4), (2), (1), (4), +(4), (2), (1), (2), (2), (4), (1), (4); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +EXPLAIN SELECT DISTINCT(a) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx idx 5 NULL 1001 Using index for group-by +SELECT DISTINCT(a) FROM t1; +a +1 +2 +4 +EXPLAIN SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx idx 5 NULL 1001 Using index for group-by +SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1; +a +1 +2 +4 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT) engine=RocksDB; +INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3); +INSERT INTO t1 SELECT a + 1, b FROM t1; +INSERT INTO t1 SELECT a + 2, b FROM t1; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +EXPLAIN +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 Using temporary; Using filesort +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; +a MIN(b) MAX(b) +4 1 3 +3 1 3 +2 1 3 +1 1 3 +CREATE INDEX break_it ON t1 (a, b); +EXPLAIN +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range break_it break_it 10 NULL 501 Using index for group-by +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a; +a MIN(b) MAX(b) +1 1 3 +2 1 3 +3 1 3 +4 1 3 +EXPLAIN +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range break_it 
break_it 10 NULL 501 Using index for group-by; Using temporary; Using filesort +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; +a MIN(b) MAX(b) +4 1 3 +3 1 3 +2 1 3 +1 1 3 +EXPLAIN +SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index break_it break_it 10 NULL 1000 Using index +SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC; +a MIN(b) MAX(b) AVG(b) +4 1 3 2.0000 +3 1 3 2.0000 +2 1 3 2.0000 +1 1 3 2.0000 +DROP TABLE t1; +create table t1 (a int, b int, primary key (a,b), key `index` (a,b)) engine=MyISAM; +insert into t1 (a,b) values +(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6), +(0,7),(0,8),(0,9),(0,10),(0,11),(0,12),(0,13), +(1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6), +(1,7),(1,8),(1,9),(1,10),(1,11),(1,12),(1,13), +(2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6), +(2,7),(2,8),(2,9),(2,10),(2,11),(2,12),(2,13), +(3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6), +(3,7),(3,8),(3,9),(3,10),(3,11),(3,12),(3,13); +insert into t1 (a,b) select a, max(b)+1 from t1 where a = 0 group by a; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select * from t1; +a b +0 0 +0 1 +0 2 +0 3 +0 4 +0 5 +0 6 +0 7 +0 8 +0 9 +0 10 +0 11 +0 12 +0 13 +0 14 +1 0 +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 +1 10 +1 11 +1 12 +1 13 +2 0 +2 1 +2 2 +2 3 +2 4 +2 5 +2 6 +2 7 +2 8 +2 9 +2 10 +2 11 +2 12 +2 13 +3 0 +3 1 +3 2 +3 3 +3 4 +3 5 +3 6 +3 7 +3 8 +3 9 +3 10 +3 11 +3 12 +3 13 +explain extended select sql_buffer_result a, max(b)+1 from t1 where a = 0 group by a; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 range PRIMARY,index PRIMARY 4 NULL 1 100.00 Using where; Using index for group-by; Using temporary +Warnings: +Note 1003 /* select#1 */ select sql_buffer_result `test`.`t1`.`a` AS `a`,(max(`test`.`t1`.`b`) + 1) AS `max(b)+1` from `test`.`t1` where (`test`.`t1`.`a` = 0) group by `test`.`t1`.`a` +drop table 
t1; +CREATE TABLE t1 (a int, b int, c int, d int, +KEY foo (c,d,a,b), KEY bar (c,a,b,d)) engine=RocksDB; +INSERT INTO t1 VALUES (1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 1, 3), (1, 1, 1, 4); +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT a,b,c+1,d FROM t1; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +EXPLAIN SELECT DISTINCT c FROM t1 WHERE d=4; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range foo,bar foo 10 NULL 126 Using where; Using index for group-by +SELECT DISTINCT c FROM t1 WHERE d=4; +c +1 +2 +DROP TABLE t1; +# +# Bug #45386: Wrong query result with MIN function in field list, +# WHERE and GROUP BY clause +# +CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=RocksDB; +INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1); +INSERT INTO t SELECT * FROM t; +INSERT INTO t SELECT * FROM t; +ANALYZE TABLE t; +Table Op Msg_type Msg_text +test.t analyze status OK +# test MIN +#should use range with index for group by +EXPLAIN +SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range a a 10 NULL 501 Using where; Using index for group-by +#should return 1 row +SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a; +a MIN(b) +2 1 +# test MAX +#should use range with index for group by +EXPLAIN +SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range a a 10 NULL 501 Using where; Using index for group-by +#should return 1 row +SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a; +a MAX(b) +2 0 +# test 3 ranges and use the middle one +INSERT INTO t SELECT a, 2 FROM t; +#should use range with index for group by +EXPLAIN +SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range a a 10 NULL 501 Using where; Using index for group-by +#should return 1 
row +SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a; +a MAX(b) +2 1 +DROP TABLE t; +# +# Bug #48472: Loose index scan inappropriately chosen for some WHERE +# conditions +# +CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=RocksDB; +INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1); +INSERT INTO t SELECT * FROM t; +ANALYZE TABLE t; +Table Op Msg_type Msg_text +test.t analyze status OK +SELECT a, MAX(b) FROM t WHERE 0=b+0 GROUP BY a; +a MAX(b) +2 0 +DROP TABLE t; +End of 5.0 tests +# +# Bug #46607: Assertion failed: (cond_type == Item::FUNC_ITEM) results in +# server crash +# +CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=RocksDB; +INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1); +INSERT INTO t SELECT * FROM t; +SELECT a, MAX(b) FROM t WHERE b GROUP BY a; +a MAX(b) +2 1 +DROP TABLE t; +CREATE TABLE t1(a INT NOT NULL, b INT NOT NULL, KEY (b)) engine=RocksDB; +INSERT INTO t1 VALUES(1,1),(2,1); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +SELECT 1 AS c, b FROM t1 WHERE b IN (1,2) GROUP BY c, b; +c b +1 1 +SELECT a FROM t1 WHERE b=1; +a +1 +2 +DROP TABLE t1; +# +# Bug#47762: Incorrect result from MIN() when WHERE tests NOT NULL column +# for NULL +# +## Test for NULLs allowed +CREATE TABLE t1 ( a INT, KEY (a) ) engine=RocksDB; +INSERT INTO t1 VALUES (1), (2), (3); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a = NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a = NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a <> NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a <> NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a > NULL; +id select_type table type possible_keys key key_len ref 
rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a > NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a < NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a < NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a <=> NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x No matching min/max row +SELECT MIN( a ) FROM t1 WHERE a <=> NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0); +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +x x x x x x x x x Using where; Using index +SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0); +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a IS NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x No matching min/max row +SELECT MIN( a ) FROM t1 WHERE a IS NULL; +MIN( a ) +NULL +INSERT 
INTO t1 VALUES (NULL), (NULL); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a = NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a = NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a <> NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a <> NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a > NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a > NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a < NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a < NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a <=> NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Select tables optimized away +SELECT MIN( a ) FROM t1 WHERE a <=> NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL; +id select_type table type possible_keys key key_len ref rows Extra +x 
x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0); +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +x x x x x x x x x Using where; Using index +SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0); +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a IS NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Select tables optimized away +SELECT MIN( a ) FROM t1 WHERE a IS NULL; +MIN( a ) +NULL +DROP TABLE t1; +## Test for NOT NULLs +CREATE TABLE t1 ( a INT NOT NULL PRIMARY KEY) engine=RocksDB; +INSERT INTO t1 VALUES (1), (2), (3); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +# +# NULL-safe operator test disabled for non-NULL indexed columns. +# +# See bugs +# +# - Bug#52173: Reading NULL value from non-NULL index gives +# wrong result in embedded server +# +# - Bug#52174: Sometimes wrong plan when reading a MAX value from +# non-NULL index +# +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a = NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a = NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a <> NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a <> NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a > NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a > NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a < NULL; +id select_type 
table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a < NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL; +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0); +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE noticed after reading const tables +x x x x x x x x x Using where; Using index +SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0); +MIN( a ) +NULL +EXPLAIN +SELECT MIN( a ) FROM t1 WHERE a IS NULL; +id select_type table type possible_keys key key_len ref rows Extra +x x x x x x x x x Impossible WHERE +SELECT MIN( a ) FROM t1 WHERE a IS NULL; +MIN( a ) +NULL +DROP TABLE t1; +# +# Bug#53859: Valgrind: opt_sum_query(TABLE_LIST*, List<Item>&, Item*) at +# opt_sum.cc:305 +# +CREATE TABLE t1 ( a INT, KEY (a) ) engine=RocksDB; +INSERT INTO t1 VALUES (1), (2), (3); +SELECT MIN( a ) AS min_a +FROM t1 +WHERE a > 1 AND a IS NULL +ORDER BY min_a; +min_a +NULL +DROP TABLE t1; +End of 5.1 tests +# +# WL#3220 (Loose index scan for COUNT DISTINCT) +# +CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b)) 
engine=RocksDB; +INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1); +INSERT INTO t1 SELECT a, b + 4, 1 FROM t1; +INSERT INTO t1 SELECT a + 1, b, 1 FROM t1; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +CREATE TABLE t2 (a INT, b INT, c INT, d INT, e INT, f INT, KEY (a,b,c)) engine=RocksDB; +INSERT INTO t2 VALUES (1,1,1,1,1,1), (1,2,1,1,1,1), (1,3,1,1,1,1), +(1,4,1,1,1,1); +INSERT INTO t2 SELECT a, b + 4, c,d,e,f FROM t2; +INSERT INTO t2 SELECT a + 1, b, c,d,e,f FROM t2; +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +2 +EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 10 NULL 1001 Using index for group-by (scanning) +SELECT COUNT(DISTINCT a,b) FROM t1; +COUNT(DISTINCT a,b) +16 +EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 10 NULL 1001 Using index for group-by (scanning) +SELECT COUNT(DISTINCT b,a) FROM t1; +COUNT(DISTINCT b,a) +16 +EXPLAIN SELECT COUNT(DISTINCT b) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index a a 10 NULL 1000 Using index +SELECT COUNT(DISTINCT b) FROM t1; +COUNT(DISTINCT b) +8 +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by +SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a; +COUNT(DISTINCT a) +1 +1 +EXPLAIN SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 10 NULL 1001 Using index for group-by (scanning) +SELECT COUNT(DISTINCT b) FROM 
t1 GROUP BY a; +COUNT(DISTINCT b) +8 +8 +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index a a 10 NULL 1000 Using index; Using filesort +SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b; +COUNT(DISTINCT a) +2 +2 +2 +2 +2 +2 +2 +2 +EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index a a 10 NULL 1000 Using index +SELECT DISTINCT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +2 +EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL a 10 NULL 1000 Using index +SELECT COUNT(DISTINCT a, b + 0) FROM t1; +COUNT(DISTINCT a, b + 0) +16 +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL a 10 NULL 1000 Using index +SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10; +COUNT(DISTINCT a) +2 +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 NULL +SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10; +COUNT(DISTINCT a) +2 +EXPLAIN SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by +SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10; +1 +1 +EXPLAIN SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 10 NULL 1001 Using index for group-by (scanning) +SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1; +1 +1 +1 +EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a; +id select_type table type possible_keys key key_len ref rows 
Extra +1 SIMPLE t1_1 index a a 10 NULL 1000 Using index; Using temporary; Using filesort +1 SIMPLE t1_2 index NULL a 10 NULL 1000 Using index; Using join buffer (Block Nested Loop) +SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a; +COUNT(DISTINCT t1_1.a) +1 +1 +EXPLAIN SELECT COUNT(DISTINCT a), 12 FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by +SELECT COUNT(DISTINCT a), 12 FROM t1; +COUNT(DISTINCT a) 12 +2 12 +EXPLAIN SELECT COUNT(DISTINCT a, b, c) FROM t2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 15 NULL 1001 Using index for group-by (scanning) +SELECT COUNT(DISTINCT a, b, c) FROM t2; +COUNT(DISTINCT a, b, c) +16 +EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 5 NULL 251 Using index for group-by +SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2; +COUNT(DISTINCT a) SUM(DISTINCT a) AVG(DISTINCT a) +2 3 1.5000 +EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 NULL +SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2; +COUNT(DISTINCT a) SUM(DISTINCT a) AVG(DISTINCT f) +2 3 1.0000 +EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 10 NULL 501 Using index for group-by (scanning) +SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2; +COUNT(DISTINCT a, b) COUNT(DISTINCT b, a) +16 16 +EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 NULL +SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM 
t2; +COUNT(DISTINCT a, b) COUNT(DISTINCT b, f) +16 8 +EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 NULL +SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2; +COUNT(DISTINCT a, b) COUNT(DISTINCT b, d) +16 8 +EXPLAIN SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 15 NULL 1001 Using index for group-by (scanning) +SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c; +a c COUNT(DISTINCT c, a, b) +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +EXPLAIN SELECT COUNT(DISTINCT c, a, b) FROM t2 +WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 15 NULL 1001 Using where; Using index for group-by (scanning) +SELECT COUNT(DISTINCT c, a, b) FROM t2 +WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c; +COUNT(DISTINCT c, a, b) +EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5 +GROUP BY b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ref a a 5 const 1000 Using where; Using index +SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5 +GROUP BY b; +COUNT(DISTINCT b) SUM(DISTINCT b) +EXPLAIN SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 10 NULL 501 Using index for group-by (scanning) +SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; +a COUNT(DISTINCT b) SUM(DISTINCT b) +1 8 36 +2 8 36 +EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 10 NULL 501 Using index for group-by 
(scanning) +SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; +COUNT(DISTINCT b) SUM(DISTINCT b) +8 36 +8 36 +EXPLAIN SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 1000 Using where +SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42; +COUNT(DISTINCT a, b) +0 +EXPLAIN SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2 +WHERE b = 13 AND c = 42 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 15 NULL 251 Using where; Using index for group-by +SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2 +WHERE b = 13 AND c = 42 GROUP BY a; +a COUNT(DISTINCT a) SUM(DISTINCT a) +# This query could have been resolved using loose index scan since +# the second part of count(..) is defined by a constant predicate +EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index NULL a 15 NULL 1000 Using where; Using index +SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; +COUNT(DISTINCT a, b) SUM(DISTINCT a) +0 NULL +EXPLAIN SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index a a 15 NULL 1000 Using index +SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a; +SUM(DISTINCT a) MAX(b) +1 8 +2 8 +EXPLAIN SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 15 NULL 1001 Using index for group-by (scanning) +SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c; +42 * (a + c + COUNT(DISTINCT c, a, b)) +126 +126 +126 +126 +126 +126 +126 +126 +168 +168 +168 +168 +168 +168 +168 +168 +EXPLAIN SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a; +id select_type table type 
possible_keys key key_len ref rows Extra +1 SIMPLE t2 index a a 15 NULL 1000 Using index +SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a; +(SUM(DISTINCT a) + MAX(b)) +9 +10 +DROP TABLE t1,t2; +# end of WL#3220 tests +# +# Bug#50539: Wrong result when loose index scan is used for an aggregate +# function with distinct +# +CREATE TABLE t1 ( +f1 int(11) NOT NULL DEFAULT '0', +f2 char(1) NOT NULL DEFAULT '', +PRIMARY KEY (f1,f2) +) engine=RocksDB; +insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'), +(3, 'A'), (3, 'B'), (3, 'C'), (3, 'D'); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; +f1 COUNT(DISTINCT f2) +1 3 +2 1 +3 4 +explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index PRIMARY PRIMARY 5 NULL 1000 Using index +drop table t1; +# End of test#50539. +# +# Bug#17217128 - BAD INTERACTION BETWEEN MIN/MAX AND +# "HAVING SUM(DISTINCT)": WRONG RESULTS. 
+# +CREATE TABLE t (a INT, b INT, KEY(a,b)) engine=RocksDB; +INSERT INTO t VALUES (1,1), (2,2), (3,3), (4,4), (1,0), (3,2), (4,5); +ANALYZE TABLE t; +Table Op Msg_type Msg_text +test.t analyze status OK +set optimizer_trace_max_mem_size=1048576; +set @@session.optimizer_trace='enabled=on'; +set end_markers_in_json=on; +ANALYZE TABLE t; +Table Op Msg_type Msg_text +test.t analyze status OK +SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a; +a SUM(DISTINCT a) MIN(b) +1 1 0 +2 2 2 +3 3 2 +4 4 4 +EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t index a a 10 NULL 1000 Using index +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +OK +1 +SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a; +a SUM(DISTINCT a) MAX(b) +1 1 1 +2 2 2 +3 3 3 +4 4 5 +EXPLAIN SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t index a a 10 NULL 1000 Using index +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +OK +1 +SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a); +a MAX(b) +1 1 +2 2 +3 3 +4 5 +EXPLAIN SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t index a a 10 NULL 1000 Using index +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +OK +1 +SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t; +SUM(DISTINCT a) MIN(b) MAX(b) +10 0 5 +EXPLAIN SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t index a a 10 NULL 1000 Using index +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +OK +1 +SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY 
a; +a SUM(DISTINCT a) MIN(b) MAX(b) +1 1 0 1 +2 2 2 2 +3 3 2 3 +4 4 4 5 +EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t index a a 10 NULL 1000 Using index +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +OK +1 +SET optimizer_trace_max_mem_size=DEFAULT; +SET optimizer_trace=DEFAULT; +SET end_markers_in_json=DEFAULT; +DROP TABLE t; +# +# Bug#18109609: LOOSE INDEX SCAN IS NOT USED WHEN IT SHOULD +# +CREATE TABLE t1 ( +id INT AUTO_INCREMENT PRIMARY KEY, +c1 INT, +c2 INT, +KEY(c1,c2)) engine=RocksDB; +INSERT INTO t1(c1,c2) VALUES +(1, 1), (1,2), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), (4,2), (4,3), +(4,4), (4,5), (4,6), (4,7), (4,8), (4,9), (4,10), (4,11), (4,12), (4,13), +(4,14), (4,15), (4,16), (4,17), (4,18), (4,19), (4,20),(5,5); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +EXPLAIN SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range c1 c1 5 NULL 251 Using where; Using index for group-by +FLUSH STATUS; +SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1; +MAX(c2) c1 +20 4 +SHOW SESSION STATUS LIKE 'Handler_read%'; +Variable_name Value +Handler_read_first 0 +Handler_read_key 3 +Handler_read_last 1 +Handler_read_next 0 +Handler_read_prev 0 +Handler_read_rnd 0 +Handler_read_rnd_next 0 +DROP TABLE t1; +# End of test for Bug#18109609 +set global debug="-d,force_group_by"; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result b/storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result new file mode 100644 index 00000000000..93c8a464577 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result @@ -0,0 +1,10 @@ +CREATE TABLE t1 (a INT, b CHAR(8), KEY ab(a, b)) ENGINE=rocksdb DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_bin; +INSERT INTO t1 (a,b) VALUES 
(76,'bar'); +INSERT INTO t1 (a,b) VALUES (35,'foo'); +INSERT INTO t1 (a,b) VALUES (77,'baz'); +SET debug_dbug="+d,dbug.rocksdb.HA_EXTRA_KEYREAD"; +SELECT b FROM t1 FORCE INDEX(ab) WHERE a=35; +b +foo +SET debug_dbug="-d,dbug.rocksdb.HA_EXTRA_KEYREAD"; +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s.result index 3e3ef439954..84671b765b6 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/i_s.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s.result @@ -132,7 +132,11 @@ ROCKSDB_SST_PROPS CREATE TEMPORARY TABLE `ROCKSDB_SST_PROPS` ( `TOP_LEVEL_INDEX_SIZE` bigint(21) NOT NULL DEFAULT 0, `FILTER_BLOCK_SIZE` bigint(21) NOT NULL DEFAULT 0, `COMPRESSION_ALGO` varchar(193) NOT NULL DEFAULT '', - `CREATION_TIME` bigint(21) NOT NULL DEFAULT 0 + `CREATION_TIME` bigint(21) NOT NULL DEFAULT 0, + `FILE_CREATION_TIME` bigint(21) NOT NULL DEFAULT 0, + `OLDEST_KEY_TIME` bigint(21) NOT NULL DEFAULT 0, + `FILTER_POLICY` varchar(193) NOT NULL DEFAULT '', + `COMPRESSION_OPTIONS` varchar(193) NOT NULL DEFAULT '' ) ENGINE=MEMORY DEFAULT CHARSET=utf8 SHOW CREATE TABLE INFORMATION_SCHEMA.ROCKSDB_TRX; Table Create Table diff --git a/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result b/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result index 3ae0769338f..6d4139caefa 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result @@ -21,9 +21,100 @@ a b 5 e 6 f DROP TABLE t1; -#---------------------------------------- -# UNIQUE KEYS are not supported currently -#----------------------------------------- +CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'); +INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f'); +INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n'); +INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z'); +ERROR 
23000: Duplicate entry '1' for key 'a' +INSERT INTO t1 (a,b) VALUES (3,'a'),(0,''); +ERROR 23000: Duplicate entry '3' for key 'a' +INSERT INTO t1 (a,b) VALUES (0,''); +SELECT a,b FROM t1; +a b +0 +1 a +100 a +2 b +29 n +3 c +30 m +4 d +5 e +6 f +INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z'); +Warnings: +Warning 1062 Duplicate entry '1' for key 'a' +INSERT INTO t1 (a,b) VALUES (3,'a'),(4,'d') ON DUPLICATE KEY UPDATE a = a+10; +SELECT a,b FROM t1; +a b +0 +1 a +100 a +12345 z +13 c +14 d +2 b +29 n +30 m +5 e +6 f +DROP TABLE t1; +CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'); +INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f'); +INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n'); +INSERT INTO t1 (a,b) VALUES (100,'b'), (2,'c'); +INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z'); +ERROR 23000: Duplicate entry '1-a' for key 'a' +SELECT a,b FROM t1; +a b +1 a +100 a +100 b +2 b +2 c +29 n +3 c +30 m +4 d +5 e +6 f +INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z'); +Warnings: +Warning 1062 Duplicate entry '1-a' for key 'a' +INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z') ON DUPLICATE KEY UPDATE a = a+VALUES(a); +SELECT a,b FROM t1; +a b +100 a +100 b +2 a +2 b +2 c +24690 z +29 n +3 c +30 m +4 d +5 e +6 f +INSERT INTO t1 (a,b) VALUES (101,'x'),(101,'x'); +ERROR 23000: Duplicate entry '101-x' for key 'a' +SELECT a,b FROM t1; +a b +100 a +100 b +2 a +2 b +2 c +24690 z +29 n +3 c +30 m +4 d +5 e +6 f +DROP TABLE t1; CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'); INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f'); @@ -63,3 +154,109 @@ a b 5 e 6 f DROP TABLE t1; + +INSERT on DUPLICATE KEY UPDATE with multiple keys + +CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), 
(1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +SELECT a,b FROM t1; +a b +1 aaaaaaaaaaaaaaaaaa +10 aa +2 aaaaaaa +3 aa +4 aa +5 aa +6 aa +7 aa +8 aa +9 aa +DROP TABLE t1; + +INSERT on DUPLICATE KEY UPDATE with secondary key + +CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1; +SELECT a,b,c,pk FROM t1; +a b c pk +1 a 22 1 +10 j 1 11 +2 b 6 3 +2 c 4 14 +3 c 1 4 +4 d 1 5 +5 e 1 6 +6 f 1 7 +7 g 1 8 +8 h 1 9 +9 i 1 10 +DROP TABLE t1; + +Disable caching and see if it still functions properly + +SELECT @@rocksdb_enable_insert_with_update_caching; 
+@@rocksdb_enable_insert_with_update_caching +1 +SET GLOBAL rocksdb_enable_insert_with_update_caching=0; +SELECT @@rocksdb_enable_insert_with_update_caching; +@@rocksdb_enable_insert_with_update_caching +0 + +INSERT on DUPLICATE KEY UPDATE with multiple keys + +CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +SELECT a,b FROM t1; +a b +1 aaaaaaaaaaaaaaaaaa +10 aa +2 aaaaaaa +3 aa +4 aa +5 aa +6 aa +7 aa +8 aa +9 aa +DROP TABLE t1; + +INSERT on DUPLICATE KEY UPDATE with secondary key + +CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) 
VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1; +SELECT a,b,c,pk FROM t1; +a b c pk +1 a 22 1 +10 j 1 11 +2 b 6 3 +2 c 4 14 +3 c 1 4 +4 d 1 5 +5 e 1 6 +6 f 1 7 +7 g 1 8 +8 h 1 9 +9 i 1 10 +DROP TABLE t1; + +Cleanup + +SET GLOBAL rocksdb_enable_insert_with_update_caching=1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue884.result b/storage/rocksdb/mysql-test/rocksdb/r/issue884.result new file mode 100644 index 00000000000..60c9674516a --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/issue884.result @@ -0,0 +1,80 @@ +create table test ( +a bigint(20) not null, +b bigint(20) not null, +c varchar(500) not null, +d bigint(20) not null, +e bigint(20) not null, +f varchar(500) not null, +g varchar(500) not null, +h varchar(500) not null, +i varchar(1000) not null, +j varchar(16384) not null, +k varchar(200) not null, +l varchar(500) not null, +m varchar(100) not null, +n bigint(20) not null, +primary key (a, b, m, c(100), l(100), d, e, f(100), g(100), h(100), n), +key n (n), +key d (d, a) +) engine = rocksdb default charset = latin1; +Table Op Msg_type Msg_text +test.test analyze status Engine-independent statistics collected +test.test analyze status OK +explain +select * from test where d = 10 and a = 10 and b = 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE test index_merge PRIMARY,d d,PRIMARY 24,16 NULL # Using intersect(d,PRIMARY); Using where +select * from test where d = 10 and a = 10 and b = 2; +a b c d e f g h i j k l m n +10 2 i 10 950 f g h i j k l m 950 +10 2 i 10 951 f g h i j k l m 951 +10 2 i 10 952 f g h i j k l m 952 +10 2 i 10 953 f g h i j k l m 953 +10 2 i 10 954 f g h i j k l m 954 +10 2 i 10 955 f g h i j k l m 955 +10 2 i 10 956 f g h i j k l m 956 +10 2 i 10 957 f g h i j k l m 957 +10 2 i 10 958 f g h i j k l m 958 +10 2 i 10 959 f g h i j k l m 959 +10 2 i 10 960 f g h i j k l m 960 +10 2 i 10 961 f g h i j k l m 
961 +10 2 i 10 962 f g h i j k l m 962 +10 2 i 10 963 f g h i j k l m 963 +10 2 i 10 964 f g h i j k l m 964 +10 2 i 10 965 f g h i j k l m 965 +10 2 i 10 966 f g h i j k l m 966 +10 2 i 10 967 f g h i j k l m 967 +10 2 i 10 968 f g h i j k l m 968 +10 2 i 10 969 f g h i j k l m 969 +10 2 i 10 970 f g h i j k l m 970 +10 2 i 10 971 f g h i j k l m 971 +10 2 i 10 972 f g h i j k l m 972 +10 2 i 10 973 f g h i j k l m 973 +10 2 i 10 974 f g h i j k l m 974 +10 2 i 10 975 f g h i j k l m 975 +10 2 i 10 976 f g h i j k l m 976 +10 2 i 10 977 f g h i j k l m 977 +10 2 i 10 978 f g h i j k l m 978 +10 2 i 10 979 f g h i j k l m 979 +10 2 i 10 980 f g h i j k l m 980 +10 2 i 10 981 f g h i j k l m 981 +10 2 i 10 982 f g h i j k l m 982 +10 2 i 10 983 f g h i j k l m 983 +10 2 i 10 984 f g h i j k l m 984 +10 2 i 10 985 f g h i j k l m 985 +10 2 i 10 986 f g h i j k l m 986 +10 2 i 10 987 f g h i j k l m 987 +10 2 i 10 988 f g h i j k l m 988 +10 2 i 10 989 f g h i j k l m 989 +10 2 i 10 990 f g h i j k l m 990 +10 2 i 10 991 f g h i j k l m 991 +10 2 i 10 992 f g h i j k l m 992 +10 2 i 10 993 f g h i j k l m 993 +10 2 i 10 994 f g h i j k l m 994 +10 2 i 10 995 f g h i j k l m 995 +10 2 i 10 996 f g h i j k l m 996 +10 2 i 10 997 f g h i j k l m 997 +10 2 i 10 998 f g h i j k l m 998 +10 2 i 10 999 f g h i j k l m 999 +10 2 i 10 1000 f g h i j k l m 1000 +drop table test; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue896.result b/storage/rocksdb/mysql-test/rocksdb/r/issue896.result new file mode 100644 index 00000000000..917c95733f7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/issue896.result @@ -0,0 +1,17 @@ +CREATE TABLE `t1` ( +`a` bigint(20) NOT NULL, +`b` varchar(10) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, +`u` bigint(20) unsigned NOT NULL, +`d` bigint(20) DEFAULT NULL, +PRIMARY KEY (`a`,`b`), +KEY `d` (`d`) +) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='ttl_duration=1000;ttl_col=u'; +INSERT INTO t1 VALUES (100, 'aaabbb', 
UNIX_TIMESTAMP(), 200); +EXPLAIN SELECT COUNT(*) FROM t1 FORCE INDEX(d); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL d 11 NULL # Using index +# segfault here without the fix +SELECT COUNT(*) FROM t1 FORCE INDEX(d); +COUNT(*) +1 +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue900.result b/storage/rocksdb/mysql-test/rocksdb/r/issue900.result new file mode 100644 index 00000000000..062d0da0864 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/issue900.result @@ -0,0 +1,11 @@ +CREATE TABLE t1(c1 VARCHAR(1) CHARACTER SET 'utf8' COLLATE 'utf8_bin', c2 YEAR, c3 REAL(1,0) UNSIGNED, PRIMARY KEY(c1)) ENGINE=RocksDB; +INSERT INTO t1 VALUES(0,'0','0'); +INSERT INTO t1 VALUES('{0}','0','0'); +Warnings: +Warning 1265 Data truncated for column 'c1' at row 1 +INSERT INTO t1 VALUES('1','0','1'); +ALTER TABLE t1 ADD INDEX(c3), ADD UNIQUE (c3); +ERROR 23000: Duplicate entry '0' for key 'c3_2' +SELECT c3 FROM t1 FORCE INDEX(c3) ORDER BY c3; +ERROR 42000: Key 'c3' doesn't exist in table 't1' +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result b/storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result new file mode 100644 index 00000000000..600f19e0d61 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result @@ -0,0 +1,15 @@ +create table t (i int primary key) engine=rocksdb; +drop table t; +create table t (i int primary key, j int, key(j) comment 'rev:bf5_2') engine=rocksdb; +select RIGHT(HEX(index_number), 2) from information_schema.rocksdb_ddl where table_name = 't'; +RIGHT(HEX(index_number), 2) +FE +FF +insert into t values (1, 1); +select j from t order by j asc; +j +1 +select j from t order by j desc; +j +1 +drop table t; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result b/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result new file mode 100644 index 00000000000..ff4625698ca --- /dev/null +++ 
b/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result @@ -0,0 +1,128 @@ +reset master; +set GLOBAL binlog_format= 'ROW'; +SET GLOBAL enable_blind_replace=ON; +set binlog_format=row; +create table t5 (c1 int primary key, c2 int); +insert into t5 values (1, 1); +insert into t5 values (2, 2); +insert into t5 values (3, 3); +select * from t5; +c1 c2 +1 1 +2 2 +3 3 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t5 values (1, 11); +replace into t5 values (2, 22); +replace into t5 values (3, 33); +select case when variable_value-@c = 3 then 'true' else 'false' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +select * from t5; +c1 c2 +1 11 +2 22 +3 33 +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Query # # use `test`; create table t5 (c1 int primary key, c2 int) +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F 
+master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +flush logs; +drop table t5; +reset master; +Replaying binlog events containing blind replace statements should work +select * from t5; +c1 c2 +1 11 +2 22 +3 33 +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Query # # use `test`; create table t5 (c1 int primary key, c2 int) +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +drop table t5; +reset master; +Replaying the same binlog events with blind replace disabled should work +The server should internally convert such events into updates +SET 
GLOBAL enable_blind_replace=OFF; +select * from t5; +c1 c2 +1 11 +2 22 +3 33 +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Query # # use `test`; create table t5 (c1 int primary key, c2 int) +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t5) +master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +set GLOBAL binlog_format=DEFAULT; +SET GLOBAL enable_blind_replace=DEFAULT; +drop table t5; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result b/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result index 5ac36e1f4ba..ac6615be093 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result @@ -2,7 +2,7 @@ drop table if exists r1; connect con1,localhost,root,,; connect con2,localhost,root,,; connection con1; -create table r1 
(id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4)) engine=rocksdb; +create table r1 (id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4), KEY (value1, value2, value3)) engine=rocksdb; insert into r1 values (1,1,1,1,1,1,1,1); insert into r1 values (1,1,1,2,2,2,2,2); insert into r1 values (1,1,2,1,3,3,3,3); @@ -41,7 +41,7 @@ update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1'; /*!50601 SET @enable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=1', 'SET @dummy = 0') */; /*!50601 PREPARE s FROM @enable_bulk_load */; /*!50601 EXECUTE s */; --- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000002', MASTER_LOG_POS=4832; +-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START; -- SET GLOBAL gtid_slave_pos='0-1-18'; DROP TABLE IF EXISTS `r1`; /*!40101 SET @saved_cs_client = @@character_set_client */; @@ -55,7 +55,8 @@ CREATE TABLE `r1` ( `value2` int(11) DEFAULT NULL, `value3` int(11) DEFAULT NULL, `value4` int(11) DEFAULT NULL, - PRIMARY KEY (`id1`,`id2`,`id3`,`id4`) + PRIMARY KEY (`id1`,`id2`,`id3`,`id4`), + KEY `value1` (`value1`,`value2`,`value3`) ) ENGINE=ROCKSDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; /* ORDERING KEY (DESC) : PRIMARY */; @@ -78,9 +79,70 @@ UNLOCK TABLES; /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!40101 SET NAMES utf8 */; +/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; +/*!40103 SET TIME_ZONE='+00:00' */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, 
FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; +/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; +/*!50601 SELECT count(*) INTO @is_mysql8 FROM information_schema.TABLES WHERE table_schema='performance_schema' AND table_name='session_variables' */; +/*!50601 SET @check_rocksdb = CONCAT( 'SELECT count(*) INTO @is_rocksdb_supported FROM ', IF (@is_mysql8, 'performance', 'information'), '_schema.session_variables WHERE variable_name=\'rocksdb_bulk_load\'') */; +/*!50601 PREPARE s FROM @check_rocksdb */; +/*!50601 EXECUTE s */; +/*!50601 SET @bulk_load_allow_sk = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load_allow_sk=1', 'SET @dummy = 0') */; +/*!50601 PREPARE s FROM @bulk_load_allow_sk */; +/*!50601 EXECUTE s */; +/*!50601 SET @enable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=1', 'SET @dummy = 0') */; +/*!50601 PREPARE s FROM @enable_bulk_load */; +/*!50601 EXECUTE s */; +-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START; +DROP TABLE IF EXISTS `r1`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `r1` ( + `id1` int(11) NOT NULL DEFAULT '0', + `id2` int(11) NOT NULL DEFAULT '0', + `id3` varchar(100) NOT NULL DEFAULT '', + `id4` int(11) NOT NULL DEFAULT '0', + `value1` int(11) DEFAULT NULL, + `value2` int(11) DEFAULT NULL, + `value3` int(11) DEFAULT NULL, + `value4` int(11) DEFAULT NULL, + PRIMARY KEY (`id1`,`id2`,`id3`,`id4`), + KEY `value1` (`value1`,`value2`,`value3`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1; +/*!40101 SET character_set_client = @saved_cs_client */; +/* ORDERING KEY (DESC) : PRIMARY */; + +LOCK TABLES `r1` WRITE; +/*!40000 ALTER TABLE `r1` DISABLE KEYS */; +INSERT INTO `r1` VALUES 
(2,2,'2',2,16,16,16,16),(2,2,'2',1,15,15,15,15),(2,2,'1',2,14,14,14,14),(2,2,'1',1,13,13,13,13),(2,1,'2',2,12,12,12,12),(2,1,'2',1,11,11,11,11),(2,1,'1',2,10,10,10,10),(2,1,'1',1,9,9,9,9),(1,2,'2',2,8,8,8,8),(1,2,'2',1,7,7,7,7),(1,2,'1',2,6,6,6,6),(1,2,'1',1,5,5,5,5),(1,1,'2',2,4,4,4,4),(1,1,'2',1,3,3,3,3),(1,1,'1',2,2,2,2,2),(1,1,'1',1,1,1,1,1); +/*!40000 ALTER TABLE `r1` ENABLE KEYS */; +UNLOCK TABLES; +/*!50601 SET @disable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=0', 'SET @dummy = 0') */; +/*!50601 PREPARE s FROM @disable_bulk_load */; +/*!50601 EXECUTE s */; +/*!50601 SET @disable_bulk_load_allow_sk = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load_allow_sk=0', 'SET @dummy = 0') */; +/*!50601 PREPARE s FROM @disable_bulk_load_allow_sk */; +/*!50601 EXECUTE s */; +/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; +/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + rollback; connection con1; -1 +2 set @save_default_storage_engine=@@global.default_storage_engine; SET GLOBAL default_storage_engine=rocksdb; @@ -94,7 +156,7 @@ SET GLOBAL default_storage_engine=rocksdb; /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; --- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000002', MASTER_LOG_POS=4832; +-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START; -- SET GLOBAL gtid_slave_pos='0-1-18'; DROP TABLE IF EXISTS `r1`; /*!40101 SET @saved_cs_client = @@character_set_client */; @@ -108,7 +170,8 @@ CREATE 
TABLE `r1` ( `value2` int(11) DEFAULT NULL, `value3` int(11) DEFAULT NULL, `value4` int(11) DEFAULT NULL, - PRIMARY KEY (`id1`,`id2`,`id3`,`id4`) + PRIMARY KEY (`id1`,`id2`,`id3`,`id4`), + KEY `value1` (`value1`,`value2`,`value3`) ) ENGINE=ROCKSDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; /* ORDERING KEY : (null) */; @@ -128,7 +191,7 @@ UNLOCK TABLES; /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; -2 +3 ==== mysqldump with --innodb-stats-on-metadata ==== /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; @@ -141,7 +204,7 @@ UNLOCK TABLES; /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; --- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000002', MASTER_LOG_POS=4832; +-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START; -- SET GLOBAL gtid_slave_pos='0-1-18'; DROP TABLE IF EXISTS `r1`; /*!40101 SET @saved_cs_client = @@character_set_client */; @@ -155,7 +218,8 @@ CREATE TABLE `r1` ( `value2` int(11) DEFAULT NULL, `value3` int(11) DEFAULT NULL, `value4` int(11) DEFAULT NULL, - PRIMARY KEY (`id1`,`id2`,`id3`,`id4`) + PRIMARY KEY (`id1`,`id2`,`id3`,`id4`), + KEY `value1` (`value1`,`value2`,`value3`) ) ENGINE=ROCKSDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; /* ORDERING KEY : (null) */; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result new file mode 100644 index 00000000000..12223ebf228 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result @@ -0,0 +1,98 @@ +SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level; +SET GLOBAL 
rocksdb_perf_context_level=3; +SET GLOBAL enable_blind_replace=ON; +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; +c1 c2 +1 1 +2 2 +3 3 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +drop table t1; +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +create trigger trg before insert on t1 for each row set @a:=1; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; +c1 c2 +1 1 +2 2 +3 3 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +false +drop table t1; +create table t1(c1 int,c2 int) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; +c1 c2 +1 1 +2 2 +3 3 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +drop table t1; +create table t1(c1 int,c2 int unique) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; +c1 c2 +1 1 +2 2 +3 3 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' 
else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +false +drop table t1; +create table t1(c1 int primary key,c2 int unique) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; +c1 c2 +1 1 +2 2 +3 3 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +false +drop table t1; +create table t1(c1 int primary key,c2 int, key idx1(c2)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; +c1 c2 +1 1 +2 2 +3 3 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +false +drop table t1; +SET GLOBAL enable_blind_replace=OFF; +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; +c1 c2 +1 1 +2 2 +3 3 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +false +drop table t1; +SET GLOBAL enable_blind_replace=DEFAULT; +SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result 
b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result new file mode 100644 index 00000000000..65ee9768339 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result @@ -0,0 +1,46 @@ +SET GLOBAL enable_blind_replace=ON; +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; +c1 c2 +1 1 +2 2 +3 3 +SELECT @@global.enable_blind_replace; +@@global.enable_blind_replace +1 +begin; +replace into t1 values(1,11); +SELECT @@global.enable_blind_replace; +@@global.enable_blind_replace +1 +begin; +update t1 set c2=22 where c1=1; +commit; +# Reap update. +commit; +select * from t1; +c1 c2 +1 22 +2 2 +3 3 +SELECT @@global.enable_blind_replace; +@@global.enable_blind_replace +1 +begin; +update t1 set c2=55 where c1=1; +SELECT @@global.enable_blind_replace; +@@global.enable_blind_replace +1 +begin; +replace into t1 values(1,66); +commit; +# Reap replace into. +commit; +select * from t1; +c1 c2 +1 66 +2 2 +3 3 +drop table t1; +SET GLOBAL enable_blind_replace=DEFAULT; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result b/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result index c0903eda663..070169fd674 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result @@ -33,6 +33,13 @@ CF_NAME OPTION_TYPE VALUE __system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24 cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.26 default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24 +SET @@global.rocksdb_update_cf_options = 'cf2={prefix_extractor=capped:28};'; +SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%'; +CF_NAME OPTION_TYPE VALUE +__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24 +cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.26 +cf2 PREFIX_EXTRACTOR rocksdb.CappedPrefix.28 +default 
PREFIX_EXTRACTOR rocksdb.CappedPrefix.24 select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked'; SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1; COUNT(*) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/records_in_range.result b/storage/rocksdb/mysql-test/rocksdb/r/records_in_range.result index 0a42e730fe6..ce3d7d9147e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/records_in_range.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/records_in_range.result @@ -184,7 +184,7 @@ insert into linktable values (1,1,4,1,1,1,1,1,1); set global rocksdb_force_flush_memtable_now = true; explain select id1, id2, link_type, visibility, data, time, version from linktable where id1 = 1 and link_type = 1 and id2 in (1, 2); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE linktable ref PRIMARY,id1_type PRIMARY 16 const,const 2 Using where +1 SIMPLE linktable range PRIMARY,id1_type PRIMARY 24 NULL 2 Using where drop table linktable; CREATE TABLE `linktable` ( `id1` bigint(20) unsigned NOT NULL DEFAULT '0', @@ -206,6 +206,6 @@ insert into linktable values (1,1,4,1,1,1,1,1,1); set global rocksdb_force_flush_memtable_now = true; explain select id1, id2, link_type, visibility, data, time, version from linktable where id1 = 1 and link_type = 1 and id2 in (1, 2); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE linktable ref PRIMARY,id1_type PRIMARY 16 const,const 2 Using where +1 SIMPLE linktable range PRIMARY,id1_type PRIMARY 24 NULL 2 Using where drop table linktable; DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result index d8d78a2f571..f720a33e86b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result @@ -884,11 +884,14 @@ rocksdb_bulk_load_allow_sk OFF rocksdb_bulk_load_allow_unsorted OFF 
rocksdb_bulk_load_size 1000 rocksdb_bytes_per_sync 0 +rocksdb_cache_dump ON +rocksdb_cache_high_pri_pool_ratio 0.000000 rocksdb_cache_index_and_filter_blocks ON +rocksdb_cache_index_and_filter_with_high_priority ON rocksdb_checksums_pct 100 rocksdb_collect_sst_properties ON rocksdb_commit_in_the_middle OFF -rocksdb_commit_time_batch_for_recovery OFF +rocksdb_commit_time_batch_for_recovery ON rocksdb_compact_cf rocksdb_compaction_readahead_size 0 rocksdb_compaction_sequential_deletes 0 @@ -910,9 +913,11 @@ rocksdb_debug_ttl_rec_ts 0 rocksdb_debug_ttl_snapshot_ts 0 rocksdb_default_cf_options rocksdb_delayed_write_rate 0 +rocksdb_delete_cf rocksdb_delete_obsolete_files_period_micros 21600000000 rocksdb_enable_2pc ON rocksdb_enable_bulk_load_api ON +rocksdb_enable_insert_with_update_caching ON rocksdb_enable_thread_tracking ON rocksdb_enable_ttl ON rocksdb_enable_ttl_read_filtering ON @@ -963,10 +968,10 @@ rocksdb_persistent_cache_size_mb 0 rocksdb_pin_l0_filter_and_index_blocks_in_cache ON rocksdb_print_snapshot_conflict_queries OFF rocksdb_rate_limiter_bytes_per_sec 0 -rocksdb_read_free_rpl_tables rocksdb_records_in_range 50 rocksdb_remove_mariabackup_checkpoint OFF rocksdb_reset_stats OFF +rocksdb_rollback_on_timeout OFF rocksdb_seconds_between_stat_computes 3600 rocksdb_signal_drop_index_thread OFF rocksdb_sim_cache_size 0 @@ -975,6 +980,7 @@ rocksdb_skip_fill_cache OFF rocksdb_skip_unique_check_tables .* rocksdb_sst_mgr_rate_bytes_per_sec 0 rocksdb_stats_dump_period_sec 600 +rocksdb_stats_level 0 rocksdb_stats_recalc_rate 0 rocksdb_store_row_debug_checksums OFF rocksdb_strict_collation_check OFF @@ -1359,7 +1365,7 @@ insert into t1 select (@a:=@a+1), 1234 from information_schema.session_variables set @tmp1= @@rocksdb_max_row_locks; set rocksdb_max_row_locks= 20; update t1 set a=a+10; -ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to max_num_locks limit' from ROCKSDB +ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock 
due to rocksdb_max_row_locks limit' from ROCKSDB DROP TABLE t1; # # Test AUTO_INCREMENT behavior problem, @@ -1463,8 +1469,9 @@ set autocommit=1; drop table t0, t1; # # Check status variables +# NOTE: We exclude rocksdb_num_get_for_update_calls because it's a debug only status var # -show status like 'rocksdb%'; +show status where variable_name like 'rocksdb%' and variable_name not like '%num_get_for_update%'; Variable_name Value Rocksdb_rows_deleted # Rocksdb_rows_inserted # @@ -1575,7 +1582,7 @@ Rocksdb_write_other # Rocksdb_write_self # Rocksdb_write_timedout # Rocksdb_write_wal # -select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%'; +select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%'; VARIABLE_NAME ROCKSDB_ROWS_DELETED ROCKSDB_ROWS_INSERTED @@ -1688,7 +1695,7 @@ ROCKSDB_WRITE_TIMEDOUT ROCKSDB_WRITE_WAL # RocksDB-SE's status variables are global internally # but they are shown as both session and global, like InnoDB's status vars. 
-select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%'; +select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%'; VARIABLE_NAME ROCKSDB_ROWS_DELETED ROCKSDB_ROWS_INSERTED diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result index 24b93ee3395..32bb70a7464 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result @@ -355,7 +355,7 @@ test.t2 analyze Warning Engine-independent statistics are not collected for colu test.t2 analyze status OK EXPLAIN PARTITIONS SELECT * FROM t2 WHERE col3 = 0x4 AND col2 = 0x34567; id select_type table partitions type possible_keys key key_len ref rows Extra -1 SIMPLE t2 custom_p2 ref col3 col3 258 const 1 Using where +1 SIMPLE t2 custom_p2 ref col3 col3 258 const # Using where DROP TABLE t2; CREATE TABLE `t2` ( `col1` bigint(20) NOT NULL, diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result index dbc89f32d90..e4b757ef3b0 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result @@ -110,7 +110,7 @@ set session debug_dbug= "-d,myrocks_simulate_bad_key_checksum1"; explain select a from t3 force index(a) where a<4; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t3 index a a 5 NULL # Using where; Using index +1 SIMPLE t3 range a a 5 NULL # Using where; Using index select a from t3 force index(a) where a<4; a 1 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result index ea9114c14d1..9106e79f80c 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result @@ -1,10 +1,12 @@ connect con, localhost, root,,; connection default; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; SET debug_sync='RESET'; -DROP TABLE IF EXISTS t1; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--PK first row delete connection con; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; connection default; @@ -15,17 +17,430 @@ connection con; pk a 2 2 3 3 +4 4 +5 5 +--PK middle row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 3; +SET debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +--PK end row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET debug_sync='RESET'; +CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a)); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--SK first row delete +connection con; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET 
debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go'; +SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 1; +SET debug_sync='now SIGNAL go'; +connection con; +a +2 +3 +4 +5 +--SK middle row delete +SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go'; +SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 3; +SET debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +--SK end row delete +SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go'; +SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET debug_sync='RESET'; +CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--PK first row delete +connection con; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +4 4 +3 3 +2 2 +1 1 +--PK middle row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 3; +SET 
debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +--PK end row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 1; +SET debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET debug_sync='RESET'; +CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--PK first row delete +connection con; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 1; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +2 2 +3 3 +4 4 +5 5 +--PK middle row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 3; +SET debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +--PK end row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction 
(snapshot conflict) +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET debug_sync='RESET'; +CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--PK first row delete +connection con; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +4 4 +3 3 +2 2 +1 1 +--PK middle row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 3; +SET debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +--PK end row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 1; +SET debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +set debug_sync='RESET'; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb; +insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); +--First row delete with PRIMARY +connection con; +SET SESSION TRANSACTION 
ISOLATION LEVEL REPEATABLE READ; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (PRIMARY) set value=100 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=1; +set debug_sync='now SIGNAL go'; +connection con; +select * from t1 where id1=1; +id1 id2 value +1 2 100 +1 3 100 +1 4 100 +1 5 100 +--Middle row delete with PRIMARY +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (PRIMARY) set value=200 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=3; +set debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +select * from t1 where id1=1; +id1 id2 value +1 2 100 +1 4 100 +1 5 100 +--End row delete with PRIMARY +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (PRIMARY) set value=300 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=5; +set debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +select * from t1 where id1=1; +id1 id2 value +1 2 100 +1 4 100 +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +set debug_sync='RESET'; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb; +insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); +--First row delete with sk +connection con; +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +set 
debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (sk) set value=100 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=1; +set debug_sync='now SIGNAL go'; +connection con; +select * from t1 where id1=1; +id1 id2 value +1 2 100 +1 3 100 +1 4 100 +1 5 100 +--Middle row delete with sk +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (sk) set value=200 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=3; +set debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +select * from t1 where id1=1; +id1 id2 value +1 2 100 +1 4 100 +1 5 100 +--End row delete with sk +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (sk) set value=300 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=5; +set debug_sync='now SIGNAL go'; +connection con; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +select * from t1 where id1=1; +id1 id2 value +1 2 100 +1 4 100 +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +SET debug_sync='RESET'; +CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--PK first row delete +connection con; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now 
WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 1; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +2 2 +3 3 +4 4 +5 5 +--PK middle row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 3; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +2 2 +4 4 +5 5 +--PK end row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +2 2 +4 4 +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +SET debug_sync='RESET'; +CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a)); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--SK first row delete +connection con; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go'; +SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 1; +SET debug_sync='now SIGNAL go'; +connection con; +a +2 +3 +4 +5 +--SK middle row delete +SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go'; +SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 3; +SET debug_sync='now SIGNAL go'; +connection con; +a +2 +4 +5 +--SK end row delete +SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go'; +SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection 
con; +a +2 +4 connection default; disconnect con; set debug_sync='RESET'; drop table t1; connect con, localhost, root,,; connection default; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; SET debug_sync='RESET'; -DROP TABLE IF EXISTS t1; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--PK first row delete connection con; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +4 4 +3 3 +2 2 +1 1 +--PK middle row delete SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; connection default; @@ -34,19 +449,33 @@ DELETE FROM t1 WHERE pk = 3; SET debug_sync='now SIGNAL go'; connection con; pk a +4 4 2 2 1 1 +--PK end row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 1; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +4 4 +2 2 connection default; disconnect con; set debug_sync='RESET'; drop table t1; connect con, localhost, root,,; connection default; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; SET debug_sync='RESET'; -DROP TABLE IF EXISTS t1; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--PK first row delete connection con; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; connection default; @@ 
-57,17 +486,57 @@ connection con; pk a 2 2 3 3 +4 4 +5 5 +--PK middle row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 3; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +2 2 +4 4 +5 5 +--PK end row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +2 2 +4 4 connection default; disconnect con; set debug_sync='RESET'; drop table t1; connect con, localhost, root,,; connection default; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; SET debug_sync='RESET'; -DROP TABLE IF EXISTS t1; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); +--PK first row delete connection con; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; +connection con; +pk a +4 4 +3 3 +2 2 +1 1 +--PK middle row delete SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; connection default; @@ -76,8 +545,126 @@ DELETE FROM t1 WHERE pk = 3; SET debug_sync='now SIGNAL go'; connection con; pk a +4 4 2 2 1 1 +--PK end row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; +connection default; +SET debug_sync='now WAIT_FOR parked'; +DELETE FROM t1 WHERE pk = 1; +SET debug_sync='now SIGNAL go'; +connection con; 
+pk a +4 4 +2 2 +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +set debug_sync='RESET'; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb; +insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); +--First row delete with PRIMARY +connection con; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (PRIMARY) set value=100 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=1; +set debug_sync='now SIGNAL go'; +connection con; +select * from t1 where id1=1; +id1 id2 value +1 2 100 +1 3 100 +1 4 100 +1 5 100 +--Middle row delete with PRIMARY +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (PRIMARY) set value=200 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=3; +set debug_sync='now SIGNAL go'; +connection con; +select * from t1 where id1=1; +id1 id2 value +1 2 200 +1 4 200 +1 5 200 +--End row delete with PRIMARY +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (PRIMARY) set value=300 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=5; +set debug_sync='now SIGNAL go'; +connection con; +select * from t1 where id1=1; +id1 id2 value +1 2 300 +1 4 300 +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +connect con, localhost, root,,; +connection default; +set debug_sync='RESET'; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +create table t1 
(id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb; +insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); +--First row delete with sk +connection con; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (sk) set value=100 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=1; +set debug_sync='now SIGNAL go'; +connection con; +select * from t1 where id1=1; +id1 id2 value +1 2 100 +1 3 100 +1 4 100 +1 5 100 +--Middle row delete with sk +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (sk) set value=200 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=3; +set debug_sync='now SIGNAL go'; +connection con; +select * from t1 where id1=1; +id1 id2 value +1 2 200 +1 4 200 +1 5 200 +--End row delete with sk +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +update t1 force index (sk) set value=300 where id1=1; +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=5; +set debug_sync='now SIGNAL go'; +connection con; +select * from t1 where id1=1; +id1 id2 value +1 2 300 +1 4 300 connection default; disconnect con; set debug_sync='RESET'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result new file mode 100644 index 00000000000..fabf077e27a --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result @@ -0,0 +1,335 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. 
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +drop table if exists t1; +create table t1 (id int primary key, value int); +insert into t1 values (1,1), (2,2), (3,3), (4,4); +include/sync_slave_sql_with_master.inc + +# regular update/delete. With rocks_read_free_rpl=PK_SK, rocksdb_rows_read does not increase on slaves + +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +update t1 set value=value+1 where id=1; +delete from t1 where id=4; +select * from t1; +id value +1 2 +2 2 +3 3 +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +select * from t1; +id value +1 2 +2 2 +3 3 + +# "rocks_read_free_rpl=PK_SK" makes "row not found error" not happen anymore + +include/stop_slave.inc +delete from t1 where id in (2, 3); +include/start_slave.inc +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +update t1 set value=value+1 where id=3; +delete from t1 where id=2; +select * from t1; +id value +1 2 +3 4 +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +select * from t1; +id value +1 2 +3 4 + +## tables without primary key -- read free replication should be disabled + + +#no index + +drop table t1; +create table t1 (c1 int, c2 int); +insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5); 
+include/sync_slave_sql_with_master.inc +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +update t1 set c2=100 where c1=3; +delete from t1 where c1 <= 2; +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +false +select * from t1; +c1 c2 +3 100 +4 4 +5 5 + +#secondary index only + +drop table t1; +create table t1 (c1 int, c2 int, index i(c1)); +insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5); +include/sync_slave_sql_with_master.inc +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +update t1 set c2=100 where c1=3; +delete from t1 where c1 <= 2; +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +false +select * from t1; +c1 c2 +3 100 +4 4 +5 5 + +## large row operations -- primary key modification, secondary key modification + +drop table t1; +create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2)); +include/sync_slave_sql_with_master.inc +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +#updating all secondary keys by 1 + +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +include/diff_tables.inc [master:t1, slave:t1] + +#updating all primary keys by 2 + +select variable_value into @up from 
information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +include/diff_tables.inc [master:t1, slave:t1] + +#updating secondary keys after truncating t1 on slave + +truncate table t1; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +update t1 set c2=c2+10; +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +include/diff_tables.inc [master:t1, slave:t1] + +#updating primary keys after truncating t1 on slave + +truncate table t1; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +update t1 set id2=id2+10; +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +include/diff_tables.inc [master:t1, slave:t1] + +#deleting half rows + +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +delete from t1 where id1 <= 5000; +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +include/diff_tables.inc [master:t1, slave:t1] + +# rocksdb_read_free_rpl = PK_ONLY i.e. 
it only works on tables with only PK + +[on slave] +stop slave; +set @@global.rocksdb_read_free_rpl = PK_ONLY; +start slave; +[on master] +create table t2 (id int primary key, i1 int, i2 int, value int); +create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); +insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +include/sync_slave_sql_with_master.inc +[on slave] +delete from t2 where id <= 2; +delete from u2 where id <= 2; +[on master] +update t2 set i2=100, value=100 where id=1; +update u2 set i2=100, value=100 where id=1; +[on slave] +call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*"); +call mtr.add_suppression("Slave: Can't find record in 'u2'.*"); +include/wait_for_slave_sql_error.inc [errno=1032] +select count(*) from t2 force index(primary); +count(*) +2 +select * from t2 where id=1; +id i1 i2 value +1 1 100 100 +select i1 from t2 where i1=1; +i1 +1 +select i2 from t2 where i2=100; +i2 +100 +select count(*) from u2 force index(primary); +count(*) +1 +select count(*) from u2 force index(i1); +count(*) +1 +select count(*) from u2 force index(i2); +count(*) +1 +select * from u2 where id=1; +id i1 i2 value +select i1 from u2 where i1=1; +i1 +select i2 from u2 where i2=100; +i2 +include/wait_for_slave_sql_to_start.inc +[on slave] +stop slave; +set @@global.rocksdb_read_free_rpl = PK_SK; +start slave; + +# some tables with read-free replication on and some with it off + +[on slave] +stop slave; +set @@global.rocksdb_read_free_rpl_tables = "t.*"; +start slave; +[on master] +drop table if exists t2; +drop table if exists u2; +create table t2 (id int primary key, i1 int, i2 int, value int); +create table u2 (id int primary key, i1 int, i2 int, value int); +insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +include/sync_slave_sql_with_master.inc +[on slave] +delete from t2 
where id <= 2; +delete from u2 where id <= 2; +[on master] +update t2 set i2=100, value=100 where id=1; +update u2 set i2=100, value=100 where id=1; +[on slave] +call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*"); +call mtr.add_suppression("Slave: Can't find record in 'u2'.*"); +include/wait_for_slave_sql_error.inc [errno=1032] +select count(*) from t2 force index(primary); +count(*) +2 +select * from t2 where id=1; +id i1 i2 value +1 1 100 100 +select i1 from t2 where i1=1; +i1 +1 +select i2 from t2 where i2=100; +i2 +100 +select count(*) from u2 force index(primary); +count(*) +1 +select * from u2 where id=1; +id i1 i2 value +select i1 from u2 where i1=1; +i1 +select i2 from u2 where i2=100; +i2 +include/wait_for_slave_sql_to_start.inc +[on slave] +stop slave; +set @@global.rocksdb_read_free_rpl_tables = ".*"; +start slave; + +# secondary keys lose rows + +[on master] +create table t3 (id int primary key, i1 int, i2 int, value int, index(i1), +index(i2)); +insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +include/sync_slave_sql_with_master.inc +[on slave] +delete from t3 where id <= 2; +[on master] +update t3 set i2=100, value=100 where id=1; +include/sync_slave_sql_with_master.inc +select count(*) from t3 force index(primary); +count(*) +2 +select count(*) from t3 force index(i1); +count(*) +1 +select count(*) from t3 force index(i2); +count(*) +2 +select * from t3 where id=1; +id i1 i2 value +1 1 100 100 +select i1 from t3 where i1=1; +i1 +select i2 from t3 where i2=100; +i2 +100 + +# secondary keys have extra rows + +[on master] +create table t4 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); +insert into t4 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +include/sync_slave_sql_with_master.inc +[on slave] +update t4 set i1=100 where id=1; +[on master] +delete from t4 where id=1; +include/sync_slave_sql_with_master.inc +[on slave] +select count(*) from t4 force index(primary); +count(*) +2 
+select count(*) from t4 force index(i1); +count(*) +3 +select count(*) from t4 force index(i2); +count(*) +2 +select i1 from t4 where i1=100; +i1 +100 + +# inserts are also read-free + +[on master] +drop table if exists t2; +drop table if exists t3; +create table t2 (id int primary key, i1 int, i2 int); +create table t3 (id int primary key, i1 int, i2 int, key(i1)); +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +insert into t2 values(1, 1, 1); +insert into t2 values(2, 2, 2); +insert into t3 values(1, 1, 1); +insert into t3 values(2, 2, 2); +include/sync_slave_sql_with_master.inc +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +select * from t2; +id i1 i2 +1 1 1 +2 2 2 +select * from t3; +id i1 i2 +1 1 1 +2 2 2 +drop table t1, t2, t3, t4, u2; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result new file mode 100644 index 00000000000..9e3c7a0582b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result @@ -0,0 +1,35 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
+[connection master] +include/stop_slave.inc +set @@global.rocksdb_read_free_rpl = PK_SK; +include/start_slave.inc +include/sync_slave_sql_with_master.inc +include/diff_tables.inc [master:t1, slave:t1] +include/diff_tables.inc [master:t2, slave:t2] +include/diff_tables.inc [master:t3, slave:t3] +include/diff_tables.inc [master:t4, slave:t4] +include/diff_tables.inc [master:t5, slave:t5] +include/diff_tables.inc [master:t6, slave:t6] +include/diff_tables.inc [master:t7, slave:t7] +include/diff_tables.inc [master:t8, slave:t8] +include/sync_slave_sql_with_master.inc +include/stop_slave.inc +set @@global.rocksdb_read_free_rpl = PK_ONLY; +include/start_slave.inc +include/sync_slave_sql_with_master.inc +include/diff_tables.inc [master:t1, slave:t1] +include/diff_tables.inc [master:t2, slave:t2] +include/diff_tables.inc [master:t3, slave:t3] +include/diff_tables.inc [master:t4, slave:t4] +include/diff_tables.inc [master:t5, slave:t5] +include/diff_tables.inc [master:t6, slave:t6] +include/diff_tables.inc [master:t7, slave:t7] +include/diff_tables.inc [master:t8, slave:t8] +include/sync_slave_sql_with_master.inc +include/stop_slave.inc +set @@global.rocksdb_read_free_rpl = default; +include/start_slave.inc +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result new file mode 100644 index 00000000000..adf05d06aac --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result @@ -0,0 +1,84 @@ +drop table if exists t1; +SET @@global.rocksdb_rollback_on_timeout = 1; +show variables like 'rocksdb_rollback_on_timeout'; +Variable_name Value +rocksdb_rollback_on_timeout ON +create table t1 (a int unsigned not null primary key) engine = rocksdb; +insert into t1 values (1); +commit; +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connection con2; +begin work; +insert into t1 values (5); +insert into t1 values (6); +update 
t1 set a = a + 1 where a = 1; +connection con1; +begin work; +insert into t1 values (7); +insert into t1 values (8); +update t1 set a = a + 1 where a = 1; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +select * from t1; +a +1 +commit; +connection con2; +select * from t1; +a +2 +5 +6 +commit; +connection default; +select * from t1; +a +2 +5 +6 +SET @@global.rocksdb_rollback_on_timeout = 0; +show variables like 'rocksdb_rollback_on_timeout'; +Variable_name Value +rocksdb_rollback_on_timeout OFF +connection con2; +begin work; +insert into t1 values (9); +insert into t1 values (10); +update t1 set a = a + 1 where a = 2; +connection con1; +begin work; +insert into t1 values (11); +insert into t1 values (12); +update t1 set a = a + 1 where a = 2; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +select * from t1; +a +2 +5 +6 +11 +12 +commit; +connection con2; +select * from t1; +a +3 +5 +6 +9 +10 +commit; +connection default; +select * from t1; +a +3 +5 +6 +9 +10 +11 +12 +SET @@global.rocksdb_rollback_on_timeout = DEFAULT; +drop table t1; +disconnect con1; +disconnect con2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_read_free.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_read_free.result deleted file mode 100644 index 82609f46423..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/r/rpl_read_free.result +++ /dev/null @@ -1,321 +0,0 @@ -include/master-slave.inc -Warnings: -Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. -Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
-[connection master] -drop table if exists t1; -create procedure save_read_stats() -begin -select rows_requested into @rq from information_schema.table_statistics -where table_schema=database() and table_name='t1'; -select variable_value into @rr from information_schema.global_status -where variable_name='rocksdb_rows_read'; -select variable_value into @ru from information_schema.global_status -where variable_name='rocksdb_rows_updated'; -select variable_value into @rd from information_schema.global_status -where variable_name='rocksdb_rows_deleted'; -end// -create procedure get_read_stats() -begin -select rows_requested - @rq as rows_requested from -information_schema.table_statistics -where table_schema=database() and table_name='t1'; -select variable_value - @rr as rows_read from -information_schema.global_status -where variable_name='rocksdb_rows_read'; -select variable_value - @ru as rows_updated from -information_schema.global_status -where variable_name='rocksdb_rows_updated'; -select variable_value - @rd as rows_deleted from -information_schema.global_status -where variable_name='rocksdb_rows_deleted'; -end// -create table t1 (id int primary key, value int); -insert into t1 values (1,1), (2,2), (3,3), (4,4); -include/sync_slave_sql_with_master.inc - -# regular update/delete. 
With rocks_read_free_rpl_tables=.*, rocksdb_rows_read does not increase on slaves - -call save_read_stats(); -update t1 set value=value+1 where id=1; -delete from t1 where id=4; -select * from t1; -id value -1 2 -2 2 -3 3 -include/sync_slave_sql_with_master.inc -call get_read_stats(); -rows_requested -0 -rows_read -0 -rows_updated -1 -rows_deleted -1 -select * from t1; -id value -1 2 -2 2 -3 3 - -# "rocks_read_free_rpl_tables=.*" makes "row not found error" not happen anymore - -include/stop_slave.inc -delete from t1 where id in (2, 3); -include/start_slave.inc -call save_read_stats(); -update t1 set value=value+1 where id=3; -delete from t1 where id=2; -select * from t1; -id value -1 2 -3 4 -include/sync_slave_sql_with_master.inc -call get_read_stats(); -rows_requested -0 -rows_read -0 -rows_updated -1 -rows_deleted -1 -select * from t1; -id value -1 2 -3 4 - -## tables without primary key -- read free replication should be disabled - - -#no index - -drop table t1; -create table t1 (c1 int, c2 int); -insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5); -include/sync_slave_sql_with_master.inc -call save_read_stats(); -update t1 set c2=100 where c1=3; -delete from t1 where c1 <= 2; -include/sync_slave_sql_with_master.inc -call get_read_stats(); -rows_requested -5 -rows_read -5 -rows_updated -1 -rows_deleted -2 -select * from t1; -c1 c2 -3 100 -4 4 -5 5 - -#secondary index only - -drop table t1; -create table t1 (c1 int, c2 int, index i(c1)); -insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5); -include/sync_slave_sql_with_master.inc -call save_read_stats(); -update t1 set c2=100 where c1=3; -delete from t1 where c1 <= 2; -include/sync_slave_sql_with_master.inc -call get_read_stats(); -rows_requested -3 -rows_read -3 -rows_updated -1 -rows_deleted -2 -select * from t1; -c1 c2 -3 100 -4 4 -5 5 - -## large row operations -- primary key modification, secondary key modification - -drop table t1; -create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 
bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2)); -include/sync_slave_sql_with_master.inc -call save_read_stats(); - -#updating all seconary keys by 1 - -include/sync_slave_sql_with_master.inc -call get_read_stats(); -rows_requested -0 -rows_read -0 -rows_updated -10000 -rows_deleted -0 -include/diff_tables.inc [master:t1, slave:t1] - -#updating all primary keys by 2 - -call save_read_stats(); -include/sync_slave_sql_with_master.inc -call get_read_stats(); -rows_requested -0 -rows_read -0 -rows_updated -10000 -rows_deleted -0 -include/diff_tables.inc [master:t1, slave:t1] - -#updating secondary keys after truncating t1 on slave - -truncate table t1; -call save_read_stats(); -update t1 set c2=c2+10; -include/sync_slave_sql_with_master.inc -call get_read_stats(); -rows_requested -0 -rows_read -0 -rows_updated -10000 -rows_deleted -0 -include/diff_tables.inc [master:t1, slave:t1] - -#updating primary keys after truncating t1 on slave - -truncate table t1; -call save_read_stats(); -update t1 set id2=id2+10; -include/sync_slave_sql_with_master.inc -call get_read_stats(); -rows_requested -0 -rows_read -0 -rows_updated -10000 -rows_deleted -0 -include/diff_tables.inc [master:t1, slave:t1] - -#deleting half rows - -call save_read_stats(); -delete from t1 where id1 <= 5000; -include/sync_slave_sql_with_master.inc -call get_read_stats(); -rows_requested -0 -rows_read -0 -rows_updated -0 -rows_deleted -5000 -include/diff_tables.inc [master:t1, slave:t1] -[on master] -create table t2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); -create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); -insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); -insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); -include/sync_slave_sql_with_master.inc -[on slave] -delete from t2 where id <= 2; -delete from u2 where id <= 2; -[on master] -update t2 set i2=100, value=100 where id=1; -update u2 set 
i2=100, value=100 where id=1; -[on slave] -call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*"); -call mtr.add_suppression("Slave: Can't find record in 'u2'.*"); -include/wait_for_slave_sql_error.inc [errno=1032] -select count(*) from t2 force index(primary); -count(*) -2 -select count(*) from t2 force index(i1); -count(*) -1 -select count(*) from t2 force index(i2); -count(*) -2 -select * from t2 where id=1; -id i1 i2 value -1 1 100 100 -select i1 from t2 where i1=1; -i1 -select i2 from t2 where i2=100; -i2 -100 -select count(*) from u2 force index(primary); -count(*) -1 -select count(*) from u2 force index(i1); -count(*) -1 -select count(*) from u2 force index(i2); -count(*) -1 -select * from u2 where id=1; -id i1 i2 value -select i1 from u2 where i1=1; -i1 -select i2 from u2 where i2=100; -i2 -include/wait_for_slave_sql_to_start.inc - -# some tables with read-free replication on and some with it off -# secondary keys have extra rows - -[on master] -create table t3 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); -create table u3 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); -insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3); -insert into u3 values (1,1,1,1),(2,2,2,2),(3,3,3,3); -include/sync_slave_sql_with_master.inc -[on slave] -update t3 set i1=100 where id=1; -update u3 set i1=100 where id=1; -[on master] -delete from t3 where id=1; -delete from u3 where id=1; -include/sync_slave_sql_with_master.inc -[on slave] -select count(*) from t3 force index(primary); -count(*) -2 -select count(*) from t3 force index(i1); -count(*) -3 -select count(*) from t3 force index(i2); -count(*) -2 -select i1 from t3 where i1=100; -i1 -100 -select count(*) from u3 force index(primary); -count(*) -2 -select count(*) from u3 force index(i1); -count(*) -2 -select count(*) from u3 force index(i2); -count(*) -2 -select i1 from u3 where i1=100; -i1 -drop table t1, t2, t3, u2, u3; -drop 
procedure save_read_stats; -drop procedure get_read_stats; -include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result new file mode 100644 index 00000000000..8cdfa910739 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result @@ -0,0 +1,56 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +drop table if exists t1; +create table t0 (a int) engine=myisam; +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(a int) engine=myisam; +insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C; +create table t2 ( +pk int primary key, +kp1 int, +kp2 int, +col1 int, +key (kp1,kp2) +) engine=rocksdb; +insert into t2 select a,a,a,a from t1; +create table t3 like t2; +insert into t3 select * from t2; +include/sync_slave_sql_with_master.inc +set global debug= 'd,dbug.rocksdb.get_row_by_rowid'; +include/stop_slave.inc +include/start_slave.inc +update t2 set col1=100 where kp1 between 1 and 3 and mod(kp2,2)=0; +set debug_sync= 'now WAIT_FOR Reached'; +set global debug = ''; +set sql_log_bin=0; +delete from t2 where pk=2; +delete from t2 where pk=3; +set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running'; +include/sync_slave_sql_with_master.inc +select * from t2 where pk < 5; +pk kp1 kp2 col1 +0 0 0 0 +1 1 1 1 +4 4 4 4 +set global debug= 'd,dbug.rocksdb.get_row_by_rowid'; +include/stop_slave.inc +include/start_slave.inc +update t3 set col1=100 where kp1 between 1 and 4 and mod(kp2,2)=0; +call 
mtr.add_suppression("Deadlock found when trying to get lock"); +set debug_sync= 'now WAIT_FOR Reached'; +set global debug = ''; +set sql_log_bin=0; +delete from t3 where pk=2; +delete from t3 where pk=3; +set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running'; +include/sync_slave_sql_with_master.inc +select * from t3 where pk < 5; +pk kp1 kp2 col1 +0 0 0 0 +1 1 1 1 +4 4 4 100 +drop table t0, t1, t2, t3; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result index eb23b71808b..eac329a24e7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result @@ -138,6 +138,9 @@ __system__ TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY # __system__ TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE # __system__ TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER # __system__ TABLE_FACTORY::INDEX_TYPE # +__system__ TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE # +__system__ TABLE_FACTORY::INDEX_SHORTENING # +__system__ TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO # __system__ TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION # __system__ TABLE_FACTORY::CHECKSUM # __system__ TABLE_FACTORY::NO_BLOCK_CACHE # @@ -147,6 +150,7 @@ __system__ TABLE_FACTORY::BLOCK_CACHE_OPTIONS # __system__ TABLE_FACTORY::CAPACITY # __system__ TABLE_FACTORY::NUM_SHARD_BITS # __system__ TABLE_FACTORY::STRICT_CAPACITY_LIMIT # +__system__ TABLE_FACTORY::MEMORY_ALLOCATOR # __system__ TABLE_FACTORY::HIGH_PRI_POOL_RATIO # __system__ TABLE_FACTORY::BLOCK_CACHE_COMPRESSED # __system__ TABLE_FACTORY::PERSISTENT_CACHE # @@ -211,6 +215,9 @@ cf_t1 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY # cf_t1 TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE # cf_t1 TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER # cf_t1 TABLE_FACTORY::INDEX_TYPE # +cf_t1 TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE # +cf_t1 
TABLE_FACTORY::INDEX_SHORTENING # +cf_t1 TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO # cf_t1 TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION # cf_t1 TABLE_FACTORY::CHECKSUM # cf_t1 TABLE_FACTORY::NO_BLOCK_CACHE # @@ -220,6 +227,7 @@ cf_t1 TABLE_FACTORY::BLOCK_CACHE_OPTIONS # cf_t1 TABLE_FACTORY::CAPACITY # cf_t1 TABLE_FACTORY::NUM_SHARD_BITS # cf_t1 TABLE_FACTORY::STRICT_CAPACITY_LIMIT # +cf_t1 TABLE_FACTORY::MEMORY_ALLOCATOR # cf_t1 TABLE_FACTORY::HIGH_PRI_POOL_RATIO # cf_t1 TABLE_FACTORY::BLOCK_CACHE_COMPRESSED # cf_t1 TABLE_FACTORY::PERSISTENT_CACHE # @@ -284,6 +292,9 @@ default TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY # default TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE # default TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER # default TABLE_FACTORY::INDEX_TYPE # +default TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE # +default TABLE_FACTORY::INDEX_SHORTENING # +default TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO # default TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION # default TABLE_FACTORY::CHECKSUM # default TABLE_FACTORY::NO_BLOCK_CACHE # @@ -293,6 +304,7 @@ default TABLE_FACTORY::BLOCK_CACHE_OPTIONS # default TABLE_FACTORY::CAPACITY # default TABLE_FACTORY::NUM_SHARD_BITS # default TABLE_FACTORY::STRICT_CAPACITY_LIMIT # +default TABLE_FACTORY::MEMORY_ALLOCATOR # default TABLE_FACTORY::HIGH_PRI_POOL_RATIO # default TABLE_FACTORY::BLOCK_CACHE_COMPRESSED # default TABLE_FACTORY::PERSISTENT_CACHE # @@ -357,6 +369,9 @@ rev:cf_t2 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY # rev:cf_t2 TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE # rev:cf_t2 TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER # rev:cf_t2 TABLE_FACTORY::INDEX_TYPE # +rev:cf_t2 TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE # +rev:cf_t2 TABLE_FACTORY::INDEX_SHORTENING # +rev:cf_t2 TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO # rev:cf_t2 TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION # rev:cf_t2 TABLE_FACTORY::CHECKSUM # rev:cf_t2 TABLE_FACTORY::NO_BLOCK_CACHE # @@ 
-366,6 +381,7 @@ rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_OPTIONS # rev:cf_t2 TABLE_FACTORY::CAPACITY # rev:cf_t2 TABLE_FACTORY::NUM_SHARD_BITS # rev:cf_t2 TABLE_FACTORY::STRICT_CAPACITY_LIMIT # +rev:cf_t2 TABLE_FACTORY::MEMORY_ALLOCATOR # rev:cf_t2 TABLE_FACTORY::HIGH_PRI_POOL_RATIO # rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_COMPRESSED # rev:cf_t2 TABLE_FACTORY::PERSISTENT_CACHE # diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result b/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result index 989ddc0f03e..572cac19f4a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result @@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true; CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8; SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' ); Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary -t1 ROCKSDB 10 Fixed 2 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N -t2 ROCKSDB 10 Fixed 1 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N +t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N +t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL 0 N SHOW TABLE STATUS WHERE name LIKE 't2'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary -t2 ROCKSDB 10 Fixed 10000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N +t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N DROP TABLE t1, t2, t3; CREATE DATABASE 
`db_new..............................................end`; USE `db_new..............................................end`; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result new file mode 100644 index 00000000000..60d9f69a398 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result @@ -0,0 +1,31 @@ +create table mz(c int); +affected rows: 0 +insert into mz values(1); +affected rows: 1 +commit; +affected rows: 0 +SET debug= '+d,abort_with_io_write_error'; +affected rows: 0 +set global binlog_error_action=1; +affected rows: 0 +show session variables like 'debug'; +Variable_name Value +debug d,abort_with_io_write_error +affected rows: 1 +show global variables like 'binlog_error_action'; +Variable_name Value +binlog_error_action ABORT_SERVER +affected rows: 1 +show global variables like 'skip_core_dump_on_error'; +Variable_name Value +skip_core_dump_on_error ON +affected rows: 1 +# crash_during_update +update mz set c=13; +ERROR HY000: Binary logging not possible. Message: An error occurred during sync stage of the commit. 'binlog_error_action' is set to 'ABORT_SERVER'. Hence aborting the server. 
+# server aborted +Pattern "mysqld got signal 6" found +# but no core written +Pattern "Writing a core file" not found +drop table mz; +affected rows: 0 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/statistics.result b/storage/rocksdb/mysql-test/rocksdb/r/statistics.result index 9fdd50a7e14..1d0993527c9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/statistics.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/statistics.result @@ -29,9 +29,9 @@ true set global rocksdb_force_flush_memtable_now = true; SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE(); table_name table_rows -t1 100000 -t2 4999 -t3 4999 +t1 1000 +t2 1000 +t3 1000 SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE(); table_name data_length>0 index_length>0 t1 1 1 @@ -40,9 +40,9 @@ t3 1 1 # restart SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE(); table_name table_rows -t1 100000 -t2 4999 -t3 4999 +t1 1000 +t2 1000 +t3 1000 SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE(); table_name data_length>0 index_length>0 t1 1 1 @@ -62,9 +62,9 @@ test.t5 analyze Error Table 'test.t5' doesn't exist test.t5 analyze status Operation failed SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE(); table_name table_rows -t1 100000 -t2 4999 -t3 4999 +t1 1000 +t2 1000 +t3 1000 SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE(); table_name data_length>0 index_length>0 t1 1 1 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result index d1e445f734c..95dae68b4e6 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result @@ -1,18 
+1,16 @@ DROP TABLE IF EXISTS t1; CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb DATA DIRECTORY = '/foo/bar/data'; -ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") +ERROR HY000: Can't create table `test`.`t1` (errno: 198 "Unknown error 198") show warnings; Level Code Message +Error 1005 Can't create table `test`.`t1` (errno: 198 "Unknown error 198") Warning 1296 Got error 198 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB -Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") -Warning 1030 Got error 140 "Wrong create options" from storage engine ROCKSDB CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb INDEX DIRECTORY = '/foo/bar/index'; -ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") +ERROR HY000: Can't create table `test`.`t1` (errno: 199 "Unknown error 199") show warnings; Level Code Message +Error 1005 Can't create table `test`.`t1` (errno: 199 "Unknown error 199") Warning 1296 Got error 199 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB -Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") -Warning 1030 Got error 140 "Wrong create options" from storage engine ROCKSDB CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY) ENGINE=rocksdb PARTITION BY RANGE (id) ( PARTITION P0 VALUES LESS THAN (1000) @@ -21,7 +19,12 @@ PARTITION P1 VALUES LESS THAN (2000) DATA DIRECTORY = '/foo/bar/data/', PARTITION P2 VALUES LESS THAN (MAXVALUE) ); -ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") +ERROR HY000: Can't create table `test`.`t1` (errno: 198 "Unknown error 198") +show warnings; +Level Code Message +Error 1005 Can't create table `test`.`t1` (errno: 198 "Unknown error 198") +Warning 1296 Got error 198 'Specifying DATA DIRECTORY for an individual table is not supported.' 
from ROCKSDB +Error 6 Error on delete of './test/t1.par' (Errcode: 2 "No such file or directory") CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE (id) ( PARTITION P0 VALUES LESS THAN (1000) @@ -30,4 +33,9 @@ PARTITION P1 VALUES LESS THAN (2000) INDEX DIRECTORY = '/foo/bar/data/', PARTITION P2 VALUES LESS THAN (MAXVALUE) ); -ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") +ERROR HY000: Can't create table `test`.`t1` (errno: 199 "Unknown error 199") +show warnings; +Level Code Message +Error 1005 Can't create table `test`.`t1` (errno: 199 "Unknown error 199") +Warning 1296 Got error 199 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB +Error 6 Error on delete of './test/t1.par' (Errcode: 2 "No such file or directory") diff --git a/storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result new file mode 100644 index 00000000000..67f655b66d7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result @@ -0,0 +1,620 @@ +# +# table(hidden key) +# +CREATE TABLE t1 ( +a INT, +b INT +) ENGINE=ROCKSDB +PARTITION BY RANGE (b) ( +PARTITION p0 VALUES LESS THAN (3), +PARTITION p1 VALUES LESS THAN (6), +PARTITION p2 VALUES LESS THAN MAXVALUE +); +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +TRUNCATE TABLE t1; +ALTER TABLE t1 TRUNCATE PARTITION p2; +INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8); +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +2 4 +3 8 +ALTER 
TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +2 4 +SELECT a FROM t1 WHERE b > 2; +a +2 +SELECT b from t1 where a != 3; +b +1 +4 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1 ORDER BY b; +a b +1 1 +SELECT a FROM t1 WHERE b > 2; +a +SELECT b from t1 where a != 3; +b +1 +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1 ORDER BY a; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8); +SELECT a,b FROM t1; +a b +4 1 +5 4 +6 8 +SELECT a FROM t1 WHERE b < 5; +a +4 +5 +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1; +a b +4 1 +5 4 +INSERT INTO t1(a,b) VALUES(7, 1); +SELECT b from t1 WHERE a > 2; +b +1 +1 +4 +SELECT a,b FROM t1; +a b +4 1 +5 4 +7 1 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1; +a b +4 1 +7 1 +INSERT INTO t1(a,b) VALUES(8, 4); +SELECT a,b FROM t1; +a b +4 1 +7 1 +8 4 +SELECT b from t1 WHERE a < 9; +b +1 +1 +4 +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1; +a b +8 4 +INSERT INTO t1(a,b) VALUES(9, 8); +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +set global rocksdb_force_flush_memtable_now = true; +set global rocksdb_compact_cf = 'default'; +SELECT b FROM t1 WHERE a < 5; +b +1 +2 +3 +4 +TRUNCATE TABLE t1; +SELECT b FROM t1 WHERE a < 5; +b +DROP TABLE t1; +# +# table(secondary key) +# +CREATE TABLE t1( +a INT, +b INT, +KEY (b) +) ENGINE=ROCKSDB +PARTITION BY HASH(a) PARTITIONS 3; +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +TRUNCATE TABLE t1; 
+ALTER TABLE t1 TRUNCATE PARTITION p2; +INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8); +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +2 4 +3 8 +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +3 8 +SELECT a FROM t1 WHERE b > 2; +a +3 +SELECT b from t1 where a != 3; +b +1 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1 ORDER BY b; +a b +3 8 +SELECT a FROM t1 WHERE b > 2; +a +3 +SELECT b from t1 where a != 3; +b +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1 ORDER BY a; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8); +SELECT a,b FROM t1; +a b +4 1 +5 4 +6 8 +SELECT a FROM t1 WHERE b < 5; +a +4 +5 +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1; +a b +4 1 +6 8 +INSERT INTO t1(a,b) VALUES(7, 1); +SELECT b from t1 WHERE a > 2; +b +1 +1 +8 +SELECT a,b FROM t1; +a b +4 1 +6 8 +7 1 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1; +a b +6 8 +INSERT INTO t1(a,b) VALUES(8, 4); +SELECT a,b FROM t1; +a b +6 8 +8 4 +SELECT b from t1 WHERE a < 9; +b +4 +8 +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1; +a b +8 4 +INSERT INTO t1(a,b) VALUES(9, 8); +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +set global rocksdb_force_flush_memtable_now = true; +set global rocksdb_compact_cf = 'default'; +SELECT b FROM t1 WHERE a < 5; +b +1 +2 +3 +4 +TRUNCATE TABLE t1; +SELECT b FROM t1 WHERE a < 5; +b +DROP TABLE t1; +# +# table(primary key, auto increment) +# +CREATE TABLE t1( +a INT NOT NULL AUTO_INCREMENT, +b INT, +PRIMARY KEY(a) +) ENGINE=ROCKSDB +PARTITION BY KEY() PARTITIONS 3; +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION 
p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +TRUNCATE TABLE t1; +ALTER TABLE t1 TRUNCATE PARTITION p2; +INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8); +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +2 4 +3 8 +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +SELECT a FROM t1 WHERE b > 2; +a +SELECT b from t1 where a != 3; +b +1 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1 ORDER BY b; +a b +SELECT a FROM t1 WHERE b > 2; +a +SELECT b from t1 where a != 3; +b +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1 ORDER BY a; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8); +SELECT a,b FROM t1; +a b +4 1 +5 4 +6 8 +SELECT a FROM t1 WHERE b < 5; +a +4 +5 +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1; +a b +4 1 +5 4 +6 8 +INSERT INTO t1(a,b) VALUES(7, 1); +SELECT b from t1 WHERE a > 2; +b +1 +1 +4 +8 +SELECT a,b FROM t1; +a b +4 1 +5 4 +6 8 +7 1 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1; +a b +4 1 +5 4 +INSERT INTO t1(a,b) VALUES(8, 4); +SELECT a,b FROM t1; +a b +4 1 +5 4 +8 4 +SELECT b from t1 WHERE a < 9; +b +1 +4 +4 +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1; +a b +8 4 +INSERT INTO t1(a,b) VALUES(9, 8); +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +set global rocksdb_force_flush_memtable_now = true; +set global rocksdb_compact_cf = 'default'; +SELECT b FROM t1 WHERE a < 5; +b +1 +2 +3 +4 +TRUNCATE TABLE t1; +SELECT b FROM t1 WHERE a < 5; +b +DROP TABLE t1; +# +# table(cf) +# +CREATE TABLE t1 ( +a INT, +b INT, +PRIMARY KEY (`a`, `b`) COMMENT 'testcomment' +) ENGINE=ROCKSDB +PARTITION BY LIST(a) ( +PARTITION p0 VALUES IN (1, 4, 7), +PARTITION p1 VALUES IN (2, 5, 8), +PARTITION p2 VALUES IN (3, 6, 9) +); +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; 
+ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +TRUNCATE TABLE t1; +ALTER TABLE t1 TRUNCATE PARTITION p2; +INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8); +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +2 4 +3 8 +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +2 4 +SELECT a FROM t1 WHERE b > 2; +a +2 +SELECT b from t1 where a != 3; +b +1 +4 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1 ORDER BY b; +a b +1 1 +SELECT a FROM t1 WHERE b > 2; +a +SELECT b from t1 where a != 3; +b +1 +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1 ORDER BY a; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8); +SELECT a,b FROM t1; +a b +4 1 +5 4 +6 8 +SELECT a FROM t1 WHERE b < 5; +a +4 +5 +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1; +a b +4 1 +5 4 +INSERT INTO t1(a,b) VALUES(7, 1); +SELECT b from t1 WHERE a > 2; +b +1 +1 +4 +SELECT a,b FROM t1; +a b +4 1 +5 4 +7 1 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1; +a b +4 1 +7 1 +INSERT INTO t1(a,b) VALUES(8, 4); +SELECT a,b FROM t1; +a b +4 1 +7 1 +8 4 +SELECT b from t1 WHERE a < 9; +b +1 +1 +4 +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1; +a b +8 4 +INSERT INTO t1(a,b) VALUES(9, 8); +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +set global rocksdb_force_flush_memtable_now = true; +set global rocksdb_compact_cf = 'default'; +SELECT b FROM t1 WHERE a < 5; +b +1 +2 +3 +4 +TRUNCATE TABLE t1; +SELECT b FROM t1 WHERE a < 5; +b +DROP TABLE t1; +# +# table(reverse cf) +# +CREATE 
TABLE t1 ( +a INT, +b INT, +PRIMARY KEY (`a`, `b`) COMMENT 'p0_cfname=rev:foo;p1_cfname=bar;p2_cfname=baz' +) ENGINE=ROCKSDB +PARTITION BY LIST(a) ( +PARTITION p0 VALUES IN (1, 4, 7), +PARTITION p1 VALUES IN (2, 5, 8), +PARTITION p2 VALUES IN (3, 6, 9) +); +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +TRUNCATE TABLE t1; +ALTER TABLE t1 TRUNCATE PARTITION p2; +INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8); +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +2 4 +3 8 +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1 ORDER BY a; +a b +1 1 +2 4 +SELECT a FROM t1 WHERE b > 2; +a +2 +SELECT b from t1 where a != 3; +b +1 +4 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1 ORDER BY b; +a b +1 1 +SELECT a FROM t1 WHERE b > 2; +a +SELECT b from t1 where a != 3; +b +1 +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1 ORDER BY a; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8); +SELECT a,b FROM t1; +a b +4 1 +5 4 +6 8 +SELECT a FROM t1 WHERE b < 5; +a +4 +5 +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1; +a b +4 1 +5 4 +INSERT INTO t1(a,b) VALUES(7, 1); +SELECT b from t1 WHERE a > 2; +b +1 +1 +4 +SELECT a,b FROM t1; +a b +4 1 +5 4 +7 1 +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1; +a b +4 1 +7 1 +INSERT INTO t1(a,b) VALUES(8, 4); +SELECT a,b FROM t1; +a b +4 1 +7 1 +8 4 +SELECT b from t1 WHERE a < 9; +b +1 +1 +4 +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1; +a b +8 4 +INSERT INTO t1(a,b) VALUES(9, 8); +TRUNCATE TABLE t1; 
+SELECT a,b FROM t1; +a b +TRUNCATE TABLE t1; +SELECT a,b FROM t1; +a b +set global rocksdb_force_flush_memtable_now = true; +set global rocksdb_compact_cf = 'default'; +SELECT b FROM t1 WHERE a < 5; +b +1 +2 +3 +4 +TRUNCATE TABLE t1; +SELECT b FROM t1 WHERE a < 5; +b +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result new file mode 100644 index 00000000000..b4e718f0f9e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result @@ -0,0 +1,45 @@ +set debug_sync='RESET'; +set global rocksdb_debug_ttl_read_filter_ts = -10; +connect conn1, localhost, root,,test; +connect conn2, localhost, root,,test; +connection conn1; +CREATE TABLE t_re ( +a INT, b INT, PRIMARY KEY (a) +) ENGINE=ROCKSDB +COMMENT 'ttl_duration=1'; +affected rows: 0 +set global rocksdb_debug_ttl_rec_ts = -13; +affected rows: 0 +insert into t_re values (1,1); +affected rows: 1 +insert into t_re values (2,2); +affected rows: 1 +set global rocksdb_debug_ttl_rec_ts = 0; +affected rows: 0 +commit; +affected rows: 0 +set debug_sync='rocksdb.ttl_rows_examined SIGNAL parked WAIT_FOR go'; +affected rows: 0 +SELECT * FROM t_re; +connection conn2; +set debug_sync='now WAIT_FOR parked'; +affected rows: 0 +SHOW PROCESSLIST; +Id User Host db Command Time State Info Progress +### ### ### ### Query ### Init SHOW PROCESSLIST 0.000 +### ### ### ### Query ### debug sync point: rocksdb.ttl_rows_examined SELECT * FROM t_re 0.000 +### ### ### ### Sleep ### NULL 0.000 +affected rows: 3 +set debug_sync='now SIGNAL go'; +affected rows: 0 +connection conn1; +a b +affected rows: 0 +set debug_sync='RESET'; +affected rows: 0 +set global rocksdb_debug_ttl_read_filter_ts = DEFAULT; +affected rows: 0 +drop table t_re; +affected rows: 0 +disconnect conn1; +disconnect conn2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/unique_check.result b/storage/rocksdb/mysql-test/rocksdb/r/unique_check.result index 
8de94e0297e..db9b119043a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/unique_check.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/unique_check.result @@ -66,13 +66,14 @@ id id2 value 2 1 2 truncate table t2; connection con1; -set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked1 WAIT_FOR go1'; +set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked1 WAIT_FOR go'; insert into t1 values (1,1); +connection default; +set debug_sync='now WAIT_FOR parked1'; connection con2; -set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked2 WAIT_FOR go2'; +set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked2 WAIT_FOR go'; insert into t2 values (1,1,1); connection default; -set debug_sync='now WAIT_FOR parked1'; set debug_sync='now WAIT_FOR parked2'; connection con3; set session rocksdb_lock_wait_timeout=1; @@ -81,8 +82,7 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction insert into t2 values (2,1,2); ERROR HY000: Lock wait timeout exceeded; try restarting transaction connection default; -set debug_sync='now SIGNAL go1'; -set debug_sync='now SIGNAL go2'; +set debug_sync='now SIGNAL go'; connection con1; connection con2; connection default; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result new file mode 100644 index 00000000000..8a4ee14c116 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result @@ -0,0 +1,18 @@ +Checking direct reads +CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `pk` int(11) NOT NULL DEFAULT 0, + `a` int(11) DEFAULT NULL, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`pk`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (1, 1,'a'); +INSERT INTO t1 (a,b) VALUES 
(2,'b'); +set global rocksdb_force_flush_memtable_now=1; +SELECT a,b FROM t1; +a b +1 a +2 b +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test index af9d2667e82..aeadf5381b0 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test @@ -4,6 +4,13 @@ --echo # Disable for valgrind because this takes too long --source include/not_valgrind.inc +# MariaDB: tooling to slowdown commits (also when running on ramdisk, we need +# write_prepared for some reason, this is set in the .opt file) +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_write_prepared.inc + + --disable_warnings DROP DATABASE IF EXISTS mysqlslap; --enable_warnings @@ -15,6 +22,7 @@ CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE= SET @save_rocksdb_enable_2pc= @@rocksdb_enable_2pc; SET @save_rocksdb_flush_log_at_trx_commit= @@rocksdb_flush_log_at_trx_commit; + # # In MariaDB, regular group commit operation does not cause increment of # rocksdb_wal_group_syncs. @@ -39,6 +47,19 @@ from information_schema.global_status where variable_name='Binlog_group_commits' select IF(variable_value - @b3 between 1000 and 1500, 'OK', variable_value - @b3) as Rocksdb_wal_synced from information_schema.global_status where variable_name='Rocksdb_wal_synced'; +# SQL layer solution is sufficient for Binlog counts but not RocksDB. 
+#set @tmp_bcwc= @@binlog_commit_wait_count; +#set @tmp_bcwu= @@binlog_commit_wait_usec; +#set global binlog_commit_wait_count=30; +#set global binlog_commit_wait_usec=500*1000; + +# RocksDB-side solution: + +set debug_dbug='+d,rocksdb_enable_delay_commits'; +create table dummy10(a int) engine=rocksdb; +drop table dummy10; +set debug_dbug='-d,rocksdb_enable_delay_commits'; + --echo ## --echo ## 2PC + durability + group commit --echo ## @@ -55,6 +76,14 @@ from information_schema.global_status where variable_name='Binlog_group_commits' select IF(variable_value - @b3 between 1 and 9000, 'OK', variable_value - @b3) from information_schema.global_status where variable_name='Rocksdb_wal_synced'; +#set global binlog_commit_wait_count= @tmp_bcwc; +#set global binlog_commit_wait_usec= @tmp_bcwu; + +set debug_dbug='+d,rocksdb_disable_delay_commits'; +create table dummy10(a int) engine=rocksdb; +drop table dummy10; +set debug_dbug='-d,rocksdb_disable_delay_commits'; + --echo ## --echo # 2PC enabled, MyRocks durability disabled, single thread --echo ## diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test index f4da0b7cb58..085324481b8 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test @@ -149,3 +149,23 @@ INSERT INTO t1 (a) VALUES (1); UPDATE t1 SET pk = 3; ALTER TABLE t1 AUTO_INCREMENT 2; DROP TABLE t1; + +--echo #---------------------------------- +--echo # Issue #902 Debug assert in autoincrement with small field type +--echo #---------------------------------- + +SET auto_increment_increment=100, auto_increment_offset=10; +CREATE TABLE t1(i INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615; +# ha_rocksdb::get_auto_increment would assert here +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL); +SELECT * FROM t1; +ALTER TABLE t1 AUTO_INCREMENT=1; +INSERT INTO t1 VALUES (NULL); +SELECT 
* FROM t1; +ALTER TABLE t1 AUTO_INCREMENT=18446744073709551615; +# ha_rocksdb::get_auto_increment would assert here +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL); +SELECT * FROM t1; +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread.test index 78521fbc9ef..5a40d28e1c9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread.test @@ -23,22 +23,28 @@ connect (con3, localhost, root,,); # Start each thread on an insert that will block waiting for a signal connection con1; +SET debug_sync='rocksdb.autoinc_vars2 SIGNAL go2'; SET debug_sync='rocksdb.autoinc_vars SIGNAL parked1 WAIT_FOR go1'; send INSERT INTO t1 VALUES(NULL); +connection default; +SET debug_sync='now WAIT_FOR parked1'; + connection con2; SET debug_sync='rocksdb.autoinc_vars SIGNAL parked2 WAIT_FOR go2'; send INSERT INTO t1 VALUES(NULL); # Wait for both threads to be at debug_sync point connection default; -SET debug_sync='now WAIT_FOR parked1'; SET debug_sync='now WAIT_FOR parked2'; + # Signal both threads to continue +# (In MariaDB, we signal one of them which continues and signals the other) send SET debug_sync='now SIGNAL go1'; connection con3; -SET debug_sync='now SIGNAL go2'; +# MariaDB: do nothing here + connection default; reap; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.cnf b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf index a76f1244bab..a76f1244bab 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.cnf +++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test new file mode 100644 index 00000000000..9b5c4571c19 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test @@ -0,0 
+1,3 @@ +let $trx_isolation = READ COMMITTED; +--source blind_delete_without_tx_api.inc + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf new file mode 100644 index 00000000000..a76f1244bab --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf @@ -0,0 +1,11 @@ +!include suite/rpl/my.cnf + +[mysqld.1] +sync_binlog=0 +binlog_format=row +slave-exec-mode=strict + +[mysqld.2] +sync_binlog=0 +binlog_format=row +slave-exec-mode=strict diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test new file mode 100644 index 00000000000..4369f6baa62 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test @@ -0,0 +1,3 @@ +let $trx_isolation = REPEATABLE READ; +--source blind_delete_without_tx_api.inc + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.test b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc index e5f70be4c3b..4f03695bf02 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc @@ -5,6 +5,7 @@ source include/master-slave.inc; connection master; +eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation; set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key; set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api; @@ -43,6 +44,7 @@ SELECT count(*) FROM t1; --source include/sync_slave_sql_with_master.inc connection slave; +eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation; SELECT count(*) FROM t1; connection master; @@ -109,8 +111,8 @@ call mtr.add_suppression("Slave: Can't find record in 't1'.*"); --source include/wait_for_slave_sql_error.inc connection slave; -set @save_rocksdb_read_free_rpl_tables=@@global.rocksdb_read_free_rpl_tables; -set global 
rocksdb_read_free_rpl_tables="t.*"; +set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl; +set global rocksdb_read_free_rpl=PK_SK; START SLAVE; connection master; --source include/sync_slave_sql_with_master.inc @@ -121,7 +123,7 @@ connection master; # cleanup connection slave; -set global rocksdb_read_free_rpl_tables=@save_rocksdb_read_free_rpl_tables; +set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl; connection master; SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key; SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt index ef6d0fd554a..a21608c7c1d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt @@ -1,3 +1,4 @@ --rocksdb_default_cf_options=write_buffer_size=64k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:20 --rocksdb_debug_optimizer_n_rows=1000 --rocksdb_table_stats_sampling_pct=100 +--rocksdb_info_log_level=debug_level diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test index a15e2a89693..dc2a0da506d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test @@ -18,6 +18,7 @@ CREATE TABLE `linktable` ( ) ENGINE=RocksDB DEFAULT COLLATE=latin1_bin; --disable_query_log +call mtr.add_suppression("LibRocksDB"); let $i = 1; while ($i <= 10000) { let $insert = INSERT INTO linktable VALUES($i, $i, $i, $i, 1, 1, $i, $i, $i); @@ -33,9 +34,26 @@ select id1, id2, link_type, visibility, data, time, version from linktable FORCE select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where 
variable_name='rocksdb_bloom_filter_prefix_checked'; # BF len 20 + +--echo # MariaDB: we don't have optimizer_force_index_for_range, but we can use EITS +--echo # to get the query plan we want. +set @tmp_use_stat_tables= @@use_stat_tables; +set use_stat_tables='preferably'; +analyze table linktable persistent for all; +flush tables; +explain select * from linktable; +--echo # This must use range(id1_type2), key_len=24 +explain +select id1, id2, link_type, visibility, data, time, version from linktable +FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc; + select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked'; +# MariaDB: no support for optimizer_force_index_for_range: +#set @tmp_force_index_for_range=@@optimizer_force_index_for_range; +#set optimizer_force_index_for_range=on; select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc; select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked'; +#set global optimizer_force_index_for_range=@tmp_force_index_for_range; # BF len 13 select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked'; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt index efcd69ba5bf..4576d20f45b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt @@ -1,3 +1,3 @@ --rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;} 
---rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4}; +--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4};bf5_1={prefix_extractor=capped:4} diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test index 00968aebb62..11890dcfbaf 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test @@ -56,6 +56,31 @@ insert into t4 values (1, 0xFFFF, 0xFFF, 12345); --echo # This must not fail an assert: select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; -drop table t1,t2,t3,t4; +--echo # +--echo # Issue #881: Issue #809 still occurs for reverse scans on forward cfs +--echo # + +# The same as t1 above but uses forward-ordered column family: + +create table t5 ( + id1 bigint not null, + id2 bigint not null, + id3 varchar(100) not null, + id4 int not null, + id5 int not null, + value bigint, + value2 varchar(100), + primary key (id1, id2, id3, id4) COMMENT 'bf5_1' +) engine=ROCKSDB; + +insert into t5 select * from t1; + +set global rocksdb_force_flush_memtable_now=1; + +--echo # An index scan starting from the end of the table: +explain +select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +drop table t1,t2,t3,t4,t5; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test index 0db5e6d9cc4..b1afc5b2f9d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test @@ -5,6 +5,7 @@ --source include/big_test.inc --let pk_cf=cf1 +--let pk_cf_name=cf1 --let data_order_desc=0 --source ../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test index 
3f085269365..0409784811f 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test @@ -53,11 +53,31 @@ INSERT INTO t1 VALUES(1); INSERT INTO t1 VALUES(2); INSERT INTO t1 VALUES(20); INSERT INTO t1 VALUES(21); +let $ID = `SELECT connection_id()`; --connection default --disconnect con1 SELECT * FROM t1; +--disable_parsing +# MariaDB: no support for $RPC_PROTOCOL +if (`SELECT $RPC_PROTOCOL > 0`) { + # for --rpc_protocol mode wait for the background detached session to + # go away + let $wait_condition = + SELECT COUNT(*) = 0 + FROM information_schema.srv_sessions + WHERE id = $ID; + --source include/wait_condition.inc +} + +if (`SELECT $RPC_PROTOCOL = 0`) { + # for non --rpc_protocol mode simply wait until the number of sessions + # returns to earlier levels + --source include/wait_until_count_sessions.inc +} +--enable_parsing +# MariaDB: --source include/wait_until_count_sessions.inc # Note: in MariaDB, session count will be decremented *before* diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test index 67d68ac7a2d..f011964db34 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test @@ -4,6 +4,7 @@ --source include/big_test.inc --let pk_cf=rev:cf1 +--let pk_cf_name=cf1 --let data_order_desc=0 --source ../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test index 7110fe5f1d7..37f19a39564 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test @@ -4,6 +4,7 @@ --source include/big_test.inc --let pk_cf=rev:cf1 +--let pk_cf_name=cf1 --let data_order_desc=1 --source ../include/bulk_load.inc diff --git 
a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test index 6c6e51a2a51..4f3ffd23bd9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test @@ -4,6 +4,7 @@ --source include/big_test.inc --let pk_cf=cf1 +--let pk_cf_name=cf1 --let data_order_desc=1 --source ../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc new file mode 100644 index 00000000000..1f5c9fbb3f2 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc @@ -0,0 +1,213 @@ +--source include/have_rocksdb.inc + +--source ../include/bypass_create_table.inc + +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; + +SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version +from link_table WHERE id1=1 and id2=2 and link_type=3; +SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /*+bypw*/ 
id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; + +--echo # Point query +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2) and link_type=3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1) and id2 IN (2) and link_type=3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3; + +--echo # Prefix range query + +--echo # Prefix range query with SK +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME DESC LIMIT 10; +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME ASC LIMIT 10; + +--echo # Prefix range query with SK with limits +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, 
version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,10; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,5; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,1; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,10; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,5; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 
AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,1; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,10; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,5; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,1; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,0; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,10; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,5; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,1; + +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY 
TIME DESC LIMIT 10,0; + +--echo # Prefix range query with PK +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 DESC; +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; + +--echo # Prefix range query with PK + value +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 DESC; +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; + +--echo # Transaction +BEGIN; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +COMMIT; + +BEGIN; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +ROLLBACK; + +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; + +--echo # Data types +SELECT /*+ bypass */ id1 FROM link_table where link_type="3"; +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1"; +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True; +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1'; +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01'; +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL; + +DROP TABLE count_table; +DROP TABLE 
link_table; +DROP TABLE link_table3; +DROP TABLE link_table2; +DROP TABLE id_table; +DROP TABLE node_table; +DROP TABLE link_table5; +DROP TABLE link_table6; +DROP TABLE link_table4; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test new file mode 100644 index 00000000000..51064356de7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test @@ -0,0 +1,3 @@ +--source include/have_rocksdb.inc + +--source bypass_select_basic.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt new file mode 100644 index 00000000000..81bc90b0531 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt @@ -0,0 +1,3 @@ +--rocksdb_default_cf_options=write_buffer_size=128m;target_file_size_base=32m;max_bytes_for_level_base=512m;level0_file_num_compaction_trigger=4;level0_slowdown_writes_trigger=20;level0_stop_writes_trigger=30;max_write_buffer_number=4;compression_per_level=kLZ4Compression;bottommost_compression=kZSTD;compression_opts=-14:6:0;block_based_table_factory={cache_index_and_filter_blocks=1;filter_policy=bloomfilter:10:false;whole_key_filtering=0};prefix_extractor=capped:12;level_compaction_dynamic_level_bytes=true;optimize_filters_for_hits=true;memtable_prefix_bloom_size_ratio=0.039;max_compaction_bytes=402653184;report_bg_io_stats=true;compaction_pri=kMinOverlappingRatio;soft_pending_compaction_bytes_limit=20480000000 +--rocksdb_override_cf_options=cf_assoc={prefix_extractor=capped:28};cf_assoc_count={prefix_extractor=capped:20};rev:cf_assoc_id1_type={prefix_extractor=capped:20};cf_fbobj_type_id={prefix_extractor=capped:16};cf_assoc_disagg={prefix_extractor=capped:20};__system__={write_buffer_size=16m}; + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test 
b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test new file mode 100644 index 00000000000..51064356de7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test @@ -0,0 +1,3 @@ +--source include/have_rocksdb.inc + +--source bypass_select_basic.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_flags.test b/storage/rocksdb/mysql-test/rocksdb/t/check_flags.test new file mode 100644 index 00000000000..9afe562f114 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/check_flags.test @@ -0,0 +1,117 @@ +--source include/have_rocksdb.inc +--source include/have_debug_sync.inc + +set debug_sync='RESET'; +set global rocksdb_debug_ttl_read_filter_ts = -10; + +connect (conn1, localhost, root,,); +--let $conn1_id = `SELECT CONNECTION_ID()` +connection default; + +CREATE TABLE t1 (id INT, value INT, KEY (id), KEY (value)) ENGINE=ROCKSDB; +CREATE TABLE t2 (id INT, value INT) ENGINE=ROCKSDB; +CREATE TABLE t3 (id INT, kp1 INT, PRIMARY KEY (id), KEY(kp1)) ENGINE=ROCKSDB COMMENT='ttl_duration=1'; + +INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4), (5,5); +INSERT INTO t2 SELECT * FROM t1; +INSERT INTO t3 SELECT * FROM t1; + +connection conn1; +set debug_sync='rocksdb.check_flags_rmi SIGNAL parked WAIT_FOR go'; +send SELECT value FROM t1 WHERE value = 3; + +connection default; +set debug_sync='now WAIT_FOR parked'; +--echo KILL QUERY \$conn1_id; +--disable_query_log +eval KILL QUERY $conn1_id; +--enable_query_log +set debug_sync='now SIGNAL go'; + +connection conn1; +--error ER_QUERY_INTERRUPTED +--reap + +set debug_sync='RESET'; + +connection conn1; +set debug_sync='rocksdb.check_flags_rmi_scan SIGNAL parked WAIT_FOR go'; +send SELECT DISTINCT(id) FROM t1 WHERE value = 5 AND id IN (1, 3, 5); + +connection default; +set debug_sync='now WAIT_FOR parked'; +--echo KILL QUERY \$conn1_id; +--disable_query_log +eval KILL QUERY $conn1_id; +--enable_query_log +set debug_sync='now SIGNAL go'; + +connection conn1; +--error 
ER_QUERY_INTERRUPTED +--reap + +set debug_sync='RESET'; + +connection conn1; +set debug_sync='rocksdb.check_flags_inwd SIGNAL parked WAIT_FOR go'; +send SELECT value FROM t1 WHERE value > 3; + +connection default; +set debug_sync='now WAIT_FOR parked'; +--echo KILL QUERY \$conn1_id; +--disable_query_log +eval KILL QUERY $conn1_id; +--enable_query_log +set debug_sync='now SIGNAL go'; + +connection conn1; +--error ER_QUERY_INTERRUPTED +--reap + +set debug_sync='RESET'; + +connection conn1; +set debug_sync='rocksdb.check_flags_rnwd SIGNAL parked WAIT_FOR go'; +send SELECT id FROM t2; + +connection default; +set debug_sync='now WAIT_FOR parked'; +--echo KILL QUERY \$conn1_id; +--disable_query_log +eval KILL QUERY $conn1_id; +--enable_query_log +set debug_sync='now SIGNAL go'; + +connection conn1; +--error ER_QUERY_INTERRUPTED +--reap + +set debug_sync='RESET'; + + +connection conn1; +set debug_sync='rocksdb.check_flags_ser SIGNAL parked WAIT_FOR go'; +send SELECT kp1 FROM t3 ORDER BY kp1; + +connection default; +set debug_sync='now WAIT_FOR parked'; +--echo KILL QUERY \$conn1_id; +--disable_query_log +eval KILL QUERY $conn1_id; +--enable_query_log +set debug_sync='now SIGNAL go'; + +connection conn1; +--error ER_QUERY_INTERRUPTED +--reap + +connection default; +--disconnect conn1 + +set debug_sync='RESET'; + +set global rocksdb_debug_ttl_read_filter_ts = DEFAULT; + +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test index b39c022fc46..c8c12626139 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test @@ -1,22 +1,50 @@ -# MariaDB: "xargs" is not present on windows builders. -# we could work around this but this is not a priority. 
---source include/not_windows.inc - --disable_warnings let $MYSQLD_DATADIR= `select @@datadir`; let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; let $error_log= $MYSQLTEST_VARDIR/log/my_restart.err; select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; ---exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -t- -k 2 -n | tail -1 | xargs -0 -I {} -t sh -c "sed -i 's/rocksdb_version=.*/rocksdb_version=99.9.9/' {}" ---exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -t- -k 2 -n | tail -1 | xargs -0 -I {} -t sh -c "echo hello=world>>{}" +# +# MariaDB: The following shell commands are not portable so we are +# using perl instead: +#--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -t- -k 2 -n | tail -1 | xargs -0 -I {} -t sh -c "sed -i 's/rocksdb_version=.*/rocksdb_version=99.9.9/' {}" +#--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -t- -k 2 -n | tail -1 | xargs -0 -I {} -t sh -c "echo hello=world>>{}" + +perl; + my $path=$ENV{MYSQLTEST_VARDIR} . 
"/mysqld.1/data/\#rocksdb"; + opendir(my $dh, $path) || die "Can't opendir $some_dir: $!"; + my @files = grep { /^OPTIONS/ } readdir($dh); + closedir($dh); + sub compare_second_as_number { + local $aa= shift; + local $bb= shift; + $aa =~ s/OPTIONS-//; + $bb =~ s/OPTIONS-//; + return $aa <=> $bb; + } + + @sorted_files = sort { compare_second_as_number($a, $b); } @files; + my $last_file= $sorted_files[-1]; + + my $contents=""; + open(my $fh, "<", "$path/$last_file") || die ("Couldn't open $path/$last_file"); + while (<$fh>) { + $_ =~ s/rocksdb_version=.*/rocksdb_version=99.9.9/; + $contents .= $_; + } + close($fh); + $contents .= "hello=world\n"; + open(my $fh, ">", "$path/$last_file") || die("Can't open $path/$file for writing"); + print $fh $contents; + close($fh); +EOF --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --shutdown_server 10 --error 1 ---exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO --rocksdb_ignore_unknown_options=0 --loose-console --log-error=$error_log +--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO --rocksdb_ignore_unknown_options=0 --log-error=$error_log let SEARCH_FILE= $error_log; let SEARCH_PATTERN= RocksDB: Compatibility check against existing database options failed; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test b/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test index c2058474b01..963f6c247fa 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test @@ -1,5 +1,8 @@ --source "include/have_rocksdb.inc" --source "include/have_log_bin.inc" +# Don't run this with --rpc_protocol because it is doing its own work with +# the RPC protocol +--source "include/not_rpc_protocol.inc" # # This test was created because 2pc transactions were failing in MyRocks diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt new file mode 100644 index 
00000000000..418e4c3f056 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt @@ -0,0 +1 @@ +--rocksdb_default_cf_options=disable_auto_compactions=true
\ No newline at end of file diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt new file mode 100644 index 00000000000..418e4c3f056 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt @@ -0,0 +1 @@ +--rocksdb_default_cf_options=disable_auto_compactions=true
\ No newline at end of file diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt new file mode 100644 index 00000000000..418e4c3f056 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt @@ -0,0 +1 @@ +--rocksdb_default_cf_options=disable_auto_compactions=true
\ No newline at end of file diff --git a/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test b/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test index c5650359d8c..3ef35cb2633 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test @@ -7,7 +7,7 @@ USE test; CREATE TABLE mysql_table (a INT) ENGINE=ROCKSDB; -- error ER_BLOCK_NO_PRIMARY_KEY CREATE TABLE IF NOT EXISTS mysql_table_2 (a INT) ENGINE=ROCKSDB; --- error ER_BLOCK_NO_PRIMARY_KEY +-- error ER_TABLE_MUST_HAVE_COLUMNS CREATE TABLE mysql_table_no_cols ENGINE=ROCKSDB; CREATE TABLE mysql.mysql_table_2 (a INT) ENGINE=ROCKSDB; @@ -35,10 +35,29 @@ DROP INDEX `PRIMARY` ON mysql_primkey4; ALTER TABLE mysql.mysql_table ADD PRIMARY KEY (a); ALTER TABLE mysql.mysql_table DROP PRIMARY KEY; +SET default_storage_engine=ROCKSDB; +-- error ER_BLOCK_NO_PRIMARY_KEY +CREATE TABLE mysql_noeng(a INT, b INT); + +# Disable no_engine_substitution +SET sql_mode=""; +-- error ER_BLOCK_NO_PRIMARY_KEY +CREATE TABLE mysql_noeng_sub(a INT, b INT) ENGINE=BOGUS_ENGINE; + +CREATE TABLE mysql_primkey5 LIKE mysql_primkey; + +SET @@global.block_create_no_primary_key = false; +CREATE TABLE mysql_no_primkey (a INT) ENGINE=ROCKSDB; +SET @@global.block_create_no_primary_key = true; +-- error ER_BLOCK_NO_PRIMARY_KEY +CREATE TABLE mysql_block_no_primkey LIKE mysql_no_primkey; + DROP TABLE mysql_primkey; DROP TABLE mysql_primkey2; DROP TABLE mysql_primkey3; DROP TABLE mysql_primkey4; +DROP TABLE mysql_primkey5; +DROP TABLE mysql_no_primkey; USE mysql; DROP TABLE mysql_table; DROP TABLE mysql_table_2; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/delete_before_lock.test b/storage/rocksdb/mysql-test/rocksdb/t/delete_before_lock.test deleted file mode 100644 index 93a9d1adaf9..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/t/delete_before_lock.test +++ /dev/null @@ -1,36 +0,0 @@ ---source 
include/have_rocksdb.inc ---source include/have_debug_sync.inc - -# This is a test case to reproduce https://github.com/facebook/mysql-5.6/issues/162 -# Expected output of the last select for update was (1,2,100) and (1,3,100), but -# currently it returns (1,2,1) and (1,3,1), which must be fixed. - -connect (con, localhost, root,,); -connection default; - ---disable_warnings -set debug_sync='RESET'; -drop table if exists t1; ---enable_warnings - -create table t1 (id1 int, id2 int, value int, primary key (id1, id2)) engine=rocksdb; -insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1), (2, 2, 2); - -connection con; -set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; -send update t1 set value=100 where id1=1; - -connection default; -set debug_sync='now WAIT_FOR parked'; -delete from t1 where id1=1 and id2=1; -set debug_sync='now SIGNAL go'; - -connection con; -reap; -select * from t1 where id1=1 for update; - -# Cleanup -connection default; -disconnect con; -set debug_sync='RESET'; -drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def index 91bf571371e..2fb1404219a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def +++ b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def @@ -25,6 +25,20 @@ create_no_primary_key_table: MariaDB doesn't have --block_create_no_primary_key explicit_snapshot: MariaDB doesn't support Shared/Explicit snapshots percona_nonflushing_analyze_debug : Requires Percona Server's Non-flushing ANALYZE feature com_rpc_tx : Requires connection attributes and detached sessions +mysqlbinlog_blind_replace: requires @@enable_blind_replace support +optimize_myrocks_replace_into_base: requires @@enable_blind_replace support +optimize_myrocks_replace_into_lock: requires @@enable_blind_replace support +rocksdb.skip_core_dump_on_error: requires @@binlog_error_action support +bypass_select_basic_bloom : Query bypass is not supported +bypass_select_basic : 
Query bypass is not supported + +rocksdb_read_free_rpl : Read-Free replication is not supported +rocksdb_read_free_rpl_stress : Read-Free replication is not supported + +blind_delete_rr : Read-Free replication is not supported +blind_delete_rc : Read-Free replication is not supported + +force_shutdown: requires support for SHUTDOWN statement which calls exit(). ## ## Tests that do not fit MariaDB's test environment. Upstream seems to test @@ -56,11 +70,12 @@ gap_lock_raise_error: MDEV-11735: MyRocks: Gap Lock detector support show_engine : MariaRocks: MariaDB doesnt support SHOW ENGINE rocksdb TRANSACTION STATUS issue243_transactionStatus: MariaDB doesnt support SHOW ENGINE rocksdb TRANSACTION STATUS rpl_row_not_found : MariaDB doesnt support slave_exec_mode='SEMI_STRICT' +rpl_row_not_found_rc : MariaDB doesnt support slave_exec_mode='SEMI_STRICT' + ddl_high_priority: Needs fractional @@lock_wait_timeout deadlock_tracking : Needs SHOW ENGINE ROCKSDB TRANSACTION STATUS bytes_written: Needs I_S.TABLE_STATISTICS.IO_WRITE_BYTES trx_info_rpl : MariaRocks: @@rpl_skip_tx_api doesn't work, yet. -rpl_read_free: MDEV-10976 lock_wait_timeout_stats: MDEV-13404 rpl_row_triggers : Requires read-free slave. 
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test b/storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test new file mode 100644 index 00000000000..1817bc06fc3 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test @@ -0,0 +1,97 @@ +--source include/have_rocksdb.inc + +--source include/have_debug.inc +--source include/not_valgrind.inc + +connect (conn1, localhost, root,,test); + +create table t1 ( + pk int not null primary key, + col1 varchar(10) +) engine=rocksdb; + +insert into t1 values (1,1),(2,2),(3,3); + +connection conn1; +set session debug= "+d,myrocks_busy_loop_on_row_read"; +send select * from t1 where pk=1; + +--echo # testing unclean shutdown on stuck instance +connection default; +let $wait_condition= + select count(*) = 1 from information_schema.processlist + where info = 'select * from t1 where pk=1'; +--source include/wait_condition.inc +--echo # Run shutdown sql command with forcing kill (exit code 127) +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2006,2013 +shutdown 1; +--source include/wait_until_disconnected.inc + +--echo # verifying exit code is printed +let $error_log=$MYSQLTEST_VARDIR/log/testlog.err; +let SEARCH_FILE=$error_log; +--echo # restart the server +--exec echo "restart:--log-error=$error_log" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--enable_reconnect +--source include/wait_until_connected_again.inc + +--error 2006,2013 +shutdown 230; +--source include/wait_until_disconnected.inc +let SEARCH_PATTERN=COM_SHUTDOWN received from host/user = localhost/root, exit code 230; +--source include/search_pattern_in_file.inc + +--echo # restart the server +--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--enable_reconnect +--source include/wait_until_connected_again.inc + +--echo # verifying SHUTDOWN is refused if exit code > 255 +--error ER_UNKNOWN_ERROR +SHUTDOWN 256; +--error ER_UNKNOWN_ERROR +SHUTDOWN 10000; + +--echo # verifying SHUTDOWN is refused if 
instances are not read only +--error ER_UNKNOWN_ERROR +SHUTDOWN 0 read_only; +--error ER_UNKNOWN_ERROR +SHUTDOWN 127 read_only; +--error 2006,2013 +SHUTDOWN 127; +--source include/wait_until_disconnected.inc + +--echo # restart the server +--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--enable_reconnect +--source include/wait_until_connected_again.inc + +connect (conn2, localhost, root,,test); + +connection conn2; +set session debug= "+d,myrocks_busy_loop_on_row_read"; +send select * from t1 where pk=1; + +connection default; +let $wait_condition= + select count(*) = 1 from information_schema.processlist + where info = 'select * from t1 where pk=1'; +--source include/wait_condition.inc + +SET GLOBAL read_only=1; +--echo # verifying SHUTDOWN read_only works with read_only instance +--echo # Run shutdown sql command with forcing kill (exit code 127) +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2006,2013 +shutdown 255 read_only; +--source include/wait_until_disconnected.inc + +--echo # restart the server +--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--enable_reconnect +--source include/wait_until_connected_again.inc + +disconnect conn1; +disconnect conn2; +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt new file mode 100644 index 00000000000..ba9364e1523 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt @@ -0,0 +1 @@ +--rocksdb_debug_optimizer_n_rows=1000 diff --git a/storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test new file mode 100644 index 00000000000..a9c44a71edd --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test @@ -0,0 +1,8 @@ +--source include/have_debug.inc +set global debug="+d,force_group_by"; + +let $engine=RocksDB; +--source include/group_min_max.inc + +set global 
debug="-d,force_group_by"; + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test b/storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test new file mode 100644 index 00000000000..0d0fad2e5fa --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test @@ -0,0 +1,15 @@ +--source include/have_debug.inc +--source include/have_rocksdb.inc + +CREATE TABLE t1 (a INT, b CHAR(8), KEY ab(a, b)) ENGINE=rocksdb DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_bin; +INSERT INTO t1 (a,b) VALUES (76,'bar'); +INSERT INTO t1 (a,b) VALUES (35,'foo'); +INSERT INTO t1 (a,b) VALUES (77,'baz'); + +SET debug_dbug="+d,dbug.rocksdb.HA_EXTRA_KEYREAD"; + +SELECT b FROM t1 FORCE INDEX(ab) WHERE a=35; + + +SET debug_dbug="-d,dbug.rocksdb.HA_EXTRA_KEYREAD"; +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test b/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test index b2f37a07999..1332fe143d0 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test @@ -4,12 +4,6 @@ # INSERT statements for tables with keys # -################################################## -# TODO: -# A part of the test is disabled because currently -# unique indexes are not supported -################################################## - --disable_warnings DROP TABLE IF EXISTS t1; --enable_warnings @@ -24,12 +18,6 @@ INSERT INTO t1 (a,b) VALUES (3,'a'),(0,''); SELECT a,b FROM t1; DROP TABLE t1; ---echo #---------------------------------------- ---echo # UNIQUE KEYS are not supported currently ---echo #----------------------------------------- - ---disable_parsing - CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a)) ENGINE=rocksdb; INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'); INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f'); @@ -67,9 +55,11 @@ SELECT a,b FROM t1; --error ER_DUP_ENTRY INSERT INTO t1 (a,b) VALUES 
(101,'x'),(101,'x'); -DROP TABLE t1; ---enable_parsing +--sorted_result +SELECT a,b FROM t1; + +DROP TABLE t1; CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; @@ -91,3 +81,89 @@ SELECT a,b FROM t1; DROP TABLE t1; +# +# INSERT on DUPLICATE KEY UPDATE with multiple keys +# +--echo +--echo INSERT on DUPLICATE KEY UPDATE with multiple keys +--echo +CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); + +--sorted_result +SELECT a,b FROM t1; + +DROP TABLE t1; + +# +# INSERT on DUPLICATE KEY UPDATE with secondary key +# +--echo +--echo INSERT on DUPLICATE KEY UPDATE with secondary key +--echo +CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), 
(1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1; + +--sorted_result +SELECT a,b,c,pk FROM t1; + +DROP TABLE t1; + +--echo +--echo Disable caching and see if it still functions properly +--echo +SELECT @@rocksdb_enable_insert_with_update_caching; +SET GLOBAL rocksdb_enable_insert_with_update_caching=0; +SELECT @@rocksdb_enable_insert_with_update_caching; + +# +# INSERT on DUPLICATE KEY UPDATE with multiple keys +# +--echo +--echo INSERT on DUPLICATE KEY UPDATE with multiple keys +--echo +CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a'); + +--sorted_result +SELECT a,b FROM t1; + +DROP TABLE t1; + +# +# INSERT on DUPLICATE KEY UPDATE with secondary key +# +--echo +--echo INSERT on DUPLICATE KEY UPDATE with secondary key +--echo +CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb; +INSERT INTO t1 (a,b) VALUES (1,'a'); +INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), 
(3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1; +INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1; + +--sorted_result +SELECT a,b,c,pk FROM t1; + +DROP TABLE t1; + +--echo +--echo Cleanup +--echo +SET GLOBAL rocksdb_enable_insert_with_update_caching=1; + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue884.test b/storage/rocksdb/mysql-test/rocksdb/t/issue884.test new file mode 100644 index 00000000000..6bf3e5177f2 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/issue884.test @@ -0,0 +1,43 @@ +--source include/have_rocksdb.inc + +create table test ( + a bigint(20) not null, + b bigint(20) not null, + c varchar(500) not null, + d bigint(20) not null, + e bigint(20) not null, + f varchar(500) not null, + g varchar(500) not null, + h varchar(500) not null, + i varchar(1000) not null, + j varchar(16384) not null, + k varchar(200) not null, + l varchar(500) not null, + m varchar(100) not null, + n bigint(20) not null, + primary key (a, b, m, c(100), l(100), d, e, f(100), g(100), h(100), n), + key n (n), + key d (d, a) +) engine = rocksdb default charset = latin1; + +--disable_query_log +let $i = 1000; +while ($i) { + --eval insert into test values (10, 1, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i); + --eval insert into test values (10, 2, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i); + --eval insert into test values (10, 3, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i); + --eval insert into test values (10, 4, "i", $i / 100, $i, "f", "g", "h", 
"i", "j", "k", "l", "m", $i); + --eval insert into test values (10, 5, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i); + dec $i; +} +set global rocksdb_force_flush_memtable_now = true; +analyze table test; +--enable_query_log + +--replace_column 9 # +explain +select * from test where d = 10 and a = 10 and b = 2; +select * from test where d = 10 and a = 10 and b = 2; + + +drop table test; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue896.test b/storage/rocksdb/mysql-test/rocksdb/t/issue896.test new file mode 100644 index 00000000000..ba57fb99832 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/issue896.test @@ -0,0 +1,17 @@ +# issue 896 : Segmentation fault in myrocks::Rdb_string_reader::read +--source include/have_rocksdb.inc + +CREATE TABLE `t1` ( +`a` bigint(20) NOT NULL, +`b` varchar(10) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, +`u` bigint(20) unsigned NOT NULL, +`d` bigint(20) DEFAULT NULL, +PRIMARY KEY (`a`,`b`), +KEY `d` (`d`) +) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='ttl_duration=1000;ttl_col=u'; +INSERT INTO t1 VALUES (100, 'aaabbb', UNIX_TIMESTAMP(), 200); +--replace_column 9 # +EXPLAIN SELECT COUNT(*) FROM t1 FORCE INDEX(d); +--echo # segfault here without the fix +SELECT COUNT(*) FROM t1 FORCE INDEX(d); +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue900.test b/storage/rocksdb/mysql-test/rocksdb/t/issue900.test new file mode 100644 index 00000000000..c420d418c20 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/issue900.test @@ -0,0 +1,13 @@ +--source include/have_rocksdb.inc + +# Issue 900 : Segmentation fault in myrocks::Rdb_string_reader::read +CREATE TABLE t1(c1 VARCHAR(1) CHARACTER SET 'utf8' COLLATE 'utf8_bin', c2 YEAR, c3 REAL(1,0) UNSIGNED, PRIMARY KEY(c1)) ENGINE=RocksDB; +INSERT INTO t1 VALUES(0,'0','0'); +INSERT INTO t1 VALUES('{0}','0','0'); +INSERT INTO t1 VALUES('1','0','1'); +# Would segfault here +--error ER_DUP_ENTRY +ALTER TABLE t1 ADD INDEX(c3), 
ADD UNIQUE (c3); +--error ER_KEY_DOES_NOT_EXITS +SELECT c3 FROM t1 FORCE INDEX(c3) ORDER BY c3; +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt new file mode 100644 index 00000000000..d77439930fd --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt @@ -0,0 +1,2 @@ +--rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;} +--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:12}; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test new file mode 100644 index 00000000000..2cced2a1d7a --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test @@ -0,0 +1,29 @@ +# +# Issue #878: Descending scans from reverse column families return no results +# due to iterator bounds +# + +create table t (i int primary key) engine=rocksdb; + +let $cond=1; +while ($cond) +{ + --disable_query_log + truncate table t; + --enable_query_log + let $cond=`select RIGHT(HEX(index_number), 2) != "FD" from information_schema.rocksdb_ddl where table_name = 't'`; +} + +# Index id is now at FD. Create a table with primary and secondary key, so +# that the secondary key index id ends in 0xFF. 
+ +drop table t; +create table t (i int primary key, j int, key(j) comment 'rev:bf5_2') engine=rocksdb; +select RIGHT(HEX(index_number), 2) from information_schema.rocksdb_ddl where table_name = 't'; + +insert into t values (1, 1); + +select j from t order by j asc; +select j from t order by j desc; + +drop table t; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt b/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt new file mode 100644 index 00000000000..418e4c3f056 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt @@ -0,0 +1 @@ +--rocksdb_default_cf_options=disable_auto_compactions=true
\ No newline at end of file diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test new file mode 100644 index 00000000000..2b033023b2a --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test @@ -0,0 +1,62 @@ +# +# This test is intended to check that when blind replace is enabled, +# mysqlbinlog is able to pass this information in the captured binlog +# events and we are able to reapply such events +# + +--source include/have_log_bin.inc +--source include/have_rocksdb.inc +--source include/have_debug.inc + +reset master; +set GLOBAL binlog_format= 'ROW'; +SET GLOBAL enable_blind_replace=ON; +set binlog_format=row; + +create table t5 (c1 int primary key, c2 int); +insert into t5 values (1, 1); +insert into t5 values (2, 2); +insert into t5 values (3, 3); +select * from t5; +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t5 values (1, 11); +replace into t5 values (2, 22); +replace into t5 values (3, 33); + +# Ensure that this was a blind replace +select case when variable_value-@c = 3 then 'true' else 'false' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t5; +--source include/show_binlog_events.inc + +flush logs; + +# Capture binlog events using mysqlbinlog +let $MYSQLD_DATADIR= `select @@datadir`; +--let $log_file_name = query_get_value("SHOW BINARY LOGS", Log_name, 1) +--exec $MYSQL_BINLOG $MYSQLD_DATADIR/$log_file_name > $MYSQLTEST_VARDIR/tmp/mysqlbinlog-output + +# Drop the table. 
This will be recreated when we reapply binlog events +drop table t5; +reset master; + +# Now replay the binlog events +--echo Replaying binlog events containing blind replace statements should work +--exec $MYSQL < $MYSQLTEST_VARDIR/tmp/mysqlbinlog-output +select * from t5; +--source include/show_binlog_events.inc + +drop table t5; +reset master; + +# Replay the same binlog events again, but with blind_replace turned off +--echo Replaying the same binlog events with blind replace disabled should work +--echo The server should internally convert such events into updates +SET GLOBAL enable_blind_replace=OFF; +--exec $MYSQL < $MYSQLTEST_VARDIR/tmp/mysqlbinlog-output +select * from t5; + +--source include/show_binlog_events.inc + +set GLOBAL binlog_format=DEFAULT; +SET GLOBAL enable_blind_replace=DEFAULT; +drop table t5; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test index 4947ffb59b8..473bebdda89 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test @@ -9,7 +9,7 @@ connect (con1,localhost,root,,); connect (con2,localhost,root,,); connection con1; -create table r1 (id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4)) engine=rocksdb; +create table r1 (id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4), KEY (value1, value2, value3)) engine=rocksdb; insert into r1 values (1,1,1,1,1,1,1,1); insert into r1 values (1,1,1,2,2,2,2,2); insert into r1 values (1,1,2,1,3,3,3,3); @@ -32,8 +32,12 @@ BEGIN; insert into r1 values (5,5,5,5,5,5,5,5); update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1'; +--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/ --exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 
--print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test +--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/ +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load --rocksdb_bulk_load_allow_sk test + rollback; connection con1; @@ -44,11 +48,13 @@ source include/search_pattern_in_file.inc; set @save_default_storage_engine=@@global.default_storage_engine; SET GLOBAL default_storage_engine=rocksdb; +--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/ --exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test source include/search_pattern_in_file.inc; # Sanity test mysqldump when the --innodb-stats-on-metadata is specified (no effect) --echo ==== mysqldump with --innodb-stats-on-metadata ==== +--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/ --exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test # testing mysqldump work with statement based binary logging diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test new file mode 100644 index 00000000000..b37f532a21e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test @@ -0,0 +1,96 @@ +--source include/have_rocksdb.inc +--source include/have_debug.inc + +SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level; +SET GLOBAL rocksdb_perf_context_level=3; +SET GLOBAL enable_blind_replace=ON; + +# +# case 1: table only with primary key, support replace blind write +# +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; + +select 
variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +drop table t1; + +# +# case 2: table only with primary key but with trigger, not support replace blind write +# +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +create trigger trg before insert on t1 for each row set @a:=1; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; + +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +drop table t1; + + +# +# case 3: table without primary key, not support replace blind write +# + +create table t1(c1 int,c2 int) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; + +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +drop table t1; + + +create table t1(c1 int,c2 int unique) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; + +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +drop table t1; + + + +# +# 
case 4: table with primary key and secondary key, not support replace blind write +# +create table t1(c1 int primary key,c2 int unique) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; + +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +drop table t1; + + +create table t1(c1 int primary key,c2 int, key idx1(c2)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; + +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +drop table t1; + + + +# +# case 5: Disabling blind replace through enable_blind_replace should work +SET GLOBAL enable_blind_replace=OFF; +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; + +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +drop table t1; + +SET GLOBAL enable_blind_replace=DEFAULT; +SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test new file mode 100644 index 00000000000..6cce429a5de --- 
/dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test @@ -0,0 +1,88 @@ +--source include/have_rocksdb.inc +--source include/have_debug.inc + +# Enable blind replace +SET GLOBAL enable_blind_replace=ON; + +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); + +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; + +# +# case 1: update is blocked by replace into +# +connection con1; +SELECT @@global.enable_blind_replace; +begin; +replace into t1 values(1,11); + + +connection con2; +SELECT @@global.enable_blind_replace; +begin; +send update t1 set c2=22 where c1=1; + + +connection default; +# Check that the above update is blocked +let $wait_condition= + select count(*) = 1 from information_schema.processlist + where state = 'Waiting for row lock' and + info = 'update t1 set c2=22 where c1=1'; +--source include/wait_condition.inc + + +connection con1; +commit; + +connection con2; +--echo # Reap update. +--reap +commit; +select * from t1; + + +# +# case 2: replace into is blocked by update +# + +connection con1; +SELECT @@global.enable_blind_replace; +begin; +update t1 set c2=55 where c1=1; + +connection con2; +SELECT @@global.enable_blind_replace; +begin; +send replace into t1 values(1,66); + + +connection default; +# Check that the above replace into is blocked +let $wait_condition= + select count(*) = 1 from information_schema.processlist + where state = 'Waiting for row lock' and + info = 'replace into t1 values(1,66)'; +--source include/wait_condition.inc + + +connection con1; +commit; + +connection con2; +--echo # Reap replace into. 
+--reap +commit; +select * from t1; + +connection default; +drop table t1; + +disconnect con1; +disconnect con2; + +# Disable blind replace +SET GLOBAL enable_blind_replace=DEFAULT; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test index 161f7b566f5..8fa43e15827 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test @@ -42,6 +42,10 @@ SET @@global.rocksdb_update_cf_options = 'cf1={prefix_extractor=capped:26};'; # Restart no longer needed SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%'; +# set cf_options for non-existent cf2, cf2 should be created automatically +SET @@global.rocksdb_update_cf_options = 'cf2={prefix_extractor=capped:28};'; +SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%'; + # Satisfies can_use_bloom_filter (4+8+8+8), but can't use because the old SST # files have old prefix extractor select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked'; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test index 13f1bd68a72..96fe1a90bc9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test @@ -1114,7 +1114,7 @@ update t1 set a = sleep(100) where pk = 1; --connect (con1,localhost,root,,) -let $wait_condition= select State='User sleep' from information_schema.processlist where id=$con_id; +let $wait_condition= select State='User sleep' from information_schema.processlist where id=$con_id/* or srv_id=$con_id*/; --source include/wait_condition.inc --echo kill query \$con_id; @@ -1251,14 +1251,15 @@ drop table t0, t1; --echo # --echo # Check status variables +--echo # NOTE: We exclude 
rocksdb_num_get_for_update_calls because it's a debug only status var --echo # --replace_column 2 # -show status like 'rocksdb%'; +show status where variable_name like 'rocksdb%' and variable_name not like '%num_get_for_update%'; -select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%'; +select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%'; --echo # RocksDB-SE's status variables are global internally --echo # but they are shown as both session and global, like InnoDB's status vars. -select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%'; +select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%'; --echo # diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test index 3ba54dd9c84..fcbd8527acc 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test @@ -421,6 +421,7 @@ SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name=' # Verify that correct partition and key are used when searching. 
ANALYZE TABLE t2; +--replace_column 10 # EXPLAIN PARTITIONS SELECT * FROM t2 WHERE col3 = 0x4 AND col2 = 0x34567; DROP TABLE t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc new file mode 100644 index 00000000000..55f466a4d31 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc @@ -0,0 +1,106 @@ +--source include/have_rocksdb.inc +--source include/have_debug_sync.inc + +--source include/count_sessions.inc + +# Usage: +# +# let $order = ASC; # or DESC +# let $comment = "rev:cf2"; # or "" +# --source suite/rocksdb/t/rocksdb_concurrent_delete.inc + +let $first_row = -1; # Error this should never happen +if ($order == 'ASC') +{ + let $first_row = 1; + let $middle_row = 3; + let $end_row = 5; +} +if ($order == 'DESC') +{ + let $first_row = 5; + let $middle_row = 3; + let $end_row = 1; +} + +connect (con, localhost, root,,); +connection default; +eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; + +SET debug_sync='RESET'; + +eval CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT $comment, a INT); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); + +# This will cause the SELECT to block after finding the first row, but +# before locking and reading it. +--echo --PK first row delete +connection con; +eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE; + +# While that connection is waiting, delete the first row (the one con +# is about to lock and read +connection default; +SET debug_sync='now WAIT_FOR parked'; +eval DELETE FROM t1 WHERE pk = $first_row; + +# Signal the waiting select to continue +SET debug_sync='now SIGNAL go'; + +# Now get the results from the select. The first entry (1,1) (or (3,3) when +# using reverse ordering) should be missing. 
Prior to the fix the SELECT +# would have returned: "1815: Internal error: NotFound:" +connection con; +reap; + +# Deleting a middle row +--echo --PK middle row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE; + +connection default; +SET debug_sync='now WAIT_FOR parked'; +eval DELETE FROM t1 WHERE pk = $middle_row; +SET debug_sync='now SIGNAL go'; + +connection con; +if ($isolation_level == "REPEATABLE READ") +{ + --error ER_LOCK_DEADLOCK + reap; +} +if ($isolation_level == "READ COMMITTED") +{ + reap; +} + +# Deleting the end row +--echo --PK end row delete +SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; +send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE; + +connection default; +SET debug_sync='now WAIT_FOR parked'; +eval DELETE FROM t1 WHERE pk = $end_row; +SET debug_sync='now SIGNAL go'; + +connection con; +if ($isolation_level == "REPEATABLE READ") +{ + --error ER_LOCK_DEADLOCK + reap; +} +if ($isolation_level == "READ COMMITTED") +{ + reap; +} + + +# Cleanup +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test index 52f9485e6b7..47818bfdbe1 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test @@ -1,24 +1,38 @@ +# rocksdb_concurrent_delete test case covers snapshot conflicts, and +# verifying locking reads not stopping scanning when hitting row not found. +# The following code coverages are covered. +# +# 1. PK full scan (key=NULL) +# first row, and other rows +# ha_rnd_next -> rnd_next -> rnd_next_with_direction +# +# 2. 
PK range scan (key=PRIMARY) +# first row +# read_range_first -> index_read_map_impl -> read_row_from_primary_key +# next row +# index_next -> index_next_with_direction -> rnd_next_with_direction +# +# 3. SK full scan +# first row +# index_first -> index_first_intern -> index_next_with_direction -> secondary_index_read +# next row +# index_next -> index_next_with_direction -> secondary_index_read +# +# 4. SK range scan +# first row +# read_range_first -> index_read_map_impl -> read_row_from_secondary_key +# next row +# index_next -> index_next_with_direction -> secondary_index_read +# +# In all cases, RR gets snapshot conflict errors if non-first rows get +# deleted by another transaction after scanning. + --source include/have_rocksdb.inc --source include/have_debug_sync.inc -# This validates the fix for Issue #144. The problem was that with more -# than one client accessing/deleting the same row there was a possibility -# of client A finding a row (through Next() or Prev()) but the row being -# deleted before the GetForUpdate() call could occur. When this happened -# a nearly useless error was being returned. 
- -let $order=ASC; -let $comment=""; ---source include/rocksdb_concurrent_delete.inc - -let $order=DESC; -let $comment=""; ---source include/rocksdb_concurrent_delete.inc +let $isolation_level = REPEATABLE READ; +--source rocksdb_concurrent_delete_main.inc -let $order=ASC; -let $comment="rev:cf2"; ---source include/rocksdb_concurrent_delete.inc +let $isolation_level = READ COMMITTED; +--source rocksdb_concurrent_delete_main.inc -let $order=DESC; -let $comment="rev:cf2"; ---source include/rocksdb_concurrent_delete.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc new file mode 100644 index 00000000000..bcd86af96aa --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc @@ -0,0 +1,30 @@ +--source include/have_rocksdb.inc +--source include/have_debug_sync.inc + +# This validates the fix for Issue #144. The problem was that with more +# than one client accessing/deleting the same row there was a possibility +# of client A finding a row (through Next() or Prev()) but the row being +# deleted before the GetForUpdate() call could occur. When this happened +# a nearly useless error was being returned. 
+ +let $order=ASC; +let $comment=""; +--source rocksdb_concurrent_delete.inc +--source rocksdb_concurrent_delete_sk.inc + +let $order=DESC; +let $comment=""; +--source rocksdb_concurrent_delete.inc + +let $order=ASC; +let $comment="rev:cf2"; +--source rocksdb_concurrent_delete.inc + +let $order=DESC; +let $comment="rev:cf2"; +--source rocksdb_concurrent_delete.inc + +let $index=PRIMARY; +--source rocksdb_concurrent_delete_range.inc +let $index=sk; +--source rocksdb_concurrent_delete_range.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc new file mode 100644 index 00000000000..a85527141f6 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc @@ -0,0 +1,85 @@ +--source include/have_rocksdb.inc +--source include/have_debug_sync.inc + +--source include/count_sessions.inc + +# This is a test case to reproduce https://github.com/facebook/mysql-5.6/issues/162 +# Expected output of the last select for update was (1,2,100) and (1,3,100), but +# currently it returns (1,2,1) and (1,3,1), which must be fixed. 
+ +connect (con, localhost, root,,); +connection default; + +set debug_sync='RESET'; +eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; +create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb; +insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); + +# deleting a first row +--echo --First row delete with $index +connection con; +eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +send_eval update t1 force index ($index) set value=100 where id1=1; + +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=1; +set debug_sync='now SIGNAL go'; + +connection con; +reap; +select * from t1 where id1=1; + +# deleting a middle row +--echo --Middle row delete with $index +eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +send_eval update t1 force index ($index) set value=200 where id1=1; + +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=3; +set debug_sync='now SIGNAL go'; + +connection con; +if ($isolation_level == "REPEATABLE READ") +{ + --error ER_LOCK_DEADLOCK + reap; +} +if ($isolation_level == "READ COMMITTED") +{ + reap; +} +select * from t1 where id1=1; + +# deleting the end row +--echo --End row delete with $index +eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; +set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; +send_eval update t1 force index ($index) set value=300 where id1=1; + +connection default; +set debug_sync='now WAIT_FOR parked'; +delete from t1 where id1=1 and id2=5; +set debug_sync='now SIGNAL go'; + +connection con; +if ($isolation_level == "REPEATABLE READ") +{ + --error ER_LOCK_DEADLOCK + reap; +} +if ($isolation_level == "READ COMMITTED") +{ + reap; +} +select * from t1 where 
id1=1; + +# Cleanup +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc new file mode 100644 index 00000000000..ac0b5d76854 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc @@ -0,0 +1,82 @@ +--source include/have_rocksdb.inc +--source include/have_debug_sync.inc + +--source include/count_sessions.inc + +connect (con, localhost, root,,); +connection default; +eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; + +SET debug_sync='RESET'; + +eval CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a)); +INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); + +# This will cause the SELECT to block after finding the first row, but +# before locking and reading it. +--echo --SK first row delete +connection con; +eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; +SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go'; +send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; + +# While that connection is waiting, delete the first row (the one con +# is about to lock and read +connection default; +SET debug_sync='now WAIT_FOR parked'; +eval DELETE FROM t1 WHERE pk = 1; + +# Signal the waiting select to continue +SET debug_sync='now SIGNAL go'; + +connection con; +reap; + +# Deleting a middle row +--echo --SK middle row delete +SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go'; +send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; + +connection default; +SET debug_sync='now WAIT_FOR parked'; +eval DELETE FROM t1 WHERE pk = 3; +SET debug_sync='now SIGNAL go'; + +connection con; +if ($isolation_level == "REPEATABLE READ") +{ + --error ER_LOCK_DEADLOCK + reap; +} +if ($isolation_level == "READ COMMITTED") +{ + reap; +} + +# Deleting the end row 
+--echo --SK end row delete +SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go'; +send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; + +connection default; +SET debug_sync='now WAIT_FOR parked'; +eval DELETE FROM t1 WHERE pk = 5; +SET debug_sync='now SIGNAL go'; + +connection con; +if ($isolation_level == "REPEATABLE READ") +{ + --error ER_LOCK_DEADLOCK + reap; +} +if ($isolation_level == "READ COMMITTED") +{ + reap; +} + +# Cleanup +connection default; +disconnect con; +set debug_sync='RESET'; +drop table t1; +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test index 9a25f39a8e3..ff092773737 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test @@ -27,8 +27,9 @@ begin; --connection default --echo ### Connection default -let $wait_condition= select 1 from INFORMATION_SCHEMA.PROCESSLIST - where ID = $ID and STATE = "Waiting for row lock"; +let $wait_condition= + select 1 from INFORMATION_SCHEMA.PROCESSLIST + where (ID = $ID /* or SRV_ID = $ID */) and STATE = "Waiting for row lock"; --source include/wait_condition.inc ## Waiting for row lock ## select connection_id(); diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf index 13dea1236d8..9ceb0cc0a97 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.cnf +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf @@ -3,12 +3,14 @@ [mysqld.1] sync_binlog=0 binlog_format=row -rocksdb_read_free_rpl_tables="t.*" +rocksdb_read_free_rpl=PK_SK slave-exec-mode=strict +rocksdb_perf_context_level=3 [mysqld.2] sync_binlog=0 binlog_format=row -rocksdb_read_free_rpl_tables="t.*" +rocksdb_read_free_rpl=PK_SK slave-exec-mode=strict rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k 
+rocksdb_perf_context_level=3 diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test new file mode 100644 index 00000000000..e1fb9db0b19 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test @@ -0,0 +1,414 @@ +source include/have_rocksdb.inc; +source include/master-slave.inc; +source include/have_debug.inc; + + +connection master; +--disable_warnings +drop table if exists t1; +--enable_warnings + +# initialization/insert +connection master; +create table t1 (id int primary key, value int); +insert into t1 values (1,1), (2,2), (3,3), (4,4); +--source include/sync_slave_sql_with_master.inc + +--let $diff_tables= master:t1, slave:t1 + +--echo +--echo # regular update/delete. With rocks_read_free_rpl=PK_SK, rocksdb_rows_read does not increase on slaves +--echo +connection slave; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +update t1 set value=value+1 where id=1; +delete from t1 where id=4; +select * from t1; +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t1; + + +--echo +--echo # "rocks_read_free_rpl=PK_SK" makes "row not found error" not happen anymore +--echo +connection slave; +--source include/stop_slave.inc +delete from t1 where id in (2, 3); +--source include/start_slave.inc +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +connection master; +update t1 set value=value+1 where id=3; +delete from t1 where id=2; +select * from t1; +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' 
end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t1; + + +--echo +--echo ## tables without primary key -- read free replication should be disabled +--echo +--echo +--echo #no index +--echo +connection master; +drop table t1; +create table t1 (c1 int, c2 int); +insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5); +--source include/sync_slave_sql_with_master.inc +connection slave; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +update t1 set c2=100 where c1=3; +delete from t1 where c1 <= 2; +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t1; + +--echo +--echo #secondary index only +--echo +connection master; +drop table t1; +create table t1 (c1 int, c2 int, index i(c1)); +insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5); +--source include/sync_slave_sql_with_master.inc +connection slave; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +update t1 set c2=100 where c1=3; +delete from t1 where c1 <= 2; +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t1; + + + +--echo +--echo ## large row operations -- primary key modification, secondary key modification +--echo +connection master; +drop table t1; +create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2)); + +--disable_query_log +let 
$i=1; +while ($i<=10000) +{ + eval insert t1(id1,id2,c1,c2,c3,c4,c5,c6,c7) + values($i,0,$i,0,0,0,0,0,0); + inc $i; +} +--enable_query_log + +--source include/sync_slave_sql_with_master.inc +connection slave; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; + +--echo +--echo #updating all secondary keys by 1 +--echo +--disable_query_log +let $i=1; +while ($i<=10000) +{ + eval update t1 set c2=c2+1 where id1=$i and id2=0; + inc $i; +} +--enable_query_log +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +--source include/diff_tables.inc + +--echo +--echo #updating all primary keys by 2 +--echo +connection slave; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +--disable_query_log +let $i=1; +while ($i<=10000) +{ + eval update t1 set id2=id2+2 where id1=$i and id2=0; + inc $i; +} +--enable_query_log +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +--source include/diff_tables.inc + +--echo +--echo #updating secondary keys after truncating t1 on slave +--echo +connection slave; +truncate table t1; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +update t1 set c2=c2+10; +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status 
where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +--source include/diff_tables.inc + +--echo +--echo #updating primary keys after truncating t1 on slave +--echo +connection slave; +truncate table t1; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +update t1 set id2=id2+10; +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +--source include/diff_tables.inc + +--echo +--echo #deleting half rows +--echo +connection slave; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +delete from t1 where id1 <= 5000; +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +--source include/diff_tables.inc + +--echo +--echo # rocksdb_read_free_rpl = PK_ONLY i.e. 
it only works on tables with only PK +--echo +connection slave; +--echo [on slave] +stop slave; +set @@global.rocksdb_read_free_rpl = PK_ONLY; +start slave; +connection master; +--echo [on master] +create table t2 (id int primary key, i1 int, i2 int, value int); +create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); +insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +--source include/sync_slave_sql_with_master.inc + +# make a mismatch between the slave and the master +connection slave; +--echo [on slave] +delete from t2 where id <= 2; +delete from u2 where id <= 2; + +# make changes on the master +connection master; +--echo [on master] +update t2 set i2=100, value=100 where id=1; +update u2 set i2=100, value=100 where id=1; + +connection slave; +--echo [on slave] +call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*"); +call mtr.add_suppression("Slave: Can't find record in 'u2'.*"); +# wait until we have the expected error +--let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND) +--source include/wait_for_slave_sql_error.inc + +# query the t2 table on the slave +connection slave; +select count(*) from t2 force index(primary); +select * from t2 where id=1; +select i1 from t2 where i1=1; +select i2 from t2 where i2=100; + +# query the u2 table on the slave +select count(*) from u2 force index(primary); +select count(*) from u2 force index(i1); +select count(*) from u2 force index(i2); +select * from u2 where id=1; +select i1 from u2 where i1=1; +select i2 from u2 where i2=100; + +# the slave replication thread stopped because of the errors; +# cleanup the problem and restart it +--disable_query_log +insert into u2 values(1,1,1,1), (2,2,2,2); +start slave sql_thread; +--source include/wait_for_slave_sql_to_start.inc +--enable_query_log + +connection slave; +--echo [on slave] +stop slave; +set @@global.rocksdb_read_free_rpl = 
PK_SK; +start slave; + +--echo +--echo # some tables with read-free replication on and some with it off +--echo +# We'll set the table filter to all tables starting with 't' +connection slave; +--echo [on slave] +stop slave; +set @@global.rocksdb_read_free_rpl_tables = "t.*"; +start slave; +connection master; +--echo [on master] +drop table if exists t2; +drop table if exists u2; +create table t2 (id int primary key, i1 int, i2 int, value int); +create table u2 (id int primary key, i1 int, i2 int, value int); +insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +--source include/sync_slave_sql_with_master.inc + +# make a mismatch between the slave and the master +connection slave; +--echo [on slave] +delete from t2 where id <= 2; +delete from u2 where id <= 2; + +# make changes on the master +connection master; +--echo [on master] +update t2 set i2=100, value=100 where id=1; +update u2 set i2=100, value=100 where id=1; + +connection slave; +--echo [on slave] +call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*"); +call mtr.add_suppression("Slave: Can't find record in 'u2'.*"); +# wait until we have the expected error +--let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND) +--source include/wait_for_slave_sql_error.inc + +# query the t2 table on the slave +connection slave; +select count(*) from t2 force index(primary); +select * from t2 where id=1; +select i1 from t2 where i1=1; +select i2 from t2 where i2=100; + +# query the u2 table on the slave +select count(*) from u2 force index(primary); +select * from u2 where id=1; +select i1 from u2 where i1=1; +select i2 from u2 where i2=100; + +# the slave replication thread stopped because of the errors; +# cleanup the problem and restart it +--disable_query_log +insert into u2 values(1,1,1,1), (2,2,2,2); +start slave sql_thread; +--source include/wait_for_slave_sql_to_start.inc +--enable_query_log + +connection 
slave; +--echo [on slave] +stop slave; +set @@global.rocksdb_read_free_rpl_tables = ".*"; +start slave; + +--echo +--echo # secondary keys lose rows +--echo +connection master; +--echo [on master] +create table t3 (id int primary key, i1 int, i2 int, value int, index(i1), +index(i2)); +insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +--source include/sync_slave_sql_with_master.inc + +# make a mismatch between the slave and the master +connection slave; +--echo [on slave] +delete from t3 where id <= 2; + +# make changes on the master +connection master; +--echo [on master] +update t3 set i2=100, value=100 where id=1; + +# make sure the slave is caught up +--source include/sync_slave_sql_with_master.inc + +# query the t3 table on the slave +connection slave; +select count(*) from t3 force index(primary); +select count(*) from t3 force index(i1); +select count(*) from t3 force index(i2); +select * from t3 where id=1; +select i1 from t3 where i1=1; +select i2 from t3 where i2=100; + +--echo +--echo # secondary keys have extra rows +--echo +connection master; +--echo [on master] +create table t4 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); +insert into t4 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +--source include/sync_slave_sql_with_master.inc + +# make a mismatch between the slave and the master +connection slave; +--echo [on slave] +update t4 set i1=100 where id=1; + +# make changes on the master +connection master; +--echo [on master] +delete from t4 where id=1; + +# make sure the slave is caught up +--source include/sync_slave_sql_with_master.inc + +# query the t4 table on the slave +connection slave; +--echo [on slave] +select count(*) from t4 force index(primary); +select count(*) from t4 force index(i1); +select count(*) from t4 force index(i2); +select i1 from t4 where i1=100; + +--echo +--echo # inserts are also read-free +--echo +connection master; +--echo [on master] +drop table if exists t2; +drop table if exists t3; +create table 
t2 (id int primary key, i1 int, i2 int); +create table t3 (id int primary key, i1 int, i2 int, key(i1)); +connection slave; +select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +connection master; +insert into t2 values(1, 1, 1); +insert into t2 values(2, 2, 2); +insert into t3 values(1, 1, 1); +insert into t3 values(2, 2, 2); +--source include/sync_slave_sql_with_master.inc +connection slave; +select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t2; +select * from t3; + +# cleanup +connection master; +drop table t1, t2, t3, t4, u2; + +--source include/rpl_end.inc + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf new file mode 100644 index 00000000000..f225d5dd71b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf @@ -0,0 +1,17 @@ +!include suite/rpl/my.cnf + +[mysqld.1] +sync_binlog=0 +binlog_format=row +rocksdb_perf_context_level=3 + +[mysqld.2] +sync_binlog=0 +binlog_format=row +slave-exec-mode=strict +rocksdb_perf_context_level=3 +slave_use_idempotent_for_recovery=YES +slave_parallel_workers=8 +mts_dependency_replication=STMT +mts_dependency_order_commits=0 +slave_tx_isolation=READ-COMMITTED diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc new file mode 100644 index 00000000000..e69bcce72d8 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc @@ -0,0 +1,69 @@ + +disable_query_log; + +# Create a schema with different kinds of tables (with different kinds of keys) +connection master; +create table t1(a int, b int, c int, d int); #no keys +create table t2(a int primary key, 
b int, c int, d int); #only pk +create table t3(a int, b int, c int, d int, key(b)); #only sk +create table t4(a int, b int unique, c int, d int); #only unique sk +create table t5(a int primary key, b int, c int, d int, key(b)); #pk + sk +create table t6(a int primary key, b int unique, c int, d int); #pk + unique sk +create table t7(a int, b int unique, c int, d int, key(c)); #sk + unique sk +create table t8(a int primary key, b int unique, c int, d int, key(c)); #pk + sk + unique sk + +# Insert a bunch of rows +let $iter = 0; +while ($iter < 1000) { + let $t = 1; + while ($t <= 8) { + eval insert into t$t values($iter, $iter, $iter, $iter); + inc $t; + } + inc $iter; +} + +let $iter = 0; +while ($iter < 10) { + let $t = 1; + while ($t <= 8) { + eval update t$t set a = a + 10000 where a > 900; # update pk (if any) + eval update t$t set b = b + 10000 where b > 900; # update sk or unique (if any) + eval update t$t set c = c + 10000 where c > 900; # update sk or unique(if any) + eval update t$t set d = d + 10000 where d > 900; # update non key col + + eval delete from t$t where a < 25; + eval delete from t$t where b < 50; + eval delete from t$t where c < 75; + eval delete from t$t where d < 100; + + # Re-insert the deleted rows + let $i = 0; + while ($i < 100) { + eval insert into t$t values($i, $i, $i, $i); + inc $i; + } + inc $t; + } + inc $iter; +} +source include/sync_slave_sql_with_master.inc; + +connection master; +let $t = 1; +while ($t <= 8) { + let $diff_tables = master:t$t, slave:t$t; + source include/diff_tables.inc; + inc $t; +} + +# Cleanup +connection master; +let $t = 1; +while ($t <= 8) { + eval drop table t$t; + inc $t; +} +source include/sync_slave_sql_with_master.inc; + +enable_query_log; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test new file mode 100644 index 00000000000..31e65db8d5d --- /dev/null +++ 
b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test @@ -0,0 +1,22 @@ +source include/have_rocksdb.inc; +source include/master-slave.inc; +source include/not_valgrind.inc; + +connection slave; +source include/stop_slave.inc; +set @@global.rocksdb_read_free_rpl = PK_SK; +source include/start_slave.inc; +source rocksdb_read_free_rpl_stress.inc; + +connection slave; +source include/stop_slave.inc; +set @@global.rocksdb_read_free_rpl = PK_ONLY; +source include/start_slave.inc; +source rocksdb_read_free_rpl_stress.inc; + +connection slave; +source include/stop_slave.inc; +set @@global.rocksdb_read_free_rpl = default; +source include/start_slave.inc; + +source include/rpl_end.inc; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt new file mode 100644 index 00000000000..8d8ae3d65f3 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt @@ -0,0 +1 @@ +--rocksdb_lock_wait_timeout=2 diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test new file mode 100644 index 00000000000..d47af90d842 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test @@ -0,0 +1,78 @@ +--source include/have_rocksdb.inc + +--disable_warnings +drop table if exists t1; +--enable_warnings + +SET @@global.rocksdb_rollback_on_timeout = 1; +show variables like 'rocksdb_rollback_on_timeout'; + +create table t1 (a int unsigned not null primary key) engine = rocksdb; +insert into t1 values (1); +commit; + +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); + +connection con2; +begin work; +insert into t1 values (5); +insert into t1 values (6); + +update t1 set a = a + 1 where a = 1; + +connection con1; +begin work; +insert into t1 values (7); +insert into t1 values (8); + +# This statement will time out. 
The whole transaction will be +# rolled back. So values 7 and 8 are not inserted. +--error ER_LOCK_WAIT_TIMEOUT +update t1 set a = a + 1 where a = 1; + +select * from t1; +commit; + +connection con2; +select * from t1; +commit; + +connection default; +select * from t1; + +SET @@global.rocksdb_rollback_on_timeout = 0; +show variables like 'rocksdb_rollback_on_timeout'; + +connection con2; +begin work; +insert into t1 values (9); +insert into t1 values (10); + +update t1 set a = a + 1 where a = 2; + +connection con1; +begin work; +insert into t1 values (11); +insert into t1 values (12); + +# This statement will time out. Only this statement will be +# rolled back. So values 11 and 12 are inserted. +--error ER_LOCK_WAIT_TIMEOUT +update t1 set a = a + 1 where a = 2; + +select * from t1; +commit; + +connection con2; +select * from t1; +commit; + +connection default; +select * from t1; + +SET @@global.rocksdb_rollback_on_timeout = DEFAULT; + +drop table t1; +disconnect con1; +disconnect con2; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.test deleted file mode 100644 index 38fb3c32149..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.test +++ /dev/null @@ -1,302 +0,0 @@ ---source include/have_rocksdb.inc - -source include/master-slave.inc; - - -connection master; ---disable_warnings -drop table if exists t1; ---enable_warnings - -# initialization/insert -connection master; ---source init_stats_procedure.inc - -create table t1 (id int primary key, value int); -insert into t1 values (1,1), (2,2), (3,3), (4,4); ---source include/sync_slave_sql_with_master.inc - ---let $diff_tables= master:t1, slave:t1 - ---echo ---echo # regular update/delete. 
With rocks_read_free_rpl_tables=.*, rocksdb_rows_read does not increase on slaves ---echo -connection slave; -call save_read_stats(); -connection master; -update t1 set value=value+1 where id=1; -delete from t1 where id=4; -select * from t1; ---source include/sync_slave_sql_with_master.inc -connection slave; -call get_read_stats(); -select * from t1; - - ---echo ---echo # "rocks_read_free_rpl_tables=.*" makes "row not found error" not happen anymore ---echo -connection slave; ---source include/stop_slave.inc -delete from t1 where id in (2, 3); ---source include/start_slave.inc -call save_read_stats(); - -connection master; -update t1 set value=value+1 where id=3; -delete from t1 where id=2; -select * from t1; ---source include/sync_slave_sql_with_master.inc -connection slave; -call get_read_stats(); -select * from t1; - - ---echo ---echo ## tables without primary key -- read free replication should be disabled ---echo ---echo ---echo #no index ---echo -connection master; -drop table t1; -create table t1 (c1 int, c2 int); -insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5); ---source include/sync_slave_sql_with_master.inc -connection slave; -call save_read_stats(); -connection master; -update t1 set c2=100 where c1=3; -delete from t1 where c1 <= 2; ---source include/sync_slave_sql_with_master.inc -connection slave; -call get_read_stats(); -select * from t1; - ---echo ---echo #secondary index only ---echo -connection master; -drop table t1; -create table t1 (c1 int, c2 int, index i(c1)); -insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5); ---source include/sync_slave_sql_with_master.inc -connection slave; -call save_read_stats(); -connection master; -update t1 set c2=100 where c1=3; -delete from t1 where c1 <= 2; ---source include/sync_slave_sql_with_master.inc -connection slave; -call get_read_stats(); -select * from t1; - - - ---echo ---echo ## large row operations -- primary key modification, secondary key modification ---echo -connection master; -drop table 
t1; -create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2)); - ---disable_query_log -let $i=1; -while ($i<=10000) -{ - eval insert t1(id1,id2,c1,c2,c3,c4,c5,c6,c7) - values($i,0,$i,0,0,0,0,0,0); - inc $i; -} ---enable_query_log - ---source include/sync_slave_sql_with_master.inc -connection slave; -call save_read_stats(); -connection master; - ---echo ---echo #updating all seconary keys by 1 ---echo ---disable_query_log -let $i=1; -while ($i<=10000) -{ - eval update t1 set c2=c2+1 where id1=$i and id2=0; - inc $i; -} ---enable_query_log ---source include/sync_slave_sql_with_master.inc -connection slave; -call get_read_stats(); -connection master; ---source include/diff_tables.inc - ---echo ---echo #updating all primary keys by 2 ---echo -connection slave; -call save_read_stats(); -connection master; ---disable_query_log -let $i=1; -while ($i<=10000) -{ - eval update t1 set id2=id2+2 where id1=$i and id2=0; - inc $i; -} ---enable_query_log ---source include/sync_slave_sql_with_master.inc -connection slave; -call get_read_stats(); -connection master; ---source include/diff_tables.inc - ---echo ---echo #updating secondary keys after truncating t1 on slave ---echo -connection slave; -truncate table t1; -call save_read_stats(); -connection master; -update t1 set c2=c2+10; ---source include/sync_slave_sql_with_master.inc -connection slave; -call get_read_stats(); -connection master; ---source include/diff_tables.inc - ---echo ---echo #updating primary keys after truncating t1 on slave ---echo -connection slave; -truncate table t1; -call save_read_stats(); -connection master; -update t1 set id2=id2+10; ---source include/sync_slave_sql_with_master.inc -connection slave; -call get_read_stats(); -connection master; ---source include/diff_tables.inc - ---echo ---echo #deleting half rows ---echo -connection slave; -call save_read_stats(); -connection master; -delete from t1 
where id1 <= 5000; ---source include/sync_slave_sql_with_master.inc -connection slave; -call get_read_stats(); -connection master; ---source include/diff_tables.inc - -#--echo -#--echo # some tables with read-free replication on and some with it off -#--echo # secondary keys lose rows -#--echo -# The configuration is set up so the slave will do read-free replication on -# all tables starting with 't' -connection master; ---echo [on master] -create table t2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); -create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); -insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); -insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3); ---source include/sync_slave_sql_with_master.inc - -# make a mismatch between the slave and the master -connection slave; ---echo [on slave] -delete from t2 where id <= 2; -delete from u2 where id <= 2; - -# make changes on the master -connection master; ---echo [on master] -update t2 set i2=100, value=100 where id=1; -update u2 set i2=100, value=100 where id=1; - -connection slave; ---echo [on slave] -call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*"); -call mtr.add_suppression("Slave: Can't find record in 'u2'.*"); -# wait until we have the expected error ---let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND) ---source include/wait_for_slave_sql_error.inc - -# query the t2 table on the slave -connection slave; -select count(*) from t2 force index(primary); -select count(*) from t2 force index(i1); -select count(*) from t2 force index(i2); -select * from t2 where id=1; -select i1 from t2 where i1=1; -select i2 from t2 where i2=100; - -# query the u2 table on the slave -select count(*) from u2 force index(primary); -select count(*) from u2 force index(i1); -select count(*) from u2 force index(i2); -select * from u2 where id=1; -select i1 from u2 where i1=1; -select i2 from u2 where i2=100; - -# 
the slave replication thread stopped because of the errors; -# cleanup the problem and restart it ---disable_query_log -insert into u2 values(1,1,1,1), (2,2,2,2); -start slave sql_thread; ---source include/wait_for_slave_sql_to_start.inc ---enable_query_log - ---echo ---echo # some tables with read-free replication on and some with it off ---echo # secondary keys have extra rows ---echo -connection master; ---echo [on master] -create table t3 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); -create table u3 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2)); -insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3); -insert into u3 values (1,1,1,1),(2,2,2,2),(3,3,3,3); ---source include/sync_slave_sql_with_master.inc - -# make a mismatch between the slave and the master -connection slave; ---echo [on slave] -update t3 set i1=100 where id=1; -update u3 set i1=100 where id=1; - -# make changes on the master -connection master; ---echo [on master] -delete from t3 where id=1; -delete from u3 where id=1; - -# make sure the slave is caught up ---source include/sync_slave_sql_with_master.inc - -# query the t3 table on the slave -connection slave; ---echo [on slave] -select count(*) from t3 force index(primary); -select count(*) from t3 force index(i1); -select count(*) from t3 force index(i2); -select i1 from t3 where i1=100; - -# query the u3 table on the slave -select count(*) from u3 force index(primary); -select count(*) from u3 force index(i1); -select count(*) from u3 force index(i2); -select i1 from u3 where i1=100; - -# cleanup -connection master; -drop table t1, t2, t3, u2, u3; ---source drop_stats_procedure.inc - ---source include/rpl_end.inc - diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf new file mode 100644 index 00000000000..110d18abac7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf @@ -0,0 +1,11 
@@ +!include suite/rpl/my.cnf + +[mysqld.1] +binlog_format=row +transaction_isolation=read-committed +[mysqld.2] +binlog_format=row +slave_parallel_workers=4 +slave_exec_mode=SEMI_STRICT +rocksdb_lock_wait_timeout=5 +transaction_isolation=read-committed diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test new file mode 100644 index 00000000000..36188427585 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test @@ -0,0 +1,4 @@ +--source include/have_binlog_format_row.inc + +--source rpl_row_not_found.inc + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf index d20d3396f0a..b0a37fd30ad 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf +++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf @@ -6,7 +6,7 @@ gtid_mode=ON enforce_gtid_consistency log_slave_updates binlog_row_image=FULL -rocksdb_read_free_rpl_tables=.* +rocksdb_read_free_rpl=PK_SK rocksdb_strict_collation_check=0 [mysqld.2] binlog_format=row @@ -14,6 +14,6 @@ gtid_mode=ON enforce_gtid_consistency log_slave_updates binlog_row_image=FULL -rocksdb_read_free_rpl_tables=.* +rocksdb_read_free_rpl=PK_SK rocksdb_strict_collation_check=0 diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test index 4eb02ac648a..80bae00424b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test @@ -1,4 +1,8 @@ --source include/have_rocksdb.inc +# Don't run the RQG tests with --rpc_protocol because the connections and +# queries will be coming from Perl where we don't have any ability to +# specify the query attributes needed for the RPC protocol. 
+--source include/not_rpc_protocol.inc # RQG's examples test let $TESTDIR = examples; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test index 16d978c71b7..2e560c86c62 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test @@ -1,4 +1,8 @@ --source include/have_rocksdb.inc +# Don't run the RQG tests with --rpc_protocol because the connections and +# queries will be coming from Perl where we don't have any ability to +# specify the query attributes needed for the RPC protocol. +--source include/not_rpc_protocol.inc call mtr.add_suppression("Did not write failed "); call mtr.add_suppression("Can't open and lock privilege tables"); diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test index f29ddcb8c81..383b9aed39f 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test @@ -1,4 +1,8 @@ --source include/have_rocksdb.inc +# Don't run the RQG tests with --rpc_protocol because the connections and +# queries will be coming from Perl where we don't have any ability to +# specify the query attributes needed for the RPC protocol. 
+--source include/not_rpc_protocol.inc call mtr.add_suppression("Deadlock found when trying to get lock"); diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt new file mode 100644 index 00000000000..c07b063f07c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt @@ -0,0 +1 @@ +--rocksdb_strict_collation_check=off --binlog_format=row --log-bin diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test new file mode 100644 index 00000000000..451eed057ac --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test @@ -0,0 +1,53 @@ +--source include/have_rocksdb.inc +--source include/have_debug.inc +--source include/not_valgrind.inc + +--enable_connect_log +--enable_info + +# setup search pattern and file (new log error file) +--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/skip_core_dump_on_error.err + +# restart the server with the custom error log file +--let $_mysqld_option=--log-error=$SEARCH_FILE --default-storage-engine=rocksdb +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_option.inc + +# setup +create table mz(c int); +insert into mz values(1); +commit; + +# simulate a write error +SET debug= '+d,abort_with_io_write_error'; + +# we want to abort server if we fail to write (ABORT_SERVER) +set global binlog_error_action=1; + +# diplay the values of the key parameters +show session variables like 'debug'; +show global variables like 'binlog_error_action'; +show global variables like 'skip_core_dump_on_error'; + +--echo # crash_during_update +# tell client that crash is expected +--error 1598 +# run an update to trigger a write error +update mz set c=13; + +# should find server abort (prints: Pattern "..." 
found) +--echo # server aborted +--let SEARCH_PATTERN=mysqld got signal 6 +--source include/search_pattern.inc + +# should not find a core dump (prints: Pattern "..." not found) +--echo # but no core written +--let SEARCH_PATTERN=Writing a core file +--source include/search_pattern.inc + +--let _$mysqld_option= +--source include/start_mysqld.inc +--remove_file $SEARCH_FILE + +# tidy up +drop table mz; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test index 99cb2253d94..6fcfd491af1 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test @@ -13,18 +13,27 @@ DROP TABLE IF EXISTS t1; --enable_warnings +# On a Mac, strerror() prints "Unknown error: nn", as +# opposed to "Unknown error nn" on Linux/etc. +# Replacing 'error:' with 'error' below to make the output uniform. + +--replace_result error: error --error ER_CANT_CREATE_TABLE CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb DATA DIRECTORY = '/foo/bar/data'; +--replace_result error: error show warnings; +--replace_result error: error --error ER_CANT_CREATE_TABLE CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb INDEX DIRECTORY = '/foo/bar/index'; +--replace_result error: error show warnings; # # Verify that we'll get the same error codes when using the partitions. 
# +--replace_result error: error --error ER_CANT_CREATE_TABLE CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY) ENGINE=rocksdb PARTITION BY RANGE (id) ( @@ -34,7 +43,10 @@ CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY) ENGINE=rocksdb PARTITION BY RANGE DATA DIRECTORY = '/foo/bar/data/', PARTITION P2 VALUES LESS THAN (MAXVALUE) ); +--replace_result error: error +show warnings; +--replace_result error: error --error ER_CANT_CREATE_TABLE CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE (id) ( @@ -44,3 +56,5 @@ CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE INDEX DIRECTORY = '/foo/bar/data/', PARTITION P2 VALUES LESS THAN (MAXVALUE) ); +--replace_result error: error +show warnings; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc new file mode 100644 index 00000000000..2193aa9f1de --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc @@ -0,0 +1,102 @@ + +# Truncate table multiple times +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; +TRUNCATE TABLE t1; + +# Truncate partition multiple times +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; +ALTER TABLE t1 TRUNCATE PARTITION p0; + +# TRUNCATE multiple times +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p1; + +# TRUNCATE multiple times +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; +ALTER TABLE t1 TRUNCATE PARTITION p2; + +# TRUNCATE different partition +ALTER TABLE t1 TRUNCATE PARTITION p1; +ALTER TABLE t1 TRUNCATE PARTITION p0; +TRUNCATE TABLE t1; +ALTER TABLE t1 TRUNCATE PARTITION p2; + +# Insert value once and truncate multiple times +INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8); +SELECT a,b FROM t1 ORDER BY a; + +ALTER TABLE t1 TRUNCATE PARTITION p2; +SELECT a,b FROM t1 ORDER BY a; +SELECT a FROM t1 WHERE b > 2; +SELECT b from 
t1 where a != 3; + +ALTER TABLE t1 TRUNCATE PARTITION p1; +SELECT a,b FROM t1 ORDER BY b; +SELECT a FROM t1 WHERE b > 2; +SELECT b from t1 where a != 3; + +ALTER TABLE t1 TRUNCATE PARTITION p0; +SELECT a,b FROM t1 ORDER BY a; + +TRUNCATE TABLE t1; +SELECT a,b FROM t1; + +# Insert value multiple times and truncate multiple times +INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8); +--sorted_result +SELECT a,b FROM t1; +--sorted_result +SELECT a FROM t1 WHERE b < 5; + +ALTER TABLE t1 TRUNCATE PARTITION p2; +--sorted_result +SELECT a,b FROM t1; +INSERT INTO t1(a,b) VALUES(7, 1); +--sorted_result +SELECT b from t1 WHERE a > 2; +--sorted_result +SELECT a,b FROM t1; + +ALTER TABLE t1 TRUNCATE PARTITION p1; +--sorted_result +SELECT a,b FROM t1; +INSERT INTO t1(a,b) VALUES(8, 4); +--sorted_result +SELECT a,b FROM t1; +--sorted_result +SELECT b from t1 WHERE a < 9; + +ALTER TABLE t1 TRUNCATE PARTITION p0; +--sorted_result +SELECT a,b FROM t1; +INSERT INTO t1(a,b) VALUES(9, 8); + +TRUNCATE TABLE t1; +SELECT a,b FROM t1; + +TRUNCATE TABLE t1; +SELECT a,b FROM t1; + +# manual commpact +set global rocksdb_force_flush_memtable_now = true; +set global rocksdb_compact_cf = 'default'; + +--disable_query_log +let $i = 0; +while($i < 9) +{ + inc $i; + eval insert t1 values($i, $i); +} +--enable_query_log +--sorted_result +SELECT b FROM t1 WHERE a < 5; + +TRUNCATE TABLE t1; +SELECT b FROM t1 WHERE a < 5; + +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test new file mode 100644 index 00000000000..f9a89517e2a --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test @@ -0,0 +1,83 @@ +--source include/have_rocksdb.inc +--source include/have_partition.inc + +# +# TRUNCATE PARTITION +# + + +# Hidden Key table +--echo # +--echo # table(hidden key) +--echo # +CREATE TABLE t1 ( + a INT, + b INT +) ENGINE=ROCKSDB +PARTITION BY RANGE (b) ( + PARTITION p0 VALUES LESS THAN 
(3), + PARTITION p1 VALUES LESS THAN (6), + PARTITION p2 VALUES LESS THAN MAXVALUE +); + +--source truncate_partition.inc + + +--echo # +--echo # table(secondary key) +--echo # +CREATE TABLE t1( + a INT, + b INT, + KEY (b) +) ENGINE=ROCKSDB +PARTITION BY HASH(a) PARTITIONS 3; + +--source truncate_partition.inc + + +--echo # +--echo # table(primary key, auto increment) +--echo # +CREATE TABLE t1( + a INT NOT NULL AUTO_INCREMENT, + b INT, + PRIMARY KEY(a) +) ENGINE=ROCKSDB +PARTITION BY KEY() PARTITIONS 3; + +--source truncate_partition.inc + + +--echo # +--echo # table(cf) +--echo # +CREATE TABLE t1 ( + a INT, + b INT, + PRIMARY KEY (`a`, `b`) COMMENT 'testcomment' +) ENGINE=ROCKSDB + PARTITION BY LIST(a) ( + PARTITION p0 VALUES IN (1, 4, 7), + PARTITION p1 VALUES IN (2, 5, 8), + PARTITION p2 VALUES IN (3, 6, 9) +); + +--source truncate_partition.inc + + +--echo # +--echo # table(reverse cf) +--echo # +CREATE TABLE t1 ( + a INT, + b INT, + PRIMARY KEY (`a`, `b`) COMMENT 'p0_cfname=rev:foo;p1_cfname=bar;p2_cfname=baz' +) ENGINE=ROCKSDB +PARTITION BY LIST(a) ( + PARTITION p0 VALUES IN (1, 4, 7), + PARTITION p1 VALUES IN (2, 5, 8), + PARTITION p2 VALUES IN (3, 6, 9) +); + +--source truncate_partition.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test new file mode 100644 index 00000000000..f9e871ff1c8 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test @@ -0,0 +1,56 @@ +--source include/have_rocksdb.inc +--source include/have_debug_sync.inc + +#### +# Bump rows_examined count whenever MyRocks filters out a row due to expired TTL +#### + +# clean start +set debug_sync='RESET'; +set global rocksdb_debug_ttl_read_filter_ts = -10; + +--enable_connect_log +--enable_info + +connect (conn1, localhost, root,,test); +connect (conn2, localhost, root,,test); + +connection conn1; + +# create table with TTL policy (1s) +CREATE TABLE t_re ( + a INT, b INT, PRIMARY KEY 
(a) +) ENGINE=ROCKSDB +COMMENT 'ttl_duration=1'; + +# start with 2 rows, expired at the insertion time +set global rocksdb_debug_ttl_rec_ts = -13; +insert into t_re values (1,1); +insert into t_re values (2,2); +set global rocksdb_debug_ttl_rec_ts = 0; +commit; + +# setup signal to stop in code where we skip expired records +set debug_sync='rocksdb.ttl_rows_examined SIGNAL parked WAIT_FOR go'; +send SELECT * FROM t_re; + +connection conn2; +set debug_sync='now WAIT_FOR parked'; + +# display "Rows Examined" before returning from call +--replace_column 1 ### 2 ### 3 ### 4 ### 6 ### 10 ### 11 ### 12 ### +--sorted_result +SHOW PROCESSLIST; + +set debug_sync='now SIGNAL go'; + +connection conn1; +reap; + +# tidy up +set debug_sync='RESET'; +set global rocksdb_debug_ttl_read_filter_ts = DEFAULT; +drop table t_re; + +disconnect conn1; +disconnect conn2; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test b/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test index 86ae15924cb..e2520388ea7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test @@ -30,7 +30,8 @@ send insert into t1 values (1,2); connection con1; let $wait_condition= select 1 from INFORMATION_SCHEMA.PROCESSLIST - where ID = $ID and STATE = "Waiting for row lock"; + where (ID = $ID /* or SRV_ID = $ID*/) + and STATE = "Waiting for row lock"; --source include/wait_condition.inc commit; @@ -101,15 +102,17 @@ truncate table t2; # 4) simulating T1 GetForUpdate() -> T2 GetForUpdate(). T2 should fail with lock wait timeout. 
connection con1; -set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked1 WAIT_FOR go1'; +set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked1 WAIT_FOR go'; send insert into t1 values (1,1); +connection default; +set debug_sync='now WAIT_FOR parked1'; + connection con2; -set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked2 WAIT_FOR go2'; +set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked2 WAIT_FOR go'; send insert into t2 values (1,1,1); connection default; -set debug_sync='now WAIT_FOR parked1'; set debug_sync='now WAIT_FOR parked2'; connection con3; @@ -120,8 +123,7 @@ insert into t1 values (1,2); insert into t2 values (2,1,2); connection default; -set debug_sync='now SIGNAL go1'; -set debug_sync='now SIGNAL go2'; +set debug_sync='now SIGNAL go'; connection con1; reap; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test new file mode 100644 index 00000000000..782e2a369a4 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test @@ -0,0 +1,5 @@ +--source include/have_rocksdb.inc + +--let $io_option=--rocksdb_use_direct_io_for_flush_and_compaction=1 + +--source ../include/use_direct_io_option.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test index c347a85518f..a1b717e85fc 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test @@ -1,37 +1,5 @@ --source include/have_rocksdb.inc ---perl -use Cwd 'abs_path'; - -open(FILE, ">", "$ENV{MYSQL_TMP_DIR}/data_in_shm.inc") or die; -my $real_path= abs_path($ENV{'MYSQLTEST_VARDIR'}); -my $in_shm= index($real_path, "/dev/shm") != -1; -print FILE "let \$DATA_IN_SHM= $in_shm;\n"; -close FILE; -EOF - ---source 
$MYSQL_TMP_DIR/data_in_shm.inc ---remove_file $MYSQL_TMP_DIR/data_in_shm.inc - -if ($DATA_IN_SHM) -{ - --skip DATADIR is in /dev/shm, possibly due to --mem -} - ---echo Checking direct reads ---let $_mysqld_option=--rocksdb_use_direct_reads=1 ---source include/restart_mysqld_with_option.inc - -CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb; -SHOW CREATE TABLE t1; -INSERT INTO t1 VALUES (1, 1,'a'); -INSERT INTO t1 (a,b) VALUES (2,'b'); -set global rocksdb_force_flush_memtable_now=1; ---sorted_result -SELECT a,b FROM t1; -DROP TABLE t1; - -# cleanup ---let _$mysqld_option= ---source include/restart_mysqld.inc +--let $io_option=--rocksdb_use_direct_reads=1 +--source ../include/use_direct_io_option.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test index 8dfbe312ea8..53ba5161d16 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test @@ -1,4 +1,5 @@ --source include/have_rocksdb.inc +--source include/have_direct_io.inc call mtr.add_suppression("rocksdb"); call mtr.add_suppression("Aborting"); diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh new file mode 100755 index 00000000000..98a1fecceba --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +COPY_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_copy_log" +SIGNAL_FILE=${MYSQL_TMP_DIR}/myrocks_hotbackup_signal +MOVEBACK_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_moveback_log" +rm -f $COPY_LOG +rm -f $SIGNAL_FILE +rm -f $MOVEBACK_LOG diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh index b6735abb0a9..6108cfbb1aa 100755 --- 
a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh @@ -1,5 +1,7 @@ #!/bin/bash +. suite/rocksdb_hotbackup/include/clean_tmpfiles.sh + if [ "$STREAM_TYPE" == 'wdt' ]; then which wdt >/dev/null 2>&1 if [ $? -ne 0 ]; then @@ -31,11 +33,8 @@ rm -rf $backup_dir/* rm -rf $dest_data_dir/ mkdir $dest_data_dir -COPY_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_copy_log" + SIGNAL_CONDITION="" -SIGNAL_FILE=${MYSQL_TMP_DIR}/myrocks_hotbackup_signal -rm -f $COPY_LOG -rm -f $SIGNAL_FILE if [ "$FRM" == '1' ]; then suite/rocksdb_hotbackup/include/create_table.sh $COPY_LOG $SIGNAL_FILE 2>&1 & @@ -49,23 +48,23 @@ if [ "$STREAM_TYPE" == 'tar' ]; then BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \ --stream=tar --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \ $COPY_LOG | tar -xi -C $backup_dir" -elif [ "$STREAM_TYPE" == 'xbstream' ]; then - BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \ - --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \ - $COPY_LOG | xbstream -x \ - --directory=$backup_dir" -elif [ "$STREAM_TYPE" == "xbstream_socket" ]; then - BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --socket=${MASTER_MYSOCK} \ - --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \ - $COPY_LOG | xbstream -x \ - --directory=$backup_dir" -else +elif [ "$STREAM_TYPE" == 'wdt' ]; then BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --stream=wdt \ --port=${MASTER_MYPORT} --destination=localhost --backup_dir=$backup_dir \ --avg_mbytes_per_sec=10 --interval=5 $SIGNAL_CONDITION \ --extra_wdt_sender_options='--block_size_mbytes=1' \ --checkpoint_dir=$checkpoint_dir 2> \ $COPY_LOG" +elif [ "$STREAM_TYPE" == "xbstream_socket" ]; then + BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --socket=${MASTER_MYSOCK} \ + --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \ + $COPY_LOG | 
xbstream -x \ + --directory=$backup_dir" +else + BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \ + --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \ + $COPY_LOG | xbstream -x \ + --directory=$backup_dir" fi echo "myrocks_hotbackup copy phase" @@ -73,7 +72,6 @@ eval "$BACKUP_CMD" mkdir ${backup_dir}/test # TODO: Fix skipping empty directories -MOVEBACK_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_moveback_log" echo "myrocks_hotbackup move-back phase" $MYSQL_MYROCKS_HOTBACKUP --move_back --datadir=$dest_data_dir \ diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result new file mode 100644 index 00000000000..31ed2677444 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result @@ -0,0 +1,21 @@ +include/rpl_init.inc [topology=none] +include/rpl_default_connections.inc +create database db1; +create table db1.t1 ( +`id` int(10) not null auto_increment, +`k` int(10), +`data` varchar(2048), +primary key (`id`), +key (`k`) +) engine=rocksdb; +include/rpl_stop_server.inc [server_number=2] +myrocks_hotbackup copy phase +myrocks_hotbackup copy phase +myrocks_hotbackup move-back phase +include/rpl_start_server.inc [server_number=2] +select count(*) from db1.t1; +count(*) +250000 +drop database db1; +drop database db1; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc new file mode 100644 index 00000000000..52456a68140 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc @@ -0,0 +1,25 @@ + +source suite/rocksdb_hotbackup/include/setup.inc; + +--exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1 +--let $rpl_server_number= 2 +--source include/rpl_stop_server.inc + +--error 1 +--exec STREAM_TYPE=xbstream FRM=1 DEBUG_SIGNAL=1 suite/rocksdb_hotbackup/include/stream_run.sh 
2>&1 + +--exec STREAM_TYPE=xbstream suite/rocksdb_hotbackup/include/stream_run.sh 2>&1 + +--let $rpl_server_number= 2 +--source include/rpl_start_server.inc + +connection server_2; +select count(*) from db1.t1; + +connection server_1; +drop database db1; +connection server_2; +drop database db1; + +source suite/rocksdb_hotbackup/include/cleanup.inc; + diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test index 52456a68140..18816c34446 100644 --- a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test @@ -1,25 +1,7 @@ +--source include/have_rocksdb.inc +--source xbstream.inc +let SEARCH_FILE= $MYSQL_TMP_DIR/myrocks_hotbackup_copy_log; +let SEARCH_PATTERN= Direct I/O: 0; +--source include/search_pattern_in_file.inc -source suite/rocksdb_hotbackup/include/setup.inc; - ---exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1 ---let $rpl_server_number= 2 ---source include/rpl_stop_server.inc - ---error 1 ---exec STREAM_TYPE=xbstream FRM=1 DEBUG_SIGNAL=1 suite/rocksdb_hotbackup/include/stream_run.sh 2>&1 - ---exec STREAM_TYPE=xbstream suite/rocksdb_hotbackup/include/stream_run.sh 2>&1 - ---let $rpl_server_number= 2 ---source include/rpl_start_server.inc - -connection server_2; -select count(*) from db1.t1; - -connection server_1; -drop database db1; -connection server_2; -drop database db1; - -source suite/rocksdb_hotbackup/include/cleanup.inc; - +--exec suite/rocksdb_hotbackup/include/clean_tmpfiles.sh diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt new file mode 100644 index 00000000000..4ab98aeabe1 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt @@ -0,0 +1 @@ +--rocksdb_use_direct_reads=ON --rocksdb_use_direct_io_for_flush_and_compaction=ON diff --git 
a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test new file mode 100644 index 00000000000..41357d68415 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test @@ -0,0 +1,7 @@ +--source include/have_rocksdb.inc +--source xbstream.inc +let SEARCH_FILE= $MYSQL_TMP_DIR/myrocks_hotbackup_copy_log; +let SEARCH_PATTERN= Direct I/O: 1; +--source include/search_pattern_in_file.inc + +--exec suite/rocksdb_hotbackup/include/clean_tmpfiles.sh diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/combinations b/storage/rocksdb/mysql-test/rocksdb_rpl/combinations index fe97111940a..eae7431662b 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/combinations +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/combinations @@ -5,4 +5,3 @@ rocksdb_write_policy=write_committed [row-write-prepared] binlog-format=row rocksdb_write_policy=write_prepared -rocksdb_commit_time_batch_for_recovery=on diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc new file mode 100644 index 00000000000..f0c0134e4d1 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc @@ -0,0 +1,37 @@ + +-- let $engine = ROCKSDB + +call mtr.add_suppression("Recovery from master pos"); + +-- let $debug_option = crash_before_update_pos +-- source extra/rpl_tests/rpl_gtid_crash_safe.inc + +-- source include/rpl_reset.inc +-- let $debug_option = crash_after_update_pos_before_apply +-- source extra/rpl_tests/rpl_gtid_crash_safe.inc + +-- source include/rpl_reset.inc +-- let $debug_option = crash_before_writing_xid +-- source extra/rpl_tests/rpl_gtid_crash_safe.inc + +-- source include/rpl_reset.inc +-- let $debug_option = half_binlogged_transaction +-- source extra/rpl_tests/rpl_gtid_crash_safe.inc + +-- source include/rpl_reset.inc +-- let $debug_option = crash_commit_before 
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc + +-- source include/rpl_reset.inc +-- let $debug_option = crash_commit_after_log +-- source extra/rpl_tests/rpl_gtid_crash_safe.inc + +-- source include/rpl_reset.inc +-- let $debug_option = crash_commit_after_prepare +-- source extra/rpl_tests/rpl_gtid_crash_safe.inc + +-- source include/rpl_reset.inc +-- let $debug_option = crash_commit_after +-- source extra/rpl_tests/rpl_gtid_crash_safe.inc + +-- source include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result new file mode 100644 index 00000000000..1b41405fd5e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result @@ -0,0 +1,282 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
+[connection master] +SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level; +SET GLOBAL rocksdb_perf_context_level=3; +SET GLOBAL enable_blind_replace=ON; +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; +c1 c2 +1 1 +2 2 +3 3 +create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t2 values(1,1),(2,2),(3,3); +select * from t2; +c1 c2 +1 1 +2 2 +3 3 +create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t3 values(1,1),(2,2),(3,3); +select * from t3; +c1 c2 +1 1 +2 2 +3 3 +SET GLOBAL enable_blind_replace=ON; +create trigger trg before insert on t2 for each row set @a:=1; +alter table t3 add constraint slave_unique_key unique (c2); +connect slave +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +Case 1 +connect master +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(1,11); +replace into t1 values(2,22); +replace into t1 values(3,33); +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +select * from t1; +c1 c2 +1 11 +2 22 +3 33 +connect slave +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +select * from t1; +c1 c2 +1 11 +2 22 +3 33 +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +Case 2 +connect master +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(2,44),(3,55); +select case when variable_value-@d > 2 
then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +select * from t1; +c1 c2 +1 11 +2 44 +3 55 +connect slave +select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +select * from t1; +c1 c2 +1 11 +2 44 +3 55 +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +Case 3 +connect master +update t1 set c2=66 where c1=3; +select * from t1; +c1 c2 +1 11 +2 44 +3 66 +connect slave +select * from t1; +c1 c2 +1 11 +2 44 +3 66 +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +Case 4 +connect master +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t2 values(1,111); +replace into t2 values(2,222); +replace into t2 values(3,333); +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +select * from t2; +c1 c2 +1 111 +2 222 +3 333 +connect slave +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +false +select * from t2; +c1 c2 +1 111 +2 222 +3 333 +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +Case 5 +connect master +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t3 values(1,1111); +replace into t3 values(2,2222); +replace into t3 values(3,3333); +select * from t3; +c1 c2 
+1 1111 +2 2222 +3 3333 +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +true +connect slave +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +read_free +false +select * from t3; +c1 c2 +1 1111 +2 2222 +3 3333 +select * from t3 use index (slave_unique_key); +c1 c2 +1 1111 +2 2222 +3 3333 +Case 6 +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Query # # use `test`; create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # use `test`; create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t2) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # use `test`; create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t3) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ 
+master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t2) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t2) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t2) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t3) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t3) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t3) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +slave-bin.000001 # Query # # use `test`; create table t1(c1 
int,c2 int, primary key (c1)) engine=rocksdb +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t1) +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # use `test`; create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t2) +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # use `test`; create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t3) +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # use `test`; CREATE DEFINER=`root`@`localhost` trigger trg before insert on t2 for each row set @a:=1 +slave-bin.000001 # Query # # use `test`; alter table t3 add constraint slave_unique_key unique (c2) +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t1) +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t1) +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t1) +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t1) +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t1) 
+slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t2) +slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t2) +slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t2) +slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t3) +slave-bin.000001 # Delete_rows # # table_id: # +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t3) +slave-bin.000001 # Delete_rows # # table_id: # +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Query # # BEGIN +slave-bin.000001 # Table_map # # table_id: # (test.t3) +slave-bin.000001 # Delete_rows # # table_id: # +slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +drop table t1; +drop table t2; +drop table t3; +SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level; +SET GLOBAL enable_blind_replace=DEFAULT; +SET GLOBAL enable_blind_replace=DEFAULT; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result new file mode 100644 index 00000000000..a770822285b --- /dev/null +++ 
b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result @@ -0,0 +1,165 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +call mtr.add_suppression("Error_code: 1032"); +create table t1 (a int primary key, b int, c int) engine = rocksdb; +create table t2 (a int unique, b int, c int) engine = rocksdb; +create table t3 (a int, b int, c int, key(a)) engine = rocksdb; +create table t4 (a int, b int, c int) engine = rocksdb; +insert into t1 values(1, 1, 1); +insert into t2 values(1, 1, 1); +insert into t3 values(1, 1, 1); +insert into t4 values(1, 1, 1); +include/sync_slave_sql_with_master.inc +set @@sql_log_bin = 0; +update t1 set c = 2; +update t2 set c = 2; +update t3 set c = 2; +update t4 set c = 2; +set @@sql_log_bin = 1; +update t1 set b = 2; +include/wait_for_slave_sql_error.inc [errno=1032] +set @@sql_log_bin = 0; +update t1 set c = 1; +set @@sql_log_bin = 1; +include/stop_slave.inc +include/start_slave.inc +include/sync_slave_sql_with_master.inc +update t2 set b = 2; +include/wait_for_slave_sql_error.inc [errno=1032] +set @@sql_log_bin = 0; +update t2 set c = 1; +set @@sql_log_bin = 1; +include/stop_slave.inc +include/start_slave.inc +include/sync_slave_sql_with_master.inc +update t3 set b = 2; +include/wait_for_slave_sql_error.inc [errno=1032] +set @@sql_log_bin = 0; +update t3 set c = 1; +set @@sql_log_bin = 1; +include/stop_slave.inc +include/start_slave.inc +include/sync_slave_sql_with_master.inc +update t4 set b = 2; +include/wait_for_slave_sql_error.inc [errno=1032] +set @@sql_log_bin = 0; +update t4 set c = 1; +set @@sql_log_bin = 1; 
+include/stop_slave.inc +include/start_slave.inc +include/sync_slave_sql_with_master.inc +select * from t1; +a b c +1 2 1 +select * from t2; +a b c +1 2 1 +select * from t3; +a b c +1 2 1 +select * from t4; +a b c +1 2 1 +select * from t1; +a b c +1 2 1 +select * from t2; +a b c +1 2 1 +select * from t3; +a b c +1 2 1 +select * from t4; +a b c +1 2 1 +drop table t1; +drop table t2; +drop table t3; +drop table t4; +include/sync_slave_sql_with_master.inc +include/stop_slave.inc +set @@global.slave_rows_search_algorithms = 'INDEX_SCAN,TABLE_SCAN,HASH_SCAN'; +include/start_slave.inc +create table t1 (a int primary key, b int, c int) engine = rocksdb; +create table t2 (a int unique, b int, c int) engine = rocksdb; +create table t3 (a int, b int, c int, key(a)) engine = rocksdb; +create table t4 (a int, b int, c int) engine = rocksdb; +insert into t1 values(1, 1, 1); +insert into t2 values(1, 1, 1); +insert into t3 values(1, 1, 1); +insert into t4 values(1, 1, 1); +include/sync_slave_sql_with_master.inc +set @@sql_log_bin = 0; +update t1 set c = 2; +update t2 set c = 2; +update t3 set c = 2; +update t4 set c = 2; +set @@sql_log_bin = 1; +update t1 set b = 2; +include/wait_for_slave_sql_error.inc [errno=1032] +set @@sql_log_bin = 0; +update t1 set c = 1; +set @@sql_log_bin = 1; +include/stop_slave.inc +include/start_slave.inc +include/sync_slave_sql_with_master.inc +update t2 set b = 2; +include/wait_for_slave_sql_error.inc [errno=1032] +set @@sql_log_bin = 0; +update t2 set c = 1; +set @@sql_log_bin = 1; +include/stop_slave.inc +include/start_slave.inc +include/sync_slave_sql_with_master.inc +update t3 set b = 2; +include/wait_for_slave_sql_error.inc [errno=1032] +set @@sql_log_bin = 0; +update t3 set c = 1; +set @@sql_log_bin = 1; +include/stop_slave.inc +include/start_slave.inc +include/sync_slave_sql_with_master.inc +update t4 set b = 2; +include/wait_for_slave_sql_error.inc [errno=1032] +set @@sql_log_bin = 0; +update t4 set c = 1; +set @@sql_log_bin = 1; 
+include/stop_slave.inc +include/start_slave.inc +include/sync_slave_sql_with_master.inc +select * from t1; +a b c +1 2 1 +select * from t2; +a b c +1 2 1 +select * from t3; +a b c +1 2 1 +select * from t4; +a b c +1 2 1 +select * from t1; +a b c +1 2 1 +select * from t2; +a b c +1 2 1 +select * from t3; +a b c +1 2 1 +select * from t4; +a b c +1 2 1 +drop table t1; +drop table t2; +drop table t3; +drop table t4; +include/sync_slave_sql_with_master.inc +include/stop_slave.inc +set @@global.slave_rows_search_algorithms = DEFAULT; +include/start_slave.inc +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result new file mode 100644 index 00000000000..a518de2b6e3 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result @@ -0,0 +1,361 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
+[connection master] +call mtr.add_suppression("Recovery from master pos"); +create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB; +insert into t1 values(1); +insert into t1 values(2); +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 mtr +2 test uuid:4 +SET GLOBAL debug = '+d,crash_before_update_pos'; +insert into t1 values(3); +include/rpl_reconnect.inc +SET GLOBAL debug = ``; +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 mtr +2 test uuid:4 +use test; +select * from t1; +a +1 +2 +change master to master_auto_position = 1; +include/start_slave.inc +rename table t1 to test1; +use test; +select * from test1; +a +1 +2 +3 +use test; +select * from test1; +a +1 +2 +3 +drop table test1; +include/stop_slave.inc +change master to master_auto_position = 0; +include/start_slave.inc +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 mtr +2 test uuid:5 +include/rpl_reset.inc +create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB; +insert into t1 values(1); +insert into t1 values(2); +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +SET GLOBAL debug = '+d,crash_after_update_pos_before_apply'; +insert into t1 values(3); +include/rpl_reconnect.inc +SET GLOBAL debug = ``; +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +use test; +select * from t1; +a +1 +2 +change master to master_auto_position = 1; +include/start_slave.inc +rename table t1 to test1; +use test; +select * from test1; +a +1 +2 +3 +use test; +select * from test1; +a +1 +2 +3 +drop table test1; +include/stop_slave.inc +change master to master_auto_position = 0; +include/start_slave.inc +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:4 +include/rpl_reset.inc +create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB; +insert into t1 values(1); +insert into t1 values(2); +use mysql; +select * from slave_gtid_info; +Id Database_name 
Last_gtid +1 test uuid:3 +SET GLOBAL debug = '+d,crash_before_writing_xid'; +insert into t1 values(3); +include/rpl_reconnect.inc +SET GLOBAL debug = ``; +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +use test; +select * from t1; +a +1 +2 +change master to master_auto_position = 1; +include/start_slave.inc +rename table t1 to test1; +use test; +select * from test1; +a +1 +2 +3 +use test; +select * from test1; +a +1 +2 +3 +drop table test1; +include/stop_slave.inc +change master to master_auto_position = 0; +include/start_slave.inc +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:4 +include/rpl_reset.inc +create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB; +insert into t1 values(1); +insert into t1 values(2); +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +SET GLOBAL debug = '+d,half_binlogged_transaction'; +insert into t1 values(3); +include/rpl_reconnect.inc +SET GLOBAL debug = ``; +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +use test; +select * from t1; +a +1 +2 +change master to master_auto_position = 1; +include/start_slave.inc +rename table t1 to test1; +use test; +select * from test1; +a +1 +2 +3 +use test; +select * from test1; +a +1 +2 +3 +drop table test1; +include/stop_slave.inc +change master to master_auto_position = 0; +include/start_slave.inc +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:4 +include/rpl_reset.inc +create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB; +insert into t1 values(1); +insert into t1 values(2); +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +SET GLOBAL debug = '+d,crash_commit_before'; +insert into t1 values(3); +include/rpl_reconnect.inc +SET GLOBAL debug = ``; +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +use test; +select * from t1; +a +1 +2 
+change master to master_auto_position = 1; +include/start_slave.inc +rename table t1 to test1; +use test; +select * from test1; +a +1 +2 +3 +use test; +select * from test1; +a +1 +2 +3 +drop table test1; +include/stop_slave.inc +change master to master_auto_position = 0; +include/start_slave.inc +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:4 +include/rpl_reset.inc +create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB; +insert into t1 values(1); +insert into t1 values(2); +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +SET GLOBAL debug = '+d,crash_commit_after_log'; +insert into t1 values(3); +include/rpl_reconnect.inc +SET GLOBAL debug = ``; +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +use test; +select * from t1; +a +1 +2 +change master to master_auto_position = 1; +include/start_slave.inc +rename table t1 to test1; +use test; +select * from test1; +a +1 +2 +3 +use test; +select * from test1; +a +1 +2 +3 +drop table test1; +include/stop_slave.inc +change master to master_auto_position = 0; +include/start_slave.inc +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:4 +include/rpl_reset.inc +create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB; +insert into t1 values(1); +insert into t1 values(2); +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +SET GLOBAL debug = '+d,crash_commit_after_prepare'; +insert into t1 values(3); +include/rpl_reconnect.inc +SET GLOBAL debug = ``; +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +use test; +select * from t1; +a +1 +2 +change master to master_auto_position = 1; +include/start_slave.inc +rename table t1 to test1; +use test; +select * from test1; +a +1 +2 +3 +use test; +select * from test1; +a +1 +2 +3 +drop table test1; +include/stop_slave.inc +change master to master_auto_position = 0; 
+include/start_slave.inc +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:4 +include/rpl_reset.inc +create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB; +insert into t1 values(1); +insert into t1 values(2); +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +SET GLOBAL debug = '+d,crash_commit_after'; +insert into t1 values(3); +include/rpl_reconnect.inc +SET GLOBAL debug = ``; +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:3 +use test; +select * from t1; +a +1 +2 +change master to master_auto_position = 1; +include/start_slave.inc +rename table t1 to test1; +use test; +select * from test1; +a +1 +2 +3 +use test; +select * from test1; +a +1 +2 +3 +drop table test1; +include/stop_slave.inc +change master to master_auto_position = 0; +include/start_slave.inc +use mysql; +select * from slave_gtid_info; +Id Database_name Last_gtid +1 test uuid:4 +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result new file mode 100644 index 00000000000..1f6acf32872 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result @@ -0,0 +1,43 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
+[connection master] +Make changes in master +create table test1 (a int primary key, b int) engine=rocksdb; +insert into test1 values (1, 1); +Make sure slave is up-to-date and mysql.slave_gtid_info is good +select * from test1; +a b +1 1 +select id, database_name, last_gtid from mysql.slave_gtid_info; +id database_name last_gtid +1 test UUID:2 +Make changes in master +insert into test1 values (2, 2); +Make sure slave is up-to-date and mysql.slave_gtid_info is good +select @@slave_gtid_info; +@@slave_gtid_info +OPTIMIZED +select * from test1; +a b +1 1 +2 2 +select * from mysql.slave_gtid_info; +Id Database_name Last_gtid +1 test UUID:3 +Make changes in master +insert into test1 values (3, 3); +insert into test1 values (4, 4); +Make sure slave is up-to-date and mysql.slave_gtid_info is good +select * from test1; +a b +1 1 +2 2 +3 3 +4 4 +select id, database_name, last_gtid from mysql.slave_gtid_info; +id database_name last_gtid +1 test UUID:5 +DROP TABLE IF EXISTS test1; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/combinations b/storage/rocksdb/mysql-test/rocksdb_rpl/t/combinations deleted file mode 100644 index f09d338c357..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/combinations +++ /dev/null @@ -1,2 +0,0 @@ -[row] -binlog-format=row diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def index 3896a822872..2147e3e086d 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def @@ -11,6 +11,8 @@ rpl_gtid_rocksdb_sys_header : MariaDB doesn't support printing "RocksDB: Last My singledelete_idempotent_recovery: MariaDB doesn't support --slave-use-idempotent-for-recovery rpl_mts_dependency_unique_key_conflicts: MariaDB doesn't support --slave-use-idempotent-for-recovery rpl_missing_columns_sk_update : Uses log_column_names=ON feature which is only present in FB/MySQL 
+optimize_myrocks_replace_into: requires @@enable_blind_replace support. +rpl_gtid_crash_safe_optimized: requires slave_gtid_info=optimized ## ## Tests that do not fit MariaDB's test environment (Functional tests only, @@ -28,4 +30,5 @@ rpl_gtid_crash_safe : Didn't try with MariaDB, yet rpl_gtid_crash_safe_wal_corrupt : Didn't try with MariaDB, yet rpl_rocksdb_snapshot : Didn't try with MariaDB, yet rpl_rocksdb_snapshot_without_gtid : Didn't try with MariaDB, yet - +rpl_rocksdb_slave_gtid_info_optimized: requires slave-gtid-info=optimized which is an FB/MySQL-only feature +rocksdb_slave_check_before_image_consistency: requires slave_check_before_image_consistency feature diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test new file mode 100644 index 00000000000..82b231d489a --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test @@ -0,0 +1,149 @@ +--source include/have_rocksdb.inc +--source include/master-slave.inc +--source include/have_debug.inc + +connection master; +SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level; +SET GLOBAL rocksdb_perf_context_level=3; +SET GLOBAL enable_blind_replace=ON; + +# Create and insert some rows in a table +create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t1 values(1,1),(2,2),(3,3); +select * from t1; + +# Create table which has a trigger only in slave +create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t2 values(1,1),(2,2),(3,3); +select * from t2; + +# Create table which has a secondary key only in slave +create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb; +insert into t3 values(1,1),(2,2),(3,3); +select * from t3; + +sync_slave_with_master; + +# Enable blind replace in both slave and master +connection slave; +SET GLOBAL enable_blind_replace=ON; +create trigger trg before insert on t2 for 
each row set @a:=1; +alter table t3 add constraint slave_unique_key unique (c2); + +connection master; + +sync_slave_with_master; +--echo connect slave +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +# Case 1 - 'replace into' on a table with no triggers or secondary keys. Blind replace optimization should kick in both in master and slave +--echo Case 1 +connection master; +--echo connect master +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +replace into t1 values(1,11); +replace into t1 values(2,22); +replace into t1 values(3,33); +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +select * from t1; + +sync_slave_with_master; +--echo connect slave +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t1; + +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +# Case 2 - Multiple replaces in a single statement. 
Blind replace optimization should kick in +connection master; +--echo Case 2 +--echo connect master +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t1 values(2,44),(3,55); +select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t1; + +sync_slave_with_master; +--echo connect slave +select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t1; + +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +# Case 3 - A regular update. This is not a blind replace +--echo Case 3 +connection master; +--echo connect master +update t1 set c2=66 where c1=3; +select * from t1; + +sync_slave_with_master; +--echo connect slave +select * from t1; + +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +# Case 4 - Slave has trigger on its table. No triggers on the table in master. +# Blind replace optimization should kick in on master. 
+# Slave should convert this statement into a regular update +--echo Case 4 +connection master; +--echo connect master +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t2 values(1,111); +replace into t2 values(2,222); +replace into t2 values(3,333); +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t2; + +sync_slave_with_master; +--echo connect slave +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t2; + +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +# Case 5 - Slave has secondary keys on the table. No secondary keys on the table in master +# Blind replace optimization should kick in on master. +# Slave should convert this statement into a regular delete_insert +--echo Case 5 +connection master; +--echo connect master +select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +replace into t3 values(1,1111); +replace into t3 values(2,2222); +replace into t3 values(3,3333); +select * from t3; + +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; + +sync_slave_with_master; +--echo connect slave +select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls'; +select * from t3; +select * from t3 use index (slave_unique_key); + +# Case 6 - Just to verify all binlog events. +# blind replace will generate a write_rows event. 
+# Or else, it will be an update_rows event or a delete_rows_write_rows event +--echo Case 6 +connection master; +--source include/show_binlog_events.inc + +connection slave; +--source include/show_binlog_events.inc + +# Cleanup +connection master; +drop table t1; +drop table t2; +drop table t3; +SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level; +SET GLOBAL enable_blind_replace=DEFAULT; + +connection slave; +SET GLOBAL enable_blind_replace=DEFAULT; + +--source include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt new file mode 100644 index 00000000000..78b517e93ab --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt @@ -0,0 +1 @@ +--slave_check_before_image_consistency=ON diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test new file mode 100644 index 00000000000..d7db127a207 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test @@ -0,0 +1,22 @@ +source include/master-slave.inc; +source include/have_binlog_format_row.inc; + +call mtr.add_suppression("Error_code: 1032"); + +let $engine= rocksdb; + +source extra/rpl_tests/rpl_slave_check_before_image_consistency.inc; + +# check detection with HASH_SCAN enabled +connection slave; +source include/stop_slave.inc; +set @@global.slave_rows_search_algorithms = 'INDEX_SCAN,TABLE_SCAN,HASH_SCAN'; +source include/start_slave.inc; +source extra/rpl_tests/rpl_slave_check_before_image_consistency.inc; + +# cleanup +source include/stop_slave.inc; +set @@global.slave_rows_search_algorithms = DEFAULT; +source include/start_slave.inc; + +source include/rpl_end.inc; diff --git 
a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test index f1b1b16704f..5a3e665a025 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test @@ -4,39 +4,8 @@ -- source include/have_debug.inc -- source include/not_valgrind.inc --- let $engine = ROCKSDB +if (`select count(*) = 1 from information_schema.global_variables where variable_name = 'slave_gtid_info' and variable_value = 'optimized';`) { + --skip Test does not support row_write_committed_slave_gtid_optimized policy due to subtle behavioral differences. rpl_gtid_crash_safe_optimized covers slave_gtid_info=optimized. +} -call mtr.add_suppression("Recovery from master pos"); - --- let $debug_option = crash_before_update_pos --- source extra/rpl_tests/rpl_gtid_crash_safe.inc - --- source include/rpl_reset.inc --- let $debug_option = crash_after_update_pos_before_apply --- source extra/rpl_tests/rpl_gtid_crash_safe.inc - --- source include/rpl_reset.inc --- let $debug_option = crash_before_writing_xid --- source extra/rpl_tests/rpl_gtid_crash_safe.inc - --- source include/rpl_reset.inc --- let $debug_option = half_binlogged_transaction --- source extra/rpl_tests/rpl_gtid_crash_safe.inc - --- source include/rpl_reset.inc --- let $debug_option = crash_commit_before --- source extra/rpl_tests/rpl_gtid_crash_safe.inc - --- source include/rpl_reset.inc --- let $debug_option = crash_commit_after_log --- source extra/rpl_tests/rpl_gtid_crash_safe.inc - --- source include/rpl_reset.inc --- let $debug_option = crash_commit_after_prepare --- source extra/rpl_tests/rpl_gtid_crash_safe.inc - --- source include/rpl_reset.inc --- let $debug_option = crash_commit_after --- source extra/rpl_tests/rpl_gtid_crash_safe.inc - --- source include/rpl_end.inc +-- source ../include/rpl_gtid_crash_safe.inc diff --git 
a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt new file mode 100644 index 00000000000..397310d37b4 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt @@ -0,0 +1 @@ +--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --rocksdb_enable_2pc=OFF diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt new file mode 100644 index 00000000000..e41dcc5eecd --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt @@ -0,0 +1,2 @@ +--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --rocksdb_enable_2pc=OFF +--sync_binlog=1000 --relay_log_recovery=1 --slave_gtid_info=optimized diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test new file mode 100644 index 00000000000..c262403286c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test @@ -0,0 +1,11 @@ +-- source include/have_rocksdb.inc +-- source include/have_gtid.inc +-- source include/master-slave.inc +-- source include/have_debug.inc +-- source include/not_valgrind.inc + +if (`select count(*) = 0 from information_schema.global_variables where variable_name = 'slave_gtid_info' and variable_value = 'optimized';`) { + --skip Test requires row_write_committed_slave_gtid_optimized policy where slave_gtid_info=optimized +} + +-- source ../include/rpl_gtid_crash_safe.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt new file mode 100644 index 00000000000..c747adc94d5 --- /dev/null 
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt @@ -0,0 +1 @@ +--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt new file mode 100644 index 00000000000..6cde3c553d4 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt @@ -0,0 +1 @@ +--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates --slave-gtid-info=optimized diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test new file mode 100644 index 00000000000..c8a0c8daf10 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test @@ -0,0 +1,51 @@ +--source include/have_rocksdb.inc +--source include/master-slave.inc +--source include/have_binlog_format_row.inc + +if (`select count(*) = 0 from information_schema.global_variables where variable_name = 'slave_gtid_info' and variable_value = 'optimized';`) { + --skip Test requires row_write_committed_slave_gtid_optimized policy where slave_gtid_info=optimized +} + +--echo Make changes in master +create table test1 (a int primary key, b int) engine=rocksdb; +insert into test1 values (1, 1); + +--echo Make sure slave is up-to-date and mysql.slave_gtid_info is good +sync_slave_with_master slave; +connection slave; +select * from test1; +-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/ +select id, database_name, last_gtid from mysql.slave_gtid_info; + +--echo Make changes in master +connection master; +insert into test1 values (2, 2); + +--echo Make sure slave is up-to-date and mysql.slave_gtid_info is good +sync_slave_with_master slave; 
+connection slave; +select @@slave_gtid_info; +select * from test1; + +-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/ +select * from mysql.slave_gtid_info; + +--echo Make changes in master +connection master; +insert into test1 values (3, 3); +insert into test1 values (4, 4); + +--echo Make sure slave is up-to-date and mysql.slave_gtid_info is good +sync_slave_with_master slave; +connection slave; +select * from test1; + +-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/ +select id, database_name, last_gtid from mysql.slave_gtid_info; + +connection master; +DROP TABLE IF EXISTS test1; + +sync_slave_with_master slave; + +--source include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result new file mode 100644 index 00000000000..90fc99ef21a --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result @@ -0,0 +1,19 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values VALUES('off'); +INSERT INTO valid_values VALUES('true'); +INSERT INTO valid_values VALUES('false'); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +SET @start_global_value = @@global.ROCKSDB_CACHE_DUMP; +SELECT @start_global_value; +@start_global_value +1 +"Trying to set variable @@global.ROCKSDB_CACHE_DUMP to 444. It should fail because it is readonly." 
+SET @@global.ROCKSDB_CACHE_DUMP = 444; +ERROR HY000: Variable 'rocksdb_cache_dump' is a read only variable +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result new file mode 100644 index 00000000000..9d098385789 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result @@ -0,0 +1,22 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1.0); +INSERT INTO valid_values VALUES(0.0); +INSERT INTO valid_values VALUES(0.5); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES(2.0); +INSERT INTO invalid_values VALUES(-0.5); +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); +INSERT INTO invalid_values VALUES('\'0.5\''); +SET @start_global_value = @@global.ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO; +SELECT @start_global_value; +@start_global_value +0 +"Trying to set variable @@global.ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO to 444. It should fail because it is readonly." 
+SET @@global.ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO = 444; +ERROR HY000: Variable 'rocksdb_cache_high_pri_pool_ratio' is a read only variable +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result new file mode 100644 index 00000000000..819425c8bce --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result @@ -0,0 +1,19 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values VALUES('off'); +INSERT INTO valid_values VALUES('true'); +INSERT INTO valid_values VALUES('false'); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +SET @start_global_value = @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY; +SELECT @start_global_value; +@start_global_value +1 +"Trying to set variable @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY to 444. It should fail because it is readonly." 
+SET @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY = 444; +ERROR HY000: Variable 'rocksdb_cache_index_and_filter_with_high_priority' is a read only variable +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result index 1d409bbedb4..8d3f4a6e5af 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result @@ -9,11 +9,11 @@ INSERT INTO invalid_values VALUES('\'bbb\''); SET @start_global_value = @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SELECT @start_global_value; @start_global_value -0 +1 SET @start_session_value = @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SELECT @start_session_value; @start_session_value -0 +1 '# Setting to valid values in global scope#' "Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1" SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1; @@ -24,7 +24,7 @@ SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 "Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0" SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0; SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@ -34,7 +34,7 @@ SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 "Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on" SET 
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on; SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@ -44,7 +44,7 @@ SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 "Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off" SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off; SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@ -54,7 +54,7 @@ SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 '# Setting to valid values in session scope#' "Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1" SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1; @@ -65,7 +65,7 @@ SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 "Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0" SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0; SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@ -75,7 +75,7 @@ SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 "Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on" SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on; SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@ -85,7 +85,7 @@ SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SET 
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 "Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off" SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off; SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@ -95,27 +95,27 @@ SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 '# Testing with invalid values in global scope #' "Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'aaa'" SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'aaa'; Got one of the listed errors SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 "Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'bbb'" SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'bbb'; Got one of the listed errors SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_global_value; SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_session_value; SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY -0 +1 DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result new file mode 100644 index 00000000000..b840baf29f8 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result @@ -0,0 +1,6 @@ 
+SET @@global.ROCKSDB_DELETE_CF = 'nonexistent_cf'; +ERROR 42000: Variable 'rocksdb_delete_cf' can't be set to the value of 'nonexistent_cf' +SET @@global.ROCKSDB_DELETE_CF = '__system__'; +ERROR 42000: Variable 'rocksdb_delete_cf' can't be set to the value of '__system__' +SET @@global.ROCKSDB_DELETE_CF = 'default'; +ERROR 42000: Variable 'rocksdb_delete_cf' can't be set to the value of 'default' diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result new file mode 100644 index 00000000000..ede1690f776 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result @@ -0,0 +1,75 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values VALUES('off'); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +SET @start_global_value = @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +SELECT @start_global_value; +@start_global_value +1 +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 1" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 1; +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT; +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +1 +"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 0" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 0; 
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT; +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +1 +"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to on" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = on; +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT; +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +1 +"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to off" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = off; +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT; +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +1 +"Trying to set variable @@session.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 444. It should fail because it is not session." 
+SET @@session.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 444; +ERROR HY000: Variable 'rocksdb_enable_insert_with_update_caching' is a GLOBAL variable and should be set with SET GLOBAL +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 'aaa'" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 'aaa'; +Got one of the listed errors +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +1 +"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 'bbb'" +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 'bbb'; +Got one of the listed errors +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +1 +SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = @start_global_value; +SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING; +@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +1 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result new file mode 100644 index 00000000000..788379927cf --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result @@ -0,0 +1,58 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES('PK_SK'); +INSERT INTO valid_values VALUES('OFF'); +INSERT INTO valid_values VALUES('PK_ONLY'); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('a'); +SET @start_global_value = @@global.ROCKSDB_READ_FREE_RPL; +SELECT @start_global_value; +@start_global_value +OFF +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to PK_SK" +SET @@global.ROCKSDB_READ_FREE_RPL = PK_SK; 
+SELECT @@global.ROCKSDB_READ_FREE_RPL; +@@global.ROCKSDB_READ_FREE_RPL +PK_SK +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_READ_FREE_RPL = DEFAULT; +SELECT @@global.ROCKSDB_READ_FREE_RPL; +@@global.ROCKSDB_READ_FREE_RPL +OFF +"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to OFF" +SET @@global.ROCKSDB_READ_FREE_RPL = OFF; +SELECT @@global.ROCKSDB_READ_FREE_RPL; +@@global.ROCKSDB_READ_FREE_RPL +OFF +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_READ_FREE_RPL = DEFAULT; +SELECT @@global.ROCKSDB_READ_FREE_RPL; +@@global.ROCKSDB_READ_FREE_RPL +OFF +"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to PK_ONLY" +SET @@global.ROCKSDB_READ_FREE_RPL = PK_ONLY; +SELECT @@global.ROCKSDB_READ_FREE_RPL; +@@global.ROCKSDB_READ_FREE_RPL +PK_ONLY +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_READ_FREE_RPL = DEFAULT; +SELECT @@global.ROCKSDB_READ_FREE_RPL; +@@global.ROCKSDB_READ_FREE_RPL +OFF +"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL to 444. It should fail because it is not session." 
+SET @@session.ROCKSDB_READ_FREE_RPL = 444; +ERROR HY000: Variable 'rocksdb_read_free_rpl' is a GLOBAL variable and should be set with SET GLOBAL +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to a" +SET @@global.ROCKSDB_READ_FREE_RPL = a; +Got one of the listed errors +SELECT @@global.ROCKSDB_READ_FREE_RPL; +@@global.ROCKSDB_READ_FREE_RPL +OFF +SET @@global.ROCKSDB_READ_FREE_RPL = @start_global_value; +SELECT @@global.ROCKSDB_READ_FREE_RPL; +@@global.ROCKSDB_READ_FREE_RPL +OFF +SET GLOBAL ROCKSDB_READ_FREE_RPL=DEFAULT; +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result index b218fe034aa..2643eb08617 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result @@ -1,15 +1,13 @@ +call mtr.add_suppression(".*Invalid pattern in rocksdb_read_free_rpl_tables.*"); CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; INSERT INTO valid_values VALUES('a'); INSERT INTO valid_values VALUES('b'); CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'*\''); SET @start_global_value = @@global.ROCKSDB_READ_FREE_RPL_TABLES; SELECT @start_global_value; @start_global_value - -SET @start_session_value = @@session.ROCKSDB_READ_FREE_RPL_TABLES; -SELECT @start_session_value; -@start_session_value - +.* '# Setting to valid values in global scope#' "Trying to set variable @@global.ROCKSDB_READ_FREE_RPL_TABLES to a" SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = a; @@ -20,7 +18,7 @@ a SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT; SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES; @@global.ROCKSDB_READ_FREE_RPL_TABLES - +.* "Trying to set variable 
@@global.ROCKSDB_READ_FREE_RPL_TABLES to b" SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = b; SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES; @@ -30,36 +28,22 @@ b SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT; SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES; @@global.ROCKSDB_READ_FREE_RPL_TABLES - -'# Setting to valid values in session scope#' -"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL_TABLES to a" -SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = a; -SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES; -@@session.ROCKSDB_READ_FREE_RPL_TABLES -a -"Setting the session scope variable back to default" -SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT; -SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES; -@@session.ROCKSDB_READ_FREE_RPL_TABLES - -"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL_TABLES to b" -SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = b; -SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES; -@@session.ROCKSDB_READ_FREE_RPL_TABLES -b -"Setting the session scope variable back to default" -SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT; -SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES; -@@session.ROCKSDB_READ_FREE_RPL_TABLES - +.* +"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL_TABLES to 444. It should fail because it is not session." 
+SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = 444; +ERROR HY000: Variable 'rocksdb_read_free_rpl_tables' is a GLOBAL variable and should be set with SET GLOBAL '# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL_TABLES to '*'" +SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = '*'; +Got one of the listed errors +SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES; +@@global.ROCKSDB_READ_FREE_RPL_TABLES +.* SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = @start_global_value; SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES; @@global.ROCKSDB_READ_FREE_RPL_TABLES - -SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = @start_session_value; -SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES; -@@session.ROCKSDB_READ_FREE_RPL_TABLES - +.* +SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=NULL; +SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=DEFAULT; DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result new file mode 100644 index 00000000000..e51df4f6834 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result @@ -0,0 +1,97 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values VALUES('off'); +INSERT INTO valid_values VALUES('true'); +INSERT INTO valid_values VALUES('false'); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +SET @start_global_value = @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +SELECT @start_global_value; +@start_global_value +0 +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 1" +SET 
@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 1; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 0" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 0; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to on" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = on; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to off" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = off; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to true" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = true; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to false" +SET 
@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = false; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Trying to set variable @@session.ROCKSDB_ROLLBACK_ON_TIMEOUT to 444. It should fail because it is not session." +SET @@session.ROCKSDB_ROLLBACK_ON_TIMEOUT = 444; +ERROR HY000: Variable 'rocksdb_rollback_on_timeout' is a GLOBAL variable and should be set with SET GLOBAL +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 'aaa'" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 'aaa'; +Got one of the listed errors +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 'bbb'" +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 'bbb'; +Got one of the listed errors +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = @start_global_value; +SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT; +@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT +0 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result index 3e169671cc0..a3749b75e47 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result @@ -61,5 +61,7 @@ SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = @start_session_value; SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES; @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES .* +SET GLOBAL 
ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=NULL; +SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=DEFAULT; DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result new file mode 100644 index 00000000000..d8d218fe3e8 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result @@ -0,0 +1,85 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES(4); +INSERT INTO valid_values VALUES(2); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); +SET @start_global_value = @@global.ROCKSDB_STATS_LEVEL; +SELECT @start_global_value; +@start_global_value +0 +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 0" +SET @@global.ROCKSDB_STATS_LEVEL = 0; +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT; +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 4" +SET @@global.ROCKSDB_STATS_LEVEL = 4; +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +4 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT; +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 2" +SET @@global.ROCKSDB_STATS_LEVEL = 2; +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +2 +"Setting the global scope 
variable back to default" +SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT; +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +"Trying to set variable @@session.ROCKSDB_STATS_LEVEL to 444. It should fail because it is not session." +SET @@session.ROCKSDB_STATS_LEVEL = 444; +ERROR HY000: Variable 'rocksdb_stats_level' is a GLOBAL variable and should be set with SET GLOBAL +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 'aaa'" +SET @@global.ROCKSDB_STATS_LEVEL = 'aaa'; +Got one of the listed errors +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 'bbb'" +SET @@global.ROCKSDB_STATS_LEVEL = 'bbb'; +Got one of the listed errors +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '-1'" +SET @@global.ROCKSDB_STATS_LEVEL = '-1'; +Got one of the listed errors +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '101'" +SET @@global.ROCKSDB_STATS_LEVEL = '101'; +Got one of the listed errors +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '484436'" +SET @@global.ROCKSDB_STATS_LEVEL = '484436'; +Got one of the listed errors +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +SET @@global.ROCKSDB_STATS_LEVEL = @start_global_value; +SELECT @@global.ROCKSDB_STATS_LEVEL; +@@global.ROCKSDB_STATS_LEVEL +0 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result index ba24fafd0ec..dcc7e1f68db 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result +++ 
b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result @@ -114,6 +114,10 @@ ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of ' SELECT @@global.rocksdb_update_cf_options; @@global.rocksdb_update_cf_options cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8}; +SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=67108864;target_file_size_base=67108864};'; +SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=67108864;target_file_size_base=67108864};'; +SET @@global.rocksdb_update_cf_options = 'cf2={write_buffer_size=67108864;target_file_size_base=67108864;max_bytes_for_level_multiplier=10.000000};'; +SET @@global.rocksdb_update_cf_options = 'cf3={write_buffer_size=67108864;target_file_size_base=67108864};'; SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; @@global.rocksdb_update_cf_options diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def new file mode 100644 index 00000000000..efa82ff6184 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def @@ -0,0 +1,5 @@ +# Disabled tests + +rocksdb_read_free_rpl_basic : MariaDB: Read-Free replication is not supported +rocksdb_read_free_rpl_tables_basic : MariaDB: Read-Free replication is not supported + diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test new file mode 100644 index 00000000000..70557621828 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test @@ -0,0 +1,21 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values 
VALUES('off'); +INSERT INTO valid_values VALUES('true'); +INSERT INTO valid_values VALUES('false'); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); + +--let $sys_var=ROCKSDB_CACHE_DUMP +--let $read_only=1 +--let $session=0 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test new file mode 100644 index 00000000000..d97c4e20f82 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test @@ -0,0 +1,24 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1.0); +INSERT INTO valid_values VALUES(0.0); +INSERT INTO valid_values VALUES(0.5); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES(2.0); +INSERT INTO invalid_values VALUES(-0.5); +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); +INSERT INTO invalid_values VALUES('\'0.5\''); + +--let $sys_var=ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO +--let $read_only=1 +--let $session=0 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test new file mode 100644 index 00000000000..5ed7927f233 --- /dev/null +++ 
b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test @@ -0,0 +1,21 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values VALUES('off'); +INSERT INTO valid_values VALUES('true'); +INSERT INTO valid_values VALUES('false'); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); + +--let $sys_var=ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY +--let $read_only=1 +--let $session=0 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt new file mode 100644 index 00000000000..ae43ab332ee --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt @@ -0,0 +1 @@ +--loose-rocksdb-dummy-option-instead-of-force-restart diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test new file mode 100644 index 00000000000..0875e492b2c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test @@ -0,0 +1,75 @@ +--disable_query_log +call mtr.add_suppression("Failed to drop column family"); +call mtr.add_suppression("Column family '[a-z_]+' not found"); +--enable_query_log + +--source include/have_rocksdb.inc + +# should fail for not existing CF +--error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.ROCKSDB_DELETE_CF = 'nonexistent_cf' + +# should fail for default system cf +--error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.ROCKSDB_DELETE_CF = 
'__system__' + +# should fail for default cf +--error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.ROCKSDB_DELETE_CF = 'default' + +--disable_parsing +# should succeed for not existing CF +--eval SET @@global.ROCKSDB_DELETE_CF = 'nonexistent_cf' + +# should fail for default system cf +--error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.ROCKSDB_DELETE_CF = '__system__' + +alter table mysql.slave_worker_info engine = MyISAM; +alter table mysql.slave_relay_log_info engine = MyISAM; +alter table mysql.slave_gtid_info engine = MyISAM; +alter table mysql.slave_master_info engine = MyISAM; + +select count(*) from information_schema.rocksdb_ddl where cf = 'default'; + +# should fail for default cf +--error ER_GET_ERRMSG +--eval SET @@global.ROCKSDB_DELETE_CF = 'default' + +CREATE TABLE cf_deletion_test_table1 ( + id1 int(10) unsigned NOT NULL DEFAULT '0', + id2 int(10) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (id1) COMMENT 'cf_primary_key', + KEY `sec_key` (id2) COMMENT 'cf_secondary_key' +) ENGINE=ROCKSDB; + +# should fail, CFs are still in use +--error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_primary_key' +--error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_secondary_key' + +CREATE TABLE cf_deletion_test_table2 ( + id1 int(10) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (id1) COMMENT 'cf_primary_key' +) ENGINE=ROCKSDB; + +DROP TABLE cf_deletion_test_table1; + +# should fail, still used by second table +--error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_primary_key' + +# should succeed, no one is using it anymore +--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_secondary_key' + +DROP TABLE cf_deletion_test_table2; + +# should succeed now +--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_primary_key' + +alter table mysql.slave_worker_info engine = ROCKSDB; +alter table mysql.slave_relay_log_info engine = ROCKSDB; +alter table mysql.slave_gtid_info engine = ROCKSDB; +alter table mysql.slave_master_info 
engine = ROCKSDB; +--enable_parsing diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test new file mode 100644 index 00000000000..cd643cfef23 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test @@ -0,0 +1,21 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values VALUES('off'); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); + +--let $sys_var=ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING +--let $read_only=0 +--let $session=0 +--let $sticky=1 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; + diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test new file mode 100644 index 00000000000..f37f75b4ac5 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test @@ -0,0 +1,19 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES('PK_SK'); +INSERT INTO valid_values VALUES('OFF'); +INSERT INTO valid_values VALUES('PK_ONLY'); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('a'); + +--let $sys_var=ROCKSDB_READ_FREE_RPL +--let $read_only=0 +--let $session=0 +--source ../include/rocksdb_sys_var.inc + +SET GLOBAL ROCKSDB_READ_FREE_RPL=DEFAULT; + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git 
a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test index 71f42a47f4b..a2c900c91a9 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test @@ -1,15 +1,20 @@ --source include/have_rocksdb.inc +call mtr.add_suppression(".*Invalid pattern in rocksdb_read_free_rpl_tables.*"); + CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; INSERT INTO valid_values VALUES('a'); INSERT INTO valid_values VALUES('b'); CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'*\''); --let $sys_var=ROCKSDB_READ_FREE_RPL_TABLES --let $read_only=0 ---let $session=1 --source include/rocksdb_sys_var.inc +SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=NULL; +SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=DEFAULT; + DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test new file mode 100644 index 00000000000..793b7752198 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test @@ -0,0 +1,21 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values VALUES('off'); +INSERT INTO valid_values VALUES('true'); +INSERT INTO valid_values VALUES('false'); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); + +--let $sys_var=ROCKSDB_ROLLBACK_ON_TIMEOUT +--let $session=0 +--source 
../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; + diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test index 3fe265ae930..50c2354d883 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test @@ -11,5 +11,8 @@ CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; --let $session=1 --source include/rocksdb_sys_var.inc +SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=NULL; +SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=DEFAULT; + DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test new file mode 100644 index 00000000000..89b0878fd0c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test @@ -0,0 +1,21 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES(4); +INSERT INTO valid_values VALUES(2); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); + +--let $sys_var=ROCKSDB_STATS_LEVEL +--let $read_only=0 +--let $session=0 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test 
b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test index 533b2db8204..9462e40aaf0 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test @@ -55,6 +55,11 @@ SELECT @@global.rocksdb_update_cf_options; SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE'; SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE'; +# Save these off to reset later +--let $ORIG_WRITE_BUFFER_SIZE=`SELECT VALUE FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE'` +--let $ORIG_TARGET_FILE_SIZE_BASE=`SELECT VALUE FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE'` +--let $ORIG_MAX_BYTES_FOR_LEVEL_MULTIPLIER=`SELECT VALUE FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER'` + # All good. Use default CF. 
SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=8m;target_file_size_base=2m};'; SELECT @@global.rocksdb_update_cf_options; @@ -100,6 +105,12 @@ SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FIL --eval SET @@global.rocksdb_update_cf_options = 'default={foo=bar};'; SELECT @@global.rocksdb_update_cf_options; +# Reset the cf options so the test passes with --repeat=2 +--eval SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE};' +--eval SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE};' +--eval SET @@global.rocksdb_update_cf_options = 'cf2={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE;max_bytes_for_level_multiplier=$ORIG_MAX_BYTES_FOR_LEVEL_MULTIPLIER};' +--eval SET @@global.rocksdb_update_cf_options = 'cf3={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE};' + SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; diff --git a/storage/rocksdb/nosql_access.cc b/storage/rocksdb/nosql_access.cc new file mode 100644 index 00000000000..9dcbe3fbcbc --- /dev/null +++ b/storage/rocksdb/nosql_access.cc @@ -0,0 +1,53 @@ +/* + Copyright (c) 2019, Facebook, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define MYSQL_SERVER 1 + +#include <my_global.h> +/* This C++ file's header */ +#include "./nosql_access.h" + +/* C++ standard header files */ +#include <algorithm> +#include <array> +#include <limits> +#include <string> +#include <utility> +#include <vector> + +/* C standard header files */ +#include <ctype.h> + +/* MySQL header files */ +#include "../../sql/item.h" +#include "../../sql/sql_base.h" +#include "../../sql/sql_class.h" +#include "../../sql/strfunc.h" + +/* MyRocks header files */ +#include "./ha_rocksdb.h" +#include "./ha_rocksdb_proto.h" +#include "./rdb_buff.h" +#include "./rdb_datadic.h" + +namespace myrocks { + +bool rocksdb_handle_single_table_select(THD * /* unused */, + st_select_lex * /* unused */) { + return false; +} + +} // namespace myrocks diff --git a/storage/rocksdb/nosql_access.h b/storage/rocksdb/nosql_access.h new file mode 100644 index 00000000000..70aaa400668 --- /dev/null +++ b/storage/rocksdb/nosql_access.h @@ -0,0 +1,36 @@ +/* + Copyright (c) 2019, Facebook, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* C++ standard header files */ +#include <array> +#include <string> +#include <vector> + +/* C standard header files */ +#include <ctype.h> + +/* MySQL header files */ +#include "../../sql/protocol.h" +#include "./sql_string.h" + +#pragma once + +namespace myrocks { + +// Not needed in MyRocks: +//bool rocksdb_handle_single_table_select(THD *thd, st_select_lex *select_lex); + +} // namespace myrocks diff --git a/storage/rocksdb/properties_collector.cc b/storage/rocksdb/properties_collector.cc index 7bae0317bd0..da6afa71912 100644 --- a/storage/rocksdb/properties_collector.cc +++ b/storage/rocksdb/properties_collector.cc @@ -51,12 +51,19 @@ my_bool rocksdb_compaction_sequential_deletes_count_sd = false; Rdb_tbl_prop_coll::Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager, const Rdb_compact_params ¶ms, - const uint32_t &cf_id, - const uint8_t &table_stats_sampling_pct) - : m_cf_id(cf_id), m_ddl_manager(ddl_manager), m_last_stats(nullptr), - m_rows(0l), m_window_pos(0l), m_deleted_rows(0l), m_max_deleted_rows(0l), - m_file_size(0), m_params(params), - m_cardinality_collector(table_stats_sampling_pct), m_recorded(false) { + const uint32_t cf_id, + const uint8_t table_stats_sampling_pct) + : m_cf_id(cf_id), + m_ddl_manager(ddl_manager), + m_last_stats(nullptr), + m_rows(0l), + m_window_pos(0l), + m_deleted_rows(0l), + m_max_deleted_rows(0l), + m_file_size(0), + m_params(params), + m_cardinality_collector(table_stats_sampling_pct), + m_recorded(false) { DBUG_ASSERT(ddl_manager != nullptr); m_deleted_rows_window.resize(m_params.m_window, false); @@ -150,35 +157,36 @@ Rdb_index_stats *Rdb_tbl_prop_coll::AccessStats(const rocksdb::Slice &key) { void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key, const rocksdb::Slice &value, const 
rocksdb::EntryType &type, - const uint64_t &file_size) { + const uint64_t file_size) { auto stats = AccessStats(key); stats->m_data_size += key.size() + value.size(); // Incrementing per-index entry-type statistics switch (type) { - case rocksdb::kEntryPut: - stats->m_rows++; - break; - case rocksdb::kEntryDelete: - stats->m_entry_deletes++; - break; - case rocksdb::kEntrySingleDelete: - stats->m_entry_single_deletes++; - break; - case rocksdb::kEntryMerge: - stats->m_entry_merges++; - break; - case rocksdb::kEntryOther: - stats->m_entry_others++; - break; - default: - // NO_LINT_DEBUG - sql_print_error("RocksDB: Unexpected entry type found: %u. " - "This should not happen so aborting the system.", - type); - abort(); - break; + case rocksdb::kEntryPut: + stats->m_rows++; + break; + case rocksdb::kEntryDelete: + stats->m_entry_deletes++; + break; + case rocksdb::kEntrySingleDelete: + stats->m_entry_single_deletes++; + break; + case rocksdb::kEntryMerge: + stats->m_entry_merges++; + break; + case rocksdb::kEntryOther: + stats->m_entry_others++; + break; + default: + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Unexpected entry type found: %u. 
" + "This should not happen so aborting the system.", + type); + abort(); + break; } stats->m_actual_disk_size += file_size - m_file_size; @@ -194,8 +202,8 @@ const char *Rdb_tbl_prop_coll::INDEXSTATS_KEY = "__indexstats__"; /* This function is called by RocksDB to compute properties to store in sst file */ -rocksdb::Status -Rdb_tbl_prop_coll::Finish(rocksdb::UserCollectedProperties *const properties) { +rocksdb::Status Rdb_tbl_prop_coll::Finish( + rocksdb::UserCollectedProperties *const properties) { uint64_t num_sst_entry_put = 0; uint64_t num_sst_entry_delete = 0; uint64_t num_sst_entry_singledelete = 0; @@ -251,8 +259,8 @@ bool Rdb_tbl_prop_coll::NeedCompact() const { /* Returns the same as above, but in human-readable way for logging */ -rocksdb::UserCollectedProperties -Rdb_tbl_prop_coll::GetReadableProperties() const { +rocksdb::UserCollectedProperties Rdb_tbl_prop_coll::GetReadableProperties() + const { std::string s; #ifdef DBUG_OFF s.append("[..."); @@ -323,8 +331,8 @@ void Rdb_tbl_prop_coll::read_stats_from_tbl_props( /* Serializes an array of Rdb_index_stats into a network string. */ -std::string -Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats) { +std::string Rdb_index_stats::materialize( + const std::vector<Rdb_index_stats> &stats) { String ret; rdb_netstr_append_uint16(&ret, INDEX_STATS_VERSION_ENTRY_TYPES); for (const auto &i : stats) { @@ -370,9 +378,10 @@ int Rdb_index_stats::unmaterialize(const std::string &s, if (version < INDEX_STATS_VERSION_INITIAL || version > INDEX_STATS_VERSION_ENTRY_TYPES) { // NO_LINT_DEBUG - sql_print_error("Index stats version %d was outside of supported range. " - "This should not happen so aborting the system.", - version); + sql_print_error( + "Index stats version %d was outside of supported range. 
" + "This should not happen so aborting the system.", + version); abort(); } @@ -401,8 +410,7 @@ int Rdb_index_stats::unmaterialize(const std::string &s, stats.m_entry_merges = rdb_netbuf_read_uint64(&p); stats.m_entry_others = rdb_netbuf_read_uint64(&p); } - if (p + - stats.m_distinct_keys_per_prefix.size() * + if (p + stats.m_distinct_keys_per_prefix.size() * sizeof(stats.m_distinct_keys_per_prefix[0]) > p2) { return HA_EXIT_FAILURE; @@ -419,8 +427,8 @@ int Rdb_index_stats::unmaterialize(const std::string &s, Merges one Rdb_index_stats into another. Can be used to come up with the stats for the index based on stats for each sst */ -void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool &increment, - const int64_t &estimated_data_len) { +void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool increment, + const int64_t estimated_data_len) { std::size_t i; DBUG_ASSERT(estimated_data_len >= 0); @@ -464,7 +472,7 @@ void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool &increment, } } -Rdb_tbl_card_coll::Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct) +Rdb_tbl_card_coll::Rdb_tbl_card_coll(const uint8_t table_stats_sampling_pct) : m_table_stats_sampling_pct(table_stats_sampling_pct), m_seed(time(nullptr)) {} @@ -535,4 +543,4 @@ void Rdb_tbl_card_coll::AdjustStats(Rdb_index_stats *stats) { } } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/properties_collector.h b/storage/rocksdb/properties_collector.h index 36d980d8f53..ce2773cd618 100644 --- a/storage/rocksdb/properties_collector.h +++ b/storage/rocksdb/properties_collector.h @@ -54,7 +54,7 @@ struct Rdb_index_stats { int64_t m_entry_deletes, m_entry_single_deletes; int64_t m_entry_merges, m_entry_others; std::vector<int64_t> m_distinct_keys_per_prefix; - std::string m_name; // name is not persisted + std::string m_name; // name is not persisted static std::string materialize(const std::vector<Rdb_index_stats> &stats); static int 
unmaterialize(const std::string &s, @@ -62,18 +62,23 @@ struct Rdb_index_stats { Rdb_index_stats() : Rdb_index_stats({0, 0}) {} explicit Rdb_index_stats(GL_INDEX_ID gl_index_id) - : m_gl_index_id(gl_index_id), m_data_size(0), m_rows(0), - m_actual_disk_size(0), m_entry_deletes(0), m_entry_single_deletes(0), - m_entry_merges(0), m_entry_others(0) {} - - void merge(const Rdb_index_stats &s, const bool &increment = true, - const int64_t &estimated_data_len = 0); + : m_gl_index_id(gl_index_id), + m_data_size(0), + m_rows(0), + m_actual_disk_size(0), + m_entry_deletes(0), + m_entry_single_deletes(0), + m_entry_merges(0), + m_entry_others(0) {} + + void merge(const Rdb_index_stats &s, const bool increment = true, + const int64_t estimated_data_len = 0); }; // The helper class to calculate index cardinality class Rdb_tbl_card_coll { public: - explicit Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct); + explicit Rdb_tbl_card_coll(const uint8_t table_stats_sampling_pct); public: void ProcessKey(const rocksdb::Slice &key, const Rdb_key_def *keydef, @@ -105,10 +110,10 @@ class Rdb_tbl_card_coll { }; class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector { -public: + public: Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager, - const Rdb_compact_params ¶ms, const uint32_t &cf_id, - const uint8_t &table_stats_sampling_pct); + const Rdb_compact_params ¶ms, const uint32_t cf_id, + const uint8_t table_stats_sampling_pct); /* Override parent class's virtual methods of interest. 
@@ -120,8 +125,8 @@ public: rocksdb::SequenceNumber seq, uint64_t file_size) override; - virtual rocksdb::Status - Finish(rocksdb::UserCollectedProperties *properties) override; + virtual rocksdb::Status Finish( + rocksdb::UserCollectedProperties *properties) override; virtual const char *Name() const override { return "Rdb_tbl_prop_coll"; } @@ -129,25 +134,25 @@ public: bool NeedCompact() const override; -public: + public: uint64_t GetMaxDeletedRows() const { return m_max_deleted_rows; } static void read_stats_from_tbl_props( const std::shared_ptr<const rocksdb::TableProperties> &table_props, std::vector<Rdb_index_stats> *out_stats_vector); -private: + private: static std::string GetReadableStats(const Rdb_index_stats &it); bool ShouldCollectStats(); void CollectStatsForRow(const rocksdb::Slice &key, const rocksdb::Slice &value, const rocksdb::EntryType &type, - const uint64_t &file_size); + const uint64_t file_size); Rdb_index_stats *AccessStats(const rocksdb::Slice &key); void AdjustDeletedRows(rocksdb::EntryType type); -private: + private: uint32_t m_cf_id; std::shared_ptr<const Rdb_key_def> m_keydef; Rdb_ddl_manager *m_ddl_manager; @@ -169,10 +174,10 @@ private: class Rdb_tbl_prop_coll_factory : public rocksdb::TablePropertiesCollectorFactory { -public: + public: Rdb_tbl_prop_coll_factory(const Rdb_tbl_prop_coll_factory &) = delete; - Rdb_tbl_prop_coll_factory & - operator=(const Rdb_tbl_prop_coll_factory &) = delete; + Rdb_tbl_prop_coll_factory &operator=(const Rdb_tbl_prop_coll_factory &) = + delete; explicit Rdb_tbl_prop_coll_factory(Rdb_ddl_manager *ddl_manager) : m_ddl_manager(ddl_manager) {} @@ -192,19 +197,19 @@ public: return "Rdb_tbl_prop_coll_factory"; } -public: + public: void SetCompactionParams(const Rdb_compact_params ¶ms) { m_params = params; } - void SetTableStatsSamplingPct(const uint8_t &table_stats_sampling_pct) { + void SetTableStatsSamplingPct(const uint8_t table_stats_sampling_pct) { m_table_stats_sampling_pct = table_stats_sampling_pct; 
} -private: + private: Rdb_ddl_manager *const m_ddl_manager; Rdb_compact_params m_params; uint8_t m_table_stats_sampling_pct; }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_buff.h b/storage/rocksdb/rdb_buff.h index 532f74090b5..3005211104d 100644 --- a/storage/rocksdb/rdb_buff.h +++ b/storage/rocksdb/rdb_buff.h @@ -40,6 +40,17 @@ #define be16toh(x) OSSwapBigToHostInt16(x) #endif +/* MySQL header files */ +#include "./my_global.h" + +/* MyRocks header files */ +#include "./rdb_global.h" +#include "./rdb_utils.h" + +/* RocksDB header files */ +#include "rocksdb/slice.h" +#include "rocksdb/status.h" + namespace myrocks { /* @@ -48,7 +59,7 @@ namespace myrocks { */ inline void rdb_netstr_append_uint64(my_core::String *const out_netstr, - const uint64 &val) { + const uint64 val) { DBUG_ASSERT(out_netstr != nullptr); // Convert from host machine byte order (usually Little Endian) to network @@ -58,7 +69,7 @@ inline void rdb_netstr_append_uint64(my_core::String *const out_netstr, } inline void rdb_netstr_append_uint32(my_core::String *const out_netstr, - const uint32 &val) { + const uint32 val) { DBUG_ASSERT(out_netstr != nullptr); // Convert from host machine byte order (usually Little Endian) to network @@ -68,7 +79,7 @@ inline void rdb_netstr_append_uint32(my_core::String *const out_netstr, } inline void rdb_netstr_append_uint16(my_core::String *const out_netstr, - const uint16 &val) { + const uint16 val) { DBUG_ASSERT(out_netstr != nullptr); // Convert from host machine byte order (usually Little Endian) to network @@ -81,7 +92,7 @@ inline void rdb_netstr_append_uint16(my_core::String *const out_netstr, Basic network buffer ("netbuf") write helper functions. 
*/ -inline void rdb_netbuf_store_uint64(uchar *const dst_netbuf, const uint64 &n) { +inline void rdb_netbuf_store_uint64(uchar *const dst_netbuf, const uint64 n) { DBUG_ASSERT(dst_netbuf != nullptr); // Convert from host byte order (usually Little Endian) to network byte order @@ -90,7 +101,7 @@ inline void rdb_netbuf_store_uint64(uchar *const dst_netbuf, const uint64 &n) { memcpy(dst_netbuf, &net_val, sizeof(net_val)); } -inline void rdb_netbuf_store_uint32(uchar *const dst_netbuf, const uint32 &n) { +inline void rdb_netbuf_store_uint32(uchar *const dst_netbuf, const uint32 n) { DBUG_ASSERT(dst_netbuf != nullptr); // Convert from host byte order (usually Little Endian) to network byte order @@ -99,7 +110,7 @@ inline void rdb_netbuf_store_uint32(uchar *const dst_netbuf, const uint32 &n) { memcpy(dst_netbuf, &net_val, sizeof(net_val)); } -inline void rdb_netbuf_store_uint16(uchar *const dst_netbuf, const uint16 &n) { +inline void rdb_netbuf_store_uint16(uchar *const dst_netbuf, const uint16 n) { DBUG_ASSERT(dst_netbuf != nullptr); // Convert from host byte order (usually Little Endian) to network byte order @@ -108,14 +119,14 @@ inline void rdb_netbuf_store_uint16(uchar *const dst_netbuf, const uint16 &n) { memcpy(dst_netbuf, &net_val, sizeof(net_val)); } -inline void rdb_netbuf_store_byte(uchar *const dst_netbuf, const uchar &c) { +inline void rdb_netbuf_store_byte(uchar *const dst_netbuf, const uchar c) { DBUG_ASSERT(dst_netbuf != nullptr); *dst_netbuf = c; } inline void rdb_netbuf_store_index(uchar *const dst_netbuf, - const uint32 &number) { + const uint32 number) { DBUG_ASSERT(dst_netbuf != nullptr); rdb_netbuf_store_uint32(dst_netbuf, number); @@ -230,10 +241,10 @@ class Rdb_string_reader { const char *m_ptr; uint m_len; -private: + private: Rdb_string_reader &operator=(const Rdb_string_reader &) = default; -public: + public: Rdb_string_reader(const Rdb_string_reader &) = default; /* named constructor */ static Rdb_string_reader read_or_empty(const 
rocksdb::Slice *const slice) { @@ -268,7 +279,7 @@ public: Read the next @param size bytes. Returns pointer to the bytes read, or nullptr if the remaining string doesn't have that many bytes. */ - const char *read(const uint &size) { + const char *read(const uint size) { const char *res; if (m_len < size) { res = nullptr; @@ -282,21 +293,21 @@ public: bool read_uint8(uint *const res) { const uchar *p; - if (!(p = reinterpret_cast<const uchar *>(read(1)))) - return true; // error - else { + if (!(p = reinterpret_cast<const uchar *>(read(1)))) { + return true; // error + } else { *res = *p; - return false; // Ok + return false; // Ok } } bool read_uint16(uint *const res) { const uchar *p; - if (!(p = reinterpret_cast<const uchar *>(read(2)))) - return true; // error - else { + if (!(p = reinterpret_cast<const uchar *>(read(2)))) { + return true; // error + } else { *res = rdb_netbuf_to_uint16(p); - return false; // Ok + return false; // Ok } } @@ -338,29 +349,29 @@ public: class Rdb_string_writer { std::vector<uchar> m_data; -public: + public: Rdb_string_writer(const Rdb_string_writer &) = delete; Rdb_string_writer &operator=(const Rdb_string_writer &) = delete; Rdb_string_writer() = default; void clear() { m_data.clear(); } - void write_uint8(const uint &val) { + void write_uint8(const uint val) { m_data.push_back(static_cast<uchar>(val)); } - void write_uint16(const uint &val) { + void write_uint16(const uint val) { const auto size = m_data.size(); m_data.resize(size + 2); rdb_netbuf_store_uint16(m_data.data() + size, val); } - void write_uint32(const uint &val) { + void write_uint32(const uint val) { const auto size = m_data.size(); m_data.resize(size + 4); rdb_netbuf_store_uint32(m_data.data() + size, val); } - void write(const uchar *const new_data, const size_t &len) { + void write(const uchar *const new_data, const size_t len) { DBUG_ASSERT(new_data != nullptr); m_data.insert(m_data.end(), new_data, new_data + len); } @@ -368,24 +379,24 @@ public: uchar *ptr() 
{ return m_data.data(); } size_t get_current_pos() const { return m_data.size(); } - void write_uint8_at(const size_t &pos, const uint &new_val) { + void write_uint8_at(const size_t pos, const uint new_val) { // This function will only overwrite what was written DBUG_ASSERT(pos < get_current_pos()); m_data.data()[pos] = new_val; } - void write_uint16_at(const size_t &pos, const uint &new_val) { + void write_uint16_at(const size_t pos, const uint new_val) { // This function will only overwrite what was written DBUG_ASSERT(pos < get_current_pos() && (pos + 1) < get_current_pos()); rdb_netbuf_store_uint16(m_data.data() + pos, new_val); } - void truncate(const size_t &pos) { + void truncate(const size_t pos) { DBUG_ASSERT(pos < m_data.size()); m_data.resize(pos); } - void allocate(const size_t &len, const uchar &val = 0) { + void allocate(const size_t len, const uchar val = 0) { DBUG_ASSERT(len > 0); m_data.resize(m_data.size() + len, val); } @@ -407,14 +418,14 @@ class Rdb_bit_writer { Rdb_string_writer *m_writer; uchar m_offset; -public: + public: Rdb_bit_writer(const Rdb_bit_writer &) = delete; Rdb_bit_writer &operator=(const Rdb_bit_writer &) = delete; explicit Rdb_bit_writer(Rdb_string_writer *writer_arg) : m_writer(writer_arg), m_offset(0) {} - void write(uint size, const uint &value) { + void write(uint size, const uint value) { DBUG_ASSERT((value & ((1 << size) - 1)) == value); while (size > 0) { @@ -439,7 +450,7 @@ class Rdb_bit_reader { uint m_ret; Rdb_string_reader *const m_reader; -public: + public: Rdb_bit_reader(const Rdb_bit_reader &) = delete; Rdb_bit_reader &operator=(const Rdb_bit_reader &) = delete; @@ -472,4 +483,67 @@ public: } }; -} // namespace myrocks +template <size_t buf_length> +class Rdb_buf_writer { + public: + Rdb_buf_writer(const Rdb_buf_writer &) = delete; + Rdb_buf_writer &operator=(const Rdb_buf_writer &) = delete; + Rdb_buf_writer() { reset(); } + + void write_uint32(const uint32 n) { + DBUG_ASSERT(m_ptr + sizeof(n) <= m_buf.data() + 
buf_length); + rdb_netbuf_store_uint32(m_ptr, n); + m_ptr += sizeof(n); + } + + void write_uint64(const uint64 n) { + DBUG_ASSERT(m_ptr + sizeof(n) <= m_buf.data() + buf_length); + rdb_netbuf_store_uint64(m_ptr, n); + m_ptr += sizeof(n); + } + + void write_uint16(const uint16 n) { + DBUG_ASSERT(m_ptr + sizeof(n) <= m_buf.data() + buf_length); + rdb_netbuf_store_uint16(m_ptr, n); + m_ptr += sizeof(n); + } + + void write_byte(const uchar c) { + DBUG_ASSERT(m_ptr + sizeof(c) <= m_buf.data() + buf_length); + rdb_netbuf_store_byte(m_ptr, c); + m_ptr += sizeof(c); + } + + void write_index(const uint32 n) { write_uint32(n); } + + void write(const char *buf, const size_t size) { + DBUG_ASSERT(m_ptr + size <= m_buf.data() + buf_length); + memcpy(m_ptr, buf, size); + m_ptr += size; + } + + void write(const uchar *buf, const size_t size) { + DBUG_ASSERT(m_ptr + size <= m_buf.data() + buf_length); + memcpy(m_ptr, buf, size); + m_ptr += size; + } + + void reset() { m_ptr = m_buf.data(); } + + const char *data() const { + return reinterpret_cast<const char *>(m_buf.data()); + } + + size_t capacity() { return buf_length; } + + /** Returns actual size of the buffer that has data */ + size_t size() { return m_ptr - m_buf.data(); } + + rocksdb::Slice to_slice() { return rocksdb::Slice(data(), size()); } + + private: + std::array<uchar, buf_length> m_buf; + uchar *m_ptr; +}; + +} // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_manager.cc b/storage/rocksdb/rdb_cf_manager.cc index 7875a7e919b..789481c5f00 100644 --- a/storage/rocksdb/rdb_cf_manager.cc +++ b/storage/rocksdb/rdb_cf_manager.cc @@ -15,7 +15,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif /* For use of 'PRIu64': */ @@ -31,8 +31,11 @@ /* MyRocks header files */ #include "./ha_rocksdb.h" #include "./ha_rocksdb_proto.h" +#include 
"./rdb_datadic.h" #include "./rdb_psi.h" +#include <string> + namespace myrocks { /* Check if ColumnFamily name says it's a reverse-ordered CF */ @@ -43,7 +46,7 @@ bool Rdb_cf_manager::is_cf_name_reverse(const char *const name) { } void Rdb_cf_manager::init( - std::unique_ptr<Rdb_cf_options> cf_options, + std::unique_ptr<Rdb_cf_options> &&cf_options, std::vector<rocksdb::ColumnFamilyHandle *> *const handles) { mysql_mutex_init(rdb_cfm_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST); @@ -75,9 +78,8 @@ void Rdb_cf_manager::cleanup() { @detail See Rdb_cf_manager::get_cf */ -rocksdb::ColumnFamilyHandle * -Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb, - const std::string &cf_name_arg) { +rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_or_create_cf( + rocksdb::DB *const rdb, const std::string &cf_name_arg) { DBUG_ASSERT(rdb != nullptr); rocksdb::ColumnFamilyHandle *cf_handle = nullptr; @@ -105,7 +107,10 @@ Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb, // NO_LINT_DEBUG sql_print_information("RocksDB: creating a column family %s", cf_name.c_str()); + // NO_LINT_DEBUG sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size); + + // NO_LINT_DEBUG sql_print_information(" target_file_size_base=%" PRIu64, opts.target_file_size_base); @@ -129,12 +134,13 @@ Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb, Find column family by its cf_name. */ -rocksdb::ColumnFamilyHandle * -Rdb_cf_manager::get_cf(const std::string &cf_name_arg) const { +rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_cf( + const std::string &cf_name_arg, const bool lock_held_by_caller) const { rocksdb::ColumnFamilyHandle *cf_handle; - RDB_MUTEX_LOCK_CHECK(m_mutex); - + if (!lock_held_by_caller) { + RDB_MUTEX_LOCK_CHECK(m_mutex); + } std::string cf_name = cf_name_arg.empty() ? 
DEFAULT_CF_NAME : cf_name_arg; const auto it = m_cf_name_map.find(cf_name); @@ -145,18 +151,19 @@ Rdb_cf_manager::get_cf(const std::string &cf_name_arg) const { sql_print_warning("Column family '%s' not found.", cf_name.c_str()); } - RDB_MUTEX_UNLOCK_CHECK(m_mutex); + if (!lock_held_by_caller) { + RDB_MUTEX_UNLOCK_CHECK(m_mutex); + } return cf_handle; } -rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_cf(const uint32_t &id) const { +rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_cf(const uint32_t id) const { rocksdb::ColumnFamilyHandle *cf_handle = nullptr; RDB_MUTEX_LOCK_CHECK(m_mutex); const auto it = m_cf_id_map.find(id); - if (it != m_cf_id_map.end()) - cf_handle = it->second; + if (it != m_cf_id_map.end()) cf_handle = it->second; RDB_MUTEX_UNLOCK_CHECK(m_mutex); return cf_handle; @@ -174,8 +181,8 @@ std::vector<std::string> Rdb_cf_manager::get_cf_names(void) const { return names; } -std::vector<rocksdb::ColumnFamilyHandle *> -Rdb_cf_manager::get_all_cf(void) const { +std::vector<rocksdb::ColumnFamilyHandle *> Rdb_cf_manager::get_all_cf( + void) const { std::vector<rocksdb::ColumnFamilyHandle *> list; RDB_MUTEX_LOCK_CHECK(m_mutex); @@ -190,4 +197,77 @@ Rdb_cf_manager::get_all_cf(void) const { return list; } -} // namespace myrocks +struct Rdb_cf_scanner : public Rdb_tables_scanner { + uint32_t m_cf_id; + int m_is_cf_used; + + explicit Rdb_cf_scanner(uint32_t cf_id) + : m_cf_id(cf_id), m_is_cf_used(false) {} + + int add_table(Rdb_tbl_def *tdef) override { + DBUG_ASSERT(tdef != nullptr); + + for (uint i = 0; i < tdef->m_key_count; i++) { + const Rdb_key_def &kd = *tdef->m_key_descr_arr[i]; + + if (kd.get_cf()->GetID() == m_cf_id) { + m_is_cf_used = true; + return HA_EXIT_SUCCESS; + } + } + return HA_EXIT_SUCCESS; + } +}; + +int Rdb_cf_manager::drop_cf(const std::string &cf_name) { + auto ddl_manager = rdb_get_ddl_manager(); + uint32_t cf_id = 0; + + if (cf_name == DEFAULT_SYSTEM_CF_NAME) { + return HA_EXIT_FAILURE; + } + + RDB_MUTEX_LOCK_CHECK(m_mutex); + auto 
cf_handle = get_cf(cf_name, true /* lock_held_by_caller */); + if (cf_handle == nullptr) { + RDB_MUTEX_UNLOCK_CHECK(m_mutex); + return HA_EXIT_SUCCESS; + } + + cf_id = cf_handle->GetID(); + Rdb_cf_scanner scanner(cf_id); + + auto ret = ddl_manager->scan_for_tables(&scanner); + if (ret) { + RDB_MUTEX_UNLOCK_CHECK(m_mutex); + return ret; + } + + if (scanner.m_is_cf_used) { + // column family is used by existing key + RDB_MUTEX_UNLOCK_CHECK(m_mutex); + return HA_EXIT_FAILURE; + } + + auto rdb = rdb_get_rocksdb_db(); + auto status = rdb->DropColumnFamily(cf_handle); + if (!status.ok()) { + RDB_MUTEX_UNLOCK_CHECK(m_mutex); + return ha_rocksdb::rdb_error_to_mysql(status); + } + + delete cf_handle; + + auto id_iter = m_cf_id_map.find(cf_id); + DBUG_ASSERT(id_iter != m_cf_id_map.end()); + m_cf_id_map.erase(id_iter); + + auto name_iter = m_cf_name_map.find(cf_name); + DBUG_ASSERT(name_iter != m_cf_name_map.end()); + m_cf_name_map.erase(name_iter); + + RDB_MUTEX_UNLOCK_CHECK(m_mutex); + + return HA_EXIT_SUCCESS; +} +} // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_manager.h b/storage/rocksdb/rdb_cf_manager.h index 943b6f07c2b..cf7b3d6cfb8 100644 --- a/storage/rocksdb/rdb_cf_manager.h +++ b/storage/rocksdb/rdb_cf_manager.h @@ -66,7 +66,7 @@ class Rdb_cf_manager { column families that are present in the database. The first CF is the default CF. 
*/ - void init(std::unique_ptr<Rdb_cf_options> cf_options, + void init(std::unique_ptr<Rdb_cf_options> &&cf_options, std::vector<rocksdb::ColumnFamilyHandle *> *const handles); void cleanup(); @@ -78,10 +78,11 @@ class Rdb_cf_manager { const std::string &cf_name); /* Used by table open */ - rocksdb::ColumnFamilyHandle *get_cf(const std::string &cf_name) const; + rocksdb::ColumnFamilyHandle *get_cf( + const std::string &cf_name, const bool lock_held_by_caller = false) const; /* Look up cf by id; used by datadic */ - rocksdb::ColumnFamilyHandle *get_cf(const uint32_t &id) const; + rocksdb::ColumnFamilyHandle *get_cf(const uint32_t id) const; /* Used to iterate over column families for show status */ std::vector<std::string> get_cf_names(void) const; @@ -89,7 +90,8 @@ class Rdb_cf_manager { /* Used to iterate over column families */ std::vector<rocksdb::ColumnFamilyHandle *> get_all_cf(void) const; - // void drop_cf(); -- not implemented so far. + /* Used to delete cf by name */ + int drop_cf(const std::string &cf_name); void get_cf_options(const std::string &cf_name, rocksdb::ColumnFamilyOptions *const opts) @@ -103,4 +105,4 @@ class Rdb_cf_manager { } }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_options.cc b/storage/rocksdb/rdb_cf_options.cc index b71a635d8a2..c0d16893cda 100644 --- a/storage/rocksdb/rdb_cf_options.cc +++ b/storage/rocksdb/rdb_cf_options.cc @@ -15,7 +15,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif #include <my_global.h> @@ -99,10 +99,10 @@ void Rdb_cf_options::update(const std::string &cf_name, bool Rdb_cf_options::set_default(const std::string &default_config) { rocksdb::ColumnFamilyOptions options; - if (!default_config.empty() && - !rocksdb::GetColumnFamilyOptionsFromString(options, default_config, - &options) - .ok()) { + 
if (!default_config.empty() && !rocksdb::GetColumnFamilyOptionsFromString( + options, default_config, &options) + .ok()) { + // NO_LINT_DEBUG fprintf(stderr, "Invalid default column family config: %s\n", default_config.c_str()); return false; @@ -116,8 +116,7 @@ bool Rdb_cf_options::set_default(const std::string &default_config) { void Rdb_cf_options::skip_spaces(const std::string &input, size_t *const pos) { DBUG_ASSERT(pos != nullptr); - while (*pos < input.size() && isspace(input[*pos])) - ++(*pos); + while (*pos < input.size() && isspace(input[*pos])) ++(*pos); } // Find a valid column family name. Note that all characters except a @@ -135,8 +134,7 @@ bool Rdb_cf_options::find_column_family(const std::string &input, // Loop through the characters in the string until we see a '='. for (; *pos < input.size() && input[*pos] != '='; ++(*pos)) { // If this is not a space, move the end position to the current position. - if (input[*pos] != ' ') - end_pos = *pos; + if (input[*pos] != ' ') end_pos = *pos; } if (end_pos == beg_pos - 1) { @@ -177,24 +175,24 @@ bool Rdb_cf_options::find_options(const std::string &input, size_t *const pos, // number of closing curly braces. while (*pos < input.size()) { switch (input[*pos]) { - case '}': - // If this is a closing curly brace and we bring the count down to zero - // we can exit the loop with a valid options string. - if (--brace_count == 0) { - *options = input.substr(beg_pos, *pos - beg_pos); - ++(*pos); // Move past the last closing curly brace - return true; - } - - break; - - case '{': - // If this is an open curly brace increment the count. - ++brace_count; - break; - - default: - break; + case '}': + // If this is a closing curly brace and we bring the count down to zero + // we can exit the loop with a valid options string. 
+ if (--brace_count == 0) { + *options = input.substr(beg_pos, *pos - beg_pos); + ++(*pos); // Move past the last closing curly brace + return true; + } + + break; + + case '{': + // If this is an open curly brace increment the count. + ++brace_count; + break; + + default: + break; } // Move to the next character. @@ -221,8 +219,7 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string &input, skip_spaces(input, pos); // We should now have a column family name. - if (!find_column_family(input, pos, cf)) - return false; + if (!find_column_family(input, pos, cf)) return false; // If we are at the end of the input then we generate an error. if (*pos == input.size()) { @@ -238,8 +235,7 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string &input, // Find the options for this column family. This should be in the format // {<options>} where <options> may contain embedded pairs of curly braces. - if (!find_options(input, pos, opt_str)) - return false; + if (!find_options(input, pos, opt_str)) return false; // Skip any trailing spaces after the option string. 
skip_spaces(input, pos); @@ -260,7 +256,7 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string &input, } bool Rdb_cf_options::parse_cf_options(const std::string &cf_options, - Name_to_config_t *option_map) { + Name_to_config_t *option_map) { std::string cf; std::string opt_str; rocksdb::ColumnFamilyOptions options; @@ -316,8 +312,8 @@ bool Rdb_cf_options::set_override(const std::string &override_config) { return true; } -const rocksdb::Comparator * -Rdb_cf_options::get_cf_comparator(const std::string &cf_name) { +const rocksdb::Comparator *Rdb_cf_options::get_cf_comparator( + const std::string &cf_name) { if (Rdb_cf_manager::is_cf_name_reverse(cf_name.c_str())) { return &s_rev_pk_comparator; } else { @@ -325,8 +321,8 @@ Rdb_cf_options::get_cf_comparator(const std::string &cf_name) { } } -std::shared_ptr<rocksdb::MergeOperator> -Rdb_cf_options::get_cf_merge_operator(const std::string &cf_name) { +std::shared_ptr<rocksdb::MergeOperator> Rdb_cf_options::get_cf_merge_operator( + const std::string &cf_name) { return (cf_name == DEFAULT_SYSTEM_CF_NAME) ? std::make_shared<Rdb_system_merge_op>() : nullptr; @@ -342,4 +338,4 @@ void Rdb_cf_options::get_cf_options(const std::string &cf_name, opts->merge_operator = get_cf_merge_operator(cf_name); } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_options.h b/storage/rocksdb/rdb_cf_options.h index 349f7c42e32..360356f7af1 100644 --- a/storage/rocksdb/rdb_cf_options.h +++ b/storage/rocksdb/rdb_cf_options.h @@ -39,7 +39,7 @@ namespace myrocks { families not found in the map. 
*/ class Rdb_cf_options { -public: + public: using Name_to_config_t = std::unordered_map<std::string, std::string>; Rdb_cf_options(const Rdb_cf_options &) = delete; @@ -61,20 +61,20 @@ public: return m_default_cf_opts; } - static const rocksdb::Comparator * - get_cf_comparator(const std::string &cf_name); + static const rocksdb::Comparator *get_cf_comparator( + const std::string &cf_name); - std::shared_ptr<rocksdb::MergeOperator> - get_cf_merge_operator(const std::string &cf_name); + std::shared_ptr<rocksdb::MergeOperator> get_cf_merge_operator( + const std::string &cf_name); void get_cf_options(const std::string &cf_name, rocksdb::ColumnFamilyOptions *const opts) MY_ATTRIBUTE((__nonnull__)); static bool parse_cf_options(const std::string &cf_options, - Name_to_config_t *option_map); + Name_to_config_t *option_map); -private: + private: bool set_default(const std::string &default_config); bool set_override(const std::string &overide_config); @@ -88,7 +88,7 @@ private: std::string *const cf, std::string *const opt_str); -private: + private: static Rdb_pk_comparator s_pk_comparator; static Rdb_rev_comparator s_rev_pk_comparator; @@ -101,4 +101,4 @@ private: rocksdb::ColumnFamilyOptions m_default_cf_opts; }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h index ecc78de91bf..1cd27273b56 100644 --- a/storage/rocksdb/rdb_compact_filter.h +++ b/storage/rocksdb/rdb_compact_filter.h @@ -17,12 +17,12 @@ #pragma once #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif /* C++ system header files */ -#include <string> #include <time.h> +#include <string> #include <ctime> /* RocksDB includes */ @@ -35,7 +35,7 @@ namespace myrocks { class Rdb_compact_filter : public rocksdb::CompactionFilter { -public: + public: Rdb_compact_filter(const Rdb_compact_filter &) = delete; Rdb_compact_filter 
&operator=(const Rdb_compact_filter &) = delete; @@ -80,7 +80,7 @@ public: m_snapshot_timestamp = static_cast<uint64_t>(std::time(nullptr)); } -#ifndef NDEBUG +#ifndef DBUG_OFF int snapshot_ts = rdb_dbug_set_ttl_snapshot_ts(); if (snapshot_ts) { m_snapshot_timestamp = @@ -134,12 +134,13 @@ public: struct Rdb_index_info index_info; if (!rdb_get_dict_manager()->get_index_info(gl_index_id, &index_info)) { // NO_LINT_DEBUG - sql_print_error("RocksDB: Could not get index information " - "for Index Number (%u,%u)", - gl_index_id.cf_id, gl_index_id.index_id); + sql_print_error( + "RocksDB: Could not get index information " + "for Index Number (%u,%u)", + gl_index_id.cf_id, gl_index_id.index_id); } -#ifndef NDEBUG +#ifndef DBUG_OFF if (rdb_dbug_set_ttl_ignore_pk() && index_info.m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY) { *ttl_duration = 0; @@ -164,9 +165,10 @@ public: buf = rdb_hexdump(existing_value.data(), existing_value.size(), RDB_MAX_HEXDUMP_LEN); // NO_LINT_DEBUG - sql_print_error("Decoding ttl from PK value failed in compaction filter, " - "for index (%u,%u), val: %s", - m_prev_index.cf_id, m_prev_index.index_id, buf.c_str()); + sql_print_error( + "Decoding ttl from PK value failed in compaction filter, " + "for index (%u,%u), val: %s", + m_prev_index.cf_id, m_prev_index.index_id, buf.c_str()); abort(); } @@ -198,10 +200,10 @@ public: }; class Rdb_compact_filter_factory : public rocksdb::CompactionFilterFactory { -public: + public: Rdb_compact_filter_factory(const Rdb_compact_filter_factory &) = delete; - Rdb_compact_filter_factory & - operator=(const Rdb_compact_filter_factory &) = delete; + Rdb_compact_filter_factory &operator=(const Rdb_compact_filter_factory &) = + delete; Rdb_compact_filter_factory() {} ~Rdb_compact_filter_factory() {} @@ -215,4 +217,4 @@ public: } }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_comparator.h b/storage/rocksdb/rdb_comparator.h index b43118eda36..9cb25925159 100644 --- 
a/storage/rocksdb/rdb_comparator.h +++ b/storage/rocksdb/rdb_comparator.h @@ -36,7 +36,7 @@ namespace myrocks { Rdb_key_def) */ class Rdb_pk_comparator : public rocksdb::Comparator { -public: + public: Rdb_pk_comparator(const Rdb_pk_comparator &) = delete; Rdb_pk_comparator &operator=(const Rdb_pk_comparator &) = delete; Rdb_pk_comparator() = default; @@ -63,7 +63,7 @@ public: }; class Rdb_rev_comparator : public rocksdb::Comparator { -public: + public: Rdb_rev_comparator(const Rdb_rev_comparator &) = delete; Rdb_rev_comparator &operator=(const Rdb_rev_comparator &) = delete; Rdb_rev_comparator() = default; @@ -82,4 +82,4 @@ public: } }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_converter.cc b/storage/rocksdb/rdb_converter.cc new file mode 100644 index 00000000000..677ff106753 --- /dev/null +++ b/storage/rocksdb/rdb_converter.cc @@ -0,0 +1,838 @@ +/* + Copyright (c) 2015, Facebook, Inc. + + This program is f + i the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <my_global.h> + +/* This C++ file's header file */ +#include "./rdb_converter.h" + +/* Standard C++ header files */ +#include <algorithm> +#include <map> +#include <string> +#include <vector> + +/* MySQL header files */ +#include "./field.h" +#include "./key.h" +#include "./m_ctype.h" +#include "./my_bit.h" +#include "./my_bitmap.h" +#include "./sql_table.h" + + +/* MyRocks header files */ +#include "./ha_rocksdb.h" +#include "./ha_rocksdb_proto.h" +#include "./my_stacktrace.h" +#include "./rdb_cf_manager.h" +#include "./rdb_psi.h" +#include "./rdb_utils.h" + + +namespace myrocks { + +void dbug_modify_key_varchar8(String *on_disk_rec) { + std::string res; + // The key starts with index number + res.append(on_disk_rec->ptr(), Rdb_key_def::INDEX_NUMBER_SIZE); + + // Then, a mem-comparable form of a varchar(8) value. 
+ res.append("ABCDE\0\0\0\xFC", 9); + on_disk_rec->length(0); + on_disk_rec->append(res.data(), res.size()); +} + +/* + Convert field from rocksdb storage format into Mysql Record format + @param buf OUT start memory to fill converted data + @param offset IN/OUT decoded data is stored in buf + offset + @param table IN current table + @param field IN current field + @param reader IN rocksdb value slice reader + @param decode IN whether to decode current field + @return + 0 OK + other HA_ERR error code (can be SE-specific) +*/ +int Rdb_convert_to_record_value_decoder::decode(uchar *const buf, uint *offset, + TABLE *table, + my_core::Field *field, + Rdb_field_encoder *field_dec, + Rdb_string_reader *reader, + bool decode, bool is_null) { + int err = HA_EXIT_SUCCESS; + + uint field_offset = field->ptr - table->record[0]; + *offset = field_offset; + uint null_offset = field->null_offset(); + bool maybe_null = field->real_maybe_null(); + field->move_field(buf + field_offset, + maybe_null ? buf + null_offset : nullptr, field->null_bit); + + if (is_null) { + if (decode) { + // This sets the NULL-bit of this record + field->set_null(); + /* + Besides that, set the field value to default value. CHECKSUM TABLE + depends on this. + */ + memcpy(field->ptr, table->s->default_values + field_offset, + field->pack_length()); + } + } else { + if (decode) { + // sets non-null bits for this record + field->set_notnull(); + } + + if (field_dec->m_field_type == MYSQL_TYPE_BLOB) { + err = decode_blob(table, field, reader, decode); + } else if (field_dec->m_field_type == MYSQL_TYPE_VARCHAR) { + err = decode_varchar(field, reader, decode); + } else { + err = decode_fixed_length_field(field, field_dec, reader, decode); + } + } + + // Restore field->ptr and field->null_ptr + field->move_field(table->record[0] + field_offset, + maybe_null ? 
table->record[0] + null_offset : nullptr, + field->null_bit); + + return err; +} + +/* + Convert blob from rocksdb storage format into Mysql Record format + @param table IN current table + @param field IN current field + @param reader IN rocksdb value slice reader + @param decode IN whether to decode current field + @return + 0 OK + other HA_ERR error code (can be SE-specific) +*/ +int Rdb_convert_to_record_value_decoder::decode_blob(TABLE *table, Field *field, + Rdb_string_reader *reader, + bool decode) { + my_core::Field_blob *blob = (my_core::Field_blob *)field; + + // Get the number of bytes needed to store length + const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr; + + const char *data_len_str; + if (!(data_len_str = reader->read(length_bytes))) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + memcpy(blob->ptr, data_len_str, length_bytes); + uint32 data_len = + blob->get_length(reinterpret_cast<const uchar *>(data_len_str), + length_bytes); + const char *blob_ptr; + if (!(blob_ptr = reader->read(data_len))) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + if (decode) { + // set 8-byte pointer to 0, like innodb does (relevant for 32-bit + // platforms) + memset(blob->ptr + length_bytes, 0, 8); + memcpy(blob->ptr + length_bytes, &blob_ptr, sizeof(uchar **)); + } + + return HA_EXIT_SUCCESS; +} + +/* + Convert fixed length field from rocksdb storage format into Mysql Record + format + @param field IN current field + @param field_dec IN data structure conttain field encoding data + @param reader IN rocksdb value slice reader + @param decode IN whether to decode current field + @return + 0 OK + other HA_ERR error code (can be SE-specific) +*/ +int Rdb_convert_to_record_value_decoder::decode_fixed_length_field( + my_core::Field *const field, Rdb_field_encoder *field_dec, + Rdb_string_reader *const reader, bool decode) { + uint len = field_dec->m_pack_length_in_rec; + if (len > 0) { + const char *data_bytes; + if ((data_bytes = reader->read(len)) 
== nullptr) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + if (decode) { + memcpy(field->ptr, data_bytes, len); + } + } + + return HA_EXIT_SUCCESS; +} + +/* + Convert varchar field from rocksdb storage format into Mysql Record format + @param field IN current field + @param field_dec IN data structure conttain field encoding data + @param reader IN rocksdb value slice reader + @param decode IN whether to decode current field + @return + 0 OK + other HA_ERR error code (can be SE-specific) +*/ +int Rdb_convert_to_record_value_decoder::decode_varchar( + Field *field, Rdb_string_reader *const reader, bool decode) { + my_core::Field_varstring *const field_var = (my_core::Field_varstring *)field; + + const char *data_len_str; + if (!(data_len_str = reader->read(field_var->length_bytes))) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + uint data_len; + // field_var->length_bytes is 1 or 2 + if (field_var->length_bytes == 1) { + data_len = (uchar)data_len_str[0]; + } else { + DBUG_ASSERT(field_var->length_bytes == 2); + data_len = uint2korr(data_len_str); + } + + if (data_len > field_var->field_length) { + // The data on disk is longer than table DDL allows? 
+ return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + if (!reader->read(data_len)) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + if (decode) { + memcpy(field_var->ptr, data_len_str, field_var->length_bytes + data_len); + } + + return HA_EXIT_SUCCESS; +} + +template <typename value_field_decoder> +Rdb_value_field_iterator<value_field_decoder>::Rdb_value_field_iterator( + TABLE *table, Rdb_string_reader *value_slice_reader, + const Rdb_converter *rdb_converter, uchar *const buf) + : m_buf(buf) { + DBUG_ASSERT(table != nullptr); + DBUG_ASSERT(buf != nullptr); + + m_table = table; + m_value_slice_reader = value_slice_reader; + auto fields = rdb_converter->get_decode_fields(); + m_field_iter = fields->begin(); + m_field_end = fields->end(); + m_null_bytes = rdb_converter->get_null_bytes(); + m_offset = 0; +} + +// Iterate each requested field and decode one by one +template <typename value_field_decoder> +int Rdb_value_field_iterator<value_field_decoder>::next() { + int err = HA_EXIT_SUCCESS; + while (m_field_iter != m_field_end) { + m_field_dec = m_field_iter->m_field_enc; + bool decode = m_field_iter->m_decode; + bool maybe_null = m_field_dec->maybe_null(); + // This is_null value is bind to how stroage format store its value + m_is_null = maybe_null && ((m_null_bytes[m_field_dec->m_null_offset] & + m_field_dec->m_null_mask) != 0); + + // Skip the bytes we need to skip + int skip = m_field_iter->m_skip; + if (skip && !m_value_slice_reader->read(skip)) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + m_field = m_table->field[m_field_dec->m_field_index]; + // Decode each field + err = value_field_decoder::decode(m_buf, &m_offset, m_table, m_field, + m_field_dec, m_value_slice_reader, decode, + m_is_null); + if (err != HA_EXIT_SUCCESS) { + return err; + } + m_field_iter++; + // Only break for the field that are actually decoding rather than skipping + if (decode) { + break; + } + } + return err; +} + +template <typename value_field_decoder> +bool 
Rdb_value_field_iterator<value_field_decoder>::end_of_fields() const { + return m_field_iter == m_field_end; +} + +template <typename value_field_decoder> +Field *Rdb_value_field_iterator<value_field_decoder>::get_field() const { + DBUG_ASSERT(m_field != nullptr); + return m_field; +} + +template <typename value_field_decoder> +void *Rdb_value_field_iterator<value_field_decoder>::get_dst() const { + DBUG_ASSERT(m_buf != nullptr); + return m_buf + m_offset; +} + +template <typename value_field_decoder> +int Rdb_value_field_iterator<value_field_decoder>::get_field_index() const { + DBUG_ASSERT(m_field_dec != nullptr); + return m_field_dec->m_field_index; +} + +template <typename value_field_decoder> +enum_field_types Rdb_value_field_iterator<value_field_decoder>::get_field_type() + const { + DBUG_ASSERT(m_field_dec != nullptr); + return m_field_dec->m_field_type; +} + +template <typename value_field_decoder> +bool Rdb_value_field_iterator<value_field_decoder>::is_null() const { + DBUG_ASSERT(m_field != nullptr); + return m_is_null; +} + +/* + Initialize Rdb_converter with table data + @param thd IN Thread context + @param tbl_def IN MyRocks table definition + @param table IN Current open table +*/ +Rdb_converter::Rdb_converter(const THD *thd, const Rdb_tbl_def *tbl_def, + TABLE *table) + : m_thd(thd), m_tbl_def(tbl_def), m_table(table) { + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(tbl_def != nullptr); + DBUG_ASSERT(table != nullptr); + + m_key_requested = false; + m_verify_row_debug_checksums = false; + m_maybe_unpack_info = false; + m_row_checksums_checked = 0; + m_null_bytes = nullptr; + setup_field_encoders(); +} + +Rdb_converter::~Rdb_converter() { + my_free(m_encoder_arr); + m_encoder_arr = nullptr; + // These are needed to suppress valgrind errors in rocksdb.partition + m_storage_record.free(); +} + +/* + Decide storage type for each encoder +*/ +void Rdb_converter::get_storage_type(Rdb_field_encoder *const encoder, + const uint kp) { + auto pk_descr = + 
m_tbl_def->m_key_descr_arr[ha_rocksdb::pk_index(m_table, m_tbl_def)]; + // STORE_SOME uses unpack_info. + if (pk_descr->has_unpack_info(kp)) { + DBUG_ASSERT(pk_descr->can_unpack(kp)); + encoder->m_storage_type = Rdb_field_encoder::STORE_SOME; + m_maybe_unpack_info = true; + } else if (pk_descr->can_unpack(kp)) { + encoder->m_storage_type = Rdb_field_encoder::STORE_NONE; + } +} + +/* + @brief + Setup which fields will be unpacked when reading rows + + @detail + Three special cases when we still unpack all fields: + - When client requires decode_all_fields, such as this table is being + updated (m_lock_rows==RDB_LOCK_WRITE). + - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to + read all fields to find whether there is a row checksum at the end. We could + skip the fields instead of decoding them, but currently we do decoding.) + - On index merge as bitmap is cleared during that operation + + @seealso + Rdb_converter::setup_field_encoders() + Rdb_converter::convert_record_from_storage_format() +*/ +void Rdb_converter::setup_field_decoders(const MY_BITMAP *field_map, + bool decode_all_fields) { + m_key_requested = false; + m_decoders_vect.clear(); + int last_useful = 0; + int skip_size = 0; + + for (uint i = 0; i < m_table->s->fields; i++) { + // bitmap is cleared on index merge, but it still needs to decode columns + bool field_requested = + decode_all_fields || m_verify_row_debug_checksums || + bitmap_is_clear_all(field_map) || + bitmap_is_set(field_map, m_table->field[i]->field_index); + + // We only need the decoder if the whole record is stored. 
+ if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { + // the field potentially needs unpacking + if (field_requested) { + // the field is in the read set + m_key_requested = true; + } + continue; + } + + if (field_requested) { + // We will need to decode this field + m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size}); + last_useful = m_decoders_vect.size(); + skip_size = 0; + } else { + if (m_encoder_arr[i].uses_variable_len_encoding() || + m_encoder_arr[i].maybe_null()) { + // For variable-length field, we need to read the data and skip it + m_decoders_vect.push_back({&m_encoder_arr[i], false, skip_size}); + skip_size = 0; + } else { + // Fixed-width field can be skipped without looking at it. + // Add appropriate skip_size to the next field. + skip_size += m_encoder_arr[i].m_pack_length_in_rec; + } + } + } + + // It could be that the last few elements are varchars that just do + // skipping. Remove them. + m_decoders_vect.erase(m_decoders_vect.begin() + last_useful, + m_decoders_vect.end()); +} + +void Rdb_converter::setup_field_encoders() { + uint null_bytes_length = 0; + uchar cur_null_mask = 0x1; + + m_encoder_arr = static_cast<Rdb_field_encoder *>( + my_malloc(m_table->s->fields * sizeof(Rdb_field_encoder), MYF(0))); + if (m_encoder_arr == nullptr) { + return; + } + + for (uint i = 0; i < m_table->s->fields; i++) { + Field *const field = m_table->field[i]; + m_encoder_arr[i].m_storage_type = Rdb_field_encoder::STORE_ALL; + + /* + Check if this field is + - a part of primary key, and + - it can be decoded back from its key image. + If both hold, we don't need to store this field in the value part of + RocksDB's key-value pair. + + If hidden pk exists, we skip this check since the field will never be + part of the hidden pk. 
+ */ + if (!Rdb_key_def::table_has_hidden_pk(m_table)) { + KEY *const pk_info = &m_table->key_info[m_table->s->primary_key]; + for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) { + // key_part->fieldnr is counted from 1 + if (field->field_index + 1 == pk_info->key_part[kp].fieldnr) { + get_storage_type(&m_encoder_arr[i], kp); + break; + } + } + } + + m_encoder_arr[i].m_field_type = field->real_type(); + m_encoder_arr[i].m_field_index = i; + m_encoder_arr[i].m_pack_length_in_rec = field->pack_length_in_rec(); + + if (field->real_maybe_null()) { + m_encoder_arr[i].m_null_mask = cur_null_mask; + m_encoder_arr[i].m_null_offset = null_bytes_length; + if (cur_null_mask == 0x80) { + cur_null_mask = 0x1; + null_bytes_length++; + } else { + cur_null_mask = cur_null_mask << 1; + } + } else { + m_encoder_arr[i].m_null_mask = 0; + } + } + + // Count the last, unfinished NULL-bits byte + if (cur_null_mask != 0x1) { + null_bytes_length++; + } + + m_null_bytes_length_in_record = null_bytes_length; +} + +/* + EntryPoint for Decode: + Decode key slice(if requested) and value slice using built-in field + decoders + @param key_def IN key definition to decode + @param dst OUT Mysql buffer to fill decoded content + @param key_slice IN RocksDB key slice to decode + @param value_slice IN RocksDB value slice to decode + @return + 0 OK + other HA_ERR error code (can be SE-specific) +*/ +int Rdb_converter::decode(const std::shared_ptr<Rdb_key_def> &key_def, + uchar *dst, // address to fill data + const rocksdb::Slice *key_slice, + const rocksdb::Slice *value_slice) { + // Currently only support decode primary key, Will add decode secondary later + DBUG_ASSERT(key_def->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY || + key_def->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY); + + const rocksdb::Slice *updated_key_slice = key_slice; +#ifndef DBUG_OFF + String last_rowkey; + last_rowkey.copy(key_slice->data(), key_slice->size(), &my_charset_bin); + 
DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_read1", + { dbug_modify_key_varchar8(&last_rowkey); }); + rocksdb::Slice rowkey_slice(last_rowkey.ptr(), last_rowkey.length()); + updated_key_slice = &rowkey_slice; +#endif + return convert_record_from_storage_format(key_def, updated_key_slice, + value_slice, dst); +} + +/* + Decode value slice header + @param reader IN value slice reader + @param pk_def IN key definition to decode + @param unpack_slice OUT unpack info slice + @return + 0 OK + other HA_ERR error code (can be SE-specific) +*/ +int Rdb_converter::decode_value_header( + Rdb_string_reader *reader, const std::shared_ptr<Rdb_key_def> &pk_def, + rocksdb::Slice *unpack_slice) { + /* If it's a TTL record, skip the 8 byte TTL value */ + if (pk_def->has_ttl()) { + const char *ttl_bytes; + if ((ttl_bytes = reader->read(ROCKSDB_SIZEOF_TTL_RECORD))) { + memcpy(m_ttl_bytes, ttl_bytes, ROCKSDB_SIZEOF_TTL_RECORD); + } else { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + } + + /* Other fields are decoded from the value */ + if (m_null_bytes_length_in_record && + !(m_null_bytes = reader->read(m_null_bytes_length_in_record))) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + if (m_maybe_unpack_info) { + const char *unpack_info = reader->get_current_ptr(); + if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) || + !reader->read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + uint16 unpack_info_len = + rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(unpack_info + 1)); + *unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len); + + reader->read(unpack_info_len - + Rdb_key_def::get_unpack_header_size(unpack_info[0])); + } + + return HA_EXIT_SUCCESS; +} + +/* + Convert RocksDb key slice and value slice to Mysql format + @param key_def IN key definition to decode + @param key_slice IN RocksDB key slice + @param value_slice IN RocksDB value slice + @param dst OUT MySql format address + @return + 0 OK + other 
HA_ERR error code (can be SE-specific) +*/ +int Rdb_converter::convert_record_from_storage_format( + const std::shared_ptr<Rdb_key_def> &pk_def, + const rocksdb::Slice *const key_slice, + const rocksdb::Slice *const value_slice, uchar *const dst) { + int err = HA_EXIT_SUCCESS; + + Rdb_string_reader value_slice_reader(value_slice); + rocksdb::Slice unpack_slice; + err = decode_value_header(&value_slice_reader, pk_def, &unpack_slice); + if (err != HA_EXIT_SUCCESS) { + return err; + } + + /* + Decode PK fields from the key + */ + if (m_key_requested) { + err = pk_def->unpack_record(m_table, dst, key_slice, + !unpack_slice.empty() ? &unpack_slice : nullptr, + false /* verify_checksum */); + } + if (err != HA_EXIT_SUCCESS) { + return err; + } + + Rdb_value_field_iterator<Rdb_convert_to_record_value_decoder> + value_field_iterator(m_table, &value_slice_reader, this, dst); + + // Decode value slices + while (!value_field_iterator.end_of_fields()) { + err = value_field_iterator.next(); + + if (err != HA_EXIT_SUCCESS) { + return err; + } + } + + if (m_verify_row_debug_checksums) { + return verify_row_debug_checksum(pk_def, &value_slice_reader, key_slice, + value_slice); + } + return HA_EXIT_SUCCESS; +} + +/* + Verify checksum for row + @param pk_def IN key def + @param reader IN RocksDB value slice reader + @param key IN RocksDB key slice + @param value IN RocksDB value slice + @return + 0 OK + other HA_ERR error code (can be SE-specific) +*/ +int Rdb_converter::verify_row_debug_checksum( + const std::shared_ptr<Rdb_key_def> &pk_def, Rdb_string_reader *reader, + const rocksdb::Slice *key, const rocksdb::Slice *value) { + if (reader->remaining_bytes() == RDB_CHECKSUM_CHUNK_SIZE && + reader->read(1)[0] == RDB_CHECKSUM_DATA_TAG) { + uint32_t stored_key_chksum = + rdb_netbuf_to_uint32((const uchar *)reader->read(RDB_CHECKSUM_SIZE)); + uint32_t stored_val_chksum = + rdb_netbuf_to_uint32((const uchar *)reader->read(RDB_CHECKSUM_SIZE)); + + const uint32_t computed_key_chksum = + 
my_core::crc32(0, rdb_slice_to_uchar_ptr(key), key->size()); + const uint32_t computed_val_chksum = + my_core::crc32(0, rdb_slice_to_uchar_ptr(value), + value->size() - RDB_CHECKSUM_CHUNK_SIZE); + + DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1", stored_key_chksum++;); + + if (stored_key_chksum != computed_key_chksum) { + pk_def->report_checksum_mismatch(true, key->data(), key->size()); + return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH; + } + + DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum2", stored_val_chksum++;); + if (stored_val_chksum != computed_val_chksum) { + pk_def->report_checksum_mismatch(false, value->data(), value->size()); + return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH; + } + + m_row_checksums_checked++; + } + if (reader->remaining_bytes()) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + return HA_EXIT_SUCCESS; +} + +/** + Convert record from table->record[0] form into a form that can be written + into rocksdb. + + @param pk_def IN Current key def + @pk_unpack_info IN Unpack info generated during key pack + @is_update_row IN Whether it is update row + @store_row_debug_checksums IN Whether to store checksums + @param ttl_bytes IN/OUT Old ttl value from previous record and + ttl value during current encode + @is_ttl_bytes_updated OUT Whether ttl bytes is updated + @param value_slice OUT Data slice with record data. 
+*/ +int Rdb_converter::encode_value_slice( + const std::shared_ptr<Rdb_key_def> &pk_def, + const rocksdb::Slice &pk_packed_slice, Rdb_string_writer *pk_unpack_info, + bool is_update_row, bool store_row_debug_checksums, char *ttl_bytes, + bool *is_ttl_bytes_updated, rocksdb::Slice *const value_slice) { + DBUG_ASSERT(pk_def != nullptr); + // Currently only primary key will store value slice + DBUG_ASSERT(pk_def->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY || + pk_def->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY); + DBUG_ASSERT_IMP(m_maybe_unpack_info, pk_unpack_info); + + bool has_ttl = pk_def->has_ttl(); + bool has_ttl_column = !pk_def->m_ttl_column.empty(); + + m_storage_record.length(0); + + if (has_ttl) { + /* If it's a TTL record, reserve space for 8 byte TTL value in front. */ + m_storage_record.fill( + ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_length_in_record, 0); + // NOTE: is_ttl_bytes_updated is only used for update case + // During update, skip update sk key/values slice iff none of sk fields + // have changed and ttl bytes isn't changed. 
see + // ha_rocksdb::update_write_sk() for more info + *is_ttl_bytes_updated = false; + char *const data = const_cast<char *>(m_storage_record.ptr()); + if (has_ttl_column) { + DBUG_ASSERT(pk_def->get_ttl_field_index() != UINT_MAX); + Field *const field = m_table->field[pk_def->get_ttl_field_index()]; + DBUG_ASSERT(field->pack_length_in_rec() == ROCKSDB_SIZEOF_TTL_RECORD); + DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG); + + uint64 ts = uint8korr(field->ptr); +#ifndef DBUG_OFF + ts += rdb_dbug_set_ttl_rec_ts(); +#endif + rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts); + if (is_update_row) { + *is_ttl_bytes_updated = + memcmp(ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); + } + // Also store in m_ttl_bytes to propagate to update_write_sk + memcpy(ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); + } else { + /* + For implicitly generated TTL records we need to copy over the old + TTL value from the old record in the event of an update. It was stored + in m_ttl_bytes. + + Otherwise, generate a timestamp using the current time. + */ + if (is_update_row) { + memcpy(data, ttl_bytes, sizeof(uint64)); + } else { + uint64 ts = static_cast<uint64>(std::time(nullptr)); +#ifndef DBUG_OFF + ts += rdb_dbug_set_ttl_rec_ts(); +#endif + rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts); + // Also store in m_ttl_bytes to propagate to update_write_sk + memcpy(ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); + } + } + } else { + /* All NULL bits are initially 0 */ + m_storage_record.fill(m_null_bytes_length_in_record, 0); + } + + // If a primary key may have non-empty unpack_info for certain values, + // (m_maybe_unpack_info=TRUE), we write the unpack_info block. The block + // itself was prepared in Rdb_key_def::pack_record. 
+ if (m_maybe_unpack_info) { + m_storage_record.append(reinterpret_cast<char *>(pk_unpack_info->ptr()), + pk_unpack_info->get_current_pos()); + } + for (uint i = 0; i < m_table->s->fields; i++) { + Rdb_field_encoder &encoder = m_encoder_arr[i]; + /* Don't pack decodable PK key parts */ + if (encoder.m_storage_type != Rdb_field_encoder::STORE_ALL) { + continue; + } + + Field *const field = m_table->field[i]; + if (encoder.maybe_null()) { + char *data = const_cast<char *>(m_storage_record.ptr()); + if (has_ttl) { + data += ROCKSDB_SIZEOF_TTL_RECORD; + } + + if (field->is_null()) { + data[encoder.m_null_offset] |= encoder.m_null_mask; + /* Don't write anything for NULL values */ + continue; + } + } + + if (encoder.m_field_type == MYSQL_TYPE_BLOB) { + my_core::Field_blob *blob = + reinterpret_cast<my_core::Field_blob *>(field); + /* Get the number of bytes needed to store length*/ + const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr; + + /* Store the length of the value */ + m_storage_record.append(reinterpret_cast<char *>(blob->ptr), + length_bytes); + + /* Store the blob value itself */ + char *data_ptr; + memcpy(&data_ptr, blob->ptr + length_bytes, sizeof(uchar **)); + m_storage_record.append(data_ptr, blob->get_length()); + } else if (encoder.m_field_type == MYSQL_TYPE_VARCHAR) { + Field_varstring *const field_var = + reinterpret_cast<Field_varstring *>(field); + uint data_len; + /* field_var->length_bytes is 1 or 2 */ + if (field_var->length_bytes == 1) { + data_len = field_var->ptr[0]; + } else { + DBUG_ASSERT(field_var->length_bytes == 2); + data_len = uint2korr(field_var->ptr); + } + m_storage_record.append(reinterpret_cast<char *>(field_var->ptr), + field_var->length_bytes + data_len); + } else { + /* Copy the field data */ + const uint len = field->pack_length_in_rec(); + m_storage_record.append(reinterpret_cast<char *>(field->ptr), len); + } + } + + if (store_row_debug_checksums) { + const uint32_t key_crc32 = my_core::crc32( + 0, 
rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size()); + const uint32_t val_crc32 = + my_core::crc32(0, rdb_mysql_str_to_uchar_str(&m_storage_record), + m_storage_record.length()); + uchar key_crc_buf[RDB_CHECKSUM_SIZE]; + uchar val_crc_buf[RDB_CHECKSUM_SIZE]; + rdb_netbuf_store_uint32(key_crc_buf, key_crc32); + rdb_netbuf_store_uint32(val_crc_buf, val_crc32); + m_storage_record.append((const char *)&RDB_CHECKSUM_DATA_TAG, 1); + m_storage_record.append((const char *)key_crc_buf, RDB_CHECKSUM_SIZE); + m_storage_record.append((const char *)val_crc_buf, RDB_CHECKSUM_SIZE); + } + + *value_slice = + rocksdb::Slice(m_storage_record.ptr(), m_storage_record.length()); + + return HA_EXIT_SUCCESS; +} +} // namespace myrocks diff --git a/storage/rocksdb/rdb_converter.h b/storage/rocksdb/rdb_converter.h new file mode 100644 index 00000000000..a4eae341f16 --- /dev/null +++ b/storage/rocksdb/rdb_converter.h @@ -0,0 +1,247 @@ +/* + Copyright (c) 2018, Facebook, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#pragma once + +// C++ standard header files +#include <string> +#include <vector> + +// MySQL header files +#include "./handler.h" // handler +#include "./my_global.h" // ulonglong +#include "./sql_string.h" +#include "./ut0counter.h" + +// MyRocks header files +#include "./ha_rocksdb.h" +#include "./rdb_datadic.h" + +namespace myrocks { +class Rdb_field_encoder; + +/** + Describes instructions on how to decode the field for value slice +*/ +struct READ_FIELD { + // Points to Rdb_field_encoder describing the field + Rdb_field_encoder *m_field_enc; + // if true, decode the field, otherwise skip it + bool m_decode; + // Skip this many bytes before reading (or skipping) this field + int m_skip; +}; + +/** + Class to convert rocksdb value slice from storage format to mysql record + format. +*/ +class Rdb_convert_to_record_value_decoder { + public: + Rdb_convert_to_record_value_decoder() = delete; + Rdb_convert_to_record_value_decoder( + const Rdb_convert_to_record_value_decoder &decoder) = delete; + Rdb_convert_to_record_value_decoder &operator=( + const Rdb_convert_to_record_value_decoder &decoder) = delete; + + static int decode(uchar *const buf, uint *offset, TABLE *table, + my_core::Field *field, Rdb_field_encoder *field_dec, + Rdb_string_reader *reader, bool decode, bool is_null); + + private: + static int decode_blob(TABLE *table, Field *field, Rdb_string_reader *reader, + bool decode); + static int decode_fixed_length_field(Field *const field, + Rdb_field_encoder *field_dec, + Rdb_string_reader *const reader, + bool decode); + + static int decode_varchar(Field *const field, Rdb_string_reader *const reader, + bool decode); +}; + +/** + Class to iterator fields in RocksDB value slice + A template class instantiation represent a way to decode the data. 
+ The reason to use template class instead of normal class is to elimate + virtual method call. +*/ +template <typename value_field_decoder> +class Rdb_value_field_iterator { + private: + bool m_is_null; + std::vector<READ_FIELD>::const_iterator m_field_iter; + std::vector<READ_FIELD>::const_iterator m_field_end; + Rdb_string_reader *m_value_slice_reader; + // null value map + const char *m_null_bytes; + // The current open table + TABLE *m_table; + // The current field + Field *m_field; + Rdb_field_encoder *m_field_dec; + uchar *const m_buf; + uint m_offset; + + public: + Rdb_value_field_iterator(TABLE *table, Rdb_string_reader *value_slice_reader, + const Rdb_converter *rdb_converter, + uchar *const buf); + Rdb_value_field_iterator(const Rdb_value_field_iterator &field_iterator) = + delete; + Rdb_value_field_iterator &operator=( + const Rdb_value_field_iterator &field_iterator) = delete; + + /* + Move and decode next field + Run next() before accessing data + */ + int next(); + // Whether current field is the end of fields + bool end_of_fields() const; + void *get_dst() const; + // Whether the value of current field is null + bool is_null() const; + // get current field index + int get_field_index() const; + // get current field type + enum_field_types get_field_type() const; + // get current field + Field *get_field() const; +}; + +/** + Class to convert Mysql formats to rocksdb storage format, and vice versa. 
+*/ +class Rdb_converter { + public: + /* + Initialize converter with table data + */ + Rdb_converter(const THD *thd, const Rdb_tbl_def *tbl_def, TABLE *table); + Rdb_converter(const Rdb_converter &decoder) = delete; + Rdb_converter &operator=(const Rdb_converter &decoder) = delete; + ~Rdb_converter(); + + void setup_field_decoders(const MY_BITMAP *field_map, + bool decode_all_fields = false); + + int decode(const std::shared_ptr<Rdb_key_def> &key_def, uchar *dst, + const rocksdb::Slice *key_slice, + const rocksdb::Slice *value_slice); + + int encode_value_slice(const std::shared_ptr<Rdb_key_def> &pk_def, + const rocksdb::Slice &pk_packed_slice, + Rdb_string_writer *pk_unpack_info, bool is_update_row, + bool store_row_debug_checksums, char *ttl_bytes, + bool *is_ttl_bytes_updated, + rocksdb::Slice *const value_slice); + + my_core::ha_rows get_row_checksums_checked() const { + return m_row_checksums_checked; + } + bool get_verify_row_debug_checksums() const { + return m_verify_row_debug_checksums; + } + void set_verify_row_debug_checksums(bool verify_row_debug_checksums) { + m_verify_row_debug_checksums = verify_row_debug_checksums; + } + + const Rdb_field_encoder *get_encoder_arr() const { return m_encoder_arr; } + int get_null_bytes_in_record() { return m_null_bytes_length_in_record; } + const char *get_null_bytes() const { return m_null_bytes; } + void set_is_key_requested(bool key_requested) { + m_key_requested = key_requested; + } + bool get_maybe_unpack_info() const { return m_maybe_unpack_info; } + + char *get_ttl_bytes_buffer() { return m_ttl_bytes; } + + const std::vector<READ_FIELD> *get_decode_fields() const { + return &m_decoders_vect; + } + + private: + int decode_value_header(Rdb_string_reader *reader, + const std::shared_ptr<Rdb_key_def> &pk_def, + rocksdb::Slice *unpack_slice); + + void setup_field_encoders(); + + void get_storage_type(Rdb_field_encoder *const encoder, const uint kp); + + int convert_record_from_storage_format( + const 
std::shared_ptr<Rdb_key_def> &pk_def, + const rocksdb::Slice *const key, const rocksdb::Slice *const value, + uchar *const buf); + + int verify_row_debug_checksum(const std::shared_ptr<Rdb_key_def> &pk_def, + Rdb_string_reader *reader, + const rocksdb::Slice *key, + const rocksdb::Slice *value); + + private: + /* + This tells if any field which is part of the key needs to be unpacked and + decoded. + */ + bool m_key_requested; + /* + Controls whether verifying checksums during reading, This is updated from + the session variable at the start of each query. + */ + bool m_verify_row_debug_checksums; + // Thread handle + const THD *m_thd; + /* MyRocks table definition*/ + const Rdb_tbl_def *m_tbl_def; + /* The current open table */ + TABLE *m_table; + /* + Number of bytes in on-disk (storage) record format that are used for + storing SQL NULL flags. + */ + int m_null_bytes_length_in_record; + /* + Pointer to null bytes value + */ + const char *m_null_bytes; + /* + TRUE <=> Some fields in the PK may require unpack_info. + */ + bool m_maybe_unpack_info; + /* + Pointer to the original TTL timestamp value (8 bytes) during UPDATE. + */ + char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD]; + /* + Array of table->s->fields elements telling how to store fields in the + record. + */ + Rdb_field_encoder *m_encoder_arr; + /* + Array of request fields telling how to decode data in RocksDB format + */ + std::vector<READ_FIELD> m_decoders_vect; + /* + A counter of how many row checksums were checked for this table. Note that + this does not include checksums for secondary index entries. 
+ */ + my_core::ha_rows m_row_checksums_checked; + // buffer to hold data during encode_value_slice + String m_storage_record; +}; +} // namespace myrocks diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc index a3a811e1a6f..e369c08dd51 100644 --- a/storage/rocksdb/rdb_datadic.cc +++ b/storage/rocksdb/rdb_datadic.cc @@ -15,7 +15,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif /* For use of 'PRIu64': */ @@ -45,9 +45,11 @@ #include "./sql_table.h" /* MyRocks header files */ +#include "./ha_rocksdb.h" #include "./ha_rocksdb_proto.h" #include "./my_stacktrace.h" #include "./rdb_cf_manager.h" +#include "./rdb_psi.h" #include "./rdb_utils.h" namespace myrocks { @@ -68,9 +70,229 @@ inline bool field_check_field_name_match(Field *field, const char *name) /* - Rdb_key_def class implementation + Decode current key field + @param fpi IN data structure contains field metadata + @param field IN current field + @param reader IN key slice reader + @param unp_reader IN unpack information reader + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code +*/ +int Rdb_convert_to_record_key_decoder::decode_field( + Rdb_field_packing *fpi, Field *field, Rdb_string_reader *reader, + const uchar *const default_value, Rdb_string_reader *unpack_reader) { + if (fpi->m_maybe_null) { + const char *nullp; + if (!(nullp = reader->read(1))) { + return HA_EXIT_FAILURE; + } + + if (*nullp == 0) { + /* Set the NULL-bit of this field */ + field->set_null(); + /* Also set the field to its default value */ + memcpy(field->ptr, default_value, field->pack_length()); + return HA_EXIT_SUCCESS; + } else if (*nullp == 1) { + field->set_notnull(); + } else { + return HA_EXIT_FAILURE; + } + } + + return (fpi->m_unpack_func)(fpi, field, field->ptr, reader, unpack_reader); +} + +/* + Decode current key field 
+ + @param buf OUT the buf starting address + @param offset OUT the bytes offset when data is written + @param fpi IN data structure contains field metadata + @param table IN current table + @param field IN current field + @param has_unpack_info IN whether contains unpack info + @param reader IN key slice reader + @param unp_reader IN unpack information reader + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code */ +int Rdb_convert_to_record_key_decoder::decode( + uchar *const buf, uint *offset, Rdb_field_packing *fpi, TABLE *table, + Field *field, bool has_unpack_info, Rdb_string_reader *reader, + Rdb_string_reader *unpack_reader) { + DBUG_ASSERT(buf != nullptr); + DBUG_ASSERT(offset != nullptr); + + uint field_offset = field->ptr - table->record[0]; + *offset = field_offset; + uint null_offset = field->null_offset(); + bool maybe_null = field->real_maybe_null(); + + field->move_field(buf + field_offset, + maybe_null ? buf + null_offset : nullptr, field->null_bit); + + // If we need unpack info, but there is none, tell the unpack function + // this by passing unp_reader as nullptr. If we never read unpack_info + // during unpacking anyway, then there won't be an error. + bool maybe_missing_unpack = !has_unpack_info && fpi->uses_unpack_info(); + + int res = + decode_field(fpi, field, reader, table->s->default_values + field_offset, + maybe_missing_unpack ? nullptr : unpack_reader); + + // Restore field->ptr and field->null_ptr + field->move_field(table->record[0] + field_offset, + maybe_null ?
table->record[0] + null_offset : nullptr, + field->null_bit); + if (res != UNPACK_SUCCESS) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + return HA_EXIT_SUCCESS; +} + +/* + Skip current key field + @param fpi IN data structure contains field metadata + @param field IN current field + @param reader IN key slice reader + @param unp_reader IN unpack information reader + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code +*/ +int Rdb_convert_to_record_key_decoder::skip(const Rdb_field_packing *fpi, + const Field *field, + Rdb_string_reader *reader, + Rdb_string_reader *unp_reader) { + /* It is impossible to unpack the column. Skip it. */ + if (fpi->m_maybe_null) { + const char *nullp; + if (!(nullp = reader->read(1))) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + if (*nullp == 0) { + /* This is a NULL value */ + return HA_EXIT_SUCCESS; + } + /* If NULL marker is not '0', it can be only '1' */ + if (*nullp != 1) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + } + if ((fpi->m_skip_func)(fpi, field, reader)) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + // If this is a space padded varchar, we need to skip the indicator + // bytes for trailing bytes. They're useless since we can't restore the + // field anyway. + // + // There is a special case for prefixed varchars where we do not + // generate unpack info, because we know prefixed varchars cannot be + // unpacked. In this case, it is not necessary to skip. + if (fpi->m_skip_func == &Rdb_key_def::skip_variable_space_pad && + !fpi->m_unpack_info_stores_value) { + unp_reader->read(fpi->m_unpack_info_uses_two_bytes ? 
2 : 1); + } + return HA_EXIT_SUCCESS; +} + +Rdb_key_field_iterator::Rdb_key_field_iterator( + const Rdb_key_def *key_def, Rdb_field_packing *pack_info, + Rdb_string_reader *reader, Rdb_string_reader *unp_reader, TABLE *table, + bool has_unpack_info, const MY_BITMAP *covered_bitmap, uchar *const buf) { + m_key_def = key_def; + m_pack_info = pack_info; + m_iter_index = 0; + m_iter_end = key_def->get_key_parts(); + m_reader = reader; + m_unp_reader = unp_reader; + m_table = table; + m_has_unpack_info = has_unpack_info; + m_covered_bitmap = covered_bitmap; + m_buf = buf; + m_secondary_key = + (key_def->m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY); + m_hidden_pk_exists = Rdb_key_def::table_has_hidden_pk(table); + m_is_hidden_pk = + (key_def->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY); + m_curr_bitmap_pos = 0; + m_offset = 0; +} + +void *Rdb_key_field_iterator::get_dst() const { return m_buf + m_offset; } + +int Rdb_key_field_iterator::get_field_index() const { + DBUG_ASSERT(m_field != nullptr); + return m_field->field_index; +} + +bool Rdb_key_field_iterator::get_is_null() const { return m_is_null; } +Field *Rdb_key_field_iterator::get_field() const { + DBUG_ASSERT(m_field != nullptr); + return m_field; +} + +bool Rdb_key_field_iterator::has_next() { return m_iter_index < m_iter_end; } + +/** + Iterate each field in the key and decode/skip one by one +*/ +int Rdb_key_field_iterator::next() { + int status = HA_EXIT_SUCCESS; + while (m_iter_index < m_iter_end) { + int curr_index = m_iter_index++; + + m_fpi = &m_pack_info[curr_index]; + /* + Hidden pk field is packed at the end of the secondary keys, but the SQL + layer does not know about it. Skip retrieving field if hidden pk. 
+ */ + if ((m_secondary_key && m_hidden_pk_exists && + curr_index + 1 == m_iter_end) || + m_is_hidden_pk) { + DBUG_ASSERT(m_fpi->m_unpack_func); + if ((m_fpi->m_skip_func)(m_fpi, nullptr, m_reader)) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + return HA_EXIT_SUCCESS; + } + + m_field = m_fpi->get_field_in_table(m_table); + + bool covered_column = true; + if (m_covered_bitmap != nullptr && + m_field->real_type() == MYSQL_TYPE_VARCHAR && !m_fpi->m_covered) { + covered_column = m_curr_bitmap_pos < MAX_REF_PARTS && + bitmap_is_set(m_covered_bitmap, m_curr_bitmap_pos++); + } + + if (m_fpi->m_unpack_func && covered_column) { + /* It is possible to unpack this column. Do it. */ + status = Rdb_convert_to_record_key_decoder::decode( + m_buf, &m_offset, m_fpi, m_table, m_field, m_has_unpack_info, + m_reader, m_unp_reader); + if (status) { + return status; + } + break; + } else { + status = Rdb_convert_to_record_key_decoder::skip(m_fpi, m_field, m_reader, + m_unp_reader); + if (status) { + return status; + } + } + } + return HA_EXIT_SUCCESS; +} + +/* + Rdb_key_def class implementation +*/ Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg, rocksdb::ColumnFamilyHandle *cf_handle_arg, uint16_t index_dict_version_arg, uchar index_type_arg, @@ -78,16 +300,26 @@ Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg, bool is_per_partition_cf_arg, const char *_name, Rdb_index_stats _stats, uint32 index_flags_bitmap, uint32 ttl_rec_offset, uint64 ttl_duration) - : m_index_number(indexnr_arg), m_cf_handle(cf_handle_arg), + : m_index_number(indexnr_arg), + m_cf_handle(cf_handle_arg), m_index_dict_version(index_dict_version_arg), - m_index_type(index_type_arg), m_kv_format_version(kv_format_version_arg), + m_index_type(index_type_arg), + m_kv_format_version(kv_format_version_arg), m_is_reverse_cf(is_reverse_cf_arg), - m_is_per_partition_cf(is_per_partition_cf_arg), m_name(_name), - m_stats(_stats), m_index_flags_bitmap(index_flags_bitmap), - m_ttl_rec_offset(ttl_rec_offset), 
m_ttl_duration(ttl_duration), - m_ttl_column(""), m_pk_part_no(nullptr), m_pack_info(nullptr), - m_keyno(keyno_arg), m_key_parts(0), m_ttl_pk_key_part_offset(UINT_MAX), - m_ttl_field_offset(UINT_MAX), m_prefix_extractor(nullptr), + m_is_per_partition_cf(is_per_partition_cf_arg), + m_name(_name), + m_stats(_stats), + m_index_flags_bitmap(index_flags_bitmap), + m_ttl_rec_offset(ttl_rec_offset), + m_ttl_duration(ttl_duration), + m_ttl_column(""), + m_pk_part_no(nullptr), + m_pack_info(nullptr), + m_keyno(keyno_arg), + m_key_parts(0), + m_ttl_pk_key_part_offset(UINT_MAX), + m_ttl_field_index(UINT_MAX), + m_prefix_extractor(nullptr), m_maxlength(0) // means 'not intialized' { mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); @@ -104,16 +336,23 @@ Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg, } Rdb_key_def::Rdb_key_def(const Rdb_key_def &k) - : m_index_number(k.m_index_number), m_cf_handle(k.m_cf_handle), + : m_index_number(k.m_index_number), + m_cf_handle(k.m_cf_handle), m_is_reverse_cf(k.m_is_reverse_cf), - m_is_per_partition_cf(k.m_is_per_partition_cf), m_name(k.m_name), - m_stats(k.m_stats), m_index_flags_bitmap(k.m_index_flags_bitmap), - m_ttl_rec_offset(k.m_ttl_rec_offset), m_ttl_duration(k.m_ttl_duration), - m_ttl_column(k.m_ttl_column), m_pk_part_no(k.m_pk_part_no), - m_pack_info(k.m_pack_info), m_keyno(k.m_keyno), + m_is_per_partition_cf(k.m_is_per_partition_cf), + m_name(k.m_name), + m_stats(k.m_stats), + m_index_flags_bitmap(k.m_index_flags_bitmap), + m_ttl_rec_offset(k.m_ttl_rec_offset), + m_ttl_duration(k.m_ttl_duration), + m_ttl_column(k.m_ttl_column), + m_pk_part_no(k.m_pk_part_no), + m_pack_info(k.m_pack_info), + m_keyno(k.m_keyno), m_key_parts(k.m_key_parts), m_ttl_pk_key_part_offset(k.m_ttl_pk_key_part_offset), - m_ttl_field_offset(UINT_MAX), m_prefix_extractor(k.m_prefix_extractor), + m_ttl_field_index(UINT_MAX), + m_prefix_extractor(k.m_prefix_extractor), m_maxlength(k.m_maxlength) { mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); 
rdb_netbuf_store_index(m_index_number_storage_form, m_index_number); @@ -172,16 +411,15 @@ void Rdb_key_def::setup(const TABLE *const tbl, KEY *pk_info = nullptr; if (!is_hidden_pk) { key_info = &tbl->key_info[m_keyno]; - if (!hidden_pk_exists) - pk_info = &tbl->key_info[tbl->s->primary_key]; + if (!hidden_pk_exists) pk_info = &tbl->key_info[tbl->s->primary_key]; m_name = std::string(key_info->name.str); } else { m_name = HIDDEN_PK_NAME; } - if (secondary_key) + if (secondary_key) { m_pk_key_parts= hidden_pk_exists ? 1 : pk_info->ext_key_parts; - else { + } else { pk_info = nullptr; m_pk_key_parts = 0; } @@ -207,11 +445,12 @@ void Rdb_key_def::setup(const TABLE *const tbl, m_key_parts += m_pk_key_parts; } - if (secondary_key) + if (secondary_key) { m_pk_part_no = reinterpret_cast<uint *>( my_malloc(sizeof(uint) * m_key_parts, MYF(0))); - else + } else { m_pk_part_no = nullptr; + } const size_t size = sizeof(Rdb_field_packing) * m_key_parts; m_pack_info = @@ -222,7 +461,7 @@ void Rdb_key_def::setup(const TABLE *const tbl, table creation. */ Rdb_key_def::extract_ttl_col(tbl, tbl_def, &m_ttl_column, - &m_ttl_field_offset, true); + &m_ttl_field_index, true); size_t max_len = INDEX_NUMBER_SIZE; int unpack_len = 0; @@ -266,8 +505,7 @@ void Rdb_key_def::setup(const TABLE *const tbl, } } - if (field && field->real_maybe_null()) - max_len += 1; // NULL-byte + if (field && field->real_maybe_null()) max_len += 1; // NULL-byte m_pack_info[dst_i].setup(this, field, keyno_to_set, keypart_to_set, key_part ? key_part->length : 0); @@ -287,8 +525,7 @@ void Rdb_key_def::setup(const TABLE *const tbl, appended to the end of the sk. 
*/ m_pk_part_no[dst_i] = -1; - if (simulating_extkey) - m_pk_part_no[dst_i] = 0; + if (simulating_extkey) m_pk_part_no[dst_i] = 0; } max_len += m_pack_info[dst_i].m_max_image_len; @@ -405,7 +642,7 @@ uint Rdb_key_def::extract_ttl_duration(const TABLE *const table_arg, uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg, const Rdb_tbl_def *const tbl_def_arg, std::string *ttl_column, - uint *ttl_field_offset, bool skip_checks) { + uint *ttl_field_index, bool skip_checks) { std::string table_comment(table_arg->s->comment.str, table_arg->s->comment.length); /* @@ -423,7 +660,7 @@ uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg, Field *const field = table_arg->field[i]; if (field_check_field_name_match(field, ttl_col_str.c_str())) { *ttl_column = ttl_col_str; - *ttl_field_offset = i; + *ttl_field_index = i; } } return HA_EXIT_SUCCESS; @@ -439,7 +676,7 @@ uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg, field->key_type() == HA_KEYTYPE_ULONGLONG && !field->real_maybe_null()) { *ttl_column = ttl_col_str; - *ttl_field_offset = i; + *ttl_field_index = i; found = true; break; } @@ -454,9 +691,8 @@ uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg, return HA_EXIT_SUCCESS; } -const std::string -Rdb_key_def::gen_qualifier_for_table(const char *const qualifier, - const std::string &partition_name) { +const std::string Rdb_key_def::gen_qualifier_for_table( + const char *const qualifier, const std::string &partition_name) { bool has_partition = !partition_name.empty(); std::string qualifier_str = ""; @@ -484,8 +720,8 @@ Rdb_key_def::gen_qualifier_for_table(const char *const qualifier, Formats the string and returns the column family name assignment part for a specific partition. 
*/ -const std::string -Rdb_key_def::gen_cf_name_qualifier_for_partition(const std::string &prefix) { +const std::string Rdb_key_def::gen_cf_name_qualifier_for_partition( + const std::string &prefix) { DBUG_ASSERT(!prefix.empty()); return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP + RDB_CF_NAME_QUALIFIER + @@ -500,8 +736,8 @@ const std::string Rdb_key_def::gen_ttl_duration_qualifier_for_partition( RDB_TTL_DURATION_QUALIFIER + RDB_QUALIFIER_VALUE_SEP; } -const std::string -Rdb_key_def::gen_ttl_col_qualifier_for_partition(const std::string &prefix) { +const std::string Rdb_key_def::gen_ttl_col_qualifier_for_partition( + const std::string &prefix) { DBUG_ASSERT(!prefix.empty()); return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP + RDB_TTL_COL_QUALIFIER + @@ -604,15 +840,13 @@ int Rdb_key_def::read_memcmp_key_part(const TABLE *table_arg, /* It is impossible to unpack the column. Skip it. */ if (m_pack_info[part_num].m_maybe_null) { const char *nullp; - if (!(nullp = reader->read(1))) - return 1; + if (!(nullp = reader->read(1))) return 1; if (*nullp == 0) { /* This is a NULL value */ return -1; } else { /* If NULL marker is not '0', it can be only '1' */ - if (*nullp != 1) - return 1; + if (*nullp != 1) return 1; } } @@ -622,11 +856,12 @@ int Rdb_key_def::read_memcmp_key_part(const TABLE *table_arg, bool is_hidden_pk_part = (part_num + 1 == m_key_parts) && (table_arg->s->primary_key == MAX_INDEXES); Field *field = nullptr; - if (!is_hidden_pk_part) + if (!is_hidden_pk_part) { field = fpi->get_field_in_table(table_arg); - if ((this->*fpi->m_skip_func)(fpi, field, reader)) + } + if ((fpi->m_skip_func)(fpi, field, reader)) { return 1; - + } return 0; } @@ -661,6 +896,7 @@ uint Rdb_key_def::get_primary_key_tuple(const TABLE *const table, uchar *const pk_buffer) const { DBUG_ASSERT(table != nullptr); DBUG_ASSERT(key != nullptr); + DBUG_ASSERT(m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY); DBUG_ASSERT(pk_buffer); uint size = 0; @@ -679,8 +915,7 @@ uint 
Rdb_key_def::get_primary_key_tuple(const TABLE *const table, Rdb_string_reader reader(key); // Skip the index number - if ((!reader.read(INDEX_NUMBER_SIZE))) - return RDB_INVALID_KEY_LEN; + if ((!reader.read(INDEX_NUMBER_SIZE))) return RDB_INVALID_KEY_LEN; for (i = 0; i < m_key_parts; i++) { if ((pk_key_part = m_pk_part_no[i]) != -1) { @@ -731,8 +966,7 @@ uint Rdb_key_def::get_memcmp_sk_parts(const TABLE *table, const char *start = reader.get_current_ptr(); // Skip the index number - if ((!reader.read(INDEX_NUMBER_SIZE))) - return RDB_INVALID_KEY_LEN; + if ((!reader.read(INDEX_NUMBER_SIZE))) return RDB_INVALID_KEY_LEN; for (uint i = 0; i < table->key_info[m_keyno].user_defined_key_parts; i++) { if ((res = read_memcmp_key_part(table, &reader, i)) > 0) { @@ -772,8 +1006,7 @@ uint Rdb_key_def::pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer, key_restore(tbl->record[0], key_tuple, &tbl->key_info[m_keyno], key_len); uint n_used_parts = my_count_bits(keypart_map); - if (keypart_map == HA_WHOLE_KEY) - n_used_parts = 0; // Full key is used + if (keypart_map == HA_WHOLE_KEY) n_used_parts = 0; // Full key is used /* Then, convert the record into a mem-comparable form */ return pack_record(tbl, pack_buffer, tbl->record[0], packed_tuple, nullptr, @@ -811,7 +1044,7 @@ bool Rdb_key_def::unpack_info_has_checksum(const rocksdb::Slice &unpack_info) { /* @return Number of bytes that were changed */ -int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) { +int Rdb_key_def::successor(uchar *const packed_tuple, const uint len) { DBUG_ASSERT(packed_tuple != nullptr); int changed = 0; @@ -830,7 +1063,7 @@ int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) { /* @return Number of bytes that were changed */ -int Rdb_key_def::predecessor(uchar *const packed_tuple, const uint &len) { +int Rdb_key_def::predecessor(uchar *const packed_tuple, const uint len) { DBUG_ASSERT(packed_tuple != nullptr); int changed = 0; @@ -889,30 +1122,30 @@ void 
Rdb_key_def::get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const { } switch (field->real_type()) { - // This type may be covered depending on the record. If it was requested, - // we require the covered bitmap to have this bit set. - case MYSQL_TYPE_VARCHAR: - if (curr_bitmap_pos < MAX_REF_PARTS) { + // This type may be covered depending on the record. If it was requested, + // we require the covered bitmap to have this bit set. + case MYSQL_TYPE_VARCHAR: + if (curr_bitmap_pos < MAX_REF_PARTS) { + if (bitmap_is_set(table->read_set, field->field_index)) { + bitmap_set_bit(map, curr_bitmap_pos); + bitmap_set_bit(&maybe_covered_bitmap, field->field_index); + } + curr_bitmap_pos++; + } else { + bitmap_free(&maybe_covered_bitmap); + bitmap_free(map); + return; + } + break; + // This column is a type which is never covered. If it was requested, we + // know this lookup will never be covered. + default: if (bitmap_is_set(table->read_set, field->field_index)) { - bitmap_set_bit(map, curr_bitmap_pos); - bitmap_set_bit(&maybe_covered_bitmap, field->field_index); + bitmap_free(&maybe_covered_bitmap); + bitmap_free(map); + return; } - curr_bitmap_pos++; - } else { - bitmap_free(&maybe_covered_bitmap); - bitmap_free(map); - return; - } - break; - // This column is a type which is never covered. If it was requested, we - // know this lookup will never be covered. 
- default: - if (bitmap_is_set(table->read_set, field->field_index)) { - bitmap_free(&maybe_covered_bitmap); - bitmap_free(map); - return; - } - break; + break; } } @@ -930,8 +1163,7 @@ void Rdb_key_def::get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const { - All values for columns that are prefix-only indexes are shorter or equal in length to the prefix */ -bool Rdb_key_def::covers_lookup(TABLE *const table, - const rocksdb::Slice *const unpack_info, +bool Rdb_key_def::covers_lookup(const rocksdb::Slice *const unpack_info, const MY_BITMAP *const lookup_bitmap) const { DBUG_ASSERT(lookup_bitmap != nullptr); if (!use_covered_bitmap_format() || lookup_bitmap->bitmap == nullptr) { @@ -960,6 +1192,14 @@ bool Rdb_key_def::covers_lookup(TABLE *const table, return bitmap_is_subset(lookup_bitmap, &covered_bitmap); } +/* Indicates that all key parts can be unpacked to cover a secondary lookup */ +bool Rdb_key_def::can_cover_lookup() const { + for (uint i = 0; i < m_key_parts; i++) { + if (!m_pack_info[i].m_covered) return false; + } + return true; +} + uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info, uchar *tuple, uchar *const packed_tuple, uchar *const pack_buffer, @@ -971,8 +1211,7 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info, /* NULL value. store '\0' so that it sorts before non-NULL values */ *tuple++ = 0; /* That's it, don't store anything else */ - if (n_null_fields) - (*n_null_fields)++; + if (n_null_fields) (*n_null_fields)++; return tuple; } else { /* Not a NULL value. 
Store '1' */ @@ -986,16 +1225,15 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info, Rdb_pack_field_context pack_ctx(unpack_info); // Set the offset for methods which do not take an offset as an argument - DBUG_ASSERT(is_storage_available(tuple - packed_tuple, - pack_info->m_max_image_len)); + DBUG_ASSERT( + is_storage_available(tuple - packed_tuple, pack_info->m_max_image_len)); - (this->*pack_info->m_pack_func)(pack_info, field, pack_buffer, &tuple, - &pack_ctx); + (pack_info->m_pack_func)(pack_info, field, pack_buffer, &tuple, &pack_ctx); /* Make "unpack info" to be stored in the value */ if (create_unpack_info) { - (this->*pack_info->m_make_unpack_info_func)(pack_info->m_charset_codec, - field, &pack_ctx); + (pack_info->m_make_unpack_info_func)(pack_info->m_charset_codec, field, + &pack_ctx); } return tuple; @@ -1014,8 +1252,8 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info, unpack_info_len OUT Unpack data length n_key_parts Number of keyparts to process. 0 means all of them. n_null_fields OUT Number of key fields with NULL value. - ttl_pk_offset OUT Offset of the ttl column if specified and in the key - + ttl_bytes IN Previous ttl bytes from old record for update case or + current ttl bytes from just packed primary key/value @detail Some callers do not need the unpack information, they can pass unpack_info=nullptr, unpack_info_len=nullptr. 
@@ -1024,12 +1262,14 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info, Length of the packed tuple */ -uint Rdb_key_def::pack_record( - const TABLE *const tbl, uchar *const pack_buffer, const uchar *const record, - uchar *const packed_tuple, Rdb_string_writer *const unpack_info, - const bool &should_store_row_debug_checksums, const longlong &hidden_pk_id, - uint n_key_parts, uint *const n_null_fields, uint *const ttl_pk_offset, - const char *const ttl_bytes) const { +uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer, + const uchar *const record, + uchar *const packed_tuple, + Rdb_string_writer *const unpack_info, + const bool should_store_row_debug_checksums, + const longlong hidden_pk_id, uint n_key_parts, + uint *const n_null_fields, + const char *const ttl_bytes) const { DBUG_ASSERT(tbl != nullptr); DBUG_ASSERT(pack_buffer != nullptr); DBUG_ASSERT(record != nullptr); @@ -1056,13 +1296,13 @@ uint Rdb_key_def::pack_record( // If hidden pk exists, but hidden pk wasnt passed in, we can't pack the // hidden key part. So we skip it (its always 1 part). - if (hidden_pk_exists && !hidden_pk_id && use_all_columns) + if (hidden_pk_exists && !hidden_pk_id && use_all_columns) { n_key_parts = m_key_parts - 1; - else if (use_all_columns) + } else if (use_all_columns) { n_key_parts = m_key_parts; + } - if (n_null_fields) - *n_null_fields = 0; + if (n_null_fields) *n_null_fields = 0; // Check if we need a covered bitmap. If it is certain that all key parts are // covering, we don't need one. @@ -1130,18 +1370,9 @@ uint Rdb_key_def::pack_record( uint null_offset = field->null_offset(tbl->record[0]); bool maybe_null = field->real_maybe_null(); - // Save the ttl duration offset in the key so we can store it in front of - // the record later. 
- if (ttl_pk_offset && m_ttl_duration > 0 && i == m_ttl_pk_key_part_offset) { - DBUG_ASSERT(field_check_field_name_match(field, m_ttl_column.c_str())); - DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG); - DBUG_ASSERT(field->key_type() == HA_KEYTYPE_ULONGLONG); - DBUG_ASSERT(!field->real_maybe_null()); - *ttl_pk_offset = tuple - packed_tuple; - } - - field->move_field(const_cast<uchar*>(record) + field_offset, - maybe_null ? const_cast<uchar*>(record) + null_offset : nullptr, + field->move_field( + const_cast<uchar *>(record) + field_offset, + maybe_null ? const_cast<uchar *>(record) + null_offset : nullptr, field->null_bit); // WARNING! Don't return without restoring field->ptr and field->null_ptr @@ -1226,7 +1457,7 @@ uint Rdb_key_def::pack_record( Length of the packed tuple */ -uint Rdb_key_def::pack_hidden_pk(const longlong &hidden_pk_id, +uint Rdb_key_def::pack_hidden_pk(const longlong hidden_pk_id, uchar *const packed_tuple) const { DBUG_ASSERT(packed_tuple != nullptr); @@ -1250,7 +1481,7 @@ uint Rdb_key_def::pack_hidden_pk(const longlong &hidden_pk_id, void Rdb_key_def::pack_with_make_sort_key( Rdb_field_packing *const fpi, Field *const field, uchar *const buf MY_ATTRIBUTE((__unused__)), uchar **dst, - Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const { + Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) { DBUG_ASSERT(fpi != nullptr); DBUG_ASSERT(field != nullptr); DBUG_ASSERT(dst != nullptr); @@ -1290,11 +1521,9 @@ int Rdb_key_def::compare_keys(const rocksdb::Slice *key1, Rdb_string_reader reader2(key2); // Skip the index number - if ((!reader1.read(INDEX_NUMBER_SIZE))) - return HA_EXIT_FAILURE; + if ((!reader1.read(INDEX_NUMBER_SIZE))) return HA_EXIT_FAILURE; - if ((!reader2.read(INDEX_NUMBER_SIZE))) - return HA_EXIT_FAILURE; + if ((!reader2.read(INDEX_NUMBER_SIZE))) return HA_EXIT_FAILURE; for (uint i = 0; i < m_key_parts; i++) { const Rdb_field_packing *const fpi = &m_pack_info[i]; @@ -1320,10 +1549,12 @@ int 
Rdb_key_def::compare_keys(const rocksdb::Slice *key1, const auto before_skip1 = reader1.get_current_ptr(); const auto before_skip2 = reader2.get_current_ptr(); DBUG_ASSERT(fpi->m_skip_func); - if ((this->*fpi->m_skip_func)(fpi, nullptr, &reader1)) + if ((fpi->m_skip_func)(fpi, nullptr, &reader1)) { return HA_EXIT_FAILURE; - if ((this->*fpi->m_skip_func)(fpi, nullptr, &reader2)) + } + if ((fpi->m_skip_func)(fpi, nullptr, &reader2)) { return HA_EXIT_FAILURE; + } const auto size1 = reader1.get_current_ptr() - before_skip1; const auto size2 = reader2.get_current_ptr() - before_skip2; if (size1 != size2) { @@ -1355,48 +1586,20 @@ size_t Rdb_key_def::key_length(const TABLE *const table, Rdb_string_reader reader(&key); - if ((!reader.read(INDEX_NUMBER_SIZE))) + if ((!reader.read(INDEX_NUMBER_SIZE))) { return size_t(-1); - + } for (uint i = 0; i < m_key_parts; i++) { const Rdb_field_packing *fpi = &m_pack_info[i]; const Field *field = nullptr; - if (m_index_type != INDEX_TYPE_HIDDEN_PRIMARY) + if (m_index_type != INDEX_TYPE_HIDDEN_PRIMARY) { field = fpi->get_field_in_table(table); - if ((this->*fpi->m_skip_func)(fpi, field, &reader)) - return size_t(-1); - } - return key.size() - reader.remaining_bytes(); -} - -int Rdb_key_def::unpack_field( - Rdb_field_packing *const fpi, - Field *const field, - Rdb_string_reader* reader, - const uchar *const default_value, - Rdb_string_reader* unp_reader) const -{ - if (fpi->m_maybe_null) { - const char *nullp; - if (!(nullp = reader->read(1))) { - return HA_EXIT_FAILURE; } - - if (*nullp == 0) { - /* Set the NULL-bit of this field */ - field->set_null(); - /* Also set the field to its default value */ - memcpy(field->ptr, default_value, field->pack_length()); - return HA_EXIT_SUCCESS; - } else if (*nullp == 1) { - field->set_notnull(); - } else { - return HA_EXIT_FAILURE; + if ((fpi->m_skip_func)(fpi, field, &reader)) { + return size_t(-1); } } - - return (this->*fpi->m_unpack_func)(fpi, field, field->ptr, reader, - unp_reader); + 
return key.size() - reader.remaining_bytes(); } /* @@ -1413,34 +1616,37 @@ int Rdb_key_def::unpack_field( int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, const rocksdb::Slice *const packed_key, const rocksdb::Slice *const unpack_info, - const bool &verify_row_debug_checksums) const { + const bool verify_row_debug_checksums) const { Rdb_string_reader reader(packed_key); Rdb_string_reader unp_reader = Rdb_string_reader::read_or_empty(unpack_info); - const bool is_hidden_pk = (m_index_type == INDEX_TYPE_HIDDEN_PRIMARY); - const bool hidden_pk_exists = table_has_hidden_pk(table); - const bool secondary_key = (m_index_type == INDEX_TYPE_SECONDARY); // There is no checksuming data after unpack_info for primary keys, because // the layout there is different. The checksum is verified in // ha_rocksdb::convert_record_from_storage_format instead. - DBUG_ASSERT_IMP(!secondary_key, !verify_row_debug_checksums); + DBUG_ASSERT_IMP(!(m_index_type == INDEX_TYPE_SECONDARY), + !verify_row_debug_checksums); // Skip the index number if ((!reader.read(INDEX_NUMBER_SIZE))) { return HA_ERR_ROCKSDB_CORRUPT_DATA; } - // For secondary keys, we expect the value field to contain unpack data and - // checksum data in that order. One or both can be missing, but they cannot - // be reordered. + // For secondary keys, we expect the value field to contain index flags, + // unpack data, and checksum data in that order. One or all can be missing, + // but they cannot be reordered. 
+ if (unp_reader.remaining_bytes()) { + if (m_index_type == INDEX_TYPE_SECONDARY && + m_total_index_flags_length > 0 && + !unp_reader.read(m_total_index_flags_length)) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + } + const char *unpack_header = unp_reader.get_current_ptr(); - const bool has_unpack_info = + bool has_unpack_info = unp_reader.remaining_bytes() && is_unpack_data_tag(unpack_header[0]); if (has_unpack_info) { - if ((m_index_type == INDEX_TYPE_SECONDARY && - m_total_index_flags_length > 0 && - !unp_reader.read(m_total_index_flags_length)) || - !unp_reader.read(get_unpack_header_size(unpack_header[0]))) { + if (!unp_reader.read(get_unpack_header_size(unpack_header[0]))) { return HA_ERR_ROCKSDB_CORRUPT_DATA; } } @@ -1448,9 +1654,7 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, // Read the covered bitmap MY_BITMAP covered_bitmap; my_bitmap_map covered_bits; - uint curr_bitmap_pos = 0; - - const bool has_covered_bitmap = + bool has_covered_bitmap = has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG); if (has_covered_bitmap) { bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false); @@ -1459,87 +1663,16 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, RDB_UNPACK_COVERED_DATA_LEN_SIZE); } - for (uint i = 0; i < m_key_parts; i++) { - Rdb_field_packing *const fpi = &m_pack_info[i]; - - /* - Hidden pk field is packed at the end of the secondary keys, but the SQL - layer does not know about it. Skip retrieving field if hidden pk. 
- */ - if ((secondary_key && hidden_pk_exists && i + 1 == m_key_parts) || - is_hidden_pk) { - DBUG_ASSERT(fpi->m_unpack_func); - if ((this->*fpi->m_skip_func)(fpi, nullptr, &reader)) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - continue; - } + int err = HA_EXIT_SUCCESS; - Field *const field = fpi->get_field_in_table(table); - bool covered_column = true; - if (has_covered_bitmap && field->real_type() == MYSQL_TYPE_VARCHAR && - !m_pack_info[i].m_covered) { - covered_column = curr_bitmap_pos < MAX_REF_PARTS && - bitmap_is_set(&covered_bitmap, curr_bitmap_pos); - curr_bitmap_pos++; - } - if (fpi->m_unpack_func && covered_column) { - /* It is possible to unpack this column. Do it. */ - - uint field_offset = field->ptr - table->record[0]; - uint null_offset = field->null_offset(); - bool maybe_null = field->real_maybe_null(); - field->move_field(buf + field_offset, - maybe_null ? buf + null_offset : nullptr, - field->null_bit); - // WARNING! Don't return without restoring field->ptr and field->null_ptr - - // If we need unpack info, but there is none, tell the unpack function - // this by passing unp_reader as nullptr. If we never read unpack_info - // during unpacking anyway, then there won't an error. - const bool maybe_missing_unpack = - !has_unpack_info && fpi->uses_unpack_info(); - int res = unpack_field(fpi, field, &reader, - table->s->default_values + field_offset, - maybe_missing_unpack ? nullptr : &unp_reader); - - // Restore field->ptr and field->null_ptr - field->move_field(table->record[0] + field_offset, - maybe_null ? table->record[0] + null_offset : nullptr, - field->null_bit); - - if (res != UNPACK_SUCCESS) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - } else { - /* It is impossible to unpack the column. Skip it. 
*/ - if (fpi->m_maybe_null) { - const char *nullp; - if (!(nullp = reader.read(1))) - return HA_ERR_ROCKSDB_CORRUPT_DATA; - if (*nullp == 0) { - /* This is a NULL value */ - continue; - } - /* If NULL marker is not '0', it can be only '1' */ - if (*nullp != 1) - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - if ((this->*fpi->m_skip_func)(fpi, field, &reader)) - return HA_ERR_ROCKSDB_CORRUPT_DATA; - - // If this is a space padded varchar, we need to skip the indicator - // bytes for trailing bytes. They're useless since we can't restore the - // field anyway. - // - // There is a special case for prefixed varchars where we do not - // generate unpack info, because we know prefixed varchars cannot be - // unpacked. In this case, it is not necessary to skip. - if (fpi->m_skip_func == &Rdb_key_def::skip_variable_space_pad && - !fpi->m_unpack_info_stores_value) { - unp_reader.read(fpi->m_unpack_info_uses_two_bytes ? 2 : 1); - } + Rdb_key_field_iterator iter( + this, m_pack_info, &reader, &unp_reader, table, has_unpack_info, + has_covered_bitmap ? 
&covered_bitmap : nullptr, buf); + while (iter.has_next()) { + err = iter.next(); + if (err) { + return err; } } @@ -1578,8 +1711,7 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, } } - if (reader.remaining_bytes()) - return HA_ERR_ROCKSDB_CORRUPT_DATA; + if (reader.remaining_bytes()) return HA_ERR_ROCKSDB_CORRUPT_DATA; return HA_EXIT_SUCCESS; } @@ -1588,7 +1720,7 @@ bool Rdb_key_def::table_has_hidden_pk(const TABLE *const table) { return table->s->primary_key == MAX_INDEXES; } -void Rdb_key_def::report_checksum_mismatch(const bool &is_key, +void Rdb_key_def::report_checksum_mismatch(const bool is_key, const char *const data, const size_t data_size) const { // NO_LINT_DEBUG @@ -1603,17 +1735,17 @@ void Rdb_key_def::report_checksum_mismatch(const bool &is_key, my_error(ER_INTERNAL_ERROR, MYF(0), "Record checksum mismatch"); } -bool Rdb_key_def::index_format_min_check(const int &pk_min, - const int &sk_min) const { +bool Rdb_key_def::index_format_min_check(const int pk_min, + const int sk_min) const { switch (m_index_type) { - case INDEX_TYPE_PRIMARY: - case INDEX_TYPE_HIDDEN_PRIMARY: - return (m_kv_format_version >= pk_min); - case INDEX_TYPE_SECONDARY: - return (m_kv_format_version >= sk_min); - default: - DBUG_ASSERT(0); - return false; + case INDEX_TYPE_PRIMARY: + case INDEX_TYPE_HIDDEN_PRIMARY: + return (m_kv_format_version >= pk_min); + case INDEX_TYPE_SECONDARY: + return (m_kv_format_version >= sk_min); + default: + DBUG_ASSERT(0); + return false; } } @@ -1628,9 +1760,8 @@ bool Rdb_key_def::index_format_min_check(const int &pk_min, int Rdb_key_def::skip_max_length(const Rdb_field_packing *const fpi, const Field *const field MY_ATTRIBUTE((__unused__)), - Rdb_string_reader *const reader) const { - if (!reader->read(fpi->m_max_image_len)) - return HA_EXIT_FAILURE; + Rdb_string_reader *const reader) { + if (!reader->read(fpi->m_max_image_len)) return HA_EXIT_FAILURE; return HA_EXIT_SUCCESS; } @@ -1639,27 +1770,26 @@ int 
Rdb_key_def::skip_max_length(const Rdb_field_packing *const fpi, split in the middle of an UTF-8 character. See the implementation of unpack_binary_or_utf8_varchar. */ - #define RDB_ESCAPE_LENGTH 9 #define RDB_LEGACY_ESCAPE_LENGTH RDB_ESCAPE_LENGTH static_assert((RDB_ESCAPE_LENGTH - 1) % 2 == 0, "RDB_ESCAPE_LENGTH-1 must be even."); -#define RDB_ENCODED_SIZE(len) \ - ((len + (RDB_ESCAPE_LENGTH - 2)) / (RDB_ESCAPE_LENGTH - 1)) * \ +#define RDB_ENCODED_SIZE(len) \ + ((len + (RDB_ESCAPE_LENGTH - 2)) / (RDB_ESCAPE_LENGTH - 1)) * \ RDB_ESCAPE_LENGTH -#define RDB_LEGACY_ENCODED_SIZE(len) \ - ((len + (RDB_LEGACY_ESCAPE_LENGTH - 1)) / (RDB_LEGACY_ESCAPE_LENGTH - 1)) * \ +#define RDB_LEGACY_ENCODED_SIZE(len) \ + ((len + (RDB_LEGACY_ESCAPE_LENGTH - 1)) / (RDB_LEGACY_ESCAPE_LENGTH - 1)) * \ RDB_LEGACY_ESCAPE_LENGTH /* Function of type rdb_index_field_skip_t */ -int Rdb_key_def::skip_variable_length( - const Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)), - const Field *const field, Rdb_string_reader *const reader) const { +int Rdb_key_def::skip_variable_length(const Rdb_field_packing *const fpi, + const Field *const field, + Rdb_string_reader *const reader) { const uchar *ptr; bool finished = false; @@ -1672,7 +1802,7 @@ int Rdb_key_def::skip_variable_length( dst_len = UINT_MAX; } - bool use_legacy_format = use_legacy_varbinary_format(); + bool use_legacy_format = fpi->m_use_legacy_varbinary_format; /* Decode the length-emitted encoding here */ while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) { @@ -1713,9 +1843,9 @@ const int VARCHAR_CMP_GREATER_THAN_SPACES = 3; Skip a keypart that uses Variable-Length Space-Padded encoding */ -int Rdb_key_def::skip_variable_space_pad( - const Rdb_field_packing *const fpi, const Field *const field, - Rdb_string_reader *const reader) const { +int Rdb_key_def::skip_variable_space_pad(const Rdb_field_packing *const fpi, + const Field *const field, + Rdb_string_reader *const reader) { const uchar *ptr; bool finished = 
false; @@ -1760,31 +1890,33 @@ int Rdb_key_def::skip_variable_space_pad( int Rdb_key_def::unpack_integer( Rdb_field_packing *const fpi, Field *const field, uchar *const to, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const { + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { const int length = fpi->m_max_image_len; const uchar *from; - if (!(from = (const uchar *)reader->read(length))) + if (!(from = (const uchar *)reader->read(length))) { return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ + } #ifdef WORDS_BIGENDIAN { - if (((Field_num *)field)->unsigned_flag) + if (static_cast<Field_num *>(field)->unsigned_flag) { to[0] = from[0]; - else - to[0] = (char)(from[0] ^ 128); // Reverse the sign bit. + } else { + to[0] = static_cast<char>(from[0] ^ 128); // Reverse the sign bit. + } memcpy(to + 1, from + 1, length - 1); } #else { const int sign_byte = from[0]; - if (((Field_num *)field)->unsigned_flag) + if (static_cast<Field_num *>(field)->unsigned_flag) { to[length - 1] = sign_byte; - else + } else { to[length - 1] = - static_cast<char>(sign_byte ^ 128); // Reverse the sign bit. - for (int i = 0, j = length - 1; i < length - 1; ++i, --j) - to[i] = from[j]; + static_cast<char>(sign_byte ^ 128); // Reverse the sign bit. 
+ } + for (int i = 0, j = length - 1; i < length - 1; ++i, --j) to[i] = from[j]; } #endif return UNPACK_SUCCESS; @@ -1826,13 +1958,14 @@ static void rdb_swap_float_bytes(uchar *const dst, const uchar *const src) { #endif int Rdb_key_def::unpack_floating_point( - uchar *const dst, Rdb_string_reader *const reader, const size_t &size, - const int &exp_digit, const uchar *const zero_pattern, - const uchar *const zero_val, - void (*swap_func)(uchar *, const uchar *)) const { + uchar *const dst, Rdb_string_reader *const reader, const size_t size, + const int exp_digit, const uchar *const zero_pattern, + const uchar *const zero_val, void (*swap_func)(uchar *, const uchar *)) { const uchar *const from = (const uchar *)reader->read(size); - if (from == nullptr) - return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ + if (from == nullptr) { + /* Mem-comparable image doesn't have enough bytes */ + return UNPACK_FAILURE; + } /* Check to see if the value is zero */ if (memcmp(from, zero_pattern, size) == 0) { @@ -1854,15 +1987,14 @@ int Rdb_key_def::unpack_floating_point( // If the high bit is set the original value was positive so // remove the high bit and subtract one from the exponent. ushort exp_part = ((ushort)tmp[0] << 8) | (ushort)tmp[1]; - exp_part &= 0x7FFF; // clear high bit; - exp_part -= (ushort)1 << (16 - 1 - exp_digit); // subtract from exponent + exp_part &= 0x7FFF; // clear high bit; + exp_part -= (ushort)1 << (16 - 1 - exp_digit); // subtract from exponent tmp[0] = (uchar)(exp_part >> 8); tmp[1] = (uchar)exp_part; } else { // Otherwise the original value was negative and all bytes have been // negated. 
- for (size_t ii = 0; ii < size; ii++) - tmp[ii] ^= 0xFF; + for (size_t ii = 0; ii < size; ii++) tmp[ii] ^= 0xFF; } #if !defined(WORDS_BIGENDIAN) @@ -1891,7 +2023,7 @@ int Rdb_key_def::unpack_double( Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)), Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const { + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { static double zero_val = 0.0; static const uchar zero_pattern[8] = {128, 0, 0, 0, 0, 0, 0, 0}; @@ -1915,7 +2047,7 @@ int Rdb_key_def::unpack_double( int Rdb_key_def::unpack_float( Rdb_field_packing *const fpi, Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const { + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { static float zero_val = 0.0; static const uchar zero_pattern[4] = {128, 0, 0, 0}; @@ -1932,12 +2064,14 @@ int Rdb_key_def::unpack_float( int Rdb_key_def::unpack_newdate( Rdb_field_packing *const fpi, Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const { + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { const char *from; DBUG_ASSERT(fpi->m_max_image_len == 3); - if (!(from = reader->read(3))) - return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ + if (!(from = reader->read(3))) { + /* Mem-comparable image doesn't have enough bytes */ + return UNPACK_FAILURE; + } field_ptr[0] = from[2]; field_ptr[1] = from[1]; @@ -1954,10 +2088,12 @@ int Rdb_key_def::unpack_newdate( int Rdb_key_def::unpack_binary_str( Rdb_field_packing *const fpi, Field *const field, uchar *const to, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) 
const { + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { const char *from; - if (!(from = reader->read(fpi->m_max_image_len))) - return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ + if (!(from = reader->read(fpi->m_max_image_len))) { + /* Mem-comparable image doesn't have enough bytes */ + return UNPACK_FAILURE; + } memcpy(to, from, fpi->m_max_image_len); return UNPACK_SUCCESS; @@ -1972,11 +2108,13 @@ int Rdb_key_def::unpack_binary_str( int Rdb_key_def::unpack_utf8_str( Rdb_field_packing *const fpi, Field *const field, uchar *dst, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const { + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { my_core::CHARSET_INFO *const cset = (my_core::CHARSET_INFO *)field->charset(); const uchar *src; - if (!(src = (const uchar *)reader->read(fpi->m_max_image_len))) - return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ + if (!(src = (const uchar *)reader->read(fpi->m_max_image_len))) { + /* Mem-comparable image doesn't have enough bytes */ + return UNPACK_FAILURE; + } const uchar *const src_end = src + fpi->m_max_image_len; uchar *const dst_end = dst + field->pack_length(); @@ -1986,8 +2124,7 @@ int Rdb_key_def::unpack_utf8_str( src += 2; int res = cset->cset->wc_mb(cset, wc, dst, dst_end); DBUG_ASSERT(res > 0 && res <= 3); - if (res < 0) - return UNPACK_FAILURE; + if (res < 0) return UNPACK_FAILURE; dst += res; } @@ -2015,9 +2152,9 @@ int Rdb_key_def::unpack_utf8_str( See pack_variable_format for the newer algorithm. 
*/ void Rdb_key_def::pack_legacy_variable_format( - const uchar *src, // The data to encode - size_t src_len, // The length of the data to encode - uchar **dst) const // The location to encode the data + const uchar *src, // The data to encode + size_t src_len, // The length of the data to encode + uchar **dst) // The location to encode the data { size_t copy_len; size_t padding_bytes; @@ -2067,9 +2204,9 @@ void Rdb_key_def::pack_legacy_variable_format( - 10 bytes is encoded as X X X X X X X X 9 X X 0 0 0 0 0 0 2 */ void Rdb_key_def::pack_variable_format( - const uchar *src, // The data to encode - size_t src_len, // The length of the data to encode - uchar **dst) const // The location to encode the data + const uchar *src, // The data to encode + size_t src_len, // The length of the data to encode + uchar **dst) // The location to encode the data { uchar *ptr = *dst; @@ -2108,7 +2245,7 @@ void Rdb_key_def::pack_variable_format( void Rdb_key_def::pack_with_varchar_encoding( Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst, - Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const { + Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) { const CHARSET_INFO *const charset = field->charset(); Field_varstring *const field_var = (Field_varstring *)field; @@ -2120,7 +2257,7 @@ void Rdb_key_def::pack_with_varchar_encoding( field_var->ptr + field_var->length_bytes, value_length, 0); /* Got a mem-comparable image in 'buf'. 
Now, produce varlength encoding */ - if (use_legacy_varbinary_format()) { + if (fpi->m_use_legacy_varbinary_format) { pack_legacy_variable_format(buf, xfrm_len, dst); } else { pack_variable_format(buf, xfrm_len, dst); @@ -2132,14 +2269,13 @@ void Rdb_key_def::pack_with_varchar_encoding( sequence of strings in space_xfrm */ -static int -rdb_compare_string_with_spaces(const uchar *buf, const uchar *const buf_end, - const std::vector<uchar> *const space_xfrm) { +static int rdb_compare_string_with_spaces( + const uchar *buf, const uchar *const buf_end, + const std::vector<uchar> *const space_xfrm) { int cmp = 0; while (buf < buf_end) { size_t bytes = std::min((size_t)(buf_end - buf), space_xfrm->size()); - if ((cmp = memcmp(buf, space_xfrm->data(), bytes)) != 0) - break; + if ((cmp = memcmp(buf, space_xfrm->data(), bytes)) != 0) break; buf += bytes; } return cmp; @@ -2219,7 +2355,7 @@ static const int RDB_TRIMMED_CHARS_OFFSET = 8; void Rdb_key_def::pack_with_varchar_space_pad( Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst, - Rdb_pack_field_context *const pack_ctx) const { + Rdb_pack_field_context *const pack_ctx) { Rdb_string_writer *const unpack_info = pack_ctx->writer; const CHARSET_INFO *const charset = field->charset(); const auto field_var = static_cast<Field_varstring *>(field); @@ -2252,7 +2388,7 @@ void Rdb_key_def::pack_with_varchar_space_pad( if (padding_bytes) { memcpy(ptr, fpi->space_xfrm->data(), padding_bytes); ptr += padding_bytes; - *ptr = VARCHAR_CMP_EQUAL_TO_SPACES; // last segment + *ptr = VARCHAR_CMP_EQUAL_TO_SPACES; // last segment } else { // Compare the string suffix with a hypothetical infinite string of // spaces. 
It could be that the first difference is beyond the end of @@ -2260,19 +2396,18 @@ void Rdb_key_def::pack_with_varchar_space_pad( const int cmp = rdb_compare_string_with_spaces(buf, buf_end, fpi->space_xfrm); - if (cmp < 0) + if (cmp < 0) { *ptr = VARCHAR_CMP_LESS_THAN_SPACES; - else if (cmp > 0) + } else if (cmp > 0) { *ptr = VARCHAR_CMP_GREATER_THAN_SPACES; - else { + } else { // It turns out all the rest are spaces. *ptr = VARCHAR_CMP_EQUAL_TO_SPACES; } } encoded_size += fpi->m_segment_size; - if (*(ptr++) == VARCHAR_CMP_EQUAL_TO_SPACES) - break; + if (*(ptr++) == VARCHAR_CMP_EQUAL_TO_SPACES) break; } // m_unpack_info_stores_value means unpack_info stores the whole original @@ -2305,8 +2440,7 @@ void Rdb_key_def::pack_with_varchar_space_pad( last chunk in the input. This is based on the old legacy format - see pack_legacy_variable_format. */ -uint Rdb_key_def::calc_unpack_legacy_variable_format(uchar flag, - bool *done) const { +uint Rdb_key_def::calc_unpack_legacy_variable_format(uchar flag, bool *done) { uint pad = 255 - flag; uint used_bytes = RDB_LEGACY_ESCAPE_LENGTH - 1 - pad; if (used_bytes > RDB_LEGACY_ESCAPE_LENGTH - 1) { @@ -2322,7 +2456,7 @@ uint Rdb_key_def::calc_unpack_legacy_variable_format(uchar flag, last chunk in the input. This is based on the new format - see pack_variable_format. */ -uint Rdb_key_def::calc_unpack_variable_format(uchar flag, bool *done) const { +uint Rdb_key_def::calc_unpack_variable_format(uchar flag, bool *done) { // Check for invalid flag values if (flag > RDB_ESCAPE_LENGTH) { return (uint)-1; @@ -2345,13 +2479,13 @@ uint Rdb_key_def::calc_unpack_variable_format(uchar flag, bool *done) const { treated as a wide-character and converted to its multibyte equivalent in the output. 
*/ -static int -unpack_charset(const CHARSET_INFO *cset, // character set information - const uchar *src, // source data to unpack - uint src_len, // length of source data - uchar *dst, // destination of unpacked data - uint dst_len, // length of destination data - uint *used_bytes) // output number of bytes used +static int unpack_charset( + const CHARSET_INFO *cset, // character set information + const uchar *src, // source data to unpack + uint src_len, // length of source data + uchar *dst, // destination of unpacked data + uint dst_len, // length of destination data + uint *used_bytes) // output number of bytes used { if (src_len & 1) { /* @@ -2386,7 +2520,7 @@ unpack_charset(const CHARSET_INFO *cset, // character set information int Rdb_key_def::unpack_binary_or_utf8_varchar( Rdb_field_packing *const fpi, Field *const field, uchar *dst, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const { + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { const uchar *ptr; size_t len = 0; bool finished = false; @@ -2396,7 +2530,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar( // How much we can unpack size_t dst_len = field_var->pack_length() - field_var->length_bytes; - bool use_legacy_format = use_legacy_varbinary_format(); + bool use_legacy_format = fpi->m_use_legacy_varbinary_format; /* Decode the length-emitted encoding here */ while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) { @@ -2460,8 +2594,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar( */ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad( Rdb_field_packing *const fpi, Field *const field, uchar *dst, - Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const { + Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader) { const uchar *ptr; size_t len = 0; bool finished = false; @@ -2482,8 +2615,9 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad( space_padding_bytes = 
-(static_cast<int>(extra_spaces) - RDB_TRIMMED_CHARS_OFFSET); extra_spaces = 0; - } else + } else { extra_spaces -= RDB_TRIMMED_CHARS_OFFSET; + } space_padding_bytes *= fpi->space_xfrm_len; @@ -2491,16 +2625,17 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad( while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) { const char last_byte = ptr[fpi->m_segment_size - 1]; size_t used_bytes; - if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) // this is the last segment + if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) // this is the last segment { - if (space_padding_bytes > (fpi->m_segment_size - 1)) - return UNPACK_FAILURE; // Cannot happen, corrupted data + if (space_padding_bytes > (fpi->m_segment_size - 1)) { + return UNPACK_FAILURE; // Cannot happen, corrupted data + } used_bytes = (fpi->m_segment_size - 1) - space_padding_bytes; finished = true; } else { if (last_byte != VARCHAR_CMP_LESS_THAN_SPACES && last_byte != VARCHAR_CMP_GREATER_THAN_SPACES) { - return UNPACK_FAILURE; // Invalid value + return UNPACK_FAILURE; // Invalid value } used_bytes = fpi->m_segment_size - 1; } @@ -2523,14 +2658,12 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad( const CHARSET_INFO *cset = fpi->m_varchar_charset; int res = cset->cset->wc_mb(cset, wc, dst, dst_end); DBUG_ASSERT(res <= 3); - if (res <= 0) - return UNPACK_FAILURE; + if (res <= 0) return UNPACK_FAILURE; dst += res; len += res; } } else { - if (dst + used_bytes > dst_end) - return UNPACK_FAILURE; + if (dst + used_bytes > dst_end) return UNPACK_FAILURE; memcpy(dst, ptr, used_bytes); dst += used_bytes; len += used_bytes; @@ -2540,8 +2673,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad( if (extra_spaces) { // Both binary and UTF-8 charset store space as ' ', // so the following is ok: - if (dst + extra_spaces > dst_end) - return UNPACK_FAILURE; + if (dst + extra_spaces > dst_end) return UNPACK_FAILURE; memset(dst, fpi->m_varchar_charset->pad_char, extra_spaces); len += extra_spaces; } 
@@ -2549,8 +2681,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad( } } - if (!finished) - return UNPACK_FAILURE; + if (!finished) return UNPACK_FAILURE; /* Save the length */ if (field_var->length_bytes == 1) { @@ -2570,7 +2701,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad( void Rdb_key_def::make_unpack_unknown( const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)), - const Field *const field, Rdb_pack_field_context *const pack_ctx) const { + const Field *const field, Rdb_pack_field_context *const pack_ctx) { pack_ctx->writer->write(field->ptr, field->pack_length()); } @@ -2585,7 +2716,7 @@ void Rdb_key_def::make_unpack_unknown( void Rdb_key_def::dummy_make_unpack_info( const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)), const Field *field MY_ATTRIBUTE((__unused__)), - Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) const { + Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) { // Do nothing } @@ -2596,7 +2727,7 @@ void Rdb_key_def::dummy_make_unpack_info( int Rdb_key_def::unpack_unknown(Rdb_field_packing *const fpi, Field *const field, uchar *const dst, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const { + Rdb_string_reader *const unp_reader) { const uchar *ptr; const uint len = fpi->m_unpack_data_len; // We don't use anything from the key, so skip over it. @@ -2619,7 +2750,7 @@ int Rdb_key_def::unpack_unknown(Rdb_field_packing *const fpi, void Rdb_key_def::make_unpack_unknown_varchar( const Rdb_collation_codec *const codec MY_ATTRIBUTE((__unused__)), - const Field *const field, Rdb_pack_field_context *const pack_ctx) const { + const Field *const field, Rdb_pack_field_context *const pack_ctx) { const auto f = static_cast<const Field_varstring *>(field); uint len = f->length_bytes == 1 ? 
(uint)*f->ptr : uint2korr(f->ptr); len += f->length_bytes; @@ -2640,17 +2771,17 @@ void Rdb_key_def::make_unpack_unknown_varchar( make_unpack_unknown, unpack_unknown */ -int Rdb_key_def::unpack_unknown_varchar( - Rdb_field_packing *const fpi, Field *const field, uchar *dst, - Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const { +int Rdb_key_def::unpack_unknown_varchar(Rdb_field_packing *const fpi, + Field *const field, uchar *dst, + Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader) { const uchar *ptr; uchar *const d0 = dst; const auto f = static_cast<Field_varstring *>(field); dst += f->length_bytes; const uint len_bytes = f->length_bytes; // We don't use anything from the key, so skip over it. - if ((this->*fpi->m_skip_func)(fpi, field, reader)) { + if ((fpi->m_skip_func)(fpi, field, reader)) { return UNPACK_FAILURE; } @@ -2682,8 +2813,8 @@ static void rdb_write_unpack_simple(Rdb_bit_writer *const writer, static uint rdb_read_unpack_simple(Rdb_bit_reader *const reader, const Rdb_collation_codec *const codec, - const uchar *const src, - const size_t &src_len, uchar *const dst) { + const uchar *const src, const size_t src_len, + uchar *const dst) { for (uint i = 0; i < src_len; i++) { if (codec->m_dec_size[src[i]] > 0) { uint *ret; @@ -2710,7 +2841,7 @@ static uint rdb_read_unpack_simple(Rdb_bit_reader *const reader, void Rdb_key_def::make_unpack_simple_varchar( const Rdb_collation_codec *const codec, const Field *const field, - Rdb_pack_field_context *const pack_ctx) const { + Rdb_pack_field_context *const pack_ctx) { const auto f = static_cast<const Field_varstring *>(field); uchar *const src = f->ptr + f->length_bytes; const size_t src_len = @@ -2732,8 +2863,7 @@ void Rdb_key_def::make_unpack_simple_varchar( int Rdb_key_def::unpack_simple_varchar_space_pad( Rdb_field_packing *const fpi, Field *const field, uchar *dst, - Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const { + 
Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader) { const uchar *ptr; size_t len = 0; bool finished = false; @@ -2759,20 +2889,22 @@ int Rdb_key_def::unpack_simple_varchar_space_pad( if (extra_spaces <= 8) { space_padding_bytes = -(static_cast<int>(extra_spaces) - 8); extra_spaces = 0; - } else + } else { extra_spaces -= 8; + } space_padding_bytes *= fpi->space_xfrm_len; /* Decode the length-emitted encoding here */ while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) { const char last_byte = - ptr[fpi->m_segment_size - 1]; // number of padding bytes + ptr[fpi->m_segment_size - 1]; // number of padding bytes size_t used_bytes; if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) { // this is the last one - if (space_padding_bytes > (fpi->m_segment_size - 1)) - return UNPACK_FAILURE; // Cannot happen, corrupted data + if (space_padding_bytes > (fpi->m_segment_size - 1)) { + return UNPACK_FAILURE; // Cannot happen, corrupted data + } used_bytes = (fpi->m_segment_size - 1) - space_padding_bytes; finished = true; } else { @@ -2799,8 +2931,7 @@ int Rdb_key_def::unpack_simple_varchar_space_pad( if (finished) { if (extra_spaces) { - if (dst + extra_spaces > dst_end) - return UNPACK_FAILURE; + if (dst + extra_spaces > dst_end) return UNPACK_FAILURE; // pad_char has a 1-byte form in all charsets that // are handled by rdb_init_collation_mapping. 
memset(dst, field_var->charset()->pad_char, extra_spaces); @@ -2810,8 +2941,7 @@ int Rdb_key_def::unpack_simple_varchar_space_pad( } } - if (!finished) - return UNPACK_FAILURE; + if (!finished) return UNPACK_FAILURE; /* Save the length */ if (field_var->length_bytes == 1) { @@ -2834,9 +2964,9 @@ int Rdb_key_def::unpack_simple_varchar_space_pad( The VARCHAR variant is in make_unpack_simple_varchar */ -void Rdb_key_def::make_unpack_simple( - const Rdb_collation_codec *const codec, const Field *const field, - Rdb_pack_field_context *const pack_ctx) const { +void Rdb_key_def::make_unpack_simple(const Rdb_collation_codec *const codec, + const Field *const field, + Rdb_pack_field_context *const pack_ctx) { const uchar *const src = field->ptr; Rdb_bit_writer bit_writer(pack_ctx->writer); rdb_write_unpack_simple(&bit_writer, codec, src, field->pack_length()); @@ -2850,7 +2980,7 @@ int Rdb_key_def::unpack_simple(Rdb_field_packing *const fpi, Field *const field MY_ATTRIBUTE((__unused__)), uchar *const dst, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const { + Rdb_string_reader *const unp_reader) { const uchar *ptr; const uint len = fpi->m_max_image_len; Rdb_bit_reader bit_reader(unp_reader); @@ -2869,7 +2999,7 @@ const int RDB_SPACE_XFRM_SIZE = 32; // A class holding information about how space character is represented in a // charset. 
class Rdb_charset_space_info { -public: + public: Rdb_charset_space_info(const Rdb_charset_space_info &) = delete; Rdb_charset_space_info &operator=(const Rdb_charset_space_info &) = delete; Rdb_charset_space_info() = default; @@ -2927,16 +3057,17 @@ static void rdb_get_mem_comparable_space(const CHARSET_INFO *const cs, const size_t space_mb_len = cs->cset->wc_mb( cs, (my_wc_t)cs->pad_char, space_mb, space_mb + sizeof(space_mb)); - uchar space[20]; // mem-comparable image of the space character + // mem-comparable image of the space character + std::array<uchar, 20> space; - const size_t space_len = cs->coll->strnxfrm(cs, space, sizeof(space), 1, - space_mb, space_mb_len, 0); + const size_t space_len = cs->coll->strnxfrm( + cs, space.data(), sizeof(space), 1, space_mb, space_mb_len, 0); Rdb_charset_space_info *const info = new Rdb_charset_space_info; info->space_xfrm_len = space_len; info->space_mb_len = space_mb_len; while (info->spaces_xfrm.size() < RDB_SPACE_XFRM_SIZE) { - info->spaces_xfrm.insert(info->spaces_xfrm.end(), space, - space + space_len); + info->spaces_xfrm.insert(info->spaces_xfrm.end(), space.data(), + space.data() + space_len); } rdb_mem_comparable_space[cs->number].reset(info); } @@ -2959,8 +3090,8 @@ bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs) { !(cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)); } -static const Rdb_collation_codec * -rdb_init_collation_mapping(const my_core::CHARSET_INFO *const cs) { +static const Rdb_collation_codec *rdb_init_collation_mapping( + const my_core::CHARSET_INFO *const cs) { DBUG_ASSERT(cs && cs->state & MY_CS_AVAILABLE); const Rdb_collation_codec *codec = rdb_collation_data[cs->number]; @@ -2996,11 +3127,10 @@ rdb_init_collation_mapping(const my_core::CHARSET_INFO *const cs) { } } - cur->m_make_unpack_info_func = { - &Rdb_key_def::make_unpack_simple_varchar, - &Rdb_key_def::make_unpack_simple}; - cur->m_unpack_func = {&Rdb_key_def::unpack_simple_varchar_space_pad, - 
&Rdb_key_def::unpack_simple}; + cur->m_make_unpack_info_func = {Rdb_key_def::make_unpack_simple_varchar, + Rdb_key_def::make_unpack_simple}; + cur->m_unpack_func = {Rdb_key_def::unpack_simple_varchar_space_pad, + Rdb_key_def::unpack_simple}; } else { // Out of luck for now. } @@ -3069,9 +3199,9 @@ static int get_segment_size_from_collation(const CHARSET_INFO *const cs) { */ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, - const Field *const field, const uint &keynr_arg, - const uint &key_part_arg, - const uint16 &key_length) { + const Field *const field, const uint keynr_arg, + const uint key_part_arg, + const uint16 key_length) { int res = false; enum_field_types type = field ? field->real_type() : MYSQL_TYPE_LONGLONG; @@ -3082,86 +3212,96 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, m_unpack_func = nullptr; m_make_unpack_info_func = nullptr; m_unpack_data_len = 0; - space_xfrm = nullptr; // safety - + space_xfrm = nullptr; // safety + // whether to use legacy format for varchar + m_use_legacy_varbinary_format = false; + // ha_rocksdb::index_flags() will pass key_descr == null to + // see whether field(column) can be read-only reads through return value, + // but the legacy vs. new varchar format doesn't affect return value. + // Just change m_use_legacy_varbinary_format to true if key_descr isn't given. + if (!key_descr || key_descr->use_legacy_varbinary_format()) { + m_use_legacy_varbinary_format = true; + } /* Calculate image length. By default, is is pack_length() */ m_max_image_len = field ? 
field->pack_length() : ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN; - m_skip_func = &Rdb_key_def::skip_max_length; - m_pack_func = &Rdb_key_def::pack_with_make_sort_key; + m_skip_func = Rdb_key_def::skip_max_length; + m_pack_func = Rdb_key_def::pack_with_make_sort_key; m_covered = false; switch (type) { - case MYSQL_TYPE_LONGLONG: - case MYSQL_TYPE_LONG: - case MYSQL_TYPE_INT24: - case MYSQL_TYPE_SHORT: - case MYSQL_TYPE_TINY: - m_unpack_func = &Rdb_key_def::unpack_integer; - m_covered = true; - return true; - - case MYSQL_TYPE_DOUBLE: - m_unpack_func = &Rdb_key_def::unpack_double; - m_covered = true; - return true; + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_TINY: + m_unpack_func = Rdb_key_def::unpack_integer; + m_covered = true; + return true; - case MYSQL_TYPE_FLOAT: - m_unpack_func = &Rdb_key_def::unpack_float; - m_covered = true; - return true; + case MYSQL_TYPE_DOUBLE: + m_unpack_func = Rdb_key_def::unpack_double; + m_covered = true; + return true; - case MYSQL_TYPE_NEWDECIMAL: - /* - Decimal is packed with Field_new_decimal::make_sort_key, which just - does memcpy. - Unpacking decimal values was supported only after fix for issue#253, - because of that ha_rocksdb::get_storage_type() handles decimal values - in a special way. - */ - case MYSQL_TYPE_DATETIME2: - case MYSQL_TYPE_TIMESTAMP2: - /* These are packed with Field_temporal_with_date_and_timef::make_sort_key */ - case MYSQL_TYPE_TIME2: /* TIME is packed with Field_timef::make_sort_key */ - case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */ - /* Everything that comes here is packed with just a memcpy(). */ - m_unpack_func = &Rdb_key_def::unpack_binary_str; - m_covered = true; - return true; + case MYSQL_TYPE_FLOAT: + m_unpack_func = Rdb_key_def::unpack_float; + m_covered = true; + return true; - case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_NEWDECIMAL: /* - This is packed by Field_newdate::make_sort_key. 
It assumes the data is - 3 bytes, and packing is done by swapping the byte order (for both big- - and little-endian) + Decimal is packed with Field_new_decimal::make_sort_key, which just + does memcpy. + Unpacking decimal values was supported only after fix for issue#253, + because of that ha_rocksdb::get_storage_type() handles decimal values + in a special way. */ - m_unpack_func = &Rdb_key_def::unpack_newdate; - m_covered = true; - return true; - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_BLOB: { - if (key_descr) { - // The my_charset_bin collation is special in that it will consider - // shorter strings sorting as less than longer strings. - // - // See Field_blob::make_sort_key for details. - m_max_image_len = + case MYSQL_TYPE_DATETIME2: + case MYSQL_TYPE_TIMESTAMP2: + /* These are packed with Field_temporal_with_date_and_timef::make_sort_key + */ + case MYSQL_TYPE_TIME2: /* TIME is packed with Field_timef::make_sort_key */ + case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */ + /* Everything that comes here is packed with just a memcpy(). */ + m_unpack_func = Rdb_key_def::unpack_binary_str; + m_covered = true; + return true; + + case MYSQL_TYPE_NEWDATE: + /* + This is packed by Field_newdate::make_sort_key. It assumes the data is + 3 bytes, and packing is done by swapping the byte order (for both big- + and little-endian) + */ + m_unpack_func = Rdb_key_def::unpack_newdate; + m_covered = true; + return true; + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: { + if (key_descr) { + // The my_charset_bin collation is special in that it will consider + // shorter strings sorting as less than longer strings. + // + // See Field_blob::make_sort_key for details. + m_max_image_len = key_length + (field->charset()->number == COLLATION_BINARY - ? 
reinterpret_cast<const Field_blob *>(field) - ->pack_length_no_ptr() - : 0); - // Return false because indexes on text/blob will always require - // a prefix. With a prefix, the optimizer will not be able to do an - // index-only scan since there may be content occuring after the prefix - // length. - return false; + ? reinterpret_cast<const Field_blob *>(field) + ->pack_length_no_ptr() + : 0); + // Return false because indexes on text/blob will always require + // a prefix. With a prefix, the optimizer will not be able to do an + // index-only scan since there may be content occuring after the prefix + // length. + return false; + } + break; } - } - default: - break; + default: + break; } m_unpack_info_stores_value = false; @@ -3184,8 +3324,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, // The default for varchar is variable-length, without space-padding for // comparisons m_varchar_charset = cs; - m_skip_func = &Rdb_key_def::skip_variable_length; - m_pack_func = &Rdb_key_def::pack_with_varchar_encoding; + m_skip_func = Rdb_key_def::skip_variable_length; + m_pack_func = Rdb_key_def::pack_with_varchar_encoding; if (!key_descr || key_descr->use_legacy_varbinary_format()) { m_max_image_len = RDB_LEGACY_ENCODED_SIZE(m_max_image_len); } else { @@ -3210,8 +3350,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, // - For VARBINARY(N), values may have different lengths, so we're using // variable-length encoding. This is also the only charset where the // values are not space-padded for comparison. - m_unpack_func = is_varchar ? &Rdb_key_def::unpack_binary_or_utf8_varchar - : &Rdb_key_def::unpack_binary_str; + m_unpack_func = is_varchar ? 
Rdb_key_def::unpack_binary_or_utf8_varchar + : Rdb_key_def::unpack_binary_str; res = true; } else if (cs->number == COLLATION_LATIN1_BIN || cs->number == COLLATION_UTF8_BIN) { // For _bin collations, mem-comparable form of the string is the string @@ -3221,10 +3361,10 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, // VARCHARs - are compared as if they were space-padded - but are // not actually space-padded (reading the value back produces the // original value, without the padding) - m_unpack_func = &Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad; - m_skip_func = &Rdb_key_def::skip_variable_space_pad; - m_pack_func = &Rdb_key_def::pack_with_varchar_space_pad; - m_make_unpack_info_func = &Rdb_key_def::dummy_make_unpack_info; + m_unpack_func = Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad; + m_skip_func = Rdb_key_def::skip_variable_space_pad; + m_pack_func = Rdb_key_def::pack_with_varchar_space_pad; + m_make_unpack_info_func = Rdb_key_def::dummy_make_unpack_info; m_segment_size = get_segment_size_from_collation(cs); m_max_image_len = (max_image_len_before_chunks / (m_segment_size - 1) + 1) * @@ -3234,15 +3374,15 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, } else { // SQL layer pads CHAR(N) values to their maximum length. // We just store that and restore it back. - m_unpack_func = (cs->number == COLLATION_LATIN1_BIN) ? - &Rdb_key_def::unpack_binary_str - : &Rdb_key_def::unpack_utf8_str; + m_unpack_func = (cs->number == COLLATION_LATIN1_BIN) + ? Rdb_key_def::unpack_binary_str + : Rdb_key_def::unpack_utf8_str; } res = true; } else { // This is [VAR]CHAR(n) and the collation is not $(charset_name)_bin - res = true; // index-only scans are possible + res = true; // index-only scans are possible m_unpack_data_len = is_varchar ? 0 : field->field_length; const uint idx = is_varchar ? 
0 : 1; const Rdb_collation_codec *codec = nullptr; @@ -3258,8 +3398,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, // Currently we handle these collations as NO_PAD, even if they have // PAD_SPACE attribute. if (cs->levels_for_order == 1) { - m_pack_func = &Rdb_key_def::pack_with_varchar_space_pad; - m_skip_func = &Rdb_key_def::skip_variable_space_pad; + m_pack_func = Rdb_key_def::pack_with_varchar_space_pad; + m_skip_func = Rdb_key_def::skip_variable_space_pad; m_segment_size = get_segment_size_from_collation(cs); m_max_image_len = (max_image_len_before_chunks / (m_segment_size - 1) + 1) * @@ -3268,14 +3408,16 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, &space_mb_len); } else { // NO_LINT_DEBUG - sql_print_warning("RocksDB: you're trying to create an index " - "with a multi-level collation %s", - cs->name); + sql_print_warning( + "RocksDB: you're trying to create an index " + "with a multi-level collation %s", + cs->name); // NO_LINT_DEBUG - sql_print_warning("MyRocks will handle this collation internally " - " as if it had a NO_PAD attribute."); - m_pack_func = &Rdb_key_def::pack_with_varchar_encoding; - m_skip_func = &Rdb_key_def::skip_variable_length; + sql_print_warning( + "MyRocks will handle this collation internally " + " as if it had a NO_PAD attribute."); + m_pack_func = Rdb_key_def::pack_with_varchar_encoding; + m_skip_func = Rdb_key_def::skip_variable_length; } } @@ -3291,18 +3433,18 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, // form. Our way of restoring the original value is to keep a copy of // the original value in unpack_info. m_unpack_info_stores_value = true; - m_make_unpack_info_func = - is_varchar ? &Rdb_key_def::make_unpack_unknown_varchar - : &Rdb_key_def::make_unpack_unknown; - m_unpack_func = is_varchar ? &Rdb_key_def::unpack_unknown_varchar - : &Rdb_key_def::unpack_unknown; + m_make_unpack_info_func = is_varchar + ? 
Rdb_key_def::make_unpack_unknown_varchar + : Rdb_key_def::make_unpack_unknown; + m_unpack_func = is_varchar ? Rdb_key_def::unpack_unknown_varchar + : Rdb_key_def::unpack_unknown; } else { // Same as above: we don't know how to restore the value from its // mem-comparable form. // Here, we just indicate to the SQL layer we can't do it. DBUG_ASSERT(m_unpack_func == nullptr); m_unpack_info_stores_value = false; - res = false; // Indicate that index-only reads are not possible + res = false; // Indicate that index-only reads are not possible } } @@ -3344,7 +3486,7 @@ Field *Rdb_field_packing::get_field_in_table(const TABLE *const tbl) const { } void Rdb_field_packing::fill_hidden_pk_val(uchar **dst, - const longlong &hidden_pk_id) const { + const longlong hidden_pk_id) const { DBUG_ASSERT(m_max_image_len == 8); String to; @@ -3387,8 +3529,8 @@ Rdb_tbl_def::~Rdb_tbl_def() { */ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict, - rocksdb::WriteBatch *const batch, uchar *const key, - const size_t &keylen) { + rocksdb::WriteBatch *const batch, + const rocksdb::Slice &key) { StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes; indexes.alloc(Rdb_key_def::VERSION_SIZE + m_key_count * Rdb_key_def::PACKED_SIZE * 2); @@ -3429,10 +3571,12 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict, } rdb_netstr_append_uint32(&indexes, cf_id); - rdb_netstr_append_uint32(&indexes, kd.m_index_number); + + uint32 index_number = kd.get_index_number(); + rdb_netstr_append_uint32(&indexes, index_number); struct Rdb_index_info index_info; - index_info.m_gl_index_id = {cf_id, kd.m_index_number}; + index_info.m_gl_index_id = {cf_id, index_number}; index_info.m_index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST; index_info.m_index_type = kd.m_index_type; index_info.m_kv_version = kd.m_kv_format_version; @@ -3442,10 +3586,9 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict, dict->add_or_update_index_cf_mapping(batch, &index_info); } - const rocksdb::Slice skey((char 
*)key, keylen); const rocksdb::Slice svalue(indexes.c_ptr(), indexes.length()); - dict->put_key(batch, skey, svalue); + dict->put_key(batch, key, svalue); return false; } @@ -3461,7 +3604,6 @@ bool Rdb_key_def::has_index_flag(uint32 index_flags, enum INDEX_FLAG flag) { uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags, enum INDEX_FLAG flag, uint *const length) { - DBUG_ASSERT_IMP(flag != MAX_FLAG, Rdb_key_def::has_index_flag(index_flags, flag)); @@ -3496,7 +3638,9 @@ void Rdb_key_def::write_index_flag_field(Rdb_string_writer *const buf, void Rdb_tbl_def::check_if_is_mysql_system_table() { static const char *const system_dbs[] = { - "mysql", "performance_schema", "information_schema", + "mysql", + "performance_schema", + "information_schema", }; m_is_mysql_system_table = false; @@ -3508,6 +3652,15 @@ void Rdb_tbl_def::check_if_is_mysql_system_table() { } } +void Rdb_tbl_def::check_and_set_read_free_rpl_table() { + m_is_read_free_rpl_table = +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + rdb_read_free_regex_handler.matches(base_tablename()); +#else + false; +#endif +} + void Rdb_tbl_def::set_name(const std::string &name) { int err MY_ATTRIBUTE((__unused__)); @@ -3533,30 +3686,6 @@ GL_INDEX_ID Rdb_tbl_def::get_autoincr_gl_index_id() { return GL_INDEX_ID(); } -/* - Static function of type my_hash_get_key that gets invoked by - the m_ddl_hash object of type my_core::HASH. - It manufactures a key (db+table name in our case) from a record - (Rdb_tbl_def in our case). -*/ -const uchar * -Rdb_ddl_manager::get_hash_key(Rdb_tbl_def *const rec, size_t *const length, - my_bool not_used MY_ATTRIBUTE((__unused__))) { - const std::string &dbname_tablename = rec->full_tablename(); - *length = dbname_tablename.size(); - return reinterpret_cast<const uchar *>(dbname_tablename.c_str()); -} - -/* - Static function of type void (*my_hash_free_element_func_t)(void*) that gets - invoked by the m_ddl_hash object of type my_core::HASH. 
- It deletes a record (Rdb_tbl_def in our case). -*/ -void Rdb_ddl_manager::free_hash_elem(void *const data) { - Rdb_tbl_def *elem = reinterpret_cast<Rdb_tbl_def *>(data); - delete elem; -} - void Rdb_ddl_manager::erase_index_num(const GL_INDEX_ID &gl_index_id) { m_index_num_to_keydef.erase(gl_index_id); } @@ -3579,7 +3708,7 @@ void Rdb_ddl_manager::remove_uncommitted_keydefs( mysql_rwlock_unlock(&m_rwlock); } -namespace // anonymous namespace = not visible outside this source file +namespace // anonymous namespace = not visible outside this source file { struct Rdb_validate_tbls : public Rdb_tables_scanner { using tbl_info_t = std::pair<std::string, bool>; @@ -3597,7 +3726,7 @@ struct Rdb_validate_tbls : public Rdb_tables_scanner { bool check_frm_file(const std::string &fullpath, const std::string &dbname, const std::string &tablename, bool *has_errors); }; -} // anonymous namespace +} // anonymous namespace /* Get a list of tables that we expect to have .frm files for. This will use the @@ -3641,6 +3770,7 @@ bool Rdb_validate_tbls::check_frm_file(const std::string &fullpath, bool is_sequence; enum Table_type type = dd_frm_type(nullptr, fullfilename.c_ptr(), &eng_type_str, &is_sequence); if (type == TABLE_TYPE_UNKNOWN) { + // NO_LINT_DEBUG sql_print_warning("RocksDB: Failed to open/read .from file: %s", fullfilename.ptr()); return false; @@ -3655,10 +3785,12 @@ bool Rdb_validate_tbls::check_frm_file(const std::string &fullpath, */ tbl_info_t element(tablename, false); if (m_list.count(dbname) == 0 || m_list[dbname].erase(element) == 0) { - sql_print_warning("RocksDB: Schema mismatch - " - "A .frm file exists for table %s.%s, " - "but that table is not registered in RocksDB", - dbname.c_str(), tablename.c_str()); + // NO_LINT_DEBUG + sql_print_warning( + "RocksDB: Schema mismatch - " + "A .frm file exists for table %s.%s, " + "but that table is not registered in RocksDB", + dbname.c_str(), tablename.c_str()); *has_errors = true; } } else if 
(!strncmp(eng_type_str.str, "partition", eng_type_str.length)) { @@ -3686,6 +3818,7 @@ bool Rdb_validate_tbls::scan_for_frms(const std::string &datadir, /* Access the directory */ if (dir_info == nullptr) { + // NO_LINT_DEBUG sql_print_warning("RocksDB: Could not open database directory: %s", fullpath.c_str()); return false; @@ -3732,6 +3865,7 @@ bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir, dir_info = my_dir(datadir.c_str(), MYF(MY_DONT_SORT | MY_WANT_STAT)); if (dir_info == nullptr) { + // NO_LINT_DEBUG sql_print_warning("RocksDB: could not open datadir: %s", datadir.c_str()); return false; } @@ -3739,12 +3873,10 @@ bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir, file_info = dir_info->dir_entry; for (uint ii = 0; ii < dir_info->number_of_files; ii++, file_info++) { /* Ignore files/dirs starting with '.' */ - if (file_info->name[0] == '.') - continue; + if (file_info->name[0] == '.') continue; /* Ignore all non-directory files */ - if (!MY_S_ISDIR(file_info->mystat->st_mode)) - continue; + if (!MY_S_ISDIR(file_info->mystat->st_mode)) continue; /* Scan all the .frm files in the directory */ if (!scan_for_frms(datadir, file_info->name, has_errors)) { @@ -3777,8 +3909,9 @@ bool Rdb_ddl_manager::validate_auto_incr() { GL_INDEX_ID gl_index_id; if (key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE && - memcmp(key.data(), auto_incr_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) + memcmp(key.data(), auto_incr_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) { break; + } if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3) { return false; @@ -3795,10 +3928,11 @@ bool Rdb_ddl_manager::validate_auto_incr() { rdb_netbuf_read_gl_index(&ptr, &gl_index_id); if (!m_dict->get_index_info(gl_index_id, nullptr)) { // NO_LINT_DEBUG - sql_print_warning("RocksDB: AUTOINC mismatch - " - "Index number (%u, %u) found in AUTOINC " - "but does not exist as a DDL entry", - gl_index_id.cf_id, gl_index_id.index_id); + sql_print_warning( + "RocksDB: 
AUTOINC mismatch - " + "Index number (%u, %u) found in AUTOINC " + "but does not exist as a DDL entry", + gl_index_id.cf_id, gl_index_id.index_id); return false; } @@ -3806,10 +3940,11 @@ bool Rdb_ddl_manager::validate_auto_incr() { const int version = rdb_netbuf_read_uint16(&ptr); if (version > Rdb_key_def::AUTO_INCREMENT_VERSION) { // NO_LINT_DEBUG - sql_print_warning("RocksDB: AUTOINC mismatch - " - "Index number (%u, %u) found in AUTOINC " - "is on unsupported version %d", - gl_index_id.cf_id, gl_index_id.index_id, version); + sql_print_warning( + "RocksDB: AUTOINC mismatch - " + "Index number (%u, %u) found in AUTOINC " + "is on unsupported version %d", + gl_index_id.cf_id, gl_index_id.index_id, version); return false; } } @@ -3846,10 +3981,12 @@ bool Rdb_ddl_manager::validate_schemas(void) { */ for (const auto &db : table_list.m_list) { for (const auto &table : db.second) { - sql_print_warning("RocksDB: Schema mismatch - " - "Table %s.%s is registered in RocksDB " - "but does not have a .frm file", - db.first.c_str(), table.first.c_str()); + // NO_LINT_DEBUG + sql_print_warning( + "RocksDB: Schema mismatch - " + "Table %s.%s is registered in RocksDB " + "but does not have a .frm file", + db.first.c_str(), table.first.c_str()); has_errors = true; } } @@ -3859,14 +3996,9 @@ bool Rdb_ddl_manager::validate_schemas(void) { bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, Rdb_cf_manager *const cf_manager, - const uint32_t &validate_tables) { - const ulong TABLE_HASH_SIZE = 32; + const uint32_t validate_tables) { m_dict = dict_arg; mysql_rwlock_init(0, &m_rwlock); - (void)my_hash_init(&m_ddl_hash, - /*system_charset_info*/ &my_charset_bin, TABLE_HASH_SIZE, - 0, 0, (my_hash_get_key)Rdb_ddl_manager::get_hash_key, - Rdb_ddl_manager::free_hash_elem, 0); /* Read the data dictionary and populate the hash */ uchar ddl_entry[Rdb_key_def::INDEX_NUMBER_SIZE]; @@ -3888,10 +4020,12 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, const rocksdb::Slice 
val = it->value(); if (key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE && - memcmp(key.data(), ddl_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) + memcmp(key.data(), ddl_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) { break; + } if (key.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Table_store: key has length %d (corruption?)", (int)key.size()); return true; @@ -3903,6 +4037,7 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, // Now, read the DDLs. const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE; if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Table_store: invalid keylist for table %s", tdef->full_tablename().c_str()); return true; @@ -3913,9 +4048,11 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, ptr = reinterpret_cast<const uchar *>(val.data()); const int version = rdb_netbuf_read_uint16(&ptr); if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) { - sql_print_error("RocksDB: DDL ENTRY Version was not expected." - "Expected: %d, Actual: %d", - Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: DDL ENTRY Version was not expected." 
+ "Expected: %d, Actual: %d", + Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version); return true; } ptr_end = ptr + real_val_size; @@ -3925,32 +4062,40 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, uint flags = 0; struct Rdb_index_info index_info; if (!m_dict->get_index_info(gl_index_id, &index_info)) { - sql_print_error("RocksDB: Could not get index information " - "for Index Number (%u,%u), table %s", - gl_index_id.cf_id, gl_index_id.index_id, - tdef->full_tablename().c_str()); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Could not get index information " + "for Index Number (%u,%u), table %s", + gl_index_id.cf_id, gl_index_id.index_id, + tdef->full_tablename().c_str()); return true; } if (max_index_id_in_dict < gl_index_id.index_id) { - sql_print_error("RocksDB: Found max index id %u from data dictionary " - "but also found larger index id %u from dictionary. " - "This should never happen and possibly a bug.", - max_index_id_in_dict, gl_index_id.index_id); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Found max index id %u from data dictionary " + "but also found larger index id %u from dictionary. " + "This should never happen and possibly a bug.", + max_index_id_in_dict, gl_index_id.index_id); return true; } if (!m_dict->get_cf_flags(gl_index_id.cf_id, &flags)) { - sql_print_error("RocksDB: Could not get Column Family Flags " - "for CF Number %d, table %s", - gl_index_id.cf_id, tdef->full_tablename().c_str()); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Could not get Column Family Flags " + "for CF Number %d, table %s", + gl_index_id.cf_id, tdef->full_tablename().c_str()); return true; } if ((flags & Rdb_key_def::AUTO_CF_FLAG) != 0) { // The per-index cf option is deprecated. Make sure we don't have the // flag set in any existing database. 
NO_LINT_DEBUG - sql_print_error("RocksDB: The defunct AUTO_CF_FLAG is enabled for CF " - "number %d, table %s", - gl_index_id.cf_id, tdef->full_tablename().c_str()); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: The defunct AUTO_CF_FLAG is enabled for CF " + "number %d, table %s", + gl_index_id.cf_id, tdef->full_tablename().c_str()); } rocksdb::ColumnFamilyHandle *const cfh = @@ -3988,11 +4133,13 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, if (validate_tables > 0) { std::string msg; if (!validate_schemas()) { - msg = "RocksDB: Problems validating data dictionary " - "against .frm files, exiting"; + msg = + "RocksDB: Problems validating data dictionary " + "against .frm files, exiting"; } else if (!validate_auto_incr()) { - msg = "RocksDB: Problems validating auto increment values in " - "data dictionary, exiting"; + msg = + "RocksDB: Problems validating auto increment values in " + "data dictionary, exiting"; } if (validate_tables == 1 && !msg.empty()) { // NO_LINT_DEBUG @@ -4014,20 +4161,23 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, return true; } delete it; + // NO_LINT_DEBUG sql_print_information("RocksDB: Table_store: loaded DDL data for %d tables", i); return false; } Rdb_tbl_def *Rdb_ddl_manager::find(const std::string &table_name, - const bool &lock) { + const bool lock) { if (lock) { mysql_rwlock_rdlock(&m_rwlock); } - Rdb_tbl_def *const rec = reinterpret_cast<Rdb_tbl_def *>(my_hash_search( - &m_ddl_hash, reinterpret_cast<const uchar *>(table_name.c_str()), - table_name.size())); + Rdb_tbl_def *rec = nullptr; + const auto it = m_ddl_map.find(table_name); + if (it != m_ddl_map.end()) { + rec = it->second; + } if (lock) { mysql_rwlock_unlock(&m_rwlock); @@ -4040,8 +4190,8 @@ Rdb_tbl_def *Rdb_ddl_manager::find(const std::string &table_name, // lock on m_rwlock to make sure the Rdb_key_def is not discarded while we // are finding it. 
Copying it into 'ret' increments the count making sure // that the object will not be discarded until we are finished with it. -std::shared_ptr<const Rdb_key_def> -Rdb_ddl_manager::safe_find(GL_INDEX_ID gl_index_id) { +std::shared_ptr<const Rdb_key_def> Rdb_ddl_manager::safe_find( + GL_INDEX_ID gl_index_id) { std::shared_ptr<const Rdb_key_def> ret(nullptr); mysql_rwlock_rdlock(&m_rwlock); @@ -4071,8 +4221,8 @@ Rdb_ddl_manager::safe_find(GL_INDEX_ID gl_index_id) { } // this method assumes at least read-only lock on m_rwlock -const std::shared_ptr<Rdb_key_def> & -Rdb_ddl_manager::find(GL_INDEX_ID gl_index_id) { +const std::shared_ptr<Rdb_key_def> &Rdb_ddl_manager::find( + GL_INDEX_ID gl_index_id) { auto it = m_index_num_to_keydef.find(gl_index_id); if (it != m_index_num_to_keydef.end()) { auto table_def = find(it->second.first, false); @@ -4095,8 +4245,8 @@ Rdb_ddl_manager::find(GL_INDEX_ID gl_index_id) { // this method returns the name of the table based on an index id. It acquires // a read lock on m_rwlock. 
-const std::string -Rdb_ddl_manager::safe_get_table_name(const GL_INDEX_ID &gl_index_id) { +const std::string Rdb_ddl_manager::safe_get_table_name( + const GL_INDEX_ID &gl_index_id) { std::string ret; mysql_rwlock_rdlock(&m_rwlock); auto it = m_index_num_to_keydef.find(gl_index_id); @@ -4145,7 +4295,7 @@ void Rdb_ddl_manager::adjust_stats( } } -void Rdb_ddl_manager::persist_stats(const bool &sync) { +void Rdb_ddl_manager::persist_stats(const bool sync) { mysql_rwlock_wrlock(&m_rwlock); const auto local_stats2store = std::move(m_stats2store); m_stats2store.clear(); @@ -4170,18 +4320,15 @@ void Rdb_ddl_manager::persist_stats(const bool &sync) { int Rdb_ddl_manager::put_and_write(Rdb_tbl_def *const tbl, rocksdb::WriteBatch *const batch) { - uchar buf[FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE]; - uint pos = 0; + Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> buf_writer; - rdb_netbuf_store_index(buf, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); - pos += Rdb_key_def::INDEX_NUMBER_SIZE; + buf_writer.write_index(Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); const std::string &dbname_tablename = tbl->full_tablename(); - memcpy(buf + pos, dbname_tablename.c_str(), dbname_tablename.size()); - pos += dbname_tablename.size(); + buf_writer.write(dbname_tablename.c_str(), dbname_tablename.size()); int res; - if ((res = tbl->put_dict(m_dict, batch, buf, pos))) { + if ((res = tbl->put_dict(m_dict, batch, buf_writer.to_slice()))) { return res; } if ((res = put(tbl))) { @@ -4192,62 +4339,58 @@ int Rdb_ddl_manager::put_and_write(Rdb_tbl_def *const tbl, /* Return 0 - ok, other value - error */ /* TODO: - This function modifies m_ddl_hash and m_index_num_to_keydef. + This function modifies m_ddl_map and m_index_num_to_keydef. 
However, these changes need to be reversed if dict_manager.commit fails See the discussion here: https://reviews.facebook.net/D35925#inline-259167 Tracked by https://github.com/facebook/mysql-5.6/issues/33 */ -int Rdb_ddl_manager::put(Rdb_tbl_def *const tbl, const bool &lock) { +int Rdb_ddl_manager::put(Rdb_tbl_def *const tbl, const bool lock) { Rdb_tbl_def *rec; - my_bool result; const std::string &dbname_tablename = tbl->full_tablename(); - if (lock) - mysql_rwlock_wrlock(&m_rwlock); + if (lock) mysql_rwlock_wrlock(&m_rwlock); // We have to do this find because 'tbl' is not yet in the list. We need // to find the one we are replacing ('rec') rec = find(dbname_tablename, false); if (rec) { - // this will free the old record. - my_hash_delete(&m_ddl_hash, reinterpret_cast<uchar *>(rec)); + // Free the old record. + delete rec; + m_ddl_map.erase(dbname_tablename); } - result = my_hash_insert(&m_ddl_hash, reinterpret_cast<uchar *>(tbl)); + m_ddl_map.emplace(dbname_tablename, tbl); for (uint keyno = 0; keyno < tbl->m_key_count; keyno++) { m_index_num_to_keydef[tbl->m_key_descr_arr[keyno]->get_gl_index_id()] = std::make_pair(dbname_tablename, keyno); } + tbl->check_and_set_read_free_rpl_table(); - if (lock) - mysql_rwlock_unlock(&m_rwlock); - return result; + if (lock) mysql_rwlock_unlock(&m_rwlock); + return 0; } void Rdb_ddl_manager::remove(Rdb_tbl_def *const tbl, rocksdb::WriteBatch *const batch, - const bool &lock) { - if (lock) - mysql_rwlock_wrlock(&m_rwlock); - - uchar buf[FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE]; - uint pos = 0; - - rdb_netbuf_store_index(buf, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); - pos += Rdb_key_def::INDEX_NUMBER_SIZE; + const bool lock) { + if (lock) mysql_rwlock_wrlock(&m_rwlock); + Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> key_writer; + key_writer.write_index(Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); const std::string &dbname_tablename = tbl->full_tablename(); - memcpy(buf + pos, dbname_tablename.c_str(), 
dbname_tablename.size()); - pos += dbname_tablename.size(); + key_writer.write(dbname_tablename.c_str(), dbname_tablename.size()); - const rocksdb::Slice tkey((char *)buf, pos); - m_dict->delete_key(batch, tkey); + m_dict->delete_key(batch, key_writer.to_slice()); - /* The following will also delete the object: */ - my_hash_delete(&m_ddl_hash, reinterpret_cast<uchar *>(tbl)); + const auto it = m_ddl_map.find(dbname_tablename); + if (it != m_ddl_map.end()) { + // Free Rdb_tbl_def + delete it->second; - if (lock) - mysql_rwlock_unlock(&m_rwlock); + m_ddl_map.erase(it); + } + + if (lock) mysql_rwlock_unlock(&m_rwlock); } bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to, @@ -4255,8 +4398,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to, Rdb_tbl_def *rec; Rdb_tbl_def *new_rec; bool res = true; - uchar new_buf[FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE]; - uint new_pos = 0; + Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> new_buf_writer; mysql_rwlock_wrlock(&m_rwlock); if (!(rec = find(from, false))) { @@ -4278,18 +4420,16 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to, rec->m_key_descr_arr = nullptr; // Create a new key - rdb_netbuf_store_index(new_buf, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); - new_pos += Rdb_key_def::INDEX_NUMBER_SIZE; + new_buf_writer.write_index(Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); const std::string &dbname_tablename = new_rec->full_tablename(); - memcpy(new_buf + new_pos, dbname_tablename.c_str(), dbname_tablename.size()); - new_pos += dbname_tablename.size(); + new_buf_writer.write(dbname_tablename.c_str(), dbname_tablename.size()); // Create a key to add - if (!new_rec->put_dict(m_dict, batch, new_buf, new_pos)) { + if (!new_rec->put_dict(m_dict, batch, new_buf_writer.to_slice())) { remove(rec, batch, false); put(new_rec, false); - res = false; // ok + res = false; // ok } mysql_rwlock_unlock(&m_rwlock); @@ -4297,13 +4437,17 @@ bool 
Rdb_ddl_manager::rename(const std::string &from, const std::string &to, } void Rdb_ddl_manager::cleanup() { - my_hash_free(&m_ddl_hash); + for (const auto &kv : m_ddl_map) { + delete kv.second; + } + m_ddl_map.clear(); + mysql_rwlock_destroy(&m_rwlock); m_sequence.cleanup(); } int Rdb_ddl_manager::scan_for_tables(Rdb_tables_scanner *const tables_scanner) { - int i, ret; + int ret; Rdb_tbl_def *rec; DBUG_ASSERT(tables_scanner != nullptr); @@ -4311,14 +4455,11 @@ int Rdb_ddl_manager::scan_for_tables(Rdb_tables_scanner *const tables_scanner) { mysql_rwlock_rdlock(&m_rwlock); ret = 0; - i = 0; - while (( - rec = reinterpret_cast<Rdb_tbl_def *>(my_hash_element(&m_ddl_hash, i)))) { + for (const auto &kv : m_ddl_map) { + rec = kv.second; ret = tables_scanner->add_table(rec); - if (ret) - break; - i++; + if (ret) break; } mysql_rwlock_unlock(&m_rwlock); @@ -4333,9 +4474,9 @@ bool Rdb_binlog_manager::init(Rdb_dict_manager *const dict_arg) { DBUG_ASSERT(dict_arg != nullptr); m_dict = dict_arg; - rdb_netbuf_store_index(m_key_buf, Rdb_key_def::BINLOG_INFO_INDEX_NUMBER); - m_key_slice = rocksdb::Slice(reinterpret_cast<char *>(m_key_buf), - Rdb_key_def::INDEX_NUMBER_SIZE); + m_key_writer.reset(); + m_key_writer.write_index(Rdb_key_def::BINLOG_INFO_INDEX_NUMBER); + m_key_slice = m_key_writer.to_slice(); return false; } @@ -4357,10 +4498,36 @@ void Rdb_binlog_manager::update(const char *const binlog_name, if (binlog_name && binlog_pos) { // max binlog length (512) + binlog pos (4) + binlog gtid (57) < 1024 const size_t RDB_MAX_BINLOG_INFO_LEN = 1024; - uchar value_buf[RDB_MAX_BINLOG_INFO_LEN]; - m_dict->put_key( - batch, m_key_slice, - pack_value(value_buf, binlog_name, binlog_pos, NULL)); + Rdb_buf_writer<RDB_MAX_BINLOG_INFO_LEN> value_writer; + + // store version + value_writer.write_uint16(Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION); + + // store binlog file name length + DBUG_ASSERT(strlen(binlog_name) <= FN_REFLEN); + const uint16_t binlog_name_len = strlen(binlog_name); + 
value_writer.write_uint16(binlog_name_len); + + // store binlog file name + value_writer.write(binlog_name, binlog_name_len); + + // store binlog pos + value_writer.write_uint32(binlog_pos); + +#ifdef MARIADB_MERGE_2019 + // store binlog gtid length. + // If gtid was not set, store 0 instead + const uint16_t binlog_max_gtid_len = + binlog_max_gtid ? strlen(binlog_max_gtid) : 0; + value_writer.write_uint16(binlog_max_gtid_len); + + if (binlog_max_gtid_len > 0) { + // store binlog gtid + value_writer.write(binlog_max_gtid, binlog_max_gtid_len); + } +#endif + + m_dict->put_key(batch, m_key_slice, value_writer.to_slice()); } } @@ -4382,67 +4549,15 @@ bool Rdb_binlog_manager::read(char *const binlog_name, rocksdb::Status status = m_dict->get_value(m_key_slice, &value); if (status.ok()) { if (!unpack_value((const uchar *)value.c_str(), value.size(), binlog_name, binlog_pos, - binlog_gtid)) + binlog_gtid)) { ret = true; + } } } return ret; } /** - Pack binlog_name, binlog_pos, binlog_gtid into preallocated - buffer, then converting and returning a RocksDB Slice - @param buf Preallocated buffer to set binlog info. 
- @param binlog_name Binlog name - @param binlog_pos Binlog pos - @return rocksdb::Slice converted from buf and its length -*/ -rocksdb::Slice -Rdb_binlog_manager::pack_value(uchar *const buf, const char *const binlog_name, - const my_off_t &binlog_pos, - const char *const binlog_gtid) const { - uint pack_len = 0; - - // store version - rdb_netbuf_store_uint16(buf, Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION); - pack_len += Rdb_key_def::VERSION_SIZE; - - // store binlog file name length - DBUG_ASSERT(strlen(binlog_name) <= FN_REFLEN); - const uint16_t binlog_name_len = (uint16_t)strlen(binlog_name); - rdb_netbuf_store_uint16(buf + pack_len, binlog_name_len); - pack_len += sizeof(uint16); - - // store binlog file name - memcpy(buf + pack_len, binlog_name, binlog_name_len); - pack_len += binlog_name_len; - - // store binlog pos - rdb_netbuf_store_uint32(buf + pack_len, binlog_pos); - pack_len += sizeof(uint32); - - // store binlog gtid length. - // If gtid was not set, store 0 instead -#ifdef MARIAROCKS_NOT_YET - const uint16_t binlog_gtid_len = binlog_gtid ? 
(uint16_t)strlen(binlog_gtid) : 0; - rdb_netbuf_store_uint16(buf + pack_len, binlog_gtid_len); -#endif - pack_len += sizeof(uint16); - // MariaDB: - rdb_netbuf_store_uint16(buf + pack_len, 0); - -#ifdef MARIAROCKS_NOT_YET - if (binlog_gtid_len > 0) { - // store binlog gtid - memcpy(buf + pack_len, binlog_gtid, binlog_gtid_len); - pack_len += binlog_gtid_len; - } -#endif - - return rocksdb::Slice((char *)buf, pack_len); -} - -/** Unpack value then split into binlog_name, binlog_pos (and binlog_gtid) @param[IN] value Binlog state info fetched from RocksDB @param[OUT] binlog_name Binlog name @@ -4466,8 +4581,7 @@ bool Rdb_binlog_manager::unpack_value(const uchar *const value, const uint16_t version = rdb_netbuf_to_uint16(value); pack_len += Rdb_key_def::VERSION_SIZE; - if (version != Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION) - return true; + if (version != Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION) return true; if ((value_size -= sizeof(uint16)) < 0) return true; @@ -4525,7 +4639,7 @@ bool Rdb_binlog_manager::unpack_value(const uchar *const value, @param[IN] write_batch Handle to storage engine writer. 
*/ void Rdb_binlog_manager::update_slave_gtid_info( - const uint &id, const char *const db, const char *const gtid, + const uint id, const char *const db, const char *const gtid, rocksdb::WriteBatchBase *const write_batch) { if (id && db && gtid) { // Make sure that if the slave_gtid_info table exists we have a @@ -4545,41 +4659,30 @@ void Rdb_binlog_manager::update_slave_gtid_info( String value; // Build key - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE + 4] = {0}; - uchar *buf = key_buf; - rdb_netbuf_store_index(buf, kd->get_index_number()); - buf += Rdb_key_def::INDEX_NUMBER_SIZE; - rdb_netbuf_store_uint32(buf, id); - buf += 4; - const rocksdb::Slice key_slice = - rocksdb::Slice((const char *)key_buf, buf - key_buf); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE + 4> key_writer; + key_writer.write_index(kd->get_index_number()); + key_writer.write_uint32(id); // Build value - uchar value_buf[128] = {0}; + Rdb_buf_writer<128> value_writer; DBUG_ASSERT(gtid); const uint db_len = strlen(db); const uint gtid_len = strlen(gtid); - buf = value_buf; // 1 byte used for flags. Empty here. - buf++; + value_writer.write_byte(0); // Write column 1. DBUG_ASSERT(strlen(db) <= 64); - rdb_netbuf_store_byte(buf, db_len); - buf++; - memcpy(buf, db, db_len); - buf += db_len; + value_writer.write_byte(db_len); + value_writer.write(db, db_len); // Write column 2. 
DBUG_ASSERT(gtid_len <= 56); - rdb_netbuf_store_byte(buf, gtid_len); - buf++; - memcpy(buf, gtid, gtid_len); - buf += gtid_len; - const rocksdb::Slice value_slice = - rocksdb::Slice((const char *)value_buf, buf - value_buf); + value_writer.write_byte(gtid_len); + value_writer.write(gtid, gtid_len); - write_batch->Put(kd->get_cf(), key_slice, value_slice); + write_batch->Put(kd->get_cf(), key_writer.to_slice(), + value_writer.to_slice()); } } @@ -4651,16 +4754,15 @@ rocksdb::Iterator *Rdb_dict_manager::new_iterator() const { } int Rdb_dict_manager::commit(rocksdb::WriteBatch *const batch, - const bool &sync) const { - if (!batch) - return HA_ERR_ROCKSDB_COMMIT_FAILED; + const bool sync) const { + if (!batch) return HA_ERR_ROCKSDB_COMMIT_FAILED; int res = HA_EXIT_SUCCESS; rocksdb::WriteOptions options; options.sync = sync; rocksdb::TransactionDBWriteOptimizations optimize; optimize.skip_concurrency_control = true; rocksdb::Status s = m_db->Write(options, optimize, batch); - res = !s.ok(); // we return true when something failed + res = !s.ok(); // we return true when something failed if (res) { rdb_handle_io_error(s, RDB_IO_ERROR_DICT_COMMIT); } @@ -4681,54 +4783,44 @@ void Rdb_dict_manager::dump_index_id(uchar *const netbuf, void Rdb_dict_manager::delete_with_prefix( rocksdb::WriteBatch *const batch, Rdb_key_def::DATA_DICT_TYPE dict_type, const GL_INDEX_ID &gl_index_id) const { - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; - dump_index_id(key_buf, dict_type, gl_index_id); - rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer; + dump_index_id(&key_writer, dict_type, gl_index_id); - delete_key(batch, key); + delete_key(batch, key_writer.to_slice()); } void Rdb_dict_manager::add_or_update_index_cf_mapping( rocksdb::WriteBatch *batch, struct Rdb_index_info *const index_info) const { - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; - uchar value_buf[256] = {0}; - 
dump_index_id(key_buf, Rdb_key_def::INDEX_INFO, index_info->m_gl_index_id); - const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); - - uchar *ptr = value_buf; - rdb_netbuf_store_uint16(ptr, Rdb_key_def::INDEX_INFO_VERSION_LATEST); - ptr += RDB_SIZEOF_INDEX_INFO_VERSION; - rdb_netbuf_store_byte(ptr, index_info->m_index_type); - ptr += RDB_SIZEOF_INDEX_TYPE; - rdb_netbuf_store_uint16(ptr, index_info->m_kv_version); - ptr += RDB_SIZEOF_KV_VERSION; - rdb_netbuf_store_uint32(ptr, index_info->m_index_flags); - ptr += RDB_SIZEOF_INDEX_FLAGS; - rdb_netbuf_store_uint64(ptr, index_info->m_ttl_duration); - ptr += ROCKSDB_SIZEOF_TTL_RECORD; - - const rocksdb::Slice value = - rocksdb::Slice((char *)value_buf, ptr - value_buf); - batch->Put(m_system_cfh, key, value); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer; + dump_index_id(&key_writer, Rdb_key_def::INDEX_INFO, + index_info->m_gl_index_id); + + Rdb_buf_writer<256> value_writer; + + value_writer.write_uint16(Rdb_key_def::INDEX_INFO_VERSION_LATEST); + value_writer.write_byte(index_info->m_index_type); + value_writer.write_uint16(index_info->m_kv_version); + value_writer.write_uint32(index_info->m_index_flags); + value_writer.write_uint64(index_info->m_ttl_duration); + + batch->Put(m_system_cfh, key_writer.to_slice(), value_writer.to_slice()); } void Rdb_dict_manager::add_cf_flags(rocksdb::WriteBatch *const batch, - const uint32_t &cf_id, - const uint32_t &cf_flags) const { + const uint32_t cf_id, + const uint32_t cf_flags) const { DBUG_ASSERT(batch != nullptr); - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2] = {0}; - uchar value_buf[Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE] = - {0}; - rdb_netbuf_store_uint32(key_buf, Rdb_key_def::CF_DEFINITION); - rdb_netbuf_store_uint32(key_buf + Rdb_key_def::INDEX_NUMBER_SIZE, cf_id); - const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); - - rdb_netbuf_store_uint16(value_buf, 
Rdb_key_def::CF_DEFINITION_VERSION); - rdb_netbuf_store_uint32(value_buf + Rdb_key_def::VERSION_SIZE, cf_flags); - const rocksdb::Slice value = - rocksdb::Slice((char *)value_buf, sizeof(value_buf)); - batch->Put(m_system_cfh, key, value); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 2> key_writer; + key_writer.write_uint32(Rdb_key_def::CF_DEFINITION); + key_writer.write_uint32(cf_id); + + Rdb_buf_writer<Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE> + value_writer; + value_writer.write_uint16(Rdb_key_def::CF_DEFINITION_VERSION); + value_writer.write_uint32(cf_flags); + + batch->Put(m_system_cfh, key_writer.to_slice(), value_writer.to_slice()); } void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch, @@ -4741,7 +4833,6 @@ void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch, bool Rdb_dict_manager::get_index_info( const GL_INDEX_ID &gl_index_id, struct Rdb_index_info *const index_info) const { - if (index_info) { index_info->m_gl_index_id = gl_index_id; } @@ -4749,11 +4840,10 @@ bool Rdb_dict_manager::get_index_info( bool found = false; bool error = false; std::string value; - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; - dump_index_id(key_buf, Rdb_key_def::INDEX_INFO, gl_index_id); - const rocksdb::Slice &key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer; + dump_index_id(&key_writer, Rdb_key_def::INDEX_INFO, gl_index_id); - const rocksdb::Status &status = get_value(key, &value); + const rocksdb::Status &status = get_value(key_writer.to_slice(), &value); if (status.ok()) { if (!index_info) { return true; @@ -4765,73 +4855,73 @@ bool Rdb_dict_manager::get_index_info( ptr += RDB_SIZEOF_INDEX_INFO_VERSION; switch (index_info->m_index_dict_version) { - case Rdb_key_def::INDEX_INFO_VERSION_FIELD_FLAGS: - /* Sanity check to prevent reading bogus TTL record. 
*/ - if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION + - RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION + - RDB_SIZEOF_INDEX_FLAGS + - ROCKSDB_SIZEOF_TTL_RECORD) { - error = true; + case Rdb_key_def::INDEX_INFO_VERSION_FIELD_FLAGS: + /* Sanity check to prevent reading bogus TTL record. */ + if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION + + RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION + + RDB_SIZEOF_INDEX_FLAGS + + ROCKSDB_SIZEOF_TTL_RECORD) { + error = true; + break; + } + index_info->m_index_type = rdb_netbuf_to_byte(ptr); + ptr += RDB_SIZEOF_INDEX_TYPE; + index_info->m_kv_version = rdb_netbuf_to_uint16(ptr); + ptr += RDB_SIZEOF_KV_VERSION; + index_info->m_index_flags = rdb_netbuf_to_uint32(ptr); + ptr += RDB_SIZEOF_INDEX_FLAGS; + index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr); + found = true; break; - } - index_info->m_index_type = rdb_netbuf_to_byte(ptr); - ptr += RDB_SIZEOF_INDEX_TYPE; - index_info->m_kv_version = rdb_netbuf_to_uint16(ptr); - ptr += RDB_SIZEOF_KV_VERSION; - index_info->m_index_flags = rdb_netbuf_to_uint32(ptr); - ptr += RDB_SIZEOF_INDEX_FLAGS; - index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr); - found = true; - break; - case Rdb_key_def::INDEX_INFO_VERSION_TTL: - /* Sanity check to prevent reading bogus into TTL record. */ - if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION + - RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION + - ROCKSDB_SIZEOF_TTL_RECORD) { - error = true; + case Rdb_key_def::INDEX_INFO_VERSION_TTL: + /* Sanity check to prevent reading bogus into TTL record. 
*/ + if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION + + RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION + + ROCKSDB_SIZEOF_TTL_RECORD) { + error = true; + break; + } + index_info->m_index_type = rdb_netbuf_to_byte(ptr); + ptr += RDB_SIZEOF_INDEX_TYPE; + index_info->m_kv_version = rdb_netbuf_to_uint16(ptr); + ptr += RDB_SIZEOF_KV_VERSION; + index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr); + if ((index_info->m_kv_version == + Rdb_key_def::PRIMARY_FORMAT_VERSION_TTL) && + index_info->m_ttl_duration > 0) { + index_info->m_index_flags = Rdb_key_def::TTL_FLAG; + } + found = true; break; - } - index_info->m_index_type = rdb_netbuf_to_byte(ptr); - ptr += RDB_SIZEOF_INDEX_TYPE; - index_info->m_kv_version = rdb_netbuf_to_uint16(ptr); - ptr += RDB_SIZEOF_KV_VERSION; - index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr); - if ((index_info->m_kv_version == - Rdb_key_def::PRIMARY_FORMAT_VERSION_TTL) && - index_info->m_ttl_duration > 0) { - index_info->m_index_flags = Rdb_key_def::TTL_FLAG; - } - found = true; - break; - case Rdb_key_def::INDEX_INFO_VERSION_VERIFY_KV_FORMAT: - case Rdb_key_def::INDEX_INFO_VERSION_GLOBAL_ID: - index_info->m_index_type = rdb_netbuf_to_byte(ptr); - ptr += RDB_SIZEOF_INDEX_TYPE; - index_info->m_kv_version = rdb_netbuf_to_uint16(ptr); - found = true; - break; + case Rdb_key_def::INDEX_INFO_VERSION_VERIFY_KV_FORMAT: + case Rdb_key_def::INDEX_INFO_VERSION_GLOBAL_ID: + index_info->m_index_type = rdb_netbuf_to_byte(ptr); + ptr += RDB_SIZEOF_INDEX_TYPE; + index_info->m_kv_version = rdb_netbuf_to_uint16(ptr); + found = true; + break; - default: - error = true; - break; + default: + error = true; + break; } switch (index_info->m_index_type) { - case Rdb_key_def::INDEX_TYPE_PRIMARY: - case Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY: { - error = - index_info->m_kv_version > Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; - break; - } - case Rdb_key_def::INDEX_TYPE_SECONDARY: - error = index_info->m_kv_version > - 
Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST; - break; - default: - error = true; - break; + case Rdb_key_def::INDEX_TYPE_PRIMARY: + case Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY: { + error = index_info->m_kv_version > + Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; + break; + } + case Rdb_key_def::INDEX_TYPE_SECONDARY: + error = index_info->m_kv_version > + Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST; + break; + default: + error = true; + break; } } @@ -4849,20 +4939,18 @@ bool Rdb_dict_manager::get_index_info( return found; } -bool Rdb_dict_manager::get_cf_flags(const uint32_t &cf_id, +bool Rdb_dict_manager::get_cf_flags(const uint32_t cf_id, uint32_t *const cf_flags) const { DBUG_ASSERT(cf_flags != nullptr); bool found = false; std::string value; - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2] = {0}; + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 2> key_writer; - rdb_netbuf_store_uint32(key_buf, Rdb_key_def::CF_DEFINITION); - rdb_netbuf_store_uint32(key_buf + Rdb_key_def::INDEX_NUMBER_SIZE, cf_id); + key_writer.write_uint32(Rdb_key_def::CF_DEFINITION); + key_writer.write_uint32(cf_id); - const rocksdb::Slice key = - rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)); - const rocksdb::Status status = get_value(key, &value); + const rocksdb::Status status = get_value(key_writer.to_slice(), &value); if (status.ok()) { const uchar *val = (const uchar *)value.c_str(); @@ -4890,10 +4978,9 @@ void Rdb_dict_manager::get_ongoing_index_operation( DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING || dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING); - uchar index_buf[Rdb_key_def::INDEX_NUMBER_SIZE]; - rdb_netbuf_store_uint32(index_buf, dd_type); - const rocksdb::Slice index_slice(reinterpret_cast<char *>(index_buf), - Rdb_key_def::INDEX_NUMBER_SIZE); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE> index_writer; + index_writer.write_uint32(dd_type); + const rocksdb::Slice index_slice = index_writer.to_slice(); rocksdb::Iterator *it = 
new_iterator(); for (it->Seek(index_slice); it->Valid(); it->Next()) { @@ -4937,11 +5024,10 @@ bool Rdb_dict_manager::is_index_operation_ongoing( bool found = false; std::string value; - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; - dump_index_id(key_buf, dd_type, gl_index_id); - const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer; + dump_index_id(&key_writer, dd_type, gl_index_id); - const rocksdb::Status status = get_value(key, &value); + const rocksdb::Status status = get_value(key_writer.to_slice(), &value); if (status.ok()) { found = true; } @@ -4958,23 +5044,19 @@ void Rdb_dict_manager::start_ongoing_index_operation( DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING || dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING); - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; - uchar value_buf[Rdb_key_def::VERSION_SIZE] = {0}; - dump_index_id(key_buf, dd_type, gl_index_id); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer; + Rdb_buf_writer<Rdb_key_def::VERSION_SIZE> value_writer; + + dump_index_id(&key_writer, dd_type, gl_index_id); // version as needed if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) { - rdb_netbuf_store_uint16(value_buf, - Rdb_key_def::DDL_DROP_INDEX_ONGOING_VERSION); + value_writer.write_uint16(Rdb_key_def::DDL_DROP_INDEX_ONGOING_VERSION); } else { - rdb_netbuf_store_uint16(value_buf, - Rdb_key_def::DDL_CREATE_INDEX_ONGOING_VERSION); + value_writer.write_uint16(Rdb_key_def::DDL_CREATE_INDEX_ONGOING_VERSION); } - const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); - const rocksdb::Slice value = - rocksdb::Slice((char *)value_buf, sizeof(value_buf)); - batch->Put(m_system_cfh, key, value); + batch->Put(m_system_cfh, key_writer.to_slice(), value_writer.to_slice()); } /* @@ -5006,7 +5088,7 @@ bool Rdb_dict_manager::is_drop_index_empty() const { all associated indexes to be removed */ void 
Rdb_dict_manager::add_drop_table( - std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys, + std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 n_keys, rocksdb::WriteBatch *const batch) const { std::unordered_set<GL_INDEX_ID> dropped_index_ids; for (uint32 i = 0; i < n_keys; i++) { @@ -5100,12 +5182,13 @@ void Rdb_dict_manager::resume_drop_indexes() const { for (const auto &gl_index_id : gl_index_ids) { log_start_drop_index(gl_index_id, "Resume"); if (max_index_id_in_dict < gl_index_id.index_id) { - sql_print_error("RocksDB: Found max index id %u from data dictionary " - "but also found dropped index id (%u,%u) from drop_index " - "dictionary. This should never happen and is possibly a " - "bug.", - max_index_id_in_dict, gl_index_id.cf_id, - gl_index_id.index_id); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Found max index id %u from data dictionary " + "but also found dropped index id (%u,%u) from drop_index " + "dictionary. This should never happen and is possibly a " + "bug.", + max_index_id_in_dict, gl_index_id.cf_id, gl_index_id.index_id); abort(); } } @@ -5130,7 +5213,7 @@ void Rdb_dict_manager::rollback_ongoing_index_creation() const { } void Rdb_dict_manager::log_start_drop_table( - const std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys, + const std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 n_keys, const char *const log_action) const { for (uint32 i = 0; i < n_keys; i++) { log_start_drop_index(key_descr[i]->get_gl_index_id(), log_action); @@ -5151,10 +5234,12 @@ void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id, if (!incomplete_create_indexes.count(gl_index_id)) { /* If it's not a partially created index, something is very wrong. */ - sql_print_error("RocksDB: Failed to get column family info " - "from index id (%u,%u). 
MyRocks data dictionary may " - "get corrupted.", - gl_index_id.cf_id, gl_index_id.index_id); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Failed to get column family info " + "from index id (%u,%u). MyRocks data dictionary may " + "get corrupted.", + gl_index_id.cf_id, gl_index_id.index_id); abort(); } } @@ -5167,7 +5252,7 @@ bool Rdb_dict_manager::get_max_index_id(uint32_t *const index_id) const { const rocksdb::Status status = get_value(m_key_slice_max_index_id, &value); if (status.ok()) { const uchar *const val = (const uchar *)value.c_str(); - const uint16_t &version = rdb_netbuf_to_uint16(val); + const uint16_t version = rdb_netbuf_to_uint16(val); if (version == Rdb_key_def::MAX_INDEX_ID_VERSION) { *index_id = rdb_netbuf_to_uint32(val + Rdb_key_def::VERSION_SIZE); found = true; @@ -5177,27 +5262,28 @@ bool Rdb_dict_manager::get_max_index_id(uint32_t *const index_id) const { } bool Rdb_dict_manager::update_max_index_id(rocksdb::WriteBatch *const batch, - const uint32_t &index_id) const { + const uint32_t index_id) const { DBUG_ASSERT(batch != nullptr); uint32_t old_index_id = -1; if (get_max_index_id(&old_index_id)) { if (old_index_id > index_id) { - sql_print_error("RocksDB: Found max index id %u from data dictionary " - "but trying to update to older value %u. This should " - "never happen and possibly a bug.", - old_index_id, index_id); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Found max index id %u from data dictionary " + "but trying to update to older value %u. 
This should " + "never happen and possibly a bug.", + old_index_id, index_id); return true; } } - uchar value_buf[Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE] = - {0}; - rdb_netbuf_store_uint16(value_buf, Rdb_key_def::MAX_INDEX_ID_VERSION); - rdb_netbuf_store_uint32(value_buf + Rdb_key_def::VERSION_SIZE, index_id); - const rocksdb::Slice value = - rocksdb::Slice((char *)value_buf, sizeof(value_buf)); - batch->Put(m_system_cfh, m_key_slice_max_index_id, value); + Rdb_buf_writer<Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE> + value_writer; + value_writer.write_uint16(Rdb_key_def::MAX_INDEX_ID_VERSION); + value_writer.write_uint32(index_id); + + batch->Put(m_system_cfh, m_key_slice_max_index_id, value_writer.to_slice()); return false; } @@ -5207,27 +5293,24 @@ void Rdb_dict_manager::add_stats( DBUG_ASSERT(batch != nullptr); for (const auto &it : stats) { - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; - dump_index_id(key_buf, Rdb_key_def::INDEX_STATISTICS, it.m_gl_index_id); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer; + dump_index_id(&key_writer, Rdb_key_def::INDEX_STATISTICS, it.m_gl_index_id); // IndexStats::materialize takes complete care of serialization including // storing the version const auto value = Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it}); - batch->Put(m_system_cfh, rocksdb::Slice((char *)key_buf, sizeof(key_buf)), - value); + batch->Put(m_system_cfh, key_writer.to_slice(), value); } } Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const { - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; - dump_index_id(key_buf, Rdb_key_def::INDEX_STATISTICS, gl_index_id); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer; + dump_index_id(&key_writer, Rdb_key_def::INDEX_STATISTICS, gl_index_id); std::string value; - const rocksdb::Status status = get_value( - rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)), - &value); + const 
rocksdb::Status status = get_value(key_writer.to_slice(), &value); if (status.ok()) { std::vector<Rdb_index_stats> v; // unmaterialize checks if the version matches @@ -5239,41 +5322,34 @@ Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const { return Rdb_index_stats(); } -rocksdb::Status -Rdb_dict_manager::put_auto_incr_val(rocksdb::WriteBatchBase *batch, - const GL_INDEX_ID &gl_index_id, - ulonglong val, bool overwrite) const { - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; - dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id); - const rocksdb::Slice key = - rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)); +rocksdb::Status Rdb_dict_manager::put_auto_incr_val( + rocksdb::WriteBatchBase *batch, const GL_INDEX_ID &gl_index_id, + ulonglong val, bool overwrite) const { + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer; + dump_index_id(&key_writer, Rdb_key_def::AUTO_INC, gl_index_id); // Value is constructed by storing the version and the value. 
- uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION + - ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0}; - uchar *ptr = value_buf; - rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION); - ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION; - rdb_netbuf_store_uint64(ptr, val); - ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE; - const rocksdb::Slice value = - rocksdb::Slice(reinterpret_cast<char *>(value_buf), ptr - value_buf); + Rdb_buf_writer<RDB_SIZEOF_AUTO_INCREMENT_VERSION + + ROCKSDB_SIZEOF_AUTOINC_VALUE> + value_writer; + value_writer.write_uint16(Rdb_key_def::AUTO_INCREMENT_VERSION); + value_writer.write_uint64(val); if (overwrite) { - return batch->Put(m_system_cfh, key, value); + return batch->Put(m_system_cfh, key_writer.to_slice(), + value_writer.to_slice()); } - return batch->Merge(m_system_cfh, key, value); + return batch->Merge(m_system_cfh, key_writer.to_slice(), + value_writer.to_slice()); } bool Rdb_dict_manager::get_auto_incr_val(const GL_INDEX_ID &gl_index_id, ulonglong *new_val) const { - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; - dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id); + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer; + dump_index_id(&key_writer, Rdb_key_def::AUTO_INC, gl_index_id); std::string value; - const rocksdb::Status status = get_value( - rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)), - &value); + const rocksdb::Status status = get_value(key_writer.to_slice(), &value); if (status.ok()) { const uchar *const val = reinterpret_cast<const uchar *>(value.data()); @@ -5307,4 +5383,4 @@ uint Rdb_seq_generator::get_and_update_next_number( return res; } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 27ca5ca980d..0a3a747c633 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -22,6 +22,7 @@ #include <map> #include <mutex> #include <string> +#include <unordered_map> #include 
<unordered_set> #include <utility> #include <vector> @@ -48,6 +49,27 @@ class Rdb_ddl_manager; const uint32_t GTID_BUF_LEN = 60; +class Rdb_convert_to_record_key_decoder { + public: + Rdb_convert_to_record_key_decoder() = default; + Rdb_convert_to_record_key_decoder( + const Rdb_convert_to_record_key_decoder &decoder) = delete; + Rdb_convert_to_record_key_decoder &operator=( + const Rdb_convert_to_record_key_decoder &decoder) = delete; + static int decode(uchar *const buf, uint *offset, Rdb_field_packing *fpi, + TABLE *table, Field *field, bool has_unpack_info, + Rdb_string_reader *reader, + Rdb_string_reader *unpack_reader); + static int skip(const Rdb_field_packing *fpi, const Field *field, + Rdb_string_reader *reader, Rdb_string_reader *unpack_reader); + + private: + static int decode_field(Rdb_field_packing *fpi, Field *field, + Rdb_string_reader *reader, + const uchar *const default_value, + Rdb_string_reader *unpack_reader); +}; + /* @brief Field packing context. @@ -63,7 +85,7 @@ const uint32_t GTID_BUF_LEN = 60; unpack_info is passed as context data between the two. 
*/ class Rdb_pack_field_context { -public: + public: Rdb_pack_field_context(const Rdb_pack_field_context &) = delete; Rdb_pack_field_context &operator=(const Rdb_pack_field_context &) = delete; @@ -74,6 +96,45 @@ public: Rdb_string_writer *writer; }; +class Rdb_key_field_iterator { + private: + Rdb_field_packing *m_pack_info; + int m_iter_index; + int m_iter_end; + TABLE *m_table; + Rdb_string_reader *m_reader; + Rdb_string_reader *m_unp_reader; + uint m_curr_bitmap_pos; + const MY_BITMAP *m_covered_bitmap; + uchar *m_buf; + bool m_has_unpack_info; + const Rdb_key_def *m_key_def; + bool m_secondary_key; + bool m_hidden_pk_exists; + bool m_is_hidden_pk; + bool m_is_null; + Field *m_field; + uint m_offset; + Rdb_field_packing *m_fpi; + + public: + Rdb_key_field_iterator(const Rdb_key_field_iterator &) = delete; + Rdb_key_field_iterator &operator=(const Rdb_key_field_iterator &) = delete; + Rdb_key_field_iterator(const Rdb_key_def *key_def, + Rdb_field_packing *pack_info, + Rdb_string_reader *reader, + Rdb_string_reader *unp_reader, TABLE *table, + bool has_unpack_info, const MY_BITMAP *covered_bitmap, + uchar *buf); + + int next(); + bool has_next(); + bool get_is_null() const; + Field *get_field() const; + int get_field_index() const; + void *get_dst() const; +}; + struct Rdb_collation_codec; struct Rdb_index_info; @@ -81,18 +142,19 @@ struct Rdb_index_info; C-style "virtual table" allowing different handling of packing logic based on the field type. See Rdb_field_packing::setup() implementation. 
*/ -using rdb_make_unpack_info_t = - void (Rdb_key_def::*)(const Rdb_collation_codec *codec, const Field *field, - Rdb_pack_field_context *pack_ctx) const; -using rdb_index_field_unpack_t = int (Rdb_key_def::*)( - Rdb_field_packing *fpi, Field *field, uchar *field_ptr, - Rdb_string_reader *reader, Rdb_string_reader *unpack_reader) const; -using rdb_index_field_skip_t = - int (Rdb_key_def::*)(const Rdb_field_packing *fpi, const Field *field, - Rdb_string_reader *reader) const; -using rdb_index_field_pack_t = - void (Rdb_key_def::*)(Rdb_field_packing *fpi, Field *field, uchar *buf, - uchar **dst, Rdb_pack_field_context *pack_ctx) const; +using rdb_make_unpack_info_t = void (*)(const Rdb_collation_codec *codec, + const Field *field, + Rdb_pack_field_context *pack_ctx); +using rdb_index_field_unpack_t = int (*)(Rdb_field_packing *fpi, Field *field, + uchar *field_ptr, + Rdb_string_reader *reader, + Rdb_string_reader *unpack_reader); +using rdb_index_field_skip_t = int (*)(const Rdb_field_packing *fpi, + const Field *field, + Rdb_string_reader *reader); +using rdb_index_field_pack_t = void (*)(Rdb_field_packing *fpi, Field *field, + uchar *buf, uchar **dst, + Rdb_pack_field_context *pack_ctx); const uint RDB_INVALID_KEY_LEN = uint(-1); @@ -187,7 +249,7 @@ enum { */ class Rdb_key_def { -public: + public: /* Convert a key from KeyTupleFormat to mem-comparable form */ uint pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer, uchar *const packed_tuple, const uchar *const key_tuple, @@ -202,23 +264,17 @@ public: uint pack_record(const TABLE *const tbl, uchar *const pack_buffer, const uchar *const record, uchar *const packed_tuple, Rdb_string_writer *const unpack_info, - const bool &should_store_row_debug_checksums, - const longlong &hidden_pk_id = 0, uint n_key_parts = 0, + const bool should_store_row_debug_checksums, + const longlong hidden_pk_id = 0, uint n_key_parts = 0, uint *const n_null_fields = nullptr, - uint *const ttl_pk_offset = nullptr, const char *const 
ttl_bytes = nullptr) const; /* Pack the hidden primary key into mem-comparable form. */ - uint pack_hidden_pk(const longlong &hidden_pk_id, + uint pack_hidden_pk(const longlong hidden_pk_id, uchar *const packed_tuple) const; - int unpack_field(Rdb_field_packing *const fpi, - Field *const field, - Rdb_string_reader* reader, - const uchar *const default_value, - Rdb_string_reader* unp_reader) const; int unpack_record(TABLE *const table, uchar *const buf, const rocksdb::Slice *const packed_key, const rocksdb::Slice *const unpack_info, - const bool &verify_row_debug_checksums) const; + const bool verify_row_debug_checksums) const; static bool unpack_info_has_checksum(const rocksdb::Slice &unpack_info); int compare_keys(const rocksdb::Slice *key1, const rocksdb::Slice *key2, @@ -240,33 +296,67 @@ public: /* Get the first key that you need to position at to start iterating. - Stores into *key a "supremum" or "infimum" key value for the index. - + @parameters key OUT Big Endian, value is m_index_number or + m_index_number + 1 + @parameters size OUT key size, value is INDEX_NUMBER_SIZE @return Number of bytes in the key that are usable for bloom filter use. */ inline int get_first_key(uchar *const key, uint *const size) const { - if (m_is_reverse_cf) + if (m_is_reverse_cf) { get_supremum_key(key, size); - else + /* Find out how many bytes of infimum are the same as m_index_number */ + uchar unmodified_key[INDEX_NUMBER_SIZE]; + rdb_netbuf_store_index(unmodified_key, m_index_number); + int i; + for (i = 0; i < INDEX_NUMBER_SIZE; i++) { + if (key[i] != unmodified_key[i]) { + break; + } + } + return i; + } else { get_infimum_key(key, size); + // For infimum key, its value will be m_index_number + // Thus return its own size instead. 
+ return INDEX_NUMBER_SIZE; + } + } + + /* + The same as get_first_key, but get the key for the last entry in the index + @parameters key OUT Big Endian, value is m_index_number or + m_index_number + 1 + @parameters size OUT key size, value is INDEX_NUMBER_SIZE - /* Find out how many bytes of infimum are the same as m_index_number */ - uchar unmodified_key[INDEX_NUMBER_SIZE]; - rdb_netbuf_store_index(unmodified_key, m_index_number); - int i; - for (i = 0; i < INDEX_NUMBER_SIZE; i++) { - if (key[i] != unmodified_key[i]) - break; + @return Number of bytes in the key that are usable for bloom filter use. + */ + inline int get_last_key(uchar *const key, uint *const size) const { + if (m_is_reverse_cf) { + get_infimum_key(key, size); + // For infimum key, its value will be m_index_number + // Thus return its own size instead. + return INDEX_NUMBER_SIZE; + } else { + get_supremum_key(key, size); + /* Find out how many bytes are the same as m_index_number */ + uchar unmodified_key[INDEX_NUMBER_SIZE]; + rdb_netbuf_store_index(unmodified_key, m_index_number); + int i; + for (i = 0; i < INDEX_NUMBER_SIZE; i++) { + if (key[i] != unmodified_key[i]) { + break; + } + } + return i; } - return i; } /* Make a key that is right after the given key. */ - static int successor(uchar *const packed_tuple, const uint &len); + static int successor(uchar *const packed_tuple, const uint len); /* Make a key that is right before the given key. */ - static int predecessor(uchar *const packed_tuple, const uint &len); + static int predecessor(uchar *const packed_tuple, const uint len); /* This can be used to compare prefixes. 
@@ -282,19 +372,18 @@ public: /* Check if given mem-comparable key belongs to this index */ bool covers_key(const rocksdb::Slice &slice) const { - if (slice.size() < INDEX_NUMBER_SIZE) - return false; + if (slice.size() < INDEX_NUMBER_SIZE) return false; - if (memcmp(slice.data(), m_index_number_storage_form, INDEX_NUMBER_SIZE)) + if (memcmp(slice.data(), m_index_number_storage_form, INDEX_NUMBER_SIZE)) { return false; + } return true; } void get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const; - bool covers_lookup(TABLE *const table, - const rocksdb::Slice *const unpack_info, + bool covers_lookup(const rocksdb::Slice *const unpack_info, const MY_BITMAP *const map) const; inline bool use_covered_bitmap_format() const { @@ -302,6 +391,9 @@ public: m_kv_format_version >= SECONDARY_FORMAT_VERSION_UPDATE3; } + /* Indicates that all key parts can be unpacked to cover a secondary lookup */ + bool can_cover_lookup() const; + /* Return true if the passed mem-comparable key - is from this index, and @@ -339,7 +431,7 @@ public: uint get_key_parts() const { return m_key_parts; } - uint get_ttl_field_offset() const { return m_ttl_field_offset; } + uint get_ttl_field_index() const { return m_ttl_field_index; } /* Get a field object for key part #part_no @@ -377,7 +469,7 @@ public: VERSION_SIZE = 2, CF_NUMBER_SIZE = 4, CF_FLAG_SIZE = 4, - PACKED_SIZE = 4, // one int + PACKED_SIZE = 4, // one int }; // bit flags for combining bools when writing to disk @@ -505,7 +597,7 @@ public: uint64 *ttl_duration); static uint extract_ttl_col(const TABLE *const table_arg, const Rdb_tbl_def *const tbl_def_arg, - std::string *ttl_column, uint *ttl_field_offset, + std::string *ttl_column, uint *ttl_field_index, bool skip_checks = false); inline bool has_ttl() const { return m_ttl_duration > 0; } @@ -517,15 +609,14 @@ public: const uchar *const val, enum INDEX_FLAG flag) const; - static const std::string - gen_qualifier_for_table(const char *const qualifier, - const std::string 
&partition_name = ""); - static const std::string - gen_cf_name_qualifier_for_partition(const std::string &s); - static const std::string - gen_ttl_duration_qualifier_for_partition(const std::string &s); - static const std::string - gen_ttl_col_qualifier_for_partition(const std::string &s); + static const std::string gen_qualifier_for_table( + const char *const qualifier, const std::string &partition_name = ""); + static const std::string gen_cf_name_qualifier_for_partition( + const std::string &s); + static const std::string gen_ttl_duration_qualifier_for_partition( + const std::string &s); + static const std::string gen_ttl_col_qualifier_for_partition( + const std::string &s); static const std::string parse_comment_for_qualifier( const std::string &comment, const TABLE *const table_arg, @@ -535,133 +626,133 @@ public: rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; } /* Check if keypart #kp can be unpacked from index tuple */ - inline bool can_unpack(const uint &kp) const; + inline bool can_unpack(const uint kp) const; /* Check if keypart #kp needs unpack info */ - inline bool has_unpack_info(const uint &kp) const; + inline bool has_unpack_info(const uint kp) const; /* Check if given table has a primary key */ static bool table_has_hidden_pk(const TABLE *const table); - void report_checksum_mismatch(const bool &is_key, const char *const data, + void report_checksum_mismatch(const bool is_key, const char *const data, const size_t data_size) const; /* Check if index is at least pk_min if it is a PK, or at least sk_min if SK.*/ - bool index_format_min_check(const int &pk_min, const int &sk_min) const; + bool index_format_min_check(const int pk_min, const int sk_min) const; - void pack_with_make_sort_key( + static void pack_with_make_sort_key( Rdb_field_packing *const fpi, Field *const field, uchar *buf MY_ATTRIBUTE((__unused__)), uchar **dst, - Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const; + Rdb_pack_field_context *const 
pack_ctx MY_ATTRIBUTE((__unused__))); - void pack_with_varchar_encoding( + static void pack_with_varchar_encoding( Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst, - Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const; - - void - pack_with_varchar_space_pad(Rdb_field_packing *const fpi, Field *const field, - uchar *buf, uchar **dst, - Rdb_pack_field_context *const pack_ctx) const; - - int unpack_integer(Rdb_field_packing *const fpi, Field *const field, - uchar *const to, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader - MY_ATTRIBUTE((__unused__))) const; - - int unpack_double(Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)), - Field *const field MY_ATTRIBUTE((__unused__)), - uchar *const field_ptr, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader - MY_ATTRIBUTE((__unused__))) const; - - int unpack_float(Rdb_field_packing *const fpi, - Field *const field MY_ATTRIBUTE((__unused__)), - uchar *const field_ptr, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader - MY_ATTRIBUTE((__unused__))) const; - - int unpack_binary_str(Rdb_field_packing *const fpi, Field *const field, - uchar *const to, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader - MY_ATTRIBUTE((__unused__))) const; - - int unpack_binary_or_utf8_varchar( - Rdb_field_packing *const fpi, Field *const field, uchar *dst, + Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))); + + static void pack_with_varchar_space_pad( + Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst, + Rdb_pack_field_context *const pack_ctx); + + static int unpack_integer(Rdb_field_packing *const fpi, Field *const field, + uchar *const to, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader + MY_ATTRIBUTE((__unused__))); + + static int unpack_double( + Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)), + Field *const field 
MY_ATTRIBUTE((__unused__)), uchar *const field_ptr, + Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))); + + static int unpack_float( + Rdb_field_packing *const fpi, + Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const; + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))); + + static int unpack_binary_str(Rdb_field_packing *const fpi, Field *const field, + uchar *const to, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader + MY_ATTRIBUTE((__unused__))); - int unpack_binary_or_utf8_varchar_space_pad( + static int unpack_binary_or_utf8_varchar( Rdb_field_packing *const fpi, Field *const field, uchar *dst, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const; + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))); - int unpack_newdate(Rdb_field_packing *const fpi, - Field *const field MY_ATTRIBUTE((__unused__)), - uchar *const field_ptr, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader - MY_ATTRIBUTE((__unused__))) const; + static int unpack_binary_or_utf8_varchar_space_pad( + Rdb_field_packing *const fpi, Field *const field, uchar *dst, + Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader); - int unpack_utf8_str(Rdb_field_packing *const fpi, Field *const field, - uchar *dst, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader - MY_ATTRIBUTE((__unused__))) const; + static int unpack_newdate( + Rdb_field_packing *const fpi, + Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr, + Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))); - int unpack_unknown_varchar(Rdb_field_packing *const fpi, Field *const field, + static int unpack_utf8_str(Rdb_field_packing *const fpi, Field *const field, uchar *dst, 
Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const; + Rdb_string_reader *const unp_reader + MY_ATTRIBUTE((__unused__))); - int unpack_simple_varchar_space_pad( + static int unpack_unknown_varchar(Rdb_field_packing *const fpi, + Field *const field, uchar *dst, + Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader); + + static int unpack_simple_varchar_space_pad( Rdb_field_packing *const fpi, Field *const field, uchar *dst, - Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const; + Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader); - int unpack_simple(Rdb_field_packing *const fpi, - Field *const field MY_ATTRIBUTE((__unused__)), - uchar *const dst, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const; + static int unpack_simple(Rdb_field_packing *const fpi, + Field *const field MY_ATTRIBUTE((__unused__)), + uchar *const dst, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader); - int unpack_unknown(Rdb_field_packing *const fpi, Field *const field, - uchar *const dst, Rdb_string_reader *const reader, - Rdb_string_reader *const unp_reader) const; + static int unpack_unknown(Rdb_field_packing *const fpi, Field *const field, + uchar *const dst, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader); - int unpack_floating_point(uchar *const dst, Rdb_string_reader *const reader, - const size_t &size, const int &exp_digit, - const uchar *const zero_pattern, - const uchar *const zero_val, - void (*swap_func)(uchar *, const uchar *)) const; + static int unpack_floating_point(uchar *const dst, + Rdb_string_reader *const reader, + const size_t size, const int exp_digit, + const uchar *const zero_pattern, + const uchar *const zero_val, + void (*swap_func)(uchar *, const uchar *)); - void make_unpack_simple_varchar(const Rdb_collation_codec *const codec, - const Field *const field, - Rdb_pack_field_context *const pack_ctx) const; + 
static void make_unpack_simple_varchar( + const Rdb_collation_codec *const codec, const Field *const field, + Rdb_pack_field_context *const pack_ctx); - void make_unpack_simple(const Rdb_collation_codec *const codec, - const Field *const field, - Rdb_pack_field_context *const pack_ctx) const; + static void make_unpack_simple(const Rdb_collation_codec *const codec, + const Field *const field, + Rdb_pack_field_context *const pack_ctx); - void make_unpack_unknown( + static void make_unpack_unknown( const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)), - const Field *const field, Rdb_pack_field_context *const pack_ctx) const; + const Field *const field, Rdb_pack_field_context *const pack_ctx); - void make_unpack_unknown_varchar( + static void make_unpack_unknown_varchar( const Rdb_collation_codec *const codec MY_ATTRIBUTE((__unused__)), - const Field *const field, Rdb_pack_field_context *const pack_ctx) const; + const Field *const field, Rdb_pack_field_context *const pack_ctx); - void dummy_make_unpack_info( + static void dummy_make_unpack_info( const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)), const Field *field MY_ATTRIBUTE((__unused__)), - Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) const; + Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))); - int skip_max_length(const Rdb_field_packing *const fpi, - const Field *const field MY_ATTRIBUTE((__unused__)), - Rdb_string_reader *const reader) const; + static int skip_max_length(const Rdb_field_packing *const fpi, + const Field *const field + MY_ATTRIBUTE((__unused__)), + Rdb_string_reader *const reader); - int skip_variable_length( - const Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)), - const Field *const field, Rdb_string_reader *const reader) const; + static int skip_variable_length(const Rdb_field_packing *const fpi, + const Field *const field, + Rdb_string_reader *const reader); - int skip_variable_space_pad(const Rdb_field_packing *const fpi, - const Field 
*const field, - Rdb_string_reader *const reader) const; + static int skip_variable_space_pad(const Rdb_field_packing *const fpi, + const Field *const field, + Rdb_string_reader *const reader); inline bool use_legacy_varbinary_format() const { return !index_format_min_check(PRIMARY_FORMAT_VERSION_UPDATE2, @@ -674,7 +765,7 @@ public: private: #ifndef DBUG_OFF - inline bool is_storage_available(const int &offset, const int &needed) const { + inline bool is_storage_available(const int offset, const int needed) const { const int storage_length = static_cast<int>(max_storage_fmt_length()); return (storage_length - offset) >= needed; } @@ -682,7 +773,7 @@ public: inline bool is_storage_available(const int &offset, const int &needed) const { return 1; } -#endif // DBUG_OFF +#endif // DBUG_OFF /* Global number of this index (used as prefix in StorageFormat) */ const uint32 m_index_number; @@ -691,15 +782,15 @@ public: rocksdb::ColumnFamilyHandle *m_cf_handle; - void pack_legacy_variable_format(const uchar *src, size_t src_len, - uchar **dst) const; + static void pack_legacy_variable_format(const uchar *src, size_t src_len, + uchar **dst); - void pack_variable_format(const uchar *src, size_t src_len, - uchar **dst) const; + static void pack_variable_format(const uchar *src, size_t src_len, + uchar **dst); - uint calc_unpack_legacy_variable_format(uchar flag, bool *done) const; + static uint calc_unpack_legacy_variable_format(uchar flag, bool *done); - uint calc_unpack_variable_format(uchar flag, bool *done) const; + static uint calc_unpack_variable_format(uchar flag, bool *done); public: uint16_t m_index_dict_version; @@ -738,8 +829,6 @@ public: std::string m_ttl_column; private: - friend class Rdb_tbl_def; // for m_index_number above - /* Number of key parts in the primary key*/ uint m_pk_key_parts; @@ -770,7 +859,7 @@ public: Index of the TTL column in table->s->fields, if it exists. Default is UINT_MAX to denote that it does not exist. 
*/ - uint m_ttl_field_offset; + uint m_ttl_field_index; /* Prefix extractor for the column family of the key definiton */ std::shared_ptr<const rocksdb::SliceTransform> m_prefix_extractor; @@ -818,7 +907,7 @@ extern std::array<const Rdb_collation_codec *, MY_ALL_CHARSETS_SIZE> rdb_collation_data; class Rdb_field_packing { -public: + public: Rdb_field_packing(const Rdb_field_packing &) = delete; Rdb_field_packing &operator=(const Rdb_field_packing &) = delete; Rdb_field_packing() = default; @@ -836,9 +925,10 @@ public: Valid only for VARCHAR fields. */ const CHARSET_INFO *m_varchar_charset; + bool m_use_legacy_varbinary_format; // (Valid when Variable Length Space Padded Encoding is used): - uint m_segment_size; // size of segment used + uint m_segment_size; // size of segment used // number of bytes used to store number of trimmed (or added) // spaces in the upack_info @@ -881,7 +971,7 @@ public: */ rdb_index_field_skip_t m_skip_func; -private: + private: /* Location of the field in the table (key number and key part number). @@ -907,12 +997,12 @@ private: uint m_keynr; uint m_key_part; -public: + public: bool setup(const Rdb_key_def *const key_descr, const Field *const field, - const uint &keynr_arg, const uint &key_part_arg, - const uint16 &key_length); + const uint keynr_arg, const uint key_part_arg, + const uint16 key_length); Field *get_field_in_table(const TABLE *const tbl) const; - void fill_hidden_pk_val(uchar **dst, const longlong &hidden_pk_id) const; + void fill_hidden_pk_val(uchar **dst, const longlong hidden_pk_id) const; }; /* @@ -923,7 +1013,7 @@ public: For encoding/decoding of index tuples, see Rdb_key_def. 
*/ class Rdb_field_encoder { -public: + public: Rdb_field_encoder(const Rdb_field_encoder &) = delete; Rdb_field_encoder &operator=(const Rdb_field_encoder &) = delete; /* @@ -944,7 +1034,7 @@ public: uint m_null_offset; uint16 m_field_index; - uchar m_null_mask; // 0 means the field cannot be null + uchar m_null_mask; // 0 means the field cannot be null my_core::enum_field_types m_field_type; @@ -964,12 +1054,12 @@ inline Field *Rdb_key_def::get_table_field_for_part_no(TABLE *table, return m_pack_info[part_no].get_field_in_table(table); } -inline bool Rdb_key_def::can_unpack(const uint &kp) const { +inline bool Rdb_key_def::can_unpack(const uint kp) const { DBUG_ASSERT(kp < m_key_parts); return (m_pack_info[kp].m_unpack_func != nullptr); } -inline bool Rdb_key_def::has_unpack_info(const uint &kp) const { +inline bool Rdb_key_def::has_unpack_info(const uint kp) const { DBUG_ASSERT(kp < m_key_parts); return m_pack_info[kp].uses_unpack_info(); } @@ -984,7 +1074,7 @@ inline bool Rdb_key_def::has_unpack_info(const uint &kp) const { */ class Rdb_tbl_def { -private: + private: void check_if_is_mysql_system_table(); /* Stores 'dbname.tablename' */ @@ -997,7 +1087,7 @@ private: void set_name(const std::string &name); -public: + public: Rdb_tbl_def(const Rdb_tbl_def &) = delete; Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete; @@ -1006,18 +1096,20 @@ public: set_name(name); } - Rdb_tbl_def(const char *const name, const size_t &len) + Rdb_tbl_def(const char *const name, const size_t len) : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) { set_name(std::string(name, len)); } - explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t &pos = 0) + explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t pos = 0) : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) { set_name(std::string(slice.data() + pos, slice.size() - pos)); } ~Rdb_tbl_def(); + void check_and_set_read_free_rpl_table(); + /* Number of indexes */ uint m_key_count; 
@@ -1030,8 +1122,11 @@ public: /* Is this a system table */ bool m_is_mysql_system_table; + /* Is this table read free repl enabled */ + std::atomic_bool m_is_read_free_rpl_table{false}; + bool put_dict(Rdb_dict_manager *const dict, rocksdb::WriteBatch *const batch, - uchar *const key, const size_t &keylen); + const rocksdb::Slice &key); const std::string &full_tablename() const { return m_dbname_tablename; } const std::string &base_dbname() const { return m_dbname; } @@ -1050,12 +1145,12 @@ class Rdb_seq_generator { mysql_mutex_t m_mutex; -public: + public: Rdb_seq_generator(const Rdb_seq_generator &) = delete; Rdb_seq_generator &operator=(const Rdb_seq_generator &) = delete; Rdb_seq_generator() = default; - void init(const uint &initial_number) { + void init(const uint initial_number) { mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); m_next_number = initial_number; } @@ -1080,14 +1175,17 @@ interface Rdb_tables_scanner { class Rdb_ddl_manager { Rdb_dict_manager *m_dict = nullptr; - my_core::HASH m_ddl_hash; // Contains Rdb_tbl_def elements + + // Contains Rdb_tbl_def elements + std::unordered_map<std::string, Rdb_tbl_def *> m_ddl_map; + // Maps index id to <table_name, index number> std::map<GL_INDEX_ID, std::pair<std::string, uint>> m_index_num_to_keydef; // Maps index id to key definitons not yet committed to data dictionary. // This is mainly used to store key definitions during ALTER TABLE. 
std::map<GL_INDEX_ID, std::shared_ptr<Rdb_key_def>> - m_index_num_to_uncommitted_keydef; + m_index_num_to_uncommitted_keydef; mysql_rwlock_t m_rwlock; Rdb_seq_generator m_sequence; @@ -1098,30 +1196,30 @@ class Rdb_ddl_manager { const std::shared_ptr<Rdb_key_def> &find(GL_INDEX_ID gl_index_id); -public: + public: Rdb_ddl_manager(const Rdb_ddl_manager &) = delete; Rdb_ddl_manager &operator=(const Rdb_ddl_manager &) = delete; Rdb_ddl_manager() {} /* Load the data dictionary from on-disk storage */ bool init(Rdb_dict_manager *const dict_arg, Rdb_cf_manager *const cf_manager, - const uint32_t &validate_tables); + const uint32_t validate_tables); void cleanup(); - Rdb_tbl_def *find(const std::string &table_name, const bool &lock = true); + Rdb_tbl_def *find(const std::string &table_name, const bool lock = true); std::shared_ptr<const Rdb_key_def> safe_find(GL_INDEX_ID gl_index_id); void set_stats(const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats); void adjust_stats(const std::vector<Rdb_index_stats> &new_data, const std::vector<Rdb_index_stats> &deleted_data = std::vector<Rdb_index_stats>()); - void persist_stats(const bool &sync = false); + void persist_stats(const bool sync = false); /* Modify the mapping and write it to on-disk storage */ int put_and_write(Rdb_tbl_def *const key_descr, rocksdb::WriteBatch *const batch); void remove(Rdb_tbl_def *const rec, rocksdb::WriteBatch *const batch, - const bool &lock = true); + const bool lock = true); bool rename(const std::string &from, const std::string &to, rocksdb::WriteBatch *const batch); @@ -1140,9 +1238,9 @@ public: void remove_uncommitted_keydefs( const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes); -private: + private: /* Put the data into in-memory table (only) */ - int put(Rdb_tbl_def *const key_descr, const bool &lock = true); + int put(Rdb_tbl_def *const key_descr, const bool lock = true); /* Helper functions to be passed to my_core::HASH object */ static const uchar 
*get_hash_key(Rdb_tbl_def *const rec, size_t *const length, @@ -1170,7 +1268,7 @@ private: binlog_gtid */ class Rdb_binlog_manager { -public: + public: Rdb_binlog_manager(const Rdb_binlog_manager &) = delete; Rdb_binlog_manager &operator=(const Rdb_binlog_manager &) = delete; Rdb_binlog_manager() = default; @@ -1181,18 +1279,15 @@ public: rocksdb::WriteBatchBase *const batch); bool read(char *const binlog_name, my_off_t *const binlog_pos, char *const binlog_gtid) const; - void update_slave_gtid_info(const uint &id, const char *const db, + void update_slave_gtid_info(const uint id, const char *const db, const char *const gtid, rocksdb::WriteBatchBase *const write_batch); -private: + private: Rdb_dict_manager *m_dict = nullptr; - uchar m_key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0}; + Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE> m_key_writer; rocksdb::Slice m_key_slice; - rocksdb::Slice pack_value(uchar *const buf, const char *const binlog_name, - const my_off_t &binlog_pos, - const char *const binlog_gtid) const; bool unpack_value(const uchar *const value, size_t value_size, char *const binlog_name, my_off_t *const binlog_pos, char *const binlog_gtid) const; @@ -1259,7 +1354,7 @@ private: */ class Rdb_dict_manager { -private: + private: mysql_mutex_t m_mutex; rocksdb::TransactionDB *m_db = nullptr; rocksdb::ColumnFamilyHandle *m_system_cfh = nullptr; @@ -1271,18 +1366,27 @@ private: static void dump_index_id(uchar *const netbuf, Rdb_key_def::DATA_DICT_TYPE dict_type, const GL_INDEX_ID &gl_index_id); + template <size_t T> + static void dump_index_id(Rdb_buf_writer<T> *buf_writer, + Rdb_key_def::DATA_DICT_TYPE dict_type, + const GL_INDEX_ID &gl_index_id) { + buf_writer->write_uint32(dict_type); + buf_writer->write_uint32(gl_index_id.cf_id); + buf_writer->write_uint32(gl_index_id.index_id); + } + void delete_with_prefix(rocksdb::WriteBatch *const batch, Rdb_key_def::DATA_DICT_TYPE dict_type, const GL_INDEX_ID &gl_index_id) const; /* Functions for fast DROP 
TABLE/INDEX */ void resume_drop_indexes() const; void log_start_drop_table(const std::shared_ptr<Rdb_key_def> *const key_descr, - const uint32 &n_keys, + const uint32 n_keys, const char *const log_action) const; void log_start_drop_index(GL_INDEX_ID gl_index_id, const char *log_action) const; -public: + public: Rdb_dict_manager(const Rdb_dict_manager &) = delete; Rdb_dict_manager &operator=(const Rdb_dict_manager &) = delete; Rdb_dict_manager() = default; @@ -1302,7 +1406,7 @@ public: /* Raw RocksDB operations */ std::unique_ptr<rocksdb::WriteBatch> begin() const; - int commit(rocksdb::WriteBatch *const batch, const bool &sync = true) const; + int commit(rocksdb::WriteBatch *const batch, const bool sync = true) const; rocksdb::Status get_value(const rocksdb::Slice &key, std::string *const value) const; void put_key(rocksdb::WriteBatchBase *const batch, const rocksdb::Slice &key, @@ -1312,23 +1416,23 @@ public: rocksdb::Iterator *new_iterator() const; /* Internal Index id => CF */ - void - add_or_update_index_cf_mapping(rocksdb::WriteBatch *batch, - struct Rdb_index_info *const index_info) const; + void add_or_update_index_cf_mapping( + rocksdb::WriteBatch *batch, + struct Rdb_index_info *const index_info) const; void delete_index_info(rocksdb::WriteBatch *batch, const GL_INDEX_ID &index_id) const; bool get_index_info(const GL_INDEX_ID &gl_index_id, struct Rdb_index_info *const index_info) const; /* CF id => CF flags */ - void add_cf_flags(rocksdb::WriteBatch *const batch, const uint &cf_id, - const uint &cf_flags) const; - bool get_cf_flags(const uint &cf_id, uint *const cf_flags) const; + void add_cf_flags(rocksdb::WriteBatch *const batch, const uint cf_id, + const uint cf_flags) const; + bool get_cf_flags(const uint cf_id, uint *const cf_flags) const; /* Functions for fast CREATE/DROP TABLE/INDEX */ - void - get_ongoing_index_operation(std::unordered_set<GL_INDEX_ID> *gl_index_ids, - Rdb_key_def::DATA_DICT_TYPE dd_type) const; + void get_ongoing_index_operation( 
+ std::unordered_set<GL_INDEX_ID> *gl_index_ids, + Rdb_key_def::DATA_DICT_TYPE dd_type) const; bool is_index_operation_ongoing(const GL_INDEX_ID &gl_index_id, Rdb_key_def::DATA_DICT_TYPE dd_type) const; void start_ongoing_index_operation(rocksdb::WriteBatch *batch, @@ -1339,15 +1443,15 @@ public: Rdb_key_def::DATA_DICT_TYPE dd_type) const; bool is_drop_index_empty() const; void add_drop_table(std::shared_ptr<Rdb_key_def> *const key_descr, - const uint32 &n_keys, + const uint32 n_keys, rocksdb::WriteBatch *const batch) const; void add_drop_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids, rocksdb::WriteBatch *const batch) const; void add_create_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids, rocksdb::WriteBatch *const batch) const; - void - finish_indexes_operation(const std::unordered_set<GL_INDEX_ID> &gl_index_ids, - Rdb_key_def::DATA_DICT_TYPE dd_type) const; + void finish_indexes_operation( + const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + Rdb_key_def::DATA_DICT_TYPE dd_type) const; void rollback_ongoing_index_creation() const; inline void get_ongoing_drop_indexes( @@ -1390,7 +1494,7 @@ public: bool get_max_index_id(uint32_t *const index_id) const; bool update_max_index_id(rocksdb::WriteBatch *const batch, - const uint32_t &index_id) const; + const uint32_t index_id) const; void add_stats(rocksdb::WriteBatch *const batch, const std::vector<Rdb_index_stats> &stats) const; Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const; @@ -1517,4 +1621,4 @@ class Rdb_system_merge_op : public rocksdb::AssociativeMergeOperator { bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs); -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_global.h b/storage/rocksdb/rdb_global.h new file mode 100644 index 00000000000..7213571bf61 --- /dev/null +++ b/storage/rocksdb/rdb_global.h @@ -0,0 +1,392 @@ +/* + Copyright (c) 2018, Facebook, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* MyRocks global type definitions goes here */ + +#pragma once + +/* C++ standard header files */ +#include <limits> +#include <string> +#include <vector> + +/* MySQL header files */ +#include "./handler.h" /* handler */ +#include "./my_global.h" /* ulonglong */ +#include "./sql_string.h" +#include "./ut0counter.h" + +namespace myrocks { +/* + * class for exporting transaction information for + * information_schema.rocksdb_trx + */ +struct Rdb_trx_info { + std::string name; + ulonglong trx_id; + ulonglong write_count; + ulonglong lock_count; + int timeout_sec; + std::string state; + std::string waiting_key; + ulonglong waiting_cf_id; + int is_replication; + int skip_trx_api; + int read_only; + int deadlock_detect; + int num_ongoing_bulk_load; + ulong thread_id; + std::string query_str; +}; + +std::vector<Rdb_trx_info> rdb_get_all_trx_info(); + +/* + * class for exporting deadlock transaction information for + * information_schema.rocksdb_deadlock + */ +struct Rdb_deadlock_info { + struct Rdb_dl_trx_info { + ulonglong trx_id; + std::string cf_name; + std::string waiting_key; + bool exclusive_lock; + std::string index_name; + std::string table_name; + }; + std::vector<Rdb_dl_trx_info> path; + int64_t deadlock_time; + ulonglong victim_trx_id; +}; + +std::vector<Rdb_deadlock_info> rdb_get_deadlock_info(); + +/* + This is + 
- the name of the default Column Family (the CF which stores indexes which + didn't explicitly specify which CF they are in) + - the name used to set the default column family parameter for per-cf + arguments. +*/ +extern const std::string DEFAULT_CF_NAME; + +/* + This is the name of the Column Family used for storing the data dictionary. +*/ +extern const std::string DEFAULT_SYSTEM_CF_NAME; + +/* + This is the name of the hidden primary key for tables with no pk. +*/ +const char *const HIDDEN_PK_NAME = "HIDDEN_PK_ID"; + +/* + Column family name which means "put this index into its own column family". + DEPRECATED!!! +*/ +extern const std::string PER_INDEX_CF_NAME; + +/* + Name for the background thread. +*/ +const char *const BG_THREAD_NAME = "myrocks-bg"; + +/* + Name for the drop index thread. +*/ +const char *const INDEX_THREAD_NAME = "myrocks-index"; + +/* + Name for the manual compaction thread. +*/ +const char *const MANUAL_COMPACTION_THREAD_NAME = "myrocks-mc"; + +/* + Separator between partition name and the qualifier. Sample usage: + + - p0_cfname=foo + - p3_tts_col=bar +*/ +const char RDB_PER_PARTITION_QUALIFIER_NAME_SEP = '_'; + +/* + Separator between qualifier name and value. Sample usage: + + - p0_cfname=foo + - p3_tts_col=bar +*/ +const char RDB_QUALIFIER_VALUE_SEP = '='; + +/* + Separator between multiple qualifier assignments. Sample usage: + + - p0_cfname=foo;p1_cfname=bar;p2_cfname=baz +*/ +const char RDB_QUALIFIER_SEP = ';'; + +/* + Qualifier name for a custom per partition column family. +*/ +const char *const RDB_CF_NAME_QUALIFIER = "cfname"; + +/* + Qualifier name for a custom per partition ttl duration. +*/ +const char *const RDB_TTL_DURATION_QUALIFIER = "ttl_duration"; + +/* + Qualifier name for a custom per partition ttl duration. +*/ +const char *const RDB_TTL_COL_QUALIFIER = "ttl_col"; + +/* + Default, minimal valid, and maximum valid sampling rate values when collecting + statistics about table. 
+*/ +#define RDB_DEFAULT_TBL_STATS_SAMPLE_PCT 10 +#define RDB_TBL_STATS_SAMPLE_PCT_MIN 1 +#define RDB_TBL_STATS_SAMPLE_PCT_MAX 100 + +/* + Default and maximum values for rocksdb-compaction-sequential-deletes and + rocksdb-compaction-sequential-deletes-window to add basic boundary checking. +*/ +#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES 0 +#define MAX_COMPACTION_SEQUENTIAL_DELETES 2000000 + +#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW 0 +#define MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW 2000000 + +/* + Default and maximum values for various compaction and flushing related + options. Numbers are based on the hardware we currently use and our internal + benchmarks which indicate that parallelization helps with the speed of + compactions. + + Ideally of course we'll use heuristic technique to determine the number of + CPU-s and derive the values from there. This however has its own set of + problems and we'll choose simplicity for now. +*/ +#define MAX_BACKGROUND_JOBS 64 + +#define DEFAULT_SUBCOMPACTIONS 1 +#define MAX_SUBCOMPACTIONS 64 + +/* + Default value for rocksdb_sst_mgr_rate_bytes_per_sec = 0 (disabled). +*/ +#define DEFAULT_SST_MGR_RATE_BYTES_PER_SEC 0 + +/* + Defines the field sizes for serializing XID object to a string representation. + string byte format: [field_size: field_value, ...] + [ + 8: XID.formatID, + 1: XID.gtrid_length, + 1: XID.bqual_length, + XID.gtrid_length + XID.bqual_length: XID.data + ] +*/ +#define RDB_FORMATID_SZ 8 +#define RDB_GTRID_SZ 1 +#define RDB_BQUAL_SZ 1 +#define RDB_XIDHDR_LEN (RDB_FORMATID_SZ + RDB_GTRID_SZ + RDB_BQUAL_SZ) + +/* + To fix an unhandled exception we specify the upper bound as LONGLONGMAX + instead of ULONGLONGMAX because the latter is -1 and causes an exception when + cast to jlong (signed) of JNI + + The reason behind the cast issue is the lack of unsigned int support in Java. 
+*/ +#define MAX_RATE_LIMITER_BYTES_PER_SEC static_cast<uint64_t>(LLONG_MAX) + +/* + Hidden PK column (for tables with no primary key) is a longlong (aka 8 bytes). + static_assert() in code will validate this assumption. +*/ +#define ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN sizeof(longlong) + +/* + Bytes used to store TTL, in the beginning of all records for tables with TTL + enabled. +*/ +#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong) + +#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong) + +/* + Maximum index prefix length in bytes. +*/ +#define MAX_INDEX_COL_LEN_LARGE 3072 +#define MAX_INDEX_COL_LEN_SMALL 767 + +/* + MyRocks specific error codes. NB! Please make sure that you will update + HA_ERR_ROCKSDB_LAST when adding new ones. Also update the strings in + rdb_error_messages to include any new error messages. +*/ +#define HA_ERR_ROCKSDB_FIRST (HA_ERR_LAST + 1) +#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_ROCKSDB_FIRST + 0) +#define HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED \ + (HA_ERR_ROCKSDB_FIRST + 1) +#define HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED \ + (HA_ERR_ROCKSDB_FIRST + 2) +#define HA_ERR_ROCKSDB_COMMIT_FAILED (HA_ERR_ROCKSDB_FIRST + 3) +#define HA_ERR_ROCKSDB_BULK_LOAD (HA_ERR_ROCKSDB_FIRST + 4) +#define HA_ERR_ROCKSDB_CORRUPT_DATA (HA_ERR_ROCKSDB_FIRST + 5) +#define HA_ERR_ROCKSDB_CHECKSUM_MISMATCH (HA_ERR_ROCKSDB_FIRST + 6) +#define HA_ERR_ROCKSDB_INVALID_TABLE (HA_ERR_ROCKSDB_FIRST + 7) +#define HA_ERR_ROCKSDB_PROPERTIES (HA_ERR_ROCKSDB_FIRST + 8) +#define HA_ERR_ROCKSDB_MERGE_FILE_ERR (HA_ERR_ROCKSDB_FIRST + 9) +/* + Each error code below maps to a RocksDB status code found in: + rocksdb/include/rocksdb/status.h +*/ +#define HA_ERR_ROCKSDB_STATUS_NOT_FOUND (HA_ERR_LAST + 10) +#define HA_ERR_ROCKSDB_STATUS_CORRUPTION (HA_ERR_LAST + 11) +#define HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED (HA_ERR_LAST + 12) +#define HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT (HA_ERR_LAST + 13) +#define HA_ERR_ROCKSDB_STATUS_IO_ERROR (HA_ERR_LAST + 14) +#define 
HA_ERR_ROCKSDB_STATUS_NO_SPACE (HA_ERR_LAST + 15) +#define HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS (HA_ERR_LAST + 16) +#define HA_ERR_ROCKSDB_STATUS_INCOMPLETE (HA_ERR_LAST + 17) +#define HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS (HA_ERR_LAST + 18) +#define HA_ERR_ROCKSDB_STATUS_TIMED_OUT (HA_ERR_LAST + 19) +#define HA_ERR_ROCKSDB_STATUS_ABORTED (HA_ERR_LAST + 20) +#define HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT (HA_ERR_LAST + 21) +#define HA_ERR_ROCKSDB_STATUS_BUSY (HA_ERR_LAST + 22) +#define HA_ERR_ROCKSDB_STATUS_DEADLOCK (HA_ERR_LAST + 23) +#define HA_ERR_ROCKSDB_STATUS_EXPIRED (HA_ERR_LAST + 24) +#define HA_ERR_ROCKSDB_STATUS_TRY_AGAIN (HA_ERR_LAST + 25) +#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_STATUS_TRY_AGAIN + +const char *const rocksdb_hton_name = "ROCKSDB"; + +typedef struct _gl_index_id_s { + uint32_t cf_id; + uint32_t index_id; + bool operator==(const struct _gl_index_id_s &other) const { + return cf_id == other.cf_id && index_id == other.index_id; + } + bool operator!=(const struct _gl_index_id_s &other) const { + return cf_id != other.cf_id || index_id != other.index_id; + } + bool operator<(const struct _gl_index_id_s &other) const { + return cf_id < other.cf_id || + (cf_id == other.cf_id && index_id < other.index_id); + } + bool operator<=(const struct _gl_index_id_s &other) const { + return cf_id < other.cf_id || + (cf_id == other.cf_id && index_id <= other.index_id); + } + bool operator>(const struct _gl_index_id_s &other) const { + return cf_id > other.cf_id || + (cf_id == other.cf_id && index_id > other.index_id); + } + bool operator>=(const struct _gl_index_id_s &other) const { + return cf_id > other.cf_id || + (cf_id == other.cf_id && index_id >= other.index_id); + } +} GL_INDEX_ID; + +enum operation_type : int { + ROWS_DELETED = 0, + ROWS_INSERTED, + ROWS_READ, + ROWS_UPDATED, + ROWS_DELETED_BLIND, + ROWS_EXPIRED, + ROWS_FILTERED, + ROWS_HIDDEN_NO_SNAPSHOT, + ROWS_MAX +}; + +enum query_type : int { QUERIES_POINT = 0, QUERIES_RANGE, QUERIES_MAX }; 
+ +#if defined(HAVE_SCHED_GETCPU) +#define RDB_INDEXER get_sched_indexer_t +#else +#define RDB_INDEXER thread_id_indexer_t +#endif + +/* Global statistics struct used inside MyRocks */ +struct st_global_stats { + ib_counter_t<ulonglong, 64, RDB_INDEXER> rows[ROWS_MAX]; + + // system_rows_ stats are only for system + // tables. They are not counted in rows_* stats. + ib_counter_t<ulonglong, 64, RDB_INDEXER> system_rows[ROWS_MAX]; + + ib_counter_t<ulonglong, 64, RDB_INDEXER> queries[QUERIES_MAX]; + + ib_counter_t<ulonglong, 64, RDB_INDEXER> covered_secondary_key_lookups; +}; + +/* Struct used for exporting status to MySQL */ +struct st_export_stats { + ulonglong rows_deleted; + ulonglong rows_inserted; + ulonglong rows_read; + ulonglong rows_updated; + ulonglong rows_deleted_blind; + ulonglong rows_expired; + ulonglong rows_filtered; + ulonglong rows_hidden_no_snapshot; + + ulonglong system_rows_deleted; + ulonglong system_rows_inserted; + ulonglong system_rows_read; + ulonglong system_rows_updated; + + ulonglong queries_point; + ulonglong queries_range; + + ulonglong covered_secondary_key_lookups; +}; + +/* Struct used for exporting RocksDB memory status */ +struct st_memory_stats { + ulonglong memtable_total; + ulonglong memtable_unflushed; +}; + +/* Struct used for exporting RocksDB IO stalls stats */ +struct st_io_stall_stats { + ulonglong level0_slowdown; + ulonglong level0_slowdown_with_compaction; + ulonglong level0_numfiles; + ulonglong level0_numfiles_with_compaction; + ulonglong stop_for_pending_compaction_bytes; + ulonglong slowdown_for_pending_compaction_bytes; + ulonglong memtable_compaction; + ulonglong memtable_slowdown; + ulonglong total_stop; + ulonglong total_slowdown; + + st_io_stall_stats() + : level0_slowdown(0), + level0_slowdown_with_compaction(0), + level0_numfiles(0), + level0_numfiles_with_compaction(0), + stop_for_pending_compaction_bytes(0), + slowdown_for_pending_compaction_bytes(0), + memtable_compaction(0), + memtable_slowdown(0), + 
total_stop(0), + total_slowdown(0) {} +}; +} // namespace myrocks diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc index 472d2c87b1c..5350ec3bce9 100644 --- a/storage/rocksdb/rdb_i_s.cc +++ b/storage/rocksdb/rdb_i_s.cc @@ -52,10 +52,10 @@ namespace myrocks { engine. */ -#define ROCKSDB_FIELD_INFO(_name_, _len_, _type_, _flag_) \ +#define ROCKSDB_FIELD_INFO(_name_, _len_, _type_, _flag_) \ { _name_, _len_, _type_, 0, _flag_, nullptr, 0 } -#define ROCKSDB_FIELD_INFO_END \ +#define ROCKSDB_FIELD_INFO_END \ ROCKSDB_FIELD_INFO(nullptr, 0, MYSQL_TYPE_NULL, 0) /* @@ -63,7 +63,7 @@ namespace myrocks { */ namespace RDB_CFSTATS_FIELD { enum { CF_NAME = 0, STAT_TYPE, VALUE }; -} // namespace RDB_CFSTATS_FIELD +} // namespace RDB_CFSTATS_FIELD using namespace Show; @@ -165,7 +165,7 @@ static int rdb_i_s_cfstats_init(void *p) { */ namespace RDB_DBSTATS_FIELD { enum { STAT_TYPE = 0, VALUE }; -} // namespace RDB_DBSTATS_FIELD +} // namespace RDB_DBSTATS_FIELD static ST_FIELD_INFO rdb_i_s_dbstats_fields_info[] = { Column("STAT_TYPE", Varchar(NAME_LEN + 1), NOT_NULL), @@ -261,7 +261,7 @@ static int rdb_i_s_dbstats_init(void *const p) { */ namespace RDB_PERF_CONTEXT_FIELD { enum { TABLE_SCHEMA = 0, TABLE_NAME, PARTITION_NAME, STAT_TYPE, VALUE }; -} // namespace RDB_PERF_CONTEXT_FIELD +} // namespace RDB_PERF_CONTEXT_FIELD static ST_FIELD_INFO rdb_i_s_perf_context_fields_info[] = { Column("TABLE_SCHEMA", Varchar(NAME_LEN + 1), NOT_NULL), @@ -363,7 +363,7 @@ static int rdb_i_s_perf_context_init(void *const p) { */ namespace RDB_PERF_CONTEXT_GLOBAL_FIELD { enum { STAT_TYPE = 0, VALUE }; -} // namespace RDB_PERF_CONTEXT_GLOBAL_FIELD +} // namespace RDB_PERF_CONTEXT_GLOBAL_FIELD static ST_FIELD_INFO rdb_i_s_perf_context_global_fields_info[] = { Column("STAT_TYPE", Varchar(NAME_LEN + 1), NOT_NULL), @@ -433,7 +433,7 @@ static int rdb_i_s_perf_context_global_init(void *const p) { */ namespace RDB_CFOPTIONS_FIELD { enum { CF_NAME = 0, OPTION_TYPE, VALUE }; -} // namespace 
RDB_CFOPTIONS_FIELD +} // namespace RDB_CFOPTIONS_FIELD static ST_FIELD_INFO rdb_i_s_cfoptions_fields_info[] = { Column("CF_NAME", Varchar(NAME_LEN + 1), NOT_NULL), @@ -593,20 +593,20 @@ static int rdb_i_s_cfoptions_fill_table( // get COMPACTION_STYLE option switch (opts.compaction_style) { - case rocksdb::kCompactionStyleLevel: - val = "kCompactionStyleLevel"; - break; - case rocksdb::kCompactionStyleUniversal: - val = "kCompactionStyleUniversal"; - break; - case rocksdb::kCompactionStyleFIFO: - val = "kCompactionStyleFIFO"; - break; - case rocksdb::kCompactionStyleNone: - val = "kCompactionStyleNone"; - break; - default: - val = "NULL"; + case rocksdb::kCompactionStyleLevel: + val = "kCompactionStyleLevel"; + break; + case rocksdb::kCompactionStyleUniversal: + val = "kCompactionStyleUniversal"; + break; + case rocksdb::kCompactionStyleFIFO: + val = "kCompactionStyleFIFO"; + break; + case rocksdb::kCompactionStyleNone: + val = "kCompactionStyleNone"; + break; + default: + val = "NULL"; } cf_option_types.push_back({"COMPACTION_STYLE", val}); @@ -629,14 +629,14 @@ static int rdb_i_s_cfoptions_fill_table( val.append("; STOP_STYLE="); switch (compac_opts.stop_style) { - case rocksdb::kCompactionStopStyleSimilarSize: - val.append("kCompactionStopStyleSimilarSize}"); - break; - case rocksdb::kCompactionStopStyleTotalSize: - val.append("kCompactionStopStyleTotalSize}"); - break; - default: - val.append("}"); + case rocksdb::kCompactionStopStyleSimilarSize: + val.append("kCompactionStopStyleSimilarSize}"); + break; + case rocksdb::kCompactionStopStyleTotalSize: + val.append("kCompactionStopStyleTotalSize}"); + break; + default: + val.append("}"); } cf_option_types.push_back({"COMPACTION_OPTIONS_UNIVERSAL", val}); @@ -795,10 +795,11 @@ static int rdb_i_s_global_info_fill_table( if (!dict_manager->get_cf_flags(cf_handle->GetID(), &flags)) { // NO_LINT_DEBUG - sql_print_error("RocksDB: Failed to get column family flags " - "from CF with id = %u. 
MyRocks data dictionary may " - "be corrupted.", - cf_handle->GetID()); + sql_print_error( + "RocksDB: Failed to get column family flags " + "from CF with id = %u. MyRocks data dictionary may " + "be corrupted.", + cf_handle->GetID()); abort(); } @@ -907,7 +908,7 @@ static ST_FIELD_INFO rdb_i_s_compact_stats_fields_info[] = { Column("VALUE", Double(MY_INT64_NUM_DECIMAL_DIGITS), NOT_NULL), CEnd()}; -namespace // anonymous namespace = not visible outside this source file +namespace // anonymous namespace = not visible outside this source file { struct Rdb_ddl_scanner : public Rdb_tables_scanner { my_core::THD *m_thd; @@ -915,7 +916,7 @@ struct Rdb_ddl_scanner : public Rdb_tables_scanner { int add_table(Rdb_tbl_def *tdef) override; }; -} // anonymous namespace +} // anonymous namespace /* Support for INFORMATION_SCHEMA.ROCKSDB_DDL dynamic table @@ -935,7 +936,7 @@ enum { CF, AUTO_INCREMENT }; -} // namespace RDB_DDL_FIELD +} // namespace RDB_DDL_FIELD static ST_FIELD_INFO rdb_i_s_ddl_fields_info[] = { Column("TABLE_SCHEMA", Varchar(NAME_LEN + 1), NOT_NULL), @@ -1007,8 +1008,7 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) { } ret = my_core::schema_table_store_record(m_thd, m_table); - if (ret) - return ret; + if (ret) return ret; } return HA_EXIT_SUCCESS; } @@ -1145,7 +1145,11 @@ enum { TOP_LEVEL_INDEX_SIZE, FILTER_BLOCK_SIZE, COMPRESSION_ALGO, - CREATION_TIME + CREATION_TIME, + FILE_CREATION_TIME, + OLDEST_KEY_TIME, + FILTER_POLICY, + COMPRESSION_OPTIONS, }; } // namespace RDB_SST_PROPS_FIELD @@ -1163,6 +1167,10 @@ static ST_FIELD_INFO rdb_i_s_sst_props_fields_info[] = { Column("FILTER_BLOCK_SIZE", SLonglong(), NOT_NULL), Column("COMPRESSION_ALGO", Varchar(NAME_LEN + 1), NOT_NULL), Column("CREATION_TIME", SLonglong(), NOT_NULL), + Column("FILE_CREATION_TIME", SLonglong(), NOT_NULL), + Column("OLDEST_KEY_TIME", SLonglong(), NOT_NULL), + Column("FILTER_POLICY", Varchar(NAME_LEN + 1), NOT_NULL), + Column("COMPRESSION_OPTIONS", Varchar(NAME_LEN + 1), NOT_NULL), 
CEnd()}; static int rdb_i_s_sst_props_fill_table( @@ -1235,6 +1243,24 @@ static int rdb_i_s_sst_props_fill_table( } field[RDB_SST_PROPS_FIELD::CREATION_TIME]->store( props.second->creation_time, true); + field[RDB_SST_PROPS_FIELD::FILE_CREATION_TIME]->store( + props.second->file_creation_time, true); + field[RDB_SST_PROPS_FIELD::OLDEST_KEY_TIME]->store( + props.second->oldest_key_time, true); + if (props.second->filter_policy_name.empty()) { + field[RDB_SST_PROPS_FIELD::FILTER_POLICY]->set_null(); + } else { + field[RDB_SST_PROPS_FIELD::FILTER_POLICY]->store( + props.second->filter_policy_name.c_str(), + props.second->filter_policy_name.size(), system_charset_info); + } + if (props.second->compression_options.empty()) { + field[RDB_SST_PROPS_FIELD::COMPRESSION_OPTIONS]->set_null(); + } else { + field[RDB_SST_PROPS_FIELD::COMPRESSION_OPTIONS]->store( + props.second->compression_options.c_str(), + props.second->compression_options.size(), system_charset_info); + } /* Tell MySQL about this row in the virtual table */ ret = static_cast<int>( @@ -1281,7 +1307,7 @@ enum { ENTRY_OTHERS, DISTINCT_KEYS_PREFIX }; -} // namespace RDB_INDEX_FILE_MAP_FIELD +} // namespace RDB_INDEX_FILE_MAP_FIELD static ST_FIELD_INFO rdb_i_s_index_file_map_fields_info[] = { /* The information_schema.rocksdb_index_file_map virtual table has four @@ -1432,7 +1458,7 @@ static int rdb_i_s_index_file_map_init(void *const p) { */ namespace RDB_LOCKS_FIELD { enum { COLUMN_FAMILY_ID = 0, TRANSACTION_ID, KEY, MODE }; -} // namespace RDB_LOCKS_FIELD +} // namespace RDB_LOCKS_FIELD static ST_FIELD_INFO rdb_i_s_lock_info_fields_info[] = { Column("COLUMN_FAMILY_ID", SLong(), NOT_NULL), @@ -1533,7 +1559,7 @@ enum { THREAD_ID, QUERY }; -} // namespace RDB_TRX_FIELD +} // namespace RDB_TRX_FIELD static ST_FIELD_INFO rdb_i_s_trx_info_fields_info[] = { Column("TRANSACTION_ID", SLonglong(), NOT_NULL), @@ -1655,7 +1681,7 @@ enum { TABLE_NAME, ROLLED_BACK, }; -} // namespace RDB_TRX_FIELD +} // namespace 
RDB_DEADLOCK_FIELD static ST_FIELD_INFO rdb_i_s_deadlock_info_fields_info[] = { Column("DEADLOCK_ID", SLonglong(), NOT_NULL), @@ -1968,4 +1994,4 @@ struct st_maria_plugin rdb_i_s_deadlock_info = { nullptr, /* config options */ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_i_s.h b/storage/rocksdb/rdb_i_s.h index a0783f7b8c0..6001742d984 100644 --- a/storage/rocksdb/rdb_i_s.h +++ b/storage/rocksdb/rdb_i_s.h @@ -34,4 +34,4 @@ extern struct st_maria_plugin rdb_i_s_index_file_map; extern struct st_maria_plugin rdb_i_s_lock_info; extern struct st_maria_plugin rdb_i_s_trx_info; extern struct st_maria_plugin rdb_i_s_deadlock_info; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_index_merge.cc b/storage/rocksdb/rdb_index_merge.cc index b6088eb8f4e..2aac7c7a658 100644 --- a/storage/rocksdb/rdb_index_merge.cc +++ b/storage/rocksdb/rdb_index_merge.cc @@ -29,14 +29,17 @@ namespace myrocks { Rdb_index_merge::Rdb_index_merge(const char *const tmpfile_path, - const ulonglong &merge_buf_size, - const ulonglong &merge_combine_read_size, - const ulonglong &merge_tmp_file_removal_delay, + const ulonglong merge_buf_size, + const ulonglong merge_combine_read_size, + const ulonglong merge_tmp_file_removal_delay, rocksdb::ColumnFamilyHandle *cf) - : m_tmpfile_path(tmpfile_path), m_merge_buf_size(merge_buf_size), + : m_tmpfile_path(tmpfile_path), + m_merge_buf_size(merge_buf_size), m_merge_combine_read_size(merge_combine_read_size), m_merge_tmp_file_removal_delay(merge_tmp_file_removal_delay), - m_cf_handle(cf), m_rec_buf_unsorted(nullptr), m_output_buf(nullptr) {} + m_cf_handle(cf), + m_rec_buf_unsorted(nullptr), + m_output_buf(nullptr) {} Rdb_index_merge::~Rdb_index_merge() { /* @@ -152,8 +155,9 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) { */ if (m_offset_tree.empty()) { // NO_LINT_DEBUG - sql_print_error("Sort buffer size is too small to 
process merge. " - "Please set merge buffer size to a higher value."); + sql_print_error( + "Sort buffer size is too small to process merge. " + "Please set merge buffer size to a higher value."); return HA_ERR_ROCKSDB_MERGE_FILE_ERR; } @@ -623,4 +627,4 @@ void Rdb_index_merge::merge_reset() { } } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_index_merge.h b/storage/rocksdb/rdb_index_merge.h index e70923bbb0e..756b99ca4f1 100644 --- a/storage/rocksdb/rdb_index_merge.h +++ b/storage/rocksdb/rdb_index_merge.h @@ -61,7 +61,7 @@ class Rdb_index_merge { /* heap memory allocated for main memory sort/merge */ std::unique_ptr<uchar[]> m_block; const ulonglong - m_block_len; /* amount of data bytes allocated for block above */ + m_block_len; /* amount of data bytes allocated for block above */ ulonglong m_curr_offset; /* offset of the record pointer for the block */ ulonglong m_disk_start_offset; /* where the chunk starts on disk */ ulonglong m_disk_curr_offset; /* current offset on disk */ @@ -87,8 +87,11 @@ class Rdb_index_merge { } explicit merge_buf_info(const ulonglong merge_block_size) - : m_block(nullptr), m_block_len(merge_block_size), m_curr_offset(0), - m_disk_start_offset(0), m_disk_curr_offset(0), + : m_block(nullptr), + m_block_len(merge_block_size), + m_curr_offset(0), + m_disk_start_offset(0), + m_disk_curr_offset(0), m_total_size(merge_block_size) { /* Will throw an exception if it runs out of memory here */ m_block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]); @@ -189,9 +192,9 @@ class Rdb_index_merge { public: Rdb_index_merge(const char *const tmpfile_path, - const ulonglong &merge_buf_size, - const ulonglong &merge_combine_read_size, - const ulonglong &merge_tmp_file_removal_delay, + const ulonglong merge_buf_size, + const ulonglong merge_combine_read_size, + const ulonglong merge_tmp_file_removal_delay, rocksdb::ColumnFamilyHandle *cf); ~Rdb_index_merge(); diff --git a/storage/rocksdb/rdb_io_watchdog.cc 
b/storage/rocksdb/rdb_io_watchdog.cc index 5b809dbf553..07834118db0 100644 --- a/storage/rocksdb/rdb_io_watchdog.cc +++ b/storage/rocksdb/rdb_io_watchdog.cc @@ -40,10 +40,11 @@ void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) { // At this point we know that I/O has been stuck in `write()` for more than // `m_write_timeout` seconds. We'll log a message and shut down the service. // NO_LINT_DEBUG - sql_print_error("MyRocks has detected a combination of I/O requests which " - "have cumulatively been blocking for more than %u seconds. " - "Shutting the service down.", - m_write_timeout); + sql_print_error( + "MyRocks has detected a combination of I/O requests which " + "have cumulatively been blocking for more than %u seconds. " + "Shutting the service down.", + m_write_timeout); abort(); } @@ -151,7 +152,7 @@ int Rdb_io_watchdog::check_write_access(const std::string &dirname) const { return HA_EXIT_SUCCESS; } -int Rdb_io_watchdog::reset_timeout(const uint32_t &write_timeout) { +int Rdb_io_watchdog::reset_timeout(const uint32_t write_timeout) { // This function will be called either from a thread initializing MyRocks // engine or handling system variable changes. We need to account for the // possibility of I/O callback executing at the same time. 
If that happens diff --git a/storage/rocksdb/rdb_io_watchdog.h b/storage/rocksdb/rdb_io_watchdog.h index 9c391eee3f3..8ee5b1f6c93 100644 --- a/storage/rocksdb/rdb_io_watchdog.h +++ b/storage/rocksdb/rdb_io_watchdog.h @@ -17,12 +17,12 @@ #pragma once /* C++ standard header files */ -#include <atomic> #include <signal.h> #include <stdlib.h> -#include <string> #include <string.h> #include <time.h> +#include <atomic> +#include <string> #include <vector> /* MySQL header files */ @@ -92,9 +92,11 @@ class Rdb_io_watchdog { } public: - explicit Rdb_io_watchdog(const std::vector<std::string> &directories) - : m_io_check_timer(nullptr), m_io_check_watchdog_timer(nullptr), - m_io_in_progress(false), m_dirs_to_check(std::move(directories)), + explicit Rdb_io_watchdog(std::vector<std::string> &&directories) + : m_io_check_timer(nullptr), + m_io_check_watchdog_timer(nullptr), + m_io_in_progress(false), + m_dirs_to_check(std::move(directories)), m_buf(nullptr) { DBUG_ASSERT(m_dirs_to_check.size() > 0); mysql_mutex_init(0, &m_reset_mutex, MY_MUTEX_INIT_FAST); @@ -107,7 +109,7 @@ class Rdb_io_watchdog { free(m_buf); } - int reset_timeout(const uint32_t &write_timeout); + int reset_timeout(const uint32_t write_timeout); Rdb_io_watchdog(const Rdb_io_watchdog &) = delete; Rdb_io_watchdog &operator=(const Rdb_io_watchdog &) = delete; diff --git a/storage/rocksdb/rdb_mariadb_server_port.cc b/storage/rocksdb/rdb_mariadb_server_port.cc index eab208d51a3..e74c792c75b 100644 --- a/storage/rocksdb/rdb_mariadb_server_port.cc +++ b/storage/rocksdb/rdb_mariadb_server_port.cc @@ -9,9 +9,7 @@ #include "./log.h" #include <mysys_err.h> #include <mysql/psi/mysql_table.h> -#ifdef MARIAROCKS_NOT_YET -#include <mysql/thread_pool_priv.h> -#endif +//#include <mysql/thread_pool_priv.h> #include <string> diff --git a/storage/rocksdb/rdb_mutex_wrapper.cc b/storage/rocksdb/rdb_mutex_wrapper.cc index 9bead8ba660..5e0591fe003 100644 --- a/storage/rocksdb/rdb_mutex_wrapper.cc +++ 
b/storage/rocksdb/rdb_mutex_wrapper.cc @@ -67,9 +67,9 @@ Status Rdb_cond_var::Wait(const std::shared_ptr<TransactionDBMutex> mutex_arg) { thd_killed() to determine which occurred) */ -Status -Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg, - int64_t timeout_micros) { +Status Rdb_cond_var::WaitFor( + const std::shared_ptr<TransactionDBMutex> mutex_arg, + int64_t timeout_micros) { auto *mutex_obj = reinterpret_cast<Rdb_mutex *>(mutex_arg.get()); DBUG_ASSERT(mutex_obj != nullptr); @@ -78,8 +78,7 @@ Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg, int res = 0; struct timespec wait_timeout; - if (timeout_micros < 0) - timeout_micros = ONE_YEAR_IN_MICROSECS; + if (timeout_micros < 0) timeout_micros = ONE_YEAR_IN_MICROSECS; set_timespec_nsec(wait_timeout, timeout_micros * 1000); #ifndef STANDALONE_UNITTEST @@ -108,15 +107,15 @@ Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg, res = mysql_cond_timedwait(&m_cond, mutex_ptr, &wait_timeout); #ifndef STANDALONE_UNITTEST - if (current_thd) - killed= thd_killed(current_thd); + if (current_thd) killed = thd_killed(current_thd); #endif } while (!killed && res == EINTR); - if (res || killed) + if (res || killed) { return Status::TimedOut(); - else + } else { return Status::OK(); + } } /* @@ -212,4 +211,4 @@ void Rdb_mutex::UnLock() { RDB_MUTEX_UNLOCK_CHECK(m_mutex); } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_mutex_wrapper.h b/storage/rocksdb/rdb_mutex_wrapper.h index 567e81e5ef6..33eefe9d50c 100644 --- a/storage/rocksdb/rdb_mutex_wrapper.h +++ b/storage/rocksdb/rdb_mutex_wrapper.h @@ -37,9 +37,9 @@ class Rdb_mutex : public rocksdb::TransactionDBMutex { Rdb_mutex(const Rdb_mutex &p) = delete; Rdb_mutex &operator=(const Rdb_mutex &p) = delete; -public: + public: Rdb_mutex(); - virtual ~Rdb_mutex(); + virtual ~Rdb_mutex() override; /* Override parent class's virtual methods of interrest. 
@@ -55,13 +55,13 @@ public: // TimedOut if timed out, // or other Status on failure. // If returned status is OK, TransactionDB will eventually call UnLock(). - virtual rocksdb::Status - TryLockFor(int64_t timeout_time MY_ATTRIBUTE((__unused__))) override; + virtual rocksdb::Status TryLockFor( + int64_t timeout_time MY_ATTRIBUTE((__unused__))) override; // Unlock Mutex that was successfully locked by Lock() or TryLockUntil() virtual void UnLock() override; -private: + private: mysql_mutex_t m_mutex; friend class Rdb_cond_var; @@ -75,9 +75,9 @@ class Rdb_cond_var : public rocksdb::TransactionDBCondVar { Rdb_cond_var(const Rdb_cond_var &) = delete; Rdb_cond_var &operator=(const Rdb_cond_var &) = delete; -public: + public: Rdb_cond_var(); - virtual ~Rdb_cond_var(); + virtual ~Rdb_cond_var() override; /* Override parent class's virtual methods of interrest. @@ -88,8 +88,8 @@ public: // Returns OK if notified. // Returns non-OK if TransactionDB should stop waiting and fail the operation. // May return OK spuriously even if not notified. - virtual rocksdb::Status - Wait(const std::shared_ptr<rocksdb::TransactionDBMutex> mutex) override; + virtual rocksdb::Status Wait( + const std::shared_ptr<rocksdb::TransactionDBMutex> mutex) override; // Block current thread until condition variable is notifiesd by a call to // Notify() or NotifyAll(), or if the timeout is reached. @@ -103,9 +103,9 @@ public: // Returns other status if TransactionDB should otherwis stop waiting and // fail the operation. // May return OK spuriously even if not notified. - virtual rocksdb::Status - WaitFor(const std::shared_ptr<rocksdb::TransactionDBMutex> mutex, - int64_t timeout_time) override; + virtual rocksdb::Status WaitFor( + const std::shared_ptr<rocksdb::TransactionDBMutex> mutex, + int64_t timeout_time) override; // If any threads are waiting on *this, unblock at least one of the // waiting threads. @@ -114,12 +114,12 @@ public: // Unblocks all threads waiting on *this. 
virtual void NotifyAll() override; -private: + private: mysql_cond_t m_cond; }; class Rdb_mutex_factory : public rocksdb::TransactionDBMutexFactory { -public: + public: Rdb_mutex_factory(const Rdb_mutex_factory &) = delete; Rdb_mutex_factory &operator=(const Rdb_mutex_factory &) = delete; Rdb_mutex_factory() {} @@ -127,17 +127,17 @@ public: Override parent class's virtual methods of interrest. */ - virtual std::shared_ptr<rocksdb::TransactionDBMutex> - AllocateMutex() override { + virtual std::shared_ptr<rocksdb::TransactionDBMutex> AllocateMutex() + override { return std::make_shared<Rdb_mutex>(); } - virtual std::shared_ptr<rocksdb::TransactionDBCondVar> - AllocateCondVar() override { + virtual std::shared_ptr<rocksdb::TransactionDBCondVar> AllocateCondVar() + override { return std::make_shared<Rdb_cond_var>(); } - virtual ~Rdb_mutex_factory() {} + virtual ~Rdb_mutex_factory() override {} }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc index d744ff56d89..0b22a7586dd 100644 --- a/storage/rocksdb/rdb_perf_context.cc +++ b/storage/rocksdb/rdb_perf_context.cc @@ -98,17 +98,19 @@ std::string rdb_pc_stat_types[] = { "IO_RANGE_SYNC_NANOS", "IO_LOGGER_NANOS"}; -#define IO_PERF_RECORD(_field_) \ - do { \ - if (rocksdb::get_perf_context()->_field_ > 0) \ - counters->m_value[idx] += rocksdb::get_perf_context()->_field_; \ - idx++; \ +#define IO_PERF_RECORD(_field_) \ + do { \ + if (rocksdb::get_perf_context()->_field_ > 0) { \ + counters->m_value[idx] += rocksdb::get_perf_context()->_field_; \ + } \ + idx++; \ } while (0) -#define IO_STAT_RECORD(_field_) \ - do { \ - if (rocksdb::get_iostats_context()->_field_ > 0) \ - counters->m_value[idx] += rocksdb::get_iostats_context()->_field_; \ - idx++; \ +#define IO_STAT_RECORD(_field_) \ + do { \ + if (rocksdb::get_iostats_context()->_field_ > 0) { \ + counters->m_value[idx] += rocksdb::get_iostats_context()->_field_; \ + } \ + idx++; \ } 
while (0) static void harvest_diffs(Rdb_atomic_perf_counters *const counters) { @@ -280,4 +282,4 @@ void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) { #endif } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h index 036c497c2f5..d8381b8ea94 100644 --- a/storage/rocksdb/rdb_perf_context.h +++ b/storage/rocksdb/rdb_perf_context.h @@ -22,8 +22,8 @@ #include <string> /* MySQL header files */ -#include "./handler.h" #include <my_global.h> +#include "./handler.h" #include "rdb_mariadb_port.h" @@ -108,7 +108,7 @@ class Rdb_perf_counters { Rdb_perf_counters(const Rdb_perf_counters &) = delete; Rdb_perf_counters &operator=(const Rdb_perf_counters &) = delete; -public: + public: Rdb_perf_counters() = default; uint64_t m_value[PC_MAX_IDX]; @@ -158,8 +158,11 @@ class Rdb_io_perf { void end_and_record(const uint32_t perf_context_level); explicit Rdb_io_perf() - : m_atomic_counters(nullptr), m_shared_io_perf_read(nullptr), - m_stats(nullptr), io_write_bytes(0), io_write_requests(0) {} + : m_atomic_counters(nullptr), + m_shared_io_perf_read(nullptr), + m_stats(nullptr), + io_write_bytes(0), + io_write_requests(0) {} }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_psi.cc b/storage/rocksdb/rdb_psi.cc index 361a648bba4..77003b1bb48 100644 --- a/storage/rocksdb/rdb_psi.cc +++ b/storage/rocksdb/rdb_psi.cc @@ -14,7 +14,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif #define MYSQL_SERVER 1 @@ -22,9 +22,6 @@ /* The C++ file's header */ #include "./rdb_psi.h" -/* MySQL header files */ -#include <mysql/psi/mysql_stage.h> - namespace myrocks { /* @@ -94,17 +91,14 @@ void init_rocksdb_psi_keys() { const char *const category = "rocksdb"; int count; - if (PSI_server == nullptr) - 
return; - count = array_elements(all_rocksdb_mutexes); - PSI_server->register_mutex(category, all_rocksdb_mutexes, count); + mysql_mutex_register(category, all_rocksdb_mutexes, count); count = array_elements(all_rocksdb_rwlocks); - PSI_server->register_rwlock(category, all_rocksdb_rwlocks, count); + mysql_rwlock_register(category, all_rocksdb_rwlocks, count); count = array_elements(all_rocksdb_conds); - //TODO Disabling PFS for conditions due to the bug + // TODO(jay) Disabling PFS for conditions due to the bug // https://github.com/MySQLOnRocksDB/mysql-5.6/issues/92 // PSI_server->register_cond(category, all_rocksdb_conds, count); @@ -114,7 +108,7 @@ void init_rocksdb_psi_keys() { count = array_elements(all_rocksdb_threads); mysql_thread_register(category, all_rocksdb_threads, count); } -#else // HAVE_PSI_INTERFACE +#else // HAVE_PSI_INTERFACE void init_rocksdb_psi_keys() {} #endif // HAVE_PSI_INTERFACE diff --git a/storage/rocksdb/rdb_psi.h b/storage/rocksdb/rdb_psi.h index e0d6e7e3a47..2703837a156 100644 --- a/storage/rocksdb/rdb_psi.h +++ b/storage/rocksdb/rdb_psi.h @@ -20,7 +20,8 @@ /* MySQL header files */ #include <my_global.h> #include <my_pthread.h> -#include <mysql/psi/psi.h> + +#include <mysql/psi/mysql_stage.h> /* MyRocks header files */ #include "./rdb_utils.h" diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc index 6201c9f0207..9f470ea2fef 100644 --- a/storage/rocksdb/rdb_sst_info.cc +++ b/storage/rocksdb/rdb_sst_info.cc @@ -51,8 +51,13 @@ Rdb_sst_file_ordered::Rdb_sst_file::Rdb_sst_file( rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf, const rocksdb::DBOptions &db_options, const std::string &name, const bool tracing) - : m_db(db), m_cf(cf), m_db_options(db_options), m_sst_file_writer(nullptr), - m_name(name), m_tracing(tracing), m_comparator(cf->GetComparator()) { + : m_db(db), + m_cf(cf), + m_db_options(db_options), + m_sst_file_writer(nullptr), + m_name(name), + m_tracing(tracing), + 
m_comparator(cf->GetComparator()) { DBUG_ASSERT(db != nullptr); DBUG_ASSERT(cf != nullptr); } @@ -61,11 +66,6 @@ Rdb_sst_file_ordered::Rdb_sst_file::~Rdb_sst_file() { // Make sure we clean up delete m_sst_file_writer; m_sst_file_writer = nullptr; - - // In case something went wrong attempt to delete the temporary file. - // If everything went fine that file will have been renamed and this - // function call will fail. - std::remove(m_name.c_str()); } rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::open() { @@ -102,9 +102,8 @@ rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::open() { return s; } -rocksdb::Status -Rdb_sst_file_ordered::Rdb_sst_file::put(const rocksdb::Slice &key, - const rocksdb::Slice &value) { +rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::put( + const rocksdb::Slice &key, const rocksdb::Slice &value) { DBUG_ASSERT(m_sst_file_writer != nullptr); #ifdef __GNUC__ @@ -118,8 +117,8 @@ Rdb_sst_file_ordered::Rdb_sst_file::put(const rocksdb::Slice &key, return m_sst_file_writer->Add(key, value); } -std::string -Rdb_sst_file_ordered::Rdb_sst_file::generateKey(const std::string &key) { +std::string Rdb_sst_file_ordered::Rdb_sst_file::generateKey( + const std::string &key) { static char const hexdigit[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; @@ -140,7 +139,7 @@ rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::commit() { DBUG_ASSERT(m_sst_file_writer != nullptr); rocksdb::Status s; - rocksdb::ExternalSstFileInfo fileinfo; /// Finish may should be modified + rocksdb::ExternalSstFileInfo fileinfo; /// Finish may should be modified // Close out the sst file s = m_sst_file_writer->Finish(&fileinfo); @@ -153,30 +152,15 @@ rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::commit() { if (s.ok()) { if (m_tracing) { // NO_LINT_DEBUG - sql_print_information("SST Tracing: Adding file %s, smallest key: %s, " - "largest key: %s, file size: %" PRIu64 ", " - "num_entries: %" PRIu64, - fileinfo.file_path.c_str(), 
- generateKey(fileinfo.smallest_key).c_str(), - generateKey(fileinfo.largest_key).c_str(), - fileinfo.file_size, fileinfo.num_entries); - } - - // Add the file to the database - // Set the snapshot_consistency parameter to false since no one - // should be accessing the table we are bulk loading - rocksdb::IngestExternalFileOptions opts; - opts.move_files = true; - opts.snapshot_consistency = false; - opts.allow_global_seqno = false; - opts.allow_blocking_flush = false; - s = m_db->IngestExternalFile(m_cf, {m_name}, opts); - - if (m_tracing) { - // NO_LINT_DEBUG - sql_print_information("SST Tracing: AddFile(%s) returned %s", - fileinfo.file_path.c_str(), - s.ok() ? "ok" : "not ok"); + sql_print_information( + "SST Tracing: Adding file %s, smallest key: %s, " + "largest key: %s, file size: %" PRIu64 + ", " + "num_entries: %" PRIu64, + fileinfo.file_path.c_str(), + generateKey(fileinfo.smallest_key).c_str(), + generateKey(fileinfo.largest_key).c_str(), fileinfo.file_size, + fileinfo.num_entries); } } @@ -222,7 +206,9 @@ Rdb_sst_file_ordered::Rdb_sst_file_ordered( rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf, const rocksdb::DBOptions &db_options, const std::string &name, const bool tracing, size_t max_size) - : m_use_stack(false), m_first(true), m_stack(max_size), + : m_use_stack(false), + m_first(true), + m_stack(max_size), m_file(db, cf, db_options, name, tracing) { m_stack.reset(); } @@ -329,21 +315,26 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename, const std::string &indexname, rocksdb::ColumnFamilyHandle *const cf, const rocksdb::DBOptions &db_options, - const bool &tracing) - : m_db(db), m_cf(cf), m_db_options(db_options), m_curr_size(0), - m_sst_count(0), m_background_error(HA_EXIT_SUCCESS), m_committed(false), -#if defined(RDB_SST_INFO_USE_THREAD) - m_queue(), m_mutex(), m_cond(), m_thread(nullptr), m_finished(false), -#endif - m_sst_file(nullptr), m_tracing(tracing), m_print_client_error(true) { + const bool 
tracing) + : m_db(db), + m_cf(cf), + m_db_options(db_options), + m_curr_size(0), + m_sst_count(0), + m_background_error(HA_EXIT_SUCCESS), + m_done(false), + m_sst_file(nullptr), + m_tracing(tracing), + m_print_client_error(true) { m_prefix = db->GetName() + "/"; std::string normalized_table; if (rdb_normalize_tablename(tablename.c_str(), &normalized_table)) { // We failed to get a normalized table name. This should never happen, // but handle it anyway. - m_prefix += "fallback_" + std::to_string(reinterpret_cast<intptr_t>( - reinterpret_cast<void *>(this))) + + m_prefix += "fallback_" + + std::to_string(reinterpret_cast<intptr_t>( + reinterpret_cast<void *>(this))) + "_" + indexname + "_"; } else { m_prefix += normalized_table + "_" + indexname + "_"; @@ -367,9 +358,15 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename, Rdb_sst_info::~Rdb_sst_info() { DBUG_ASSERT(m_sst_file == nullptr); -#if defined(RDB_SST_INFO_USE_THREAD) - DBUG_ASSERT(m_thread == nullptr); -#endif + + for (auto sst_file : m_committed_files) { + // In case something went wrong attempt to delete the temporary file. + // If everything went fine that file will have been renamed and this + // function call will fail. 
+ std::remove(sst_file.c_str()); + } + m_committed_files.clear(); + mysql_mutex_destroy(&m_commit_mutex); } @@ -380,8 +377,8 @@ int Rdb_sst_info::open_new_sst_file() { const std::string name = m_prefix + std::to_string(m_sst_count++) + m_suffix; // Create the new sst file object - m_sst_file = new Rdb_sst_file_ordered(m_db, m_cf, m_db_options, - name, m_tracing, m_max_size); + m_sst_file = new Rdb_sst_file_ordered(m_db, m_cf, m_db_options, name, + m_tracing, m_max_size); // Open the sst file const rocksdb::Status s = m_sst_file->open(); @@ -397,35 +394,23 @@ int Rdb_sst_info::open_new_sst_file() { return HA_EXIT_SUCCESS; } -void Rdb_sst_info::close_curr_sst_file() { - DBUG_ASSERT(m_sst_file != nullptr); - DBUG_ASSERT(m_curr_size > 0); - -#if defined(RDB_SST_INFO_USE_THREAD) - if (m_thread == nullptr) { - // We haven't already started a background thread, so start one - m_thread = new std::thread(thread_fcn, this); +void Rdb_sst_info::commit_sst_file(Rdb_sst_file_ordered *sst_file) { + const rocksdb::Status s = sst_file->commit(); + if (!s.ok()) { + set_error_msg(sst_file->get_name(), s); + set_background_error(HA_ERR_ROCKSDB_BULK_LOAD); } - DBUG_ASSERT(m_thread != nullptr); + m_committed_files.push_back(sst_file->get_name()); - { - // Add this finished sst file to the queue (while holding mutex) - const std::lock_guard<std::mutex> guard(m_mutex); - m_queue.push(m_sst_file); - } + delete sst_file; +} - // Notify the background thread that there is a new entry in the queue - m_cond.notify_one(); -#else - const rocksdb::Status s = m_sst_file->commit(); - if (!s.ok()) { - set_error_msg(m_sst_file->get_name(), s); - set_background_error(HA_ERR_ROCKSDB_BULK_LOAD); - } +void Rdb_sst_info::close_curr_sst_file() { + DBUG_ASSERT(m_sst_file != nullptr); + DBUG_ASSERT(m_curr_size > 0); - delete m_sst_file; -#endif + commit_sst_file(m_sst_file); // Reset for next sst file m_sst_file = nullptr; @@ -435,7 +420,7 @@ void Rdb_sst_info::close_curr_sst_file() { int 
Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) { int rc; - DBUG_ASSERT(!m_committed); + DBUG_ASSERT(!m_done); if (m_curr_size + key.size() + value.size() >= m_max_size) { // The current sst file has reached its maximum, close it out @@ -470,15 +455,22 @@ int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) { return HA_EXIT_SUCCESS; } -int Rdb_sst_info::commit(bool print_client_error) { +/* + Finish the current work and return the list of SST files ready to be + ingested. This function need to be idempotent and atomic + */ +int Rdb_sst_info::finish(Rdb_sst_commit_info *commit_info, + bool print_client_error) { int ret = HA_EXIT_SUCCESS; // Both the transaction clean up and the ha_rocksdb handler have // references to this Rdb_sst_info and both can call commit, so // synchronize on the object here. + // This also means in such case the bulk loading operation stop being truly + // atomic, and we should consider fixing this in the future RDB_MUTEX_LOCK_CHECK(m_commit_mutex); - if (m_committed) { + if (is_done()) { RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex); return ret; } @@ -490,20 +482,13 @@ int Rdb_sst_info::commit(bool print_client_error) { close_curr_sst_file(); } -#if defined(RDB_SST_INFO_USE_THREAD) - if (m_thread != nullptr) { - // Tell the background thread we are done - m_finished = true; - m_cond.notify_one(); + // This checks out the list of files so that the caller can collect/group + // them and ingest them all in one go, and any racing calls to commit + // won't see them at all + commit_info->init(m_cf, std::move(m_committed_files)); + DBUG_ASSERT(m_committed_files.size() == 0); - // Wait for the background thread to finish - m_thread->join(); - delete m_thread; - m_thread = nullptr; - } -#endif - - m_committed = true; + m_done = true; RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex); // Did we get any errors? 
@@ -517,16 +502,13 @@ int Rdb_sst_info::commit(bool print_client_error) { void Rdb_sst_info::set_error_msg(const std::string &sst_file_name, const rocksdb::Status &s) { + if (!m_print_client_error) return; - if (!m_print_client_error) - return; + report_error_msg(s, sst_file_name.c_str()); +} -#if defined(RDB_SST_INFO_USE_THREAD) - // Both the foreground and background threads can set the error message - // so lock the mutex to protect it. We only want the first error that - // we encounter. - const std::lock_guard<std::mutex> guard(m_mutex); -#endif +void Rdb_sst_info::report_error_msg(const rocksdb::Status &s, + const char *sst_file_name) { if (s.IsInvalidArgument() && strcmp(s.getState(), "Keys must be added in order") == 0) { my_printf_error(ER_KEYS_OUT_OF_ORDER, @@ -536,57 +518,16 @@ void Rdb_sst_info::set_error_msg(const std::string &sst_file_name, } else if (s.IsInvalidArgument() && strcmp(s.getState(), "Global seqno is required, but disabled") == 0) { - my_printf_error(ER_OVERLAPPING_KEYS, "Rows inserted during bulk load " - "must not overlap existing rows", + my_printf_error(ER_OVERLAPPING_KEYS, + "Rows inserted during bulk load " + "must not overlap existing rows", MYF(0)); } else { my_printf_error(ER_UNKNOWN_ERROR, "[%s] bulk load error: %s", MYF(0), - sst_file_name.c_str(), s.ToString().c_str()); + sst_file_name, s.ToString().c_str()); } } -#if defined(RDB_SST_INFO_USE_THREAD) -// Static thread function - the Rdb_sst_info object is in 'object' -void Rdb_sst_info::thread_fcn(void *object) { - reinterpret_cast<Rdb_sst_info *>(object)->run_thread(); -} - -void Rdb_sst_info::run_thread() { - std::unique_lock<std::mutex> lk(m_mutex); - - do { - // Wait for notification or 1 second to pass - m_cond.wait_for(lk, std::chrono::seconds(1)); - - // Inner loop pulls off all Rdb_sst_file_ordered entries and processes them - while (!m_queue.empty()) { - Rdb_sst_file_ordered *const sst_file = m_queue.front(); - m_queue.pop(); - - // Release the lock - we don't want to 
hold it while committing the file - lk.unlock(); - - // Close out the sst file and add it to the database - const rocksdb::Status s = sst_file->commit(); - if (!s.ok()) { - set_error_msg(sst_file->get_name(), s); - set_background_error(HA_ERR_ROCKSDB_BULK_LOAD); - } - - delete sst_file; - - // Reacquire the lock for the next inner loop iteration - lk.lock(); - } - - // If the queue is empty and the main thread has indicated we should exit - // break out of the loop. - } while (!m_finished); - - DBUG_ASSERT(m_queue.empty()); -} -#endif - void Rdb_sst_info::init(const rocksdb::DB *const db) { const std::string path = db->GetName() + FN_DIRSEP; struct st_my_dir *const dir_info = my_dir(path.c_str(), MYF(MY_DONT_SORT)); @@ -618,4 +559,4 @@ void Rdb_sst_info::init(const rocksdb::DB *const db) { std::atomic<uint64_t> Rdb_sst_info::m_prefix_counter(0); std::string Rdb_sst_info::m_suffix = ".bulk_load.tmp"; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h index f50645b1eeb..66da3b7c1e7 100644 --- a/storage/rocksdb/rdb_sst_info.h +++ b/storage/rocksdb/rdb_sst_info.h @@ -34,8 +34,6 @@ /* MyRocks header files */ #include "./rdb_utils.h" -// #define RDB_SST_INFO_USE_THREAD /* uncomment to use threads */ - namespace myrocks { class Rdb_sst_file_ordered { @@ -125,43 +123,114 @@ class Rdb_sst_info { uint64_t m_max_size; uint32_t m_sst_count; std::atomic<int> m_background_error; + bool m_done; std::string m_prefix; static std::atomic<uint64_t> m_prefix_counter; static std::string m_suffix; - bool m_committed; mysql_mutex_t m_commit_mutex; -#if defined(RDB_SST_INFO_USE_THREAD) - std::queue<Rdb_sst_file_ordered *> m_queue; - std::mutex m_mutex; - std::condition_variable m_cond; - std::thread *m_thread; - bool m_finished; -#endif Rdb_sst_file_ordered *m_sst_file; + + // List of committed SST files - we'll ingest them later in one single batch + std::vector<std::string> m_committed_files; + const bool 
m_tracing; bool m_print_client_error; int open_new_sst_file(); void close_curr_sst_file(); + void commit_sst_file(Rdb_sst_file_ordered *sst_file); + void set_error_msg(const std::string &sst_file_name, const rocksdb::Status &s); -#if defined(RDB_SST_INFO_USE_THREAD) - void run_thread(); - - static void thread_fcn(void *object); -#endif - public: Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename, const std::string &indexname, rocksdb::ColumnFamilyHandle *const cf, - const rocksdb::DBOptions &db_options, const bool &tracing); + const rocksdb::DBOptions &db_options, const bool tracing); ~Rdb_sst_info(); + /* + This is the unit of work returned from Rdb_sst_info::finish and represents + a group of SST to be ingested atomically with other Rdb_sst_commit_info. + This is always local to the bulk loading complete operation so no locking + is required + */ + class Rdb_sst_commit_info { + public: + Rdb_sst_commit_info() : m_committed(true), m_cf(nullptr) {} + + Rdb_sst_commit_info(Rdb_sst_commit_info &&rhs) noexcept + : m_committed(rhs.m_committed), + m_cf(rhs.m_cf), + m_committed_files(std::move(rhs.m_committed_files)) { + rhs.m_committed = true; + rhs.m_cf = nullptr; + } + + Rdb_sst_commit_info &operator=(Rdb_sst_commit_info &&rhs) noexcept { + reset(); + + m_cf = rhs.m_cf; + m_committed_files = std::move(rhs.m_committed_files); + m_committed = rhs.m_committed; + + rhs.m_committed = true; + rhs.m_cf = nullptr; + + return *this; + } + + Rdb_sst_commit_info(const Rdb_sst_commit_info &) = delete; + Rdb_sst_commit_info &operator=(const Rdb_sst_commit_info &) = delete; + + ~Rdb_sst_commit_info() { reset(); } + + void reset() { + if (!m_committed) { + for (auto sst_file : m_committed_files) { + // In case something went wrong attempt to delete the temporary file. + // If everything went fine that file will have been renamed and this + // function call will fail. 
+ std::remove(sst_file.c_str()); + } + } + m_committed_files.clear(); + m_cf = nullptr; + m_committed = true; + } + + bool has_work() const { + return m_cf != nullptr && m_committed_files.size() > 0; + } + + void init(rocksdb::ColumnFamilyHandle *cf, + std::vector<std::string> &&files) { + DBUG_ASSERT(m_cf == nullptr && m_committed_files.size() == 0 && + m_committed); + m_cf = cf; + m_committed_files = std::move(files); + m_committed = false; + } + + rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf; } + + const std::vector<std::string> &get_committed_files() const { + return m_committed_files; + } + + void commit() { m_committed = true; } + + private: + bool m_committed; + rocksdb::ColumnFamilyHandle *m_cf; + std::vector<std::string> m_committed_files; + }; + int put(const rocksdb::Slice &key, const rocksdb::Slice &value); - int commit(bool print_client_error = true); - bool is_committed() const { return m_committed; } + int finish(Rdb_sst_commit_info *commit_info, bool print_client_error = true); + + bool is_done() const { return m_done; } bool have_background_error() { return m_background_error != 0; } @@ -180,7 +249,17 @@ class Rdb_sst_info { m_background_error.compare_exchange_strong(expected, code); } + /** Return the list of committed files later to be ingested **/ + const std::vector<std::string> &get_committed_files() { + return m_committed_files; + } + + rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf; } + static void init(const rocksdb::DB *const db); + + static void report_error_msg(const rocksdb::Status &s, + const char *sst_file_name); }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_threads.cc b/storage/rocksdb/rdb_threads.cc index 0f331bdaeb2..06683d6e2f1 100644 --- a/storage/rocksdb/rdb_threads.cc +++ b/storage/rocksdb/rdb_threads.cc @@ -16,7 +16,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: 
Class implementation +#pragma implementation // gcc: Class implementation #endif #include <my_global.h> @@ -42,7 +42,7 @@ void Rdb_thread::init( my_core::PSI_mutex_key stop_bg_psi_mutex_key, my_core::PSI_cond_key stop_bg_psi_cond_key #endif - ) { +) { DBUG_ASSERT(!m_run_once); mysql_mutex_init(stop_bg_psi_mutex_key, &m_signal_mutex, MY_MUTEX_INIT_FAST); mysql_cond_init(stop_bg_psi_cond_key, &m_signal_cond, nullptr); @@ -58,7 +58,7 @@ int Rdb_thread::create_thread(const std::string &thread_name , PSI_thread_key background_psi_thread_key #endif - ) { +) { // Make a copy of the name so we can return without worrying that the // caller will free the memory m_name = thread_name; @@ -68,7 +68,7 @@ int Rdb_thread::create_thread(const std::string &thread_name } -void Rdb_thread::signal(const bool &stop_thread) { +void Rdb_thread::signal(const bool stop_thread) { RDB_MUTEX_LOCK_CHECK(m_signal_mutex); if (stop_thread) { @@ -80,4 +80,4 @@ void Rdb_thread::signal(const bool &stop_thread) { RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex); } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_threads.h b/storage/rocksdb/rdb_threads.h index b3331db1738..7d89fe0616b 100644 --- a/storage/rocksdb/rdb_threads.h +++ b/storage/rocksdb/rdb_threads.h @@ -34,9 +34,7 @@ #undef pthread_getspecific #endif #include <mysql/psi/mysql_table.h> -#ifdef MARIAROCKS_NOT_YET -#include <mysql/thread_pool_priv.h> -#endif +// #include <mysql/thread_pool_priv.h> /* MyRocks header files */ #include "./rdb_utils.h" @@ -45,7 +43,7 @@ namespace myrocks { class Rdb_thread { -private: + private: // Disable Copying Rdb_thread(const Rdb_thread &); Rdb_thread &operator=(const Rdb_thread &); @@ -57,12 +55,12 @@ private: std::string m_name; -protected: + protected: mysql_mutex_t m_signal_mutex; mysql_cond_t m_signal_cond; bool m_stop = false; -public: + public: Rdb_thread() : m_run_once(false) {} #ifdef HAVE_PSI_INTERFACE @@ -77,7 +75,7 @@ public: virtual void run(void) = 0; - void signal(const 
bool &stop_thread = false); + void signal(const bool stop_thread = false); int join() { @@ -116,8 +114,7 @@ public: DBUG_ASSERT(!m_name.empty()); #ifdef __linux__ int err = pthread_setname_np(m_handle, m_name.c_str()); - if (err) - { + if (err) { // NO_LINT_DEBUG sql_print_warning( "MyRocks: Failed to set name (%s) for current thread, errno=%d,%d", @@ -130,7 +127,7 @@ public: virtual ~Rdb_thread() {} -private: + private: static void *thread_func(void *const thread_ptr); }; @@ -141,7 +138,7 @@ private: */ class Rdb_background_thread : public Rdb_thread { -private: + private: bool m_save_stats = false; void reset() { @@ -150,7 +147,7 @@ private: m_save_stats = false; } -public: + public: virtual void run() override; void request_save_stats() { @@ -195,4 +192,4 @@ struct Rdb_drop_index_thread : public Rdb_thread { virtual void run() override; }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc index 7d16e7d899d..dc1b7c8892e 100644 --- a/storage/rocksdb/rdb_utils.cc +++ b/storage/rocksdb/rdb_utils.cc @@ -21,9 +21,9 @@ /* C++ standard header files */ #include <array> +#include <sstream> #include <string> #include <vector> -#include <sstream> /* C standard header files */ #include <ctype.h> @@ -209,8 +209,8 @@ const char *rdb_skip_id(const struct charset_info_st *const cs, /* Parses a given string into tokens (if any) separated by a specific delimiter. 
*/ -const std::vector<std::string> parse_into_tokens( - const std::string& s, const char delim) { +const std::vector<std::string> parse_into_tokens(const std::string &s, + const char delim) { std::vector<std::string> tokens; std::string t; std::stringstream ss(s); @@ -338,14 +338,18 @@ void rdb_persist_corruption_marker() { /* O_SYNC is not supported on windows */ int fd = my_open(fileName.c_str(), O_CREAT | IF_WIN(0, O_SYNC), MYF(MY_WME)); if (fd < 0) { - sql_print_error("RocksDB: Can't create file %s to mark rocksdb as " - "corrupted.", - fileName.c_str()); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Can't create file %s to mark rocksdb as " + "corrupted.", + fileName.c_str()); } else { - sql_print_information("RocksDB: Creating the file %s to abort mysqld " - "restarts. Remove this file from the data directory " - "after fixing the corruption to recover. ", - fileName.c_str()); + // NO_LINT_DEBUG + sql_print_information( + "RocksDB: Creating the file %s to abort mysqld " + "restarts. Remove this file from the data directory " + "after fixing the corruption to recover. ", + fileName.c_str()); } #ifdef _WIN32 @@ -362,4 +366,4 @@ void rdb_persist_corruption_marker() { } } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_utils.h b/storage/rocksdb/rdb_utils.h index 44d90d78437..0ef74b9fd06 100644 --- a/storage/rocksdb/rdb_utils.h +++ b/storage/rocksdb/rdb_utils.h @@ -21,6 +21,7 @@ #include <chrono> #include <string> #include <vector> +#include <functional> /* MySQL header files */ #include "../sql/log.h" @@ -44,7 +45,7 @@ namespace myrocks { #ifndef interface #define interface struct -#endif // interface +#endif // interface /* Introduce C-style pseudo-namespaces, a handy way to make code more readble @@ -62,13 +63,13 @@ namespace myrocks { // to non-obvious MySQL functions, like the ones that do not start with well // known prefixes: "my_", "sql_", and "mysql_". 
#define my_core -#endif // my_core +#endif // my_core /* The intent behind a SHIP_ASSERT() macro is to have a mechanism for validating invariants in retail builds. Traditionally assertions (such as macros defined in <cassert>) are evaluated for performance reasons only in debug builds and - become NOOP in retail builds when NDEBUG is defined. + become NOOP in retail builds when DBUG_OFF is defined. This macro is intended to validate the invariants which are critical for making sure that data corruption and data loss won't take place. Proper @@ -80,14 +81,14 @@ namespace myrocks { */ #ifndef SHIP_ASSERT -#define SHIP_ASSERT(expr) \ - do { \ - if (!(expr)) { \ - my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \ - abort(); \ - } \ +#define SHIP_ASSERT(expr) \ + do { \ + if (!(expr)) { \ + my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \ + abort(); \ + } \ } while (0) -#endif // SHIP_ASSERT +#endif // SHIP_ASSERT /* Assert a implies b. @@ -103,7 +104,7 @@ namespace myrocks { a and b must be both true or both false. */ #ifndef DBUG_ASSERT_IFF -#define DBUG_ASSERT_IFF(a, b) \ +#define DBUG_ASSERT_IFF(a, b) \ DBUG_ASSERT(static_cast<bool>(a) == static_cast<bool>(b)) #endif @@ -151,10 +152,10 @@ namespace myrocks { Macros to better convey the intent behind checking the results from locking and unlocking mutexes. */ -#define RDB_MUTEX_LOCK_CHECK(m) \ +#define RDB_MUTEX_LOCK_CHECK(m) \ rdb_check_mutex_call_result(__MYROCKS_PORTABLE_PRETTY_FUNCTION__, true, \ mysql_mutex_lock(&m)) -#define RDB_MUTEX_UNLOCK_CHECK(m) \ +#define RDB_MUTEX_UNLOCK_CHECK(m) \ rdb_check_mutex_call_result(__MYROCKS_PORTABLE_PRETTY_FUNCTION__, false, \ mysql_mutex_unlock(&m)) @@ -243,10 +244,10 @@ inline void rdb_check_mutex_call_result(const char *function_name, const int result) { if (unlikely(result)) { /* NO_LINT_DEBUG */ - sql_print_error("%s a mutex inside %s failed with an " - "error code %d.", - attempt_lock ? 
"Locking" : "Unlocking", function_name, - result); + sql_print_error( + "%s a mutex inside %s failed with an " + "error code %d.", + attempt_lock ? "Locking" : "Unlocking", function_name, result); // This will hopefully result in a meaningful stack trace which we can use // to efficiently debug the root cause. @@ -291,7 +292,7 @@ const char *rdb_parse_id(const struct charset_info_st *const cs, const char *rdb_skip_id(const struct charset_info_st *const cs, const char *str) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); -const std::vector<std::string> parse_into_tokens(const std::string& s, +const std::vector<std::string> parse_into_tokens(const std::string &s, const char delim); /* @@ -309,4 +310,26 @@ bool rdb_database_exists(const std::string &db_name); const char *get_rocksdb_supported_compression_types(); -} // namespace myrocks +/* + Helper class to make sure cleanup always happens. Helpful for complicated + logic where there can be multiple exits/returns requiring cleanup + */ +class Ensure_cleanup { + public: + explicit Ensure_cleanup(std::function<void()> cleanup) + : m_cleanup(cleanup), m_skip_cleanup(false) {} + + ~Ensure_cleanup() { + if (!m_skip_cleanup) { + m_cleanup(); + } + } + + // If you want to skip cleanup (such as when the operation is successful) + void skip() { m_skip_cleanup = true; } + + private: + std::function<void()> m_cleanup; + bool m_skip_cleanup; +}; +} // namespace myrocks diff --git a/storage/rocksdb/rocksdb b/storage/rocksdb/rocksdb -Subproject 926f3a78a64b327475ee6c60b6c8ab4f3425320 +Subproject e731f4402258554812c46334dc0d9483e6cc769 diff --git a/storage/sphinx/ha_sphinx.cc b/storage/sphinx/ha_sphinx.cc index 544b7a80383..337a1dc22a1 100644 --- a/storage/sphinx/ha_sphinx.cc +++ b/storage/sphinx/ha_sphinx.cc @@ -2310,7 +2310,7 @@ int ha_sphinx::extra ( enum ha_extra_function op ) } -int ha_sphinx::write_row ( byte * ) +int ha_sphinx::write_row ( const byte * ) { SPH_ENTER_METHOD(); if ( !m_pShare || !m_pShare->m_bSphinxQL ) 
diff --git a/storage/sphinx/ha_sphinx.h b/storage/sphinx/ha_sphinx.h index 8e6af908aab..decd88bad5a 100644 --- a/storage/sphinx/ha_sphinx.h +++ b/storage/sphinx/ha_sphinx.h @@ -85,7 +85,7 @@ public: int open ( const char * name, int mode, uint test_if_locked ); int close (); - int write_row ( byte * buf ); + int write_row ( const byte * buf ); int update_row ( const byte * old_data, const byte * new_data ); int delete_row ( const byte * buf ); int extra ( enum ha_extra_function op ); diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc index d3bc24dc0e0..5920f802c9e 100644 --- a/storage/spider/ha_spider.cc +++ b/storage/spider/ha_spider.cc @@ -9805,7 +9805,7 @@ int ha_spider::end_bulk_insert() } int ha_spider::write_row( - uchar *buf + const uchar *buf ) { int error_num; THD *thd = ha_thd(); diff --git a/storage/spider/ha_spider.h b/storage/spider/ha_spider.h index 6f5359007f1..a146745aa97 100644 --- a/storage/spider/ha_spider.h +++ b/storage/spider/ha_spider.h @@ -568,7 +568,7 @@ public: #endif int end_bulk_insert(); int write_row( - uchar *buf + const uchar *buf ); #ifdef HA_CAN_BULK_ACCESS int pre_write_row( diff --git a/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_deinit.inc new file mode 100644 index 00000000000..4ea789feae8 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_deinit.inc @@ -0,0 +1,13 @@ +--let $CHILD2_1_SELECT_TABLES= $CHILD2_1_SELECT_TABLES_BACKUP +--connection master_1 +set session binlog_format= @old_binlog_format; +--connection slave1_1 +--disable_warnings +--disable_query_log +--disable_result_log +--source ../include/deinit_spider.inc +--source ../t/slave_test_deinit.inc +--source ../t/test_deinit.inc +--enable_result_log +--enable_query_log +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_init.inc 
b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_init.inc new file mode 100644 index 00000000000..f2f8635f9e3 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_init.inc @@ -0,0 +1,24 @@ +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_init.inc +--source ../t/slave_test_init.inc +--enable_result_log +--enable_query_log +--enable_warnings +--let $CHILD2_1_SELECT_TABLES_BACKUP= $CHILD2_1_SELECT_TABLES +let $CHILD2_1_SELECT_TABLES= + SELECT pkey, f FROM tbl_a ORDER BY pkey; +let $CHILD2_1_SELECT_ARGUMENT1= + SELECT argument FROM mysql.general_log WHERE argument LIKE '%delete %'; +--connection slave1_1 +--disable_warnings +--disable_query_log +--disable_result_log +--source ../include/init_spider.inc +--enable_result_log +--enable_query_log +--enable_warnings +--connection master_1 +set @old_binlog_format= @@binlog_format; +set session binlog_format= 'ROW'; diff --git a/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mariadb_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mariadb_deinit.inc new file mode 100644 index 00000000000..4fd217a5676 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mariadb_deinit.inc @@ -0,0 +1,4 @@ +--let $MASTER_1_COMMENT_2_1= $MASTER_1_COMMENT_2_1_BACKUP +--let $CHILD2_1_DROP_TABLES= $CHILD2_1_DROP_TABLES_BACKUP +--let $CHILD2_1_CREATE_TABLES= $CHILD2_1_CREATE_TABLES_BACKUP +--source delete_with_float_column_deinit.inc diff --git a/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mariadb_init.inc b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mariadb_init.inc new file mode 100644 index 00000000000..9d3961491e6 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mariadb_init.inc @@ -0,0 +1,14 @@ +--source delete_with_float_column_init.inc +--let 
$MASTER_1_COMMENT_2_1_BACKUP= $MASTER_1_COMMENT_2_1 +let $MASTER_1_COMMENT_2_1= + COMMENT='table "tbl_a", srv "s_2_1", wrapper "mariadb"'; +--let $CHILD2_1_DROP_TABLES_BACKUP= $CHILD2_1_DROP_TABLES +let $CHILD2_1_DROP_TABLES= + DROP TABLE IF EXISTS tbl_a; +--let $CHILD2_1_CREATE_TABLES_BACKUP= $CHILD2_1_CREATE_TABLES +let $CHILD2_1_CREATE_TABLES= + CREATE TABLE tbl_a ( + pkey int NOT NULL, + f float DEFAULT NULL, + PRIMARY KEY (pkey) + ) $CHILD2_1_ENGINE $CHILD2_1_CHARSET; diff --git a/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mysql_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mysql_deinit.inc new file mode 100644 index 00000000000..4fd217a5676 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mysql_deinit.inc @@ -0,0 +1,4 @@ +--let $MASTER_1_COMMENT_2_1= $MASTER_1_COMMENT_2_1_BACKUP +--let $CHILD2_1_DROP_TABLES= $CHILD2_1_DROP_TABLES_BACKUP +--let $CHILD2_1_CREATE_TABLES= $CHILD2_1_CREATE_TABLES_BACKUP +--source delete_with_float_column_deinit.inc diff --git a/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mysql_init.inc b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mysql_init.inc new file mode 100644 index 00000000000..d98f3c12397 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/delete_with_float_column_mysql_init.inc @@ -0,0 +1,16 @@ +--source delete_with_float_column_init.inc +--let $MASTER_1_COMMENT_2_1_BACKUP= $MASTER_1_COMMENT_2_1 +let $MASTER_1_COMMENT_2_1= + COMMENT='table "tbl_a", srv "s_2_1", wrapper "mysql"'; +--let $CHILD2_1_DROP_TABLES_BACKUP= $CHILD2_1_DROP_TABLES +let $CHILD2_1_DROP_TABLES= + DROP TABLE IF EXISTS tbl_a $STR_SEMICOLON + DROP FUNCTION to_float; +--let $CHILD2_1_CREATE_TABLES_BACKUP= $CHILD2_1_CREATE_TABLES +let $CHILD2_1_CREATE_TABLES= + CREATE TABLE tbl_a ( + pkey int NOT NULL, + f float DEFAULT NULL, + PRIMARY KEY (pkey) + ) $CHILD2_1_ENGINE 
$CHILD2_1_CHARSET $STR_SEMICOLON + CREATE FUNCTION to_float(a decimal(20,6)) RETURNS float RETURN cast(a as double); diff --git a/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_deinit.inc new file mode 100644 index 00000000000..9d255152dd8 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_deinit.inc @@ -0,0 +1,14 @@ +--let $MASTER_1_COMMENT_2_1= $MASTER_1_COMMENT_2_1_BACKUP +--let $CHILD2_1_DROP_TABLES= $CHILD2_1_DROP_TABLES_BACKUP +--let $CHILD2_1_CREATE_TABLES= $CHILD2_1_CREATE_TABLES_BACKUP +--let $CHILD2_1_SELECT_TABLES= $CHILD2_1_SELECT_TABLES_BACKUP +--let $CHILD2_2_DROP_TABLES= $CHILD2_2_DROP_TABLES_BACKUP +--let $CHILD2_2_CREATE_TABLES= $CHILD2_2_CREATE_TABLES_BACKUP +--let $CHILD2_2_SELECT_TABLES= $CHILD2_2_SELECT_TABLES_BACKUP +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_deinit.inc +--enable_result_log +--enable_query_log +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_init.inc b/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_init.inc new file mode 100644 index 00000000000..dceae8226b0 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_init.inc @@ -0,0 +1,52 @@ +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_init.inc +if (!$HAVE_PARTITION) +{ + --source group_by_order_by_limit_deinit.inc + --enable_result_log + --enable_query_log + --enable_warnings + skip Test requires partitioning; +} +--enable_result_log +--enable_query_log +--enable_warnings +--let $MASTER_1_COMMENT_2_1_BACKUP= $MASTER_1_COMMENT_2_1 +let $MASTER_1_COMMENT_2_1= + COMMENT='table "tbl_a"' + PARTITION BY KEY(pkey) ( + PARTITION pt1 COMMENT='srv "s_2_1"', + PARTITION pt2 COMMENT='srv "s_2_2"' + ); +--let $CHILD2_1_DROP_TABLES_BACKUP= $CHILD2_1_DROP_TABLES +let $CHILD2_1_DROP_TABLES= + DROP TABLE IF EXISTS tbl_a; +--let 
$CHILD2_1_CREATE_TABLES_BACKUP= $CHILD2_1_CREATE_TABLES +let $CHILD2_1_CREATE_TABLES= + CREATE TABLE tbl_a ( + pkey int NOT NULL, + val char(1) NOT NULL, + PRIMARY KEY (pkey) + ) $CHILD2_1_ENGINE $CHILD2_1_CHARSET; +--let $CHILD2_1_SELECT_TABLES_BACKUP= $CHILD2_1_SELECT_TABLES +let $CHILD2_1_SELECT_TABLES= + SELECT pkey, val FROM tbl_a ORDER BY pkey; +let $CHILD2_1_SELECT_ARGUMENT1= + SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; +--let $CHILD2_2_DROP_TABLES_BACKUP= $CHILD2_2_DROP_TABLES +let $CHILD2_2_DROP_TABLES= + DROP TABLE IF EXISTS tbl_a; +--let $CHILD2_2_CREATE_TABLES_BACKUP= $CHILD2_2_CREATE_TABLES +let $CHILD2_2_CREATE_TABLES= + CREATE TABLE tbl_a ( + pkey int NOT NULL, + val char(1) NOT NULL, + PRIMARY KEY (pkey) + ) $CHILD2_2_ENGINE $CHILD2_2_CHARSET; +--let $CHILD2_2_SELECT_TABLES_BACKUP= $CHILD2_2_SELECT_TABLES +let $CHILD2_2_SELECT_TABLES= + SELECT pkey, val FROM tbl_a ORDER BY pkey; +let $CHILD2_2_SELECT_ARGUMENT1= + SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; diff --git a/storage/spider/mysql-test/spider/bugfix/include/select_with_backquote_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/select_with_backquote_deinit.inc new file mode 100644 index 00000000000..76b7582abfe --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/select_with_backquote_deinit.inc @@ -0,0 +1,11 @@ +--let $MASTER_1_COMMENT_2_1= $MASTER_1_COMMENT_2_1_BACKUP +--let $CHILD2_1_DROP_TABLES= $CHILD2_1_DROP_TABLES_BACKUP +--let $CHILD2_1_CREATE_TABLES= $CHILD2_1_CREATE_TABLES_BACKUP +--let $CHILD2_1_SELECT_TABLES= $CHILD2_1_SELECT_TABLES_BACKUP +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_deinit.inc +--enable_result_log +--enable_query_log +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/include/select_with_backquote_init.inc b/storage/spider/mysql-test/spider/bugfix/include/select_with_backquote_init.inc new file mode 100644 index 
00000000000..37bf690c066 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/select_with_backquote_init.inc @@ -0,0 +1,25 @@ +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_init.inc +--enable_result_log +--enable_query_log +--enable_warnings +--let $MASTER_1_COMMENT_2_1_BACKUP= $MASTER_1_COMMENT_2_1 +let $MASTER_1_COMMENT_2_1= + COMMENT='table "tbl_a", srv "s_2_1"'; +--let $CHILD2_1_DROP_TABLES_BACKUP= $CHILD2_1_DROP_TABLES +let $CHILD2_1_DROP_TABLES= + DROP TABLE IF EXISTS tbl_a; +--let $CHILD2_1_CREATE_TABLES_BACKUP= $CHILD2_1_CREATE_TABLES +let $CHILD2_1_CREATE_TABLES= + CREATE TABLE tbl_a ( + pkey int NOT NULL, + txt_utf8 char(8) NOT NULL, + PRIMARY KEY (pkey) + ) $CHILD2_1_ENGINE DEFAULT CHARACTER SET utf8; +--let $CHILD2_1_SELECT_TABLES_BACKUP= $CHILD2_1_SELECT_TABLES +let $CHILD2_1_SELECT_TABLES= + SELECT pkey, txt_utf8 FROM tbl_a ORDER BY pkey; +let $CHILD2_1_SELECT_ARGUMENT1= + SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; diff --git a/storage/spider/mysql-test/spider/bugfix/include/slave_transaction_retry_errors_5digit_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/slave_transaction_retry_errors_5digit_deinit.inc new file mode 100644 index 00000000000..5ac67cdf783 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/slave_transaction_retry_errors_5digit_deinit.inc @@ -0,0 +1,10 @@ +--connection slave1_1 +--disable_warnings +--disable_query_log +--disable_result_log +--source ../include/deinit_spider.inc +--source ../t/slave_test_deinit.inc +--source ../t/test_deinit.inc +--enable_result_log +--enable_query_log +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/include/slave_transaction_retry_errors_5digit_init.inc b/storage/spider/mysql-test/spider/bugfix/include/slave_transaction_retry_errors_5digit_init.inc new file mode 100644 index 00000000000..052d6ebb2eb --- /dev/null +++ 
b/storage/spider/mysql-test/spider/bugfix/include/slave_transaction_retry_errors_5digit_init.inc @@ -0,0 +1,10 @@ +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_init.inc +--source ../t/slave_test_init.inc +--connection slave1_1 +--source ../include/init_spider.inc +--enable_result_log +--enable_query_log +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/include/spider_table_sts_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/spider_table_sts_deinit.inc new file mode 100644 index 00000000000..01645e85f32 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/spider_table_sts_deinit.inc @@ -0,0 +1,12 @@ +--connection master_1 +alter table mysql.spider_table_sts add column checksum bigint unsigned default null after update_time; +DROP DATABASE IF EXISTS auto_test_local; + +--let $MASTER_1_COMMENT_2_1= $MASTER_1_COMMENT_2_1_BACKUP +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_deinit.inc +--enable_result_log +--enable_query_log +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/include/spider_table_sts_init.inc b/storage/spider/mysql-test/spider/bugfix/include/spider_table_sts_init.inc new file mode 100644 index 00000000000..1e438812af7 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/spider_table_sts_init.inc @@ -0,0 +1,13 @@ +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_init.inc +--enable_result_log +--enable_query_log +--enable_warnings +--let $MASTER_1_COMMENT_2_1_BACKUP= $MASTER_1_COMMENT_2_1 +let $MASTER_1_COMMENT_2_1= + COMMENT='table "tbl_a", host "127.0.0.1", port "$MASTER_1_MYPORT", user "root"'; +--connection master_1 +alter table mysql.spider_table_sts drop column checksum; +insert into mysql.spider_table_sts values ('auto_test_local', 'tbl_a', 0, 0, 0, 0, 0, '2019-01-01 00:00:00', '2019-01-01 00:00:00', '2019-01-01 00:00:00'); diff --git 
a/storage/spider/mysql-test/spider/bugfix/include/sql_mode_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_deinit.inc new file mode 100644 index 00000000000..07f4d39a184 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_deinit.inc @@ -0,0 +1,13 @@ +--let $MASTER_1_COMMENT_2_1= $MASTER_1_COMMENT_2_1_BACKUP +--let $CHILD2_1_DROP_TABLES= $CHILD2_1_DROP_TABLES_BACKUP +--let $CHILD2_1_CREATE_TABLES= $CHILD2_1_CREATE_TABLES_BACKUP +--let $CHILD2_1_SELECT_TABLES= $CHILD2_1_SELECT_TABLES_BACKUP +--connection master_1 +set session sql_mode= @old_sql_mode; +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_deinit.inc +--enable_result_log +--enable_query_log +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/include/sql_mode_init.inc b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_init.inc new file mode 100644 index 00000000000..65e2f9102e7 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_init.inc @@ -0,0 +1,40 @@ +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_init.inc +--enable_result_log +--enable_query_log +--enable_warnings +--let $SQL_MODES= real_as_float,pipes_as_concat,ansi_quotes,ignore_space,ignore_bad_table_options,only_full_group_by,no_unsigned_subtraction,no_dir_in_create,postgresql,oracle,mssql,db2,maxdb,no_key_options,no_table_options,no_field_options,mysql323,mysql40,ansi,no_auto_value_on_zero,no_backslash_escapes,strict_trans_tables,strict_all_tables,no_zero_in_date,no_zero_date,allow_invalid_dates,error_for_division_by_zero,traditional,no_auto_create_user,high_not_precedence,no_engine_substitution,pad_char_to_full_length +if (`SELECT IF(STRCMP('$SERVER_NAME', 'MariaDB') = 0, 1, 0)`) +{ + if (`SELECT IF($SERVER_MAJOR_VERSION = 10, 1, 0)`) + { + if (`SELECT IF($SERVER_MINOR_VERSION >= 3, 1, 0)`) + { + --let $SQL_MODES= $SQL_MODES,empty_string_is_null,simultaneous_assignment + } + if 
(`SELECT IF($SERVER_MINOR_VERSION >= 4, 1, 0)`) + { + --let $SQL_MODES= $SQL_MODES,time_round_fractional + } + } +} +--connection master_1 +set @old_sql_mode= @@sql_mode; +eval set session sql_mode= '$SQL_MODES'; +--let $MASTER_1_COMMENT_2_1_BACKUP= $MASTER_1_COMMENT_2_1 +--let $CHILD2_1_DROP_TABLES_BACKUP= $CHILD2_1_DROP_TABLES +let $CHILD2_1_DROP_TABLES= + DROP TABLE IF EXISTS tbl_a; +--let $CHILD2_1_CREATE_TABLES_BACKUP= $CHILD2_1_CREATE_TABLES +let $CHILD2_1_CREATE_TABLES= + CREATE TABLE tbl_a ( + pkey int NOT NULL, + PRIMARY KEY (pkey) + ) $CHILD2_1_ENGINE $CHILD2_1_CHARSET; +--let $CHILD2_1_SELECT_TABLES_BACKUP= $CHILD2_1_SELECT_TABLES +let $CHILD2_1_SELECT_TABLES= + SELECT pkey FROM tbl_a ORDER BY pkey; +let $CHILD2_1_SELECT_ARGUMENT1= + SELECT argument FROM mysql.general_log WHERE argument LIKE '%sql_mode%'; diff --git a/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mariadb_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mariadb_deinit.inc new file mode 100644 index 00000000000..0dbff811c95 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mariadb_deinit.inc @@ -0,0 +1 @@ +--source sql_mode_deinit.inc diff --git a/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mariadb_init.inc b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mariadb_init.inc new file mode 100644 index 00000000000..8e03b94b160 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mariadb_init.inc @@ -0,0 +1,3 @@ +--source sql_mode_init.inc +let $MASTER_1_COMMENT_2_1= + COMMENT='table "tbl_a", srv "s_2_1", wrapper "mariadb"'; diff --git a/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mysql_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mysql_deinit.inc new file mode 100644 index 00000000000..0dbff811c95 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mysql_deinit.inc @@ -0,0 +1 @@ +--source sql_mode_deinit.inc diff --git 
a/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mysql_init.inc b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mysql_init.inc new file mode 100644 index 00000000000..2cdd56bc689 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_mysql_init.inc @@ -0,0 +1,3 @@ +--source sql_mode_init.inc +let $MASTER_1_COMMENT_2_1= + COMMENT='table "tbl_a", srv "s_2_1", wrapper "mysql"'; diff --git a/storage/spider/mysql-test/spider/bugfix/r/delete_with_float_column_mariadb.result b/storage/spider/mysql-test/spider/bugfix/r/delete_with_float_column_mariadb.result new file mode 100644 index 00000000000..d9fb00a09fc --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/delete_with_float_column_mariadb.result @@ -0,0 +1,85 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 +for slave1_1 +connection slave1_1; +connection master_1; +set @old_binlog_format= @@binlog_format; +set session binlog_format= 'ROW'; + +drop and create databases +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +connection slave1_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +connection child2_1; +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; + +create table and insert +connection child2_1; +CHILD2_1_CREATE_TABLES +TRUNCATE TABLE mysql.general_log; +connection master_1; +connection slave1_1; +connection master_1; +SET SESSION sql_log_bin= 0; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +f float DEFAULT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE2 MASTER_1_CHARSET MASTER_1_COMMENT_2_1 +SET SESSION sql_log_bin= 1; +connection slave1_1; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +f float DEFAULT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1 +connection master_1; +INSERT INTO tbl_a (pkey, f) VALUES (0,NULL); +INSERT INTO tbl_a (pkey, f) VALUES (1,0.671437); +DELETE FROM tbl_a 
WHERE pkey = 0; +DELETE FROM tbl_a WHERE pkey = 1; +connection slave1_1; +connection master_1; +SET SESSION sql_log_bin= 0; +connection child2_1; +SELECT argument FROM mysql.general_log WHERE argument LIKE '%delete %'; +argument +delete from `auto_test_remote`.`tbl_a` where `pkey` = 0 and `f` is null limit 1 +delete from `auto_test_remote`.`tbl_a` where `pkey` = 1 and `f` = cast(0.671437 as float) limit 1 +SELECT argument FROM mysql.general_log WHERE argument LIKE '%delete %' +SELECT pkey, f FROM tbl_a ORDER BY pkey; +pkey f +connection slave1_1; +SELECT pkey, f FROM tbl_a ORDER BY pkey; +pkey f + +deinit +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +connection slave1_1; +DROP DATABASE IF EXISTS auto_test_local; +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; +connection master_1; +set session binlog_format= @old_binlog_format; +connection slave1_1; +for slave1_1 +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +end of test diff --git a/storage/spider/mysql-test/spider/bugfix/r/delete_with_float_column_mysql.result b/storage/spider/mysql-test/spider/bugfix/r/delete_with_float_column_mysql.result new file mode 100644 index 00000000000..c41cfe8156a --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/delete_with_float_column_mysql.result @@ -0,0 +1,85 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 +for slave1_1 +connection slave1_1; +connection master_1; +set @old_binlog_format= @@binlog_format; +set session binlog_format= 'ROW'; + +drop and create databases +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +connection slave1_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +connection child2_1; +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; + +create table and insert +connection child2_1; 
+CHILD2_1_CREATE_TABLES +TRUNCATE TABLE mysql.general_log; +connection master_1; +connection slave1_1; +connection master_1; +SET SESSION sql_log_bin= 0; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +f float DEFAULT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE2 MASTER_1_CHARSET MASTER_1_COMMENT_2_1 +SET SESSION sql_log_bin= 1; +connection slave1_1; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +f float DEFAULT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1 +connection master_1; +INSERT INTO tbl_a (pkey, f) VALUES (0,NULL); +INSERT INTO tbl_a (pkey, f) VALUES (1,0.671437); +DELETE FROM tbl_a WHERE pkey = 0; +DELETE FROM tbl_a WHERE pkey = 1; +connection slave1_1; +connection master_1; +SET SESSION sql_log_bin= 0; +connection child2_1; +SELECT argument FROM mysql.general_log WHERE argument LIKE '%delete %'; +argument +delete from `auto_test_remote`.`tbl_a` where `pkey` = 0 and `f` is null limit 1 +delete from `auto_test_remote`.`tbl_a` where `pkey` = 1 and `f` = /* create function to_float(a decimal(20,6)) returns float return cast(a as double) */ to_float(0.671437) limit 1 +SELECT argument FROM mysql.general_log WHERE argument LIKE '%delete %' +SELECT pkey, f FROM tbl_a ORDER BY pkey; +pkey f +connection slave1_1; +SELECT pkey, f FROM tbl_a ORDER BY pkey; +pkey f + +deinit +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +connection slave1_1; +DROP DATABASE IF EXISTS auto_test_local; +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; +connection master_1; +set session binlog_format= @old_binlog_format; +connection slave1_1; +for slave1_1 +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +end of test diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_19866.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_19866.result new file mode 100644 index 00000000000..5d483481edd --- /dev/null +++ 
b/storage/spider/mysql-test/spider/bugfix/r/mdev_19866.result @@ -0,0 +1,111 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +this test is for MDEV-19866 + +drop and create databases +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +connection child2_1; +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +connection child2_2; +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote2; +USE auto_test_remote2; + +create table and insert +connection child2_1; +CHILD2_1_CREATE_TABLES +TRUNCATE TABLE mysql.general_log; +connection child2_2; +CHILD2_2_CREATE_TABLES +TRUNCATE TABLE mysql.general_log; +connection master_1; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +val char(1) NOT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1 +INSERT INTO tbl_a (pkey,val) VALUES (1,'1'),(2,'2'),(3,'3'),(4,'4'); + +select test 1 +connection child2_1; +TRUNCATE TABLE mysql.general_log; +connection child2_2; +TRUNCATE TABLE mysql.general_log; +connection master_1; +SELECT * FROM tbl_a; +pkey val +1 1 +3 3 +2 2 +4 4 +SELECT * FROM tbl_a WHERE pkey = 1; +pkey val +1 1 +SELECT * FROM tbl_a; +pkey val +1 1 +3 3 +2 2 +4 4 +SELECT * FROM tbl_a WHERE pkey = 2; +pkey val +2 2 +SELECT * FROM tbl_a; +pkey val +1 1 +3 3 +2 2 +4 4 +connection child2_1; +SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; +argument +select `pkey`,`val` from `auto_test_remote`.`tbl_a` +select `pkey`,`val` from `auto_test_remote`.`tbl_a` where `pkey` = 1 +select `pkey`,`val` from `auto_test_remote`.`tbl_a` +select `pkey`,`val` from `auto_test_remote`.`tbl_a` +SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %' +SELECT pkey, val FROM tbl_a ORDER BY pkey; +pkey val +1 1 +3 3 +connection child2_2; +SELECT argument FROM mysql.general_log WHERE argument 
LIKE '%select %'; +argument +select `pkey`,`val` from `auto_test_remote2`.`tbl_a` +select `pkey`,`val` from `auto_test_remote2`.`tbl_a` +select `pkey`,`val` from `auto_test_remote2`.`tbl_a` where `pkey` = 2 +select `pkey`,`val` from `auto_test_remote2`.`tbl_a` +SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %' +SELECT pkey, val FROM tbl_a ORDER BY pkey; +pkey val +2 2 +4 4 + +deinit +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; +connection child2_2; +DROP DATABASE IF EXISTS auto_test_remote2; +SET GLOBAL log_output = @old_log_output; +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +end of test diff --git a/storage/spider/mysql-test/spider/bugfix/r/select_with_backquote.result b/storage/spider/mysql-test/spider/bugfix/r/select_with_backquote.result new file mode 100644 index 00000000000..52f8c98bf98 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/select_with_backquote.result @@ -0,0 +1,75 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +this test is for MDEV-17204 + +drop and create databases +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +connection child2_1; +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; + +create table and insert +connection child2_1; +CHILD2_1_CREATE_TABLES +TRUNCATE TABLE mysql.general_log; +connection master_1; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +txt_utf8 char(8) NOT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE DEFAULT CHARACTER SET utf8 MASTER_1_COMMENT_2_1 +INSERT INTO tbl_a (pkey,txt_utf8) VALUES (0,'01234567'),(1,'12345678'),(2,'23456789'),(3,'34567890'),(4,'45678901'),(5,'56789012'),(6,'67890123'),(7,'78901234'),(8,'89012345'),(9,'90123456'); +FLUSH TABLES; + +test 1 +connection child2_1; +TRUNCATE TABLE 
mysql.general_log; +connection master_1; +SET NAMES utf8; +SELECT `pkey`, LEFT(`txt_utf8`, 4) FROM `auto_test_local`.`tbl_a` ORDER BY LEFT(`txt_utf8`, 4) LIMIT 3; +pkey LEFT(`txt_utf8`, 4) +0 0123 +1 1234 +2 2345 +connection child2_1; +SET NAMES utf8; +SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; +argument +select t0.`pkey` `pkey`,(left(t0.`txt_utf8` , 4)) `LEFT(``txt_utf8``, 4)` from `auto_test_remote`.`tbl_a` t0 order by `LEFT(``txt_utf8``, 4)` limit 3 +SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %' +SELECT pkey, txt_utf8 FROM tbl_a ORDER BY pkey; +pkey txt_utf8 +0 01234567 +1 12345678 +2 23456789 +3 34567890 +4 45678901 +5 56789012 +6 67890123 +7 78901234 +8 89012345 +9 90123456 + +deinit +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +end of test diff --git a/storage/spider/mysql-test/spider/bugfix/r/slave_transaction_retry_errors_5digit.result b/storage/spider/mysql-test/spider/bugfix/r/slave_transaction_retry_errors_5digit.result new file mode 100644 index 00000000000..f2cab6b0a95 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/slave_transaction_retry_errors_5digit.result @@ -0,0 +1,22 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 +for slave1_1 + +connection slave1_1; +SHOW VARIABLES LIKE 'slave_transaction_retry_errors'; +Variable_name Value +slave_transaction_retry_errors 1158,1159,1160,1161,1205,1213,1429,2013,12701,10000,20000,30000 +connection slave1_1; +for slave1_1 +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +end of test diff --git a/storage/spider/mysql-test/spider/bugfix/r/slave_trx_isolation.result b/storage/spider/mysql-test/spider/bugfix/r/slave_trx_isolation.result index e93eb78417f..d7a0c1044a7 100644 --- 
a/storage/spider/mysql-test/spider/bugfix/r/slave_trx_isolation.result +++ b/storage/spider/mysql-test/spider/bugfix/r/slave_trx_isolation.result @@ -50,7 +50,7 @@ SELECT argument FROM mysql.general_log WHERE argument LIKE '%set %'; argument set session time_zone = '+00:00' SET NAMES utf8 -set session transaction isolation level read committed;set session autocommit = 1;set session wait_timeout = 604800;start transaction +set session transaction isolation level read committed;set session autocommit = 1;set session wait_timeout = 604800;set session sql_mode = 'strict_trans_tables,error_for_division_by_zero,no_auto_create_user,no_engine_substitution';start transaction SELECT argument FROM mysql.general_log WHERE argument LIKE '%set %' SELECT pkey FROM tbl_a ORDER BY pkey; pkey diff --git a/storage/spider/mysql-test/spider/bugfix/r/spider_table_sts.result b/storage/spider/mysql-test/spider/bugfix/r/spider_table_sts.result new file mode 100644 index 00000000000..f915cc951b1 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/spider_table_sts.result @@ -0,0 +1,38 @@ +for master_1 +for child2 +for child3 +connection master_1; +alter table mysql.spider_table_sts drop column checksum; +insert into mysql.spider_table_sts values ('auto_test_local', 'tbl_a', 0, 0, 0, 0, 0, '2019-01-01 00:00:00', '2019-01-01 00:00:00', '2019-01-01 00:00:00'); + +this test is for MDEV-19842 + +drop and create databases +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; + +create table +connection master_1; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1 + +select test 1 +connection master_1; +SELECT pkey FROM tbl_a; +ERROR HY000: System table spider_table_sts is different version + +deinit +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +ERROR HY000: System table spider_table_sts is different version +connection master_1; +alter table mysql.spider_table_sts add column 
checksum bigint unsigned default null after update_time; +DROP DATABASE IF EXISTS auto_test_local; +for master_1 +for child2 +for child3 + +end of test diff --git a/storage/spider/mysql-test/spider/bugfix/r/sql_mode_mariadb.result b/storage/spider/mysql-test/spider/bugfix/r/sql_mode_mariadb.result new file mode 100644 index 00000000000..5048fbb423c --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/sql_mode_mariadb.result @@ -0,0 +1,83 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 +connection master_1; +set @old_sql_mode= @@sql_mode; +set session sql_mode= 'real_as_float,pipes_as_concat,ansi_quotes,ignore_space,ignore_bad_table_options,only_full_group_by,no_unsigned_subtraction,no_dir_in_create,postgresql,oracle,mssql,db2,maxdb,no_key_options,no_table_options,no_field_options,mysql323,mysql40,ansi,no_auto_value_on_zero,no_backslash_escapes,strict_trans_tables,strict_all_tables,no_zero_in_date,no_zero_date,allow_invalid_dates,error_for_division_by_zero,traditional,no_auto_create_user,high_not_precedence,no_engine_substitution,pad_char_to_full_length,empty_string_is_null,simultaneous_assignment,time_round_fractional'; + +this test is for MDEV-16508 + +drop and create databases +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +connection child2_1; +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; + +create table and insert +connection child2_1; +CHILD2_1_CREATE_TABLES +TRUNCATE TABLE mysql.general_log; +connection master_1; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1 +INSERT INTO tbl_a (pkey) VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +select test +connection child2_1; +TRUNCATE TABLE mysql.general_log; +connection master_1; +SELECT * FROM tbl_a ORDER BY pkey; +pkey +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +connection child2_1; +SELECT argument 
FROM mysql.general_log WHERE argument LIKE '%sql_mode%'; +argument +set session transaction isolation level repeatable read;set session autocommit = 1;set session sql_log_off = 0;set session wait_timeout = 604800;set session sql_mode = 'real_as_float,ignore_bad_table_options,no_unsigned_subtraction,no_dir_in_create,no_auto_value_on_zero,strict_trans_tables,strict_all_tables,no_zero_in_date,no_zero_date,allow_invalid_dates,error_for_division_by_zero,no_auto_create_user,high_not_precedence,no_engine_substitution,pad_char_to_full_length,empty_string_is_null,simultaneous_assignment,time_round_fractional';set session time_zone = '+00:00';start transaction +SELECT argument FROM mysql.general_log WHERE argument LIKE '%sql_mode%' +SELECT pkey FROM tbl_a ORDER BY pkey; +pkey +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + +deinit +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; +connection master_1; +set session sql_mode= @old_sql_mode; +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +end of test diff --git a/storage/spider/mysql-test/spider/bugfix/r/sql_mode_mysql.result b/storage/spider/mysql-test/spider/bugfix/r/sql_mode_mysql.result new file mode 100644 index 00000000000..08f9a6007aa --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/sql_mode_mysql.result @@ -0,0 +1,83 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 +connection master_1; +set @old_sql_mode= @@sql_mode; +set session sql_mode= 
'real_as_float,pipes_as_concat,ansi_quotes,ignore_space,ignore_bad_table_options,only_full_group_by,no_unsigned_subtraction,no_dir_in_create,postgresql,oracle,mssql,db2,maxdb,no_key_options,no_table_options,no_field_options,mysql323,mysql40,ansi,no_auto_value_on_zero,no_backslash_escapes,strict_trans_tables,strict_all_tables,no_zero_in_date,no_zero_date,allow_invalid_dates,error_for_division_by_zero,traditional,no_auto_create_user,high_not_precedence,no_engine_substitution,pad_char_to_full_length,empty_string_is_null,simultaneous_assignment,time_round_fractional'; + +this test is for MDEV-16508 + +drop and create databases +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +connection child2_1; +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; + +create table and insert +connection child2_1; +CHILD2_1_CREATE_TABLES +TRUNCATE TABLE mysql.general_log; +connection master_1; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1 +INSERT INTO tbl_a (pkey) VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +select test +connection child2_1; +TRUNCATE TABLE mysql.general_log; +connection master_1; +SELECT * FROM tbl_a ORDER BY pkey; +pkey +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +connection child2_1; +SELECT argument FROM mysql.general_log WHERE argument LIKE '%sql_mode%'; +argument +set session transaction isolation level repeatable read;set session autocommit = 1;set session sql_log_off = 0;set session wait_timeout = 604800;set session sql_mode = 'real_as_float,ignore_bad_table_options,no_unsigned_subtraction,no_dir_in_create,no_auto_value_on_zero,strict_trans_tables,strict_all_tables,no_zero_in_date,no_zero_date,allow_invalid_dates,error_for_division_by_zero,no_auto_create_user,high_not_precedence,no_engine_substitution,pad_char_to_full_length';set session time_zone = '+00:00';start transaction +SELECT 
argument FROM mysql.general_log WHERE argument LIKE '%sql_mode%' +SELECT pkey FROM tbl_a ORDER BY pkey; +pkey +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + +deinit +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; +connection master_1; +set session sql_mode= @old_sql_mode; +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +end of test diff --git a/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column.inc b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column.inc new file mode 100644 index 00000000000..794ebedf355 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column.inc @@ -0,0 +1,98 @@ +--echo +--echo drop and create databases +--connection master_1 +--disable_warnings +CREATE DATABASE auto_test_local; +USE auto_test_local; + +--connection slave1_1 +CREATE DATABASE auto_test_local; +USE auto_test_local; + +--connection child2_1 +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +--enable_warnings + +--echo +--echo create table and insert + +--connection child2_1 +--disable_query_log +echo CHILD2_1_CREATE_TABLES; +eval $CHILD2_1_CREATE_TABLES; +--enable_query_log +TRUNCATE TABLE mysql.general_log; + +--connection master_1 +save_master_pos; + +--connection slave1_1 +sync_with_master; + +--connection master_1 +SET SESSION sql_log_bin= 0; +--disable_query_log +echo CREATE TABLE tbl_a ( + pkey int NOT NULL, + f float DEFAULT NULL, + PRIMARY KEY (pkey) +) MASTER_1_ENGINE2 MASTER_1_CHARSET MASTER_1_COMMENT_2_1; +eval CREATE TABLE tbl_a ( + pkey int NOT NULL, + f float DEFAULT NULL, + PRIMARY KEY (pkey) +) $MASTER_1_ENGINE2 $MASTER_1_CHARSET $MASTER_1_COMMENT_2_1; +--enable_query_log +SET SESSION sql_log_bin= 1; + +--connection slave1_1 +--disable_query_log +echo CREATE TABLE tbl_a ( + pkey int NOT 
NULL, + f float DEFAULT NULL, + PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1; +eval CREATE TABLE tbl_a ( + pkey int NOT NULL, + f float DEFAULT NULL, + PRIMARY KEY (pkey) +) $MASTER_1_ENGINE $MASTER_1_CHARSET $MASTER_1_COMMENT_2_1; +--enable_query_log + +--connection master_1 +INSERT INTO tbl_a (pkey, f) VALUES (0,NULL); +INSERT INTO tbl_a (pkey, f) VALUES (1,0.671437); +DELETE FROM tbl_a WHERE pkey = 0; +DELETE FROM tbl_a WHERE pkey = 1; +save_master_pos; + +--connection slave1_1 +sync_with_master; + +--connection master_1 +SET SESSION sql_log_bin= 0; + +--connection child2_1 +eval $CHILD2_1_SELECT_ARGUMENT1; +eval $CHILD2_1_SELECT_TABLES; + +--connection slave1_1 +SELECT pkey, f FROM tbl_a ORDER BY pkey; + +--echo +--echo deinit +--disable_warnings +--connection master_1 +DROP DATABASE IF EXISTS auto_test_local; + +--connection slave1_1 +DROP DATABASE IF EXISTS auto_test_local; + +--connection child2_1 +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; + +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mariadb.cnf b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mariadb.cnf new file mode 100644 index 00000000000..45019d6c537 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mariadb.cnf @@ -0,0 +1,4 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf +!include ../my_4_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mariadb.test b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mariadb.test new file mode 100644 index 00000000000..ce1a09d6287 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mariadb.test @@ -0,0 +1,5 @@ +--source ../include/delete_with_float_column_mariadb_init.inc +--source delete_with_float_column.inc +--source 
../include/delete_with_float_column_mariadb_deinit.inc +--echo +--echo end of test diff --git a/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mysql.cnf b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mysql.cnf new file mode 100644 index 00000000000..45019d6c537 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mysql.cnf @@ -0,0 +1,4 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf +!include ../my_4_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mysql.test b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mysql.test new file mode 100644 index 00000000000..c687f947a91 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column_mysql.test @@ -0,0 +1,5 @@ +--source ../include/delete_with_float_column_mysql_init.inc +--source delete_with_float_column.inc +--source ../include/delete_with_float_column_mysql_deinit.inc +--echo +--echo end of test diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.cnf b/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.cnf new file mode 100644 index 00000000000..e0ffb99c38e --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.cnf @@ -0,0 +1,4 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf +!include ../my_2_2.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.test new file mode 100644 index 00000000000..05b753ae8bb --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.test @@ -0,0 +1,97 @@ +--source ../include/mdev_19866_init.inc +--echo +--echo this test is for MDEV-19866 +--echo +--echo drop and create databases +--connection master_1 +--disable_warnings +CREATE DATABASE auto_test_local; +USE auto_test_local; + +--connection child2_1 +SET @old_log_output = 
@@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; + +--connection child2_2 +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote2; +USE auto_test_remote2; +--enable_warnings + +--echo +--echo create table and insert + +--connection child2_1 +--disable_query_log +echo CHILD2_1_CREATE_TABLES; +eval $CHILD2_1_CREATE_TABLES; +--enable_query_log +TRUNCATE TABLE mysql.general_log; + +--connection child2_2 +--disable_query_log +echo CHILD2_2_CREATE_TABLES; +eval $CHILD2_2_CREATE_TABLES; +--enable_query_log +TRUNCATE TABLE mysql.general_log; + +--connection master_1 +--disable_query_log +echo CREATE TABLE tbl_a ( + pkey int NOT NULL, + val char(1) NOT NULL, + PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1; +eval CREATE TABLE tbl_a ( + pkey int NOT NULL, + val char(1) NOT NULL, + PRIMARY KEY (pkey) +) $MASTER_1_ENGINE $MASTER_1_CHARSET $MASTER_1_COMMENT_2_1; +--enable_query_log +INSERT INTO tbl_a (pkey,val) VALUES (1,'1'),(2,'2'),(3,'3'),(4,'4'); + +--echo +--echo select test 1 + +--connection child2_1 +TRUNCATE TABLE mysql.general_log; + +--connection child2_2 +TRUNCATE TABLE mysql.general_log; + +--connection master_1 +SELECT * FROM tbl_a; +SELECT * FROM tbl_a WHERE pkey = 1; +SELECT * FROM tbl_a; +SELECT * FROM tbl_a WHERE pkey = 2; +SELECT * FROM tbl_a; + +--connection child2_1 +eval $CHILD2_1_SELECT_ARGUMENT1; +eval $CHILD2_1_SELECT_TABLES; + +--connection child2_2 +eval $CHILD2_2_SELECT_ARGUMENT1; +eval $CHILD2_2_SELECT_TABLES; + +--echo +--echo deinit +--disable_warnings +--connection master_1 +DROP DATABASE IF EXISTS auto_test_local; + +--connection child2_1 +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; + +--connection child2_2 +DROP DATABASE IF EXISTS auto_test_remote2; +SET GLOBAL log_output = @old_log_output; + +--enable_warnings +--source 
../include/mdev_19866_deinit.inc +--echo +--echo end of test diff --git a/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.cnf b/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.cnf new file mode 100644 index 00000000000..05dfd8a0bce --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.cnf @@ -0,0 +1,3 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.test b/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.test new file mode 100644 index 00000000000..4c4d687cf89 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.test @@ -0,0 +1,74 @@ +--source ../include/select_with_backquote_init.inc +--echo +--echo this test is for MDEV-17204 +--echo +--echo drop and create databases + +--connection master_1 +--disable_warnings +CREATE DATABASE auto_test_local; +USE auto_test_local; + +--connection child2_1 +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +--enable_warnings + +--echo +--echo create table and insert + +--connection child2_1 +--disable_query_log +echo CHILD2_1_CREATE_TABLES; +eval $CHILD2_1_CREATE_TABLES; +--enable_query_log +TRUNCATE TABLE mysql.general_log; + +--connection master_1 +--disable_query_log +echo CREATE TABLE tbl_a ( + pkey int NOT NULL, + txt_utf8 char(8) NOT NULL, + PRIMARY KEY (pkey) +) MASTER_1_ENGINE DEFAULT CHARACTER SET utf8 MASTER_1_COMMENT_2_1; +eval CREATE TABLE tbl_a ( + pkey int NOT NULL, + txt_utf8 char(8) NOT NULL, + PRIMARY KEY (pkey) +) $MASTER_1_ENGINE DEFAULT CHARACTER SET utf8 $MASTER_1_COMMENT_2_1; +--enable_query_log +INSERT INTO tbl_a (pkey,txt_utf8) VALUES (0,'01234567'),(1,'12345678'),(2,'23456789'),(3,'34567890'),(4,'45678901'),(5,'56789012'),(6,'67890123'),(7,'78901234'),(8,'89012345'),(9,'90123456'); +FLUSH TABLES; 
+ +--echo +--echo test 1 + +--connection child2_1 +TRUNCATE TABLE mysql.general_log; + +--connection master_1 +SET NAMES utf8; +SELECT `pkey`, LEFT(`txt_utf8`, 4) FROM `auto_test_local`.`tbl_a` ORDER BY LEFT(`txt_utf8`, 4) LIMIT 3; + +--connection child2_1 +SET NAMES utf8; +eval $CHILD2_1_SELECT_ARGUMENT1; +eval $CHILD2_1_SELECT_TABLES; + +--echo +--echo deinit +--disable_warnings + +--connection master_1 +DROP DATABASE IF EXISTS auto_test_local; + +--connection child2_1 +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; + +--enable_warnings +--source ../include/select_with_backquote_deinit.inc +--echo +--echo end of test diff --git a/storage/spider/mysql-test/spider/bugfix/t/slave_transaction_retry_errors_5digit.cnf b/storage/spider/mysql-test/spider/bugfix/t/slave_transaction_retry_errors_5digit.cnf new file mode 100644 index 00000000000..c861d27b01d --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/slave_transaction_retry_errors_5digit.cnf @@ -0,0 +1,6 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf +!include ../my_4_1.cnf +[mysqld.4.1] +slave_transaction_retry_errors="10000,20000,30000" diff --git a/storage/spider/mysql-test/spider/bugfix/t/slave_transaction_retry_errors_5digit.test b/storage/spider/mysql-test/spider/bugfix/t/slave_transaction_retry_errors_5digit.test new file mode 100644 index 00000000000..fc91a43eeb0 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/slave_transaction_retry_errors_5digit.test @@ -0,0 +1,9 @@ +--source ../include/slave_transaction_retry_errors_5digit_init.inc +--echo + +--connection slave1_1 +SHOW VARIABLES LIKE 'slave_transaction_retry_errors'; + +--source ../include/slave_transaction_retry_errors_5digit_deinit.inc +--echo +--echo end of test diff --git a/storage/spider/mysql-test/spider/bugfix/t/spider_table_sts.cnf b/storage/spider/mysql-test/spider/bugfix/t/spider_table_sts.cnf new file mode 100644 index 
00000000000..b0853e32654 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/spider_table_sts.cnf @@ -0,0 +1,2 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/spider_table_sts.test b/storage/spider/mysql-test/spider/bugfix/t/spider_table_sts.test new file mode 100644 index 00000000000..7213017505b --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/spider_table_sts.test @@ -0,0 +1,46 @@ +--source ../include/spider_table_sts_init.inc +--echo +--echo this test is for MDEV-19842 +--echo +--echo drop and create databases + +--connection master_1 +--disable_warnings +CREATE DATABASE auto_test_local; +USE auto_test_local; +--enable_warnings + +--echo +--echo create table + +--connection master_1 +--disable_query_log +echo CREATE TABLE tbl_a ( + pkey int NOT NULL, + PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1; +eval CREATE TABLE tbl_a ( + pkey int NOT NULL, + PRIMARY KEY (pkey) +) $MASTER_1_ENGINE $MASTER_1_CHARSET $MASTER_1_COMMENT_2_1; +--enable_query_log + +--echo +--echo select test 1 + +--connection master_1 +--error 12609 +SELECT pkey FROM tbl_a; + +--echo +--echo deinit +--disable_warnings + +--connection master_1 +--error 12609 +DROP DATABASE IF EXISTS auto_test_local; + +--enable_warnings +--source ../include/spider_table_sts_deinit.inc +--echo +--echo end of test diff --git a/storage/spider/mysql-test/spider/bugfix/t/sql_mode.inc b/storage/spider/mysql-test/spider/bugfix/t/sql_mode.inc new file mode 100644 index 00000000000..ae7c15c5081 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/sql_mode.inc @@ -0,0 +1,65 @@ +--echo +--echo this test is for MDEV-16508 +--echo +--echo drop and create databases + +--connection master_1 +--disable_warnings +CREATE DATABASE auto_test_local; +USE auto_test_local; + +--connection child2_1 +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE 
auto_test_remote; +USE auto_test_remote; +--enable_warnings + +--echo +--echo create table and insert + +--connection child2_1 +--disable_query_log +echo CHILD2_1_CREATE_TABLES; +eval $CHILD2_1_CREATE_TABLES; +--enable_query_log +TRUNCATE TABLE mysql.general_log; + +--connection master_1 +--disable_query_log +echo CREATE TABLE tbl_a ( + pkey int NOT NULL, + PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1; +eval CREATE TABLE tbl_a ( + pkey int NOT NULL, + PRIMARY KEY (pkey) +) $MASTER_1_ENGINE $MASTER_1_CHARSET $MASTER_1_COMMENT_2_1; +--enable_query_log +INSERT INTO tbl_a (pkey) VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +--echo +--echo select test + +--connection child2_1 +TRUNCATE TABLE mysql.general_log; + +--connection master_1 +SELECT * FROM tbl_a ORDER BY pkey; + +--connection child2_1 +eval $CHILD2_1_SELECT_ARGUMENT1; +eval $CHILD2_1_SELECT_TABLES; + +--echo +--echo deinit +--disable_warnings + +--connection master_1 +DROP DATABASE IF EXISTS auto_test_local; + +--connection child2_1 +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; + +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mariadb.cnf b/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mariadb.cnf new file mode 100644 index 00000000000..05dfd8a0bce --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mariadb.cnf @@ -0,0 +1,3 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mariadb.test b/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mariadb.test new file mode 100644 index 00000000000..c63514d22f0 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mariadb.test @@ -0,0 +1,5 @@ +--source ../include/sql_mode_mariadb_init.inc +--source sql_mode.inc +--source ../include/sql_mode_mariadb_deinit.inc +--echo +--echo end of test diff --git 
a/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mysql.cnf b/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mysql.cnf new file mode 100644 index 00000000000..05dfd8a0bce --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mysql.cnf @@ -0,0 +1,3 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mysql.test b/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mysql.test new file mode 100644 index 00000000000..3a0f8d20da3 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/sql_mode_mysql.test @@ -0,0 +1,5 @@ +--source ../include/sql_mode_mysql_init.inc +--source sql_mode.inc +--source ../include/sql_mode_mysql_deinit.inc +--echo +--echo end of test diff --git a/storage/spider/mysql-test/spider/include/deinit_spider.inc b/storage/spider/mysql-test/spider/include/deinit_spider.inc index 3609551e169..51cc075edaa 100644 --- a/storage/spider/mysql-test/spider/include/deinit_spider.inc +++ b/storage/spider/mysql-test/spider/include/deinit_spider.inc @@ -1,8 +1,39 @@ -DROP FUNCTION spider_direct_sql; -DROP FUNCTION spider_bg_direct_sql; -DROP FUNCTION spider_ping_table; -DROP FUNCTION spider_copy_tables; +let $SERVER_NAME= + `SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(version(), '-', 2), '-', -1)`; +let $SERVER_MAJOR_VERSION= + `SELECT SUBSTRING_INDEX(version(), '.', 1)`; +let $SERVER_MINOR_VERSION= + `SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(version(), '.', 2), '.', -1)`; +let $PLUGIN_VERSION= + `SELECT SUBSTRING_INDEX(plugin_version, '.', 1) + FROM information_schema.plugins + WHERE plugin_name = 'SPIDER'`; +if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) +{ + let $HAS_REWRITE= + `SELECT IF (STRCMP('$SERVER_NAME', 'MariaDB') = 0, + IF ($SERVER_MAJOR_VERSION = 10, + IF ($SERVER_MINOR_VERSION < 4, 0, 1), + IF ($SERVER_MAJOR_VERSION < 10, 0, 1)), + 0)`; + let $HAS_REWRITE= 0; + if ($HAS_REWRITE) + { + DROP FUNCTION spider_flush_rewrite_cache; + 
UNINSTALL PLUGIN spider_rewrite; + DROP TABLE IF EXISTS mysql.spider_rewrite_tables; + DROP TABLE IF EXISTS mysql.spider_rewrite_table_tables; + DROP TABLE IF EXISTS mysql.spider_rewrite_table_partitions; + DROP TABLE IF EXISTS mysql.spider_rewrite_table_subpartitions; + DROP TABLE IF EXISTS mysql.spider_rewritten_tables; + } +} DROP FUNCTION spider_flush_table_mon_cache; +DROP FUNCTION spider_copy_tables; +DROP FUNCTION spider_ping_table; +DROP FUNCTION spider_bg_direct_sql; +DROP FUNCTION spider_direct_sql; +UNINSTALL PLUGIN spider_alloc_mem; UNINSTALL PLUGIN spider; DROP TABLE IF EXISTS mysql.spider_xa; DROP TABLE IF EXISTS mysql.spider_xa_member; @@ -13,9 +44,57 @@ DROP TABLE IF EXISTS mysql.spider_link_failed_log; DROP TABLE IF EXISTS mysql.spider_table_position_for_recovery; DROP TABLE IF EXISTS mysql.spider_table_sts; DROP TABLE IF EXISTS mysql.spider_table_crd; -DROP SERVER s_2_1; -DROP SERVER s_2_2; -DROP SERVER s_2_3; -DROP SERVER s_3_1; -DROP SERVER s_3_2; -DROP SERVER s_3_3; +if ($VERSION_COMPILE_OS_WIN) +{ + if ($CHILD2_1_MYPORT) + { + DROP SERVER s_2_1; + } + if ($CHILD2_2_MYPORT) + { + DROP SERVER s_2_2; + } + if ($CHILD2_3_MYPORT) + { + DROP SERVER s_2_3; + } + if ($CHILD3_1_MYPORT) + { + DROP SERVER s_3_1; + } + if ($CHILD3_2_MYPORT) + { + DROP SERVER s_3_2; + } + if ($CHILD2_3_MYPORT) + { + DROP SERVER s_3_3; + } +} +if (!$VERSION_COMPILE_OS_WIN) +{ + if ($CHILD2_1_MYSOCK) + { + DROP SERVER s_2_1; + } + if ($CHILD2_2_MYSOCK) + { + DROP SERVER s_2_2; + } + if ($CHILD2_3_MYSOCK) + { + DROP SERVER s_2_3; + } + if ($CHILD3_1_MYSOCK) + { + DROP SERVER s_3_1; + } + if ($CHILD3_2_MYSOCK) + { + DROP SERVER s_3_2; + } + if ($CHILD3_3_MYSOCK) + { + DROP SERVER s_3_3; + } +} diff --git a/storage/spider/mysql-test/spider/include/init_spider.inc b/storage/spider/mysql-test/spider/include/init_spider.inc index c1167b3c37e..69d1fae425e 100644 --- a/storage/spider/mysql-test/spider/include/init_spider.inc +++ 
b/storage/spider/mysql-test/spider/include/init_spider.inc @@ -3,104 +3,130 @@ let $VERSION_COMPILE_OS_WIN= if ($VERSION_COMPILE_OS_WIN) { INSTALL PLUGIN spider SONAME 'ha_spider.dll'; - CREATE FUNCTION spider_direct_sql RETURNS INT SONAME 'ha_spider.dll'; - CREATE AGGREGATE FUNCTION spider_bg_direct_sql RETURNS INT SONAME 'ha_spider.dll'; - CREATE FUNCTION spider_ping_table RETURNS INT SONAME 'ha_spider.dll'; - CREATE FUNCTION spider_copy_tables RETURNS INT SONAME 'ha_spider.dll'; - CREATE FUNCTION spider_flush_table_mon_cache RETURNS INT SONAME 'ha_spider.dll'; - eval CREATE SERVER s_2_1 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_remote', - USER 'root', - PASSWORD '', - PORT $CHILD2_1_MYPORT - ); - eval CREATE SERVER s_2_2 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_remote2', - USER 'root', - PASSWORD '', - PORT $CHILD2_2_MYPORT - ); - eval CREATE SERVER s_2_3 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_remote3', - USER 'root', - PASSWORD '', - PORT $CHILD2_3_MYPORT - ); - eval CREATE SERVER s_3_1 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_local', - USER 'root', - PASSWORD '', - PORT $CHILD3_1_MYPORT - ); - eval CREATE SERVER s_3_2 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_local', - USER 'root', - PASSWORD '', - PORT $CHILD3_2_MYPORT - ); - eval CREATE SERVER s_3_3 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_local', - USER 'root', - PASSWORD '', - PORT $CHILD2_3_MYPORT - ); + if ($CHILD2_1_MYPORT) + { + eval CREATE SERVER s_2_1 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_remote', + USER 'root', + PASSWORD '', + PORT $CHILD2_1_MYPORT + ); + } + if ($CHILD2_2_MYPORT) + { + eval CREATE SERVER s_2_2 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_remote2', + USER 'root', + PASSWORD '', + PORT 
$CHILD2_2_MYPORT + ); + } + if ($CHILD2_3_MYPORT) + { + eval CREATE SERVER s_2_3 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_remote3', + USER 'root', + PASSWORD '', + PORT $CHILD2_3_MYPORT + ); + } + if ($CHILD3_1_MYPORT) + { + eval CREATE SERVER s_3_1 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_local', + USER 'root', + PASSWORD '', + PORT $CHILD3_1_MYPORT + ); + } + if ($CHILD3_2_MYPORT) + { + eval CREATE SERVER s_3_2 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_local', + USER 'root', + PASSWORD '', + PORT $CHILD3_2_MYPORT + ); + } + if ($CHILD2_3_MYPORT) + { + eval CREATE SERVER s_3_3 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_local', + USER 'root', + PASSWORD '', + PORT $CHILD2_3_MYPORT + ); + } } if (!$VERSION_COMPILE_OS_WIN) { INSTALL PLUGIN spider SONAME 'ha_spider.so'; - CREATE FUNCTION spider_direct_sql RETURNS INT SONAME 'ha_spider.so'; - CREATE AGGREGATE FUNCTION spider_bg_direct_sql RETURNS INT SONAME 'ha_spider.so'; - CREATE FUNCTION spider_ping_table RETURNS INT SONAME 'ha_spider.so'; - CREATE FUNCTION spider_copy_tables RETURNS INT SONAME 'ha_spider.so'; - CREATE FUNCTION spider_flush_table_mon_cache RETURNS INT SONAME 'ha_spider.so'; - eval CREATE SERVER s_2_1 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_remote', - USER 'root', - PASSWORD '', - SOCKET '$CHILD2_1_MYSOCK' - ); - eval CREATE SERVER s_2_2 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_remote2', - USER 'root', - PASSWORD '', - SOCKET '$CHILD2_2_MYSOCK' - ); - eval CREATE SERVER s_2_3 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_remote3', - USER 'root', - PASSWORD '', - SOCKET '$CHILD2_3_MYSOCK' - ); - eval CREATE SERVER s_3_1 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_local', - USER 'root', - PASSWORD '', - SOCKET 
'$CHILD3_1_MYSOCK' - ); - eval CREATE SERVER s_3_2 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_local', - USER 'root', - PASSWORD '', - SOCKET '$CHILD3_2_MYSOCK' - ); - eval CREATE SERVER s_3_3 FOREIGN DATA WRAPPER mysql OPTIONS ( - HOST 'localhost', - DATABASE 'auto_test_local', - USER 'root', - PASSWORD '', - SOCKET '$CHILD3_3_MYSOCK' - ); + if ($CHILD2_1_MYSOCK) + { + eval CREATE SERVER s_2_1 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_remote', + USER 'root', + PASSWORD '', + SOCKET '$CHILD2_1_MYSOCK' + ); + } + if ($CHILD2_2_MYSOCK) + { + eval CREATE SERVER s_2_2 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_remote2', + USER 'root', + PASSWORD '', + SOCKET '$CHILD2_2_MYSOCK' + ); + } + if ($CHILD2_3_MYSOCK) + { + eval CREATE SERVER s_2_3 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_remote3', + USER 'root', + PASSWORD '', + SOCKET '$CHILD2_3_MYSOCK' + ); + } + if ($CHILD3_1_MYSOCK) + { + eval CREATE SERVER s_3_1 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_local', + USER 'root', + PASSWORD '', + SOCKET '$CHILD3_1_MYSOCK' + ); + } + if ($CHILD3_2_MYSOCK) + { + eval CREATE SERVER s_3_2 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_local', + USER 'root', + PASSWORD '', + SOCKET '$CHILD3_2_MYSOCK' + ); + } + if ($CHILD3_3_MYSOCK) + { + eval CREATE SERVER s_3_3 FOREIGN DATA WRAPPER mysql OPTIONS ( + HOST 'localhost', + DATABASE 'auto_test_local', + USER 'root', + PASSWORD '', + SOCKET '$CHILD3_3_MYSOCK' + ); + } } let $SERVER_NAME= @@ -113,314 +139,34 @@ let $PLUGIN_VERSION= `SELECT SUBSTRING_INDEX(plugin_version, '.', 1) FROM information_schema.plugins WHERE plugin_name = 'SPIDER'`; - -if (`SELECT IF($PLUGIN_VERSION = 1, 1, 0)`) -{ - DROP TABLE IF EXISTS mysql.spider_xa; - CREATE TABLE mysql.spider_xa( - format_id int not null default 0, - gtrid_length int not null 
default 0, - bqual_length int not null default 0, - data char(128) charset binary not null default '', - status char(8) not null default '', - PRIMARY KEY (data, format_id, gtrid_length), - KEY idx1 (status) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_xa_member; - CREATE TABLE mysql.spider_xa_member( - format_id int not null default 0, - gtrid_length int not null default 0, - bqual_length int not null default 0, - data char(128) charset binary not null default '', - scheme char(64) not null default '', - host char(64) not null default '', - port char(5) not null default '', - socket char(64) not null default '', - username char(64) not null default '', - password char(64) not null default '', - PRIMARY KEY (data, format_id, gtrid_length, host, port, socket) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_tables; - CREATE TABLE mysql.spider_tables( - db_name char(64) not null default '', - table_name char(64) not null default '', - priority bigint not null default 0, - server char(64) default null, - scheme char(64) default null, - host char(64) default null, - port char(5) default null, - socket char(64) default null, - username char(64) default null, - password char(64) default null, - tgt_db_name char(64) default null, - tgt_table_name char(64) default null, - PRIMARY KEY (db_name, table_name), - KEY idx1 (priority) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; -} -if (`SELECT IF($PLUGIN_VERSION = 2, 1, 0)`) -{ - DROP TABLE IF EXISTS mysql.spider_xa; - CREATE TABLE mysql.spider_xa( - format_id int not null default 0, - gtrid_length int not null default 0, - bqual_length int not null default 0, - data char(128) charset binary not null default '', - status char(8) not null default '', - PRIMARY KEY (data, format_id, gtrid_length), - KEY idx1 (status) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_xa_member; - CREATE 
TABLE mysql.spider_xa_member( - format_id int not null default 0, - gtrid_length int not null default 0, - bqual_length int not null default 0, - data char(128) charset binary not null default '', - scheme char(64) not null default '', - host char(64) not null default '', - port char(5) not null default '', - socket char(64) not null default '', - username char(64) not null default '', - password char(64) not null default '', - ssl_ca char(64) default null, - ssl_capath char(64) default null, - ssl_cert char(64) default null, - ssl_cipher char(64) default null, - ssl_key char(64) default null, - ssl_verify_server_cert tinyint not null default 0, - default_file char(64) default null, - default_group char(64) default null, - PRIMARY KEY (data, format_id, gtrid_length, host, port, socket) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_tables; - CREATE TABLE mysql.spider_tables( - db_name char(64) not null default '', - table_name char(64) not null default '', - link_id int not null default 0, - priority bigint not null default 0, - server char(64) default null, - scheme char(64) default null, - host char(64) default null, - port char(5) default null, - socket char(64) default null, - username char(64) default null, - password char(64) default null, - ssl_ca char(64) default null, - ssl_capath char(64) default null, - ssl_cert char(64) default null, - ssl_cipher char(64) default null, - ssl_key char(64) default null, - ssl_verify_server_cert tinyint not null default 0, - default_file char(64) default null, - default_group char(64) default null, - tgt_db_name char(64) default null, - tgt_table_name char(64) default null, - link_status tinyint not null default 1, - PRIMARY KEY (db_name, table_name, link_id), - KEY idx1 (priority) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_link_mon_servers; - CREATE TABLE mysql.spider_link_mon_servers( - db_name char(64) not null default '', - 
table_name char(64) not null default '', - link_id char(5) not null default '', - sid int not null default 0, - server char(64) default null, - scheme char(64) default null, - host char(64) default null, - port char(5) default null, - socket char(64) default null, - username char(64) default null, - password char(64) default null, - ssl_ca char(64) default null, - ssl_capath char(64) default null, - ssl_cert char(64) default null, - ssl_cipher char(64) default null, - ssl_key char(64) default null, - ssl_verify_server_cert tinyint not null default 0, - default_file char(64) default null, - default_group char(64) default null, - PRIMARY KEY (db_name, table_name, link_id, sid) - ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_link_failed_log; - CREATE TABLE mysql.spider_link_failed_log( - db_name char(64) not null default '', - table_name char(64) not null default '', - link_id int not null default 0, - failed_time timestamp not null default current_timestamp - ) ENGINE=MYISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; -} if (`SELECT IF($PLUGIN_VERSION = 3, 1, 0)`) { - let $ENGINE_NAME= + let $HAS_REWRITE= `SELECT IF (STRCMP('$SERVER_NAME', 'MariaDB') = 0, IF ($SERVER_MAJOR_VERSION = 10, - IF ($SERVER_MINOR_VERSION < 4, 'MyISAM', - 'Aria transactional=1'), - IF ($SERVER_MAJOR_VERSION < 10, 'MyISAM', - 'Aria transactional=1')), - 'MyISAM')`; - DROP TABLE IF EXISTS mysql.spider_xa; - eval - CREATE TABLE mysql.spider_xa( - format_id int not null default 0, - gtrid_length int not null default 0, - bqual_length int not null default 0, - data binary(128) not null default '', - status char(8) not null default '', - PRIMARY KEY (data, format_id, gtrid_length), - KEY idx1 (status) - ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_xa_member; - eval - CREATE TABLE mysql.spider_xa_member( - format_id int not null default 0, - gtrid_length int not null default 0, - bqual_length int not null 
default 0, - data binary(128) not null default '', - scheme char(64) not null default '', - host char(64) not null default '', - port char(5) not null default '', - socket text not null default '', - username char(64) not null default '', - password char(64) not null default '', - ssl_ca text default null, - ssl_capath text default null, - ssl_cert text default null, - ssl_cipher char(64) default null, - ssl_key text default null, - ssl_verify_server_cert tinyint not null default 0, - default_file text default null, - default_group char(64) default null, - KEY idx1 (data, format_id, gtrid_length, host) - ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_xa_failed_log; - eval - CREATE TABLE mysql.spider_xa_failed_log( - format_id int not null default 0, - gtrid_length int not null default 0, - bqual_length int not null default 0, - data binary(128) not null default '', - scheme char(64) not null default '', - host char(64) not null default '', - port char(5) not null default '', - socket text not null, - username char(64) not null default '', - password char(64) not null default '', - ssl_ca text, - ssl_capath text, - ssl_cert text, - ssl_cipher char(64) default null, - ssl_key text, - ssl_verify_server_cert tinyint not null default 0, - default_file text, - default_group char(64) default null, - thread_id int default null, - status char(8) not null default '', - failed_time timestamp not null default current_timestamp, - key idx1 (data, format_id, gtrid_length, host) - ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_tables; - eval - CREATE TABLE mysql.spider_tables( - db_name char(64) not null default '', - table_name char(199) not null default '', - link_id int not null default 0, - priority bigint not null default 0, - server char(64) default null, - scheme char(64) default null, - host char(64) default null, - port char(5) default null, - socket text default null, - 
username char(64) default null, - password char(64) default null, - ssl_ca text default null, - ssl_capath text default null, - ssl_cert text default null, - ssl_cipher char(64) default null, - ssl_key text default null, - ssl_verify_server_cert tinyint not null default 0, - monitoring_binlog_pos_at_failing tinyint not null default 0, - default_file text default null, - default_group char(64) default null, - tgt_db_name char(64) default null, - tgt_table_name char(64) default null, - link_status tinyint not null default 1, - block_status tinyint not null default 0, - static_link_id char(64) default null, - PRIMARY KEY (db_name, table_name, link_id), - KEY idx1 (priority), - UNIQUE KEY uidx1 (db_name, table_name, static_link_id) - ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_link_mon_servers; - eval - CREATE TABLE mysql.spider_link_mon_servers( - db_name char(64) not null default '', - table_name char(199) not null default '', - link_id char(64) not null default '', - sid int unsigned not null default 0, - server char(64) default null, - scheme char(64) default null, - host char(64) default null, - port char(5) default null, - socket text default null, - username char(64) default null, - password char(64) default null, - ssl_ca text default null, - ssl_capath text default null, - ssl_cert text default null, - ssl_cipher char(64) default null, - ssl_key text default null, - ssl_verify_server_cert tinyint not null default 0, - default_file text default null, - default_group char(64) default null, - PRIMARY KEY (db_name, table_name, link_id, sid) - ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_link_failed_log; - eval - CREATE TABLE mysql.spider_link_failed_log( - db_name char(64) not null default '', - table_name char(199) not null default '', - link_id char(64) not null default '', - failed_time timestamp not null default current_timestamp - ) ENGINE=$ENGINE_NAME 
DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_table_position_for_recovery; - eval - CREATE TABLE mysql.spider_table_position_for_recovery( - db_name char(64) not null default '', - table_name char(199) not null default '', - failed_link_id int not null default 0, - source_link_id int not null default 0, - file text, - position text, - gtid text, - primary key (db_name, table_name, failed_link_id, source_link_id) - ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_table_sts; - eval - CREATE TABLE mysql.spider_table_sts( - db_name char(64) not null default '', - table_name char(199) not null default '', - data_file_length bigint unsigned not null default 0, - max_data_file_length bigint unsigned not null default 0, - index_file_length bigint unsigned not null default 0, - records bigint unsigned not null default 0, - mean_rec_length bigint unsigned not null default 0, - check_time datetime not null default '0000-00-00 00:00:00', - create_time datetime not null default '0000-00-00 00:00:00', - update_time datetime not null default '0000-00-00 00:00:00', - checksum bigint unsigned default null, - primary key (db_name, table_name) - ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; - DROP TABLE IF EXISTS mysql.spider_table_crd; - eval - CREATE TABLE mysql.spider_table_crd( - db_name char(64) not null default '', - table_name char(199) not null default '', - key_seq int unsigned not null default 0, - cardinality bigint not null default 0, - primary key (db_name, table_name, key_seq) - ) ENGINE=$ENGINE_NAME DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + IF ($SERVER_MINOR_VERSION < 4, 0, 1), + IF ($SERVER_MAJOR_VERSION < 10, 0, 1)), + 0)`; + let $HAS_REWRITE= 0; + if ($HAS_REWRITE) + { + let $PLUGIN_NAME= spider_flush_rewrite_cache; + let $PLUGIN_EXIST= + `SELECT COUNT(*) FROM mysql.func WHERE name = '$PLUGIN_NAME'`; + while (!$PLUGIN_EXIST) + { + let $PLUGIN_EXIST= + `SELECT COUNT(*) FROM 
mysql.func WHERE name = '$PLUGIN_NAME'`; + } + } +} +let $PLUGIN_NAME= spider_flush_table_mon_cache; +let $PLUGIN_EXIST= + `SELECT COUNT(*) FROM mysql.func WHERE name = '$PLUGIN_NAME'`; +while (!$PLUGIN_EXIST) +{ + let $PLUGIN_EXIST= + `SELECT COUNT(*) FROM mysql.func WHERE name = '$PLUGIN_NAME'`; } SET spider_internal_sql_log_off= 0; diff --git a/storage/spider/mysql-test/spider/r/slave_trx_isolation.result b/storage/spider/mysql-test/spider/r/slave_trx_isolation.result index 4fd2e71d3f2..e68b4a2c82d 100644 --- a/storage/spider/mysql-test/spider/r/slave_trx_isolation.result +++ b/storage/spider/mysql-test/spider/r/slave_trx_isolation.result @@ -53,7 +53,7 @@ SELECT argument FROM mysql.general_log WHERE argument LIKE '%set %'; argument set session time_zone = '+00:00' SET NAMES utf8 -set session transaction isolation level read committed;set session autocommit = 1;set session wait_timeout = 604800;start transaction +set session transaction isolation level read committed;set session autocommit = 1;set session wait_timeout = 604800;set session sql_mode = 'strict_trans_tables,error_for_division_by_zero,no_auto_create_user,no_engine_substitution';start transaction SELECT argument FROM mysql.general_log WHERE argument LIKE '%set %' SELECT pkey FROM tbl_a ORDER BY pkey; pkey diff --git a/storage/spider/scripts/install_spider.sql b/storage/spider/scripts/install_spider.sql index 30c92e6c710..403bd99fd68 100644 --- a/storage/spider/scripts/install_spider.sql +++ b/storage/spider/scripts/install_spider.sql @@ -1,4 +1,4 @@ -# Copyright (C) 2010-2018 Kentoku Shiba +# Copyright (C) 2010-2019 Kentoku Shiba # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,488 +13,6 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA -# This SQL script creates system tables for SPIDER -# or fixes 
incompatibilities if ones already exist. - --- Create system tables if not exist -create table if not exists mysql.spider_xa( - format_id int not null default 0, - gtrid_length int not null default 0, - bqual_length int not null default 0, - data char(128) charset binary not null default '', - status char(8) not null default '', - primary key (data, format_id, gtrid_length), - key idx1 (status) -) engine=MyISAM default charset=utf8 collate=utf8_bin; -create table if not exists mysql.spider_xa_member( - format_id int not null default 0, - gtrid_length int not null default 0, - bqual_length int not null default 0, - data char(128) charset binary not null default '', - scheme char(64) not null default '', - host char(64) not null default '', - port char(5) not null default '', - socket text not null, - username char(64) not null default '', - password char(64) not null default '', - ssl_ca text, - ssl_capath text, - ssl_cert text, - ssl_cipher char(64) default null, - ssl_key text, - ssl_verify_server_cert tinyint not null default 0, - default_file text, - default_group char(64) default null, - key idx1 (data, format_id, gtrid_length, host) -) engine=MyISAM default charset=utf8 collate=utf8_bin; -create table if not exists mysql.spider_xa_failed_log( - format_id int not null default 0, - gtrid_length int not null default 0, - bqual_length int not null default 0, - data char(128) charset binary not null default '', - scheme char(64) not null default '', - host char(64) not null default '', - port char(5) not null default '', - socket text not null, - username char(64) not null default '', - password char(64) not null default '', - ssl_ca text, - ssl_capath text, - ssl_cert text, - ssl_cipher char(64) default null, - ssl_key text, - ssl_verify_server_cert tinyint not null default 0, - default_file text, - default_group char(64) default null, - thread_id int default null, - status char(8) not null default '', - failed_time timestamp not null default current_timestamp, - 
key idx1 (data, format_id, gtrid_length, host) -) engine=MyISAM default charset=utf8 collate=utf8_bin; -create table if not exists mysql.spider_tables( - db_name char(64) not null default '', - table_name char(199) not null default '', - link_id int not null default 0, - priority bigint not null default 0, - server char(64) default null, - scheme char(64) default null, - host char(64) default null, - port char(5) default null, - socket text, - username char(64) default null, - password char(64) default null, - ssl_ca text, - ssl_capath text, - ssl_cert text, - ssl_cipher char(64) default null, - ssl_key text, - ssl_verify_server_cert tinyint not null default 0, - monitoring_binlog_pos_at_failing tinyint not null default 0, - default_file text, - default_group char(64) default null, - tgt_db_name char(64) default null, - tgt_table_name char(64) default null, - link_status tinyint not null default 1, - block_status tinyint not null default 0, - static_link_id char(64) default null, - primary key (db_name, table_name, link_id), - key idx1 (priority), - unique key uidx1 (db_name, table_name, static_link_id) -) engine=MyISAM default charset=utf8 collate=utf8_bin; -create table if not exists mysql.spider_link_mon_servers( - db_name char(64) not null default '', - table_name char(199) not null default '', - link_id char(64) not null default '', - sid int unsigned not null default 0, - server char(64) default null, - scheme char(64) default null, - host char(64) default null, - port char(5) default null, - socket text, - username char(64) default null, - password char(64) default null, - ssl_ca text, - ssl_capath text, - ssl_cert text, - ssl_cipher char(64) default null, - ssl_key text, - ssl_verify_server_cert tinyint not null default 0, - default_file text, - default_group char(64) default null, - primary key (db_name, table_name, link_id, sid) -) engine=MyISAM default charset=utf8 collate=utf8_bin; -create table if not exists mysql.spider_link_failed_log( - db_name 
char(64) not null default '', - table_name char(199) not null default '', - link_id char(64) not null default '', - failed_time timestamp not null default current_timestamp -) engine=MyISAM default charset=utf8 collate=utf8_bin; -create table if not exists mysql.spider_table_position_for_recovery( - db_name char(64) not null default '', - table_name char(199) not null default '', - failed_link_id int not null default 0, - source_link_id int not null default 0, - file text, - position text, - gtid text, - primary key (db_name, table_name, failed_link_id, source_link_id) -) engine=MyISAM default charset=utf8 collate=utf8_bin; -create table if not exists mysql.spider_table_sts( - db_name char(64) not null default '', - table_name char(199) not null default '', - data_file_length bigint unsigned not null default 0, - max_data_file_length bigint unsigned not null default 0, - index_file_length bigint unsigned not null default 0, - records bigint unsigned not null default 0, - mean_rec_length bigint unsigned not null default 0, - check_time datetime not null default '0000-00-00 00:00:00', - create_time datetime not null default '0000-00-00 00:00:00', - update_time datetime not null default '0000-00-00 00:00:00', - checksum bigint unsigned default null, - primary key (db_name, table_name) -) engine=MyISAM default charset=utf8 collate=utf8_bin; -create table if not exists mysql.spider_table_crd( - db_name char(64) not null default '', - table_name char(199) not null default '', - key_seq int unsigned not null default 0, - cardinality bigint not null default 0, - primary key (db_name, table_name, key_seq) -) engine=MyISAM default charset=utf8 collate=utf8_bin; - --- If tables already exist and their definition differ from the latest ones, --- we fix them here. 
-drop procedure if exists mysql.spider_fix_one_table; -drop procedure if exists mysql.spider_fix_system_tables; -delimiter // -create procedure mysql.spider_fix_one_table - (tab_name char(255) charset utf8 collate utf8_bin, - test_col_name char(255) charset utf8 collate utf8_bin, - _sql text charset utf8 collate utf8_bin) -begin - set @col_exists := 0; - select 1 into @col_exists from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = tab_name - AND COLUMN_NAME = test_col_name; - if @col_exists = 0 then - select @stmt := _sql; - prepare sp_stmt1 from @stmt; - execute sp_stmt1; - end if; -end;// - -create procedure mysql.spider_fix_system_tables() -begin - select substring_index(substring_index(version(), '-', 2), '-', -1) - into @server_name; - select substring_index(version(), '.', 1) - into @server_major_version; - select substring_index(substring_index(version(), '.', 2), '.', -1) - into @server_minor_version; - - -- Fix for 0.5 - call mysql.spider_fix_one_table('spider_tables', 'server', - 'alter table mysql.spider_tables - add server char(64) default null, - add scheme char(64) default null, - add host char(64) default null, - add port char(5) default null, - add socket char(64) default null, - add username char(64) default null, - add password char(64) default null, - add tgt_db_name char(64) default null, - add tgt_table_name char(64) default null'); - - -- Fix for version 0.17 - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_xa' - AND COLUMN_NAME = 'data'; - if @col_type != 'binary(128)' then - alter table mysql.spider_xa modify data binary(128) not null default ''; - end if; - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_xa_member' - AND COLUMN_NAME = 'data'; - if @col_type != 'binary(128)' then - alter table mysql.spider_xa_member modify data binary(128) not null default ''; - 
end if; - - -- Fix for version 2.7 - call mysql.spider_fix_one_table('spider_tables', 'link_id', - 'alter table mysql.spider_tables - add column link_id int not null default 0 after table_name, - drop primary key, - add primary key (db_name, table_name, link_id)'); - - -- Fix for version 2.8 - call mysql.spider_fix_one_table('spider_tables', 'link_status', - 'alter table mysql.spider_tables - add column link_status tinyint not null default 1'); - - -- Fix for version 2.10 - call mysql.spider_fix_one_table('spider_xa_member', 'ssl_ca', - 'alter table mysql.spider_xa_member - add column ssl_ca char(64) default null after password, - add column ssl_capath char(64) default null after ssl_ca, - add column ssl_cert char(64) default null after ssl_capath, - add column ssl_cipher char(64) default null after ssl_cert, - add column ssl_key char(64) default null after ssl_cipher, - add column ssl_verify_server_cert tinyint not null default 0 after ssl_key, - add column default_file char(64) default null after ssl_verify_server_cert, - add column default_group char(64) default null after default_file'); - call mysql.spider_fix_one_table('spider_tables', 'ssl_ca', - 'alter table mysql.spider_tables - add column ssl_ca char(64) default null after password, - add column ssl_capath char(64) default null after ssl_ca, - add column ssl_cert char(64) default null after ssl_capath, - add column ssl_cipher char(64) default null after ssl_cert, - add column ssl_key char(64) default null after ssl_cipher, - add column ssl_verify_server_cert tinyint not null default 0 after ssl_key, - add column default_file char(64) default null after ssl_verify_server_cert, - add column default_group char(64) default null after default_file'); - call mysql.spider_fix_one_table('spider_link_mon_servers', 'ssl_ca', - 'alter table mysql.spider_link_mon_servers - add column ssl_ca char(64) default null after password, - add column ssl_capath char(64) default null after ssl_ca, - add column ssl_cert char(64) 
default null after ssl_capath, - add column ssl_cipher char(64) default null after ssl_cert, - add column ssl_key char(64) default null after ssl_cipher, - add column ssl_verify_server_cert tinyint not null default 0 after ssl_key, - add column default_file char(64) default null after ssl_verify_server_cert, - add column default_group char(64) default null after default_file'); - - -- Fix for version 2.25 - -- select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - -- where TABLE_SCHEMA = 'mysql' - -- AND TABLE_NAME = 'spider_link_mon_servers' - -- AND COLUMN_NAME = 'link_id'; - -- if @col_type != 'char(5)' then - -- alter table mysql.spider_link_mon_servers - -- modify link_id char(5) not null default ''; - -- end if; - - -- Fix for version 2.28 - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_link_mon_servers' - AND COLUMN_NAME = 'sid'; - if @col_type != 'int(10) unsigned' then - alter table mysql.spider_link_mon_servers - modify sid int unsigned not null default 0; - end if; - - -- Fix for version 3.1 - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_xa_member' - AND COLUMN_NAME = 'socket'; - if @col_type = 'char(64)' then - alter table mysql.spider_xa_member - drop primary key, - add index idx1 (data, format_id, gtrid_length, host), - modify socket text not null, - modify ssl_ca text, - modify ssl_capath text, - modify ssl_cert text, - modify ssl_key text, - modify default_file text; - end if; - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_tables' - AND COLUMN_NAME = 'socket'; - if @col_type = 'char(64)' then - alter table mysql.spider_tables - modify socket text, - modify ssl_ca text, - modify ssl_capath text, - modify ssl_cert text, - modify ssl_key text, - modify default_file text; - end if; - select COLUMN_TYPE INTO 
@col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_link_mon_servers' - AND COLUMN_NAME = 'socket'; - if @col_type = 'char(64)' then - alter table mysql.spider_link_mon_servers - modify socket text, - modify ssl_ca text, - modify ssl_capath text, - modify ssl_cert text, - modify ssl_key text, - modify default_file text; - end if; - - -- Fix for version 3.3.0 - call mysql.spider_fix_one_table('spider_tables', - 'monitoring_binlog_pos_at_failing', - 'alter table mysql.spider_tables - add monitoring_binlog_pos_at_failing tinyint not null default 0 after ssl_verify_server_cert'); - - -- Fix for version 3.3.6 - call mysql.spider_fix_one_table('spider_tables', 'block_status', - 'alter table mysql.spider_tables - add column block_status tinyint not null default 0 after link_status'); - call mysql.spider_fix_one_table('spider_tables', 'static_link_id', - 'alter table mysql.spider_tables - add column static_link_id char(64) default null after block_status, - add unique index uidx1 (db_name, table_name, static_link_id)'); - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_link_mon_servers' - AND COLUMN_NAME = 'link_id'; - if @col_type != 'char(64)' then - alter table mysql.spider_link_mon_servers - modify link_id char(64) not null default ''; - end if; - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_link_failed_log' - AND COLUMN_NAME = 'link_id'; - if @col_type != 'char(64)' then - alter table mysql.spider_link_failed_log - modify link_id char(64) not null default ''; - end if; - - -- Fix for version 3.3.10 - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_tables' - AND COLUMN_NAME = 'table_name'; - if @col_type != 'char(199)' then - alter table mysql.spider_tables - modify table_name char(199) not null 
default ''; - end if; - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_link_mon_servers' - AND COLUMN_NAME = 'table_name'; - if @col_type != 'char(199)' then - alter table mysql.spider_link_mon_servers - modify table_name char(199) not null default ''; - end if; - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_link_failed_log' - AND COLUMN_NAME = 'table_name'; - if @col_type != 'char(199)' then - alter table mysql.spider_link_failed_log - modify table_name char(199) not null default ''; - end if; - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_table_position_for_recovery' - AND COLUMN_NAME = 'table_name'; - if @col_type != 'char(199)' then - alter table mysql.spider_table_position_for_recovery - modify table_name char(199) not null default ''; - end if; - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_table_sts' - AND COLUMN_NAME = 'table_name'; - if @col_type != 'char(199)' then - alter table mysql.spider_table_sts - modify table_name char(199) not null default ''; - end if; - select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_table_crd' - AND COLUMN_NAME = 'table_name'; - if @col_type != 'char(199)' then - alter table mysql.spider_table_crd - modify table_name char(199) not null default ''; - end if; - - -- Fix for version 3.3.15 - call mysql.spider_fix_one_table('spider_table_sts', 'checksum', - 'alter table mysql.spider_table_sts - add column checksum bigint unsigned default null after update_time'); - - -- Fix for MariaDB 10.4: Crash-Safe system tables - if @server_name = 'MariaDB' and - ( - @server_major_version > 10 or - ( - @server_major_version = 10 and - @server_minor_version >= 4 
- ) - ) - then - select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_link_failed_log'; - if @engine_name != 'Aria' then - alter table mysql.spider_link_failed_log - engine=Aria transactional=1; - end if; - select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_link_mon_servers'; - if @engine_name != 'Aria' then - alter table mysql.spider_link_mon_servers - engine=Aria transactional=1; - end if; - select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_table_crd'; - if @engine_name != 'Aria' then - alter table mysql.spider_table_crd - engine=Aria transactional=1; - end if; - select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_table_position_for_recovery'; - if @engine_name != 'Aria' then - alter table mysql.spider_table_position_for_recovery - engine=Aria transactional=1; - end if; - select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_table_sts'; - if @engine_name != 'Aria' then - alter table mysql.spider_table_sts - engine=Aria transactional=1; - end if; - select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_tables'; - if @engine_name != 'Aria' then - alter table mysql.spider_tables - engine=Aria transactional=1; - end if; - select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_xa'; - if @engine_name != 'Aria' then - alter table mysql.spider_xa - engine=Aria transactional=1; - end if; - select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_xa_failed_log'; - if @engine_name != 'Aria' then - alter table mysql.spider_xa_failed_log - 
engine=Aria transactional=1; - end if; - select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES - where TABLE_SCHEMA = 'mysql' - AND TABLE_NAME = 'spider_xa_member'; - if @engine_name != 'Aria' then - alter table mysql.spider_xa_member - engine=Aria transactional=1; - end if; - end if; -end;// -delimiter ; -call mysql.spider_fix_system_tables; -drop procedure mysql.spider_fix_one_table; -drop procedure mysql.spider_fix_system_tables; - --- Install a plugin and UDFs drop procedure if exists mysql.spider_plugin_installer; delimiter // create procedure mysql.spider_plugin_installer() @@ -518,71 +36,6 @@ begin install plugin spider soname 'ha_spider.dll'; end if; end if; - set @have_spider_i_s_alloc_mem_plugin := 0; - select @have_spider_i_s_alloc_mem_plugin := 1 from INFORMATION_SCHEMA.plugins where PLUGIN_NAME = 'SPIDER_ALLOC_MEM'; - set @have_spider_alloc_mem_plugin := 0; - select @have_spider_alloc_mem_plugin := 1 from mysql.plugin where name = 'spider_alloc_mem'; - if @have_spider_i_s_alloc_mem_plugin = 0 then - if @have_spider_alloc_mem_plugin = 1 then - -- spider_alloc_mem plugin is present in mysql.plugin but not in - -- information_schema.plugins. Remove spider_alloc_mem plugin entry - -- in mysql.plugin first. 
- delete from mysql.plugin where name = 'spider_alloc_mem'; - end if; - -- Install spider_alloc_mem plugin - if @win_plugin = 0 then - install plugin spider_alloc_mem soname 'ha_spider.so'; - else - install plugin spider_alloc_mem soname 'ha_spider.dll'; - end if; - end if; - set @have_spider_direct_sql_udf := 0; - select @have_spider_direct_sql_udf := 1 from mysql.func where name = 'spider_direct_sql'; - if @have_spider_direct_sql_udf = 0 then - if @win_plugin = 0 then - create function spider_direct_sql returns int soname 'ha_spider.so'; - else - create function spider_direct_sql returns int soname 'ha_spider.dll'; - end if; - end if; - set @have_spider_bg_direct_sql_udf := 0; - select @have_spider_bg_direct_sql_udf := 1 from mysql.func where name = 'spider_bg_direct_sql'; - if @have_spider_bg_direct_sql_udf = 0 then - if @win_plugin = 0 then - create aggregate function spider_bg_direct_sql returns int soname 'ha_spider.so'; - else - create aggregate function spider_bg_direct_sql returns int soname 'ha_spider.dll'; - end if; - end if; - set @have_spider_ping_table_udf := 0; - select @have_spider_ping_table_udf := 1 from mysql.func where name = 'spider_ping_table'; - if @have_spider_ping_table_udf = 0 then - if @win_plugin = 0 then - create function spider_ping_table returns int soname 'ha_spider.so'; - else - create function spider_ping_table returns int soname 'ha_spider.dll'; - end if; - end if; - set @have_spider_copy_tables_udf := 0; - select @have_spider_copy_tables_udf := 1 from mysql.func where name = 'spider_copy_tables'; - if @have_spider_copy_tables_udf = 0 then - if @win_plugin = 0 then - create function spider_copy_tables returns int soname 'ha_spider.so'; - else - create function spider_copy_tables returns int soname 'ha_spider.dll'; - end if; - end if; - - set @have_spider_flush_table_mon_cache_udf := 0; - select @have_spider_flush_table_mon_cache_udf := 1 from mysql.func where name = 'spider_flush_table_mon_cache'; - if 
@have_spider_flush_table_mon_cache_udf = 0 then - if @win_plugin = 0 then - create function spider_flush_table_mon_cache returns int soname 'ha_spider.so'; - else - create function spider_flush_table_mon_cache returns int soname 'ha_spider.dll'; - end if; - end if; - end;// delimiter ; call mysql.spider_plugin_installer; diff --git a/storage/spider/spd_conn.cc b/storage/spider/spd_conn.cc index 2f372d8f692..17631b3c658 100644 --- a/storage/spider/spd_conn.cc +++ b/storage/spider/spd_conn.cc @@ -92,6 +92,9 @@ extern PSI_thread_key spd_key_thd_bg_mon; /* UTC time zone for timestamp columns */ extern Time_zone *UTC; +extern sql_mode_t full_sql_mode; +extern sql_mode_t pushdown_sql_mode; + HASH spider_open_connections; uint spider_open_connections_id; HASH spider_ipport_conns; @@ -150,6 +153,7 @@ int spider_reset_conn_setted_parameter( conn->autocommit = spider_param_remote_autocommit(); conn->sql_log_off = spider_param_remote_sql_log_off(); conn->wait_timeout = spider_param_remote_wait_timeout(thd); + conn->sql_mode = full_sql_mode + 1; if (thd && spider_param_remote_time_zone()) { int tz_length = strlen(spider_param_remote_time_zone()); @@ -1444,6 +1448,18 @@ void spider_conn_queue_wait_timeout( DBUG_VOID_RETURN; } +void spider_conn_queue_sql_mode( + SPIDER_CONN *conn, + sql_mode_t sql_mode +) { + DBUG_ENTER("spider_conn_queue_sql_mode"); + DBUG_PRINT("info", ("spider conn=%p", conn)); + DBUG_ASSERT(!(sql_mode & ~full_sql_mode)); + conn->queued_sql_mode = TRUE; + conn->queued_sql_mode_val = (sql_mode & pushdown_sql_mode); + DBUG_VOID_RETURN; +} + void spider_conn_queue_time_zone( SPIDER_CONN *conn, Time_zone *time_zone @@ -1500,6 +1516,7 @@ void spider_conn_clear_queue( conn->queued_autocommit = FALSE; conn->queued_sql_log_off = FALSE; conn->queued_wait_timeout = FALSE; + conn->queued_sql_mode = FALSE; conn->queued_time_zone = FALSE; conn->queued_trx_start = FALSE; conn->queued_xa_start = FALSE; diff --git a/storage/spider/spd_conn.h b/storage/spider/spd_conn.h index 
7be18f8c207..1612593a1cb 100644 --- a/storage/spider/spd_conn.h +++ b/storage/spider/spd_conn.h @@ -141,6 +141,11 @@ void spider_conn_queue_wait_timeout( int wait_timeout ); +void spider_conn_queue_sql_mode( + SPIDER_CONN *conn, + sql_mode_t sql_mode +); + void spider_conn_queue_time_zone( SPIDER_CONN *conn, Time_zone *time_zone diff --git a/storage/spider/spd_db_conn.cc b/storage/spider/spd_db_conn.cc index 8e31c132539..98cd0c3e731 100644 --- a/storage/spider/spd_db_conn.cc +++ b/storage/spider/spd_db_conn.cc @@ -389,6 +389,13 @@ int spider_db_conn_queue_action( append_wait_timeout(&sql_str, conn->queued_wait_timeout_val)) ) || ( + conn->queued_sql_mode && + conn->queued_sql_mode_val != conn->sql_mode && + conn->db_conn->set_sql_mode_in_bulk_sql() && + (error_num = spider_dbton[conn->dbton_id].db_util-> + append_sql_mode(&sql_str, conn->queued_sql_mode_val)) + ) || + ( conn->queued_time_zone && conn->queued_time_zone_val != conn->time_zone && conn->db_conn->set_time_zone_in_bulk_sql() && @@ -470,6 +477,15 @@ int spider_db_conn_queue_action( DBUG_RETURN(error_num); } if ( + conn->queued_sql_mode && + conn->queued_sql_mode_val != conn->sql_mode && + !conn->db_conn->set_sql_mode_in_bulk_sql() && + (error_num = spider_dbton[conn->dbton_id].db_util-> + append_sql_mode(&sql_str, conn->queued_sql_mode_val)) + ) { + DBUG_RETURN(error_num); + } + if ( conn->queued_time_zone && conn->queued_time_zone_val != conn->time_zone && !conn->db_conn->set_time_zone_in_bulk_sql() && @@ -543,6 +559,13 @@ int spider_db_conn_queue_action( conn->wait_timeout = conn->queued_wait_timeout_val; } + if ( + conn->queued_sql_mode && + conn->queued_sql_mode_val != conn->sql_mode + ) { + conn->sql_mode = conn->queued_sql_mode_val; + } + if (conn->queued_autocommit) { if (conn->queued_autocommit_val && conn->autocommit != 1) @@ -1416,7 +1439,7 @@ int spider_db_append_name_with_quote_str( ) { DBUG_ENTER("spider_db_append_name_with_quote_str"); 
DBUG_RETURN(spider_db_append_name_with_quote_str_internal( - str, name, strlen(name), dbton_id)); + str, name, strlen(name), system_charset_info, dbton_id)); } int spider_db_append_name_with_quote_str( @@ -1426,13 +1449,25 @@ int spider_db_append_name_with_quote_str( ) { DBUG_ENTER("spider_db_append_name_with_quote_str"); DBUG_RETURN(spider_db_append_name_with_quote_str_internal( - str, name.str, name.length, dbton_id)); + str, name.str, name.length, system_charset_info, dbton_id)); +} + +int spider_db_append_name_with_quote_str_internal( + spider_string *str, + const char *name, + int length, + uint dbton_id +) { + DBUG_ENTER("spider_db_append_name_with_quote_str_internal"); + DBUG_RETURN(spider_db_append_name_with_quote_str_internal( + str, name, length, system_charset_info, dbton_id)); } int spider_db_append_name_with_quote_str_internal( spider_string *str, const char *name, int length, + CHARSET_INFO *cs, uint dbton_id ) { int error_num; @@ -1443,9 +1478,9 @@ int spider_db_append_name_with_quote_str_internal( { head_code = *name; #ifdef SPIDER_HAS_MY_CHARLEN - if ((length = my_charlen(system_charset_info, name, name_end)) < 1) + if ((length = my_charlen(cs, name, name_end)) < 1) #else - if (!(length = my_mbcharlen(system_charset_info, (uchar) head_code))) + if (!(length = my_mbcharlen(cs, (uchar) head_code))) #endif { my_message(ER_SPIDER_WRONG_CHARACTER_IN_NAME_NUM, @@ -1462,7 +1497,7 @@ int spider_db_append_name_with_quote_str_internal( DBUG_RETURN(error_num); } } else { - if (str->append(name, length, system_charset_info)) + if (str->append(name, length, cs)) DBUG_RETURN(HA_ERR_OUT_OF_MEM); } } @@ -9039,10 +9074,11 @@ int spider_db_open_item_ident( str->q_append(alias, alias_length); #ifdef SPIDER_use_LEX_CSTRING_for_KEY_Field_name if ((error_num = spider_dbton[dbton_id].db_util-> - append_name(str, item_ident->field_name.str, field_name_length))) + append_escaped_name(str, item_ident->field_name.str, + field_name_length))) #else if ((error_num = 
spider_dbton[dbton_id].db_util-> - append_name(str, item_ident->field_name, field_name_length))) + append_escaped_name(str, item_ident->field_name, field_name_length))) #endif { DBUG_RETURN(error_num); @@ -9053,11 +9089,11 @@ int spider_db_open_item_ident( str->q_append(alias, alias_length); #ifdef SPIDER_use_LEX_CSTRING_for_KEY_Field_name if ((error_num = spider_dbton[dbton_id].db_util-> - append_name_with_charset(str, item_ident->field_name.str, + append_escaped_name_with_charset(str, item_ident->field_name.str, field_name_length, system_charset_info))) #else if ((error_num = spider_dbton[dbton_id].db_util-> - append_name_with_charset(str, item_ident->field_name, + append_escaped_name_with_charset(str, item_ident->field_name, field_name_length, system_charset_info))) #endif { diff --git a/storage/spider/spd_db_conn.h b/storage/spider/spd_db_conn.h index e6ce926f4ee..0300dc6c407 100644 --- a/storage/spider/spd_db_conn.h +++ b/storage/spider/spd_db_conn.h @@ -429,6 +429,14 @@ int spider_db_append_name_with_quote_str_internal( uint dbton_id ); +int spider_db_append_name_with_quote_str_internal( + spider_string *str, + const char *name, + int length, + CHARSET_INFO *cs, + uint dbton_id +); + int spider_db_append_select( ha_spider *spider ); diff --git a/storage/spider/spd_db_handlersocket.cc b/storage/spider/spd_db_handlersocket.cc index 27257ee08e6..091f48a2460 100644 --- a/storage/spider/spd_db_handlersocket.cc +++ b/storage/spider/spd_db_handlersocket.cc @@ -1848,6 +1848,23 @@ int spider_db_handlersocket::set_wait_timeout( DBUG_RETURN(0); } +bool spider_db_handlersocket::set_sql_mode_in_bulk_sql() +{ + DBUG_ENTER("spider_db_handlersocket::set_sql_mode_in_bulk_sql"); + DBUG_PRINT("info",("spider this=%p", this)); + DBUG_RETURN(FALSE); +} + +int spider_db_handlersocket::set_sql_mode( + sql_mode_t sql_mode, + int *need_mon +) { + DBUG_ENTER("spider_db_handlersocket::set_sql_mode"); + DBUG_PRINT("info",("spider this=%p", this)); + /* nothing to do */ + DBUG_RETURN(0); 
+} + bool spider_db_handlersocket::set_time_zone_in_bulk_sql() { DBUG_ENTER("spider_db_handlersocket::set_time_zone_in_bulk_sql"); @@ -2487,6 +2504,57 @@ int spider_db_handlersocket_util::append_name_with_charset( DBUG_RETURN(0); } +int spider_db_handlersocket_util::append_escaped_name( + spider_string *str, + const char *name, + uint name_length +) { + int error_num; + DBUG_ENTER("spider_db_handlersocket_util::append_name"); + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN * 2 + name_length * 2)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + if ((error_num = spider_db_append_name_with_quote_str_internal( + str, name, name_length, dbton_id))) + { + DBUG_RETURN(error_num); + } + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + DBUG_RETURN(0); +} + +int spider_db_handlersocket_util::append_escaped_name_with_charset( + spider_string *str, + const char *name, + uint name_length, + CHARSET_INFO *name_charset +) { + int error_num; + DBUG_ENTER("spider_db_handlersocket_util::append_name_with_charset"); + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN * 2 + name_length * 2)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + if ((error_num = spider_db_append_name_with_quote_str_internal( + str, name, name_length, name_charset, dbton_id))) + { + DBUG_RETURN(error_num); + } + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + DBUG_RETURN(0); +} + bool spider_db_handlersocket_util::is_name_quote( const char head_code ) { @@ -2694,6 +2762,16 @@ int spider_db_handlersocket_util::append_wait_timeout( DBUG_RETURN(0); } +int spider_db_handlersocket_util::append_sql_mode( + spider_string *str, + sql_mode_t sql_mode +) { + 
DBUG_ENTER("spider_db_handlersocket_util::append_sql_mode"); + DBUG_PRINT("info",("spider this=%p", this)); + /* nothing to do */ + DBUG_RETURN(0); +} + int spider_db_handlersocket_util::append_time_zone( spider_string *str, Time_zone *time_zone diff --git a/storage/spider/spd_db_handlersocket.h b/storage/spider/spd_db_handlersocket.h index f94be204a70..d2beb2124c0 100644 --- a/storage/spider/spd_db_handlersocket.h +++ b/storage/spider/spd_db_handlersocket.h @@ -34,6 +34,17 @@ public: uint name_length, CHARSET_INFO *name_charset ); + int append_escaped_name( + spider_string *str, + const char *name, + uint name_length + ); + int append_escaped_name_with_charset( + spider_string *str, + const char *name, + uint name_length, + CHARSET_INFO *name_charset + ); bool is_name_quote( const char head_code ); @@ -63,6 +74,10 @@ public: spider_string *str, int wait_timeout ); + int append_sql_mode( + spider_string *str, + sql_mode_t sql_mode + ); int append_time_zone( spider_string *str, Time_zone *time_zone @@ -381,6 +396,11 @@ public: int wait_timeout, int *need_mon ); + bool set_sql_mode_in_bulk_sql(); + int set_sql_mode( + sql_mode_t sql_mode, + int *need_mon + ); bool set_time_zone_in_bulk_sql(); int set_time_zone( Time_zone *time_zone, diff --git a/storage/spider/spd_db_include.h b/storage/spider/spd_db_include.h index 5ab7aff2c9a..56a88a2b7bc 100644 --- a/storage/spider/spd_db_include.h +++ b/storage/spider/spd_db_include.h @@ -161,6 +161,8 @@ typedef st_spider_result SPIDER_RESULT; #define SPIDER_SQL_MBR_DISJOINT_LEN (sizeof(SPIDER_SQL_MBR_DISJOINT_STR) - 1) #define SPIDER_SQL_NOT_BETWEEN_STR "not between" #define SPIDER_SQL_NOT_BETWEEN_LEN (sizeof(SPIDER_SQL_NOT_BETWEEN_STR) - 1) +#define SPIDER_SQL_TO_FLOAT_STR "/* create function to_float(a decimal(20,6)) returns float return a */ to_float(" +#define SPIDER_SQL_TO_FLOAT_LEN (sizeof(SPIDER_SQL_TO_FLOAT_STR) - 1) #define SPIDER_SQL_IN_STR "in(" #define SPIDER_SQL_IN_LEN (sizeof(SPIDER_SQL_IN_STR) - 1) #define 
SPIDER_SQL_NOT_IN_STR "not in(" @@ -185,6 +187,8 @@ typedef st_spider_result SPIDER_RESULT; #define SPIDER_SQL_AS_TIME_LEN (sizeof(SPIDER_SQL_AS_TIME_STR) - 1) #define SPIDER_SQL_AS_BINARY_STR " as binary" #define SPIDER_SQL_AS_BINARY_LEN (sizeof(SPIDER_SQL_AS_BINARY_STR) - 1) +#define SPIDER_SQL_AS_FLOAT_STR " as float" +#define SPIDER_SQL_AS_FLOAT_LEN (sizeof(SPIDER_SQL_AS_FLOAT_STR) - 1) #define SPIDER_SQL_IS_TRUE_STR " is true" #define SPIDER_SQL_IS_TRUE_LEN (sizeof(SPIDER_SQL_IS_TRUE_STR) - 1) #define SPIDER_SQL_IS_NOT_TRUE_STR " is not true" @@ -821,6 +825,17 @@ public: uint name_length, CHARSET_INFO *name_charset ) = 0; + virtual int append_escaped_name( + spider_string *str, + const char *name, + uint name_length + ) = 0; + virtual int append_escaped_name_with_charset( + spider_string *str, + const char *name, + uint name_length, + CHARSET_INFO *name_charset + ) = 0; virtual bool is_name_quote( const char head_code ) = 0; @@ -850,6 +865,10 @@ public: spider_string *str, int wait_timeout ) = 0; + virtual int append_sql_mode( + spider_string *str, + sql_mode_t sql_mode + ) = 0; virtual int append_time_zone( spider_string *str, Time_zone *time_zone @@ -1151,6 +1170,11 @@ public: int wait_timeout, int *need_mon ) = 0; + virtual bool set_sql_mode_in_bulk_sql() = 0; + virtual int set_sql_mode( + sql_mode_t sql_mode, + int *need_mon + ) = 0; virtual bool set_time_zone_in_bulk_sql() = 0; virtual int set_time_zone( Time_zone *time_zone, diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc index 7258ac0c1ae..6b551804c87 100644 --- a/storage/spider/spd_db_mysql.cc +++ b/storage/spider/spd_db_mysql.cc @@ -93,6 +93,9 @@ static const char *name_quote_str = SPIDER_SQL_NAME_QUOTE_STR; #define SPIDER_SQL_WAIT_TIMEOUT_STR "set session wait_timeout = " #define SPIDER_SQL_WAIT_TIMEOUT_LEN sizeof(SPIDER_SQL_WAIT_TIMEOUT_STR) - 1 +#define SPIDER_SQL_SQL_MODE_STR "set session sql_mode = '" +#define SPIDER_SQL_SQL_MODE_LEN sizeof(SPIDER_SQL_SQL_MODE_STR) - 
1 + #define SPIDER_SQL_TIME_ZONE_STR "set session time_zone = '" #define SPIDER_SQL_TIME_ZONE_LEN sizeof(SPIDER_SQL_TIME_ZONE_STR) - 1 @@ -2878,6 +2881,54 @@ int spider_db_mbase::set_wait_timeout( DBUG_RETURN(0); } +bool spider_db_mbase::set_sql_mode_in_bulk_sql() +{ + DBUG_ENTER("spider_db_mbase::set_sql_mode_in_bulk_sql"); + DBUG_PRINT("info",("spider this=%p", this)); + DBUG_RETURN(TRUE); +} + +int spider_db_mbase::set_sql_mode( + sql_mode_t sql_mode, + int *need_mon +) { + int error_num; + char sql_buf[MAX_FIELD_WIDTH]; + spider_string sql_str(sql_buf, sizeof(sql_buf), &my_charset_bin); + DBUG_ENTER("spider_db_mbase::set_sql_mode"); + DBUG_PRINT("info",("spider this=%p", this)); + sql_str.init_calc_mem(265); + sql_str.length(0); + if (sql_str.reserve(SPIDER_SQL_SQL_MODE_LEN)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + sql_str.q_append(SPIDER_SQL_SQL_MODE_STR, SPIDER_SQL_SQL_MODE_LEN); + if ((error_num = spider_db_mbase_utility->append_sql_mode_internal(&sql_str, sql_mode))) + { + DBUG_RETURN(error_num); + } + if (sql_str.length() > SPIDER_SQL_SQL_MODE_LEN) + { + sql_str.length(sql_str.length() - SPIDER_SQL_COMMA_LEN); + } else { + if (sql_str.reserve(SPIDER_SQL_VALUE_QUOTE_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + } + sql_str.q_append(SPIDER_SQL_VALUE_QUOTE_STR, SPIDER_SQL_VALUE_QUOTE_LEN); + if (spider_db_query( + conn, + sql_str.ptr(), + sql_str.length(), + -1, + need_mon) + ) + DBUG_RETURN(spider_db_errorno(conn)); + SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos); + pthread_mutex_unlock(&conn->mta_conn_mutex); + DBUG_RETURN(0); +} + bool spider_db_mbase::set_time_zone_in_bulk_sql() { DBUG_ENTER("spider_db_mbase::set_time_zone_in_bulk_sql"); @@ -3536,6 +3587,57 @@ int spider_db_mbase_util::append_name_with_charset( DBUG_RETURN(0); } +int spider_db_mbase_util::append_escaped_name( + spider_string *str, + const char *name, + uint name_length +) { + int error_num; + DBUG_ENTER("spider_db_mbase_util::append_name"); + if 
(str->reserve(SPIDER_SQL_NAME_QUOTE_LEN * 2 + name_length * 2)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + if ((error_num = spider_db_append_name_with_quote_str_internal( + str, name, name_length, dbton_id))) + { + DBUG_RETURN(error_num); + } + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + DBUG_RETURN(0); +} + +int spider_db_mbase_util::append_escaped_name_with_charset( + spider_string *str, + const char *name, + uint name_length, + CHARSET_INFO *name_charset +) { + int error_num; + DBUG_ENTER("spider_db_mbase_util::append_name_with_charset"); + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN * 2 + name_length * 2)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + if ((error_num = spider_db_append_name_with_quote_str_internal( + str, name, name_length, name_charset, dbton_id))) + { + DBUG_RETURN(error_num); + } + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + DBUG_RETURN(0); +} + bool spider_db_mbase_util::is_name_quote( const char head_code ) { @@ -3554,13 +3656,14 @@ int spider_db_mbase_util::append_escaped_name_quote( DBUG_RETURN(0); } -int spider_db_mbase_util::append_column_value( +int spider_db_mariadb_util::append_column_value( ha_spider *spider, spider_string *str, Field *field, const uchar *new_ptr, CHARSET_INFO *access_charset ) { + bool float_value = FALSE; int error_num; char buf[MAX_FIELD_WIDTH]; spider_string tmp_str(buf, MAX_FIELD_WIDTH, field->charset()); @@ -3568,7 +3671,7 @@ int spider_db_mbase_util::append_column_value( uint length; THD *thd = field->table->in_use; Time_zone *saved_time_zone = thd->variables.time_zone; - DBUG_ENTER("spider_db_mbase_util::append_column_value"); + 
DBUG_ENTER("spider_db_mariadb_util::append_column_value"); tmp_str.init_calc_mem(113); thd->variables.time_zone = UTC; @@ -3667,6 +3770,10 @@ int spider_db_mbase_util::append_column_value( } else { ptr = field->val_str(tmp_str.get_str()); tmp_str.mem_calc(); + if (field->type() == MYSQL_TYPE_FLOAT) + { + float_value = TRUE; + } } thd->variables.time_zone = saved_time_zone; @@ -3731,13 +3838,258 @@ int spider_db_mbase_util::append_column_value( str->q_append(SPIDER_SQL_VALUE_QUOTE_STR, SPIDER_SQL_VALUE_QUOTE_LEN); } else if (field->str_needs_quotes()) { + if (str->charset() != field->charset()) + { + if ((error_num = spider_db_append_charset_name_before_string(str, + field->charset()))) + { + DBUG_RETURN(error_num); + } + } if (str->reserve(SPIDER_SQL_VALUE_QUOTE_LEN * 2 + ptr->length() * 2 + 2)) DBUG_RETURN(HA_ERR_OUT_OF_MEM); str->q_append(SPIDER_SQL_VALUE_QUOTE_STR, SPIDER_SQL_VALUE_QUOTE_LEN); append_escaped_util(str, ptr); str->q_append(SPIDER_SQL_VALUE_QUOTE_STR, SPIDER_SQL_VALUE_QUOTE_LEN); + } else if (float_value) + { + if (str->reserve(SPIDER_SQL_CAST_LEN + ptr->length() + + SPIDER_SQL_AS_FLOAT_LEN, SPIDER_SQL_CLOSE_PAREN_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_CAST_STR, SPIDER_SQL_CAST_LEN); + str->q_append(ptr->ptr(), ptr->length()); + str->q_append(SPIDER_SQL_AS_FLOAT_STR, SPIDER_SQL_AS_FLOAT_LEN); + str->q_append(SPIDER_SQL_CLOSE_PAREN_STR, SPIDER_SQL_CLOSE_PAREN_LEN); } else if (str->append(*ptr)) + { DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + DBUG_RETURN(0); +} + +int spider_db_mysql_util::append_column_value( + ha_spider *spider, + spider_string *str, + Field *field, + const uchar *new_ptr, + CHARSET_INFO *access_charset +) { + bool float_value = FALSE; + int error_num; + char buf[MAX_FIELD_WIDTH]; + spider_string tmp_str(buf, MAX_FIELD_WIDTH, field->charset()); + String *ptr; + uint length; + THD *thd = field->table->in_use; + Time_zone *saved_time_zone = thd->variables.time_zone; + 
DBUG_ENTER("spider_db_mysql_util::append_column_value"); + tmp_str.init_calc_mem(266); + + thd->variables.time_zone = UTC; + + if (new_ptr) + { + if ( + field->type() == MYSQL_TYPE_BLOB || + field->real_type() == MYSQL_TYPE_VARCHAR + ) { + length = uint2korr(new_ptr); + tmp_str.set_quick((char *) new_ptr + HA_KEY_BLOB_LENGTH, length, + field->charset()); + ptr = tmp_str.get_str(); + } else if (field->type() == MYSQL_TYPE_GEOMETRY) + { +/* + uint mlength = SIZEOF_STORED_DOUBLE, lcnt; + uchar *dest = (uchar *) buf; + const uchar *source; + for (lcnt = 0; lcnt < 4; lcnt++) + { + mlength = SIZEOF_STORED_DOUBLE; + source = new_ptr + mlength + SIZEOF_STORED_DOUBLE * lcnt; + while (mlength--) + *dest++ = *--source; + } + tmp_str.length(SIZEOF_STORED_DOUBLE * lcnt); +*/ +#ifndef DBUG_OFF + double xmin, xmax, ymin, ymax; +/* + float8store(buf,xmin); + float8store(buf+8,xmax); + float8store(buf+16,ymin); + float8store(buf+24,ymax); + memcpy(&xmin,new_ptr,sizeof(xmin)); + memcpy(&xmax,new_ptr + 8,sizeof(xmax)); + memcpy(&ymin,new_ptr + 16,sizeof(ymin)); + memcpy(&ymax,new_ptr + 24,sizeof(ymax)); + float8get(xmin, buf); + float8get(xmax, buf + 8); + float8get(ymin, buf + 16); + float8get(ymax, buf + 24); + DBUG_PRINT("info", ("spider geo is %f %f %f %f", + xmin, xmax, ymin, ymax)); + DBUG_PRINT("info", ("spider geo is %.14g %.14g %.14g %.14g", + xmin, xmax, ymin, ymax)); +*/ + float8get(xmin, new_ptr); + float8get(xmax, new_ptr + 8); + float8get(ymin, new_ptr + 16); + float8get(ymax, new_ptr + 24); + DBUG_PRINT("info", ("spider geo is %f %f %f %f", + xmin, xmax, ymin, ymax)); +/* + float8get(xmin, new_ptr + SIZEOF_STORED_DOUBLE * 4); + float8get(xmax, new_ptr + SIZEOF_STORED_DOUBLE * 5); + float8get(ymin, new_ptr + SIZEOF_STORED_DOUBLE * 6); + float8get(ymax, new_ptr + SIZEOF_STORED_DOUBLE * 7); + DBUG_PRINT("info", ("spider geo is %f %f %f %f", + xmin, xmax, ymin, ymax)); + float8get(xmin, new_ptr + SIZEOF_STORED_DOUBLE * 8); + float8get(xmax, new_ptr + SIZEOF_STORED_DOUBLE * 
9); + float8get(ymin, new_ptr + SIZEOF_STORED_DOUBLE * 10); + float8get(ymax, new_ptr + SIZEOF_STORED_DOUBLE * 11); + DBUG_PRINT("info", ("spider geo is %f %f %f %f", + xmin, xmax, ymin, ymax)); + float8get(xmin, new_ptr + SIZEOF_STORED_DOUBLE * 12); + float8get(xmax, new_ptr + SIZEOF_STORED_DOUBLE * 13); + float8get(ymin, new_ptr + SIZEOF_STORED_DOUBLE * 14); + float8get(ymax, new_ptr + SIZEOF_STORED_DOUBLE * 15); + DBUG_PRINT("info", ("spider geo is %f %f %f %f", + xmin, xmax, ymin, ymax)); +*/ +#endif +/* + tmp_str.set_quick((char *) new_ptr, SIZEOF_STORED_DOUBLE * 4, + &my_charset_bin); +*/ + tmp_str.length(0); + tmp_str.q_append((char *) SPIDER_SQL_LINESTRING_HEAD_STR, + SPIDER_SQL_LINESTRING_HEAD_LEN); + tmp_str.q_append((char *) new_ptr, SIZEOF_STORED_DOUBLE); + tmp_str.q_append((char *) new_ptr + SIZEOF_STORED_DOUBLE * 2, + SIZEOF_STORED_DOUBLE); + tmp_str.q_append((char *) new_ptr + SIZEOF_STORED_DOUBLE, + SIZEOF_STORED_DOUBLE); + tmp_str.q_append((char *) new_ptr + SIZEOF_STORED_DOUBLE * 3, + SIZEOF_STORED_DOUBLE); + ptr = tmp_str.get_str(); + } else { + ptr = field->val_str(tmp_str.get_str(), new_ptr); + tmp_str.mem_calc(); + } + } else { + ptr = field->val_str(tmp_str.get_str()); + tmp_str.mem_calc(); + if (field->type() == MYSQL_TYPE_FLOAT) + { + float_value = TRUE; + } + } + + thd->variables.time_zone = saved_time_zone; + + DBUG_PRINT("info", ("spider field->type() is %d", field->type())); + DBUG_PRINT("info", ("spider ptr->length() is %d", ptr->length())); +/* + if ( + field->type() == MYSQL_TYPE_BIT || + (field->type() >= MYSQL_TYPE_TINY_BLOB && + field->type() <= MYSQL_TYPE_BLOB) + ) { + uchar *hex_ptr = (uchar *) ptr->ptr(), *end_ptr; + char *str_ptr; + DBUG_PRINT("info", ("spider HEX")); + if (str->reserve(SPIDER_SQL_HEX_LEN + ptr->length() * 2)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + str->q_append(SPIDER_SQL_HEX_STR, SPIDER_SQL_HEX_LEN); + str_ptr = (char *) str->ptr() + str->length(); + for (end_ptr = hex_ptr + ptr->length(); hex_ptr < end_ptr; 
hex_ptr++) + { + *str_ptr++ = spider_dig_upper[(*hex_ptr) >> 4]; + *str_ptr++ = spider_dig_upper[(*hex_ptr) & 0x0F]; + } + str->length(str->length() + ptr->length() * 2); + } else +*/ + if (field->result_type() == STRING_RESULT) + { + DBUG_PRINT("info", ("spider STRING_RESULT")); + if (str->charset() != field->charset()) + { + if ((error_num = spider_db_append_charset_name_before_string(str, + field->charset()))) + { + DBUG_RETURN(error_num); + } + } + if (str->reserve(SPIDER_SQL_VALUE_QUOTE_LEN)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + str->q_append(SPIDER_SQL_VALUE_QUOTE_STR, SPIDER_SQL_VALUE_QUOTE_LEN); + if ( + field->type() == MYSQL_TYPE_VARCHAR || + (field->type() >= MYSQL_TYPE_ENUM && + field->type() <= MYSQL_TYPE_GEOMETRY) + ) { + DBUG_PRINT("info", ("spider append_escaped")); + char buf2[MAX_FIELD_WIDTH]; + spider_string tmp_str2(buf2, MAX_FIELD_WIDTH, field->charset()); + tmp_str2.init_calc_mem(267); + tmp_str2.length(0); + if ( + tmp_str2.append(ptr->ptr(), ptr->length(), field->charset()) || + str->reserve(tmp_str2.length() * 2) || + append_escaped_util(str, tmp_str2.get_str()) + ) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } else if (str->append(*ptr)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + if (str->reserve(SPIDER_SQL_VALUE_QUOTE_LEN)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + str->q_append(SPIDER_SQL_VALUE_QUOTE_STR, SPIDER_SQL_VALUE_QUOTE_LEN); + } else if (field->str_needs_quotes()) + { + if (str->charset() != field->charset()) + { + if ((error_num = spider_db_append_charset_name_before_string(str, + field->charset()))) + { + DBUG_RETURN(error_num); + } + } + if (str->reserve(SPIDER_SQL_VALUE_QUOTE_LEN * 2 + ptr->length() * 2 + 2)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + str->q_append(SPIDER_SQL_VALUE_QUOTE_STR, SPIDER_SQL_VALUE_QUOTE_LEN); + append_escaped_util(str, ptr); + str->q_append(SPIDER_SQL_VALUE_QUOTE_STR, SPIDER_SQL_VALUE_QUOTE_LEN); + } else if (float_value) + { + if (str->reserve(SPIDER_SQL_TO_FLOAT_LEN + ptr->length() + + SPIDER_SQL_CLOSE_PAREN_LEN)) + { + 
DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_TO_FLOAT_STR, SPIDER_SQL_TO_FLOAT_LEN); + str->q_append(ptr->ptr(), ptr->length()); + str->q_append(SPIDER_SQL_CLOSE_PAREN_STR, SPIDER_SQL_CLOSE_PAREN_LEN); + } else if (str->append(*ptr)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + DBUG_RETURN(0); +} + +int spider_db_mbase_util::append_column_value( + ha_spider *spider, + spider_string *str, + Field *field, + const uchar *new_ptr, + CHARSET_INFO *access_charset +) { + DBUG_ENTER("spider_db_mbase_util::append_column_value"); + DBUG_ASSERT(0); DBUG_RETURN(0); } @@ -3880,6 +4232,861 @@ int spider_db_mbase_util::append_wait_timeout( DBUG_RETURN(0); } +#define SPIDER_REAL_AS_FLOAT_STR "real_as_float" +#define SPIDER_REAL_AS_FLOAT_LEN (sizeof(SPIDER_REAL_AS_FLOAT_STR) - 1) +#define SPIDER_PIPES_AS_CONCAT_STR "pipes_as_concat" +#define SPIDER_PIPES_AS_CONCAT_LEN (sizeof(SPIDER_PIPES_AS_CONCAT_STR) - 1) +#define SPIDER_ANSI_QUOTES_STR "ansi_quotes" +#define SPIDER_ANSI_QUOTES_LEN (sizeof(SPIDER_ANSI_QUOTES_STR) - 1) +#define SPIDER_IGNORE_SPACE_STR "ignore_space" +#define SPIDER_IGNORE_SPACE_LEN (sizeof(SPIDER_IGNORE_SPACE_STR) - 1) +#define SPIDER_IGNORE_BAD_TABLE_OPTIONS_STR "ignore_bad_table_options" +#define SPIDER_IGNORE_BAD_TABLE_OPTIONS_LEN (sizeof(SPIDER_IGNORE_BAD_TABLE_OPTIONS_STR) - 1) +#define SPIDER_ONLY_FULL_GROUP_BY_STR "only_full_group_by" +#define SPIDER_ONLY_FULL_GROUP_BY_LEN (sizeof(SPIDER_ONLY_FULL_GROUP_BY_STR) - 1) +#define SPIDER_NO_UNSIGNED_SUBTRACTION_STR "no_unsigned_subtraction" +#define SPIDER_NO_UNSIGNED_SUBTRACTION_LEN (sizeof(SPIDER_NO_UNSIGNED_SUBTRACTION_STR) - 1) +#define SPIDER_NO_DIR_IN_CREATE_STR "no_dir_in_create" +#define SPIDER_NO_DIR_IN_CREATE_LEN (sizeof(SPIDER_NO_DIR_IN_CREATE_STR) - 1) +#define SPIDER_POSTGRESQL_STR "postgresql" +#define SPIDER_POSTGRESQL_LEN (sizeof(SPIDER_POSTGRESQL_STR) - 1) +#define SPIDER_ORACLE_STR "oracle" +#define SPIDER_ORACLE_LEN (sizeof(SPIDER_ORACLE_STR) - 1) +#define SPIDER_MSSQL_STR 
"mssql" +#define SPIDER_MSSQL_LEN (sizeof(SPIDER_MSSQL_STR) - 1) +#define SPIDER_DB2_STR "db2" +#define SPIDER_DB2_LEN (sizeof(SPIDER_DB2_STR) - 1) +#define SPIDER_MAXDB_STR "maxdb" +#define SPIDER_MAXDB_LEN (sizeof(SPIDER_MAXDB_STR) - 1) +#define SPIDER_NO_KEY_OPTIONS_STR "no_key_options" +#define SPIDER_NO_KEY_OPTIONS_LEN (sizeof(SPIDER_NO_KEY_OPTIONS_STR) - 1) +#define SPIDER_NO_TABLE_OPTIONS_STR "no_table_options" +#define SPIDER_NO_TABLE_OPTIONS_LEN (sizeof(SPIDER_NO_TABLE_OPTIONS_STR) - 1) +#define SPIDER_NO_FIELD_OPTIONS_STR "no_field_options" +#define SPIDER_NO_FIELD_OPTIONS_LEN (sizeof(SPIDER_NO_FIELD_OPTIONS_STR) - 1) +#define SPIDER_MYSQL323_STR "mysql323" +#define SPIDER_MYSQL323_LEN (sizeof(SPIDER_MYSQL323_STR) - 1) +#define SPIDER_MYSQL40_STR "mysql40" +#define SPIDER_MYSQL40_LEN (sizeof(SPIDER_MYSQL40_STR) - 1) +#define SPIDER_ANSI_STR "ansi" +#define SPIDER_ANSI_LEN (sizeof(SPIDER_ANSI_STR) - 1) +#define SPIDER_NO_AUTO_VALUE_ON_ZERO_STR "no_auto_value_on_zero" +#define SPIDER_NO_AUTO_VALUE_ON_ZERO_LEN (sizeof(SPIDER_NO_AUTO_VALUE_ON_ZERO_STR) - 1) +#define SPIDER_NO_BACKSLASH_ESCAPES_STR "no_backslash_escapes" +#define SPIDER_NO_BACKSLASH_ESCAPES_LEN (sizeof(SPIDER_NO_BACKSLASH_ESCAPES_STR) - 1) +#define SPIDER_STRICT_TRANS_TABLES_STR "strict_trans_tables" +#define SPIDER_STRICT_TRANS_TABLES_LEN (sizeof(SPIDER_STRICT_TRANS_TABLES_STR) - 1) +#define SPIDER_STRICT_ALL_TABLES_STR "strict_all_tables" +#define SPIDER_STRICT_ALL_TABLES_LEN (sizeof(SPIDER_STRICT_ALL_TABLES_STR) - 1) +#define SPIDER_NO_ZERO_IN_DATE_STR "no_zero_in_date" +#define SPIDER_NO_ZERO_IN_DATE_LEN (sizeof(SPIDER_NO_ZERO_IN_DATE_STR) - 1) +#define SPIDER_NO_ZERO_DATE_STR "no_zero_date" +#define SPIDER_NO_ZERO_DATE_LEN (sizeof(SPIDER_NO_ZERO_DATE_STR) - 1) +#define SPIDER_INVALID_DATES_STR "allow_invalid_dates" +#define SPIDER_INVALID_DATES_LEN (sizeof(SPIDER_INVALID_DATES_STR) - 1) +#define SPIDER_ERROR_FOR_DIVISION_BY_ZERO_STR "error_for_division_by_zero" +#define 
SPIDER_ERROR_FOR_DIVISION_BY_ZERO_LEN (sizeof(SPIDER_ERROR_FOR_DIVISION_BY_ZERO_STR) - 1) +#define SPIDER_TRADITIONAL_STR "traditional" +#define SPIDER_TRADITIONAL_LEN (sizeof(SPIDER_TRADITIONAL_STR) - 1) +#define SPIDER_NO_AUTO_CREATE_USER_STR "no_auto_create_user" +#define SPIDER_NO_AUTO_CREATE_USER_LEN (sizeof(SPIDER_NO_AUTO_CREATE_USER_STR) - 1) +#define SPIDER_HIGH_NOT_PRECEDENCE_STR "high_not_precedence" +#define SPIDER_HIGH_NOT_PRECEDENCE_LEN (sizeof(SPIDER_HIGH_NOT_PRECEDENCE_STR) - 1) +#define SPIDER_NO_ENGINE_SUBSTITUTION_STR "no_engine_substitution" +#define SPIDER_NO_ENGINE_SUBSTITUTION_LEN (sizeof(SPIDER_NO_ENGINE_SUBSTITUTION_STR) - 1) +#define SPIDER_PAD_CHAR_TO_FULL_LENGTH_STR "pad_char_to_full_length" +#define SPIDER_PAD_CHAR_TO_FULL_LENGTH_LEN (sizeof(SPIDER_PAD_CHAR_TO_FULL_LENGTH_STR) - 1) +#define SPIDER_EMPTY_STRING_IS_NULL_STR "empty_string_is_null" +#define SPIDER_EMPTY_STRING_IS_NULL_LEN (sizeof(SPIDER_EMPTY_STRING_IS_NULL_STR) - 1) +#define SPIDER_SIMULTANEOUS_ASSIGNMENT_STR "simultaneous_assignment" +#define SPIDER_SIMULTANEOUS_ASSIGNMENT_LEN (sizeof(SPIDER_SIMULTANEOUS_ASSIGNMENT_STR) - 1) +#define SPIDER_TIME_ROUND_FRACTIONAL_STR "time_round_fractional" +#define SPIDER_TIME_ROUND_FRACTIONAL_LEN (sizeof(SPIDER_TIME_ROUND_FRACTIONAL_STR) - 1) + +sql_mode_t full_sql_mode = +#ifdef MODE_REAL_AS_FLOAT + MODE_REAL_AS_FLOAT | +#endif +#ifdef MODE_PIPES_AS_CONCAT + MODE_PIPES_AS_CONCAT | +#endif +#ifdef MODE_ANSI_QUOTES + MODE_ANSI_QUOTES | +#endif +#ifdef MODE_IGNORE_SPACE + MODE_IGNORE_SPACE | +#endif +#ifdef MODE_IGNORE_BAD_TABLE_OPTIONS + MODE_IGNORE_BAD_TABLE_OPTIONS | +#endif +#ifdef MODE_ONLY_FULL_GROUP_BY + MODE_ONLY_FULL_GROUP_BY | +#endif +#ifdef MODE_NO_UNSIGNED_SUBTRACTION + MODE_NO_UNSIGNED_SUBTRACTION | +#endif +#ifdef MODE_NO_DIR_IN_CREATE + MODE_NO_DIR_IN_CREATE | +#endif +#ifdef MODE_POSTGRESQL + MODE_POSTGRESQL | +#endif +#ifdef MODE_ORACLE + MODE_ORACLE | +#endif +#ifdef MODE_MSSQL + MODE_MSSQL | +#endif +#ifdef MODE_DB2 + 
MODE_DB2 | +#endif +#ifdef MODE_MAXDB + MODE_MAXDB | +#endif +#ifdef MODE_NO_KEY_OPTIONS + MODE_NO_KEY_OPTIONS | +#endif +#ifdef MODE_NO_TABLE_OPTIONS + MODE_NO_TABLE_OPTIONS | +#endif +#ifdef MODE_NO_FIELD_OPTIONS + MODE_NO_FIELD_OPTIONS | +#endif +#ifdef MODE_MYSQL323 + MODE_MYSQL323 | +#endif +#ifdef MODE_MYSQL40 + MODE_MYSQL40 | +#endif +#ifdef MODE_ANSI + MODE_ANSI | +#endif +#ifdef MODE_NO_AUTO_VALUE_ON_ZERO + MODE_NO_AUTO_VALUE_ON_ZERO | +#endif +#ifdef MODE_NO_BACKSLASH_ESCAPES + MODE_NO_BACKSLASH_ESCAPES | +#endif +#ifdef MODE_STRICT_TRANS_TABLES + MODE_STRICT_TRANS_TABLES | +#endif +#ifdef MODE_STRICT_ALL_TABLES + MODE_STRICT_ALL_TABLES | +#endif +#ifdef MODE_NO_ZERO_IN_DATE + MODE_NO_ZERO_IN_DATE | +#endif +#ifdef MODE_NO_ZERO_DATE + MODE_NO_ZERO_DATE | +#endif +#ifdef MODE_INVALID_DATES + MODE_INVALID_DATES | +#endif +#ifdef MODE_ERROR_FOR_DIVISION_BY_ZERO + MODE_ERROR_FOR_DIVISION_BY_ZERO | +#endif +#ifdef MODE_TRADITIONAL + MODE_TRADITIONAL | +#endif +#ifdef MODE_NO_AUTO_CREATE_USER + MODE_NO_AUTO_CREATE_USER | +#endif +#ifdef MODE_HIGH_NOT_PRECEDENCE + MODE_HIGH_NOT_PRECEDENCE | +#endif +#ifdef MODE_NO_ENGINE_SUBSTITUTION + MODE_NO_ENGINE_SUBSTITUTION | +#endif +#ifdef MODE_PAD_CHAR_TO_FULL_LENGTH + MODE_PAD_CHAR_TO_FULL_LENGTH | +#endif +#ifdef MODE_EMPTY_STRING_IS_NULL + MODE_EMPTY_STRING_IS_NULL | +#endif +#ifdef MODE_SIMULTANEOUS_ASSIGNMENT + MODE_SIMULTANEOUS_ASSIGNMENT | +#endif +#ifdef MODE_TIME_ROUND_FRACTIONAL + MODE_TIME_ROUND_FRACTIONAL | +#endif + 0; + +#ifdef MODE_REAL_AS_FLOAT +/* pushdown */ +#define SPIDER_SQL_MODE_REAL_AS_FLOAT +#endif +#ifdef MODE_PIPES_AS_CONCAT +/* no pushdown */ +#endif +#ifdef MODE_ANSI_QUOTES +/* no pushdown */ +#endif +#ifdef MODE_IGNORE_SPACE +/* no pushdown */ +#endif +#ifdef MODE_IGNORE_BAD_TABLE_OPTIONS +/* pushdown */ +#define SPIDER_SQL_MODE_IGNORE_BAD_TABLE_OPTIONS +#endif +#ifdef MODE_ONLY_FULL_GROUP_BY +/* no pushdown */ +#endif +#ifdef MODE_NO_UNSIGNED_SUBTRACTION +/* pushdown */ +#define 
SPIDER_SQL_MODE_NO_UNSIGNED_SUBTRACTION +#endif +#ifdef MODE_NO_DIR_IN_CREATE +/* pushdown */ +#define SPIDER_SQL_MODE_NO_DIR_IN_CREATE +#endif +#ifdef MODE_POSTGRESQL +/* no pushdown */ +#endif +#ifdef MODE_ORACLE +/* no pushdown */ +#endif +#ifdef MODE_MSSQL +/* no pushdown */ +#endif +#ifdef MODE_DB2 +/* no pushdown */ +#endif +#ifdef MODE_MAXDB +/* no pushdown */ +#endif +#ifdef MODE_NO_KEY_OPTIONS +/* no pushdown */ +#endif +#ifdef MODE_NO_TABLE_OPTIONS +/* no pushdown */ +#endif +#ifdef MODE_NO_FIELD_OPTIONS +/* no pushdown */ +#endif +#ifdef MODE_MYSQL323 +/* no pushdown */ +#endif +#ifdef MODE_MYSQL40 +/* no pushdown */ +#endif +#ifdef MODE_ANSI +/* no pushdown */ +#endif +#ifdef MODE_NO_AUTO_VALUE_ON_ZERO +/* pushdown */ +#define SPIDER_SQL_MODE_NO_AUTO_VALUE_ON_ZERO +#endif +#ifdef MODE_NO_BACKSLASH_ESCAPES +/* no pushdown */ +#endif +#ifdef MODE_STRICT_TRANS_TABLES +/* pushdown */ +#define SPIDER_SQL_MODE_STRICT_TRANS_TABLES +#endif +#ifdef MODE_STRICT_ALL_TABLES +/* pushdown */ +#define SPIDER_SQL_MODE_STRICT_ALL_TABLES +#endif +#ifdef MODE_NO_ZERO_IN_DATE +/* pushdown */ +#define SPIDER_SQL_MODE_NO_ZERO_IN_DATE +#endif +#ifdef MODE_NO_ZERO_DATE +/* pushdown */ +#define SPIDER_SQL_MODE_NO_ZERO_DATE +#endif +#ifdef MODE_INVALID_DATES +/* pushdown */ +#define SPIDER_SQL_MODE_INVALID_DATES +#endif +#ifdef MODE_ERROR_FOR_DIVISION_BY_ZERO +/* pushdown */ +#define SPIDER_SQL_MODE_ERROR_FOR_DIVISION_BY_ZERO +#endif +#ifdef MODE_TRADITIONAL +/* no pushdown */ +#endif +#ifdef MODE_NO_AUTO_CREATE_USER +/* pushdown */ +#define SPIDER_SQL_MODE_NO_AUTO_CREATE_USER +#endif +#ifdef MODE_HIGH_NOT_PRECEDENCE +/* pushdown */ +#define SPIDER_SQL_MODE_HIGH_NOT_PRECEDENCE +#endif +#ifdef MODE_NO_ENGINE_SUBSTITUTION +/* pushdown */ +#define SPIDER_SQL_MODE_NO_ENGINE_SUBSTITUTION +#endif +#ifdef MODE_PAD_CHAR_TO_FULL_LENGTH +/* pushdown */ +#define SPIDER_SQL_MODE_PAD_CHAR_TO_FULL_LENGTH +#endif +#ifdef MODE_EMPTY_STRING_IS_NULL +/* pushdown */ +#define 
SPIDER_SQL_MODE_EMPTY_STRING_IS_NULL +#endif +#ifdef MODE_SIMULTANEOUS_ASSIGNMENT +/* pushdown */ +#define SPIDER_SQL_MODE_SIMULTANEOUS_ASSIGNMENT +#endif +#ifdef MODE_TIME_ROUND_FRACTIONAL +/* pushdown */ +#define SPIDER_SQL_MODE_TIME_ROUND_FRACTIONAL +#endif + +sql_mode_t pushdown_sql_mode = +#ifdef SPIDER_SQL_MODE_REAL_AS_FLOAT + MODE_REAL_AS_FLOAT | +#endif +#ifdef SPIDER_SQL_MODE_PIPES_AS_CONCAT + MODE_PIPES_AS_CONCAT | +#endif +#ifdef SPIDER_SQL_MODE_ANSI_QUOTES + MODE_ANSI_QUOTES | +#endif +#ifdef SPIDER_SQL_MODE_IGNORE_SPACE + MODE_IGNORE_SPACE | +#endif +#ifdef SPIDER_SQL_MODE_IGNORE_BAD_TABLE_OPTIONS + MODE_IGNORE_BAD_TABLE_OPTIONS | +#endif +#ifdef SPIDER_SQL_MODE_ONLY_FULL_GROUP_BY + MODE_ONLY_FULL_GROUP_BY | +#endif +#ifdef SPIDER_SQL_MODE_NO_UNSIGNED_SUBTRACTION + MODE_NO_UNSIGNED_SUBTRACTION | +#endif +#ifdef SPIDER_SQL_MODE_NO_DIR_IN_CREATE + MODE_NO_DIR_IN_CREATE | +#endif +#ifdef SPIDER_SQL_MODE_POSTGRESQL + MODE_POSTGRESQL | +#endif +#ifdef SPIDER_SQL_MODE_ORACLE + MODE_ORACLE | +#endif +#ifdef SPIDER_SQL_MODE_MSSQL + MODE_MSSQL | +#endif +#ifdef SPIDER_SQL_MODE_DB2 + MODE_DB2 | +#endif +#ifdef SPIDER_SQL_MODE_MAXDB + MODE_MAXDB | +#endif +#ifdef SPIDER_SQL_MODE_NO_KEY_OPTIONS + MODE_NO_KEY_OPTIONS | +#endif +#ifdef SPIDER_SQL_MODE_NO_TABLE_OPTIONS + MODE_NO_TABLE_OPTIONS | +#endif +#ifdef SPIDER_SQL_MODE_NO_FIELD_OPTIONS + MODE_NO_FIELD_OPTIONS | +#endif +#ifdef SPIDER_SQL_MODE_MYSQL323 + MODE_MYSQL323 | +#endif +#ifdef SPIDER_SQL_MODE_MYSQL40 + MODE_MYSQL40 | +#endif +#ifdef SPIDER_SQL_MODE_ANSI + MODE_ANSI | +#endif +#ifdef SPIDER_SQL_MODE_NO_AUTO_VALUE_ON_ZERO + MODE_NO_AUTO_VALUE_ON_ZERO | +#endif +#ifdef SPIDER_SQL_MODE_NO_BACKSLASH_ESCAPES + MODE_NO_BACKSLASH_ESCAPES | +#endif +#ifdef SPIDER_SQL_MODE_STRICT_TRANS_TABLES + MODE_STRICT_TRANS_TABLES | +#endif +#ifdef SPIDER_SQL_MODE_STRICT_ALL_TABLES + MODE_STRICT_ALL_TABLES | +#endif +#ifdef SPIDER_SQL_MODE_NO_ZERO_IN_DATE + MODE_NO_ZERO_IN_DATE | +#endif +#ifdef SPIDER_SQL_MODE_NO_ZERO_DATE 
+ MODE_NO_ZERO_DATE | +#endif +#ifdef SPIDER_SQL_MODE_INVALID_DATES + MODE_INVALID_DATES | +#endif +#ifdef SPIDER_SQL_MODE_ERROR_FOR_DIVISION_BY_ZERO + MODE_ERROR_FOR_DIVISION_BY_ZERO | +#endif +#ifdef SPIDER_SQL_MODE_TRADITIONAL + MODE_TRADITIONAL | +#endif +#ifdef SPIDER_SQL_MODE_NO_AUTO_CREATE_USER + MODE_NO_AUTO_CREATE_USER | +#endif +#ifdef SPIDER_SQL_MODE_HIGH_NOT_PRECEDENCE + MODE_HIGH_NOT_PRECEDENCE | +#endif +#ifdef SPIDER_SQL_MODE_NO_ENGINE_SUBSTITUTION + MODE_NO_ENGINE_SUBSTITUTION | +#endif +#ifdef SPIDER_SQL_MODE_PAD_CHAR_TO_FULL_LENGTH + MODE_PAD_CHAR_TO_FULL_LENGTH | +#endif +#ifdef SPIDER_SQL_MODE_EMPTY_STRING_IS_NULL + MODE_EMPTY_STRING_IS_NULL | +#endif +#ifdef SPIDER_SQL_MODE_SIMULTANEOUS_ASSIGNMENT + MODE_SIMULTANEOUS_ASSIGNMENT | +#endif +#ifdef SPIDER_SQL_MODE_TIME_ROUND_FRACTIONAL + MODE_TIME_ROUND_FRACTIONAL | +#endif + 0; + +int spider_db_mbase_util::append_sql_mode_internal( + spider_string *str, + sql_mode_t sql_mode +) { + DBUG_ENTER("spider_db_mbase_util::append_sql_mode_internal"); + DBUG_PRINT("info",("spider this=%p", this)); +#ifdef SPIDER_SQL_MODE_REAL_AS_FLOAT + if (sql_mode & MODE_REAL_AS_FLOAT) + { + if (str->reserve(SPIDER_REAL_AS_FLOAT_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_REAL_AS_FLOAT_STR, SPIDER_REAL_AS_FLOAT_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_PIPES_AS_CONCAT + if (sql_mode & MODE_PIPES_AS_CONCAT) + { + if (str->reserve(SPIDER_PIPES_AS_CONCAT_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_PIPES_AS_CONCAT_STR, SPIDER_PIPES_AS_CONCAT_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_ANSI_QUOTES + if (sql_mode & MODE_ANSI_QUOTES) + { + if (str->reserve(SPIDER_ANSI_QUOTES_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_ANSI_QUOTES_STR, SPIDER_ANSI_QUOTES_LEN); + 
str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_IGNORE_SPACE + if (sql_mode & MODE_IGNORE_SPACE) + { + if (str->reserve(SPIDER_IGNORE_SPACE_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_IGNORE_SPACE_STR, SPIDER_IGNORE_SPACE_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_IGNORE_BAD_TABLE_OPTIONS + if (sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS) + { + if (str->reserve(SPIDER_IGNORE_BAD_TABLE_OPTIONS_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_IGNORE_BAD_TABLE_OPTIONS_STR, SPIDER_IGNORE_BAD_TABLE_OPTIONS_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_ONLY_FULL_GROUP_BY + if (sql_mode & MODE_ONLY_FULL_GROUP_BY) + { + if (str->reserve(SPIDER_ONLY_FULL_GROUP_BY_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_ONLY_FULL_GROUP_BY_STR, SPIDER_ONLY_FULL_GROUP_BY_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_UNSIGNED_SUBTRACTION + if (sql_mode & MODE_NO_UNSIGNED_SUBTRACTION) + { + if (str->reserve(SPIDER_NO_UNSIGNED_SUBTRACTION_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_UNSIGNED_SUBTRACTION_STR, SPIDER_NO_UNSIGNED_SUBTRACTION_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_DIR_IN_CREATE + if (sql_mode & MODE_NO_DIR_IN_CREATE) + { + if (str->reserve(SPIDER_NO_DIR_IN_CREATE_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_DIR_IN_CREATE_STR, SPIDER_NO_DIR_IN_CREATE_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_POSTGRESQL + if (sql_mode & MODE_POSTGRESQL) + { + if (str->reserve(SPIDER_POSTGRESQL_LEN + 
SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_POSTGRESQL_STR, SPIDER_POSTGRESQL_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_ORACLE + if (sql_mode & MODE_ORACLE) + { + if (str->reserve(SPIDER_ORACLE_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_ORACLE_STR, SPIDER_ORACLE_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_MSSQL + if (sql_mode & MODE_MSSQL) + { + if (str->reserve(SPIDER_MSSQL_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_MSSQL_STR, SPIDER_MSSQL_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_DB2 + if (sql_mode & MODE_DB2) + { + if (str->reserve(SPIDER_DB2_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_DB2_STR, SPIDER_DB2_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_MAXDB + if (sql_mode & MODE_MAXDB) + { + if (str->reserve(SPIDER_MAXDB_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_MAXDB_STR, SPIDER_MAXDB_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_KEY_OPTIONS + if (sql_mode & MODE_NO_KEY_OPTIONS) + { + if (str->reserve(SPIDER_NO_KEY_OPTIONS_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_KEY_OPTIONS_STR, SPIDER_NO_KEY_OPTIONS_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_TABLE_OPTIONS + if (sql_mode & MODE_NO_TABLE_OPTIONS) + { + if (str->reserve(SPIDER_NO_TABLE_OPTIONS_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_TABLE_OPTIONS_STR, SPIDER_NO_TABLE_OPTIONS_LEN); + 
str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_FIELD_OPTIONS + if (sql_mode & MODE_NO_FIELD_OPTIONS) + { + if (str->reserve(SPIDER_NO_FIELD_OPTIONS_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_FIELD_OPTIONS_STR, SPIDER_NO_FIELD_OPTIONS_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_MYSQL323 + if (sql_mode & MODE_MYSQL323) + { + if (str->reserve(SPIDER_MYSQL323_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_MYSQL323_STR, SPIDER_MYSQL323_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_MYSQL40 + if (sql_mode & MODE_MYSQL40) + { + if (str->reserve(SPIDER_MYSQL40_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_MYSQL40_STR, SPIDER_MYSQL40_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_ANSI + if (sql_mode & MODE_ANSI) + { + if (str->reserve(SPIDER_ANSI_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_ANSI_STR, SPIDER_ANSI_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_AUTO_VALUE_ON_ZERO + if (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) + { + if (str->reserve(SPIDER_NO_AUTO_VALUE_ON_ZERO_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_AUTO_VALUE_ON_ZERO_STR, SPIDER_NO_AUTO_VALUE_ON_ZERO_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_BACKSLASH_ESCAPES + if (sql_mode & MODE_NO_BACKSLASH_ESCAPES) + { + if (str->reserve(SPIDER_NO_BACKSLASH_ESCAPES_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_BACKSLASH_ESCAPES_STR, SPIDER_NO_BACKSLASH_ESCAPES_LEN); + 
str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_STRICT_TRANS_TABLES + if (sql_mode & MODE_STRICT_TRANS_TABLES) + { + if (str->reserve(SPIDER_STRICT_TRANS_TABLES_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_STRICT_TRANS_TABLES_STR, SPIDER_STRICT_TRANS_TABLES_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_STRICT_ALL_TABLES + if (sql_mode & MODE_STRICT_ALL_TABLES) + { + if (str->reserve(SPIDER_STRICT_ALL_TABLES_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_STRICT_ALL_TABLES_STR, SPIDER_STRICT_ALL_TABLES_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_ZERO_IN_DATE + if (sql_mode & MODE_NO_ZERO_IN_DATE) + { + if (str->reserve(SPIDER_NO_ZERO_IN_DATE_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_ZERO_IN_DATE_STR, SPIDER_NO_ZERO_IN_DATE_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_ZERO_DATE + if (sql_mode & MODE_NO_ZERO_DATE) + { + if (str->reserve(SPIDER_NO_ZERO_DATE_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_ZERO_DATE_STR, SPIDER_NO_ZERO_DATE_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_INVALID_DATES + if (sql_mode & MODE_INVALID_DATES) + { + if (str->reserve(SPIDER_INVALID_DATES_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_INVALID_DATES_STR, SPIDER_INVALID_DATES_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_ERROR_FOR_DIVISION_BY_ZERO + if (sql_mode & MODE_ERROR_FOR_DIVISION_BY_ZERO) + { + if (str->reserve(SPIDER_ERROR_FOR_DIVISION_BY_ZERO_LEN + SPIDER_SQL_COMMA_LEN)) + { + 
DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_ERROR_FOR_DIVISION_BY_ZERO_STR, SPIDER_ERROR_FOR_DIVISION_BY_ZERO_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_TRADITIONAL + if (sql_mode & MODE_TRADITIONAL) + { + if (str->reserve(SPIDER_TRADITIONAL_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_TRADITIONAL_STR, SPIDER_TRADITIONAL_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_AUTO_CREATE_USER + if (sql_mode & MODE_NO_AUTO_CREATE_USER) + { + if (str->reserve(SPIDER_NO_AUTO_CREATE_USER_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_AUTO_CREATE_USER_STR, SPIDER_NO_AUTO_CREATE_USER_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_HIGH_NOT_PRECEDENCE + if (sql_mode & MODE_HIGH_NOT_PRECEDENCE) + { + if (str->reserve(SPIDER_HIGH_NOT_PRECEDENCE_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_HIGH_NOT_PRECEDENCE_STR, SPIDER_HIGH_NOT_PRECEDENCE_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_NO_ENGINE_SUBSTITUTION + if (sql_mode & MODE_NO_ENGINE_SUBSTITUTION) + { + if (str->reserve(SPIDER_NO_ENGINE_SUBSTITUTION_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_NO_ENGINE_SUBSTITUTION_STR, SPIDER_NO_ENGINE_SUBSTITUTION_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_PAD_CHAR_TO_FULL_LENGTH + if (sql_mode & MODE_PAD_CHAR_TO_FULL_LENGTH) + { + if (str->reserve(SPIDER_PAD_CHAR_TO_FULL_LENGTH_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_PAD_CHAR_TO_FULL_LENGTH_STR, SPIDER_PAD_CHAR_TO_FULL_LENGTH_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + 
} +#endif + DBUG_RETURN(0); +} + +int spider_db_mariadb_util::append_sql_mode_internal( + spider_string *str, + sql_mode_t sql_mode +) { + int error_num; + DBUG_ENTER("spider_db_mbase_util::append_sql_mode_internal"); + DBUG_PRINT("info",("spider this=%p", this)); + if ((error_num = spider_db_mbase_util::append_sql_mode_internal( + str, sql_mode))) + { + DBUG_RETURN(error_num); + } +#ifdef SPIDER_SQL_MODE_EMPTY_STRING_IS_NULL + if (sql_mode & MODE_EMPTY_STRING_IS_NULL) + { + if (str->reserve(SPIDER_EMPTY_STRING_IS_NULL_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_EMPTY_STRING_IS_NULL_STR, SPIDER_EMPTY_STRING_IS_NULL_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_SIMULTANEOUS_ASSIGNMENT + if (sql_mode & MODE_SIMULTANEOUS_ASSIGNMENT) + { + if (str->reserve(SPIDER_SIMULTANEOUS_ASSIGNMENT_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SIMULTANEOUS_ASSIGNMENT_STR, SPIDER_SIMULTANEOUS_ASSIGNMENT_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif +#ifdef SPIDER_SQL_MODE_TIME_ROUND_FRACTIONAL + if (sql_mode & MODE_TIME_ROUND_FRACTIONAL) + { + if (str->reserve(SPIDER_TIME_ROUND_FRACTIONAL_LEN + SPIDER_SQL_COMMA_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_TIME_ROUND_FRACTIONAL_STR, SPIDER_TIME_ROUND_FRACTIONAL_LEN); + str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN); + } +#endif + DBUG_RETURN(0); +} + +int spider_db_mbase_util::append_sql_mode( + spider_string *str, + sql_mode_t sql_mode +) { + int error_num; + uint length; + DBUG_ENTER("spider_db_mbase_util::append_sql_mode"); + DBUG_PRINT("info",("spider this=%p", this)); + if (str->reserve(SPIDER_SQL_SEMICOLON_LEN + SPIDER_SQL_SQL_MODE_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + if (str->length()) + { + str->q_append(SPIDER_SQL_SEMICOLON_STR, SPIDER_SQL_SEMICOLON_LEN); + } + 
str->q_append(SPIDER_SQL_SQL_MODE_STR, SPIDER_SQL_SQL_MODE_LEN); + length = str->length(); + if ((error_num = append_sql_mode_internal(str, sql_mode))) + { + DBUG_RETURN(error_num); + } + if (str->length() > length) + { + str->length(str->length() - SPIDER_SQL_COMMA_LEN); + } else { + if (str->reserve(SPIDER_SQL_VALUE_QUOTE_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + } + str->q_append(SPIDER_SQL_VALUE_QUOTE_STR, SPIDER_SQL_VALUE_QUOTE_LEN); + DBUG_RETURN(0); +} + int spider_db_mbase_util::append_time_zone( spider_string *str, Time_zone *time_zone @@ -14025,7 +15232,7 @@ int spider_mbase_handler::append_list_item_select( )) DBUG_RETURN(HA_ERR_OUT_OF_MEM); str->q_append(SPIDER_SQL_SPACE_STR, SPIDER_SQL_SPACE_LEN); - if ((error_num = spider_db_mbase_utility->append_name(str, + if ((error_num = spider_db_mbase_utility->append_escaped_name(str, item_name, length))) { DBUG_RETURN(error_num); diff --git a/storage/spider/spd_db_mysql.h b/storage/spider/spd_db_mysql.h index 49780e0fa7f..51db3b1f2fa 100644 --- a/storage/spider/spd_db_mysql.h +++ b/storage/spider/spd_db_mysql.h @@ -29,6 +29,17 @@ public: uint name_length, CHARSET_INFO *name_charset ); + int append_escaped_name( + spider_string *str, + const char *name, + uint name_length + ); + int append_escaped_name_with_charset( + spider_string *str, + const char *name, + uint name_length, + CHARSET_INFO *name_charset + ); bool is_name_quote( const char head_code ); @@ -68,6 +79,14 @@ public: spider_string *str, int wait_timeout ); + virtual int append_sql_mode_internal( + spider_string *str, + sql_mode_t sql_mode + ); + int append_sql_mode( + spider_string *str, + sql_mode_t sql_mode + ); int append_time_zone( spider_string *str, Time_zone *time_zone @@ -183,6 +202,13 @@ class spider_db_mysql_util: public spider_db_mbase_util public: spider_db_mysql_util(); ~spider_db_mysql_util(); + int append_column_value( + ha_spider *spider, + spider_string *str, + Field *field, + const uchar *new_ptr, + CHARSET_INFO 
*access_charset + ); }; class spider_db_mariadb_util: public spider_db_mbase_util @@ -190,6 +216,17 @@ class spider_db_mariadb_util: public spider_db_mbase_util public: spider_db_mariadb_util(); ~spider_db_mariadb_util(); + int append_sql_mode_internal( + spider_string *str, + sql_mode_t sql_mode + ); + int append_column_value( + ha_spider *spider, + spider_string *str, + Field *field, + const uchar *new_ptr, + CHARSET_INFO *access_charset + ); }; class spider_db_mbase_row: public spider_db_row @@ -477,6 +514,11 @@ public: int wait_timeout, int *need_mon ); + bool set_sql_mode_in_bulk_sql(); + int set_sql_mode( + sql_mode_t sql_mode, + int *need_mon + ); bool set_time_zone_in_bulk_sql(); int set_time_zone( Time_zone *time_zone, diff --git a/storage/spider/spd_db_oracle.cc b/storage/spider/spd_db_oracle.cc index 3623af432dd..e56cb31a32c 100644 --- a/storage/spider/spd_db_oracle.cc +++ b/storage/spider/spd_db_oracle.cc @@ -2113,6 +2113,23 @@ int spider_db_oracle::set_wait_timeout( DBUG_RETURN(0); } +bool spider_db_oracle::set_sql_mode_in_bulk_sql() +{ + DBUG_ENTER("spider_db_oracle::set_sql_mode_in_bulk_sql"); + DBUG_PRINT("info",("spider this=%p", this)); + DBUG_RETURN(FALSE); +} + +int spider_db_oracle::set_sql_mode( + sql_mode_t sql_mode, + int *need_mon +) { + DBUG_ENTER("spider_db_oracle::set_sql_mode"); + DBUG_PRINT("info",("spider this=%p", this)); + /* nothing to do */ + DBUG_RETURN(0); +} + bool spider_db_oracle::set_time_zone_in_bulk_sql() { DBUG_ENTER("spider_db_oracle::set_time_zone_in_bulk_sql"); @@ -2528,6 +2545,57 @@ int spider_db_oracle_util::append_name_with_charset( DBUG_RETURN(0); } +int spider_db_oracle_util::append_escaped_name( + spider_string *str, + const char *name, + uint name_length +) { + int error_num; + DBUG_ENTER("spider_db_oracle_util::append_name"); + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN * 2 + name_length * 2)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + if 
((error_num = spider_db_append_name_with_quote_str_internal( + str, name, name_length, dbton_id))) + { + DBUG_RETURN(error_num); + } + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + DBUG_RETURN(0); +} + +int spider_db_oracle_util::append_escaped_name_with_charset( + spider_string *str, + const char *name, + uint name_length, + CHARSET_INFO *name_charset +) { + int error_num; + DBUG_ENTER("spider_db_oracle_util::append_name_with_charset"); + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN * 2 + name_length * 2)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + if ((error_num = spider_db_append_name_with_quote_str_internal( + str, name, name_length, name_charset, dbton_id))) + { + DBUG_RETURN(error_num); + } + if (str->reserve(SPIDER_SQL_NAME_QUOTE_LEN)) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + str->q_append(SPIDER_SQL_NAME_QUOTE_STR, SPIDER_SQL_NAME_QUOTE_LEN); + DBUG_RETURN(0); +} + bool spider_db_oracle_util::is_name_quote( const char head_code ) { @@ -2829,6 +2897,16 @@ int spider_db_oracle_util::append_wait_timeout( DBUG_RETURN(0); } +int spider_db_oracle_util::append_sql_mode( + spider_string *str, + sql_mode_t sql_mode +) { + DBUG_ENTER("spider_db_oracle_util::append_sql_mode"); + DBUG_PRINT("info",("spider this=%p", this)); + /* nothing to do */ + DBUG_RETURN(0); +} + int spider_db_oracle_util::append_time_zone( spider_string *str, Time_zone *time_zone @@ -12757,7 +12835,7 @@ int spider_oracle_handler::append_list_item_select( )) DBUG_RETURN(HA_ERR_OUT_OF_MEM); str->q_append(SPIDER_SQL_SPACE_STR, SPIDER_SQL_SPACE_LEN); - if ((error_num = spider_db_oracle_utility.append_name(str, + if ((error_num = spider_db_oracle_utility.append_escaped_name(str, item_name, length))) { DBUG_RETURN(error_num); diff --git a/storage/spider/spd_db_oracle.h b/storage/spider/spd_db_oracle.h index 
e3f787c514e..a4be417bc67 100644 --- a/storage/spider/spd_db_oracle.h +++ b/storage/spider/spd_db_oracle.h @@ -32,6 +32,17 @@ public: uint name_length, CHARSET_INFO *name_charset ); + int append_escaped_name( + spider_string *str, + const char *name, + uint name_length + ); + int append_escaped_name_with_charset( + spider_string *str, + const char *name, + uint name_length, + CHARSET_INFO *name_charset + ); bool is_name_quote( const char head_code ); @@ -71,6 +82,10 @@ public: spider_string *str, int wait_timeout ); + int append_sql_mode( + spider_string *str, + sql_mode_t sql_mode + ); int append_time_zone( spider_string *str, Time_zone *time_zone @@ -431,6 +446,11 @@ public: int wait_timeout, int *need_mon ); + bool set_sql_mode_in_bulk_sql(); + int set_sql_mode( + sql_mode_t sql_mode, + int *need_mon + ); bool set_time_zone_in_bulk_sql(); int set_time_zone( Time_zone *time_zone, diff --git a/storage/spider/spd_group_by_handler.cc b/storage/spider/spd_group_by_handler.cc index 51cfca23106..1fddad34630 100644 --- a/storage/spider/spd_group_by_handler.cc +++ b/storage/spider/spd_group_by_handler.cc @@ -1640,21 +1640,18 @@ group_by_handler *spider_create_group_by_handler( if (from->table->part_info) { DBUG_PRINT("info",("spider partition handler")); -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) - ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); - DBUG_PRINT("info",("spider part_spec->start_part=%u", part_spec->start_part)); - DBUG_PRINT("info",("spider part_spec->end_part=%u", part_spec->end_part)); - if ( - part_spec->start_part == partition->get_no_current_part_id() || - part_spec->start_part != part_spec->end_part - ) { +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) + partition_info *part_info = from->table->part_info; + uint bits = bitmap_bits_set(&part_info->read_partitions); + DBUG_PRINT("info",("spider bits=%u", bits)); + if (bits != 1) + { 
DBUG_PRINT("info",("spider using multiple partitions is not supported by this feature yet")); #else DBUG_PRINT("info",("spider partition is not supported by this feature yet")); #endif DBUG_RETURN(NULL); -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif } @@ -1672,17 +1669,18 @@ group_by_handler *spider_create_group_by_handler( /* all tables are const_table */ DBUG_RETURN(NULL); } -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) from->table->file; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; @@ -1703,17 +1701,18 @@ group_by_handler *spider_create_group_by_handler( { if (from->table->const_table) continue; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) from->table->file; -#if 
defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; @@ -1741,17 +1740,18 @@ group_by_handler *spider_create_group_by_handler( do { if (from->table->const_table) continue; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) from->table->file; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; @@ -1889,17 +1889,18 @@ group_by_handler *spider_create_group_by_handler( { from = from->next_local; } -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) from->table->file; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; @@ -1977,17 +1978,18 @@ group_by_handler 
*spider_create_group_by_handler( continue; fields->clear_conn_holder_from_conn(); -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) from->table->file; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; diff --git a/storage/spider/spd_include.h b/storage/spider/spd_include.h index df0ed93935d..8b0581b9bd9 100644 --- a/storage/spider/spd_include.h +++ b/storage/spider/spd_include.h @@ -260,7 +260,7 @@ const char SPIDER_empty_string = ""; #define SPIDER_TMP_SHARE_LONG_COUNT 19 #define SPIDER_TMP_SHARE_LONGLONG_COUNT 3 -#define SPIDER_MEM_CALC_LIST_NUM 265 +#define SPIDER_MEM_CALC_LIST_NUM 268 #define SPIDER_CONN_META_BUF_LEN 64 #define SPIDER_BACKUP_DASTATUS \ @@ -292,6 +292,7 @@ typedef struct st_spider_thread volatile bool killed; volatile bool thd_wait; volatile bool first_free_wait; + volatile bool init_command; pthread_t thread; pthread_cond_t cond; pthread_mutex_t mutex; @@ -450,6 +451,7 @@ typedef struct st_spider_conn int autocommit; int sql_log_off; int wait_timeout; + sql_mode_t sql_mode; THD *thd; void *another_ha_first; void *another_ha_last; @@ -559,6 +561,7 @@ typedef struct st_spider_conn bool queued_wait_timeout; bool queued_autocommit; bool queued_sql_log_off; + bool queued_sql_mode; bool queued_time_zone; bool queued_trx_start; bool queued_xa_start; @@ -572,6 +575,7 @@ typedef struct st_spider_conn int 
queued_wait_timeout_val; bool queued_autocommit_val; bool queued_sql_log_off_val; + sql_mode_t queued_sql_mode_val; Time_zone *queued_time_zone_val; XID *queued_xa_start_xid; diff --git a/storage/spider/spd_init_query.h b/storage/spider/spd_init_query.h new file mode 100644 index 00000000000..4c58f8d80a4 --- /dev/null +++ b/storage/spider/spd_init_query.h @@ -0,0 +1,782 @@ +/* Copyright (C) 2010-2019 Kentoku Shiba + Copyright (C) 2019 MariaDB corp + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + This SQL script creates system tables for SPIDER + or fixes incompatibilities if ones already exist. 
+*/ + +static LEX_STRING spider_init_queries[] = { + {C_STRING_WITH_LEN( + "create table if not exists mysql.spider_xa(" + " format_id int not null default 0," + " gtrid_length int not null default 0," + " bqual_length int not null default 0," + " data char(128) charset binary not null default ''," + " status char(8) not null default ''," + " primary key (data, format_id, gtrid_length)," + " key idx1 (status)" + ") engine=MyISAM default charset=utf8 collate=utf8_bin" + )}, + {C_STRING_WITH_LEN( + "create table if not exists mysql.spider_xa_member(" + " format_id int not null default 0," + " gtrid_length int not null default 0," + " bqual_length int not null default 0," + " data char(128) charset binary not null default ''," + " scheme char(64) not null default ''," + " host char(64) not null default ''," + " port char(5) not null default ''," + " socket text not null," + " username char(64) not null default ''," + " password char(64) not null default ''," + " ssl_ca text," + " ssl_capath text," + " ssl_cert text," + " ssl_cipher char(64) default null," + " ssl_key text," + " ssl_verify_server_cert tinyint not null default 0," + " default_file text," + " default_group char(64) default null," + " key idx1 (data, format_id, gtrid_length, host)" + ") engine=MyISAM default charset=utf8 collate=utf8_bin" + )}, + {C_STRING_WITH_LEN( + "create table if not exists mysql.spider_xa_failed_log(" + " format_id int not null default 0," + " gtrid_length int not null default 0," + " bqual_length int not null default 0," + " data char(128) charset binary not null default ''," + " scheme char(64) not null default ''," + " host char(64) not null default ''," + " port char(5) not null default ''," + " socket text not null," + " username char(64) not null default ''," + " password char(64) not null default ''," + " ssl_ca text," + " ssl_capath text," + " ssl_cert text," + " ssl_cipher char(64) default null," + " ssl_key text," + " ssl_verify_server_cert tinyint not null default 0," + " 
default_file text," + " default_group char(64) default null," + " thread_id int default null," + " status char(8) not null default ''," + " failed_time timestamp not null default current_timestamp," + " key idx1 (data, format_id, gtrid_length, host)" + ") engine=MyISAM default charset=utf8 collate=utf8_bin" + )}, + {C_STRING_WITH_LEN( + "create table if not exists mysql.spider_tables(" + " db_name char(64) not null default ''," + " table_name char(199) not null default ''," + " link_id int not null default 0," + " priority bigint not null default 0," + " server char(64) default null," + " scheme char(64) default null," + " host char(64) default null," + " port char(5) default null," + " socket text," + " username char(64) default null," + " password char(64) default null," + " ssl_ca text," + " ssl_capath text," + " ssl_cert text," + " ssl_cipher char(64) default null," + " ssl_key text," + " ssl_verify_server_cert tinyint not null default 0," + " monitoring_binlog_pos_at_failing tinyint not null default 0," + " default_file text," + " default_group char(64) default null," + " tgt_db_name char(64) default null," + " tgt_table_name char(64) default null," + " link_status tinyint not null default 1," + " block_status tinyint not null default 0," + " static_link_id char(64) default null," + " primary key (db_name, table_name, link_id)," + " key idx1 (priority)," + " unique key uidx1 (db_name, table_name, static_link_id)" + ") engine=MyISAM default charset=utf8 collate=utf8_bin" + )}, + {C_STRING_WITH_LEN( + "create table if not exists mysql.spider_link_mon_servers(" + " db_name char(64) not null default ''," + " table_name char(199) not null default ''," + " link_id char(64) not null default ''," + " sid int unsigned not null default 0," + " server char(64) default null," + " scheme char(64) default null," + " host char(64) default null," + " port char(5) default null," + " socket text," + " username char(64) default null," + " password char(64) default null," + " 
ssl_ca text," + " ssl_capath text," + " ssl_cert text," + " ssl_cipher char(64) default null," + " ssl_key text," + " ssl_verify_server_cert tinyint not null default 0," + " default_file text," + " default_group char(64) default null," + " primary key (db_name, table_name, link_id, sid)" + ") engine=MyISAM default charset=utf8 collate=utf8_bin" + )}, + {C_STRING_WITH_LEN( + "create table if not exists mysql.spider_link_failed_log(" + " db_name char(64) not null default ''," + " table_name char(199) not null default ''," + " link_id char(64) not null default ''," + " failed_time timestamp not null default current_timestamp" + ") engine=MyISAM default charset=utf8 collate=utf8_bin" + )}, + {C_STRING_WITH_LEN( + "create table if not exists mysql.spider_table_position_for_recovery(" + " db_name char(64) not null default ''," + " table_name char(199) not null default ''," + " failed_link_id int not null default 0," + " source_link_id int not null default 0," + " file text," + " position text," + " gtid text," + " primary key (db_name, table_name, failed_link_id, source_link_id)" + ") engine=MyISAM default charset=utf8 collate=utf8_bin" + )}, + {C_STRING_WITH_LEN( + "create table if not exists mysql.spider_table_sts(" + " db_name char(64) not null default ''," + " table_name char(199) not null default ''," + " data_file_length bigint unsigned not null default 0," + " max_data_file_length bigint unsigned not null default 0," + " index_file_length bigint unsigned not null default 0," + " records bigint unsigned not null default 0," + " mean_rec_length bigint unsigned not null default 0," + " check_time datetime not null default '0000-00-00 00:00:00'," + " create_time datetime not null default '0000-00-00 00:00:00'," + " update_time datetime not null default '0000-00-00 00:00:00'," + " checksum bigint unsigned default null," + " primary key (db_name, table_name)" + ") engine=MyISAM default charset=utf8 collate=utf8_bin" + )}, + {C_STRING_WITH_LEN( + "create table if not 
exists mysql.spider_table_crd(" + " db_name char(64) not null default ''," + " table_name char(199) not null default ''," + " key_seq int unsigned not null default 0," + " cardinality bigint not null default 0," + " primary key (db_name, table_name, key_seq)" + ") engine=MyISAM default charset=utf8 collate=utf8_bin" + )}, +/* + If tables already exist and their definition differ + from the latest ones, we fix them here. +*/ + {C_STRING_WITH_LEN( + "drop procedure if exists mysql.spider_fix_one_table" + )}, + {C_STRING_WITH_LEN( + "drop procedure if exists mysql.spider_fix_system_tables" + )}, + {C_STRING_WITH_LEN( + "create procedure mysql.spider_fix_one_table" + " (tab_name char(255) charset utf8 collate utf8_bin," + " test_col_name char(255) charset utf8 collate utf8_bin," + " _sql text charset utf8 collate utf8_bin)" + "begin" + " set @col_exists := 0;" + " select 1 into @col_exists from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = tab_name" + " AND COLUMN_NAME = test_col_name;" + " if @col_exists = 0 then" + " select @stmt := _sql;" + " prepare sp_stmt1 from @stmt;" + " execute sp_stmt1;" + " end if;" + "end;" + )}, + {C_STRING_WITH_LEN( + "create procedure mysql.spider_fix_system_tables()" + "begin" + " select substring_index(substring_index(version(), '-', 2), '-', -1)" + " into @server_name;" + " select substring_index(version(), '.', 1)" + " into @server_major_version;" + " select substring_index(substring_index(version(), '.', 2), '.', -1)" + " into @server_minor_version;" +/* + Fix for 0.5 +*/ + " call mysql.spider_fix_one_table('spider_tables', 'server'," + " 'alter table mysql.spider_tables" + " add server char(64) default null," + " add scheme char(64) default null," + " add host char(64) default null," + " add port char(5) default null," + " add socket char(64) default null," + " add username char(64) default null," + " add password char(64) default null," + " add tgt_db_name char(64) default null," + " add 
tgt_table_name char(64) default null');" +/* + Fix for version 0.17 +*/ + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_xa'" + " AND COLUMN_NAME = 'data';" + " if @col_type != 'binary(128)' then" + " alter table mysql.spider_xa" + " modify data binary(128) not null default '';" + " end if;" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_xa_member'" + " AND COLUMN_NAME = 'data';" + " if @col_type != 'binary(128)' then" + " alter table mysql.spider_xa_member" + " modify data binary(128) not null default '';" + " end if;" +/* + Fix for version 2.7 +*/ + " call mysql.spider_fix_one_table('spider_tables', 'link_id'," + " 'alter table mysql.spider_tables" + " add column link_id int not null default 0 after table_name," + " drop primary key," + " add primary key (db_name, table_name, link_id)');" +/* + Fix for version 2.8 +*/ + " call mysql.spider_fix_one_table('spider_tables', 'link_status'," + " 'alter table mysql.spider_tables" + " add column link_status tinyint not null default 1');" +/* + Fix for version 2.10 +*/ + " call mysql.spider_fix_one_table('spider_xa_member', 'ssl_ca'," + " 'alter table mysql.spider_xa_member" + " add column ssl_ca char(64) default null after password," + " add column ssl_capath char(64) default null after ssl_ca," + " add column ssl_cert char(64) default null after ssl_capath," + " add column ssl_cipher char(64) default null after ssl_cert," + " add column ssl_key char(64) default null after ssl_cipher," + " add column ssl_verify_server_cert tinyint not null default 0" + " after ssl_key," + " add column default_file char(64) default null" + " after ssl_verify_server_cert," + " add column default_group char(64) default null after default_file');" + " call mysql.spider_fix_one_table('spider_tables', 'ssl_ca'," + " 'alter table mysql.spider_tables" + " add column ssl_ca 
char(64) default null after password," + " add column ssl_capath char(64) default null after ssl_ca," + " add column ssl_cert char(64) default null after ssl_capath," + " add column ssl_cipher char(64) default null after ssl_cert," + " add column ssl_key char(64) default null after ssl_cipher," + " add column ssl_verify_server_cert tinyint not null default 0" + " after ssl_key," + " add column default_file char(64) default null" + " after ssl_verify_server_cert," + " add column default_group char(64) default null after default_file');" + " call mysql.spider_fix_one_table('spider_link_mon_servers', 'ssl_ca'," + " 'alter table mysql.spider_link_mon_servers" + " add column ssl_ca char(64) default null after password," + " add column ssl_capath char(64) default null after ssl_ca," + " add column ssl_cert char(64) default null after ssl_capath," + " add column ssl_cipher char(64) default null after ssl_cert," + " add column ssl_key char(64) default null after ssl_cipher," + " add column ssl_verify_server_cert tinyint not null default 0" + " after ssl_key," + " add column default_file char(64) default null" + " after ssl_verify_server_cert," + " add column default_group char(64) default null after default_file');" +/* + Fix for version 2.28 +*/ + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_link_mon_servers'" + " AND COLUMN_NAME = 'sid';" + " if @col_type != 'int(10) unsigned' then" + " alter table mysql.spider_link_mon_servers" + " modify sid int unsigned not null default 0;" + " end if;" +/* + Fix for version 3.1 +*/ + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_xa_member'" + " AND COLUMN_NAME = 'socket';" + " if @col_type = 'char(64)' then" + " alter table mysql.spider_xa_member" + " drop primary key," + " add index idx1 (data, format_id, gtrid_length, host)," + " modify socket text not null," + 
" modify ssl_ca text," + " modify ssl_capath text," + " modify ssl_cert text," + " modify ssl_key text," + " modify default_file text;" + " end if;" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_tables'" + " AND COLUMN_NAME = 'socket';" + " if @col_type = 'char(64)' then" + " alter table mysql.spider_tables" + " modify socket text," + " modify ssl_ca text," + " modify ssl_capath text," + " modify ssl_cert text," + " modify ssl_key text," + " modify default_file text;" + " end if;" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_link_mon_servers'" + " AND COLUMN_NAME = 'socket';" + " if @col_type = 'char(64)' then" + " alter table mysql.spider_link_mon_servers" + " modify socket text," + " modify ssl_ca text," + " modify ssl_capath text," + " modify ssl_cert text," + " modify ssl_key text," + " modify default_file text;" + " end if;" +/* + Fix for version 3.3.0 +*/ + " call mysql.spider_fix_one_table('spider_tables'," + " 'monitoring_binlog_pos_at_failing'," + " 'alter table mysql.spider_tables" + " add monitoring_binlog_pos_at_failing tinyint not null default 0" + " after ssl_verify_server_cert');" +/* + Fix for version 3.3.6 +*/ + " call mysql.spider_fix_one_table('spider_tables', 'block_status'," + " 'alter table mysql.spider_tables" + " add column block_status tinyint not null default 0" + " after link_status');" + " call mysql.spider_fix_one_table('spider_tables', 'static_link_id'," + " 'alter table mysql.spider_tables" + " add column static_link_id char(64) default null after block_status," + " add unique index uidx1 (db_name, table_name, static_link_id)');" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_link_mon_servers'" + " AND COLUMN_NAME = 'link_id';" + " if @col_type != 'char(64)' then" + " alter 
table mysql.spider_link_mon_servers" + " modify link_id char(64) not null default '';" + " end if;" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_link_failed_log'" + " AND COLUMN_NAME = 'link_id';" + " if @col_type != 'char(64)' then" + " alter table mysql.spider_link_failed_log" + " modify link_id char(64) not null default '';" + " end if;" +/* + Fix for version 3.3.10 +*/ + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_tables'" + " AND COLUMN_NAME = 'table_name';" + " if @col_type != 'char(199)' then" + " alter table mysql.spider_tables" + " modify table_name char(199) not null default '';" + " end if;" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_link_mon_servers'" + " AND COLUMN_NAME = 'table_name';" + " if @col_type != 'char(199)' then" + " alter table mysql.spider_link_mon_servers" + " modify table_name char(199) not null default '';" + " end if;" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_link_failed_log'" + " AND COLUMN_NAME = 'table_name';" + " if @col_type != 'char(199)' then" + " alter table mysql.spider_link_failed_log" + " modify table_name char(199) not null default '';" + " end if;" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_table_position_for_recovery'" + " AND COLUMN_NAME = 'table_name';" + " if @col_type != 'char(199)' then" + " alter table mysql.spider_table_position_for_recovery" + " modify table_name char(199) not null default '';" + " end if;" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_table_sts'" + " AND COLUMN_NAME 
= 'table_name';" + " if @col_type != 'char(199)' then" + " alter table mysql.spider_table_sts" + " modify table_name char(199) not null default '';" + " end if;" + " select COLUMN_TYPE INTO @col_type from INFORMATION_SCHEMA.COLUMNS" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_table_crd'" + " AND COLUMN_NAME = 'table_name';" + " if @col_type != 'char(199)' then" + " alter table mysql.spider_table_crd" + " modify table_name char(199) not null default '';" + " end if;" +/* + Fix for version 3.3.15 +*/ + " call mysql.spider_fix_one_table('spider_table_sts', 'checksum'," + " 'alter table mysql.spider_table_sts" + " add column checksum bigint unsigned default null after update_time');" +/* + Fix for MariaDB 10.4: Crash-Safe system tables +*/ + " if @server_name = 'MariaDB' and" + " (" + " @server_major_version > 10 or" + " (" + " @server_major_version = 10 and" + " @server_minor_version >= 4" + " )" + " )" + " then" + " select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_link_failed_log';" + " if @engine_name != 'Aria' then" + " alter table mysql.spider_link_failed_log" + " engine=Aria transactional=1;" + " end if;" + " select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_link_mon_servers';" + " if @engine_name != 'Aria' then" + " alter table mysql.spider_link_mon_servers" + " engine=Aria transactional=1;" + " end if;" + " select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_table_crd';" + " if @engine_name != 'Aria' then" + " alter table mysql.spider_table_crd" + " engine=Aria transactional=1;" + " end if;" + " select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_table_position_for_recovery';" + " if @engine_name != 'Aria' then" + " alter table 
mysql.spider_table_position_for_recovery" + " engine=Aria transactional=1;" + " end if;" + " select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_table_sts';" + " if @engine_name != 'Aria' then" + " alter table mysql.spider_table_sts" + " engine=Aria transactional=1;" + " end if;" + " select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_tables';" + " if @engine_name != 'Aria' then" + " alter table mysql.spider_tables" + " engine=Aria transactional=1;" + " end if;" + " select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_xa';" + " if @engine_name != 'Aria' then" + " alter table mysql.spider_xa" + " engine=Aria transactional=1;" + " end if;" + " select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_xa_failed_log';" + " if @engine_name != 'Aria' then" + " alter table mysql.spider_xa_failed_log" + " engine=Aria transactional=1;" + " end if;" + " select ENGINE INTO @engine_name from INFORMATION_SCHEMA.TABLES" + " where TABLE_SCHEMA = 'mysql'" + " AND TABLE_NAME = 'spider_xa_member';" + " if @engine_name != 'Aria' then" + " alter table mysql.spider_xa_member" + " engine=Aria transactional=1;" + " end if;" + " end if;" + " if @server_name = 'MariaDB' and" + " (" + " @server_major_version > 10 or" + " (" + " @server_major_version = 10 and" + " @server_minor_version >= 6" + " )" + " )" + " then" + " /* table for ddl pushdown */" + " create table if not exists mysql.spider_rewrite_tables(" + " table_id bigint unsigned not null auto_increment," + " db_name char(64) not null default ''," + " table_name char(64) not null default ''," + " primary key (table_id)," + " unique uk1(db_name, table_name)" + " ) engine=Aria transactional=1 default charset=utf8 collate=utf8_bin;" + " create table 
if not exists mysql.spider_rewrite_table_tables(" + " table_id bigint unsigned not null," + " partition_id bigint unsigned not null auto_increment," + " partition_method varchar(18) default ''," + " partition_expression varchar(64) default ''," + " subpartition_method varchar(12) default ''," + " subpartition_expression varchar(64) default ''," + " connection_str text not null default ''," + " comment_str text not null default ''," + " primary key (table_id, partition_id)," + " unique uk1(table_id, partition_method, partition_expression," + " subpartition_method, subpartition_expression)" + " ) engine=Aria transactional=1 default charset=utf8 collate=utf8_bin;" + " create table if not exists mysql.spider_rewrite_table_partitions(" + " table_id bigint unsigned not null," + " partition_id bigint unsigned not null," + " partition_ordinal_position bigint unsigned not null auto_increment," + " partition_name varchar(64) not null default ''," + " partition_description varchar(64) not null default ''," + " connection_str text not null default ''," + " comment_str text not null default ''," + " primary key (table_id, partition_id, partition_ordinal_position)," + " unique key uk1 (table_id, partition_id, partition_name)" + " ) engine=Aria transactional=1 default charset=utf8 collate=utf8_bin;" + " create table if not exists mysql.spider_rewrite_table_subpartitions(" + " table_id bigint unsigned not null," + " partition_id bigint unsigned not null," + " partition_ordinal_position bigint unsigned not null," + " subpartition_ordinal_position bigint unsigned not null" + " auto_increment," + " subpartition_name varchar(64) not null default ''," + " subpartition_description varchar(64) not null default ''," + " connection_str text not null default ''," + " comment_str text not null default ''," + " primary key (table_id, partition_id, partition_ordinal_position," + " subpartition_ordinal_position)," + " unique key uk1 (table_id, partition_id, partition_ordinal_position," + " 
subpartition_name)" + " ) engine=Aria transactional=1 default charset=utf8 collate=utf8_bin;" + " create table if not exists mysql.spider_rewritten_tables(" + " db_name char(64) not null," + " table_name char(64) not null," + " table_id bigint unsigned not null," + " partition_id bigint unsigned not null," + " primary key (db_name, table_name, table_id, partition_id)" + " ) engine=Aria transactional=1 default charset=utf8 collate=utf8_bin;" + " end if;" + "end;" + )}, + {C_STRING_WITH_LEN( + "call mysql.spider_fix_system_tables" + )}, + {C_STRING_WITH_LEN( + "drop procedure mysql.spider_fix_one_table" + )}, + {C_STRING_WITH_LEN( + "drop procedure mysql.spider_fix_system_tables" + )}, +/* + Install a plugin and UDFs +*/ + {C_STRING_WITH_LEN( + "drop procedure if exists mysql.spider_plugin_installer" + )}, + {C_STRING_WITH_LEN( + "create procedure mysql.spider_plugin_installer()" + "begin" + " set @win_plugin := IF(@@version_compile_os like 'Win%', 1, 0);" +/* + Install spider plugin +*/ +/* + " set @have_spider_i_s_plugin := 0;" + " select @have_spider_i_s_plugin := 1 from INFORMATION_SCHEMA.plugins" + " where PLUGIN_NAME = 'SPIDER';" + " set @have_spider_plugin := 0;" + " select @have_spider_plugin := 1 from mysql.plugin" + " where name = 'spider';" + " if @have_spider_i_s_plugin = 0 then" + " if @have_spider_plugin = 1 then" + " / *" + " spider plugin is present in mysql.plugin but not in" + " information_schema.plugins. Remove spider plugin entry" + " in mysql.plugin first." 
+ " * /" + " delete from mysql.plugin where name = 'spider';" + " end if;" + " if @win_plugin = 0 then " + " install plugin spider soname 'ha_spider.so';" + " else" + " install plugin spider soname 'ha_spider.dll';" + " end if;" + " end if;" +*/ +/* + Install spider_alloc_mem plugin +*/ + " set @have_spider_i_s_alloc_mem_plugin := 0;" + " select @have_spider_i_s_alloc_mem_plugin := 1" + " from INFORMATION_SCHEMA.plugins" + " where PLUGIN_NAME = 'SPIDER_ALLOC_MEM';" + " set @have_spider_alloc_mem_plugin := 0;" + " select @have_spider_alloc_mem_plugin := 1 from mysql.plugin" + " where name = 'spider_alloc_mem';" + " if @have_spider_i_s_alloc_mem_plugin = 0 then" + " if @have_spider_alloc_mem_plugin = 1 then" + " /*" + " spider_alloc_mem plugin is present in mysql.plugin but not in" + " information_schema.plugins. Remove spider_alloc_mem plugin entry" + " in mysql.plugin first." + " */" + " delete from mysql.plugin where name = 'spider_alloc_mem';" + " end if;" + " if @win_plugin = 0 then " + " install plugin spider_alloc_mem soname 'ha_spider.so';" + " else" + " install plugin spider_alloc_mem soname 'ha_spider.dll';" + " end if;" + " end if;" + " set @have_spider_direct_sql_udf := 0;" + " select @have_spider_direct_sql_udf := 1 from mysql.func" + " where name = 'spider_direct_sql';" + " if @have_spider_direct_sql_udf = 0 then" + " if @win_plugin = 0 then " + " create function spider_direct_sql returns int" + " soname 'ha_spider.so';" + " else" + " create function spider_direct_sql returns int" + " soname 'ha_spider.dll';" + " end if;" + " end if;" + " set @have_spider_bg_direct_sql_udf := 0;" + " select @have_spider_bg_direct_sql_udf := 1 from mysql.func" + " where name = 'spider_bg_direct_sql';" + " if @have_spider_bg_direct_sql_udf = 0 then" + " if @win_plugin = 0 then " + " create aggregate function spider_bg_direct_sql returns int" + " soname 'ha_spider.so';" + " else" + " create aggregate function spider_bg_direct_sql returns int" + " soname 'ha_spider.dll';" + 
" end if;" + " end if;" + " set @have_spider_ping_table_udf := 0;" + " select @have_spider_ping_table_udf := 1 from mysql.func" + " where name = 'spider_ping_table';" + " if @have_spider_ping_table_udf = 0 then" + " if @win_plugin = 0 then " + " create function spider_ping_table returns int" + " soname 'ha_spider.so';" + " else" + " create function spider_ping_table returns int" + " soname 'ha_spider.dll';" + " end if;" + " end if;" + " set @have_spider_copy_tables_udf := 0;" + " select @have_spider_copy_tables_udf := 1 from mysql.func" + " where name = 'spider_copy_tables';" + " if @have_spider_copy_tables_udf = 0 then" + " if @win_plugin = 0 then " + " create function spider_copy_tables returns int" + " soname 'ha_spider.so';" + " else" + " create function spider_copy_tables returns int" + " soname 'ha_spider.dll';" + " end if;" + " end if;" + " set @have_spider_flush_table_mon_cache_udf := 0;" + " select @have_spider_flush_table_mon_cache_udf := 1 from mysql.func" + " where name = 'spider_flush_table_mon_cache';" + " if @have_spider_flush_table_mon_cache_udf = 0 then" + " if @win_plugin = 0 then " + " create function spider_flush_table_mon_cache returns int" + " soname 'ha_spider.so';" + " else" + " create function spider_flush_table_mon_cache returns int" + " soname 'ha_spider.dll';" + " end if;" + " end if;" + " if @server_name = 'MariaDB' and" + " (" + " @server_major_version > 10 or" + " (" + " @server_major_version = 10 and" + " @server_minor_version >= 6" + " )" + " )" + " then" +/* + Install spider_rewrite plugin +*/ + " set @have_spider_i_s_rewrite_plugin := 0;" + " select @have_spider_i_s_rewrite_plugin := 1" + " from INFORMATION_SCHEMA.plugins" + " where PLUGIN_NAME = 'SPIDER_REWRITE';" + " set @have_spider_rewrite_plugin := 0;" + " select @have_spider_rewrite_plugin := 1 from mysql.plugin" + " where name = 'spider_rewrite';" + " if @have_spider_i_s_rewrite_plugin = 0 then" + " if @have_spider_rewrite_plugin = 1 then" + " /*" + " spider_rewrite plugin 
is present in mysql.plugin but not in" + " information_schema.plugins. Remove spider_rewrite plugin entry" + " in mysql.plugin first." + " */" + " delete from mysql.plugin where name = 'spider_rewrite';" + " end if;" + " if @win_plugin = 0 then " + " install plugin spider_rewrite soname 'ha_spider.so';" + " else" + " install plugin spider_rewrite soname 'ha_spider.dll';" + " end if;" + " end if;" + " set @have_spider_flush_rewrite_cache_udf := 0;" + " select @have_spider_flush_rewrite_cache_udf := 1 from mysql.func" + " where name = 'spider_flush_rewrite_cache';" + " if @have_spider_flush_rewrite_cache_udf = 0 then" + " if @win_plugin = 0 then " + " create function spider_flush_rewrite_cache returns int" + " soname 'ha_spider.so';" + " else" + " create function spider_flush_rewrite_cache returns int" + " soname 'ha_spider.dll';" + " end if;" + " end if;" + " end if;" + "end;" + )}, + {C_STRING_WITH_LEN( + "call mysql.spider_plugin_installer" + )}, + {C_STRING_WITH_LEN( + "drop procedure mysql.spider_plugin_installer" + )}, + {C_STRING_WITH_LEN("")} +}; diff --git a/storage/spider/spd_param.cc b/storage/spider/spd_param.cc index a9c9bf26cfb..446ccc22141 100644 --- a/storage/spider/spd_param.cc +++ b/storage/spider/spd_param.cc @@ -3405,6 +3405,26 @@ int spider_param_wait_timeout( DBUG_RETURN(604800); } +/* + FALSE: no sync + TRUE: sync + */ +static MYSQL_THDVAR_BOOL( + sync_sql_mode, /* name */ + PLUGIN_VAR_OPCMDARG, /* opt */ + "Sync sql_mode", /* comment */ + NULL, /* check */ + NULL, /* update */ + TRUE /* def */ +); + +bool spider_param_sync_sql_mode( + THD *thd +) { + DBUG_ENTER("spider_param_sync_sql_mode"); + DBUG_RETURN(THDVAR(thd, sync_sql_mode)); +} + static struct st_mysql_storage_engine spider_storage_engine = { MYSQL_HANDLERTON_INTERFACE_VERSION }; @@ -3557,6 +3577,7 @@ static struct st_mysql_sys_var* spider_system_variables[] = { MYSQL_SYSVAR(slave_trx_isolation), MYSQL_SYSVAR(remote_wait_timeout), MYSQL_SYSVAR(wait_timeout), + 
MYSQL_SYSVAR(sync_sql_mode), NULL }; diff --git a/storage/spider/spd_param.h b/storage/spider/spd_param.h index 4f436d50bc7..9ffb9e8c278 100644 --- a/storage/spider/spd_param.h +++ b/storage/spider/spd_param.h @@ -428,3 +428,6 @@ int spider_param_remote_wait_timeout( int spider_param_wait_timeout( THD *thd ); +bool spider_param_sync_sql_mode( + THD *thd +); diff --git a/storage/spider/spd_sys_table.cc b/storage/spider/spd_sys_table.cc index 07cd6bf1695..ada48c4982d 100644 --- a/storage/spider/spd_sys_table.cc +++ b/storage/spider/spd_sys_table.cc @@ -229,85 +229,264 @@ TABLE *spider_open_sys_table( } } #endif - if (table_name_length == SPIDER_SYS_XA_TABLE_NAME_LEN) + switch (table_name_length) { - if ( - !memcmp(table_name, - SPIDER_SYS_XA_TABLE_NAME_STR, SPIDER_SYS_XA_TABLE_NAME_LEN) && - table->s->fields != SPIDER_SYS_XA_COL_CNT - ) { - spider_close_sys_table(thd, table, open_tables_backup, need_lock); - table = NULL; - my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, - ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), - SPIDER_SYS_XA_TABLE_NAME_STR); - *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; - goto error_col_num_chk; - } - } else if (table_name_length == SPIDER_SYS_XA_MEMBER_TABLE_NAME_LEN) - { - if ( - !memcmp(table_name, - SPIDER_SYS_XA_MEMBER_TABLE_NAME_STR, - SPIDER_SYS_XA_MEMBER_TABLE_NAME_LEN) && - table->s->fields != SPIDER_SYS_XA_MEMBER_COL_CNT - ) { - spider_close_sys_table(thd, table, open_tables_backup, need_lock); - table = NULL; - my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, - ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), - SPIDER_SYS_XA_MEMBER_TABLE_NAME_STR); - *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; - goto error_col_num_chk; - } - } else if (table_name_length == SPIDER_SYS_TABLES_TABLE_NAME_LEN) - { - if ( - !memcmp(table_name, - SPIDER_SYS_TABLES_TABLE_NAME_STR, - SPIDER_SYS_TABLES_TABLE_NAME_LEN) && - table->s->fields != SPIDER_SYS_TABLES_COL_CNT - ) { - spider_close_sys_table(thd, table, open_tables_backup, need_lock); - table = NULL; - 
my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, - ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), - SPIDER_SYS_TABLES_TABLE_NAME_STR); - *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; - goto error_col_num_chk; - } - } else if (table_name_length == SPIDER_SYS_LINK_MON_TABLE_NAME_LEN) - { - if ( - !memcmp(table_name, - SPIDER_SYS_LINK_MON_TABLE_NAME_STR, - SPIDER_SYS_LINK_MON_TABLE_NAME_LEN) && - table->s->fields != SPIDER_SYS_LINK_MON_TABLE_COL_CNT - ) { - spider_close_sys_table(thd, table, open_tables_backup, need_lock); - table = NULL; - my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, - ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), - SPIDER_SYS_LINK_MON_TABLE_NAME_STR); - *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; - goto error_col_num_chk; - } - } else if (table_name_length == SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_LEN) - { - if ( - !memcmp(table_name, - SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_STR, - SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_LEN) && - table->s->fields != SPIDER_SYS_POS_FOR_RECOVERY_TABLE_COL_CNT - ) { - spider_close_sys_table(thd, table, open_tables_backup, need_lock); - table = NULL; - my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, - ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), - SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_STR); - *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; - goto error_col_num_chk; - } + case 9: + if (!memcmp(table_name, SPIDER_SYS_XA_TABLE_NAME_STR, + SPIDER_SYS_XA_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_XA")); + if (table->s->fields != SPIDER_SYS_XA_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_XA_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + DBUG_ASSERT(0); + break; + case 13: + if (!memcmp(table_name, SPIDER_SYS_TABLES_TABLE_NAME_STR, + SPIDER_SYS_TABLES_TABLE_NAME_LEN)) + { + 
DBUG_PRINT("info",("spider checking for SYS_TABLES")); + if (table->s->fields != SPIDER_SYS_TABLES_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_TABLES_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + DBUG_ASSERT(0); + break; + case 16: + if (!memcmp(table_name, SPIDER_SYS_XA_MEMBER_TABLE_NAME_STR, + SPIDER_SYS_XA_MEMBER_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_XA_MEMBER")); + if (table->s->fields != SPIDER_SYS_XA_MEMBER_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_XA_MEMBER_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + if (!memcmp(table_name, SPIDER_SYS_TABLE_STS_TABLE_NAME_STR, + SPIDER_SYS_TABLE_STS_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_TABLE_STS")); + if (table->s->fields != SPIDER_SYS_TABLE_STS_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_TABLE_STS_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + if (!memcmp(table_name, SPIDER_SYS_TABLE_CRD_TABLE_NAME_STR, + SPIDER_SYS_TABLE_CRD_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_TABLE_CRD")); + if (table->s->fields != SPIDER_SYS_TABLE_CRD_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_TABLE_CRD_TABLE_NAME_STR); + *error_num = 
ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + DBUG_ASSERT(0); + break; + case 20: + if (!memcmp(table_name, SPIDER_SYS_XA_FAILED_TABLE_NAME_STR, + SPIDER_SYS_XA_FAILED_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_XA_FAILED")); + if (table->s->fields != SPIDER_SYS_XA_FAILED_TABLE_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_XA_FAILED_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + DBUG_ASSERT(0); + break; + case 21: + if (!memcmp(table_name, SPIDER_SYS_RW_TBLS_TABLE_NAME_STR, + SPIDER_SYS_RW_TBLS_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_RW_TBLS")); + if (table->s->fields != SPIDER_SYS_RW_TBLS_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_RW_TBLS_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + } + DBUG_ASSERT(0); + break; + case 22: + if (!memcmp(table_name, SPIDER_SYS_LINK_FAILED_TABLE_NAME_STR, + SPIDER_SYS_LINK_FAILED_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_LINK_FAILED")); + if (table->s->fields != SPIDER_SYS_LINK_FAILED_TABLE_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_LINK_FAILED_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + DBUG_ASSERT(0); + break; + case 23: + if (!memcmp(table_name, SPIDER_SYS_LINK_MON_TABLE_NAME_STR, + SPIDER_SYS_LINK_MON_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for 
SYS_LINK_MON")); + if (table->s->fields != SPIDER_SYS_LINK_MON_TABLE_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_LINK_MON_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + if (!memcmp(table_name, SPIDER_SYS_RWN_TBLS_TABLE_NAME_STR, + SPIDER_SYS_RWN_TBLS_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_RWN_TBLS")); + if (table->s->fields != SPIDER_SYS_RWN_TBLS_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_RWN_TBLS_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + DBUG_ASSERT(0); + break; + case 27: + if (!memcmp(table_name, SPIDER_SYS_RW_TBL_TBLS_TABLE_NAME_STR, + SPIDER_SYS_RW_TBL_TBLS_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_RW_TBL_TBLS")); + if (table->s->fields != SPIDER_SYS_RW_TBL_TBLS_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_RW_TBL_TBLS_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + DBUG_ASSERT(0); + break; + case 31: + if (!memcmp(table_name, SPIDER_SYS_RW_TBL_PTTS_TABLE_NAME_STR, + SPIDER_SYS_RW_TBL_PTTS_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_RW_TBL_PTTS")); + if (table->s->fields != SPIDER_SYS_RW_TBL_PTTS_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + 
SPIDER_SYS_RW_TBL_PTTS_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + DBUG_ASSERT(0); + break; + case 34: + if (!memcmp(table_name, SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_STR, + SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_POS_FOR_RECOVERY")); + if (table->s->fields != SPIDER_SYS_POS_FOR_RECOVERY_TABLE_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + if (!memcmp(table_name, SPIDER_SYS_RW_TBL_SPTTS_TABLE_NAME_STR, + SPIDER_SYS_RW_TBL_SPTTS_TABLE_NAME_LEN)) + { + DBUG_PRINT("info",("spider checking for SYS_RW_TBL_SPTTS")); + if (table->s->fields != SPIDER_SYS_RW_TBL_SPTTS_COL_CNT) + { + spider_close_sys_table(thd, table, open_tables_backup, need_lock); + table = NULL; + my_printf_error(ER_SPIDER_SYS_TABLE_VERSION_NUM, + ER_SPIDER_SYS_TABLE_VERSION_STR, MYF(0), + SPIDER_SYS_RW_TBL_SPTTS_TABLE_NAME_STR); + *error_num = ER_SPIDER_SYS_TABLE_VERSION_NUM; + goto error_col_num_chk; + } + break; + } + DBUG_ASSERT(0); + break; + default: + DBUG_ASSERT(0); + break; } DBUG_RETURN(table); diff --git a/storage/spider/spd_sys_table.h b/storage/spider/spd_sys_table.h index 7a28ea20010..df933ec61b8 100644 --- a/storage/spider/spd_sys_table.h +++ b/storage/spider/spd_sys_table.h @@ -1,4 +1,5 @@ -/* Copyright (C) 2008-2018 Kentoku Shiba +/* Copyright (C) 2008-2019 Kentoku Shiba + Copyright (C) 2019 MariaDB corp This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,23 +15,33 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ #define SPIDER_SYS_XA_TABLE_NAME_STR "spider_xa" 
-#define SPIDER_SYS_XA_TABLE_NAME_LEN (sizeof(SPIDER_SYS_XA_TABLE_NAME_STR) - 1) +#define SPIDER_SYS_XA_TABLE_NAME_LEN 9 #define SPIDER_SYS_XA_MEMBER_TABLE_NAME_STR "spider_xa_member" -#define SPIDER_SYS_XA_MEMBER_TABLE_NAME_LEN (sizeof(SPIDER_SYS_XA_MEMBER_TABLE_NAME_STR) - 1) +#define SPIDER_SYS_XA_MEMBER_TABLE_NAME_LEN 16 #define SPIDER_SYS_TABLES_TABLE_NAME_STR "spider_tables" -#define SPIDER_SYS_TABLES_TABLE_NAME_LEN (sizeof(SPIDER_SYS_TABLES_TABLE_NAME_STR) - 1) +#define SPIDER_SYS_TABLES_TABLE_NAME_LEN 13 #define SPIDER_SYS_LINK_MON_TABLE_NAME_STR "spider_link_mon_servers" -#define SPIDER_SYS_LINK_MON_TABLE_NAME_LEN (sizeof(SPIDER_SYS_LINK_MON_TABLE_NAME_STR) - 1) +#define SPIDER_SYS_LINK_MON_TABLE_NAME_LEN 23 #define SPIDER_SYS_LINK_FAILED_TABLE_NAME_STR "spider_link_failed_log" -#define SPIDER_SYS_LINK_FAILED_TABLE_NAME_LEN (sizeof(SPIDER_SYS_LINK_FAILED_TABLE_NAME_STR) - 1) +#define SPIDER_SYS_LINK_FAILED_TABLE_NAME_LEN 22 #define SPIDER_SYS_XA_FAILED_TABLE_NAME_STR "spider_xa_failed_log" -#define SPIDER_SYS_XA_FAILED_TABLE_NAME_LEN (sizeof(SPIDER_SYS_XA_FAILED_TABLE_NAME_STR) - 1) +#define SPIDER_SYS_XA_FAILED_TABLE_NAME_LEN 20 #define SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_STR "spider_table_position_for_recovery" -#define SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_LEN (sizeof(SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_STR) - 1) +#define SPIDER_SYS_POS_FOR_RECOVERY_TABLE_NAME_LEN 34 #define SPIDER_SYS_TABLE_STS_TABLE_NAME_STR "spider_table_sts" -#define SPIDER_SYS_TABLE_STS_TABLE_NAME_LEN (sizeof(SPIDER_SYS_TABLE_STS_TABLE_NAME_STR) - 1) +#define SPIDER_SYS_TABLE_STS_TABLE_NAME_LEN 16 #define SPIDER_SYS_TABLE_CRD_TABLE_NAME_STR "spider_table_crd" -#define SPIDER_SYS_TABLE_CRD_TABLE_NAME_LEN (sizeof(SPIDER_SYS_TABLE_CRD_TABLE_NAME_STR) - 1) +#define SPIDER_SYS_TABLE_CRD_TABLE_NAME_LEN 16 +#define SPIDER_SYS_RW_TBLS_TABLE_NAME_STR "spider_rewrite_tables" +#define SPIDER_SYS_RW_TBLS_TABLE_NAME_LEN 21 +#define SPIDER_SYS_RW_TBL_TBLS_TABLE_NAME_STR 
"spider_rewrite_table_tables" +#define SPIDER_SYS_RW_TBL_TBLS_TABLE_NAME_LEN 27 +#define SPIDER_SYS_RW_TBL_PTTS_TABLE_NAME_STR "spider_rewrite_table_partitions" +#define SPIDER_SYS_RW_TBL_PTTS_TABLE_NAME_LEN 31 +#define SPIDER_SYS_RW_TBL_SPTTS_TABLE_NAME_STR "spider_rewrite_table_subpartitions" +#define SPIDER_SYS_RW_TBL_SPTTS_TABLE_NAME_LEN 34 +#define SPIDER_SYS_RWN_TBLS_TABLE_NAME_STR "spider_rewritten_tables" +#define SPIDER_SYS_RWN_TBLS_TABLE_NAME_LEN 23 #define SPIDER_SYS_XA_PREPARED_STR "PREPARED" #define SPIDER_SYS_XA_NOT_YET_STR "NOT YET" @@ -47,11 +58,18 @@ #define SPIDER_SYS_TABLES_IDX1_COL_CNT 1 #define SPIDER_SYS_TABLES_UIDX1_COL_CNT 3 #define SPIDER_SYS_LINK_MON_TABLE_COL_CNT 19 +#define SPIDER_SYS_LINK_FAILED_TABLE_COL_CNT 4 +#define SPIDER_SYS_XA_FAILED_TABLE_COL_CNT 21 #define SPIDER_SYS_POS_FOR_RECOVERY_TABLE_COL_CNT 7 -#define SPIDER_SYS_TABLE_STS_COL_CNT 10 +#define SPIDER_SYS_TABLE_STS_COL_CNT 11 #define SPIDER_SYS_TABLE_STS_PK_COL_CNT 2 #define SPIDER_SYS_TABLE_CRD_COL_CNT 4 #define SPIDER_SYS_TABLE_CRD_PK_COL_CNT 3 +#define SPIDER_SYS_RW_TBLS_COL_CNT 3 +#define SPIDER_SYS_RW_TBL_TBLS_COL_CNT 8 +#define SPIDER_SYS_RW_TBL_PTTS_COL_CNT 7 +#define SPIDER_SYS_RW_TBL_SPTTS_COL_CNT 8 +#define SPIDER_SYS_RWN_TBLS_COL_CNT 4 #define SPIDER_SYS_LINK_MON_TABLE_DB_NAME_SIZE 64 #define SPIDER_SYS_LINK_MON_TABLE_TABLE_NAME_SIZE 64 diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc index 95972cf4401..ae0e88eb3a8 100644 --- a/storage/spider/spd_table.cc +++ b/storage/spider/spd_table.cc @@ -1,4 +1,5 @@ -/* Copyright (C) 2008-2018 Kentoku Shiba +/* Copyright (C) 2008-2019 Kentoku Shiba + Copyright (C) 2019 MariaDB corp This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,6 +30,7 @@ #include "sql_servers.h" #include "sql_select.h" #include "tztime.h" +#include "sql_parse.h" #endif #include "spd_err.h" #include "spd_param.h" @@ -44,6 +46,7 @@ #include 
"spd_direct_sql.h" #include "spd_malloc.h" #include "spd_group_by_handler.h" +#include "spd_init_query.h" /* Background thread management */ #ifdef SPIDER_HAS_NEXT_THREAD_ID @@ -5028,8 +5031,7 @@ SPIDER_SHARE *spider_get_share( share->init_error = TRUE; share->init_error_time = (time_t) time((time_t*) 0); share->init = TRUE; - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_conn_keys; } if ( @@ -5076,8 +5078,7 @@ SPIDER_SHARE *spider_get_share( share->init_error = TRUE; share->init_error_time = (time_t) time((time_t*) 0); share->init = TRUE; - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_dbton_handler; } spider->conns[roop_count]->error_mode &= spider->error_mode; } @@ -5095,8 +5096,7 @@ SPIDER_SHARE *spider_get_share( share->init_error = TRUE; share->init_error_time = (time_t) time((time_t*) 0); share->init = TRUE; - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_dbton_handler; } char *table_name = db + table_share->db.length + 1; memcpy(db, table_share->db.str, table_share->db.length); @@ -5111,16 +5111,14 @@ SPIDER_SHARE *spider_get_share( share->init_error = TRUE; share->init_error_time = (time_t) time((time_t*) 0); share->init = TRUE; - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_dbton_handler; } else if (search_link_idx == -2) { *error_num = HA_ERR_OUT_OF_MEM; share->init_error = TRUE; share->init_error_time = (time_t) time((time_t*) 0); share->init = TRUE; - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_dbton_handler; } spider->search_link_idx = search_link_idx; @@ -5177,8 +5175,7 @@ SPIDER_SHARE *spider_get_share( share->init = TRUE; pthread_mutex_unlock(&share->crd_mutex); pthread_mutex_unlock(&share->sts_mutex); - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_dbton_handler; } } @@ -5187,28 +5184,48 @@ SPIDER_SHARE *spider_get_share( !same_server_link || 
load_sts_at_startup ) && - spider_get_sts(share, spider->search_link_idx, tmp_time, + (*error_num = spider_get_sts(share, spider->search_link_idx, tmp_time, spider, sts_interval, sts_mode, #ifdef WITH_PARTITION_STORAGE_ENGINE sts_sync, #endif - 1, HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_AUTO) + 1, HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_AUTO)) ) { - thd->clear_error(); + if (*error_num != ER_SPIDER_SYS_TABLE_VERSION_NUM) + { + thd->clear_error(); + } else { + pthread_mutex_unlock(&share->crd_mutex); + pthread_mutex_unlock(&share->sts_mutex); + share->init_error = TRUE; + share->init_error_time = (time_t) time((time_t*) 0); + share->init = TRUE; + goto error_after_alloc_dbton_handler; + } } if ( ( !same_server_link || load_crd_at_startup ) && - spider_get_crd(share, spider->search_link_idx, tmp_time, + (*error_num = spider_get_crd(share, spider->search_link_idx, tmp_time, spider, table, crd_interval, crd_mode, #ifdef WITH_PARTITION_STORAGE_ENGINE crd_sync, #endif - 1) + 1)) ) { - thd->clear_error(); + if (*error_num != ER_SPIDER_SYS_TABLE_VERSION_NUM) + { + thd->clear_error(); + } else { + pthread_mutex_unlock(&share->crd_mutex); + pthread_mutex_unlock(&share->sts_mutex); + share->init_error = TRUE; + share->init_error_time = (time_t) time((time_t*) 0); + share->init = TRUE; + goto error_after_alloc_dbton_handler; + } } pthread_mutex_unlock(&share->crd_mutex); pthread_mutex_unlock(&share->sts_mutex); @@ -5545,7 +5562,7 @@ SPIDER_SHARE *spider_get_share( spider->dbton_handler[dbton_id] = NULL; } } - goto error_but_no_delete; + goto error_after_alloc_conn_keys; } if ( @@ -5589,8 +5606,7 @@ SPIDER_SHARE *spider_get_share( FALSE ); } - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_dbton_handler; } spider->conns[roop_count]->error_mode &= spider->error_mode; } @@ -5605,8 +5621,7 @@ SPIDER_SHARE *spider_get_share( if (!db) { *error_num = HA_ERR_OUT_OF_MEM; - spider_free_share(share); - goto error_but_no_delete; + goto 
error_after_alloc_dbton_handler; } char *table_name = db + table_share->db.length + 1; memcpy(db, table_share->db.str, table_share->db.length); @@ -5618,13 +5633,11 @@ SPIDER_SHARE *spider_get_share( ER_SPIDER_ALL_LINKS_FAILED_STR, MYF(0), db, table_name); my_afree(db); *error_num = ER_SPIDER_ALL_LINKS_FAILED_NUM; - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_dbton_handler; } else if (search_link_idx == -2) { *error_num = HA_ERR_OUT_OF_MEM; - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_dbton_handler; } spider->search_link_idx = search_link_idx; @@ -5683,8 +5696,7 @@ SPIDER_SHARE *spider_get_share( spider_init_error_table->init_error_msg, MYF(0)); pthread_mutex_unlock(&share->crd_mutex); pthread_mutex_unlock(&share->sts_mutex); - spider_free_share(share); - goto error_but_no_delete; + goto error_after_alloc_dbton_handler; } } @@ -5693,28 +5705,42 @@ SPIDER_SHARE *spider_get_share( !same_server_link || load_sts_at_startup ) && - spider_get_sts(share, spider->search_link_idx, + (*error_num = spider_get_sts(share, spider->search_link_idx, tmp_time, spider, sts_interval, sts_mode, #ifdef WITH_PARTITION_STORAGE_ENGINE sts_sync, #endif - 1, HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_AUTO) + 1, HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_AUTO)) ) { - thd->clear_error(); + if (*error_num != ER_SPIDER_SYS_TABLE_VERSION_NUM) + { + thd->clear_error(); + } else { + pthread_mutex_unlock(&share->crd_mutex); + pthread_mutex_unlock(&share->sts_mutex); + goto error_after_alloc_dbton_handler; + } } if ( ( !same_server_link || load_crd_at_startup ) && - spider_get_crd(share, spider->search_link_idx, + (*error_num = spider_get_crd(share, spider->search_link_idx, tmp_time, spider, table, crd_interval, crd_mode, #ifdef WITH_PARTITION_STORAGE_ENGINE crd_sync, #endif - 1) + 1)) ) { - thd->clear_error(); + if (*error_num != ER_SPIDER_SYS_TABLE_VERSION_NUM) + { + thd->clear_error(); + } else { + 
pthread_mutex_unlock(&share->crd_mutex); + pthread_mutex_unlock(&share->sts_mutex); + goto error_after_alloc_dbton_handler; + } } } share->init_error = FALSE; @@ -5727,6 +5753,22 @@ SPIDER_SHARE *spider_get_share( DBUG_PRINT("info",("spider share=%p", share)); DBUG_RETURN(share); +error_after_alloc_dbton_handler: + for (roop_count = 0; roop_count < (int) share->use_dbton_count; ++roop_count) + { + uint dbton_id = share->use_dbton_ids[roop_count]; + if (spider->dbton_handler[dbton_id]) + { + delete spider->dbton_handler[dbton_id]; + spider->dbton_handler[dbton_id] = NULL; + } + } +error_after_alloc_conn_keys: + spider_free(spider_current_trx, spider->conn_keys, MYF(0)); + spider->conn_keys = NULL; + spider_free_share(share); + goto error_but_no_delete; + error_hash_insert: spider_free_share_resource_only(share); error_alloc_share: @@ -6533,6 +6575,14 @@ handler* spider_create_handler( MEM_ROOT *mem_root ) { DBUG_ENTER("spider_create_handler"); +#ifndef WITHOUT_SPIDER_BG_SEARCH + if (unlikely(spider_table_sts_threads[0].init_command)) + { + /* wait for finishing init_command */ + pthread_mutex_lock(&spider_table_sts_threads[0].mutex); + pthread_mutex_unlock(&spider_table_sts_threads[0].mutex); + } +#endif DBUG_RETURN(new (mem_root) ha_spider(hton, table)); } @@ -7262,6 +7312,7 @@ int spider_db_init( NullS)) ) goto error_alloc_mon_mutxes; + spider_table_sts_threads[0].init_command = TRUE; for (roop_count = 0; roop_count < (int) spider_param_table_sts_thread_count(); @@ -7314,6 +7365,29 @@ int spider_db_init( } } +#ifndef WITHOUT_SPIDER_BG_SEARCH + DBUG_PRINT("info",("spider before getting mutex")); + pthread_mutex_lock(&spider_table_sts_threads[0].mutex); + DBUG_PRINT("info",("spider after getting mutex")); + if (spider_table_sts_threads[0].init_command) + { + if (spider_table_sts_threads[0].thd_wait) + { + pthread_cond_signal(&spider_table_sts_threads[0].cond); + } + spider_table_sts_threads[0].first_free_wait = TRUE; + 
pthread_cond_wait(&spider_table_sts_threads[0].sync_cond, + &spider_table_sts_threads[0].mutex); + spider_table_sts_threads[0].first_free_wait = FALSE; + if (spider_table_sts_threads[0].thd_wait) + { + pthread_cond_signal(&spider_table_sts_threads[0].cond); + } + } + DBUG_PRINT("info",("spider before releasing mutex")); + pthread_mutex_unlock(&spider_table_sts_threads[0].mutex); + DBUG_PRINT("info",("spider after releasing mutex")); +#endif DBUG_RETURN(0); #ifndef WITHOUT_SPIDER_BG_SEARCH @@ -9954,6 +10028,54 @@ void *spider_table_bg_sts_action( trx->thd = thd; /* init end */ + if (thread->init_command) + { + uint i = 0; + tmp_disable_binlog(thd); + thd->security_ctx->skip_grants(); + thd->client_capabilities |= CLIENT_MULTI_RESULTS; + while (spider_init_queries[i + 2].length) + { + dispatch_command(COM_QUERY, thd, spider_init_queries[i].str, + (uint) spider_init_queries[i].length, FALSE, FALSE); + if (unlikely(thd->is_error())) + { + fprintf(stderr, "[ERROR] %s\n", thd->get_stmt_da()->message()); + thd->clear_error(); + break; + } + ++i; + } + DBUG_PRINT("info",("spider first_free_wait=%s", + thread->first_free_wait ? 
"TRUE" : "FALSE")); + if (!thread->first_free_wait) + { + thread->thd_wait = TRUE; + pthread_cond_wait(&thread->cond, &thread->mutex); + thread->thd_wait = FALSE; + } + DBUG_ASSERT(thread->first_free_wait); + pthread_cond_signal(&thread->sync_cond); + thread->thd_wait = TRUE; + pthread_cond_wait(&thread->cond, &thread->mutex); + thread->thd_wait = FALSE; + while (spider_init_queries[i].length) + { + dispatch_command(COM_QUERY, thd, spider_init_queries[i].str, + (uint) spider_init_queries[i].length, FALSE, FALSE); + if (unlikely(thd->is_error())) + { + fprintf(stderr, "[ERROR] %s\n", thd->get_stmt_da()->message()); + thd->clear_error(); + break; + } + ++i; + } + thd->client_capabilities -= CLIENT_MULTI_RESULTS; + reenable_binlog(thd); + thread->init_command = FALSE; + } + while (TRUE) { DBUG_PRINT("info",("spider bg sts loop start")); diff --git a/storage/spider/spd_trx.cc b/storage/spider/spd_trx.cc index 5cb3131ce13..ad2a35aac15 100644 --- a/storage/spider/spd_trx.cc +++ b/storage/spider/spd_trx.cc @@ -1695,6 +1695,16 @@ int spider_check_and_set_wait_timeout( DBUG_RETURN(0); } +int spider_check_and_set_sql_mode( + THD *thd, + SPIDER_CONN *conn, + int *need_mon +) { + DBUG_ENTER("spider_check_and_set_sql_mode"); + spider_conn_queue_sql_mode(conn, thd->variables.sql_mode); + DBUG_RETURN(0); +} + int spider_check_and_set_time_zone( THD *thd, SPIDER_CONN *conn, @@ -1897,6 +1907,9 @@ int spider_internal_start_trx( &spider->need_mons[link_idx])) || (error_num = spider_check_and_set_wait_timeout(thd, conn, &spider->need_mons[link_idx])) || + (spider_param_sync_sql_mode(thd) && + (error_num = spider_check_and_set_sql_mode(thd, conn, + &spider->need_mons[link_idx]))) || (sync_autocommit && (error_num = spider_check_and_set_autocommit(thd, conn, &spider->need_mons[link_idx]))) diff --git a/storage/spider/spd_trx.h b/storage/spider/spd_trx.h index a4511ba98c4..ca46bae20cc 100644 --- a/storage/spider/spd_trx.h +++ b/storage/spider/spd_trx.h @@ -107,6 +107,12 @@ int 
spider_check_and_set_wait_timeout( int *need_mon ); +int spider_check_and_set_sql_mode( + THD *thd, + SPIDER_CONN *conn, + int *need_mon +); + int spider_check_and_set_time_zone( THD *thd, SPIDER_CONN *conn, diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake index 2f04a33558a..eac557258ea 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake @@ -134,4 +134,4 @@ static __thread int tlsvar = 0; int main(void) { return tlsvar; }" HAVE_GNU_TLS) ## set TOKUDB_REVISION -set(CMAKE_TOKUDB_REVISION 0 CACHE INTEGER "Revision of tokudb.") +set(CMAKE_TOKUDB_REVISION 0 CACHE INTERNAL "Revision of tokudb.") diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 2be8556a123..c19a75d680d 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -3714,7 +3714,7 @@ static bool do_unique_checks_fn(THD *thd) { #endif // defined(TOKU_INCLUDE_RFR) && TOKU_INCLUDE_RFR -int ha_tokudb::do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd) { +int ha_tokudb::do_uniqueness_checks(const uchar* record, DB_TXN* txn, THD* thd) { int error = 0; // // first do uniqueness checks @@ -3757,7 +3757,7 @@ cleanup: return error; } -void ha_tokudb::test_row_packing(uchar* record, DBT* pk_key, DBT* pk_val) { +void ha_tokudb::test_row_packing(const uchar* record, DBT* pk_key, DBT* pk_val) { int error; DBT row, key; // @@ -3998,7 +3998,7 @@ out: // 0 on success // error otherwise // -int ha_tokudb::write_row(uchar * record) { +int ha_tokudb::write_row(const uchar * record) { TOKUDB_HANDLER_DBUG_ENTER("%p", record); DBT row, prim_key; diff --git a/storage/tokudb/ha_tokudb.h b/storage/tokudb/ha_tokudb.h index c36c93a4c74..f95143a6b4b 100644 --- a/storage/tokudb/ha_tokudb.h +++ b/storage/tokudb/ha_tokudb.h @@ -703,11 +703,11 @@ private: void trace_create_table_info(TABLE* 
form); int is_index_unique(bool* is_unique, DB_TXN* txn, DB* db, KEY* key_info, int lock_flags); int is_val_unique(bool* is_unique, const uchar* record, KEY* key_info, uint dict_index, DB_TXN* txn); - int do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd); + int do_uniqueness_checks(const uchar* record, DB_TXN* txn, THD* thd); void set_main_dict_put_flags(THD* thd, bool opt_eligible, uint32_t* put_flags); int insert_row_to_main_dictionary(DBT* pk_key, DBT* pk_val, DB_TXN* txn); int insert_rows_to_dictionaries_mult(DBT* pk_key, DBT* pk_val, DB_TXN* txn, THD* thd); - void test_row_packing(uchar* record, DBT* pk_key, DBT* pk_val); + void test_row_packing(const uchar* record, DBT* pk_key, DBT* pk_val); uint32_t fill_row_mutator( uchar* buf, uint32_t* dropped_columns, @@ -785,7 +785,7 @@ public: int rename_table(const char *from, const char *to); int optimize(THD * thd, HA_CHECK_OPT * check_opt); int analyze(THD * thd, HA_CHECK_OPT * check_opt); - int write_row(uchar * buf); + int write_row(const uchar * buf); int update_row(const uchar * old_data, const uchar * new_data); int delete_row(const uchar * buf); #if MYSQL_VERSION_ID >= 100000 |