diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2018-02-15 10:22:03 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2018-02-15 10:22:03 +0200 |
commit | b006d2ead4640f0ab4e29687fd7d24988b1c98f1 (patch) | |
tree | a478984bcd7f4bb2e0fd0496eae77b871077a380 /storage | |
parent | b782971c58b5656820429b8ef3fae5fd82f5a0f7 (diff) | |
parent | dc09f8f29cb2b9fdce7d5d5a623fdc8dcf1814f9 (diff) | |
download | mariadb-git-b006d2ead4640f0ab4e29687fd7d24988b1c98f1.tar.gz |
Merge bb-10.2-ext into 10.3
Diffstat (limited to 'storage')
281 files changed, 7140 insertions, 3270 deletions
diff --git a/storage/connect/myconn.cpp b/storage/connect/myconn.cpp index 28e6f076e77..e99e089d940 100644 --- a/storage/connect/myconn.cpp +++ b/storage/connect/myconn.cpp @@ -933,8 +933,9 @@ PQRYRES MYSQLC::GetResult(PGLOBAL g, bool pdb) crp->Prec = (crp->Type == TYPE_DOUBLE || crp->Type == TYPE_DECIM) ? fld->decimals : 0; - crp->Length = MY_MAX(fld->length, fld->max_length); - crp->Clen = GetTypeSize(crp->Type, crp->Length); + CHARSET_INFO *cs= get_charset(fld->charsetnr, MYF(0)); + crp->Clen = GetTypeSize(crp->Type, fld->length); + crp->Length = fld->length / (cs ? cs->mbmaxlen : 1); uns = (fld->flags & (UNSIGNED_FLAG | ZEROFILL_FLAG)) ? true : false; if (!(crp->Kdata = AllocValBlock(g, NULL, crp->Type, m_Rows, diff --git a/storage/connect/mysql-test/connect/disabled.def b/storage/connect/mysql-test/connect/disabled.def index 6d59369a4df..0dcf030613d 100644 --- a/storage/connect/mysql-test/connect/disabled.def +++ b/storage/connect/mysql-test/connect/disabled.def @@ -19,3 +19,4 @@ json_java_3 : Need MongoDB running and its Java Driver installed mongo_c : Need MongoDB running and its C Driver installed mongo_java_2 : Need MongoDB running and its Java Driver installed mongo_java_3 : Need MongoDB running and its Java Driver installed +tbl_thread : Bug MDEV-9844,10179,14214 03/01/2018 OB Option THREAD removed diff --git a/storage/connect/mysql-test/connect/r/infoschema-9739.result b/storage/connect/mysql-test/connect/r/infoschema-9739.result index 2d54b578521..4840e981552 100644 --- a/storage/connect/mysql-test/connect/r/infoschema-9739.result +++ b/storage/connect/mysql-test/connect/r/infoschema-9739.result @@ -2,7 +2,7 @@ create table t1 (i int) engine=Connect table_type=XML option_list='xmlsup=domdoc Warnings: Warning 1105 No file name. Table will use t1.xml select * from information_schema.tables where table_schema='test' and create_options like '%table_type=XML%'; -TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT +TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT MAX_INDEX_LENGTH TEMPORARY Warnings: Warning 1296 Got error 174 'File t1.xml not found' from CONNECT drop table t1; diff --git a/storage/connect/mysql-test/connect/r/infoschema2-9739.result b/storage/connect/mysql-test/connect/r/infoschema2-9739.result index 0372874862d..32ca77dc71d 100644 --- a/storage/connect/mysql-test/connect/r/infoschema2-9739.result +++ b/storage/connect/mysql-test/connect/r/infoschema2-9739.result @@ -4,7 +4,7 @@ create table t1 (i int) engine=Connect table_type=XML option_list='xmlsup=libxml Warnings: Warning 1105 No file name. Table will use t1.xml select * from information_schema.tables where table_schema='test' and create_options like '%table_type=XML%'; -TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT +TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT MAX_INDEX_LENGTH TEMPORARY Warnings: Warning 1296 Got error 174 'File t1.xml not found' from CONNECT drop table t1; diff --git a/storage/connect/tabtbl.cpp b/storage/connect/tabtbl.cpp index 9b83a1c93a5..aa7cf4e41b4 100644 --- a/storage/connect/tabtbl.cpp +++ b/storage/connect/tabtbl.cpp @@ -1,12 +1,9 @@ /************* TabTbl C++ Program Source Code File (.CPP) **************/ /* PROGRAM NAME: TABTBL */ /* ------------- */ -/* Version 1.8 */ +/* Version 1.9 */ /* */ -/* COPYRIGHT: */ -/* ---------- */ -/* (C) Copyright to PlugDB Software Development 2008-2017 */ -/* Author: Olivier BERTRAND */ +/* Author: Olivier BERTRAND 2008-2018 */ /* */ /* WHAT THIS PROGRAM DOES: */ /* ----------------------- */ @@ -168,9 +165,14 @@ PTDB TBLDEF::GetTable(PGLOBAL g, MODE) { if (Catfunc == FNC_COL) return new(g) TDBTBC(this); - else if (Thread) - return new(g) TDBTBM(this); - else + else if (Thread) { +#if defined(DEVELOPMENT) + return new(g) TDBTBM(this); +#else + strcpy(g->Message, "Option THREAD is no more supported"); + return NULL; +#endif // DEVELOPMENT + } else return new(g) TDBTBL(this); } // end of GetTable @@ -560,6 +562,7 @@ void TBTBLK::ReadColumn(PGLOBAL) } // end of ReadColumn +#if defined(DEVELOPMENT) /* ------------------------- Class TDBTBM ---------------------------- */ /***********************************************************************/ @@ -865,5 +868,6 @@ int TDBTBM::ReadNextRemote(PGLOBAL g) return RC_OK; } // end of ReadNextRemote +#endif // DEVELOPMENT /* ------------------------------------------------------------------- */ diff --git a/storage/connect/tabtbl.h b/storage/connect/tabtbl.h index f02bf620aae..e7a84395787 100644 --- a/storage/connect/tabtbl.h +++ b/storage/connect/tabtbl.h @@ -11,30 +11,9 @@ typedef class TBLDEF *PTBLDEF; typedef class TDBTBL *PTDBTBL; -typedef class TDBTBM *PTDBTBM; typedef class MYSQLC *PMYC; /***********************************************************************/ -/* Defines the structures used for distributed TBM tables. */ -/***********************************************************************/ -typedef struct _TBMtable *PTBMT; - -typedef struct _TBMtable { - PTBMT Next; // Points to next data table struct - PTABLE Tap; // Points to the sub table - PGLOBAL G; // Needed in thread routine - bool Complete; // TRUE when all results are read - bool Ready; // TRUE when results are there - int Rows; // Total number of rows read so far - int ProgCur; // Current pos - int ProgMax; // Max pos - int Rc; // Return code - THD *Thd; - pthread_attr_t attr; // ??? - pthread_t Tid; // CheckOpen thread ID - } TBMT; - -/***********************************************************************/ /* TBL table. */ /***********************************************************************/ class DllExport TBLDEF : public PRXDEF { /* Logical table description */ @@ -123,7 +102,33 @@ class TBTBLK : public TIDBLK { protected: // Must not have additional members - }; // end of class TBTBLK +}; // end of class TBTBLK + +#if defined(DEVELOPMENT) +/***********************************************************************/ +/* This table type is buggy and removed until a fix is found. */ +/***********************************************************************/ +typedef class TDBTBM *PTDBTBM; + +/***********************************************************************/ +/* Defines the structures used for distributed TBM tables. */ +/***********************************************************************/ +typedef struct _TBMtable *PTBMT; + +typedef struct _TBMtable { + PTBMT Next; // Points to next data table struct + PTABLE Tap; // Points to the sub table + PGLOBAL G; // Needed in thread routine + bool Complete; // TRUE when all results are read + bool Ready; // TRUE when results are there + int Rows; // Total number of rows read so far + int ProgCur; // Current pos + int ProgMax; // Max pos + int Rc; // Return code + THD *Thd; + pthread_attr_t attr; // ??? + pthread_t Tid; // CheckOpen thread ID +} TBMT; /***********************************************************************/ /* This is the TBM Access Method class declaration. */ @@ -160,3 +165,4 @@ class DllExport TDBTBM : public TDBTBL { }; // end of class TDBTBM pthread_handler_t ThreadOpen(void *p); +#endif // DEVELOPMENT diff --git a/storage/federatedx/ha_federatedx.h b/storage/federatedx/ha_federatedx.h index 8ae5d8bbc8e..16a1944b172 100644 --- a/storage/federatedx/ha_federatedx.h +++ b/storage/federatedx/ha_federatedx.h @@ -169,7 +169,7 @@ public: static void *operator new(size_t size, MEM_ROOT *mem_root) throw () { return alloc_root(mem_root, size); } static void operator delete(void *ptr, size_t size) - { TRASH(ptr, size); } + { TRASH_FREE(ptr, size); } static void operator delete(void *, MEM_ROOT *) { } diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index a75efb7fb62..b953b1dbb23 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -90,6 +90,15 @@ ha_heap::ha_heap(handlerton *hton, TABLE_SHARE *table_arg) int ha_heap::open(const char *name, int mode, uint test_if_locked) { + if (table->s->reclength < sizeof (char*)) + { + MEM_UNDEFINED(table->s->default_values + table->s->reclength, + sizeof(char*) - table->s->reclength); + table->s->reclength= sizeof(char*); + MEM_UNDEFINED(table->record[0], table->s->reclength); + MEM_UNDEFINED(table->record[1], table->s->reclength); + } + internal_table= MY_TEST(test_if_locked & HA_OPEN_INTERNAL_TABLE); if (internal_table || (!(file= heap_open(name, mode)) && my_errno == ENOENT)) { diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index 64bf09f1342..0a2882220db 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -1,5 +1,5 @@ -# Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2014, 2017, MariaDB Corporation +# Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2014, 2018, MariaDB Corporation. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -191,3 +191,5 @@ IF(MSVC) ENDIF() ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup) + +ADD_DEPENDENCIES(innobase GenError) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 1c98eaa7b5d..7e3bd73c5fb 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3257,21 +3257,17 @@ fail_err: ut_ad(trx_id->len == DATA_TRX_ID_LEN); ut_ad(trx_id[1].len == DATA_ROLL_PTR_LEN); + ut_ad(*static_cast<const byte*> + (trx_id[1].data) & 0x80); if (flags & BTR_NO_UNDO_LOG_FLAG) { ut_ad(!memcmp(trx_id->data, reset_trx_id, DATA_TRX_ID_LEN)); - ut_ad(!memcmp(trx_id[1].data, - reset_trx_id + DATA_TRX_ID_LEN, - DATA_ROLL_PTR_LEN)); } else { ut_ad(thr->graph->trx->id); ut_ad(thr->graph->trx->id == trx_read_trx_id( static_cast<const byte*>( trx_id->data))); - ut_ad(!memcmp(field_ref_zero, trx_id[1].data, - DATA_ROLL_PTR_LEN) - == !(~flags & BTR_NO_UNDO_LOG_FLAG)); } } #endif diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index d75fc41376d..a1457956767 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -687,7 +688,7 @@ buf_buddy_free_low( buf_pool->buddy_stat[i].used--; recombine: - UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i); + UNIV_MEM_ALLOC(buf, BUF_BUDDY_LOW << i); if (i == BUF_BUDDY_SIZES) { buf_buddy_block_free(buf_pool, buf); diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 407408e3584..053f9fbaee1 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 2013, 2018, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -3131,11 +3131,26 @@ buf_pool_clear_hash_index() see the comments in buf0buf.h */ if (!index) { +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + ut_a(!block->n_pointers); +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ continue; } - ut_ad(buf_block_get_state(block) - == BUF_BLOCK_FILE_PAGE); + ut_d(buf_page_state state + = buf_block_get_state(block)); + /* Another thread may have set the + state to BUF_BLOCK_REMOVE_HASH in + buf_LRU_block_remove_hashed(). + + The state change in buf_page_realloc() + is not observable here, because in + that case we would have !block->index. + + In the end, the entire adaptive hash + index will be removed. */ + ut_ad(state == BUF_BLOCK_FILE_PAGE + || state == BUF_BLOCK_REMOVE_HASH); # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG block->n_pointers = 0; # endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ @@ -4578,6 +4593,8 @@ evict_from_pool: buf_block_unfix(fix_block); buf_pool_mutex_exit(buf_pool); rw_lock_x_unlock(&fix_block->lock); + + *err = DB_PAGE_CORRUPTED; return NULL; } } diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 2d68b8e9ccd..cdea41ff191 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -1883,7 +1883,7 @@ buf_LRU_block_free_non_file_page( ut_d(block->page.in_free_list = TRUE); } - UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE); + UNIV_MEM_FREE(block->frame, UNIV_PAGE_SIZE); } /******************************************************************//** diff --git a/storage/innobase/data/data0type.cc b/storage/innobase/data/data0type.cc index 1d354a3a5b4..7c0539b3537 100644 --- a/storage/innobase/data/data0type.cc +++ b/storage/innobase/data/data0type.cc @@ -50,8 +50,10 @@ ulint dtype_get_at_most_n_mbchars( /*========================*/ ulint prtype, /*!< in: precise type */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of - a multi-byte character */ + ulint mbminlen, /*!< in: minimum length of + a multi-byte character, in bytes */ + ulint mbmaxlen, /*!< in: maximum length of + a multi-byte character, in bytes */ ulint prefix_len, /*!< in: length of the requested prefix, in characters, multiplied by dtype_get_mbmaxlen(dtype) */ @@ -59,9 +61,6 @@ dtype_get_at_most_n_mbchars( const char* str) /*!< in: the string whose prefix length is being determined */ { - ulint mbminlen = DATA_MBMINLEN(mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN(mbminmaxlen); - ut_a(len_is_stored(data_len)); ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen)); diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 8907a5de7b0..b50e0c81015 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2017, MariaDB Corporation. +Copyright (c) 2016, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1468,8 +1468,6 @@ dict_check_sys_tables( char* filepath = dict_get_first_path(space_id); /* Check that the .ibd file exists. */ - validate = true; /* Encryption */ - dberr_t err = fil_ibd_open( validate, !srv_read_only_mode && srv_log_file_size != 0, @@ -3049,9 +3047,13 @@ err_exit: } } - if (err == DB_SUCCESS && cached && table->is_readable() - && table->supports_instant()) { - err = btr_cur_instant_init(table); + if (err == DB_SUCCESS && cached && table->is_readable()) { + if (table->space && !fil_space_get_size(table->space)) { + table->corrupted = true; + table->file_unreadable = true; + } else if (table->supports_instant()) { + err = btr_cur_instant_init(table); + } } /* Initialize table foreign_child value. Its value could be diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc index 68ccf1b6f57..2e175731d5f 100644 --- a/storage/innobase/dict/dict0mem.cc +++ b/storage/innobase/dict/dict0mem.cc @@ -2,7 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 2013, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -456,8 +456,8 @@ dict_mem_table_col_rename_low( ut_ad(from_len <= NAME_LEN); ut_ad(to_len <= NAME_LEN); - char from[NAME_LEN]; - strncpy(from, s, NAME_LEN); + char from[NAME_LEN + 1]; + strncpy(from, s, NAME_LEN + 1); if (from_len == to_len) { /* The easy case: simply replace the column name in @@ -684,11 +684,11 @@ dict_mem_fill_column_struct( column->mtype = (unsigned int) mtype; column->prtype = (unsigned int) prtype; column->len = (unsigned int) col_len; + dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen); + column->mbminlen = mbminlen; + column->mbmaxlen = mbmaxlen; column->def_val.data = NULL; column->def_val.len = UNIV_SQL_DEFAULT; - - dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen); - dict_col_set_mbminmaxlen(column, mbminlen, mbmaxlen); } /**********************************************************************//** diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index b93489a10e6..ac0ea7358f6 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -624,6 +624,7 @@ retry: << " is only " << size_bytes << " bytes, should be at least " << min_size; os_file_close(node->handle); + node->handle = OS_FILE_CLOSED; return(false); } @@ -661,10 +662,12 @@ retry: ut_free(buf2); os_file_close(node->handle); + node->handle = OS_FILE_CLOSED; if (!fsp_flags_is_valid(flags, space->id)) { ulint cflags = fsp_flags_convert_from_101(flags); - if (cflags == ULINT_UNDEFINED) { + if (cflags == ULINT_UNDEFINED + || (cflags ^ space->flags) & ~FSP_FLAGS_MEM_MASK) { ib::error() << "Expected tablespace flags " << ib::hex(space->flags) @@ -2109,7 +2112,7 @@ fil_write_flushed_lsn( { byte* buf1; byte* buf; - dberr_t err; + dberr_t err = DB_TABLESPACE_NOT_FOUND; buf1 = static_cast<byte*>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE)); buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE)); @@ -4638,7 +4641,9 @@ fsp_flags_try_adjust(ulint space_id, ulint flags) { ut_ad(!srv_read_only_mode); ut_ad(fsp_flags_is_valid(flags, space_id)); - + if (!fil_space_get_size(space_id)) { + return; + } mtr_t mtr; mtr.start(); if (buf_block_t* b = buf_page_get( diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc index c21c9497735..eaadaad851d 100644 --- a/storage/innobase/fsp/fsp0file.cc +++ b/storage/innobase/fsp/fsp0file.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -770,6 +770,10 @@ the double write buffer. bool Datafile::restore_from_doublewrite() { + if (srv_operation != SRV_OPERATION_NORMAL) { + return true; + } + /* Find if double write buffer contains page_no of given space id. */ const byte* page = recv_sys->dblwr.find_page(m_space_id, 0); const page_id_t page_id(m_space_id, 0); diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc index c459c8296e0..e9f04cbd6c5 100644 --- a/storage/innobase/fsp/fsp0sysspace.cc +++ b/storage/innobase/fsp/fsp0sysspace.cc @@ -567,8 +567,9 @@ SysTablespace::read_lsn_and_check_flags(lsn_t* flushed_lsn) ut_a(it->order() == 0); - - buf_dblwr_init_or_load_pages(it->handle(), it->filepath()); + if (srv_operation == SRV_OPERATION_NORMAL) { + buf_dblwr_init_or_load_pages(it->handle(), it->filepath()); + } /* Check the contents of the first page of the first datafile. */ diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 59488d15750..45be56e1997 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2017, MariaDB Corporation. +Copyright (c) 2016, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2008,7 +2008,7 @@ fts_create_one_index_table( ? DATA_VARCHAR : DATA_VARMYSQL, field->col->prtype, FTS_MAX_WORD_LEN_IN_CHAR - * DATA_MBMAXLEN(field->col->mbminmaxlen)); + * field->col->mbmaxlen); dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, @@ -4567,10 +4567,17 @@ fts_add_token( t_str.f_str = static_cast<byte*>( mem_heap_alloc(heap, t_str.f_len)); - newlen = innobase_fts_casedn_str( - result_doc->charset, - reinterpret_cast<char*>(str.f_str), str.f_len, - reinterpret_cast<char*>(t_str.f_str), t_str.f_len); + /* For binary collations, a case sensitive search is + performed. Hence don't convert to lower case. */ + if (my_binary_compare(result_doc->charset)) { + memcpy(t_str.f_str, str.f_str, str.f_len); + t_str.f_str[str.f_len]= 0; + newlen= str.f_len; + } else { + newlen = innobase_fts_casedn_str( + result_doc->charset, (char*) str.f_str, str.f_len, + (char*) t_str.f_str, t_str.f_len); + } t_str.f_len = newlen; t_str.f_str[newlen] = 0; @@ -7525,8 +7532,7 @@ fts_init_recover_doc( &doc.text.f_len, static_cast<byte*>(dfield_get_data(dfield)), dict_table_page_size(table), len, - static_cast<mem_heap_t*>(doc.self_heap->arg) - ); + static_cast<mem_heap_t*>(doc.self_heap->arg)); } else { doc.text.f_str = static_cast<byte*>( dfield_get_data(dfield)); diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc index 1b03e041871..b9977bd31e5 100644 --- a/storage/innobase/fts/fts0que.cc +++ b/storage/innobase/fts/fts0que.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2017,8 +2017,7 @@ fts_query_fetch_document( if (dfield_is_ext(dfield)) { data = btr_copy_externally_stored_field( &cur_len, data, phrase->page_size, - dfield_get_len(dfield), phrase->heap - ); + dfield_get_len(dfield), phrase->heap); } else { cur_len = dfield_get_len(dfield); } @@ -4030,9 +4029,17 @@ fts_query( lc_query_str_len = query_len * charset->casedn_multiply + 1; lc_query_str = static_cast<byte*>(ut_malloc_nokey(lc_query_str_len)); + /* For binary collations, a case sensitive search is + performed. Hence don't convert to lower case. */ + if (my_binary_compare(charset)) { + memcpy(lc_query_str, query_str, query_len); + lc_query_str[query_len]= 0; + result_len= query_len; + } else { result_len = innobase_fts_casedn_str( - charset, (char*) query_str, query_len, - (char*) lc_query_str, lc_query_str_len); + charset, (char*)( query_str), query_len, + (char*)(lc_query_str), lc_query_str_len); + } ut_ad(result_len < lc_query_str_len); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index e46cf823900..28c2615db83 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -13546,25 +13546,6 @@ innobase_rename_table( error = row_rename_table_for_mysql(norm_from, norm_to, trx, TRUE); - if (error == DB_TABLE_NOT_FOUND) { - /* May be partitioned table, which consists of partitions - named table_name#P#partition_name[#SP#subpartition_name]. - - We are doing a DDL operation. */ - ++trx->will_lock; - trx_start_if_not_started(trx, true); - error = row_rename_partitions_for_mysql(norm_from, norm_to, - trx); - if (error == DB_TABLE_NOT_FOUND) { - ib::error() << "Table " << ut_get_name(trx, norm_from) - << " does not exist in the InnoDB internal" - " data dictionary though MariaDB is trying to" - " rename the table. Have you copied the .frm" - " file of the table to the MariaDB database" - " directory from another database? " - << TROUBLESHOOTING_MSG; - } - } if (error != DB_SUCCESS) { if (error == DB_TABLE_NOT_FOUND && innobase_get_lower_case_table_names() == 1) { @@ -18100,7 +18081,7 @@ innodb_make_page_dirty( return; } - if (srv_saved_page_number_debug > space->size) { + if (srv_saved_page_number_debug >= space->size) { fil_space_release(space); return; } @@ -21631,7 +21612,7 @@ innobase_get_computed_value( if (max_prefix) { len = dtype_get_at_most_n_mbchars( col->m_col.prtype, - col->m_col.mbminmaxlen, + col->m_col.mbminlen, col->m_col.mbmaxlen, max_prefix, field->len, static_cast<char*>(dfield_get_data(field))); @@ -21968,8 +21949,19 @@ innodb_buffer_pool_size_validate( *static_cast<longlong*>(save) = requested_buf_pool_size; + if (srv_buf_pool_size == static_cast<ulint>(intbuf)) { + buf_pool_mutex_exit_all(); + /* nothing to do */ + return(0); + } + if (srv_buf_pool_size == requested_buf_pool_size) { buf_pool_mutex_exit_all(); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "innodb_buffer_pool_size must be at least" + " innodb_buffer_pool_chunk_size=%lu", + srv_buf_pool_chunk_unit); /* nothing to do */ return(0); } diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 0e6d0f5e8ff..5519d8304b9 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -1919,8 +1919,7 @@ innobase_col_to_mysql( #ifdef UNIV_DEBUG case DATA_MYSQL: ut_ad(flen >= len); - ut_ad(DATA_MBMAXLEN(col->mbminmaxlen) - >= DATA_MBMINLEN(col->mbminmaxlen)); + ut_ad(col->mbmaxlen >= col->mbminlen); memcpy(dest, data, len); break; @@ -5255,8 +5254,10 @@ new_clustered_failed: = user_table->indexes.start); DBUG_ASSERT(col->mtype == old_col->mtype); DBUG_ASSERT(col->prtype == old_col->prtype); - DBUG_ASSERT(col->mbminmaxlen - == old_col->mbminmaxlen); + DBUG_ASSERT(col->mbminlen + == old_col->mbminlen); + DBUG_ASSERT(col->mbmaxlen + == old_col->mbmaxlen); DBUG_ASSERT(col->len >= old_col->len); DBUG_ASSERT(old_col->is_instant() == (dict_col_get_clust_pos( diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h index 3ade7746f0f..7e1c362cf8d 100644 --- a/storage/innobase/include/data0type.h +++ b/storage/innobase/include/data0type.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -209,16 +209,7 @@ store the charset-collation number; one byte is left unused, though */ #define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6 /* Maximum multi-byte character length in bytes, plus 1 */ -#define DATA_MBMAX 5 - -/* Pack mbminlen, mbmaxlen to mbminmaxlen. */ -#define DATA_MBMINMAXLEN(mbminlen, mbmaxlen) \ - unsigned((mbmaxlen) * DATA_MBMAX + (mbminlen)) -/* Get mbminlen from mbminmaxlen. */ -#define DATA_MBMINLEN(mbminmaxlen) \ - unsigned(UNIV_EXPECT((mbminmaxlen) % DATA_MBMAX, 1)) -/* Get mbmaxlen from mbminmaxlen. */ -#define DATA_MBMAXLEN(mbminmaxlen) unsigned((mbminmaxlen) / DATA_MBMAX) +#define DATA_MBMAX 8 /* For checking if mtype is GEOMETRY datatype */ #define DATA_GEOMETRY_MTYPE(mtype) ((mtype) == DATA_GEOMETRY) @@ -261,8 +252,10 @@ ulint dtype_get_at_most_n_mbchars( /*========================*/ ulint prtype, /*!< in: precise type */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of - a multi-byte character */ + ulint mbminlen, /*!< in: minimum length of + a multi-byte character, in bytes */ + ulint mbmaxlen, /*!< in: maximum length of + a multi-byte character, in bytes */ ulint prefix_len, /*!< in: length of the requested prefix, in characters, multiplied by dtype_get_mbmaxlen(dtype) */ @@ -405,19 +398,6 @@ ulint dtype_get_mbmaxlen( /*===============*/ const dtype_t* type); /*!< in: type */ -/*********************************************************************//** -Sets the minimum and maximum length of a character, in bytes. */ -UNIV_INLINE -void -dtype_set_mbminmaxlen( -/*==================*/ - dtype_t* type, /*!< in/out: type */ - ulint mbminlen, /*!< in: minimum length of a char, - in bytes, or 0 if this is not - a character type */ - ulint mbmaxlen); /*!< in: maximum length of a char, - in bytes, or 0 if this is not - a character type */ /***********************************************************************//** Returns the size of a fixed size data type, 0 if not a fixed size type. @return fixed size, or 0 */ @@ -428,7 +408,9 @@ dtype_get_fixed_size_low( ulint mtype, /*!< in: main type */ ulint prtype, /*!< in: precise type */ ulint len, /*!< in: length */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of a + ulint mbminlen, /*!< in: minimum length of a + multibyte character, in bytes */ + ulint mbmaxlen, /*!< in: maximum length of a multibyte character, in bytes */ ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */ @@ -442,8 +424,8 @@ dtype_get_min_size_low( ulint mtype, /*!< in: main type */ ulint prtype, /*!< in: precise type */ ulint len, /*!< in: length */ - ulint mbminmaxlen); /*!< in: minimum and maximum length of a - multibyte character */ + ulint mbminlen, /*!< in: minimum length of a character */ + ulint mbmaxlen); /*!< in: maximum length of a character */ /***********************************************************************//** Returns the maximum size of a data type. Note: types in system tables may be incomplete and return incorrect information. @@ -555,11 +537,10 @@ struct dtype_t{ string data (in addition to the string, MySQL uses 1 or 2 bytes to store the string length) */ - unsigned mbminmaxlen:5; /*!< minimum and maximum length of a - character, in bytes; - DATA_MBMINMAXLEN(mbminlen,mbmaxlen); - mbminlen=DATA_MBMINLEN(mbminmaxlen); - mbmaxlen=DATA_MBMINLEN(mbminmaxlen) */ + unsigned mbminlen:3; /*!< minimum length of a character, + in bytes */ + unsigned mbmaxlen:3; /*!< maximum length of a character, + in bytes */ /** @return whether this is system field */ bool vers_sys_field() const { return prtype & DATA_VERSIONED; } diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic index 0af2dcf9fa1..c0b32953cff 100644 --- a/storage/innobase/include/data0type.ic +++ b/storage/innobase/include/data0type.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -101,27 +101,6 @@ dtype_get_mblen( } /*********************************************************************//** -Sets the minimum and maximum length of a character, in bytes. */ -UNIV_INLINE -void -dtype_set_mbminmaxlen( -/*==================*/ - dtype_t* type, /*!< in/out: type */ - ulint mbminlen, /*!< in: minimum length of a char, - in bytes, or 0 if this is not - a character type */ - ulint mbmaxlen) /*!< in: maximum length of a char, - in bytes, or 0 if this is not - a character type */ -{ - ut_ad(mbminlen < DATA_MBMAX); - ut_ad(mbmaxlen < DATA_MBMAX); - ut_ad(mbminlen <= mbmaxlen); - - type->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen); -} - -/*********************************************************************//** Compute the mbminlen and mbmaxlen members of a data type structure. */ UNIV_INLINE void @@ -133,7 +112,8 @@ dtype_set_mblen( ulint mbmaxlen; dtype_get_mblen(type->mtype, type->prtype, &mbminlen, &mbmaxlen); - dtype_set_mbminmaxlen(type, mbminlen, mbmaxlen); + type->mbminlen = mbminlen; + type->mbmaxlen = mbmaxlen; ut_ad(dtype_validate(type)); } @@ -225,8 +205,7 @@ dtype_get_mbminlen( /*===============*/ const dtype_t* type) /*!< in: type */ { - ut_ad(type); - return(DATA_MBMINLEN(type->mbminmaxlen)); + return type->mbminlen; } /*********************************************************************//** Gets the maximum length of a character, in bytes. @@ -238,8 +217,7 @@ dtype_get_mbmaxlen( /*===============*/ const dtype_t* type) /*!< in: type */ { - ut_ad(type); - return(DATA_MBMAXLEN(type->mbminmaxlen)); + return type->mbmaxlen; } /**********************************************************************//** @@ -477,8 +455,10 @@ dtype_get_fixed_size_low( ulint mtype, /*!< in: main type */ ulint prtype, /*!< in: precise type */ ulint len, /*!< in: length */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of - a multibyte character, in bytes */ + ulint mbminlen, /*!< in: minimum length of a + multibyte character, in bytes */ + ulint mbmaxlen, /*!< in: maximum length of a + multibyte character, in bytes */ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ { switch (mtype) { @@ -518,11 +498,10 @@ dtype_get_fixed_size_low( dtype_get_charset_coll(prtype), &i_mbminlen, &i_mbmaxlen); - ut_ad(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen) - == mbminmaxlen); + ut_ad(i_mbminlen == mbminlen); + ut_ad(i_mbmaxlen == mbmaxlen); #endif /* UNIV_DEBUG */ - if (DATA_MBMINLEN(mbminmaxlen) - == DATA_MBMAXLEN(mbminmaxlen)) { + if (mbminlen == mbmaxlen) { return(len); } } @@ -552,8 +531,8 @@ dtype_get_min_size_low( ulint mtype, /*!< in: main type */ ulint prtype, /*!< in: precise type */ ulint len, /*!< in: length */ - ulint mbminmaxlen) /*!< in: minimum and maximum length of a - multi-byte character */ + ulint mbminlen, /*!< in: minimum length of a character */ + ulint mbmaxlen) /*!< in: maximum length of a character */ { switch (mtype) { case DATA_SYS: @@ -583,9 +562,6 @@ dtype_get_min_size_low( if (prtype & DATA_BINARY_TYPE) { return(len); } else { - ulint mbminlen = DATA_MBMINLEN(mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN(mbminmaxlen); - if (mbminlen == mbmaxlen) { return(len); } @@ -656,5 +632,5 @@ dtype_get_sql_null_size( ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ { return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len, - type->mbminmaxlen, comp)); + type->mbminlen, type->mbmaxlen, comp)); } diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 74ea8daefc9..4356ee113de 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -2,7 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 2013, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -182,18 +182,6 @@ dict_col_get_mbmaxlen( const dict_col_t* col) /*!< in: column */ MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************************//** -Sets the minimum and maximum number of bytes per character. */ -UNIV_INLINE -void -dict_col_set_mbminmaxlen( -/*=====================*/ - dict_col_t* col, /*!< in/out: column */ - ulint mbminlen, /*!< in: minimum multi-byte - character size, in bytes */ - ulint mbmaxlen) /*!< in: minimum multi-byte - character size, in bytes */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** Gets the column data type. */ UNIV_INLINE void diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 44a6d5e5bda..e1c2c71bc0a 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 2013, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -41,7 +41,7 @@ dict_col_get_mbminlen( /*==================*/ const dict_col_t* col) /*!< in: column */ { - return(DATA_MBMINLEN(col->mbminmaxlen)); + return col->mbminlen; } /*********************************************************************//** Gets the maximum number of bytes per character. @@ -52,25 +52,7 @@ dict_col_get_mbmaxlen( /*==================*/ const dict_col_t* col) /*!< in: column */ { - return(DATA_MBMAXLEN(col->mbminmaxlen)); -} -/*********************************************************************//** -Sets the minimum and maximum number of bytes per character. */ -UNIV_INLINE -void -dict_col_set_mbminmaxlen( -/*=====================*/ - dict_col_t* col, /*!< in/out: column */ - ulint mbminlen, /*!< in: minimum multi-byte - character size, in bytes */ - ulint mbmaxlen) /*!< in: minimum multi-byte - character size, in bytes */ -{ - ut_ad(mbminlen < DATA_MBMAX); - ut_ad(mbmaxlen < DATA_MBMAX); - ut_ad(mbminlen <= mbmaxlen); - - col->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen); + return col->mbmaxlen; } /*********************************************************************//** Gets the column data type. */ @@ -87,7 +69,8 @@ dict_col_copy_type( type->mtype = col->mtype; type->prtype = col->prtype; type->len = col->len; - type->mbminmaxlen = col->mbminmaxlen; + type->mbminlen = col->mbminlen; + type->mbmaxlen = col->mbmaxlen; } #ifdef UNIV_DEBUG @@ -107,7 +90,8 @@ dict_col_type_assert_equal( ut_ad(col->mtype == type->mtype); ut_ad(col->prtype == type->prtype); //ut_ad(col->len == type->len); - ut_ad(col->mbminmaxlen == type->mbminmaxlen); + ut_ad(col->mbminlen == type->mbminlen); + ut_ad(col->mbmaxlen == type->mbmaxlen); return(TRUE); } @@ -123,7 +107,7 @@ dict_col_get_min_size( const dict_col_t* col) /*!< in: column */ { return(dtype_get_min_size_low(col->mtype, col->prtype, col->len, - col->mbminmaxlen)); + col->mbminlen, col->mbmaxlen)); } /***********************************************************************//** Returns the maximum size of the column. @@ -147,7 +131,7 @@ dict_col_get_fixed_size( ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ { return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len, - col->mbminmaxlen, comp)); + col->mbminlen, col->mbmaxlen, comp)); } /***********************************************************************//** Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column. diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 527a9c301a6..428c418629e 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -298,7 +298,7 @@ result in recursive cascading calls. This defines the maximum number of such cascading deletes/updates allowed. When exceeded, the delete from parent table will fail, and user has to drop excessive foreign constraint before proceeds. */ -#define FK_MAX_CASCADE_DEL 255 +#define FK_MAX_CASCADE_DEL 15 /**********************************************************************//** Creates a table memory object. @@ -617,11 +617,10 @@ struct dict_col_t{ the string, MySQL uses 1 or 2 bytes to store the string length) */ - unsigned mbminmaxlen:5; /*!< minimum and maximum length of a - character, in bytes; - DATA_MBMINMAXLEN(mbminlen,mbmaxlen); - mbminlen=DATA_MBMINLEN(mbminmaxlen); - mbmaxlen=DATA_MBMINLEN(mbminmaxlen) */ + unsigned mbminlen:3; /*!< minimum length of a + character, in bytes */ + unsigned mbmaxlen:3; /*!< maximum length of a + character, in bytes */ /*----------------------*/ /* End of definitions copied from dtype_t */ /* @} */ diff --git a/storage/innobase/include/fsp0file.h b/storage/innobase/include/fsp0file.h index 1f057be0877..68e9f687fcd 100644 --- a/storage/innobase/include/fsp0file.h +++ b/storage/innobase/include/fsp0file.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -363,7 +364,7 @@ private: @param[in] read_only_mode if true, then readonly mode checks are enforced. @return DB_SUCCESS or DB_IO_ERROR if page cannot be read */ - dberr_t read_first_page(bool read_first_page) + dberr_t read_first_page(bool read_only_mode) MY_ATTRIBUTE((warn_unused_result)); /** Free the first page from memory when it is no longer needed. */ diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic index 9c996b375d5..dbad7cb6950 100644 --- a/storage/innobase/include/mem0mem.ic +++ b/storage/innobase/include/mem0mem.ic @@ -280,8 +280,7 @@ mem_heap_free_heap_top( mem_block_set_free(block, old_top - (byte*) block); ut_ad(mem_block_get_start(block) <= mem_block_get_free(block)); - UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top); - UNIV_MEM_ALLOC(old_top, (byte*) block + block->len - old_top); + UNIV_MEM_FREE(old_top, (byte*) block + block->len - old_top); /* If free == start, we may free the block if it is not the first one */ @@ -445,7 +444,6 @@ mem_heap_free_top( /* Subtract the free field of block */ mem_block_set_free(block, mem_block_get_free(block) - MEM_SPACE_NEEDED(n)); - UNIV_MEM_ASSERT_W((byte*) block + mem_block_get_free(block), n); /* If free == start, we may free the block if it is not the first one */ @@ -454,11 +452,7 @@ mem_heap_free_top( == mem_block_get_start(block))) { mem_heap_block_free(heap, block); } else { - /* Avoid a bogus UNIV_MEM_ASSERT_W() warning in a - subsequent invocation of mem_heap_free_top(). - Originally, this was UNIV_MEM_FREE(), to catch writes - to freed memory. */ - UNIV_MEM_ALLOC((byte*) block + mem_block_get_free(block), n); + UNIV_MEM_FREE((byte*) block + mem_block_get_free(block), n); } } diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 2c527a0980b..6639a3448ea 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -32,7 +32,6 @@ Created 11/26/1995 Heikki Tuuri #include "log0types.h" #include "mtr0types.h" #include "buf0types.h" -#include "trx0types.h" #include "dyn0buf.h" /** Start a mini-transaction. */ @@ -320,7 +319,7 @@ struct mtr_t { the same set of tablespaces as this one */ void set_spaces(const mtr_t& mtr) { - ut_ad(m_impl.m_user_space_id == TRX_SYS_SPACE); + ut_ad(!m_impl.m_user_space_id); ut_ad(!m_impl.m_user_space); ut_ad(!m_impl.m_undo_space); ut_ad(!m_impl.m_sys_space); @@ -337,9 +336,9 @@ struct mtr_t { @return the tablespace */ fil_space_t* set_named_space(ulint space_id) { - ut_ad(m_impl.m_user_space_id == TRX_SYS_SPACE); + ut_ad(!m_impl.m_user_space_id); ut_d(m_impl.m_user_space_id = space_id); - if (space_id == TRX_SYS_SPACE) { + if (!space_id) { return(set_sys_modified()); } else { lookup_user_space(space_id); diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic index 4784f221084..bc9006a66e8 100644 --- a/storage/innobase/include/rem0rec.ic +++ b/storage/innobase/include/rem0rec.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -895,7 +895,7 @@ rec_offs_set_n_alloc( { ut_ad(offsets); ut_ad(n_alloc > REC_OFFS_HEADER_SIZE); - UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets); + UNIV_MEM_ALLOC(offsets, n_alloc * sizeof *offsets); offsets[0] = n_alloc; } diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index 77b8337ac91..61a363d6de8 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -310,6 +310,18 @@ row_create_update_node_for_mysql( /*=============================*/ dict_table_t* table, /*!< in: table to update */ mem_heap_t* heap); /*!< in: mem heap from which allocated */ + +/**********************************************************************//** +Does a cascaded delete or set null in a foreign key operation. +@return error code or DB_SUCCESS */ +dberr_t +row_update_cascade_for_mysql( +/*=========================*/ + que_thr_t* thr, /*!< in: query thread */ + upd_node_t* node, /*!< in: update node used in the cascade + or set null operation */ + dict_table_t* table) /*!< in: table where we do the operation */ + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************************//** Locks the data dictionary exclusively for performing a table create or other data dictionary modification operation. */ @@ -511,18 +523,6 @@ row_rename_table_for_mysql( bool commit) /*!< in: whether to commit trx */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/** Renames a partitioned table for MySQL. -@param[in] old_name Old table name. -@param[in] new_name New table name. -@param[in,out] trx Transaction. -@return error code or DB_SUCCESS */ -dberr_t -row_rename_partitions_for_mysql( - const char* old_name, - const char* new_name, - trx_t* trx) - MY_ATTRIBUTE((nonnull, warn_unused_result)); - /*********************************************************************//** Scans an index for either COOUNT(*) or CHECK TABLE. If CHECK TABLE; Checks that the index contains entries in an ascending order, diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h index 877b44a5b07..01fc6cda6ae 100644 --- a/storage/innobase/include/row0upd.h +++ b/storage/innobase/include/row0upd.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -32,20 +32,10 @@ Created 12/27/1996 Heikki Tuuri #include "btr0types.h" #include "dict0types.h" #include "trx0types.h" -#include <stack> #include "btr0pcur.h" #include "que0types.h" #include "pars0types.h" -/** The std::deque to store cascade update nodes, that uses mem_heap_t -as allocator. */ -typedef std::deque<upd_node_t*, mem_heap_allocator<upd_node_t*> > - deque_mem_heap_t; - -/** Double-ended queue of update nodes to be processed for cascade -operations */ -typedef deque_mem_heap_t upd_cascade_t; - /*********************************************************************//** Creates an update vector object. @return own: update vector object */ @@ -136,8 +126,7 @@ row_upd_rec_sys_fields( dict_index_t* index, /*!< in: clustered index */ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ const trx_t* trx, /*!< in: transaction */ - roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record, - can be 0 during IMPORT */ + roll_ptr_t roll_ptr);/*!< in: DB_ROLL_PTR to the undo log */ /*********************************************************************//** Sets the trx id or roll ptr field of a clustered index entry. */ void @@ -534,38 +523,12 @@ struct upd_node_t{ dict_foreign_t* foreign;/* NULL or pointer to a foreign key constraint if this update node is used in doing an ON DELETE or ON UPDATE operation */ - - bool cascade_top; - /*!< true if top level in cascade */ - - upd_cascade_t* cascade_upd_nodes; - /*!< Queue of update nodes to handle the - cascade of update and delete operations in an - iterative manner. Their parent/child - relations are properly maintained. All update - nodes point to this same queue. All these - nodes are allocated in heap pointed to by - upd_node_t::cascade_heap. */ - - upd_cascade_t* new_upd_nodes; - /*!< Intermediate list of update nodes in a - cascading update/delete operation. After - processing one update node, this will be - concatenated to cascade_upd_nodes. This extra - list is needed so that retry because of - DB_LOCK_WAIT works corrrectly. */ - - upd_cascade_t* processed_cascades; - /*!< List of processed update nodes in a - cascading update/delete operation. All the - cascade nodes are stored here, so that memory - can be freed. */ - + upd_node_t* cascade_node;/* NULL or an update node template which + is used to implement ON DELETE/UPDATE CASCADE + or ... SET NULL for foreign keys */ mem_heap_t* cascade_heap; - /*!< NULL or a mem heap where cascade_upd_nodes - are created. This heap is owned by the node - that has cascade_top=true. */ - + /*!< NULL or a mem heap where cascade + node is created.*/ sel_node_t* select; /*!< query graph subtree implementing a base table cursor: the rows returned will be updated */ @@ -612,25 +575,8 @@ struct upd_node_t{ sym_node_t* table_sym;/* table node in symbol table */ que_node_t* col_assign_list; /* column assignment list */ - - doc_id_t fts_doc_id; - /* The FTS doc id of the row that is now - pointed to by the pcur. */ - - doc_id_t fts_next_doc_id; - /* The new fts doc id that will be used - in update operation */ - ulint magic_n; -#ifndef DBUG_OFF - /** Print information about this object into the trace log file. */ - void dbug_trace(); - - /** Ensure that the member cascade_upd_nodes has only one update node - for each of the tables. This is useful for testing purposes. */ - void check_cascade_only_once(); -#endif /* !DBUG_OFF */ }; #define UPD_NODE_MAGIC_N 1579975 diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic index 11271d6e9af..364c876ecc7 100644 --- a/storage/innobase/include/row0upd.ic +++ b/storage/innobase/include/row0upd.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -165,8 +165,7 @@ row_upd_rec_sys_fields( dict_index_t* index, /*!< in: clustered index */ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ const trx_t* trx, /*!< in: transaction */ - roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record, - can be 0 during IMPORT */ + roll_ptr_t roll_ptr)/*!< in: DB_ROLL_PTR to the undo log */ { ut_ad(dict_index_is_clust(index)); ut_ad(rec_offs_validate(rec, index, offsets)); diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 80c0b5476b0..ba1256feac2 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 2013, 2018, MariaDB Corporation. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -41,7 +41,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 7 -#define INNODB_VERSION_BUGFIX 20 +#define INNODB_VERSION_BUGFIX 21 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -647,14 +647,6 @@ typedef void* os_thread_ret_t; # define UNIV_MEM_ASSERT_W(addr, size) do {} while(0) # define UNIV_MEM_TRASH(addr, c, size) do {} while(0) #endif -#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \ - UNIV_MEM_ASSERT_W(addr, size); \ - UNIV_MEM_FREE(addr, size); \ -} while (0) -#define UNIV_MEM_ASSERT_AND_ALLOC(addr, size) do { \ - UNIV_MEM_ASSERT_W(addr, size); \ - UNIV_MEM_ALLOC(addr, size); \ -} while (0) extern ulong srv_page_size_shift; extern ulong srv_page_size; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index e823158acf1..b45e7d77dfb 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2014, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -2565,7 +2565,10 @@ RecLock::lock_add_priority( lock_t* grant_position = NULL; lock_t* add_position = NULL; - HASH_SEARCH(hash, lock_sys->rec_hash, m_rec_id.fold(), lock_t*, + /* Different lock (such as predicate lock) are on different hash */ + hash_table_t* lock_hash = lock_hash_get(m_mode); + + HASH_SEARCH(hash, lock_hash, m_rec_id.fold(), lock_t*, lock_head, ut_ad(lock_head->is_record_lock()), true); ut_ad(lock_head); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 7c6369e57c2..59f27709586 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -3079,7 +3079,9 @@ recv_init_crash_recovery_spaces() << "', but there were no modifications either."; } - buf_dblwr_process(); + if (srv_operation == SRV_OPERATION_NORMAL) { + buf_dblwr_process(); + } if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { /* Spawn the background thread to flush dirty pages diff --git a/storage/innobase/mem/mem0mem.cc b/storage/innobase/mem/mem0mem.cc index 0d655cdd6c8..b4a4e8eec4a 100644 --- a/storage/innobase/mem/mem0mem.cc +++ b/storage/innobase/mem/mem0mem.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -407,15 +407,11 @@ mem_heap_block_free( len = block->len; block->magic_n = MEM_FREED_BLOCK_MAGIC_N; - UNIV_MEM_ASSERT_W(block, len); - if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) { - ut_ad(!buf_block); ut_free(block); } else { ut_ad(type & MEM_HEAP_BUFFER); - buf_block_free(buf_block); } } diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index b5437e88ea3..942b512206f 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 2013, 2018, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -4995,7 +4995,7 @@ os_file_write_func( if ((ulint) n_bytes != n && !os_has_said_disk_full) { ib::error() - << "Write to file " << name << "failed at offset " + << "Write to file " << name << " failed at offset " << offset << ", " << n << " bytes should have been written," " only " << n_bytes << " were written." diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index f28c6355b16..4ef37b81c29 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -2,7 +2,7 @@ Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2017, MariaDB Corporation. +Copyright (c) 2014, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -3714,8 +3714,9 @@ page_zip_write_rec( /* Copy the delete mark. */ if (rec_get_deleted_flag(rec, TRUE)) { /* In delete-marked records, DB_TRX_ID must - always refer to an existing undo log record. */ - ut_ad(!dict_index_is_clust(index) + always refer to an existing undo log record. + On non-leaf pages, the delete-mark flag is garbage. */ + ut_ad(!index->is_primary() || !page_is_leaf(page) || row_get_rec_trx_id(rec, index, offsets)); *slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8; } else { diff --git a/storage/innobase/que/que0que.cc b/storage/innobase/que/que0que.cc index a45ac188f2f..47c41519b40 100644 --- a/storage/innobase/que/que0que.cc +++ b/storage/innobase/que/que0que.cc @@ -482,20 +482,17 @@ que_graph_free_recursive( case QUE_NODE_UPDATE: upd = static_cast<upd_node_t*>(node); - DBUG_PRINT("que_graph_free_recursive", - ("QUE_NODE_UPDATE: %p, processed_cascades: %p", - upd, upd->processed_cascades)); - if (upd->in_mysql_interface) { btr_pcur_free_for_mysql(upd->pcur); upd->in_mysql_interface = FALSE; } - if (upd->cascade_top) { + que_graph_free_recursive(upd->cascade_node); + + if (upd->cascade_heap) { mem_heap_free(upd->cascade_heap); upd->cascade_heap = NULL; - upd->cascade_top = false; } que_graph_free_recursive(upd->select); diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc index 142aff4270c..dcc435c091f 100644 --- a/storage/innobase/rem/rem0rec.cc +++ b/storage/innobase/rem/rem0rec.cc @@ -1154,21 +1154,18 @@ rec_get_converted_size_comp_prefix_low( if (fixed_len) { #ifdef UNIV_DEBUG - ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen); - ut_ad(len <= fixed_len); if (dict_index_is_spatial(index)) { ut_ad(type->mtype == DATA_SYS_CHILD - || !mbmaxlen - || len >= mbminlen * (fixed_len - / mbmaxlen)); + || !col->mbmaxlen + || len >= col->mbminlen + * fixed_len / col->mbmaxlen); } else { ut_ad(type->mtype != DATA_SYS_CHILD); - ut_ad(!mbmaxlen - || len >= mbminlen * (fixed_len - / mbmaxlen)); + ut_ad(!col->mbmaxlen + || len >= col->mbminlen + * fixed_len / col->mbmaxlen); } /* dict_index_add_col() should guarantee this */ @@ -1567,15 +1564,11 @@ rec_convert_dtuple_to_rec_comp( 0..127. The length will be encoded in two bytes when it is 128 or more, or when the field is stored externally. */ if (fixed_len) { -#ifdef UNIV_DEBUG - ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen); - ut_ad(len <= fixed_len); - ut_ad(!mbmaxlen || len >= mbminlen - * (fixed_len / mbmaxlen)); + ut_ad(!col->mbmaxlen + || len >= col->mbminlen + * fixed_len / col->mbmaxlen); ut_ad(!dfield_is_ext(field)); -#endif /* UNIV_DEBUG */ } else if (dfield_is_ext(field)) { ut_ad(DATA_BIG_COL(col)); ut_ad(len <= REC_ANTELOPE_MAX_INDEX_COL_LEN diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc index 7ebf7af5dbf..b467f9d1615 100644 --- a/storage/innobase/row/row0ftsort.cc +++ b/storage/innobase/row/row0ftsort.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. +Copyright (c) 2015, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -108,8 +108,9 @@ row_merge_create_fts_sort_index( field->col->prtype = idx_field->col->prtype | DATA_NOT_NULL; field->col->mtype = charset == &my_charset_latin1 ? DATA_VARCHAR : DATA_VARMYSQL; - field->col->mbminmaxlen = idx_field->col->mbminmaxlen; - field->col->len = HA_FT_MAXCHARLEN * DATA_MBMAXLEN(field->col->mbminmaxlen); + field->col->mbminlen = idx_field->col->mbminlen; + field->col->mbmaxlen = idx_field->col->mbmaxlen; + field->col->len = HA_FT_MAXCHARLEN * field->col->mbmaxlen; field->fixed_len = 0; @@ -152,7 +153,8 @@ row_merge_create_fts_sort_index( field->col->prtype = DATA_NOT_NULL | DATA_BINARY_TYPE; - field->col->mbminmaxlen = 0; + field->col->mbminlen = 0; + field->col->mbmaxlen = 0; /* The third field is on the word's position in the original doc */ field = dict_index_get_nth_field(new_index, 2); @@ -164,7 +166,8 @@ row_merge_create_fts_sort_index( field->col->len = 4 ; field->fixed_len = 4; field->col->prtype = DATA_NOT_NULL; - field->col->mbminmaxlen = 0; + field->col->mbminlen = 0; + field->col->mbmaxlen = 0; return(new_index); } @@ -658,7 +661,8 @@ row_merge_fts_doc_tokenize( field->type.mtype = DATA_INT; field->type.prtype = DATA_NOT_NULL | DATA_BINARY_TYPE; field->type.len = len; - field->type.mbminmaxlen = 0; + field->type.mbminlen = 0; + field->type.mbmaxlen = 0; cur_len += len; dfield_dup(field, buf->heap); @@ -681,7 +685,8 @@ row_merge_fts_doc_tokenize( field->type.mtype = DATA_INT; field->type.prtype = DATA_NOT_NULL; field->type.len = len; - field->type.mbminmaxlen = 0; + field->type.mbminlen = 0; + field->type.mbmaxlen = 0; cur_len += len; dfield_dup(field, buf->heap); diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index c2bf5e15036..6caffb8f0ab 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1200,7 +1200,8 @@ row_import::match_table_columns( err = DB_ERROR; } - if (cfg_col->mbminmaxlen != col->mbminmaxlen) { + if (cfg_col->mbminlen != col->mbminlen + || cfg_col->mbmaxlen != col->mbmaxlen) { ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH, @@ -2809,7 +2810,9 @@ row_import_read_columns( col->len = mach_read_from_4(ptr); ptr += sizeof(ib_uint32_t); - col->mbminmaxlen = mach_read_from_4(ptr); + ulint mbminmaxlen = mach_read_from_4(ptr); + col->mbmaxlen = mbminmaxlen / 5; + col->mbminlen = mbminmaxlen % 5; ptr += sizeof(ib_uint32_t); col->ind = mach_read_from_4(ptr); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index f4bac326359..49a05717f82 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -151,9 +151,11 @@ row_ins_alloc_sys_fields( compile_time_assert(DATA_ROW_ID_LEN + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN == sizeof node->sys_buf); - memset(node->sys_buf, 0, DATA_ROW_ID_LEN); - memcpy(node->sys_buf + DATA_ROW_ID_LEN, reset_trx_id, - sizeof reset_trx_id); + memset(node->sys_buf, 0, sizeof node->sys_buf); + /* Assign DB_ROLL_PTR to 1 << ROLL_PTR_INSERT_FLAG_POS */ + node->sys_buf[DATA_ROW_ID_LEN + DATA_TRX_ID_LEN] = 0x80; + ut_ad(!memcmp(node->sys_buf + DATA_ROW_ID_LEN, reset_trx_id, + sizeof reset_trx_id)); /* 1. Populate row-id */ col = dict_table_get_sys_col(table, DATA_ROW_ID); @@ -459,12 +461,9 @@ row_ins_cascade_n_ancestors( /******************************************************************//** Calculates the update vector node->cascade->update for a child table in a cascaded update. -@return number of fields in the calculated update vector; the value -can also be 0 if no foreign key fields changed; the returned value is -ULINT_UNDEFINED if the column type in the child table is too short to -fit the new value in the parent table: that means the update fails */ +@return whether any FULLTEXT INDEX is affected */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) -ulint +bool row_ins_cascade_calc_update_vec( /*============================*/ upd_node_t* node, /*!< in: update node of the parent @@ -473,11 +472,9 @@ row_ins_cascade_calc_update_vec( type is != 0 */ mem_heap_t* heap, /*!< in: memory heap to use as temporary storage */ - trx_t* trx, /*!< in: update transaction */ - ibool* fts_col_affected, - /*!< out: is FTS column affected */ - upd_node_t* cascade) /*!< in: cascade update node */ + trx_t* trx) /*!< in: update transaction */ { + upd_node_t* cascade = node->cascade_node; dict_table_t* table = foreign->foreign_table; dict_index_t* index = foreign->foreign_index; upd_t* update; @@ -488,7 +485,7 @@ row_ins_cascade_calc_update_vec( ulint parent_field_no; ulint i; ulint j; - ibool doc_id_updated = FALSE; + bool doc_id_updated = false; ulint doc_id_pos = 0; doc_id_t new_doc_id = FTS_NULL_DOC_ID; ulint prefix_col; @@ -515,7 +512,7 @@ row_ins_cascade_calc_update_vec( n_fields_updated = 0; - *fts_col_affected = FALSE; + bool affects_fulltext = false; if (table->fts) { doc_id_pos = dict_table_get_nth_col_pos( @@ -567,8 +564,7 @@ row_ins_cascade_calc_update_vec( if (dfield_is_null(&ufield->new_val) && (col->prtype & DATA_NOT_NULL)) { - - return(ULINT_UNDEFINED); + goto err_exit; } /* If the new value would not fit in the @@ -576,15 +572,15 @@ row_ins_cascade_calc_update_vec( if (!dfield_is_null(&ufield->new_val) && dtype_get_at_most_n_mbchars( - col->prtype, col->mbminmaxlen, + col->prtype, + col->mbminlen, col->mbmaxlen, col->len, ufield_len, static_cast<char*>( dfield_get_data( &ufield->new_val))) < ufield_len) { - - return(ULINT_UNDEFINED); + goto err_exit; } /* If the parent column type has a different @@ -628,7 +624,7 @@ row_ins_cascade_calc_update_vec( col->prtype) == DATA_MYSQL_BINARY_CHARSET_COLL) { /* Do not pad BINARY columns */ - return(ULINT_UNDEFINED); + goto err_exit; } row_mysql_pad_col(mbminlen, @@ -645,7 +641,7 @@ row_ins_cascade_calc_update_vec( dict_col_get_no(col), dict_col_is_virtual(col)) != ULINT_UNDEFINED) { - *fts_col_affected = TRUE; + affects_fulltext = true; } /* If Doc ID is updated, check whether the @@ -662,11 +658,14 @@ row_ins_cascade_calc_update_vec( dfield_get_data( &ufield->new_val))); + affects_fulltext = true; + doc_id_updated = true; + if (new_doc_id <= 0) { ib::error() << "FTS Doc ID" " must be larger than" " 0"; - return(ULINT_UNDEFINED); + goto err_exit; } if (new_doc_id < n_doc_id) { @@ -675,12 +674,8 @@ row_ins_cascade_calc_update_vec( << n_doc_id - 1 << " for table " << table->name; - - return(ULINT_UNDEFINED); + goto err_exit; } - - *fts_col_affected = TRUE; - doc_id_updated = TRUE; } n_fields_updated++; @@ -688,8 +683,9 @@ row_ins_cascade_calc_update_vec( } } - /* Generate a new Doc ID if FTS index columns get updated */ - if (table->fts && *fts_col_affected) { + if (affects_fulltext) { + ut_ad(table->fts); + if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { doc_id_t doc_id; doc_id_t* next_doc_id; @@ -703,24 +699,25 @@ row_ins_cascade_calc_update_vec( fts_get_next_doc_id(table, next_doc_id); doc_id = fts_update_doc_id(table, ufield, next_doc_id); n_fields_updated++; - cascade->fts_next_doc_id = doc_id; + fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL); } else { if (doc_id_updated) { ut_ad(new_doc_id); - cascade->fts_next_doc_id = new_doc_id; + fts_trx_add_op(trx, table, new_doc_id, + FTS_INSERT, NULL); } else { - cascade->fts_next_doc_id = FTS_NULL_DOC_ID; ib::error() << "FTS Doc ID must be updated" " along with FTS indexed column for" " table " << table->name; - return(ULINT_UNDEFINED); +err_exit: + n_fields_updated = ULINT_UNDEFINED; } } } update->n_fields = n_fields_updated; - return(n_fields_updated); + return affects_fulltext; } /*********************************************************************//** @@ -1070,13 +1067,10 @@ row_ins_foreign_check_on_constraint( const rec_t* clust_rec; const buf_block_t* clust_block; upd_t* update; - ulint n_to_update; dberr_t err; - ulint i; trx_t* trx; mem_heap_t* tmp_heap = NULL; doc_id_t doc_id = FTS_NULL_DOC_ID; - ibool fts_col_affacted = FALSE; DBUG_ENTER("row_ins_foreign_check_on_constraint"); ut_a(thr); @@ -1120,20 +1114,15 @@ row_ins_foreign_check_on_constraint( DBUG_RETURN(DB_ROW_IS_REFERENCED); } - cascade = row_create_update_node_for_mysql(table, node->cascade_heap); - que_node_set_parent(cascade, node); - - /* For the cascaded operation, all the update nodes are allocated in - the same heap. All the update nodes will point to the same heap. - This heap is owned by the first update node. And it must be freed - only in the first update node */ - cascade->cascade_heap = node->cascade_heap; - cascade->cascade_upd_nodes = node->cascade_upd_nodes; - cascade->new_upd_nodes = node->new_upd_nodes; - cascade->processed_cascades = node->processed_cascades; + if (node->cascade_node == NULL) { + node->cascade_heap = mem_heap_create(128); + node->cascade_node = row_create_update_node_for_mysql( + table, node->cascade_heap); + que_node_set_parent(node->cascade_node, node); + } + cascade = node->cascade_node; cascade->table = table; - cascade->foreign = foreign; if (node->is_delete @@ -1149,31 +1138,32 @@ row_ins_foreign_check_on_constraint( node->cascade_heap); cascade->update_n_fields = foreign->n_fields; } - } - /* We do not allow cyclic cascaded updating (DELETE is allowed, - but not UPDATE) of the same table, as this can lead to an infinite - cycle. Check that we are not updating the same table which is - already being modified in this cascade chain. We have to check - this also because the modification of the indexes of a 'parent' - table may still be incomplete, and we must avoid seeing the indexes - of the parent table in an inconsistent state! */ + /* We do not allow cyclic cascaded updating (DELETE is + allowed, but not UPDATE) of the same table, as this + can lead to an infinite cycle. Check that we are not + updating the same table which is already being + modified in this cascade chain. We have to check this + also because the modification of the indexes of a + 'parent' table may still be incomplete, and we must + avoid seeing the indexes of the parent table in an + inconsistent state! */ - if (!cascade->is_delete - && row_ins_cascade_ancestor_updates_table(cascade, table)) { + if (row_ins_cascade_ancestor_updates_table(cascade, table)) { - /* We do not know if this would break foreign key - constraints, but play safe and return an error */ + /* We do not know if this would break foreign key + constraints, but play safe and return an error */ - err = DB_ROW_IS_REFERENCED; + err = DB_ROW_IS_REFERENCED; - row_ins_foreign_report_err( - "Trying an update, possibly causing a cyclic" - " cascaded update\n" - "in the child table,", thr, foreign, - btr_pcur_get_rec(pcur), entry); + row_ins_foreign_report_err( + "Trying an update, possibly causing a cyclic" + " cascaded update\n" + "in the child table,", thr, foreign, + btr_pcur_get_rec(pcur), entry); - goto nonstandard_exit_func; + goto nonstandard_exit_func; + } } if (row_ins_cascade_n_ancestors(cascade) >= FK_MAX_CASCADE_DEL) { @@ -1278,17 +1268,8 @@ row_ins_foreign_check_on_constraint( if (node->is_delete ? (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) : (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)) { - /* Build the appropriate update vector which sets foreign->n_fields first fields in rec to SQL NULL */ - if (table->fts) { - - /* For the clause ON DELETE SET NULL, the cascade - operation is actually an update operation with the new - values being null. For FTS, this means that the old - values be deleted and no new values to be added.*/ - cascade->fts_next_doc_id = FTS_NULL_DOC_ID; - } update = cascade->update; @@ -1297,7 +1278,9 @@ row_ins_foreign_check_on_constraint( UNIV_MEM_INVALID(update->fields, update->n_fields * sizeof *update->fields); - for (i = 0; i < foreign->n_fields; i++) { + bool affects_fulltext = false; + + for (ulint i = 0; i < foreign->n_fields; i++) { upd_field_t* ufield = &update->fields[i]; ulint col_no = dict_index_get_nth_col_no( index, i); @@ -1313,18 +1296,19 @@ row_ins_foreign_check_on_constraint( ufield->exp = NULL; dfield_set_null(&ufield->new_val); - if (table->fts && dict_table_is_fts_column( - table->fts->indexes, - dict_index_get_nth_col_no(index, i), - dict_col_is_virtual( - dict_index_get_nth_col(index, i))) + if (!affects_fulltext + && table->fts && dict_table_is_fts_column( + table->fts->indexes, + dict_index_get_nth_col_no(index, i), + dict_col_is_virtual( + dict_index_get_nth_col(index, i))) != ULINT_UNDEFINED) { - fts_col_affacted = TRUE; + affects_fulltext = true; } } - if (fts_col_affacted) { - cascade->fts_doc_id = doc_id; + if (affects_fulltext) { + fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL); } if (foreign->v_cols != NULL @@ -1339,19 +1323,22 @@ row_ins_foreign_check_on_constraint( } } else if (table->fts && cascade->is_delete == PLAIN_DELETE) { /* DICT_FOREIGN_ON_DELETE_CASCADE case */ - for (i = 0; i < foreign->n_fields; i++) { - if (table->fts && dict_table_is_fts_column( + bool affects_fulltext = false; + + for (ulint i = 0; i < foreign->n_fields; i++) { + if (dict_table_is_fts_column( table->fts->indexes, dict_index_get_nth_col_no(index, i), dict_col_is_virtual( dict_index_get_nth_col(index, i))) != ULINT_UNDEFINED) { - fts_col_affacted = TRUE; + affects_fulltext = true; + break; } } - if (fts_col_affacted) { - cascade->fts_doc_id = doc_id; + if (affects_fulltext) { + fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL); } } @@ -1361,13 +1348,10 @@ row_ins_foreign_check_on_constraint( /* Build the appropriate update vector which sets changing foreign->n_fields first fields in rec to new values */ - n_to_update = row_ins_cascade_calc_update_vec( - node, foreign, cascade->cascade_heap, - trx, &fts_col_affacted, cascade); + bool affects_fulltext = row_ins_cascade_calc_update_vec( + node, foreign, tmp_heap, trx); - - if (foreign->v_cols != NULL - && foreign->v_cols->size() > 0) { + if (foreign->v_cols && !foreign->v_cols->empty()) { row_ins_foreign_fill_virtual( cascade, clust_rec, clust_index, node, foreign, &err); @@ -1377,7 +1361,8 @@ row_ins_foreign_check_on_constraint( } } - if (n_to_update == ULINT_UNDEFINED) { + switch (cascade->update->n_fields) { + case ULINT_UNDEFINED: err = DB_ROW_IS_REFERENCED; row_ins_foreign_report_err( @@ -1390,10 +1375,7 @@ row_ins_foreign_check_on_constraint( thr, foreign, btr_pcur_get_rec(pcur), entry); goto nonstandard_exit_func; - } - - if (cascade->update->n_fields == 0) { - + case 0: /* The update does not change any columns referred to in this foreign key constraint: no need to do anything */ @@ -1404,9 +1386,9 @@ row_ins_foreign_check_on_constraint( } /* Mark the old Doc ID as deleted */ - if (fts_col_affacted) { + if (affects_fulltext) { ut_ad(table->fts); - cascade->fts_doc_id = doc_id; + fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL); } } @@ -1428,20 +1410,15 @@ row_ins_foreign_check_on_constraint( cascade->state = UPD_NODE_UPDATE_CLUSTERED; #ifdef WITH_WSREP - err = wsrep_append_foreign_key( - thr_get_trx(thr), - foreign, - clust_rec, - clust_index, + err = wsrep_append_foreign_key(trx, foreign, clust_rec, clust_index, FALSE, FALSE); if (err != DB_SUCCESS) { fprintf(stderr, "WSREP: foreign key append failed: %d\n", err); } else #endif /* WITH_WSREP */ - node->new_upd_nodes->push_back(cascade); - - table->inc_fk_checks(); + err = row_update_cascade_for_mysql(thr, cascade, + foreign->foreign_table); /* Release the data dictionary latch for a while, so that we do not starve other threads from doing CREATE TABLE etc. if we have a huge @@ -1466,7 +1443,6 @@ row_ins_foreign_check_on_constraint( DBUG_RETURN(err); nonstandard_exit_func: - que_graph_free_recursive(cascade); if (tmp_heap) { mem_heap_free(tmp_heap); @@ -1616,7 +1592,8 @@ row_ins_check_foreign_constraint( if (que_node_get_type(thr->run_node) == QUE_NODE_UPDATE) { upd_node = static_cast<upd_node_t*>(thr->run_node); - if (!(upd_node->is_delete) && upd_node->foreign == foreign) { + if (upd_node->is_delete != PLAIN_DELETE + && upd_node->foreign == foreign) { /* If a cascaded update is done as defined by a foreign key constraint, do not check that constraint for the child row. In ON UPDATE CASCADE @@ -1905,9 +1882,10 @@ do_possible_lock_wait: thr->lock_state = QUE_THR_LOCK_NOLOCK; - err = check_table->to_be_dropped - ? DB_LOCK_WAIT_TIMEOUT - : trx->error_state; + if (check_table->to_be_dropped + || trx->error_state == DB_LOCK_WAIT_TIMEOUT) { + err = DB_LOCK_WAIT_TIMEOUT; + } check_table->dec_fk_checks(); } @@ -1970,6 +1948,10 @@ row_ins_check_foreign_constraints( row_mysql_freeze_data_dictionary(trx); } + if (referenced_table) { + foreign->foreign_table->inc_fk_checks(); + } + /* NOTE that if the thread ends up waiting for a lock we will release dict_operation_lock temporarily! But the counter on the table protects the referenced @@ -1978,6 +1960,10 @@ row_ins_check_foreign_constraints( err = row_ins_check_foreign_constraint( TRUE, foreign, table, entry, thr); + if (referenced_table) { + foreign->foreign_table->dec_fk_checks(); + } + if (got_s_lock) { row_mysql_unfreeze_data_dictionary(trx); } @@ -2893,21 +2879,18 @@ row_ins_sec_index_entry_low( cursor.rtr_info = NULL; ut_ad(thr_get_trx(thr)->id != 0); - mtr_start(&mtr); - mtr.set_named_space(index->space); - - if (dict_table_is_temporary(index->table)) { - /* Disable REDO logging as the lifetime of temp-tables is - limited to server or connection lifetime and so REDO - information is not needed on restart for recovery. - Disable locking as temp-tables are local to a connection. */ + mtr.start(); + if (index->table->is_temporary()) { + /* Disable locking, because temporary tables are never + shared between transactions or connections. */ ut_ad(flags & BTR_NO_LOCKING_FLAG); mtr.set_log_mode(MTR_LOG_NO_REDO); - } else if (!dict_index_is_spatial(index)) { - /* Enable insert buffering if it's neither temp-table - nor spatial index. */ - search_mode |= BTR_INSERT; + } else { + mtr.set_named_space(index->space); + if (!dict_index_is_spatial(index)) { + search_mode |= BTR_INSERT; + } } /* Ensure that we acquire index->lock when inserting into an @@ -2979,10 +2962,8 @@ row_ins_sec_index_entry_low( } if (err != DB_SUCCESS) { - trx_t* trx = thr_get_trx(thr); - if (err == DB_DECRYPTION_FAILED) { - ib_push_warning(trx->mysql_thd, + ib_push_warning(thr_get_trx(thr)->mysql_thd, DB_DECRYPTION_FAILED, "Table %s is encrypted but encryption service or" " used key_id is not available. " @@ -3465,7 +3446,7 @@ row_ins_index_entry_set_vals( = dict_field_get_col(ind_field); len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminmaxlen, + col->prtype, col->mbminlen, col->mbmaxlen, ind_field->prefix_len, len, static_cast<const char*>( diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 9b5c9873604..64ae384ec37 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1300,7 +1300,7 @@ row_log_table_get_pk( dict_field_t* ifield; dfield_t* dfield; ulint prtype; - ulint mbminmaxlen; + ulint mbminlen, mbmaxlen; ifield = dict_index_get_nth_field(new_index, new_i); dfield = dtuple_get_nth_field(tuple, new_i); @@ -1329,7 +1329,8 @@ err_exit: goto func_exit; } - mbminmaxlen = col->mbminmaxlen; + mbminlen = col->mbminlen; + mbmaxlen = col->mbmaxlen; prtype = col->prtype; } else { /* No matching column was found in the old @@ -1339,7 +1340,8 @@ err_exit: dfield_copy(dfield, dtuple_get_nth_field( log->add_cols, col_no)); - mbminmaxlen = dfield->type.mbminmaxlen; + mbminlen = dfield->type.mbminlen; + mbmaxlen = dfield->type.mbmaxlen; prtype = dfield->type.prtype; } @@ -1348,7 +1350,7 @@ err_exit: if (ifield->prefix_len) { ulint len = dtype_get_at_most_n_mbchars( - prtype, mbminmaxlen, + prtype, mbminlen, mbmaxlen, ifield->prefix_len, dfield_get_len(dfield), static_cast<const char*>( diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 943db575948..a0b73c46703 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -450,8 +450,8 @@ row_merge_buf_redundant_convert( const page_size_t& page_size, mem_heap_t* heap) { - ut_ad(DATA_MBMINLEN(field->type.mbminmaxlen) == 1); - ut_ad(DATA_MBMAXLEN(field->type.mbminmaxlen) > 1); + ut_ad(field->type.mbminlen == 1); + ut_ad(field->type.mbmaxlen > 1); byte* buf = (byte*) mem_heap_alloc(heap, len); ulint field_len = row_field->len; @@ -593,7 +593,8 @@ row_merge_buf_add( field->type.mtype = ifield->col->mtype; field->type.prtype = ifield->col->prtype; - field->type.mbminmaxlen = DATA_MBMINMAXLEN(0, 0); + field->type.mbminlen = 0; + field->type.mbmaxlen = 0; field->type.len = ifield->col->len; } else { /* Use callback to get the virtual column value */ @@ -744,7 +745,7 @@ row_merge_buf_add( if (ifield->prefix_len) { len = dtype_get_at_most_n_mbchars( col->prtype, - col->mbminmaxlen, + col->mbminlen, col->mbmaxlen, ifield->prefix_len, len, static_cast<char*>(dfield_get_data(field))); @@ -756,8 +757,7 @@ row_merge_buf_add( fixed_len = ifield->fixed_len; if (fixed_len && !dict_table_is_comp(index->table) - && DATA_MBMINLEN(col->mbminmaxlen) - != DATA_MBMAXLEN(col->mbminmaxlen)) { + && col->mbminlen != col->mbmaxlen) { /* CHAR in ROW_FORMAT=REDUNDANT is always fixed-length, but in the temporary file it is variable-length for variable-length character @@ -767,14 +767,11 @@ row_merge_buf_add( if (fixed_len) { #ifdef UNIV_DEBUG - ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen); - /* len should be between size calcualted base on mbmaxlen and mbminlen */ ut_ad(len <= fixed_len); - ut_ad(!mbmaxlen || len >= mbminlen - * (fixed_len / mbmaxlen)); + ut_ad(!col->mbmaxlen || len >= col->mbminlen + * (fixed_len / col->mbmaxlen)); ut_ad(!dfield_is_ext(field)); #endif /* UNIV_DEBUG */ @@ -4663,6 +4660,8 @@ row_merge_build_indexes( DBUG_RETURN(DB_OUT_OF_MEMORY); } + TRASH_ALLOC(&crypt_pfx, sizeof crypt_pfx); + if (log_tmp_is_encrypted()) { crypt_block = static_cast<row_merge_block_t*>( alloc.allocate_large(3 * srv_sort_buf_size, diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index d62ab03f094..efe5f763263 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -307,29 +307,6 @@ row_mysql_store_geometry( mach_write_to_n_little_endian(dest, dest_len - 8, src_len); memcpy(dest + dest_len - 8, &src, sizeof src); - - DBUG_EXECUTE_IF("row_print_geometry_data", - { - String res; - Geometry_buffer buffer; - String wkt; - - /** Show the meaning of geometry data. */ - Geometry* g = Geometry::construct( - &buffer, (const char*)src, (uint32) src_len); - - if (g) - { - /* - if (g->as_wkt(&wkt) == 0) - { - ib::info() << "Write geometry data to" - " MySQL WKT format: " - << wkt.c_ptr_safe() << "."; - } - */ - } - }); } /*******************************************************************//** @@ -350,29 +327,6 @@ row_mysql_read_geometry( memcpy(&data, ref + col_len - 8, sizeof data); - DBUG_EXECUTE_IF("row_print_geometry_data", - { - String res; - Geometry_buffer buffer; - String wkt; - - /** Show the meaning of geometry data. */ - Geometry* g = Geometry::construct( - &buffer, (const char*) data, (uint32) *len); - - if (g) - { - /* - if (g->as_wkt(&wkt) == 0) - { - ib::info() << "Read geometry data in" - " MySQL's WKT format: " - << wkt.c_ptr_safe() << "."; - } - */ - } - }); - return(data); } @@ -1694,8 +1648,6 @@ row_create_update_node_for_mysql( node->table_sym = NULL; node->col_assign_list = NULL; - node->fts_doc_id = FTS_NULL_DOC_ID; - node->fts_next_doc_id = UINT64_UNDEFINED; DBUG_RETURN(node); } @@ -1743,9 +1695,9 @@ row_fts_do_update( doc_id_t old_doc_id, /* in: old document id */ doc_id_t new_doc_id) /* in: new document id */ { - fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL); - - if (new_doc_id != FTS_NULL_DOC_ID) { + if(trx->fts_next_doc_id) { + fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL); + if(new_doc_id != FTS_NULL_DOC_ID) fts_trx_add_op(trx, table, new_doc_id, FTS_INSERT, NULL); } } @@ -1757,24 +1709,30 @@ static dberr_t row_fts_update_or_delete( /*=====================*/ - trx_t* trx, - upd_node_t* node) /* in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL handle */ { - dict_table_t* table = node->table; - doc_id_t old_doc_id = node->fts_doc_id; + trx_t* trx = prebuilt->trx; + dict_table_t* table = prebuilt->table; + upd_node_t* node = prebuilt->upd_node; + doc_id_t old_doc_id = prebuilt->fts_doc_id; + DBUG_ENTER("row_fts_update_or_delete"); - ut_a(dict_table_has_fts_index(node->table)); + ut_a(dict_table_has_fts_index(prebuilt->table)); /* Deletes are simple; get them out of the way first. */ if (node->is_delete == PLAIN_DELETE) { /* A delete affects all FTS indexes, so we pass NULL */ fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL); } else { - doc_id_t new_doc_id = node->fts_next_doc_id; - ut_ad(new_doc_id != UINT64_UNDEFINED); + doc_id_t new_doc_id; + new_doc_id = fts_read_doc_id((byte*) &trx->fts_next_doc_id); + if (new_doc_id == 0) { + ib::error() << "InnoDB FTS: Doc ID cannot be 0"; + return(DB_FTS_INVALID_DOCID); + } row_fts_do_update(trx, table, old_doc_id, new_doc_id); } @@ -1823,14 +1781,6 @@ init_fts_doc_id_for_ref( } } -struct dec_foreign_key_checks_running -{ - void operator() (upd_node_t* node) { - node->table->dec_fk_checks(); - } -}; - - /** Does an update or delete of a row for MySQL. @param[in,out] prebuilt prebuilt struct in MySQL handle @return error code or DB_SUCCESS */ @@ -1840,15 +1790,11 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) trx_savept_t savept; dberr_t err; que_thr_t* thr; - ibool was_lock_wait; dict_index_t* clust_index; upd_node_t* node; dict_table_t* table = prebuilt->table; trx_t* trx = prebuilt->trx; ulint fk_depth = 0; - upd_cascade_t* cascade_upd_nodes; - upd_cascade_t* new_upd_nodes; - upd_cascade_t* processed_cascades; bool got_s_lock = false; DBUG_ENTER("row_update_for_mysql"); @@ -1895,26 +1841,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) const bool is_delete = node->is_delete == PLAIN_DELETE; ut_ad(node->table == table); - if (node->cascade_heap) { - mem_heap_empty(node->cascade_heap); - } else { - node->cascade_heap = mem_heap_create(128); - } - - mem_heap_allocator<upd_node_t*> mem_heap_ator(node->cascade_heap); - - cascade_upd_nodes = new - (mem_heap_ator.allocate(sizeof(upd_cascade_t))) - upd_cascade_t(deque_mem_heap_t(mem_heap_ator)); - - new_upd_nodes = new - (mem_heap_ator.allocate(sizeof(upd_cascade_t))) - upd_cascade_t(deque_mem_heap_t(mem_heap_ator)); - - processed_cascades = new - (mem_heap_ator.allocate(sizeof(upd_cascade_t))) - upd_cascade_t(deque_mem_heap_t(mem_heap_ator)); - clust_index = dict_table_get_first_index(table); if (prebuilt->pcur->btr_cur.index == clust_index) { @@ -1939,174 +1865,97 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) node->state = UPD_NODE_UPDATE_CLUSTERED; - node->cascade_top = true; - node->cascade_upd_nodes = cascade_upd_nodes; - node->new_upd_nodes = new_upd_nodes; - node->processed_cascades = processed_cascades; - node->fts_doc_id = prebuilt->fts_doc_id; - - if (trx->fts_next_doc_id != UINT64_UNDEFINED) { - node->fts_next_doc_id = fts_read_doc_id( - (byte*) &trx->fts_next_doc_id); - } else { - node->fts_next_doc_id = UINT64_UNDEFINED; - } - ut_ad(!prebuilt->sql_stat_start); que_thr_move_to_run_state_for_mysql(thr, trx); - thr->fk_cascade_depth = 0; - ut_ad(!prebuilt->versioned_write || node->table->versioned()); bool vers_set_fields = prebuilt->versioned_write && (node->is_delete ? node->is_delete == VERSIONED_DELETE : node->update->affects_versioned()); -run_again: - if (vers_set_fields) { - /* System Versioning: modify update vector to set - row_start (or row_end in case of DELETE) - to current trx_id. */ - dict_table_t* table = node->table; - dict_index_t* clust_index = dict_table_get_first_index(table); - upd_t* uvect = node->update; - upd_field_t* ufield; - dict_col_t* col; - unsigned col_idx; - if (node->is_delete) { - ufield = &uvect->fields[0]; - uvect->n_fields = 0; - node->is_delete = VERSIONED_DELETE; - col_idx = table->vers_end; - } else { - ut_ad(uvect->n_fields < table->n_cols); - ufield = &uvect->fields[uvect->n_fields]; - col_idx = table->vers_start; - } - col = &table->cols[col_idx]; - UNIV_MEM_INVALID(ufield, sizeof *ufield); - { - ulint field_no = dict_col_get_clust_pos(col, clust_index); - ut_ad(field_no != ULINT_UNDEFINED); - ufield->field_no = field_no; - } - ufield->orig_len = 0; - ufield->exp = NULL; - mach_write_to_8(node->update->vers_sys_value, trx->id); - dfield_t* dfield = &ufield->new_val; - dfield_set_data(dfield, node->update->vers_sys_value, 8); - dict_col_copy_type(col, &dfield->type); + for (;;) { + if (vers_set_fields) { + /* System Versioning: modify update vector to set + row_start (or row_end in case of DELETE) + to current trx_id. */ + dict_table_t* table = node->table; + dict_index_t* clust_index = dict_table_get_first_index(table); + upd_t* uvect = node->update; + upd_field_t* ufield; + dict_col_t* col; + unsigned col_idx; + if (node->is_delete) { + ufield = &uvect->fields[0]; + uvect->n_fields = 0; + node->is_delete = VERSIONED_DELETE; + col_idx = table->vers_end; + } else { + ut_ad(uvect->n_fields < table->n_cols); + ufield = &uvect->fields[uvect->n_fields]; + col_idx = table->vers_start; + } + col = &table->cols[col_idx]; + UNIV_MEM_INVALID(ufield, sizeof *ufield); + { + ulint field_no = dict_col_get_clust_pos(col, clust_index); + ut_ad(field_no != ULINT_UNDEFINED); + ufield->field_no = field_no; + } + ufield->orig_len = 0; + ufield->exp = NULL; - uvect->n_fields++; - ut_ad(node->in_mysql_interface); // otherwise needs to recalculate node->cmpl_info - } + mach_write_to_8(node->update->vers_sys_value, trx->id); + dfield_t* dfield = &ufield->new_val; + dfield_set_data(dfield, node->update->vers_sys_value, 8); + dict_col_copy_type(col, &dfield->type); - if (thr->fk_cascade_depth == 1 && trx->dict_operation_lock_mode == 0) { - got_s_lock = true; - row_mysql_freeze_data_dictionary(trx); - } + uvect->n_fields++; + ut_ad(node->in_mysql_interface); // otherwise needs to recalculate node->cmpl_info + } - thr->run_node = node; - thr->prev_node = node; + thr->run_node = node; + thr->prev_node = node; + thr->fk_cascade_depth = 0; - row_upd_step(thr); + row_upd_step(thr); - DBUG_EXECUTE_IF("dml_cascade_only_once", node->check_cascade_only_once();); + err = trx->error_state; - err = trx->error_state; + if (err == DB_SUCCESS) { + break; + } - if (err != DB_SUCCESS) { -handle_error: que_thr_stop_for_mysql(thr); if (err == DB_RECORD_NOT_FOUND) { trx->error_state = DB_SUCCESS; - trx->op_info = ""; - - if (thr->fk_cascade_depth > 0) { - que_graph_free_recursive(node); - } goto error; } - /* Since reporting a plain "duplicate key" error message to - the user in cases where a long CASCADE operation would lead - to a duplicate key in some other table is very confusing, - map duplicate key errors resulting from FK constraints to a - separate error code. */ - if (err == DB_DUPLICATE_KEY && thr->fk_cascade_depth > 0) { - err = DB_FOREIGN_DUPLICATE_KEY; - trx->error_state = err; - } - thr->lock_state= QUE_THR_LOCK_ROW; DEBUG_SYNC(trx->mysql_thd, "row_update_for_mysql_error"); - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, - &savept); + bool was_lock_wait = row_mysql_handle_errors( + &err, trx, thr, &savept); thr->lock_state= QUE_THR_LOCK_NOLOCK; - if (was_lock_wait) { - std::for_each(new_upd_nodes->begin(), - new_upd_nodes->end(), - dec_foreign_key_checks_running()); - std::for_each(new_upd_nodes->begin(), - new_upd_nodes->end(), - que_graph_free_recursive); - node->new_upd_nodes->clear(); - goto run_again; - } - - trx->op_info = ""; - - if (thr->fk_cascade_depth > 0) { - que_graph_free_recursive(node); + if (!was_lock_wait) { + goto error; } - goto error; - } else { - - std::copy(node->new_upd_nodes->begin(), - node->new_upd_nodes->end(), - std::back_inserter(*node->cascade_upd_nodes)); - - node->new_upd_nodes->clear(); } - if (dict_table_has_fts_index(node->table) - && node->fts_doc_id != FTS_NULL_DOC_ID - && node->fts_next_doc_id != UINT64_UNDEFINED) { - err = row_fts_update_or_delete(trx, node); - ut_a(err == DB_SUCCESS); - } - - if (thr->fk_cascade_depth > 0) { - /* Processing cascade operation */ - dec_foreign_key_checks_running()(node); - node->processed_cascades->push_back(node); - } - - if (!cascade_upd_nodes->empty()) { - DEBUG_SYNC_C("foreign_constraint_update_cascade"); - node = cascade_upd_nodes->front(); - node->cascade_upd_nodes = cascade_upd_nodes; - cascade_upd_nodes->pop_front(); - thr->fk_cascade_depth++; - vers_set_fields = node->table->versioned() - && (node->is_delete == PLAIN_DELETE - || node->update->affects_versioned()); - - if (vers_set_fields && !prebuilt->versioned_write) - { - // FIXME: timestamp-based update of row_end in run_again - err = DB_UNSUPPORTED; - trx->error_state = err; + que_thr_stop_for_mysql_no_error(thr, trx); - goto handle_error; + if (dict_table_has_fts_index(table) + && trx->fts_next_doc_id != UINT64_UNDEFINED) { + err = row_fts_update_or_delete(prebuilt); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + ut_ad(!"unexpected error"); + goto error; } - goto run_again; } /* Completed cascading operations (if any) */ @@ -2114,43 +1963,8 @@ handle_error: row_mysql_unfreeze_data_dictionary(trx); } - thr->fk_cascade_depth = 0; - - /* Update the statistics of each involved table - only after completing all operations, including - FOREIGN KEY...ON...CASCADE|SET NULL. */ bool update_statistics; - - for (upd_cascade_t::iterator i = processed_cascades->begin(); - i != processed_cascades->end(); - ++i) { - - node = *i; - - if (node->is_delete == PLAIN_DELETE) { - /* Not protected by dict_table_stats_lock() for - performance reasons, we would rather get garbage - in stat_n_rows (which is just an estimate anyway) - than protecting the following code with a latch. */ - dict_table_n_rows_dec(node->table); - - update_statistics = !srv_stats_include_delete_marked; - srv_stats.n_rows_deleted.inc(size_t(trx->id)); - } else { - update_statistics - = !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE); - srv_stats.n_rows_updated.inc(size_t(trx->id)); - } - - if (update_statistics) { - dict_stats_update_if_needed(node->table); - } else { - /* Always update the table modification counter. */ - node->table->stat_modified_counter++; - } - - que_graph_free_recursive(node); - } + ut_ad(is_delete == (node->is_delete == PLAIN_DELETE)); if (is_delete) { /* Not protected by dict_table_stats_lock() for performance @@ -2186,42 +2000,14 @@ handle_error: trx->op_info = ""; - que_thr_stop_for_mysql_no_error(thr, trx); - - DBUG_ASSERT(cascade_upd_nodes->empty()); - DBUG_RETURN(err); error: + trx->op_info = ""; if (got_s_lock) { row_mysql_unfreeze_data_dictionary(trx); } - if (thr->fk_cascade_depth > 0) { - dec_foreign_key_checks_running()(node); - thr->fk_cascade_depth = 0; - } - - std::for_each(cascade_upd_nodes->begin(), - cascade_upd_nodes->end(), - dec_foreign_key_checks_running()); - - std::for_each(new_upd_nodes->begin(), - new_upd_nodes->end(), - dec_foreign_key_checks_running()); - - std::for_each(cascade_upd_nodes->begin(), - cascade_upd_nodes->end(), - que_graph_free_recursive); - - std::for_each(new_upd_nodes->begin(), - new_upd_nodes->end(), - que_graph_free_recursive); - - std::for_each(processed_cascades->begin(), - processed_cascades->end(), - que_graph_free_recursive); - DBUG_RETURN(err); } @@ -2388,6 +2174,141 @@ row_mysql_unfreeze_data_dictionary( trx->dict_operation_lock_mode = 0; } +/**********************************************************************//** +Does a cascaded delete or set null in a foreign key operation. +@return error code or DB_SUCCESS */ +dberr_t +row_update_cascade_for_mysql( +/*=========================*/ + que_thr_t* thr, /*!< in: query thread */ + upd_node_t* node, /*!< in: update node used in the cascade + or set null operation */ + dict_table_t* table) /*!< in: table where we do the operation */ +{ + /* Increment fk_cascade_depth to record the recursive call depth on + a single update/delete that affects multiple tables chained + together with foreign key relations. */ + + if (++thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) { + return(DB_FOREIGN_EXCEED_MAX_CASCADE); + } + + trx_t* trx = thr_get_trx(thr); + + bool vers_set_fields = node->table->versioned() + && (node->is_delete == PLAIN_DELETE + || node->update->affects_versioned()); + + if (vers_set_fields && !thr->prebuilt->versioned_write) + { + // FIXME: timestamp-based update of row_end in run_again + trx->error_state = DB_UNSUPPORTED; + thr->fk_cascade_depth = 0; + return trx->error_state; + } + + for (;;) { + if (vers_set_fields) { + // FIXME: code duplication with row_update_for_mysql() + /* System Versioning: modify update vector to set + row_start (or row_end in case of DELETE) + to current trx_id. */ + dict_table_t* table = node->table; + dict_index_t* clust_index = dict_table_get_first_index(table); + upd_t* uvect = node->update; + upd_field_t* ufield; + dict_col_t* col; + unsigned col_idx; + if (node->is_delete) { + ufield = &uvect->fields[0]; + uvect->n_fields = 0; + node->is_delete = VERSIONED_DELETE; + col_idx = table->vers_end; + } else { + ut_ad(uvect->n_fields < table->n_cols); + ufield = &uvect->fields[uvect->n_fields]; + col_idx = table->vers_start; + } + col = &table->cols[col_idx]; + UNIV_MEM_INVALID(ufield, sizeof *ufield); + { + ulint field_no = dict_col_get_clust_pos(col, clust_index); + ut_ad(field_no != ULINT_UNDEFINED); + ufield->field_no = field_no; + } + ufield->orig_len = 0; + ufield->exp = NULL; + + mach_write_to_8(node->update->vers_sys_value, trx->id); + dfield_t* dfield = &ufield->new_val; + dfield_set_data(dfield, node->update->vers_sys_value, 8); + dict_col_copy_type(col, &dfield->type); + + uvect->n_fields++; + ut_ad(node->in_mysql_interface); // otherwise needs to recalculate node->cmpl_info + } + + thr->run_node = node; + thr->prev_node = node; + + DEBUG_SYNC_C("foreign_constraint_update_cascade"); + { + TABLE *mysql_table = thr->prebuilt->m_mysql_table; + thr->prebuilt->m_mysql_table = NULL; + row_upd_step(thr); + thr->prebuilt->m_mysql_table = mysql_table; + } + + switch (trx->error_state) { + case DB_LOCK_WAIT: + que_thr_stop_for_mysql(thr); + lock_wait_suspend_thread(thr); + + if (trx->error_state == DB_SUCCESS) { + continue; + } + + /* fall through */ + default: + /* Other errors are handled for the parent node. */ + thr->fk_cascade_depth = 0; + return trx->error_state; + + case DB_SUCCESS: + thr->fk_cascade_depth = 0; + bool stats; + + if (node->is_delete == PLAIN_DELETE) { + /* Not protected by + dict_table_stats_lock() for + performance reasons, we would rather + get garbage in stat_n_rows (which is + just an estimate anyway) than + protecting the following code with a + latch. */ + dict_table_n_rows_dec(node->table); + + stats = !srv_stats_include_delete_marked; + srv_stats.n_rows_deleted.inc(size_t(trx->id)); + } else { + stats = !(node->cmpl_info + & UPD_NODE_NO_ORD_CHANGE); + srv_stats.n_rows_updated.inc(size_t(trx->id)); + } + + if (stats) { + dict_stats_update_if_needed(node->table); + } else { + /* Always update the table + modification counter. */ + node->table->stat_modified_counter++; + } + + return(DB_SUCCESS); + } + } +} + /*********************************************************************//** Locks the data dictionary exclusively for performing a table create or other data dictionary modification operation. */ @@ -4923,6 +4844,8 @@ end: DICT_ERR_IGNORE_NONE); fk_tables.pop_front(); } + + table->data_dir_path= NULL; } funct_exit: @@ -4975,59 +4898,6 @@ funct_exit: return(err); } -/** Renames a partitioned table for MySQL. -@param[in] old_name Old table name. -@param[in] new_name New table name. -@param[in,out] trx Transaction. -@return error code or DB_SUCCESS */ -dberr_t -row_rename_partitions_for_mysql( - const char* old_name, - const char* new_name, - trx_t* trx) -{ - char from_name[FN_REFLEN]; - char to_name[FN_REFLEN]; - ulint from_len = strlen(old_name); - ulint to_len = strlen(new_name); - char* table_name; - dberr_t error = DB_TABLE_NOT_FOUND; - - ut_a(from_len < (FN_REFLEN - 4)); - ut_a(to_len < (FN_REFLEN - 4)); - memcpy(from_name, old_name, from_len); - from_name[from_len] = '#'; - from_name[from_len + 1] = 0; - while ((table_name = dict_get_first_table_name_in_db(from_name))) { - ut_a(memcmp(table_name, from_name, from_len) == 0); - /* Must match #[Pp]#<partition_name> */ - if (strlen(table_name) <= (from_len + 3) - || table_name[from_len] != '#' - || table_name[from_len + 2] != '#' - || (table_name[from_len + 1] != 'P' - && table_name[from_len + 1] != 'p')) { - - ut_ad(0); - ut_free(table_name); - continue; - } - memcpy(to_name, new_name, to_len); - memcpy(to_name + to_len, table_name + from_len, - strlen(table_name) - from_len + 1); - error = row_rename_table_for_mysql(table_name, to_name, - trx, false); - if (error != DB_SUCCESS) { - /* Rollback and return. */ - trx_rollback_for_mysql(trx); - ut_free(table_name); - return(error); - } - ut_free(table_name); - } - trx_commit_for_mysql(trx); - return(error); -} - /*********************************************************************//** Scans an index for either COUNT(*) or CHECK TABLE. If CHECK TABLE; Checks that the index contains entries in an ascending order, diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc index 77cb35b8f21..938f9156717 100644 --- a/storage/innobase/row/row0quiesce.cc +++ b/storage/innobase/row/row0quiesce.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -239,7 +239,11 @@ row_quiesce_write_table( mach_write_to_4(ptr, col->len); ptr += sizeof(ib_uint32_t); - mach_write_to_4(ptr, col->mbminmaxlen); + /* FIXME: This will not work if mbminlen>4. + This field is also redundant, because the lengths + are a property of the character set encoding, which + in turn is encodedin prtype above. */ + mach_write_to_4(ptr, col->mbmaxlen * 5 + col->mbminlen); ptr += sizeof(ib_uint32_t); mach_write_to_4(ptr, col->ind); diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index 024c54cfe22..f9aa4fadacc 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -333,7 +334,7 @@ row_build_index_entry_low( /* If a column prefix index, take only the prefix. */ if (ind_field->prefix_len) { len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminmaxlen, + col->prtype, col->mbminlen, col->mbmaxlen, ind_field->prefix_len, len, static_cast<char*>(dfield_get_data(dfield))); dfield_set_len(dfield, len); @@ -876,7 +877,8 @@ row_build_row_ref( dfield_set_len(dfield, dtype_get_at_most_n_mbchars( dtype->prtype, - dtype->mbminmaxlen, + dtype->mbminlen, + dtype->mbmaxlen, clust_col_prefix_len, len, (char*) field)); } @@ -977,7 +979,8 @@ row_build_row_ref_in_tuple( dfield_set_len(dfield, dtype_get_at_most_n_mbchars( dtype->prtype, - dtype->mbminmaxlen, + dtype->mbminlen, + dtype->mbmaxlen, clust_col_prefix_len, len, (char*) field)); } diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index ccc2ab0c14c..f8507443100 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -87,8 +87,10 @@ row_sel_sec_rec_is_for_blob( /*========================*/ ulint mtype, /*!< in: main type */ ulint prtype, /*!< in: precise type */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of - a multi-byte character */ + ulint mbminlen, /*!< in: minimum length of + a character, in bytes */ + ulint mbmaxlen, /*!< in: maximum length of + a character, in bytes */ const byte* clust_field, /*!< in: the locally stored part of the clustered index column, including the BLOB pointer; the clustered @@ -136,7 +138,7 @@ row_sel_sec_rec_is_for_blob( return(FALSE); } - len = dtype_get_at_most_n_mbchars(prtype, mbminmaxlen, + len = dtype_get_at_most_n_mbchars(prtype, mbminlen, mbmaxlen, prefix_len, len, (const char*) buf); return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len)); @@ -258,14 +260,14 @@ row_sel_sec_rec_is_for_clust_rec( } len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminmaxlen, + col->prtype, col->mbminlen, col->mbmaxlen, ifield->prefix_len, len, (char*) clust_field); if (rec_offs_nth_extern(clust_offs, clust_pos) && len < sec_len) { if (!row_sel_sec_rec_is_for_blob( col->mtype, col->prtype, - col->mbminmaxlen, + col->mbminlen, col->mbmaxlen, clust_field, clust_len, sec_field, sec_len, ifield->prefix_len, @@ -1026,7 +1028,7 @@ row_sel_get_clust_rec( /* Fetch the columns needed in test conditions. The clustered index record is protected by a page latch that was acquired when plan->clust_pcur was positioned. The latch will not be - released until mtr_commit(mtr). */ + released until mtr->commit(). */ ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets))); row_sel_fetch_columns(index, clust_rec, offsets, @@ -1093,14 +1095,14 @@ retry: if (err == DB_LOCK_WAIT) { re_scan: - mtr_commit(mtr); + mtr->commit(); trx->error_state = err; que_thr_stop_for_mysql(thr); thr->lock_state = QUE_THR_LOCK_ROW; if (row_mysql_handle_errors( &err, trx, thr, NULL)) { thr->lock_state = QUE_THR_LOCK_NOLOCK; - mtr_start(mtr); + mtr->start(); mutex_enter(&match->rtr_match_mutex); if (!match->valid && match->matched_recs->empty()) { @@ -1120,7 +1122,7 @@ re_scan: RW_X_LATCH, NULL, BUF_GET, __FILE__, __LINE__, mtr, &err); } else { - mtr_start(mtr); + mtr->start(); goto func_end; } @@ -1128,8 +1130,8 @@ re_scan: if (!match->valid) { /* Page got deleted */ - mtr_commit(mtr); - mtr_start(mtr); + mtr->commit(); + mtr->start(); err = DB_RECORD_NOT_FOUND; goto func_end; } @@ -1147,8 +1149,8 @@ re_scan: /* Page got splitted and promoted (only for root page it is possible). Release the page and ask for a re-search */ - mtr_commit(mtr); - mtr_start(mtr); + mtr->commit(); + mtr->start(); err = DB_RECORD_NOT_FOUND; goto func_end; } @@ -1160,8 +1162,8 @@ re_scan: /* No match record */ if (page_rec_is_supremum(rec) || !match->valid) { - mtr_commit(mtr); - mtr_start(mtr); + mtr->commit(); + mtr->start(); err = DB_RECORD_NOT_FOUND; goto func_end; } @@ -1517,7 +1519,7 @@ exhausted: /* Fetch the columns needed in test conditions. The index record is protected by a page latch that was acquired when plan->pcur was positioned. The latch will not be released - until mtr_commit(mtr). */ + until mtr->commit(). */ row_sel_fetch_columns(index, rec, offsets, UT_LIST_GET_FIRST(plan->columns)); @@ -1626,7 +1628,7 @@ table_loop: /* Open a cursor to index, or restore an open cursor position */ - mtr_start(&mtr); + mtr.start(); #ifdef BTR_CUR_HASH_ADAPT if (consistent_read && plan->unique_search && !plan->pcur_is_open @@ -1645,8 +1647,8 @@ table_loop: plan_reset_cursor(plan); - mtr_commit(&mtr); - mtr_start(&mtr); + mtr.commit(); + mtr.start(); } #endif /* BTR_CUR_HASH_ADAPT */ @@ -1910,7 +1912,7 @@ skip_lock: by row_sel_open_pcur() or row_sel_restore_pcur_pos(). The latch will not be released - until mtr_commit(mtr). */ + until mtr.commit(). */ row_sel_fetch_columns( index, rec, offsets, @@ -1940,7 +1942,7 @@ skip_lock: /* Fetch the columns needed in test conditions. The record is protected by a page latch that was acquired by row_sel_open_pcur() or row_sel_restore_pcur_pos(). The latch - will not be released until mtr_commit(mtr). */ + will not be released until mtr.commit(). */ row_sel_fetch_columns(index, rec, offsets, UT_LIST_GET_FIRST(plan->columns)); @@ -2102,7 +2104,7 @@ next_table: btr_pcur_store_position(&(plan->pcur), &mtr); } - mtr_commit(&mtr); + mtr.commit(); mtr_has_extra_clust_latch = FALSE; @@ -2142,7 +2144,7 @@ table_exhausted: plan->cursor_at_end = TRUE; - mtr_commit(&mtr); + mtr.commit(); mtr_has_extra_clust_latch = FALSE; @@ -2190,7 +2192,7 @@ stop_for_a_while: plan->stored_cursor_rec_processed = FALSE; btr_pcur_store_position(&(plan->pcur), &mtr); - mtr_commit(&mtr); + mtr.commit(); ut_ad(!sync_check_iterate(sync_check())); err = DB_SUCCESS; @@ -2205,7 +2207,7 @@ commit_mtr_for_a_while: btr_pcur_store_position(&(plan->pcur), &mtr); - mtr_commit(&mtr); + mtr.commit(); mtr_has_extra_clust_latch = FALSE; ut_ad(!sync_check_iterate(dict_sync_check())); @@ -2220,7 +2222,7 @@ lock_wait_or_error: plan->stored_cursor_rec_processed = FALSE; btr_pcur_store_position(&(plan->pcur), &mtr); - mtr_commit(&mtr); + mtr.commit(); func_exit: ut_ad(!sync_check_iterate(dict_sync_check())); @@ -4240,7 +4242,7 @@ row_search_mvcc( goto func_exit; } - mtr_start(&mtr); + mtr.start(); #ifdef BTR_CUR_HASH_ADAPT /*-------------------------------------------------------------*/ @@ -4279,7 +4281,7 @@ row_search_mvcc( a page latch that was acquired by row_sel_try_search_shortcut_for_mysql(). The latch will not be released until - mtr_commit(&mtr). */ + mtr.commit(). */ ut_ad(!rec_get_deleted_flag(rec, comp)); if (prebuilt->idx_cond) { @@ -4316,7 +4318,8 @@ row_search_mvcc( } shortcut_match: - mtr_commit(&mtr); + mtr.commit(); + /* NOTE that we do NOT store the cursor position */ err = DB_SUCCESS; @@ -4324,7 +4327,7 @@ row_search_mvcc( case SEL_EXHAUSTED: shortcut_mismatch: - mtr_commit(&mtr); + mtr.commit(); /* NOTE that we do NOT store the cursor position */ err = DB_RECORD_NOT_FOUND; @@ -4337,8 +4340,8 @@ row_search_mvcc( ut_ad(0); } - mtr_commit(&mtr); - mtr_start(&mtr); + mtr.commit(); + mtr.start(); } } #endif /* BTR_CUR_HASH_ADAPT */ @@ -5318,7 +5321,7 @@ requires_clust_rec: /* Decide whether to prefetch extra rows. At this point, the clustered index record is protected by a page latch that was acquired when pcur was positioned. - The latch will not be released until mtr_commit(&mtr). */ + The latch will not be released until mtr.commit(). */ if ((match_mode == ROW_SEL_EXACT || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD) @@ -5510,10 +5513,10 @@ next_rec: btr_pcur_store_position(pcur, &mtr); } - mtr_commit(&mtr); + mtr.commit(); mtr_has_extra_clust_latch = FALSE; - mtr_start(&mtr); + mtr.start(); if (!spatial_search && sel_restore_position_for_mysql(&same_user_rec, @@ -5571,7 +5574,7 @@ lock_wait_or_error: } lock_table_wait: - mtr_commit(&mtr); + mtr.commit(); mtr_has_extra_clust_latch = FALSE; trx->error_state = err; @@ -5588,7 +5591,7 @@ lock_table_wait: /* It was a lock wait, and it ended */ thr->lock_state = QUE_THR_LOCK_NOLOCK; - mtr_start(&mtr); + mtr.start(); /* Table lock waited, go try to obtain table lock again */ @@ -5647,7 +5650,7 @@ normal_return: trx->lock.n_active_thrs= n_active_thrs - 1; } - mtr_commit(&mtr); + mtr.commit(); /* Rollback blocking transactions from hit list for high priority transaction, if any. We should not be holding latches here as diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index b4aec1f3c4d..1c2a97a3210 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -624,13 +624,13 @@ row_undo_mod_del_unmark_sec_and_undo_update( ut_ad(trx->id != 0); - /* FIXME: Currently we do a 2-pass search for the undo due to - avoid undel-mark a wrong rec in rolling back in partial update. - Later, we could log some info in secondary index updates to avoid - this. */ if (dict_index_is_spatial(index)) { + /* FIXME: Currently we do a 2-pass search for the undo + due to avoid undel-mark a wrong rec in rolling back in + partial update. Later, we could log some info in + secondary index updates to avoid this. */ ut_ad(mode & BTR_MODIFY_LEAF); - mode |= BTR_RTREE_DELETE_MARK; + mode |= BTR_RTREE_DELETE_MARK; } try_again: diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index e204059dd04..e9002d6b41d 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -319,9 +319,16 @@ row_upd_check_references_constraints( But the counter on the table protects 'foreign' from being dropped while the check is running. */ + if (foreign_table) { + foreign_table->inc_fk_checks(); + } + err = row_ins_check_foreign_constraint( FALSE, foreign, table, entry, thr); + if (foreign_table) { + foreign_table->dec_fk_checks(); + } if (ref_table != NULL) { dict_table_close(ref_table, FALSE, FALSE); } @@ -342,11 +349,6 @@ func_exit: mem_heap_free(heap); DEBUG_SYNC_C("foreign_constraint_check_for_update_done"); - - DBUG_EXECUTE_IF("row_upd_cascade_lock_wait_err", - err = DB_LOCK_WAIT; - DBUG_SET("-d,row_upd_cascade_lock_wait_err");); - DBUG_RETURN(err); } @@ -469,9 +471,8 @@ wsrep_must_process_fk(const upd_node_t* node, const trx_t* trx) return false; } - const upd_node_t* parent = static_cast<const upd_node_t*>(node->common.parent); - - return parent->cascade_upd_nodes->empty(); + return static_cast<upd_node_t*>(node->common.parent)->cascade_node + == node; } #endif /* WITH_WSREP */ @@ -1295,7 +1296,7 @@ row_upd_index_replace_new_col_val( } len = dtype_get_at_most_n_mbchars(col->prtype, - col->mbminmaxlen, + col->mbminlen, col->mbmaxlen, field->prefix_len, len, (const char*) data); @@ -1532,12 +1533,7 @@ row_upd_replace_vcol( dfield_copy_data(dfield, upd_field->old_v_val); } - dfield_get_type(dfield)->mtype = - upd_field->new_val.type.mtype; - dfield_get_type(dfield)->prtype = - upd_field->new_val.type.prtype; - dfield_get_type(dfield)->mbminmaxlen = - upd_field->new_val.type.mbminmaxlen; + dfield->type = upd_field->new_val.type; break; } } @@ -2129,6 +2125,7 @@ row_upd_eval_new_vals( @param[in] update an update vector if it is update @param[in] thd mysql thread handle @param[in,out] mysql_table mysql table object */ +static void row_upd_store_v_row( upd_node_t* node, @@ -2176,7 +2173,6 @@ row_upd_store_v_row( cascade update. And virtual column can't be affected, so it is Ok to set it to NULL */ - ut_ad(!node->cascade_top); dfield_set_null(dfield); } else { dfield_t* vfield @@ -2271,25 +2267,6 @@ row_upd_store_row( } /***********************************************************//** -Print a MBR data from disk */ -static -void -srv_mbr_print(const byte* data) -{ - double a, b, c, d; - a = mach_double_read(data); - data += sizeof(double); - b = mach_double_read(data); - data += sizeof(double); - c = mach_double_read(data); - data += sizeof(double); - d = mach_double_read(data); - - ib::info() << "GIS MBR INFO: " << a << " and " << b << ", " << c - << ", " << d << "\n"; -} - -/***********************************************************//** Updates a secondary index entry of a row. @return DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ @@ -2452,8 +2429,6 @@ row_upd_sec_index_entry( << " of table " << index->table->name << " was not found on update: " << *entry << " at: " << rec_index_print(rec, index); - if (entry->fields[0].data) - srv_mbr_print((unsigned char*)entry->fields[0].data); #ifdef UNIV_DEBUG mtr_commit(&mtr); mtr_start(&mtr); @@ -2684,7 +2659,9 @@ row_upd_clust_rec_by_insert( que_thr_t* thr, /*!< in: query thread */ ibool referenced,/*!< in: TRUE if index may be referenced in a foreign key constraint */ - ibool foreign, /*!< in: TRUE if index is foreign key index */ +#ifdef WITH_WSREP + bool foreign,/*!< in: whether this is a foreign key */ +#endif mtr_t* mtr) /*!< in/out: mtr; gets committed here */ { mem_heap_t* heap; @@ -2897,15 +2874,14 @@ row_upd_clust_rec( down the index tree */ mtr->start(); - mtr->set_named_space(index->space); - /* Disable REDO logging as lifetime of temp-tables is limited to - server or connection lifetime and so REDO information is not needed - on restart for recovery. - Disable locking as temp-tables are not shared across connection. */ - if (dict_table_is_temporary(index->table)) { + if (index->table->is_temporary()) { + /* Disable locking, because temporary tables are never + shared between transactions or connections. */ flags |= BTR_NO_LOCKING_FLAG; mtr->set_log_mode(MTR_LOG_NO_REDO); + } else { + mtr->set_named_space(index->space); } /* NOTE: this transaction has an s-lock or x-lock on the record and @@ -2974,7 +2950,9 @@ row_upd_del_mark_clust_rec( ibool referenced, /*!< in: TRUE if index may be referenced in a foreign key constraint */ - ibool foreign,/*!< in: TRUE if index is foreign key index */ +#ifdef WITH_WSREP + bool foreign,/*!< in: whether this is a foreign key */ +#endif mtr_t* mtr) /*!< in: mtr; gets committed here */ { btr_pcur_t* pcur; @@ -3066,7 +3044,6 @@ row_upd_clust_step( ulint* offsets; ibool referenced; ulint flags; - ibool foreign = FALSE; trx_t* trx = thr_get_trx(thr); rec_offs_init(offsets_); @@ -3076,8 +3053,7 @@ row_upd_clust_step( referenced = row_upd_index_is_referenced(index, trx); #ifdef WITH_WSREP - foreign = wsrep_row_upd_index_is_foreign( - index, thr_get_trx(thr)); + const bool foreign = wsrep_row_upd_index_is_foreign(index, trx); #endif pcur = node->pcur; @@ -3085,7 +3061,6 @@ row_upd_clust_step( /* We have to restore the cursor to its position */ mtr.start(); - mtr.set_named_space(index->space); if (dict_table_is_temporary(node->table)) { /* Disable locking, because temporary tables are @@ -3097,6 +3072,7 @@ row_upd_clust_step( mtr.set_log_mode(MTR_LOG_NO_REDO); } else { flags = node->table->no_rollback() ? BTR_NO_ROLLBACK : 0; + mtr.set_named_space(index->space); } /* If the restoration does not succeed, then the same @@ -3184,7 +3160,11 @@ row_upd_clust_step( if (node->is_delete == PLAIN_DELETE) { err = row_upd_del_mark_clust_rec( - node, index, offsets, thr, referenced, foreign, &mtr); + node, index, offsets, thr, referenced, +#ifdef WITH_WSREP + foreign, +#endif + &mtr); if (err == DB_SUCCESS) { node->state = UPD_NODE_UPDATE_ALL_SEC; @@ -3230,7 +3210,11 @@ row_upd_clust_step( externally! */ err = row_upd_clust_rec_by_insert( - node, index, thr, referenced, foreign, &mtr); + node, index, thr, referenced, +#ifdef WITH_WSREP + foreign, +#endif + &mtr); if (err != DB_SUCCESS) { goto exit_func; @@ -3470,56 +3454,3 @@ error_handling: DBUG_RETURN(thr); } - -#ifndef DBUG_OFF - -/** Ensure that the member cascade_upd_nodes has only one update node -for each of the tables. This is useful for testing purposes. */ -void upd_node_t::check_cascade_only_once() -{ - DBUG_ENTER("upd_node_t::check_cascade_only_once"); - - dbug_trace(); - - for (upd_cascade_t::const_iterator i = cascade_upd_nodes->begin(); - i != cascade_upd_nodes->end(); ++i) { - - const upd_node_t* update_node = *i; - std::string table_name(update_node->table->name.m_name); - ulint count = 0; - - for (upd_cascade_t::const_iterator j - = cascade_upd_nodes->begin(); - j != cascade_upd_nodes->end(); ++j) { - - const upd_node_t* node = *j; - - if (table_name == node->table->name.m_name) { - DBUG_ASSERT(count++ == 0); - } - } - } - - DBUG_VOID_RETURN; -} - -/** Print information about this object into the trace log file. */ -void upd_node_t::dbug_trace() -{ - DBUG_ENTER("upd_node_t::dbug_trace"); - - for (upd_cascade_t::const_iterator i = cascade_upd_nodes->begin(); - i != cascade_upd_nodes->end(); ++i) { - DBUG_LOG("upd_node_t", "cascade_upd_nodes: Cascade to table: " - << (*i)->table->name); - } - - for (upd_cascade_t::const_iterator j = new_upd_nodes->begin(); - j != new_upd_nodes->end(); ++j) { - DBUG_LOG("upd_node_t", "new_upd_nodes: Cascade to table: " - << (*j)->table->name); - } - - DBUG_VOID_RETURN; -} -#endif /* !DBUG_OFF */ diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc index c5663c0c828..648643ce932 100644 --- a/storage/innobase/row/row0vers.cc +++ b/storage/innobase/row/row0vers.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -44,25 +44,31 @@ Created 2/6/1997 Heikki Tuuri #include "lock0lock.h" #include "row0mysql.h" -/** Check whether all non-virtual columns in a virtual index match that of in -the cluster index -@param[in,out] caller_trx trx of current thread -@param[in] index the secondary index -@param[in] row the cluster index row in dtuple form -@param[in] ext externally stored column prefix or NULL -@param[in] ientry the secondary index entry -@param[in,out] heap heap used to build virtual dtuple -@param[in,out] n_non_v_col number of non-virtual columns in the index -@return true if all matches, false otherwise */ +/** Check whether all non-virtual index fields are equal. +@param[in] index the secondary index +@param[in] a first index entry to compare +@param[in] b second index entry to compare +@return whether all non-virtual fields are equal */ static bool -row_vers_non_vc_match( - dict_index_t* index, - const dtuple_t* row, - const row_ext_t* ext, - const dtuple_t* ientry, - mem_heap_t* heap, - ulint* n_non_v_col); +row_vers_non_virtual_fields_equal( + const dict_index_t* index, + const dfield_t* a, + const dfield_t* b) +{ + const dict_field_t* end = &index->fields[index->n_fields]; + + for (const dict_field_t* ifield = index->fields; ifield != end; + ifield++) { + if (!dict_col_is_virtual(ifield->col) + && cmp_dfield_dfield(a++, b++)) { + return false; + } + } + + return true; +} + /** Determine if an active transaction has inserted or modified a secondary index record. @param[in,out] caller_trx trx of current thread @@ -242,15 +248,29 @@ row_vers_impl_x_locked_low( } if (!cur_vrow) { - ulint n_non_v_col = 0; + /* Build index entry out of row */ + entry = row_build_index_entry(row, ext, index, + heap); + + /* entry could only be NULL (the + clustered index record could contain + BLOB pointers that are NULL) if we + were accessing a freshly inserted + record before it was fully inserted. + prev_version cannot possibly be such + an incomplete record, because its + transaction would have to be committed + in order for later versions of the + record to be able to exist. */ + ut_ad(entry); /* If the indexed virtual columns has changed, there must be log record to generate vrow. Otherwise, it is not changed, so no need to compare */ - if (row_vers_non_vc_match( - index, row, ext, ientry, heap, - &n_non_v_col) == 0) { + if (!row_vers_non_virtual_fields_equal( + index, + ientry->fields, entry->fields)) { if (rec_del != vers_del) { break; } @@ -267,12 +287,14 @@ row_vers_impl_x_locked_low( entry = row_build_index_entry(row, ext, index, heap); - /* entry may be NULL if a record was inserted in place - of a deleted record, and the BLOB pointers of the new - record were not initialized yet. But in that case, - prev_version should be NULL. */ - - ut_a(entry != NULL); + /* entry could only be NULL (the clustered index + record could contain BLOB pointers that are NULL) if + we were accessing a freshly inserted record before it + was fully inserted. prev_version cannot possibly be + such an incomplete record, because its transaction + would have to be committed in order for later versions + of the record to be able to exist. */ + ut_ad(entry); /* If we get here, we know that the trx_id transaction modified prev_version. Let us check if prev_version @@ -421,61 +443,6 @@ row_vers_must_preserve_del_marked( return(!purge_sys->view.changes_visible(trx_id, name)); } -/** Check whether all non-virtual columns in a virtual index match that of in -the cluster index -@param[in] index the secondary index -@param[in] row the cluster index row in dtuple form -@param[in] ext externally stored column prefix or NULL -@param[in] ientry the secondary index entry -@param[in,out] heap heap used to build virtual dtuple -@param[in,out] n_non_v_col number of non-virtual columns in the index -@return true if all matches, false otherwise */ -static -bool -row_vers_non_vc_match( - dict_index_t* index, - const dtuple_t* row, - const row_ext_t* ext, - const dtuple_t* ientry, - mem_heap_t* heap, - ulint* n_non_v_col) -{ - const dfield_t* field1; - dfield_t* field2; - ulint n_fields = dtuple_get_n_fields(ientry); - ulint ret = true; - - *n_non_v_col = 0; - - /* Build index entry out of row */ - dtuple_t* nentry = row_build_index_entry(row, ext, index, heap); - - for (ulint i = 0; i < n_fields; i++) { - const dict_field_t* ind_field = dict_index_get_nth_field( - index, i); - - const dict_col_t* col = ind_field->col; - - /* Only check non-virtual columns */ - if (dict_col_is_virtual(col)) { - continue; - } - - if (ret) { - field1 = dtuple_get_nth_field(ientry, i); - field2 = dtuple_get_nth_field(nentry, i); - - if (cmp_dfield_dfield(field1, field2) != 0) { - ret = false; - } - } - - (*n_non_v_col)++; - } - - return(ret); -} - /** build virtual column value from current cluster index record data @param[in,out] row the cluster index row in dtuple form @param[in] clust_index clustered index @@ -626,8 +593,7 @@ that of current cluster index record, which is recreated from information stored in undo log @param[in] in_purge called by purge thread @param[in] rec record in the clustered index -@param[in] row the cluster index row in dtuple form -@param[in] ext externally stored column prefix or NULL +@param[in] icentry the index entry built from a cluster row @param[in] clust_index cluster index @param[in] clust_offsets offsets on the cluster record @param[in] index the secondary index @@ -643,8 +609,7 @@ bool row_vers_vc_matches_cluster( bool in_purge, const rec_t* rec, - const dtuple_t* row, - row_ext_t* ext, + const dtuple_t* icentry, dict_index_t* clust_index, ulint* clust_offsets, dict_index_t* index, @@ -669,15 +634,27 @@ row_vers_vc_matches_cluster( dfield_t* field2; ulint i; - tuple_heap = mem_heap_create(1024); - /* First compare non-virtual columns (primary keys) */ - if (!row_vers_non_vc_match(index, row, ext, ientry, tuple_heap, - &n_non_v_col)) { - mem_heap_free(tuple_heap); - return(false); + ut_ad(index->n_fields == n_fields); + ut_ad(n_fields == dtuple_get_n_fields(icentry)); + { + const dfield_t* a = ientry->fields; + const dfield_t* b = icentry->fields; + + for (const dict_field_t *ifield = index->fields, + *const end = &index->fields[index->n_fields]; + ifield != end; ifield++, a++, b++) { + if (!dict_col_is_virtual(ifield->col)) { + if (cmp_dfield_dfield(a, b)) { + return false; + } + n_non_v_col++; + } + } } + tuple_heap = mem_heap_create(1024); + ut_ad(n_fields > n_non_v_col); *vrow = dtuple_create_with_vcol(v_heap ? v_heap : tuple_heap, 0, num_v); @@ -946,27 +923,28 @@ row_vers_old_has_index_entry( entry = row_build_index_entry( row, ext, index, heap); if (entry && !dtuple_coll_cmp(ientry, entry)) { - - mem_heap_free(heap); - - if (v_heap) { - mem_heap_free(v_heap); - } - - return(TRUE); + goto safe_to_purge; } } else { - if (row_vers_vc_matches_cluster( - also_curr, rec, row, ext, clust_index, - clust_offsets, index, ientry, roll_ptr, - trx_id, NULL, &vrow, mtr)) { - mem_heap_free(heap); - - if (v_heap) { - mem_heap_free(v_heap); - } - - return(TRUE); + /* Build index entry out of row */ + entry = row_build_index_entry(row, ext, index, heap); + /* entry could only be NULL if + the clustered index record is an uncommitted + inserted record whose BLOBs have not been + written yet. The secondary index record + can be safely removed, because it cannot + possibly refer to this incomplete + clustered index record. (Insert would + always first be completed for the + clustered index record, then proceed to + secondary indexes.) */ + + if (entry && row_vers_vc_matches_cluster( + also_curr, rec, entry, + clust_index, clust_offsets, + index, ientry, roll_ptr, + trx_id, NULL, &vrow, mtr)) { + goto safe_to_purge; } } clust_offsets = rec_get_offsets(rec, clust_index, NULL, @@ -999,7 +977,7 @@ row_vers_old_has_index_entry( a different binary value in a char field, but the collation identifies the old and new value anyway! */ if (entry && !dtuple_coll_cmp(ientry, entry)) { - +safe_to_purge: mem_heap_free(heap); if (v_heap) { @@ -1096,13 +1074,7 @@ row_vers_old_has_index_entry( and new value anyway! */ if (entry && !dtuple_coll_cmp(ientry, entry)) { - - mem_heap_free(heap); - if (v_heap) { - mem_heap_free(v_heap); - } - - return(TRUE); + goto safe_to_purge; } } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index ad246a231f8..9cb8c0ab729 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -78,10 +78,7 @@ Created 10/8/1995 Heikki Tuuri #include "fil0crypt.h" #include "fil0pagecompress.h" #include "btr0scrub.h" -#ifdef WITH_WSREP -extern int wsrep_debug; -extern int wsrep_trx_is_aborting(void *thd_ptr); -#endif + /* The following is the maximum allowed duration of a lock wait. */ UNIV_INTERN ulong srv_fatal_semaphore_wait_threshold = DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT; diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 54e3a77e7be..c3590302c8e 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -638,6 +638,9 @@ trx_rollback_active( roll_node_t* roll_node; dict_table_t* table; ibool dictionary_locked = FALSE; + const trx_id_t trx_id = trx->id; + + ut_ad(trx_id); heap = mem_heap_create(512); @@ -709,14 +712,13 @@ trx_rollback_active( } } + ib::info() << "Rolled back recovered transaction " << trx_id; + func_exit: if (dictionary_locked) { row_mysql_unlock_data_dictionary(trx); } - ib::info() << "Rollback of trx with id " << ib::hex(trx->id) - << " completed"; - mem_heap_free(heap); trx_roll_crash_recv_trx = NULL; diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index fe50a471e74..a5cafe5895a 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. +Copyright (c) 2015, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc index 25705cb0e2c..a8ff700847a 100644 --- a/storage/innobase/ut/ut0ut.cc +++ b/storage/innobase/ut/ut0ut.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -40,6 +40,9 @@ Created 5/11/1994 Heikki Tuuri #include "my_cpu.h" #ifdef _WIN32 +typedef VOID(WINAPI *time_fn)(LPFILETIME); +static time_fn ut_get_system_time_as_file_time = GetSystemTimeAsFileTime; + /*****************************************************************//** NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix epoch starts from 1970/1/1. For selection of constant see: @@ -65,7 +68,7 @@ ut_gettimeofday( return(-1); } - GetSystemTimeAsFileTime(&ft); + ut_get_system_time_as_file_time(&ft); tm = (int64_t) ft.dwHighDateTime << 32; tm |= ft.dwLowDateTime; diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 321837bd425..17b56096114 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2149,11 +2149,16 @@ void ha_maria::start_bulk_insert(ha_rows rows, uint flags) safety net for now, we don't remove the test of file->state->records, because there is uncertainty on what will happen during repair if the two states disagree. + + We also have to check in case of transactional tables that the + user has not used LOCK TABLE on the table twice. */ if ((file->state->records == 0) && (share->state.state.records == 0) && can_enable_indexes && (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) && - (file->lock.type == TL_WRITE || file->lock.type == TL_UNLOCK)) + (file->lock.type == TL_WRITE || file->lock.type == TL_UNLOCK) && + (!share->have_versioning || !share->now_transactional || + file->used_tables->use_count == 1)) { /** @todo for a single-row INSERT SELECT, we will go into repair, which diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index fec8bf2d72d..8eaac990741 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -6166,7 +6166,7 @@ my_bool write_hook_for_undo_row_insert(enum translog_record_type type /** - @brief Upates "records" and calls the generic UNDO hook + @brief Updates "records" and calls the generic UNDO hook @return Operation status, always 0 (success) */ @@ -6316,8 +6316,8 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, uchar *buff, *dir; uint result; MARIA_PINNED_PAGE page_link; - enum pagecache_page_lock unlock_method; - enum pagecache_page_pin unpin_method; + enum pagecache_page_lock lock_method; + enum pagecache_page_pin pin_method; my_off_t end_of_page; uint error; DBUG_ENTER("_ma_apply_redo_insert_row_head_or_tail"); @@ -6345,8 +6345,8 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, fill it entirely with zeroes, then the REDO will put correct data on it. */ - unlock_method= PAGECACHE_LOCK_WRITE; - unpin_method= PAGECACHE_PIN; + lock_method= PAGECACHE_LOCK_WRITE; + pin_method= PAGECACHE_PIN; DBUG_ASSERT(rownr == 0 && new_page); if (rownr != 0 || !new_page) @@ -6361,8 +6361,8 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, } else { - unlock_method= PAGECACHE_LOCK_LEFT_WRITELOCKED; - unpin_method= PAGECACHE_PIN_LEFT_PINNED; + lock_method= PAGECACHE_LOCK_LEFT_WRITELOCKED; + pin_method= PAGECACHE_PIN_LEFT_PINNED; share->pagecache->readwrite_flags&= ~MY_WME; buff= pagecache_read(share->pagecache, &info->dfile, @@ -6463,11 +6463,11 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, this group, for this page, would be skipped) and unpin then. */ result= 0; - if (unlock_method == PAGECACHE_LOCK_WRITE && + if (lock_method == PAGECACHE_LOCK_WRITE && pagecache_write(share->pagecache, &info->dfile, page, 0, buff, PAGECACHE_PLAIN_PAGE, - unlock_method, unpin_method, + lock_method, pin_method, PAGECACHE_WRITE_DELAY, &page_link.link, LSN_IMPOSSIBLE)) result= my_errno; @@ -6488,7 +6488,7 @@ crashed_file: _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); err: error= my_errno; - if (unlock_method == PAGECACHE_LOCK_LEFT_WRITELOCKED) + if (lock_method == PAGECACHE_LOCK_LEFT_WRITELOCKED) pagecache_unlock_by_link(share->pagecache, page_link.link, PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN, LSN_IMPOSSIBLE, diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c index ebf9beab10f..e41b7dd1177 100644 --- a/storage/maria/ma_page.c +++ b/storage/maria/ma_page.c @@ -224,17 +224,35 @@ my_bool _ma_write_keypage(MARIA_PAGE *page, enum pagecache_page_lock lock, #endif page_cleanup(share, page); - res= pagecache_write(share->pagecache, - &share->kfile, - (pgcache_page_no_t) (page->pos / block_size), - level, buff, share->page_type, - lock, - lock == PAGECACHE_LOCK_LEFT_WRITELOCKED ? - PAGECACHE_PIN_LEFT_PINNED : - (lock == PAGECACHE_LOCK_WRITE_UNLOCK ? - PAGECACHE_UNPIN : PAGECACHE_PIN), - PAGECACHE_WRITE_DELAY, &page_link.link, - LSN_IMPOSSIBLE); + { + PAGECACHE_BLOCK_LINK **link; + enum pagecache_page_pin pin; + if (lock == PAGECACHE_LOCK_LEFT_WRITELOCKED) + { + pin= PAGECACHE_PIN_LEFT_PINNED; + link= &page_link.link; + } + else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK) + { + pin= PAGECACHE_UNPIN; + /* + We unlock this page so link should be 0 to prevent it usage + even accidentally + */ + link= NULL; + } + else + { + pin= PAGECACHE_PIN; + link= &page_link.link; + } + res= pagecache_write(share->pagecache, + &share->kfile, + (pgcache_page_no_t) (page->pos / block_size), + level, buff, share->page_type, + lock, pin, PAGECACHE_WRITE_DELAY, link, + LSN_IMPOSSIBLE); + } if (lock == PAGECACHE_LOCK_WRITE) { diff --git a/storage/maria/ma_state.c b/storage/maria/ma_state.c index fc79bbfac91..a3c2d50bdc6 100644 --- a/storage/maria/ma_state.c +++ b/storage/maria/ma_state.c @@ -58,6 +58,7 @@ my_bool _ma_setup_live_state(MARIA_HA *info) MARIA_USED_TABLES *tables; MARIA_STATE_HISTORY *history; DBUG_ENTER("_ma_setup_live_state"); + DBUG_PRINT("enter", ("info: %p", info)); DBUG_ASSERT(share->lock_key_trees); @@ -110,6 +111,8 @@ my_bool _ma_setup_live_state(MARIA_HA *info) end: info->state_start= &tables->state_start; info->state= &tables->state_current; + info->used_tables= tables; + tables->use_count++; /* Mark in transaction state if we are not using transid (versioning) @@ -118,6 +121,7 @@ end: */ tables->state_current.no_transid|= !(info->row_flag & ROW_FLAG_TRANSID); + DBUG_PRINT("exit", ("tables: %p info->state: %p", tables, info->state)); DBUG_RETURN(0); } @@ -241,9 +245,10 @@ void _ma_reset_state(MARIA_HA *info) DBUG_ENTER("_ma_reset_state"); /* Always true if share->now_transactional is set */ - if (history) + if (history && share->have_versioning) { MARIA_STATE_HISTORY *next; + DBUG_PRINT("info", ("resetting history")); /* Set the current history to current state */ share->state_history->state= share->state.state; @@ -255,7 +260,7 @@ void _ma_reset_state(MARIA_HA *info) my_free(history); } share->state_history->next= 0; - share->state_history->trid= 0; /* Visibile for all */ + share->state_history->trid= 0; /* Visible for all */ } DBUG_VOID_RETURN; } @@ -597,7 +602,7 @@ void _ma_remove_table_from_trnman(MARIA_SHARE *share, TRN *trn) SYNOPSIS _ma_get_status() - param Pointer to Myisam handler + param Pointer to Aria handler concurrent_insert Set to 1 if we are going to do concurrent inserts (THR_WRITE_CONCURRENT_INSERT was used) */ @@ -627,6 +632,8 @@ void _ma_block_get_status(void* param, my_bool concurrent_insert) my_bool _ma_block_start_trans(void* param) { MARIA_HA *info=(MARIA_HA*) param; + DBUG_ENTER("_ma_block_start_trans"); + if (info->s->lock_key_trees) { /* @@ -634,7 +641,7 @@ my_bool _ma_block_start_trans(void* param) out of memory conditions) TODO: Fix this by having one extra state pre-allocated */ - return _ma_setup_live_state(info); + DBUG_RETURN(_ma_setup_live_state(info)); } else { @@ -663,9 +670,9 @@ my_bool _ma_block_start_trans(void* param) Assume for now that this doesn't fail (It can only fail in out of memory conditions) */ - return maria_create_trn_hook(info) != 0; + DBUG_RETURN(maria_create_trn_hook(info) != 0); } - return 0; + DBUG_RETURN(0); } @@ -697,7 +704,7 @@ my_bool _ma_block_check_status(void *param __attribute__((unused))) my_bool _ma_block_start_trans_no_versioning(void* param) { MARIA_HA *info=(MARIA_HA*) param; - DBUG_ENTER("_ma_block_get_status_no_version"); + DBUG_ENTER("_ma_block_start_trans_no_versioning"); DBUG_ASSERT(info->s->base.born_transactional && !info->s->lock_key_trees); info->state->changed= 0; /* from _ma_reset_update_flag() */ @@ -722,6 +729,8 @@ my_bool _ma_block_start_trans_no_versioning(void* param) void maria_versioning(MARIA_HA *info, my_bool versioning) { MARIA_SHARE *share= info->s; + DBUG_ENTER("maria_versioning"); + /* For now, this is a hack */ if (share->have_versioning) { @@ -738,6 +747,7 @@ void maria_versioning(MARIA_HA *info, my_bool versioning) info->state= &share->state.state; /* Change global values by default */ info->state_start= info->state; /* Initial values */ } + DBUG_VOID_RETURN; } diff --git a/storage/maria/ma_state.h b/storage/maria/ma_state.h index 5ff6dc26337..a86aada94fd 100644 --- a/storage/maria/ma_state.h +++ b/storage/maria/ma_state.h @@ -34,6 +34,7 @@ typedef struct st_used_tables { struct st_maria_share *share; MARIA_STATUS_INFO state_current; MARIA_STATUS_INFO state_start; + uint use_count; } MARIA_USED_TABLES; diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index ff68b4bb9a2..cfa62ffec58 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -341,7 +341,7 @@ err: for (j=0 ; j < share->base.keys ; j++) maria_flush_bulk_insert(info, j); } - info->errkey= (int) i; + info->errkey= i < share->base.keys ? (int) i : -1; /* We delete keys in the reverse order of insertion. This is the order that a rollback would do and is important for CLR_ENDs generated by diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 5a8f6178ed6..f6d16960f5f 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -601,6 +601,7 @@ struct st_maria_handler struct st_ma_transaction *trn; /* Pointer to active transaction */ MARIA_STATUS_INFO *state, state_save; MARIA_STATUS_INFO *state_start; /* State at start of transaction */ + MARIA_USED_TABLES *used_tables; MARIA_ROW cur_row; /* The active row that we just read */ MARIA_ROW new_row; /* Storage for a row during update */ MARIA_KEY last_key; /* Last found key */ @@ -1183,7 +1184,7 @@ extern ulonglong transid_get_packed(MARIA_SHARE *share, const uchar *from); #ifdef IDENTICAL_PAGES_AFTER_RECOVERY void page_cleanup(MARIA_SHARE *share, MARIA_PAGE *page) #else -#define page_cleanup(A,B) while (0) +#define page_cleanup(A,B) do { } while (0) #endif extern MARIA_KEY *_ma_make_key(MARIA_HA *info, MARIA_KEY *int_key, uint keynr, diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c index 69bc2f70f8c..cffe188e855 100644 --- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -233,7 +233,7 @@ int main(int argc __attribute__((unused)), char *argv[]) 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55 }; - uchar *long_buffer= malloc(LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); + uchar *long_buffer; char **default_argv; PAGECACHE pagecache; LSN lsn, lsn_base, first_lsn; @@ -255,6 +255,7 @@ int main(int argc __attribute__((unused)), char *argv[]) } #endif + long_buffer= malloc(LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); load_defaults("my", load_default_groups, &argc, &argv); default_argv= argv; get_options(&argc, &argv); @@ -758,9 +759,12 @@ err: if (maria_log_remove(maria_data_root)) exit(1); + free(long_buffer); + my_uuid_end(); my_free_open_file_info(); my_end(0); + return (MY_TEST(exit_status())); } diff --git a/storage/maria/unittest/ma_test_loghandler_nologs-t.c b/storage/maria/unittest/ma_test_loghandler_nologs-t.c index a6ccfd754de..310345f0885 100644 --- a/storage/maria/unittest/ma_test_loghandler_nologs-t.c +++ b/storage/maria/unittest/ma_test_loghandler_nologs-t.c @@ -191,6 +191,8 @@ int main(int argc __attribute__((unused)), char *argv[]) if (maria_log_remove(maria_data_root)) exit(1); + free(long_buffer); + my_uuid_end(); my_free_open_file_info(); my_end(0); diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp index bc598a37a4d..e0d3fadba00 100644 --- a/storage/mroonga/ha_mroonga.cpp +++ b/storage/mroonga/ha_mroonga.cpp @@ -1754,9 +1754,9 @@ static int mrn_set_geometry(grn_ctx *ctx, grn_obj *buf, #endif #ifdef MRN_HAVE_HTON_ALTER_TABLE_FLAGS -static uint mrn_alter_table_flags(uint flags) +static ulonglong mrn_alter_table_flags(ulonglong flags) { - uint alter_flags = 0; + ulonglong alter_flags = 0; #ifdef HA_INPLACE_ADD_INDEX_NO_READ_WRITE bool is_inplace_index_change; # ifdef MRN_HAVE_ALTER_INFO diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index 89d642c05b3..a73f999fc05 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -4791,8 +4791,7 @@ static int replace_data_file(HA_CHECK *param, MI_INFO *info, File new_file) */ if (info->s->file_map) { - (void) my_munmap((char*) info->s->file_map, - (size_t) info->s->mmaped_length); + (void) my_munmap((char*) info->s->file_map, info->s->mmaped_length); info->s->file_map= NULL; } diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c index c4baca3d89f..cb49ded0c20 100644 --- a/storage/myisam/mi_dynrec.c +++ b/storage/myisam/mi_dynrec.c @@ -95,7 +95,7 @@ my_bool mi_dynmap_file(MI_INFO *info, my_off_t size) #if defined(HAVE_MADVISE) madvise((char*) info->s->file_map, size, MADV_RANDOM); #endif - info->s->mmaped_length= size; + info->s->mmaped_length= (size_t) size; info->s->file_read= mi_mmap_pread; info->s->file_write= mi_mmap_pwrite; DBUG_RETURN(0); @@ -118,8 +118,7 @@ int mi_munmap_file(MI_INFO *info) { int ret; DBUG_ENTER("mi_unmap_file"); - if ((ret= my_munmap((void*) info->s->file_map, - (size_t) info->s->mmaped_length))) + if ((ret= my_munmap((void*) info->s->file_map, info->s->mmaped_length))) DBUG_RETURN(ret); info->s->file_read= mi_nommap_pread; info->s->file_write= mi_nommap_pwrite; diff --git a/storage/myisam/mi_locking.c b/storage/myisam/mi_locking.c index 5e5e56db6e8..d2c74cf9980 100644 --- a/storage/myisam/mi_locking.c +++ b/storage/myisam/mi_locking.c @@ -1,5 +1,5 @@ -/* - Copyright (c) 2000, 2010, Oracle and/or its affiliates +/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2018, MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -113,6 +113,8 @@ int mi_lock_database(MI_INFO *info, int lock_type) share->changed=0; if (myisam_flush) { + if (share->file_map) + my_msync(info->dfile, share->file_map, share->mmaped_length, MS_SYNC); if (mysql_file_sync(share->kfile, MYF(0))) mark_crashed= error= my_errno; if (mysql_file_sync(info->dfile, MYF(0))) @@ -526,6 +528,8 @@ int _mi_writeinfo(register MI_INFO *info, uint operation) #ifdef _WIN32 if (myisam_flush) { + if (share->file_map) + my_msync(info->dfile, share->file_map, share->mmaped_length, MS_SYNC); mysql_file_sync(share->kfile, 0); mysql_file_sync(info->dfile, 0); } diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c index fc32bb7e518..242bc82bd16 100644 --- a/storage/myisam/mi_packrec.c +++ b/storage/myisam/mi_packrec.c @@ -1551,7 +1551,7 @@ void _mi_unmap_file(MI_INFO *info) { DBUG_ASSERT(info->s->options & HA_OPTION_COMPRESS_RECORD); - (void) my_munmap((char*) info->s->file_map, (size_t) info->s->mmaped_length); + (void) my_munmap((char*) info->s->file_map, info->s->mmaped_length); if (myisam_mmap_size != SIZE_T_MAX) { diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h index 2c0c1eec49a..e0e552ec5fa 100644 --- a/storage/myisam/myisamdef.h +++ b/storage/myisam/myisamdef.h @@ -225,7 +225,7 @@ typedef struct st_mi_isam_share THR_LOCK lock; mysql_mutex_t intern_lock; /* Locking for use with _locking */ mysql_rwlock_t *key_root_lock; - my_off_t mmaped_length; + size_t mmaped_length; uint nonmmaped_inserts; /* counter of writing in non-mmaped area */ mysql_rwlock_t mmap_lock; diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt index c3a95299b88..a49cbbcb85c 100644 --- a/storage/rocksdb/CMakeLists.txt +++ b/storage/rocksdb/CMakeLists.txt @@ -208,6 +208,8 @@ TARGET_LINK_LIBRARIES(sst_dump rocksdblib) MYSQL_ADD_EXECUTABLE(mysql_ldb tools/mysql_ldb.cc COMPONENT rocksdb-engine) TARGET_LINK_LIBRARIES(mysql_ldb rocksdb_tools rocksdb_aux_lib) +INSTALL_SCRIPT(myrocks_hotbackup COMPONENT rocksdb-engine) + IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") SET_TARGET_PROPERTIES(rocksdb_tools sst_dump mysql_ldb PROPERTIES COMPILE_FLAGS -frtti) ENDIF() diff --git a/storage/rocksdb/myrocks_hotbackup b/storage/rocksdb/myrocks_hotbackup new file mode 100755 index 00000000000..ef1e934f3fd --- /dev/null +++ b/storage/rocksdb/myrocks_hotbackup @@ -0,0 +1,686 @@ +#!/usr/bin/env python + +from __future__ import division +from optparse import OptionParser +import collections +import signal +import os +import stat +import sys +import re +import commands +import subprocess +import logging +import logging.handlers +import time +import datetime +import shutil +import traceback +import tempfile + +import MySQLdb +import MySQLdb.connections +from MySQLdb import OperationalError, ProgrammingError + +logger = None +opts = None +rocksdb_files = ['MANIFEST', 'CURRENT', 'OPTIONS'] +rocksdb_data_suffix = '.sst' +rocksdb_wal_suffix = '.log' +exclude_files = ['master.info', 'relay-log.info', 'worker-relay-log.info', + 'auto.cnf', 'gaplock.log', 'ibdata', 'ib_logfile', '.trash'] +wdt_bin = 'wdt' + +def is_manifest(fname): + for m in rocksdb_files: + if fname.startswith(m): + return True + return False + +class Writer(object): + a = None + def __init__(self): + a = None + +class StreamWriter(Writer): + stream_cmd= '' + + def __init__(self, stream_option): + super(StreamWriter, self).__init__() + if stream_option == 'tar': + self.stream_cmd= 'tar chf -' + elif stream_option == 'xbstream': + self.stream_cmd= 'xbstream -c' + else: + raise Exception("Only tar or xbstream is supported as streaming option.") + + def write(self, file_name): + rc= os.system(self.stream_cmd + " " + file_name) + if (rc != 0): + raise Exception("Got error on stream write: " + str(rc) + " " + file_name) + + +class MiscFilesProcessor(): + datadir = None + wildcard = r'.*\.[frm|MYD|MYI|MAD|MAI|MRG|TRG|TRN|ARM|ARZ|CSM|CSV|opt|par]' + regex = None + start_backup_time = None + skip_check_frm_timestamp = None + + def __init__(self, datadir, skip_check_frm_timestamp, start_backup_time): + self.datadir = datadir + self.regex = re.compile(self.wildcard) + self.skip_check_frm_timestamp = skip_check_frm_timestamp + self.start_backup_time = start_backup_time + + def process_db(self, db): + # do nothing + pass + + def process_file(self, path): + # do nothing + pass + + def check_frm_timestamp(self, fname, path): + if not self.skip_check_frm_timestamp and fname.endswith('.frm'): + if os.path.getmtime(path) > self.start_backup_time: + logger.error('FRM file %s was updated after starting backups. ' + 'Schema could have changed and the resulting copy may ' + 'not be valid. Aborting. ' + '(backup time: %s, file modifled time: %s)', + path, datetime.datetime.fromtimestamp(self.start_backup_time).strftime('%Y-%m-%d %H:%M:%S'), + datetime.datetime.fromtimestamp(os.path.getmtime(path)).strftime('%Y-%m-%d %H:%M:%S')) + raise Exception("Inconsistent frm file timestamp"); + + def process(self): + os.chdir(self.datadir) + for db in self.get_databases(): + logger.info("Starting MySQL misc file traversal from database %s..", db) + self.process_db(db) + for f in self.get_files(db): + if self.match(f): + rel_path = os.path.join(db, f) + self.check_frm_timestamp(f, rel_path) + self.process_file(rel_path) + logger.info("Traversing misc files from data directory..") + for f in self.get_files(""): + should_skip = False + for e in exclude_files: + if f.startswith(e) or f.endswith(e): + logger.info("Skipping %s", f) + should_skip = True + break + if not should_skip: + self.process_file(f) + + def match(self, filename): + if self.regex.match(filename): + return True + else: + return False + + def get_databases(self): + dbs = [] + dirs = [ d for d in os.listdir(self.datadir) \ + if not os.path.isfile(os.path.join(self.datadir,d))] + for db in dirs: + if not db.startswith('.') and not self._is_socket(db) and not db == "#rocksdb": + dbs.append(db) + return dbs + + def get_files(self, db): + dbdir = self.datadir + "/" + db + return [ f for f in os.listdir(dbdir) \ + if os.path.isfile(os.path.join(dbdir,f))] + + def _is_socket(self, item): + mode = os.stat(os.path.join(self.datadir, item)).st_mode + if stat.S_ISSOCK(mode): + return True + return False + + +class MySQLBackup(MiscFilesProcessor): + writer = None + + def __init__(self, datadir, writer, skip_check_frm_timestamp, start_backup_time): + MiscFilesProcessor.__init__(self, datadir, skip_check_frm_timestamp, start_backup_time) + self.writer = writer + + def process_file(self, fname): # overriding base class + self.writer.write(fname) + + +class MiscFilesLinkCreator(MiscFilesProcessor): + snapshot_dir = None + + def __init__(self, datadir, snapshot_dir, skip_check_frm_timestamp, start_backup_time): + MiscFilesProcessor.__init__(self, datadir, skip_check_frm_timestamp, start_backup_time) + self.snapshot_dir = snapshot_dir + + def process_db(self, db): + snapshot_sub_dir = os.path.join(self.snapshot_dir, db) + os.makedirs(snapshot_sub_dir) + + def process_file(self, path): + dst_path = os.path.join(self.snapshot_dir, path) + os.link(path, dst_path) + + +# RocksDB backup +class RocksDBBackup(): + source_dir = None + writer = None + # sst files sent in this backup round + sent_sst = {} + # target sst files in this backup round + target_sst = {} + # sst files sent in all backup rounds + total_sent_sst= {} + # sum of sst file size sent in this backup round + sent_sst_size = 0 + # sum of target sst file size in this backup round + # if sent_sst_size becomes equal to target_sst_size, + # it means the backup round finished backing up all sst files + target_sst_size = 0 + # sum of all sst file size sent all backup rounds + total_sent_sst_size= 0 + # sum of all target sst file size from all backup rounds + total_target_sst_size = 0 + show_progress_size_interval= 1073741824 # 1GB + wal_files= [] + manifest_files= [] + finished= False + + def __init__(self, source_dir, writer, prev): + self.source_dir = source_dir + self.writer = writer + os.chdir(self.source_dir) + self.init_target_files(prev) + + def init_target_files(self, prev): + sst = {} + self.sent_sst = {} + self.target_sst= {} + self.total_sent_sst = {} + self.sent_sst_size = 0 + self.target_sst_size = 0 + self.total_sent_sst_size= 0 + self.total_target_sst_size= 0 + self.wal_files= [] + self.manifest_files= [] + + for f in os.listdir(self.source_dir): + if f.endswith(rocksdb_data_suffix): + # exactly the same file (same size) was sent in previous backup rounds + if prev is not None and f in prev.total_sent_sst and int(os.stat(f).st_size) == prev.total_sent_sst[f]: + continue + sst[f]= int(os.stat(f).st_size) + self.target_sst_size = self.target_sst_size + os.stat(f).st_size + elif is_manifest(f): + self.manifest_files.append(f) + elif f.endswith(rocksdb_wal_suffix): + self.wal_files.append(f) + self.target_sst= collections.OrderedDict(sorted(sst.items())) + + if prev is not None: + self.total_sent_sst = prev.total_sent_sst + self.total_sent_sst_size = prev.total_sent_sst_size + self.total_target_sst_size = self.target_sst_size + prev.total_sent_sst_size + else: + self.total_target_sst_size = self.target_sst_size + + def do_backup_single(self, fname): + self.writer.write(fname) + os.remove(fname) + + def do_backup_sst(self, fname, size): + self.do_backup_single(fname) + self.sent_sst[fname]= size + self.total_sent_sst[fname]= size + self.sent_sst_size = self.sent_sst_size + size + self.total_sent_sst_size = self.total_sent_sst_size + size + + def do_backup_manifest(self): + for f in self.manifest_files: + self.do_backup_single(f) + + def do_backup_wal(self): + for f in self.wal_files: + self.do_backup_single(f) + + # this is the last snapshot round. backing up all the rest files + def do_backup_final(self): + logger.info("Backup WAL..") + self.do_backup_wal() + logger.info("Backup Manifest..") + self.do_backup_manifest() + self.do_cleanup() + self.finished= True + + def do_cleanup(self): + shutil.rmtree(self.source_dir) + logger.info("Cleaned up checkpoint from %s", self.source_dir) + + def do_backup_until(self, time_limit): + logger.info("Starting backup from snapshot: target files %d", len(self.target_sst)) + start_time= time.time() + last_progress_time= start_time + progress_size= 0 + for fname, size in self.target_sst.iteritems(): + self.do_backup_sst(fname, size) + progress_size= progress_size + size + elapsed_seconds = time.time() - start_time + progress_seconds = time.time() - last_progress_time + + if self.should_show_progress(size): + self.show_progress(progress_size, progress_seconds) + progress_size=0 + last_progress_time= time.time() + + if elapsed_seconds > time_limit and self.has_sent_all_sst() is False: + logger.info("Snapshot round finished. Elapsed Time: %5.2f. Remaining sst files: %d", + elapsed_seconds, len(self.target_sst) - len(self.sent_sst)) + self.do_cleanup() + break; + if self.has_sent_all_sst(): + self.do_backup_final() + + return self + + def should_show_progress(self, size): + if int(self.total_sent_sst_size/self.show_progress_size_interval) > int((self.total_sent_sst_size-size)/self.show_progress_size_interval): + return True + else: + return False + + def show_progress(self, size, seconds): + logger.info("Backup Progress: %5.2f%% Sent %6.2f GB of %6.2f GB data, Transfer Speed: %6.2f MB/s", + self.total_sent_sst_size*100/self.total_target_sst_size, + self.total_sent_sst_size/1024/1024/1024, + self.total_target_sst_size/1024/1024/1024, + size/seconds/1024/1024) + + def print_backup_report(self): + logger.info("Sent %6.2f GB of sst files, %d files in total.", + self.total_sent_sst_size/1024/1024/1024, + len(self.total_sent_sst)) + + def has_sent_all_sst(self): + if self.sent_sst_size == self.target_sst_size: + return True + return False + + +class MySQLUtil: + @staticmethod + def connect(user, password, port, socket=None): + if socket: + dbh = MySQLdb.Connect(user=user, + passwd=password, + unix_socket=socket) + else: + dbh = MySQLdb.Connect(user=user, + passwd=password, + port=port, + host="127.0.0.1") + return dbh + + @staticmethod + def create_checkpoint(dbh, checkpoint_dir): + sql = ("SET GLOBAL rocksdb_create_checkpoint='{0}'" + .format(checkpoint_dir)) + cur= dbh.cursor() + cur.execute(sql) + cur.close() + + @staticmethod + def get_datadir(dbh): + sql = "SELECT @@datadir" + cur = dbh.cursor() + cur.execute(sql) + row = cur.fetchone() + return row[0] + + +class BackupRunner: + datadir = None + start_backup_time = None + + def __init__(self, datadir): + self.datadir = datadir + self.start_backup_time = time.time() + + def start_backup_round(self, backup_round, prev_backup): + def signal_handler(*args): + logger.info("Got signal. Exit") + if b is not None: + logger.info("Cleaning up snapshot directory..") + b.do_cleanup() + sys.exit(1) + + b = None + try: + signal.signal(signal.SIGINT, signal_handler) + w = None + if opts.output_stream: + w = StreamWriter(opts.output_stream) + else: + raise Exception("Currently only streaming backup is supported.") + + snapshot_dir = opts.checkpoint_directory + '/' + str(backup_round) + dbh = MySQLUtil.connect(opts.mysql_user, + opts.mysql_password, + opts.mysql_port, + opts.mysql_socket) + if not self.datadir: + self.datadir = MySQLUtil.get_datadir(dbh) + logger.info("Set datadir: %s", self.datadir) + logger.info("Creating checkpoint at %s", snapshot_dir) + MySQLUtil.create_checkpoint(dbh, snapshot_dir) + logger.info("Created checkpoint at %s", snapshot_dir) + b = RocksDBBackup(snapshot_dir, w, prev_backup) + return b.do_backup_until(opts.checkpoint_interval) + except Exception as e: + logger.error(e) + logger.error(traceback.format_exc()) + if b is not None: + logger.info("Cleaning up snapshot directory.") + b.do_cleanup() + sys.exit(1) + + def backup_mysql(self): + try: + w = None + if opts.output_stream: + w = StreamWriter(opts.output_stream) + else: + raise Exception("Currently only streaming backup is supported.") + b = MySQLBackup(self.datadir, w, opts.skip_check_frm_timestamp, + self.start_backup_time) + logger.info("Taking MySQL misc backups..") + b.process() + logger.info("MySQL misc backups done.") + except Exception as e: + logger.error(e) + logger.error(traceback.format_exc()) + sys.exit(1) + + +class WDTBackup: + datadir = None + start_backup_time = None + + def __init__(self, datadir): + self.datadir = datadir + self.start_backup_time = time.time() + + def cleanup(self, snapshot_dir, server_log): + if server_log: + server_log.seek(0) + logger.info("WDT server log:") + logger.info(server_log.read()) + server_log.close() + if snapshot_dir: + logger.info("Cleaning up snapshot dir %s", snapshot_dir) + shutil.rmtree(snapshot_dir) + + def backup_with_timeout(self, backup_round): + def signal_handler(*args): + logger.info("Got signal. Exit") + self.cleanup(snapshot_dir, server_log) + sys.exit(1) + + logger.info("Starting backup round %d", backup_round) + snapshot_dir = None + server_log = None + try: + signal.signal(signal.SIGINT, signal_handler) + # create rocksdb snapshot + snapshot_dir = os.path.join(opts.checkpoint_directory, str(backup_round)) + dbh = MySQLUtil.connect(opts.mysql_user, + opts.mysql_password, + opts.mysql_port, + opts.mysql_socket) + logger.info("Creating checkpoint at %s", snapshot_dir) + MySQLUtil.create_checkpoint(dbh, snapshot_dir) + logger.info("Created checkpoint at %s", snapshot_dir) + + # get datadir if not provided + if not self.datadir: + self.datadir = MySQLUtil.get_datadir(dbh) + logger.info("Set datadir: %s", self.datadir) + + # create links for misc files + link_creator = MiscFilesLinkCreator(self.datadir, snapshot_dir, + opts.skip_check_frm_timestamp, + self.start_backup_time) + link_creator.process() + + current_path = os.path.join(opts.backupdir, "CURRENT") + + # construct receiver cmd, using the data directory as recovery-id. + # we delete the current file because it is not append-only, therefore not + # resumable. + remote_cmd = ( + "ssh {0} rm -f {1}; " + "{2} -directory {3} -enable_download_resumption " + "-recovery_id {4} -start_port 0 -abort_after_seconds {5} {6}" + ).format(opts.destination, + current_path, + wdt_bin, + opts.backupdir, + self.datadir, + opts.checkpoint_interval, + opts.extra_wdt_receiver_options) + logger.info("WDT remote cmd %s", remote_cmd) + server_log = tempfile.TemporaryFile() + remote_process = subprocess.Popen(remote_cmd.split(), + stdout=subprocess.PIPE, + stderr=server_log) + wdt_url = remote_process.stdout.readline().strip() + if not wdt_url: + raise Exception("Unable to get connection url from wdt receiver") + sender_cmd = ( + "{0} -connection_url \'{1}\' -directory {2} -app_name=myrocks " + "-avg_mbytes_per_sec {3} " + "-enable_download_resumption -abort_after_seconds {4} {5}" + ).format(wdt_bin, + wdt_url, + snapshot_dir, + opts.avg_mbytes_per_sec, + opts.checkpoint_interval, + opts.extra_wdt_sender_options) + sender_status = os.system(sender_cmd) >> 8 + remote_status = remote_process.wait() + self.cleanup(snapshot_dir, server_log) + # TODO: handle retryable and non-retyable errors differently + return (sender_status == 0 and remote_status == 0) + + except Exception as e: + logger.error(e) + logger.error(traceback.format_exc()) + self.cleanup(snapshot_dir, server_log) + sys.exit(1) + + +def backup_using_wdt(): + if not opts.destination: + logger.error("Must provide remote destination when using WDT") + sys.exit(1) + + # TODO: detect whether WDT is installed + logger.info("Backing up myrocks to %s using WDT", opts.destination) + wdt_backup = WDTBackup(opts.datadir) + finished = False + backup_round = 1 + while not finished: + start_time = time.time() + finished = wdt_backup.backup_with_timeout(backup_round) + end_time = time.time() + duration_seconds = end_time - start_time + if (not finished) and (duration_seconds < opts.checkpoint_interval): + # round finished before timeout + sleep_duration = (opts.checkpoint_interval - duration_seconds) + logger.info("Sleeping for %f seconds", sleep_duration) + time.sleep(sleep_duration) + + backup_round = backup_round + 1 + logger.info("Finished myrocks backup using WDT") + + +def init_logger(): + global logger + logger = logging.getLogger('myrocks_hotbackup') + logger.setLevel(logging.INFO) + h1= logging.StreamHandler(sys.stderr) + f = logging.Formatter("%(asctime)s.%(msecs)03d %(levelname)s %(message)s", + "%Y-%m-%d %H:%M:%S") + h1.setFormatter(f) + logger.addHandler(h1) + +backup_wdt_usage = ("Backup using WDT: myrocks_hotbackup " + "--user=root --password=pw --stream=wdt " + "--checkpoint_dir=<directory where temporary backup hard links " + "are created> --destination=<remote host name> --backup_dir=" + "<remote directory name>. This has to be executed at the src " + "host.") +backup_usage= "Backup: set -o pipefail; myrocks_hotbackup --user=root --password=pw --port=3306 --checkpoint_dir=<directory where temporary backup hard links are created> | ssh -o NoneEnabled=yes remote_server 'tar -xi -C <directory on remote server where backups will be sent>' . You need to execute backup command on a server where you take backups." +move_back_usage= "Move-Back: myrocks_hotbackup --move_back --datadir=<dest mysql datadir> --rocksdb_datadir=<dest rocksdb datadir> --rocksdb_waldir=<dest rocksdb wal dir> --backup_dir=<where backup files are stored> . You need to execute move-back command on a server where backup files are sent." + + +def parse_options(): + global opts + parser = OptionParser(usage = "\n\n" + backup_usage + "\n\n" + \ + backup_wdt_usage + "\n\n" + move_back_usage) + parser.add_option('-i', '--interval', type='int', dest='checkpoint_interval', + default=300, + help='Number of seconds to renew checkpoint') + parser.add_option('-c', '--checkpoint_dir', type='string', dest='checkpoint_directory', + default='/data/mysql/backup/snapshot', + help='Local directory name where checkpoints will be created.') + parser.add_option('-d', '--datadir', type='string', dest='datadir', + default=None, + help='backup mode: src MySQL datadir. move_back mode: dest MySQL datadir') + parser.add_option('-s', '--stream', type='string', dest='output_stream', + default='tar', + help='Setting streaming backup options. Currently tar, WDT ' + 'and xbstream are supported. Default is tar') + parser.add_option('--destination', type='string', dest='destination', + default='', + help='Remote server name. Only used for WDT mode so far.') + parser.add_option('--avg_mbytes_per_sec', type='int', + dest='avg_mbytes_per_sec', + default=500, + help='Average backup rate in MBytes/sec. WDT only.') + parser.add_option('--extra_wdt_sender_options', type='string', + dest='extra_wdt_sender_options', + default='', + help='Extra options for WDT sender') + parser.add_option('--extra_wdt_receiver_options', type='string', + dest='extra_wdt_receiver_options', + default='', + help='Extra options for WDT receiver') + parser.add_option('-u', '--user', type='string', dest='mysql_user', + default='root', + help='MySQL user name') + parser.add_option('-p', '--password', type='string', dest='mysql_password', + default='', + help='MySQL password name') + parser.add_option('-P', '--port', type='int', dest='mysql_port', + default=3306, + help='MySQL port number') + parser.add_option('-S', '--socket', type='string', dest='mysql_socket', + default=None, + help='MySQL socket path. Takes precedence over --port.') + parser.add_option('-m', '--move_back', action='store_true', dest='move_back', + default=False, + help='Moving MyRocks backup files to proper locations.') + parser.add_option('-r', '--rocksdb_datadir', type='string', dest='rocksdb_datadir', + default=None, + help='RocksDB target data directory where backup data files will be moved. Must be empty.') + parser.add_option('-w', '--rocksdb_waldir', type='string', dest='rocksdb_waldir', + default=None, + help='RocksDB target data directory where backup wal files will be moved. Must be empty.') + parser.add_option('-b', '--backup_dir', type='string', dest='backupdir', + default=None, + help='backup mode for WDT: Remote directory to store ' + 'backup. move_back mode: Locations where backup ' + 'files are stored.') + parser.add_option('-f', '--skip_check_frm_timestamp', + dest='skip_check_frm_timestamp', + action='store_true', default=False, + help='skipping to check if frm files are updated after starting backup.') + parser.add_option('-D', '--debug_signal_file', type='string', dest='debug_signal_file', + default=None, + help='debugging purpose: waiting until the specified file is created') + + opts, args = parser.parse_args() + + +def create_moveback_dir(directory): + if not os.path.exists(directory): + os.makedirs(directory) + else: + for f in os.listdir(directory): + logger.error("Directory %s has file or directory %s!", directory, f) + raise + +def print_move_back_usage(): + logger.warning(move_back_usage) + +def move_back(): + if opts.rocksdb_datadir is None or opts.rocksdb_waldir is None or opts.backupdir is None or opts.datadir is None: + print_move_back_usage() + sys.exit() + create_moveback_dir(opts.datadir) + create_moveback_dir(opts.rocksdb_datadir) + create_moveback_dir(opts.rocksdb_waldir) + + os.chdir(opts.backupdir) + for f in os.listdir(opts.backupdir): + if os.path.isfile(os.path.join(opts.backupdir,f)): + if f.endswith(rocksdb_wal_suffix): + shutil.move(f, opts.rocksdb_waldir) + elif f.endswith(rocksdb_data_suffix) or is_manifest(f): + shutil.move(f, opts.rocksdb_datadir) + else: + shutil.move(f, opts.datadir) + else: #directory + if f.endswith('.rocksdb'): + continue + shutil.move(f, opts.datadir) + +def start_backup(): + logger.info("Starting backup.") + runner = BackupRunner(opts.datadir) + b = None + backup_round= 1 + while True: + b = runner.start_backup_round(backup_round, b) + backup_round = backup_round + 1 + if b.finished is True: + b.print_backup_report() + logger.info("RocksDB Backup Done.") + break + if opts.debug_signal_file: + while not os.path.exists(opts.debug_signal_file): + logger.info("Waiting until %s is created..", opts.debug_signal_file) + time.sleep(1) + runner.backup_mysql() + logger.info("All Backups Done.") + + +def main(): + parse_options() + init_logger() + + if opts.move_back is True: + move_back() + elif opts.output_stream == 'wdt': + backup_using_wdt() + else: + start_backup() + +if __name__ == "__main__": + main() diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result index b931a61e233..90f28929db6 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result @@ -38,20 +38,20 @@ LOAD DATA INFILE <input_file> INTO TABLE t2; LOAD DATA INFILE <input_file> INTO TABLE t3; set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned 0 N ANALYZE TABLE t1, t2, t3; Table Op Msg_type Msg_text test.t1 analyze status OK test.t2 analyze status OK test.t3 analyze status OK SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned 0 N select count(pk) from t1; count(pk) 5000000 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result index 947f67434a5..c24d987a906 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result @@ -38,20 +38,20 @@ LOAD DATA INFILE <input_file> INTO TABLE t2; LOAD DATA INFILE <input_file> INTO TABLE t3; set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned 0 N ANALYZE TABLE t1, t2, t3; Table Op Msg_type Msg_text test.t1 analyze status OK test.t2 analyze status OK test.t3 analyze status OK SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned 0 N select count(pk) from t1; count(pk) 5000000 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result index 6c38e030afb..b851133ab18 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result @@ -38,20 +38,20 @@ LOAD DATA INFILE <input_file> INTO TABLE t2; LOAD DATA INFILE <input_file> INTO TABLE t3; set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned 0 N ANALYZE TABLE t1, t2, t3; Table Op Msg_type Msg_text test.t1 analyze status OK test.t2 analyze status OK test.t3 analyze status OK SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned 0 N select count(pk) from t1; count(pk) 5000000 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result index e566691af28..efd7c40ed69 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result @@ -38,20 +38,20 @@ LOAD DATA INFILE <input_file> INTO TABLE t2; LOAD DATA INFILE <input_file> INTO TABLE t3; set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned 0 N ANALYZE TABLE t1, t2, t3; Table Op Msg_type Msg_text test.t1 analyze status OK test.t2 analyze status OK test.t3 analyze status OK SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned 0 N select count(pk) from t1; count(pk) 5000000 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result index 2a7c7bd69fd..2bc8193e94f 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result @@ -67,20 +67,20 @@ LOAD DATA INFILE <input_file> INTO TABLE t2; LOAD DATA INFILE <input_file> INTO TABLE t3; set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned 0 N ANALYZE TABLE t1, t2, t3; Table Op Msg_type Msg_text test.t1 analyze status OK test.t2 analyze status OK test.t3 analyze status OK SHOW TABLE STATUS WHERE name LIKE 't%'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL -t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL -t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned 0 N select count(a) from t1; count(a) 5000000 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result index 62875e378a4..be9e6d1167a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result @@ -1,14 +1,14 @@ CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT); INSERT INTO t1 VALUES (5); SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL 0 N INSERT INTO t1 VALUES ('538647864786478647864'); Warnings: Warning 1264 Out of range value for column 'pk' at row 1 SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL 0 N INSERT INTO t1 VALUES (); ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY' SELECT * FROM t1; @@ -16,6 +16,6 @@ pk 5 9223372036854775807 SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL 0 N DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result index 9b084e63cd5..875950336b6 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result @@ -1407,8 +1407,8 @@ drop table t1; create table t1 (i int primary key auto_increment) engine=RocksDB; insert into t1 values (null),(null); show table status like 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL 0 N drop table t1; # # Fix Issue #4: Crash when using pseudo-unique keys @@ -2501,8 +2501,8 @@ DROP TABLE t1; CREATE TABLE t1(a INT AUTO_INCREMENT KEY); INSERT INTO t1 VALUES(0),(-1),(0); SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL 0 N SELECT * FROM t1; a -1 @@ -2512,8 +2512,8 @@ DROP TABLE t1; CREATE TABLE t1(a INT AUTO_INCREMENT KEY); INSERT INTO t1 VALUES(0),(10),(0); SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL 0 N SELECT * FROM t1; a 1 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result b/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result index 407a8b103bd..989ddc0f03e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result @@ -6,13 +6,13 @@ INSERT INTO t2 (a,b) VALUES (1,'bar'); set global rocksdb_force_flush_memtable_now = true; CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8; SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' ); -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed 2 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL -t2 ROCKSDB 10 Fixed 1 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL -t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed 2 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N +t2 ROCKSDB 10 Fixed 1 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N +t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL 0 N SHOW TABLE STATUS WHERE name LIKE 't2'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t2 ROCKSDB 10 Fixed 10000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t2 ROCKSDB 10 Fixed 10000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL 0 N DROP TABLE t1, t2, t3; CREATE DATABASE `db_new..............................................end`; USE `db_new..............................................end`; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table.result b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table.result index e6ff6e1ca32..76d00d90420 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table.result @@ -8,20 +8,20 @@ a b DROP TABLE t1; CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb; SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL 0 N INSERT INTO t1 (c) VALUES ('a'),('b'),('c'); SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL 0 N TRUNCATE TABLE t1; SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL 0 N INSERT INTO t1 (c) VALUES ('d'); SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL 0 N SELECT a,c FROM t1; a c 1 d diff --git a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def index 3b9726986f0..47005c1baff 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def +++ b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def @@ -70,6 +70,7 @@ blind_delete_without_tx_api: MDEV-12286: rocksdb.blind_delete_without_tx_api tes unique_check: wrong error number autoinc_vars_thread: debug sync point wait timed out information_schema: MDEV-14372: unstable testcase +bloomfilter: MDEV-14562 ## ## Tests that fail for some other reason diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result index 905feec9b1a..010ba954366 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result @@ -47,12 +47,12 @@ a b 2 2 3 3 SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL 0 N ANALYZE TABLE t1; Table Op Msg_type Msg_text test.t1 analyze status OK SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary +t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL 0 N DROP TABLE t1; diff --git a/storage/sphinx/ha_sphinx.cc b/storage/sphinx/ha_sphinx.cc index 74cf6139469..b1d9681da2a 100644 --- a/storage/sphinx/ha_sphinx.cc +++ b/storage/sphinx/ha_sphinx.cc @@ -1042,8 +1042,8 @@ static bool ParseUrl ( CSphSEShare * share, TABLE * table, bool bCreate ) bool bOk = true; bool bQL = false; char * sScheme = NULL; - char * sHost = SPHINXAPI_DEFAULT_HOST; - char * sIndex = SPHINXAPI_DEFAULT_INDEX; + char * sHost = (char*) SPHINXAPI_DEFAULT_HOST; + char * sIndex = (char*) SPHINXAPI_DEFAULT_INDEX; int iPort = SPHINXAPI_DEFAULT_PORT; // parse connection string, if any @@ -1069,12 +1069,12 @@ static bool ParseUrl ( CSphSEShare * share, TABLE * table, bool bCreate ) sHost--; // reuse last slash iPort = 0; if (!( sIndex = strrchr ( sHost, ':' ) )) - sIndex = SPHINXAPI_DEFAULT_INDEX; + sIndex = (char*) SPHINXAPI_DEFAULT_INDEX; else { *sIndex++ = '\0'; if ( !*sIndex ) - sIndex = SPHINXAPI_DEFAULT_INDEX; + sIndex = (char*) SPHINXAPI_DEFAULT_INDEX; } bOk = true; break; @@ -1096,11 +1096,11 @@ static bool ParseUrl ( CSphSEShare * share, TABLE * table, bool bCreate ) if ( sIndex ) *sIndex++ = '\0'; else - sIndex = SPHINXAPI_DEFAULT_INDEX; + sIndex = (char*) SPHINXAPI_DEFAULT_INDEX; iPort = atoi(sPort); if ( !iPort ) - iPort = SPHINXAPI_DEFAULT_PORT; + iPort = SPHINXAPI_DEFAULT_PORT; } } else { @@ -1108,7 +1108,7 @@ static bool ParseUrl ( CSphSEShare * share, TABLE * table, bool bCreate ) if ( sIndex ) *sIndex++ = '\0'; else - sIndex = SPHINXAPI_DEFAULT_INDEX; + sIndex = (char*) SPHINXAPI_DEFAULT_INDEX; } bOk = true; break; @@ -1304,8 +1304,8 @@ CSphSEQuery::CSphSEQuery ( const char * sQuery, int iLength, const char * sIndex , m_sGeoLongAttr ( "" ) , m_fGeoLatitude ( 0.0f ) , m_fGeoLongitude ( 0.0f ) - , m_sComment ( "" ) - , m_sSelect ( "*" ) + , m_sComment ( (char*) "" ) + , m_sSelect ( (char*) "*" ) , m_pBuf ( NULL ) , m_pCur ( NULL ) diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc index 98dfb2f466b..6ff2e25bc27 100644 --- a/storage/spider/spd_db_mysql.cc +++ b/storage/spider/spd_db_mysql.cc @@ -656,9 +656,10 @@ int spider_db_mysql_result::fetch_table_status( } if (mode == 1) { - if (num_fields() != 18) + /* Ok to test for 18 fields as all new fields are added last */ + if (num_fields() < 18) { - DBUG_PRINT("info",("spider field_count != 18")); + DBUG_PRINT("info",("spider field_count < 18")); DBUG_RETURN(ER_SPIDER_INVALID_REMOTE_TABLE_INFO_NUM); } diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index 7490f4e620b..702b2eaa2dd 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -1,7 +1,9 @@ -SET(TOKUDB_VERSION 5.6.37-82.2) +SET(TOKUDB_VERSION 5.6.38-83.0) # PerconaFT only supports x86-64 and cmake-2.8.9+ IF(CMAKE_VERSION VERSION_LESS "2.8.9") MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB") +ELSEIF(NOT HAVE_DLOPEN) + MESSAGE(STATUS "dlopen is required by TokuDB") ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64") # tokudb requires F_NOCACHE or O_DIRECT, and designated initializers diff --git a/storage/tokudb/PerconaFT/CMakeLists.txt b/storage/tokudb/PerconaFT/CMakeLists.txt index 1c9052c2a55..5c6d938808e 100644 --- a/storage/tokudb/PerconaFT/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/CMakeLists.txt @@ -12,9 +12,14 @@ set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") # detect when we are being built as a subproject if (DEFINED MYSQL_PROJECT_NAME_DOCSTRING) add_definitions( -DMYSQL_TOKUDB_ENGINE=1) + # Extended PFS instrumentation: + # -DTOKU_PFS_MUTEX_EXTENDED_CACHETABLEMMUTEX=1 + if (WITH_PERFSCHEMA_STORAGE_ENGINE) + add_definitions(-DTOKU_MYSQL_WITH_PFS) + endif () + include_directories(${CMAKE_SOURCE_DIR}/include) if ((CMAKE_BUILD_TYPE MATCHES "Debug") AND (CMAKE_CXX_FLAGS_DEBUG MATCHES " -DENABLED_DEBUG_SYNC")) - include_directories(${CMAKE_SOURCE_DIR}/include) include_directories(${CMAKE_SOURCE_DIR}/sql) endif () endif () diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc index 0fa876f2bd8..95d6207edda 100644 --- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc +++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc @@ -445,14 +445,15 @@ static void print_db_key_range_struct (void) { sort_and_dump_fields("db_key_range", false, NULL); } -static void print_db_lsn_struct (void) { - field_counter=0; - /* A dummy field to make sizeof(DB_LSN) equal in C and C++ */ - const char *extra[] = { "char dummy", NULL }; - sort_and_dump_fields("db_lsn", false, extra); +static void print_db_lsn_struct(void) { + field_counter = 0; + // FT-692 + STRUCT_SETUP(DB_LSN, file, "uint32_t %s"); + STRUCT_SETUP(DB_LSN, offset, "uint32_t %s"); + sort_and_dump_fields("db_lsn", false, NULL); } -static void print_dbt_struct (void) { +static void print_dbt_struct(void) { field_counter=0; #if 0 && DB_VERSION_MAJOR==4 && DB_VERSION_MINOR==1 STRUCT_SETUP(DBT, app_private, "void*%s"); diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake index 0cd9a9c5941..385723aebc7 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake @@ -53,7 +53,13 @@ macro(set_cflags_if_supported) endforeach(flag) endmacro(set_cflags_if_supported) +if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING) + set (OPTIONAL_CFLAGS "${OPTIONAL_CFLAGS} -Wmissing-format-attribute") +endif() + ## disable some warnings +## missing-format-attribute causes warnings in some MySQL include files +## if the library is built as a part of TokuDB MySQL storage engine set_cflags_if_supported( -Wno-missing-field-initializers -Wstrict-null-sentinel @@ -61,7 +67,7 @@ set_cflags_if_supported( -Wswitch -Wtrampolines -Wlogical-op - -Wmissing-format-attribute + ${OPTIONAL_CFLAGS} -Wno-error=missing-format-attribute -Wno-error=address-of-array-temporary -Wno-error=tautological-constant-out-of-range-compare @@ -143,8 +149,8 @@ set_cflags_if_supported( -Wmissing-prototypes -Wmissing-declarations -Wpointer-arith - -Wmissing-format-attribute -Wshadow + ${OPTIONAL_CFLAGS} ## other flags to try: #-Wunsafe-loop-optimizations #-Wpointer-arith diff --git a/storage/tokudb/PerconaFT/ft/cachetable/background_job_manager.cc b/storage/tokudb/PerconaFT/ft/cachetable/background_job_manager.cc index 9119a49b081..c109185f973 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/background_job_manager.cc +++ b/storage/tokudb/PerconaFT/ft/cachetable/background_job_manager.cc @@ -42,6 +42,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "cachetable/background_job_manager.h" +toku_instr_key *bjm_jobs_lock_mutex_key; +toku_instr_key *bjm_jobs_wait_key; + struct background_job_manager_struct { bool accepting_jobs; uint32_t num_jobs; @@ -49,10 +52,10 @@ struct background_job_manager_struct { toku_mutex_t jobs_lock; }; -void bjm_init(BACKGROUND_JOB_MANAGER* pbjm) { +void bjm_init(BACKGROUND_JOB_MANAGER *pbjm) { BACKGROUND_JOB_MANAGER XCALLOC(bjm); - toku_mutex_init(&bjm->jobs_lock, 0); - toku_cond_init(&bjm->jobs_wait, NULL); + toku_mutex_init(*bjm_jobs_lock_mutex_key, &bjm->jobs_lock, nullptr); + toku_cond_init(*bjm_jobs_wait_key, &bjm->jobs_wait, nullptr); bjm->accepting_jobs = true; bjm->num_jobs = 0; *pbjm = bjm; diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc index 54ef11eb1eb..b72edc8d231 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc @@ -58,6 +58,25 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "util/status.h" #include "util/context.h" +toku_instr_key *cachetable_m_mutex_key; +toku_instr_key *cachetable_ev_thread_lock_mutex_key; + +toku_instr_key *cachetable_m_list_lock_key; +toku_instr_key *cachetable_m_pending_lock_expensive_key; +toku_instr_key *cachetable_m_pending_lock_cheap_key; +toku_instr_key *cachetable_m_lock_key; + +toku_instr_key *cachetable_value_key; +toku_instr_key *cachetable_disk_nb_rwlock_key; + +toku_instr_key *cachetable_p_refcount_wait_key; +toku_instr_key *cachetable_m_flow_control_cond_key; +toku_instr_key *cachetable_m_ev_thread_cond_key; + +toku_instr_key *cachetable_disk_nb_mutex_key; +toku_instr_key *log_internal_lock_mutex_key; +toku_instr_key *eviction_thread_key; + /////////////////////////////////////////////////////////////////////////////////// // Engine status // @@ -780,18 +799,25 @@ void pair_init(PAIR p, p->checkpoint_complete_callback = write_callback.checkpoint_complete_callback; p->write_extraargs = write_callback.write_extraargs; - p->count = 0; // <CER> Is zero the correct init value? + p->count = 0; // <CER> Is zero the correct init value? p->refcount = 0; p->num_waiting_on_refs = 0; - toku_cond_init(&p->refcount_wait, NULL); + toku_cond_init(*cachetable_p_refcount_wait_key, &p->refcount_wait, nullptr); p->checkpoint_pending = false; p->mutex = list->get_mutex_for_pair(fullhash); assert(p->mutex); - p->value_rwlock.init(p->mutex); - nb_mutex_init(&p->disk_nb_mutex); + p->value_rwlock.init(p->mutex +#ifdef TOKU_MYSQL_WITH_PFS + , + *cachetable_value_key +#endif + ); + nb_mutex_init(*cachetable_disk_nb_mutex_key, + *cachetable_disk_nb_rwlock_key, + &p->disk_nb_mutex); - p->size_evicting_estimate = 0; // <CER> Is zero the correct init value? + p->size_evicting_estimate = 0; // <CER> Is zero the correct init value? p->ev = ev; p->list = list; @@ -3230,16 +3256,26 @@ void pair_list::init() { #if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP) pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); #else - // TODO: need to figure out how to make writer-preferential rwlocks - // happen on osx +// TODO: need to figure out how to make writer-preferential rwlocks +// happen on osx #endif - toku_pthread_rwlock_init(&m_list_lock, &attr); - toku_pthread_rwlock_init(&m_pending_lock_expensive, &attr); - toku_pthread_rwlock_init(&m_pending_lock_cheap, &attr); + toku_pthread_rwlock_init(*cachetable_m_list_lock_key, &m_list_lock, &attr); + toku_pthread_rwlock_init(*cachetable_m_pending_lock_expensive_key, + &m_pending_lock_expensive, + &attr); + toku_pthread_rwlock_init( + *cachetable_m_pending_lock_cheap_key, &m_pending_lock_cheap, &attr); XCALLOC_N(m_table_size, m_table); XCALLOC_N(m_num_locks, m_mutexes); for (uint64_t i = 0; i < m_num_locks; i++) { - toku_mutex_init(&m_mutexes[i].aligned_mutex, NULL); + toku_mutex_init( +#ifdef TOKU_PFS_MUTEX_EXTENDED_CACHETABLEMMUTEX + *cachetable_m_mutex_key, +#else + toku_uninstrumented, +#endif + &m_mutexes[i].aligned_mutex, + nullptr); } } @@ -3579,9 +3615,9 @@ ENSURE_POD(evictor); // This is the function that runs eviction on its own thread. // static void *eviction_thread(void *evictor_v) { - evictor* CAST_FROM_VOIDP(evictor, evictor_v); + evictor *CAST_FROM_VOIDP(evictor, evictor_v); evictor->run_eviction_thread(); - return evictor_v; + return toku_pthread_done(evictor_v); } // @@ -3630,11 +3666,14 @@ int evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KI m_pl = _pl; m_cf_list = _cf_list; - m_kibbutz = _kibbutz; - toku_mutex_init(&m_ev_thread_lock, NULL); - toku_cond_init(&m_flow_control_cond, NULL); - toku_cond_init(&m_ev_thread_cond, NULL); - m_num_sleepers = 0; + m_kibbutz = _kibbutz; + toku_mutex_init( + *cachetable_ev_thread_lock_mutex_key, &m_ev_thread_lock, nullptr); + toku_cond_init( + *cachetable_m_flow_control_cond_key, &m_flow_control_cond, nullptr); + toku_cond_init( + *cachetable_m_ev_thread_cond_key, &m_ev_thread_cond, nullptr); + m_num_sleepers = 0; m_ev_thread_is_running = false; m_period_in_seconds = eviction_period; @@ -3642,11 +3681,12 @@ int evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KI int r = myinitstate_r(seed, m_random_statebuf, sizeof m_random_statebuf, &m_random_data); assert_zero(r); - // start the background thread + // start the background thread m_run_thread = true; m_num_eviction_thread_runs = 0; m_ev_thread_init = false; - r = toku_pthread_create(&m_ev_thread, NULL, eviction_thread, this); + r = toku_pthread_create( + *eviction_thread_key, &m_ev_thread, nullptr, eviction_thread, this); if (r == 0) { m_ev_thread_init = true; } @@ -4703,7 +4743,7 @@ static_assert(std::is_pod<cachefile_list>::value, "cachefile_list isn't POD"); void cachefile_list::init() { m_next_filenum_to_use.fileid = 0; m_next_hash_id_to_use = 0; - toku_pthread_rwlock_init(&m_lock, NULL); + toku_pthread_rwlock_init(*cachetable_m_lock_key, &m_lock, nullptr); m_active_filenum.create(); m_active_fileid.create(); m_stale_fileid.create(); diff --git a/storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc b/storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc index 4d54962fe9c..aad018f4097 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc +++ b/storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc @@ -87,8 +87,16 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "util/frwlock.h" #include "util/status.h" -void -toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) { +toku_instr_key *checkpoint_safe_mutex_key; +toku_instr_key *checkpoint_safe_rwlock_key; +toku_instr_key *multi_operation_lock_key; +toku_instr_key *low_priority_multi_operation_lock_key; + +toku_instr_key *rwlock_cond_key; +toku_instr_key *rwlock_wait_read_key; +toku_instr_key *rwlock_wait_write_key; + +void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) { cp_status.init(); CP_STATUS_VAL(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct); *statp = cp_status; @@ -117,11 +125,14 @@ multi_operation_lock_init(void) { #if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP) pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); #else - // TODO: need to figure out how to make writer-preferential rwlocks - // happen on osx +// TODO: need to figure out how to make writer-preferential rwlocks +// happen on osx #endif - toku_pthread_rwlock_init(&multi_operation_lock, &attr); - toku_pthread_rwlock_init(&low_priority_multi_operation_lock, &attr); + toku_pthread_rwlock_init( + *multi_operation_lock_key, &multi_operation_lock, &attr); + toku_pthread_rwlock_init(*low_priority_multi_operation_lock_key, + &low_priority_multi_operation_lock, + &attr); pthread_rwlockattr_destroy(&attr); locked_mo = false; } @@ -146,10 +157,15 @@ multi_operation_checkpoint_unlock(void) { toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock); } -static void -checkpoint_safe_lock_init(void) { - toku_mutex_init(&checkpoint_safe_mutex, NULL); - checkpoint_safe_lock.init(&checkpoint_safe_mutex); +static void checkpoint_safe_lock_init(void) { + toku_mutex_init( + *checkpoint_safe_mutex_key, &checkpoint_safe_mutex, nullptr); + checkpoint_safe_lock.init(&checkpoint_safe_mutex +#ifdef TOKU_MYSQL_WITH_PFS + , + *checkpoint_safe_rwlock_key +#endif + ); locked_cs = false; } diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc index 20035b1d02f..ef2009db1b1 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.cc +++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc @@ -187,9 +187,35 @@ basement nodes, bulk fetch, and partial fetch: */ static toku_mutex_t ft_open_close_lock; - -void -toku_ft_get_status(FT_STATUS s) { +static toku_instr_key *ft_open_close_lock_mutex_key; +// FIXME: the instrumentation keys below are defined here even though they +// belong to other modules, because they are registered here. If desired, they +// can be moved to their proper modules and registration done there in a +// one-time init function +// locktree +toku_instr_key *treenode_mutex_key; +toku_instr_key *manager_mutex_key; +toku_instr_key *manager_escalation_mutex_key; +toku_instr_key *manager_escalator_mutex_key; +// src +toku_instr_key *db_txn_struct_i_txn_mutex_key; +toku_instr_key *indexer_i_indexer_lock_mutex_key; +toku_instr_key *indexer_i_indexer_estimate_lock_mutex_key; +toku_instr_key *result_i_open_dbs_rwlock_key; +// locktree +toku_instr_key *lock_request_m_wait_cond_key; +toku_instr_key *manager_m_escalator_done_key; +toku_instr_key *locktree_request_info_mutex_key; +toku_instr_key *locktree_request_info_retry_mutex_key; +toku_instr_key *locktree_request_info_retry_cv_key; + +// this is a sample probe for custom instrumentation +static toku_instr_key *fti_probe_1_key; + +// This is a sample probe for custom instrumentation +toku_instr_probe *toku_instr_probe_1; + +void toku_ft_get_status(FT_STATUS s) { ft_status.init(); *s = ft_status; @@ -2644,11 +2670,14 @@ void toku_ft_set_direct_io (bool direct_io_on) { use_direct_io = direct_io_on; } -static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode) { +static inline int ft_open_maybe_direct(const char *filename, + int oflag, + int mode) { if (use_direct_io) { - return toku_os_open_direct(filename, oflag, mode); + return toku_os_open_direct( + filename, oflag, mode, *tokudb_file_data_key); } else { - return toku_os_open(filename, oflag, mode); + return toku_os_open(filename, oflag, mode, *tokudb_file_data_key); } } @@ -2722,7 +2751,7 @@ bool toku_create_subdirs_if_needed(const char *path) { if (!subdir.get()) return true; - if (toku_stat(subdir.get(), &stat) == -1) { + if (toku_stat(subdir.get(), &stat, toku_uninstrumented) == -1) { if (ENOENT == get_error_errno()) subdir_exists = false; else @@ -2934,6 +2963,8 @@ ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only CACHEFILE cf = NULL; FT ft = NULL; bool did_create = false; + bool was_already_open = false; + toku_ft_open_close_lock(); if (ft_h->did_set_flags) { @@ -2945,7 +2976,6 @@ ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only FILENUM reserved_filenum; reserved_filenum = use_filenum; fname_in_cwd = toku_cachetable_get_fname_in_cwd(cachetable, fname_in_env); - bool was_already_open; { int fd = -1; r = ft_open_file(fname_in_cwd, &fd); @@ -4615,20 +4645,265 @@ int toku_dump_ft(FILE *f, FT_HANDLE ft_handle) { return toku_dump_ftnode(f, ft_handle, root_key, 0, 0, 0); } + +static void toku_pfs_keys_init(const char *toku_instr_group_name) { + kibbutz_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, "kibbutz_mutex"); + minicron_p_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "minicron_p_mutex"); + queue_result_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "queue_result_mutex"); + tpool_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "tpool_lock_mutex"); + workset_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "workset_lock_mutex"); + bjm_jobs_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "bjm_jobs_lock_mutex"); + log_internal_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "log_internal_lock_mutex"); + cachetable_ev_thread_lock_mutex_key = + new toku_instr_key(toku_instr_object_type::mutex, + toku_instr_group_name, + "cachetable_ev_thread_lock_mutex"); + cachetable_disk_nb_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "cachetable_disk_nb_mutex"); + safe_file_size_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "safe_file_size_lock_mutex"); + cachetable_m_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "cachetable_m_mutex_key"); + checkpoint_safe_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "checkpoint_safe_mutex"); + ft_ref_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "ft_ref_lock_mutex"); + ft_open_close_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "ft_open_close_lock_mutex"); + loader_error_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "loader_error_mutex"); + bfs_mutex_key = + new toku_instr_key(toku_instr_object_type::mutex, toku_instr_group_name, + "bfs_mutex"); + loader_bl_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "loader_bl_mutex"); + loader_fi_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "loader_fi_lock_mutex"); + loader_out_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "loader_out_mutex"); + result_output_condition_lock_mutex_key = + new toku_instr_key(toku_instr_object_type::mutex, + toku_instr_group_name, + "result_output_condition_lock_mutex"); + block_table_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "block_table_mutex"); + rollback_log_node_cache_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "rollback_log_node_cache_mutex"); + txn_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, "txn_lock_mutex"); + txn_state_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "txn_state_lock_mutex"); + txn_child_manager_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "txn_child_manager_mutex"); + txn_manager_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "txn_manager_lock_mutex"); + treenode_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, "treenode_mutex"); + locktree_request_info_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "locktree_request_info_mutex"); + locktree_request_info_retry_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "locktree_request_info_retry_mutex_key"); + manager_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, "manager_mutex"); + manager_escalation_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "manager_escalation_mutex"); + db_txn_struct_i_txn_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "db_txn_struct_i_txn_mutex"); + manager_escalator_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "manager_escalator_mutex"); + indexer_i_indexer_lock_mutex_key = new toku_instr_key( + toku_instr_object_type::mutex, toku_instr_group_name, + "indexer_i_indexer_lock_mutex"); + indexer_i_indexer_estimate_lock_mutex_key = + new toku_instr_key(toku_instr_object_type::mutex, + toku_instr_group_name, + "indexer_i_indexer_estimate_lock_mutex"); + + tokudb_file_data_key = new toku_instr_key( + toku_instr_object_type::file, toku_instr_group_name, "tokudb_data_file"); + tokudb_file_load_key = new toku_instr_key( + toku_instr_object_type::file, toku_instr_group_name, "tokudb_load_file"); + tokudb_file_tmp_key = new toku_instr_key( + toku_instr_object_type::file, toku_instr_group_name, "tokudb_tmp_file"); + tokudb_file_log_key = new toku_instr_key( + toku_instr_object_type::file, toku_instr_group_name, "tokudb_log_file"); + + fti_probe_1_key = + new toku_instr_key(toku_instr_object_type::mutex, toku_instr_group_name, + "fti_probe_1"); + + extractor_thread_key = new toku_instr_key( + toku_instr_object_type::thread, toku_instr_group_name, + "extractor_thread"); + fractal_thread_key = new toku_instr_key( + toku_instr_object_type::thread, toku_instr_group_name, "fractal_thread"); + io_thread_key = + new toku_instr_key(toku_instr_object_type::thread, toku_instr_group_name, + "io_thread"); + eviction_thread_key = new toku_instr_key( + toku_instr_object_type::thread, toku_instr_group_name, + "eviction_thread"); + kibbutz_thread_key = new toku_instr_key( + toku_instr_object_type::thread, toku_instr_group_name, "kibbutz_thread"); + minicron_thread_key = new toku_instr_key( + toku_instr_object_type::thread, toku_instr_group_name, + "minicron_thread"); + tp_internal_thread_key = new toku_instr_key( + toku_instr_object_type::thread, toku_instr_group_name, + "tp_internal_thread"); + + result_state_cond_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "result_state_cond"); + bjm_jobs_wait_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, "bjm_jobs_wait"); + cachetable_p_refcount_wait_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "cachetable_p_refcount_wait"); + cachetable_m_flow_control_cond_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "cachetable_m_flow_control_cond"); + cachetable_m_ev_thread_cond_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "cachetable_m_ev_thread_cond"); + bfs_cond_key = + new toku_instr_key(toku_instr_object_type::cond, toku_instr_group_name, + "bfs_cond"); + result_output_condition_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "result_output_condition"); + manager_m_escalator_done_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "manager_m_escalator_done"); + lock_request_m_wait_cond_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "lock_request_m_wait_cond"); + queue_result_cond_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "queue_result_cond"); + ws_worker_wait_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, "ws_worker_wait"); + rwlock_wait_read_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, "rwlock_wait_read"); + rwlock_wait_write_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "rwlock_wait_write"); + rwlock_cond_key = + new toku_instr_key(toku_instr_object_type::cond, toku_instr_group_name, + "rwlock_cond"); + tp_thread_wait_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, "tp_thread_wait"); + tp_pool_wait_free_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "tp_pool_wait_free"); + frwlock_m_wait_read_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "frwlock_m_wait_read"); + kibbutz_k_cond_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, "kibbutz_k_cond"); + minicron_p_condvar_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "minicron_p_condvar"); + locktree_request_info_retry_cv_key = new toku_instr_key( + toku_instr_object_type::cond, toku_instr_group_name, + "locktree_request_info_retry_cv_key"); + + multi_operation_lock_key = new toku_instr_key( + toku_instr_object_type::rwlock, toku_instr_group_name, + "multi_operation_lock"); + low_priority_multi_operation_lock_key = + new toku_instr_key(toku_instr_object_type::rwlock, + toku_instr_group_name, + "low_priority_multi_operation_lock"); + cachetable_m_list_lock_key = new toku_instr_key( + toku_instr_object_type::rwlock, toku_instr_group_name, + "cachetable_m_list_lock"); + cachetable_m_pending_lock_expensive_key = + new toku_instr_key(toku_instr_object_type::rwlock, + toku_instr_group_name, + "cachetable_m_pending_lock_expensive"); + cachetable_m_pending_lock_cheap_key = + new toku_instr_key(toku_instr_object_type::rwlock, + toku_instr_group_name, + "cachetable_m_pending_lock_cheap"); + cachetable_m_lock_key = new toku_instr_key( + toku_instr_object_type::rwlock, toku_instr_group_name, + "cachetable_m_lock"); + result_i_open_dbs_rwlock_key = new toku_instr_key( + toku_instr_object_type::rwlock, toku_instr_group_name, + "result_i_open_dbs_rwlock"); + checkpoint_safe_rwlock_key = new toku_instr_key( + toku_instr_object_type::rwlock, toku_instr_group_name, + "checkpoint_safe_rwlock"); + cachetable_value_key = new toku_instr_key( + toku_instr_object_type::rwlock, toku_instr_group_name, + "cachetable_value"); + safe_file_size_lock_rwlock_key = new toku_instr_key( + toku_instr_object_type::rwlock, toku_instr_group_name, + "safe_file_size_lock_rwlock"); + cachetable_disk_nb_rwlock_key = new toku_instr_key( + toku_instr_object_type::rwlock, toku_instr_group_name, + "cachetable_disk_nb_rwlock"); + + toku_instr_probe_1 = new toku_instr_probe(*fti_probe_1_key); +} + int toku_ft_layer_init(void) { int r = 0; - //Portability must be initialized first + + // Portability must be initialized first r = toku_portability_init(); - if (r) { goto exit; } + if (r) { + goto exit; + } + + toku_pfs_keys_init("fti"); + r = db_env_set_toku_product_name("tokudb"); - if (r) { goto exit; } + if (r) { + goto exit; + } partitioned_counters_init(); toku_status_init(); toku_context_status_init(); toku_checkpoint_init(); toku_ft_serialize_layer_init(); - toku_mutex_init(&ft_open_close_lock, NULL); + toku_mutex_init( + *ft_open_close_lock_mutex_key, &ft_open_close_lock, nullptr); toku_scoped_malloc_init(); exit: return r; @@ -4642,7 +4917,10 @@ void toku_ft_layer_destroy(void) { toku_status_destroy(); partitioned_counters_destroy(); toku_scoped_malloc_destroy(); - //Portability must be cleaned up last + + delete toku_instr_probe_1; + + // Portability must be cleaned up last toku_portability_destroy(); } diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc index abc9d295eae..c7c2ea33c9e 100644 --- a/storage/tokudb/PerconaFT/ft/ft.cc +++ b/storage/tokudb/PerconaFT/ft/ft.cc @@ -51,9 +51,10 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include <toku_assert.h> #include <portability/toku_atomic.h> -void -toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created) { - // Reset the root_xid_that_created field to the given value. +toku_instr_key *ft_ref_lock_mutex_key; + +void toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created) { + // Reset the root_xid_that_created field to the given value. // This redefines which xid created the dictionary. // hold lock around setting and clearing of dirty bit @@ -101,15 +102,11 @@ toku_ft_free (FT ft) { toku_free(ft); } -void -toku_ft_init_reflock(FT ft) { - toku_mutex_init(&ft->ft_ref_lock, NULL); +void toku_ft_init_reflock(FT ft) { + toku_mutex_init(*ft_ref_lock_mutex_key, &ft->ft_ref_lock, nullptr); } -void -toku_ft_destroy_reflock(FT ft) { - toku_mutex_destroy(&ft->ft_ref_lock); -} +void toku_ft_destroy_reflock(FT ft) { toku_mutex_destroy(&ft->ft_ref_lock); } void toku_ft_grab_reflock(FT ft) { diff --git a/storage/tokudb/PerconaFT/ft/loader/callbacks.cc b/storage/tokudb/PerconaFT/ft/loader/callbacks.cc index 6a520dba3a3..ac69fb7e789 100644 --- a/storage/tokudb/PerconaFT/ft/loader/callbacks.cc +++ b/storage/tokudb/PerconaFT/ft/loader/callbacks.cc @@ -45,6 +45,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "loader/loader-internal.h" #include "util/dbt.h" +toku_instr_key *loader_error_mutex_key; + static void error_callback_lock(ft_loader_error_callback loader_error) { toku_mutex_lock(&loader_error->mutex); } @@ -57,10 +59,10 @@ void ft_loader_init_error_callback(ft_loader_error_callback loader_error) { memset(loader_error, 0, sizeof *loader_error); toku_init_dbt(&loader_error->key); toku_init_dbt(&loader_error->val); - toku_mutex_init(&loader_error->mutex, NULL); + toku_mutex_init(*loader_error_mutex_key, &loader_error->mutex, nullptr); } -void ft_loader_destroy_error_callback(ft_loader_error_callback loader_error) { +void ft_loader_destroy_error_callback(ft_loader_error_callback loader_error) { toku_mutex_destroy(&loader_error->mutex); toku_destroy_dbt(&loader_error->key); toku_destroy_dbt(&loader_error->val); diff --git a/storage/tokudb/PerconaFT/ft/loader/dbufio.cc b/storage/tokudb/PerconaFT/ft/loader/dbufio.cc index ad084a4fbdc..90f76cecf90 100644 --- a/storage/tokudb/PerconaFT/ft/loader/dbufio.cc +++ b/storage/tokudb/PerconaFT/ft/loader/dbufio.cc @@ -49,6 +49,10 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "loader/dbufio.h" #include "loader/loader-internal.h" +toku_instr_key *bfs_mutex_key; +toku_instr_key *bfs_cond_key; +toku_instr_key *io_thread_key; + struct dbufio_file { // i/o thread owns these int fd; @@ -276,39 +280,44 @@ static void* io_thread (void *v) toku_mutex_lock(&bfs->mutex); //printf("%s:%d Locked\n", __FILE__, __LINE__); while (1) { + if (paniced(bfs)) { + toku_mutex_unlock(&bfs->mutex); // ignore any error + toku_instr_delete_current_thread(); + return toku_pthread_done(nullptr); + } + // printf("n_not_done=%d\n", bfs->n_not_done); + if (bfs->n_not_done == 0) { + // all done (meaning we stored EOF (or another error) in + // error_code[0] for the file. + // printf("unlocked\n"); + toku_mutex_unlock(&bfs->mutex); + toku_instr_delete_current_thread(); + return toku_pthread_done(nullptr); + } - if (paniced(bfs)) { - toku_mutex_unlock(&bfs->mutex); // ignore any error - return 0; - } - //printf("n_not_done=%d\n", bfs->n_not_done); - if (bfs->n_not_done==0) { - // all done (meaning we stored EOF (or another error) in error_code[0] for the file. - //printf("unlocked\n"); - toku_mutex_unlock(&bfs->mutex); - return 0; - } - - struct dbufio_file *dbf = bfs->head; - if (dbf==NULL) { - // No I/O needs to be done yet. - // Wait until something happens that will wake us up. - toku_cond_wait(&bfs->cond, &bfs->mutex); - if (paniced(bfs)) { - toku_mutex_unlock(&bfs->mutex); // ignore any error - return 0; - } - // Have the lock so go around. - } else { - // Some I/O needs to be done. - //printf("%s:%d Need I/O\n", __FILE__, __LINE__); - assert(dbf->second_buf_ready == false); - assert(!dbf->io_done); - bfs->head = dbf->next; - if (bfs->head==NULL) bfs->tail=NULL; - - // Unlock the mutex now that we have ownership of dbf to allow consumers to get the mutex and perform swaps. They won't swap - // this buffer because second_buf_ready is false. + struct dbufio_file *dbf = bfs->head; + if (dbf == NULL) { + // No I/O needs to be done yet. + // Wait until something happens that will wake us up. + toku_cond_wait(&bfs->cond, &bfs->mutex); + if (paniced(bfs)) { + toku_mutex_unlock(&bfs->mutex); // ignore any error + toku_instr_delete_current_thread(); + return toku_pthread_done(nullptr); + } + // Have the lock so go around. + } else { + // Some I/O needs to be done. + // printf("%s:%d Need I/O\n", __FILE__, __LINE__); + assert(dbf->second_buf_ready == false); + assert(!dbf->io_done); + bfs->head = dbf->next; + if (bfs->head == NULL) + bfs->tail = NULL; + + // Unlock the mutex now that we have ownership of dbf to allow + // consumers to get the mutex and perform swaps. They won't swap + // this buffer because second_buf_ready is false. toku_mutex_unlock(&bfs->mutex); //printf("%s:%d Doing read fd=%d\n", __FILE__, __LINE__, dbf->fd); { @@ -339,14 +348,16 @@ static void* io_thread (void *v) //printf("%s:%d locking mutex again=%ld\n", __FILE__, __LINE__, readcode); { - toku_mutex_lock(&bfs->mutex); - if (paniced(bfs)) { - toku_mutex_unlock(&bfs->mutex); // ignore any error - return 0; - } - } - // Now that we have the mutex, we can decrement n_not_done (if applicable) and set second_buf_ready - if (readcode<=0) { + toku_mutex_lock(&bfs->mutex); + if (paniced(bfs)) { + toku_mutex_unlock(&bfs->mutex); // ignore any error + toku_instr_delete_current_thread(); + return toku_pthread_done(nullptr); + } + } + // Now that we have the mutex, we can decrement n_not_done (if + // applicable) and set second_buf_ready + if (readcode<=0) { bfs->n_not_done--; } //printf("%s:%d n_not_done=%d\n", __FILE__, __LINE__, bfs->n_not_done); @@ -377,34 +388,36 @@ int create_dbufio_fileset (DBUFIO_FILESET *bfsp, int N, int fds[/*N*/], size_t b } } } - //printf("%s:%d here\n", __FILE__, __LINE__); - if (result==0) { - toku_mutex_init(&bfs->mutex, NULL); - mutex_inited = true; + // printf("%s:%d here\n", __FILE__, __LINE__); + if (result == 0) { + toku_mutex_init(*bfs_mutex_key, &bfs->mutex, nullptr); + mutex_inited = true; } - if (result==0) { - toku_cond_init(&bfs->cond, NULL); - cond_inited = true; + if (result == 0) { + toku_cond_init(*bfs_cond_key, &bfs->cond, nullptr); + cond_inited = true; } - if (result==0) { - bfs->N = N; - bfs->n_not_done = N; - bfs->head = bfs->tail = NULL; - for (int i=0; i<N; i++) { - bfs->files[i].fd = fds[i]; - bfs->files[i].offset_in_buf = 0; - bfs->files[i].offset_in_uncompressed_file = 0; - bfs->files[i].next = NULL; - bfs->files[i].second_buf_ready = false; - for (int j=0; j<2; j++) { - if (result==0) { - MALLOC_N(bufsize, bfs->files[i].buf[j]); - if (bfs->files[i].buf[j]==NULL) { result=get_error_errno(); } - } - bfs->files[i].n_in_buf[j] = 0; - bfs->files[i].error_code[j] = 0; - } - bfs->files[i].io_done = false; + if (result == 0) { + bfs->N = N; + bfs->n_not_done = N; + bfs->head = bfs->tail = NULL; + for (int i = 0; i < N; i++) { + bfs->files[i].fd = fds[i]; + bfs->files[i].offset_in_buf = 0; + bfs->files[i].offset_in_uncompressed_file = 0; + bfs->files[i].next = NULL; + bfs->files[i].second_buf_ready = false; + for (int j = 0; j < 2; j++) { + if (result == 0) { + MALLOC_N(bufsize, bfs->files[i].buf[j]); + if (bfs->files[i].buf[j] == NULL) { + result = get_error_errno(); + } + } + bfs->files[i].n_in_buf[j] = 0; + bfs->files[i].error_code[j] = 0; + } + bfs->files[i].io_done = false; ssize_t r; if (bfs->compressed) { r = dbf_read_compressed(&bfs->files[i], bfs->files[i].buf[0], bufsize); @@ -431,13 +444,21 @@ int create_dbufio_fileset (DBUFIO_FILESET *bfsp, int N, int fds[/*N*/], size_t b bfs->panic = false; bfs->panic_errno = 0; } - //printf("Creating IO thread\n"); - if (result==0) { - result = toku_pthread_create(&bfs->iothread, NULL, io_thread, (void*)bfs); + // printf("Creating IO thread\n"); + if (result == 0) { + result = toku_pthread_create(*io_thread_key, + &bfs->iothread, + nullptr, + io_thread, + static_cast<void *>(bfs)); + } + if (result == 0) { + *bfsp = bfs; + return 0; } - if (result==0) { *bfsp = bfs; return 0; } // Now undo everything. - // If we got here, there is no thread (either result was zero before the thread was created, or else the thread creation itself failed. + // If we got here, there is no thread (either result was zero before the + // thread was created, or else the thread creation itself failed. if (bfs) { if (bfs->files) { // the files were allocated, so we have to free all the bufs. diff --git a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h index 1aa2c203831..6f7b0147b21 100644 --- a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h +++ b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h @@ -64,10 +64,10 @@ enum { /* These structures maintain a collection of all the open temporary files used by the loader. */ struct file_info { bool is_open; - bool is_extant; // if true, the file must be unlinked. + bool is_extant; // if true, the file must be unlinked. char *fname; - FILE *file; - uint64_t n_rows; // how many rows were written into that file + TOKU_FILE *file; + uint64_t n_rows; // how many rows were written into that file size_t buffer_size; void *buffer; }; @@ -80,11 +80,11 @@ struct file_infos { }; typedef struct fidx { int idx; } FIDX; static const FIDX FIDX_NULL __attribute__((__unused__)) = {-1}; -static int fidx_is_null (const FIDX f) __attribute__((__unused__)); -static int fidx_is_null (const FIDX f) { return f.idx==-1; } -FILE *toku_bl_fidx2file (FTLOADER bl, FIDX i); +static int fidx_is_null(const FIDX f) __attribute__((__unused__)); +static int fidx_is_null(const FIDX f) { return f.idx == -1; } +TOKU_FILE *toku_bl_fidx2file(FTLOADER bl, FIDX i); -int ft_loader_open_temp_file (FTLOADER bl, FIDX*file_idx); +int ft_loader_open_temp_file(FTLOADER bl, FIDX *file_idx); /* These data structures are used for manipulating a collection of rows in main memory. */ struct row { @@ -100,11 +100,17 @@ struct rowset { }; int init_rowset (struct rowset *rows, uint64_t memory_budget); -void destroy_rowset (struct rowset *rows); -int add_row (struct rowset *rows, DBT *key, DBT *val); - -int loader_write_row(DBT *key, DBT *val, FIDX data, FILE*, uint64_t *dataoff, struct wbuf *wb, FTLOADER bl); -int loader_read_row (FILE *f, DBT *key, DBT *val); +void destroy_rowset(struct rowset *rows); +int add_row(struct rowset *rows, DBT *key, DBT *val); + +int loader_write_row(DBT *key, + DBT *val, + FIDX data, + TOKU_FILE *, + uint64_t *dataoff, + struct wbuf *wb, + FTLOADER bl); +int loader_read_row(TOKU_FILE *f, DBT *key, DBT *val); struct merge_fileset { bool have_sorted_output; // Is there an previous key? @@ -195,12 +201,13 @@ struct ft_loader_s { bool did_reserve_memory; bool compress_intermediates; bool allow_puts; - uint64_t reserved_memory; // how much memory are we allowed to use? + uint64_t reserved_memory; // how much memory are we allowed to use? - /* To make it easier to recover from errors, we don't use FILE*, instead we use an index into the file_infos. */ + /* To make it easier to recover from errors, we don't use TOKU_FILE*, + * instead we use an index into the file_infos. */ struct file_infos file_infos; -#define PROGRESS_MAX (1<<16) +#define PROGRESS_MAX (1 << 16) int progress; // Progress runs from 0 to PROGRESS_MAX. When we call the poll function we convert to a float from 0.0 to 1.0 // We use an integer so that we can add to the progress using a fetch-and-add instruction. diff --git a/storage/tokudb/PerconaFT/ft/loader/loader.cc b/storage/tokudb/PerconaFT/ft/loader/loader.cc index f867639b953..5f57b473bc5 100644 --- a/storage/tokudb/PerconaFT/ft/loader/loader.cc +++ b/storage/tokudb/PerconaFT/ft/loader/loader.cc @@ -63,21 +63,17 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "util/x1764.h" -static size_t (*os_fwrite_fun)(const void *,size_t,size_t,FILE*)=NULL; -void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) { - os_fwrite_fun=fwrite_fun; -} +toku_instr_key *loader_bl_mutex_key; +toku_instr_key *loader_fi_lock_mutex_key; +toku_instr_key *loader_out_mutex_key; -static size_t do_fwrite (const void *ptr, size_t size, size_t nmemb, FILE *stream) { - if (os_fwrite_fun) { - return os_fwrite_fun(ptr, size, nmemb, stream); - } else { - return fwrite(ptr, size, nmemb, stream); - } -} +toku_instr_key *extractor_thread_key; +toku_instr_key *fractal_thread_key; +toku_instr_key *tokudb_file_tmp_key; +toku_instr_key *tokudb_file_load_key; -// 1024 is the right size_factor for production. +// 1024 is the right size_factor for production. // Different values for these sizes may be used for testing. static uint32_t size_factor = 1024; static uint32_t default_loader_nodesize = FT_DEFAULT_NODE_SIZE; @@ -99,7 +95,7 @@ toku_ft_loader_get_rowset_budget_for_testing (void) void ft_loader_lock_init(FTLOADER bl) { invariant(!bl->mutex_init); - toku_mutex_init(&bl->mutex, NULL); + toku_mutex_init(*loader_bl_mutex_key, &bl->mutex, nullptr); bl->mutex_init = true; } @@ -131,7 +127,10 @@ static int add_big_buffer(struct file_info *file) { newbuffer = true; } if (result == 0) { - int r = setvbuf(file->file, (char *) file->buffer, _IOFBF, file->buffer_size); + int r = setvbuf(file->file->file, + static_cast<char *>(file->buffer), + _IOFBF, + file->buffer_size); if (r != 0) { result = get_error_errno(); if (newbuffer) { @@ -150,9 +149,9 @@ static void cleanup_big_buffer(struct file_info *file) { } } -int ft_loader_init_file_infos (struct file_infos *fi) { +int ft_loader_init_file_infos(struct file_infos *fi) { int result = 0; - toku_mutex_init(&fi->lock, NULL); + toku_mutex_init(*loader_fi_lock_mutex_key, &fi->lock, nullptr); fi->n_files = 0; fi->n_files_limit = 1; fi->n_files_open = 0; @@ -196,11 +195,10 @@ void ft_loader_fi_destroy (struct file_infos *fi, bool is_error) fi->file_infos = NULL; } -static int open_file_add (struct file_infos *fi, - FILE *file, - char *fname, - /* out */ FIDX *idx) -{ +static int open_file_add(struct file_infos *fi, + TOKU_FILE *file, + char *fname, + /* out */ FIDX *idx) { int result = 0; toku_mutex_lock(&fi->lock); if (fi->n_files >= fi->n_files_limit) { @@ -230,11 +228,12 @@ int ft_loader_fi_reopen (struct file_infos *fi, FIDX idx, const char *mode) { int result = 0; toku_mutex_lock(&fi->lock); int i = idx.idx; - invariant(i>=0 && i<fi->n_files); + invariant(i >= 0 && i < fi->n_files); invariant(!fi->file_infos[i].is_open); invariant(fi->file_infos[i].is_extant); - fi->file_infos[i].file = toku_os_fopen(fi->file_infos[i].fname, mode); - if (fi->file_infos[i].file == NULL) { + fi->file_infos[i].file = + toku_os_fopen(fi->file_infos[i].fname, mode, *tokudb_file_load_key); + if (fi->file_infos[i].file == NULL) { result = get_error_errno(); } else { fi->file_infos[i].is_open = true; @@ -307,20 +306,20 @@ int ft_loader_open_temp_file (FTLOADER bl, FIDX *file_idx) */ { int result = 0; - if (result) // debug hack + if (result) // debug hack return result; - FILE *f = NULL; + TOKU_FILE *f = NULL; int fd = -1; - char *fname = toku_strdup(bl->temp_file_template); + char *fname = toku_strdup(bl->temp_file_template); if (fname == NULL) result = get_error_errno(); else { fd = mkstemp(fname); - if (fd < 0) { + if (fd < 0) { result = get_error_errno(); } else { - f = toku_os_fdopen(fd, "r+"); - if (f == NULL) + f = toku_os_fdopen(fd, "r+", fname, *tokudb_file_tmp_key); + if (f->file == nullptr) result = get_error_errno(); else result = open_file_add(&bl->file_infos, f, fname, file_idx); @@ -339,7 +338,7 @@ int ft_loader_open_temp_file (FTLOADER bl, FIDX *file_idx) return result; } -void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error) { +void toku_ft_loader_internal_destroy(FTLOADER bl, bool is_error) { ft_loader_lock_destroy(bl); // These frees rely on the fact that if you free a NULL pointer then nothing bad happens. @@ -635,12 +634,16 @@ int toku_ft_loader_open (FTLOADER *blp, /* out */ allow_puts); if (r!=0) result = r; } - if (result==0 && allow_puts) { + if (result == 0 && allow_puts) { FTLOADER bl = *blp; - int r = toku_pthread_create(&bl->extractor_thread, NULL, extractor_thread, (void*)bl); - if (r==0) { + int r = toku_pthread_create(*extractor_thread_key, + &bl->extractor_thread, + nullptr, + extractor_thread, + static_cast<void *>(bl)); + if (r == 0) { bl->extractor_live = true; - } else { + } else { result = r; (void) toku_ft_loader_internal_destroy(bl, true); } @@ -659,17 +662,17 @@ static void ft_loader_set_panic(FTLOADER bl, int error, bool callback, int which } // One of the tests uses this. -FILE *toku_bl_fidx2file (FTLOADER bl, FIDX i) { +TOKU_FILE *toku_bl_fidx2file(FTLOADER bl, FIDX i) { toku_mutex_lock(&bl->file_infos.lock); - invariant(i.idx >=0 && i.idx < bl->file_infos.n_files); + invariant(i.idx >= 0 && i.idx < bl->file_infos.n_files); invariant(bl->file_infos.file_infos[i.idx].is_open); - FILE *result=bl->file_infos.file_infos[i.idx].file; + TOKU_FILE *result = bl->file_infos.file_infos[i.idx].file; toku_mutex_unlock(&bl->file_infos.lock); return result; } -static int bl_finish_compressed_write(FILE *stream, struct wbuf *wb) { - int r; +static int bl_finish_compressed_write(TOKU_FILE *stream, struct wbuf *wb) { + int r = 0; char *compressed_buf = NULL; const size_t data_size = wb->ndone; invariant(data_size > 0); @@ -720,31 +723,23 @@ static int bl_finish_compressed_write(FILE *stream, struct wbuf *wb) { // Mark as written wb->ndone = 0; - size_t size_to_write = total_size + 4; // Includes writing total_size + size_t size_to_write = total_size + 4; // Includes writing total_size + + r = toku_os_fwrite(compressed_buf, 1, size_to_write, stream); - { - size_t written = do_fwrite(compressed_buf, 1, size_to_write, stream); - if (written!=size_to_write) { - if (os_fwrite_fun) // if using hook to induce artificial errors (for testing) ... - r = get_maybe_error_errno(); // ... then there is no error in the stream, but there is one in errno - else - r = ferror(stream); - invariant(r!=0); - goto exit; - } - } - r = 0; -exit: if (compressed_buf) { toku_free(compressed_buf); } return r; } -static int bl_compressed_write(void *ptr, size_t nbytes, FILE *stream, struct wbuf *wb) { +static int bl_compressed_write(void *ptr, + size_t nbytes, + TOKU_FILE *stream, + struct wbuf *wb) { invariant(wb->size <= MAX_UNCOMPRESSED_BUF); size_t bytes_left = nbytes; - char *buf = (char*)ptr; + char *buf = (char *)ptr; while (bytes_left > 0) { size_t bytes_to_copy = bytes_left; @@ -767,29 +762,28 @@ static int bl_compressed_write(void *ptr, size_t nbytes, FILE *stream, struct wb return 0; } -static int bl_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream, struct wbuf *wb, FTLOADER bl) -/* Effect: this is a wrapper for fwrite that returns 0 on success, otherwise returns an error number. +static int bl_fwrite(void *ptr, + size_t size, + size_t nmemb, + TOKU_FILE *stream, + struct wbuf *wb, + FTLOADER bl) +/* Effect: this is a wrapper for fwrite that returns 0 on success, otherwise + * returns an error number. * Arguments: * ptr the data to be writen. * size the amount of data to be written. * nmemb the number of units of size to be written. * stream write the data here. - * wb where to write uncompressed data (if we're compressing) or ignore if NULL - * bl passed so we can panic the ft_loader if something goes wrong (recording the error number). + * wb where to write uncompressed data (if we're compressing) or ignore if + * NULL + * bl passed so we can panic the ft_loader if something goes wrong + * (recording the error number). * Return value: 0 on success, an error number otherwise. */ { if (!bl->compress_intermediates || !wb) { - size_t r = do_fwrite(ptr, size, nmemb, stream); - if (r!=nmemb) { - int e; - if (os_fwrite_fun) // if using hook to induce artificial errors (for testing) ... - e = get_maybe_error_errno(); // ... then there is no error in the stream, but there is one in errno - else - e = ferror(stream); - invariant(e!=0); - return e; - } + return toku_os_fwrite(ptr, size, nmemb, stream); } else { size_t num_bytes = size * nmemb; int r = bl_compressed_write(ptr, num_bytes, stream, wb); @@ -800,8 +794,9 @@ static int bl_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream, struct return 0; } -static int bl_fread (void *ptr, size_t size, size_t nmemb, FILE *stream) -/* Effect: this is a wrapper for fread that returns 0 on success, otherwise returns an error number. +static int bl_fread(void *ptr, size_t size, size_t nmemb, TOKU_FILE *stream) +/* Effect: this is a wrapper for fread that returns 0 on success, otherwise + * returns an error number. * Arguments: * ptr read data into here. * size size of data element to be read. @@ -810,24 +805,14 @@ static int bl_fread (void *ptr, size_t size, size_t nmemb, FILE *stream) * Return value: 0 on success, an error number otherwise. */ { - size_t r = fread(ptr, size, nmemb, stream); - if (r==0) { - if (feof(stream)) return EOF; - else { - do_error: ; - int e = ferror(stream); - // r == 0 && !feof && e == 0, how does this happen? invariant(e!=0); - return e; - } - } else if (r<nmemb) { - goto do_error; - } else { - return 0; - } + return toku_os_fread(ptr, size, nmemb, stream); } -static int bl_write_dbt (DBT *dbt, FILE* datafile, uint64_t *dataoff, struct wbuf *wb, FTLOADER bl) -{ +static int bl_write_dbt(DBT *dbt, + TOKU_FILE *datafile, + uint64_t *dataoff, + struct wbuf *wb, + FTLOADER bl) { int r; int dlen = dbt->size; if ((r=bl_fwrite(&dlen, sizeof(dlen), 1, datafile, wb, bl))) return r; @@ -837,8 +822,7 @@ static int bl_write_dbt (DBT *dbt, FILE* datafile, uint64_t *dataoff, struct wbu return 0; } -static int bl_read_dbt (/*in*/DBT *dbt, FILE *stream) -{ +static int bl_read_dbt(/*in*/ DBT *dbt, TOKU_FILE *stream) { int len; { int r; @@ -892,13 +876,20 @@ static int bl_read_dbt_from_dbufio (/*in*/DBT *dbt, DBUFIO_FILESET bfs, int file return result; } - -int loader_write_row(DBT *key, DBT *val, FIDX data, FILE *dataf, uint64_t *dataoff, struct wbuf *wb, FTLOADER bl) -/* Effect: Given a key and a val (both DBTs), write them to a file. Increment *dataoff so that it's up to date. +int loader_write_row(DBT *key, + DBT *val, + FIDX data, + TOKU_FILE *dataf, + uint64_t *dataoff, + struct wbuf *wb, + FTLOADER bl) +/* Effect: Given a key and a val (both DBTs), write them to a file. Increment + * *dataoff so that it's up to date. * Arguments: * key, val write these. * data the file to write them to - * dataoff a pointer to a counter that keeps track of the amount of data written so far. + * dataoff a pointer to a counter that keeps track of the amount of data + * written so far. * wb a pointer (possibly NULL) to buffer uncompressed output * bl the ft_loader (passed so we can panic if needed). * Return value: 0 on success, an error number otherwise. @@ -916,8 +907,9 @@ int loader_write_row(DBT *key, DBT *val, FIDX data, FILE *dataf, uint64_t *datao return 0; } -int loader_read_row (FILE *f, DBT *key, DBT *val) -/* Effect: Read a key value pair from a file. The DBTs must have DB_DBT_REALLOC set. +int loader_read_row(TOKU_FILE *f, DBT *key, DBT *val) +/* Effect: Read a key value pair from a file. The DBTs must have DB_DBT_REALLOC + * set. * Arguments: * f where to read it from. * key, val read it into these. @@ -1087,7 +1079,7 @@ static void* extractor_thread (void *blv) { FTLOADER bl = (FTLOADER)blv; int r = 0; while (1) { - void *item; + void *item = nullptr; { int rq = toku_queue_deq(bl->primary_rowset_queue, &item, NULL, NULL); if (rq==EOF) break; @@ -1108,14 +1100,14 @@ static void* extractor_thread (void *blv) { //printf("%s:%d extractor finishing\n", __FILE__, __LINE__); if (r == 0) { r = finish_primary_rows(bl); - if (r) + if (r) ft_loader_set_panic(bl, r, false, 0, nullptr, nullptr); - } - return NULL; + toku_instr_delete_current_thread(); + return nullptr; } -static void enqueue_for_extraction (FTLOADER bl) { +static void enqueue_for_extraction(FTLOADER bl) { //printf("%s:%d enqueing %ld items\n", __FILE__, __LINE__, bl->primary_rowset.n_rows); struct rowset *XMALLOC(enqueue_me); *enqueue_me = bl->primary_rowset; @@ -1626,11 +1618,12 @@ static int write_rowset_to_file (FTLOADER bl, FIDX sfile, const struct rowset ro struct wbuf wb; wbuf_init(&wb, uncompressed_buffer, MAX_UNCOMPRESSED_BUF); - FILE *sstream = toku_bl_fidx2file(bl, sfile); - for (size_t i=0; i<rows.n_rows; i++) { - DBT skey = make_dbt(rows.data + rows.rows[i].off, rows.rows[i].klen); - DBT sval = make_dbt(rows.data + rows.rows[i].off + rows.rows[i].klen, rows.rows[i].vlen); - + TOKU_FILE *sstream = toku_bl_fidx2file(bl, sfile); + for (size_t i = 0; i < rows.n_rows; i++) { + DBT skey = make_dbt(rows.data + rows.rows[i].off, rows.rows[i].klen); + DBT sval = make_dbt(rows.data + rows.rows[i].off + rows.rows[i].klen, + rows.rows[i].vlen); + uint64_t soffset=0; // don't really need this. r = loader_write_row(&skey, &sval, sfile, sstream, &soffset, &wb, bl); if (r != 0) { @@ -1727,14 +1720,30 @@ int ft_loader_sort_and_write_rows (struct rowset *rows, struct merge_fileset *fs return sort_and_write_rows (*rows, fs, bl, which_db, dest_db, compare); } -int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q, int n_sources, DBUFIO_FILESET bfs, FIDX srcs_fidxs[/*n_sources*/], FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare, int progress_allocation) -/* Effect: Given an array of FILE*'s each containing sorted, merge the data and write it to an output. All the files remain open after the merge. - * This merge is performed in one pass, so don't pass too many files in. If you need a tree of merges do it elsewhere. - * If TO_Q is true then we write rowsets into queue Q. Otherwise we write into dest_data. - * Modifies: May modify the arrays of files (but if modified, it must be a permutation so the caller can use that array to close everything.) - * Requires: The number of sources is at least one, and each of the input files must have at least one row in it. +int toku_merge_some_files_using_dbufio(const bool to_q, + FIDX dest_data, + QUEUE q, + int n_sources, + DBUFIO_FILESET bfs, + FIDX srcs_fidxs[/*n_sources*/], + FTLOADER bl, + int which_db, + DB *dest_db, + ft_compare_func compare, + int progress_allocation) +/* Effect: Given an array of FILE*'s each containing sorted, merge the data and + * write it to an output. All the files remain open after the merge. + * This merge is performed in one pass, so don't pass too many files in. If + * you need a tree of merges do it elsewhere. + * If TO_Q is true then we write rowsets into queue Q. Otherwise we write + * into dest_data. + * Modifies: May modify the arrays of files (but if modified, it must be a + * permutation so the caller can use that array to close everything.) + * Requires: The number of sources is at least one, and each of the input files + * must have at least one row in it. * Arguments: - * to_q boolean indicating that output is queue (true) or a file (false) + * to_q boolean indicating that output is queue (true) or a file + * (false) * dest_data where to write the sorted data * q where to write the sorted data * n_sources how many source files. @@ -1747,9 +1756,10 @@ int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q { int result = 0; - FILE *dest_stream = to_q ? NULL : toku_bl_fidx2file(bl, dest_data); + TOKU_FILE *dest_stream = to_q ? nullptr : toku_bl_fidx2file(bl, dest_data); - //printf(" merge_some_files progress=%d fin at %d\n", bl->progress, bl->progress+progress_allocation); + // printf(" merge_some_files progress=%d fin at %d\n", bl->progress, + // bl->progress+progress_allocation); DBT keys[n_sources]; DBT vals[n_sources]; uint64_t dataoff[n_sources]; @@ -1943,12 +1953,18 @@ static int merge_some_files (const bool to_q, FIDX dest_data, QUEUE q, int n_sou int result = 0; DBUFIO_FILESET bfs = NULL; int *MALLOC_N(n_sources, fds); - if (fds==NULL) result=get_error_errno(); - if (result==0) { - for (int i=0; i<n_sources; i++) { - int r = fileno(toku_bl_fidx2file(bl, srcs_fidxs[i])); // we rely on the fact that when the files are closed, the fd is also closed. - if (r==-1) { - result=get_error_errno(); + if (fds == NULL) + result = get_error_errno(); + if (result == 0) { + for (int i = 0; i < n_sources; i++) { + int r = fileno( + toku_bl_fidx2file(bl, srcs_fidxs[i])->file); // we rely on the + // fact that when + // the files are + // closed, the fd + // is also closed. + if (r == -1) { + result = get_error_errno(); break; } fds[i] = r; @@ -2178,7 +2194,7 @@ static inline void dbout_init(struct dbout *out, FT ft) { out->current_off = 0; out->n_translations = out->n_translations_limit = 0; out->translation = NULL; - toku_mutex_init(&out->mutex, NULL); + toku_mutex_init(*loader_out_mutex_key, &out->mutex, nullptr); out->ft = ft; } @@ -2418,7 +2434,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl, assert_zero(r); return result; } - FILE *pivots_stream = toku_bl_fidx2file(bl, pivots_file); + TOKU_FILE *pivots_stream = toku_bl_fidx2file(bl, pivots_file); TXNID root_xid_that_created = TXNID_NONE; if (bl->root_xids_that_created) @@ -2705,21 +2721,35 @@ int toku_loader_write_ft_from_q_in_C (FTLOADER bl, static void* fractal_thread (void *ftav) { struct fractal_thread_args *fta = (struct fractal_thread_args *)ftav; - int r = toku_loader_write_ft_from_q (fta->bl, fta->descriptor, fta->fd, fta->progress_allocation, fta->q, fta->total_disksize_estimate, fta->which_db, fta->target_nodesize, fta->target_basementnodesize, fta->target_compression_method, fta->target_fanout); + int r = toku_loader_write_ft_from_q(fta->bl, + fta->descriptor, + fta->fd, + fta->progress_allocation, + fta->q, + fta->total_disksize_estimate, + fta->which_db, + fta->target_nodesize, + fta->target_basementnodesize, + fta->target_compression_method, + fta->target_fanout); fta->errno_result = r; - return NULL; -} - -static int loader_do_i (FTLOADER bl, - int which_db, - DB *dest_db, - ft_compare_func compare, - const DESCRIPTOR descriptor, - const char *new_fname, - int progress_allocation // how much progress do I need to add into bl->progress by the end.. - ) + toku_instr_delete_current_thread(); + return toku_pthread_done(nullptr); +} + +static int loader_do_i(FTLOADER bl, + int which_db, + DB *dest_db, + ft_compare_func compare, + const DESCRIPTOR descriptor, + const char *new_fname, + int progress_allocation // how much progress do I need + // to add into bl->progress by + // the end.. + ) /* Effect: Handle the file creating for one particular DB in the bulk loader. */ -/* Requires: The data is fully extracted, so we can do merges out of files and write the ft file. */ +/* Requires: The data is fully extracted, so we can do merges out of files and + write the ft file. */ { //printf("doing i use %d progress=%d fin at %d\n", progress_allocation, bl->progress, bl->progress+progress_allocation); struct merge_fileset *fs = &(bl->fs[which_db]); @@ -2730,10 +2760,14 @@ static int loader_do_i (FTLOADER bl, if (r) goto error; { - mode_t mode = S_IRUSR+S_IWUSR + S_IRGRP+S_IWGRP; - int fd = toku_os_open(new_fname, O_RDWR| O_CREAT | O_BINARY, mode); // #2621 + mode_t mode = S_IRUSR + S_IWUSR + S_IRGRP + S_IWGRP; + int fd = toku_os_open(new_fname, + O_RDWR | O_CREAT | O_BINARY, + mode, + *tokudb_file_load_key); // #2621 if (fd < 0) { - r = get_error_errno(); goto error; + r = get_error_errno(); + goto error; } uint32_t target_nodesize, target_basementnodesize, target_fanout; @@ -2753,24 +2787,27 @@ static int loader_do_i (FTLOADER bl, progress_allocation -= allocation_for_merge; // This structure must stay live until the join below. - struct fractal_thread_args fta = { - bl, - descriptor, - fd, - progress_allocation, - bl->fractal_queues[which_db], - bl->extracted_datasizes[which_db], - 0, - which_db, - target_nodesize, - target_basementnodesize, - target_compression_method, - target_fanout - }; - - r = toku_pthread_create(bl->fractal_threads+which_db, NULL, fractal_thread, (void*)&fta); + struct fractal_thread_args fta = {bl, + descriptor, + fd, + progress_allocation, + bl->fractal_queues[which_db], + bl->extracted_datasizes[which_db], + 0, + which_db, + target_nodesize, + target_basementnodesize, + target_compression_method, + target_fanout}; + + r = toku_pthread_create(*fractal_thread_key, + bl->fractal_threads + which_db, + nullptr, + fractal_thread, + static_cast<void *>(&fta)); if (r) { - int r2 __attribute__((__unused__)) = toku_queue_destroy(bl->fractal_queues[which_db]); + int r2 __attribute__((__unused__)) = + toku_queue_destroy(bl->fractal_queues[which_db]); // ignore r2, since we already have an error bl->fractal_queues[which_db] = nullptr; goto error; @@ -3107,7 +3144,7 @@ static int read_some_pivots (FIDX pivots_file, int n_to_read, FTLOADER bl, for (int i = 0; i < n_to_read; i++) pivots[i] = zero_dbt; - FILE *pivots_stream = toku_bl_fidx2file(bl, pivots_file); + TOKU_FILE *pivots_stream = toku_bl_fidx2file(bl, pivots_file); int result = 0; for (int i = 0; i < n_to_read; i++) { @@ -3159,8 +3196,9 @@ static int setup_nonleaf_block (int n_children, } if (result == 0) { - FILE *next_pivots_stream = toku_bl_fidx2file(bl, next_pivots_file); - int r = bl_write_dbt(&pivots[n_children-1], next_pivots_stream, NULL, nullptr, bl); + TOKU_FILE *next_pivots_stream = toku_bl_fidx2file(bl, next_pivots_file); + int r = bl_write_dbt( + &pivots[n_children - 1], next_pivots_stream, NULL, nullptr, bl); if (r) result = r; } @@ -3275,8 +3313,11 @@ static int write_nonleaves (FTLOADER bl, FIDX pivots_fidx, struct dbout *out, st // 2) We put the 15 pivots and 16 blocks into an non-leaf node. // 3) We put the 16th pivot into the next pivots file. { - int r = fseek(toku_bl_fidx2file(bl, pivots_fidx), 0, SEEK_SET); - if (r!=0) { return get_error_errno(); } + int r = + fseek(toku_bl_fidx2file(bl, pivots_fidx)->file, 0, SEEK_SET); + if (r != 0) { + return get_error_errno(); + } } FIDX next_pivots_file; @@ -3296,7 +3337,7 @@ static int write_nonleaves (FTLOADER bl, FIDX pivots_fidx, struct dbout *out, st while (sts->n_subtrees - n_subtrees_used >= n_per_block*2) { // grab the first N_PER_BLOCK and build a node. DBT *pivots; - int64_t blocknum_of_new_node; + int64_t blocknum_of_new_node = 0; struct subtree_info *subtree_info; int r = setup_nonleaf_block (n_per_block, sts, pivots_fidx, n_subtrees_used, diff --git a/storage/tokudb/PerconaFT/ft/loader/loader.h b/storage/tokudb/PerconaFT/ft/loader/loader.h index 9c1bdab1ee2..cea2e8dfda2 100644 --- a/storage/tokudb/PerconaFT/ft/loader/loader.h +++ b/storage/tokudb/PerconaFT/ft/loader/loader.h @@ -38,6 +38,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #pragma once +#include "toku_portability.h" #include "ft/txn/txn.h" #include "ft/cachetable/cachetable.h" #include "ft/comparator.h" @@ -77,8 +78,6 @@ int toku_ft_loader_abort(FTLOADER bl, bool is_error); // For test purposes only -void toku_ft_loader_set_size_factor (uint32_t factor); - -void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)); +void toku_ft_loader_set_size_factor(uint32_t factor); size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid); diff --git a/storage/tokudb/PerconaFT/ft/logger/log-internal.h b/storage/tokudb/PerconaFT/ft/logger/log-internal.h index bee74fac346..be19e1342cd 100644 --- a/storage/tokudb/PerconaFT/ft/logger/log-internal.h +++ b/storage/tokudb/PerconaFT/ft/logger/log-internal.h @@ -70,11 +70,11 @@ struct mylock { }; static inline void ml_init(struct mylock *l) { - toku_mutex_init(&l->lock, 0); -} -static inline void ml_lock(struct mylock *l) { - toku_mutex_lock(&l->lock); + toku_mutex_init(*log_internal_lock_mutex_key, &l->lock, nullptr); } +// TODO: source location info might have be to be pulled up one caller +// to be useful +static inline void ml_lock(struct mylock *l) { toku_mutex_lock(&l->lock); } static inline void ml_unlock(struct mylock *l) { toku_mutex_unlock(&l->lock); } diff --git a/storage/tokudb/PerconaFT/ft/logger/logcursor.cc b/storage/tokudb/PerconaFT/ft/logger/logcursor.cc index 910a608cef9..07f57220bf0 100644 --- a/storage/tokudb/PerconaFT/ft/logger/logcursor.cc +++ b/storage/tokudb/PerconaFT/ft/logger/logcursor.cc @@ -86,10 +86,10 @@ static int lc_close_cur_logfile(TOKULOGCURSOR lc) { } static toku_off_t lc_file_len(const char *name) { - toku_struct_stat buf; - int r = toku_stat(name, &buf); - assert(r == 0); - return buf.st_size; + toku_struct_stat buf; + int r = toku_stat(name, &buf, *tokudb_file_data_key); + assert(r == 0); + return buf.st_size; } // Cat the file and throw away the contents. This brings the file into the file system cache diff --git a/storage/tokudb/PerconaFT/ft/logger/logger.cc b/storage/tokudb/PerconaFT/ft/logger/logger.cc index d6d1673b5e3..0e1e5a276b7 100644 --- a/storage/tokudb/PerconaFT/ft/logger/logger.cc +++ b/storage/tokudb/PerconaFT/ft/logger/logger.cc @@ -50,11 +50,17 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "util/status.h" -static const int log_format_version=TOKU_LOG_VERSION; +static const int log_format_version = TOKU_LOG_VERSION; -static int open_logfile (TOKULOGGER logger); -static void logger_write_buffer (TOKULOGGER logger, LSN *fsynced_lsn); -static void delete_logfile(TOKULOGGER logger, long long index, uint32_t version); +toku_instr_key *result_output_condition_lock_mutex_key; +toku_instr_key *result_output_condition_key; +toku_instr_key *tokudb_file_log_key; + +static int open_logfile(TOKULOGGER logger); +static void logger_write_buffer(TOKULOGGER logger, LSN *fsynced_lsn); +static void delete_logfile(TOKULOGGER logger, + long long index, + uint32_t version); static void grab_output(TOKULOGGER logger, LSN *fsynced_lsn); static void release_output(TOKULOGGER logger, LSN fsynced_lsn); @@ -132,10 +138,13 @@ int toku_logger_create (TOKULOGGER *resultp) { // n_in_file is uninitialized result->write_block_size = FT_DEFAULT_NODE_SIZE; // default logging size is the same as the default ft block size toku_logfilemgr_create(&result->logfilemgr); - *resultp=result; + *resultp = result; ml_init(&result->input_lock); - toku_mutex_init(&result->output_condition_lock, NULL); - toku_cond_init(&result->output_condition, NULL); + toku_mutex_init(*result_output_condition_lock_mutex_key, + &result->output_condition_lock, + nullptr); + toku_cond_init( + *result_output_condition_key, &result->output_condition, nullptr); result->rollback_cachefile = NULL; result->output_is_available = true; toku_txn_manager_init(&result->txn_manager); @@ -301,10 +310,10 @@ int toku_logger_close(TOKULOGGER *loggerp) { grab_output(logger, &fsynced_lsn); logger_write_buffer(logger, &fsynced_lsn); if (logger->fd!=-1) { - if ( logger->write_log_files ) { + if (logger->write_log_files) { toku_file_fsync_without_accounting(logger->fd); } - r = close(logger->fd); + r = toku_os_close(logger->fd); assert(r == 0); } r = close_logdir(logger); @@ -346,9 +355,13 @@ static int close_and_open_logfile (TOKULOGGER logger, LSN *fsynced_lsn) if (logger->write_log_files) { toku_file_fsync_without_accounting(logger->fd); *fsynced_lsn = logger->written_lsn; - toku_logfilemgr_update_last_lsn(logger->logfilemgr, logger->written_lsn); // fixes t:2294 + toku_logfilemgr_update_last_lsn(logger->logfilemgr, + logger->written_lsn); // fixes t:2294 } - r = close(logger->fd); if (r!=0) return get_error_errno(); + r = toku_os_close(logger->fd); + + if (r != 0) + return get_error_errno(); return open_logfile(logger); } @@ -678,18 +691,28 @@ static int open_logfile (TOKULOGGER logger) { int fnamelen = strlen(logger->directory)+50; char fname[fnamelen]; - snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, logger->next_log_file_number, TOKU_LOG_VERSION); + snprintf(fname, + fnamelen, + "%s/log%012lld.tokulog%d", + logger->directory, + logger->next_log_file_number, + TOKU_LOG_VERSION); long long index = logger->next_log_file_number; if (logger->write_log_files) { - logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRUSR+S_IWUSR); - if (logger->fd==-1) { + logger->fd = + toku_os_open(fname, + O_CREAT + O_WRONLY + O_TRUNC + O_EXCL + O_BINARY, + S_IRUSR + S_IWUSR, + *tokudb_file_log_key); + if (logger->fd == -1) { return get_error_errno(); } fsync_logdir(logger); logger->next_log_file_number++; } else { - logger->fd = open(DEV_NULL_FILE, O_WRONLY+O_BINARY); - if (logger->fd==-1) { + logger->fd = toku_os_open( + DEV_NULL_FILE, O_WRONLY + O_BINARY, S_IWUSR, *tokudb_file_log_key); + if (logger->fd == -1) { return get_error_errno(); } } @@ -835,10 +858,11 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn) logger_write_buffer(logger, &fsynced_lsn); // close the log file - if ( logger->write_log_files) { // fsyncs don't work to /dev/null + if (logger->write_log_files) { // fsyncs don't work to /dev/null toku_file_fsync_without_accounting(logger->fd); } - r = close(logger->fd); assert(r == 0); + r = toku_os_close(logger->fd); + assert(r == 0); logger->fd = -1; // reset the LSN's to the lastlsn when the logger was opened @@ -1238,11 +1262,13 @@ void toku_txnid2txn(TOKULOGGER logger, TXNID_PAIR txnid, TOKUTXN *result) { } // Find the earliest LSN in a log. No locks are needed. -static int peek_at_log (TOKULOGGER logger, char* filename, LSN *first_lsn) { - int fd = open(filename, O_RDONLY+O_BINARY); - if (fd<0) { +static int peek_at_log(TOKULOGGER logger, char *filename, LSN *first_lsn) { + int fd = toku_os_open( + filename, O_RDONLY + O_BINARY, S_IRUSR, *tokudb_file_log_key); + if (fd < 0) { int er = get_error_errno(); - if (logger->write_log_files) printf("couldn't open: %s\n", strerror(er)); + if (logger->write_log_files) + printf("couldn't open: %s\n", strerror(er)); return er; } enum { SKIP = 12+1+4 }; // read the 12 byte header, the first message, and the first len @@ -1259,10 +1285,13 @@ static int peek_at_log (TOKULOGGER logger, char* filename, LSN *first_lsn) { lsn = rbuf_ulonglong(&rb); } - r=close(fd); - if (r!=0) { return 0; } + r = toku_os_close(fd); + + if (r != 0) { + return 0; + } - first_lsn->lsn=lsn; + first_lsn->lsn = lsn; return 0; } diff --git a/storage/tokudb/PerconaFT/ft/logger/recover.cc b/storage/tokudb/PerconaFT/ft/logger/recover.cc index 9eaa56bdc53..9a9a1214ecb 100644 --- a/storage/tokudb/PerconaFT/ft/logger/recover.cc +++ b/storage/tokudb/PerconaFT/ft/logger/recover.cc @@ -954,14 +954,14 @@ static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) { std::unique_ptr<char[], decltype(&toku_free)> new_iname_full( toku_construct_full_name(2, data_dir, l->new_iname.data), &toku_free); - if (toku_stat(old_iname_full.get(), &stat) == -1) { + if (toku_stat(old_iname_full.get(), &stat, toku_uninstrumented) == -1) { if (ENOENT == errno) old_exist = false; else return 1; } - if (toku_stat(new_iname_full.get(), &stat) == -1) { + if (toku_stat(new_iname_full.get(), &stat, toku_uninstrumented) == -1) { if (ENOENT == errno) new_exist = false; else @@ -980,7 +980,7 @@ static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) { // 'stalled cachefiles' container the new file is removed // and the old file is renamed. if (old_exist && new_exist && - (toku_os_unlink(new_iname_full.get()) == -1 || + (toku_os_delete(new_iname_full.get()) == -1 || toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 || toku_fsync_directory(old_iname_full.get()) == -1 || toku_fsync_directory(new_iname_full.get()) == -1)) @@ -1473,9 +1473,13 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di { toku_struct_stat buf; - if (toku_stat(env_dir, &buf)!=0) { + if (toku_stat(env_dir, &buf, toku_uninstrumented)) { rr = get_error_errno(); - fprintf(stderr, "%.24s PerconaFT recovery error: directory does not exist: %s\n", ctime(&tnow), env_dir); + fprintf(stderr, + "%.24s PerconaFT recovery error: directory does not exist: " + "%s\n", + ctime(&tnow), + env_dir); goto errorexit; } else if (!S_ISDIR(buf.st_mode)) { fprintf(stderr, "%.24s PerconaFT recovery error: this file is supposed to be a directory, but is not: %s\n", ctime(&tnow), env_dir); diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc index 19811373d16..29f6daa293a 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc @@ -40,6 +40,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include <string.h> +#include "toku_portability.h" #include "portability/memory.h" #include "portability/toku_assert.h" #include "portability/toku_stdint.h" diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc index 12700d9d83e..811f86c30a7 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc @@ -55,6 +55,11 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "util/nb_mutex.h" #include "util/scoped_malloc.h" + +toku_instr_key *block_table_mutex_key; +toku_instr_key *safe_file_size_lock_mutex_key; +toku_instr_key *safe_file_size_lock_rwlock_key; + // indicates the end of a freelist static const BLOCKNUM freelist_null = {-1}; @@ -100,8 +105,10 @@ void block_table::_create_internal() { memset(&_checkpointed, 0, sizeof(struct translation)); memset(&_mutex, 0, sizeof(_mutex)); _bt_block_allocator = new BlockAllocator(); - toku_mutex_init(&_mutex, nullptr); - nb_mutex_init(&_safe_file_size_lock); + toku_mutex_init(*block_table_mutex_key, &_mutex, nullptr); + nb_mutex_init(*safe_file_size_lock_mutex_key, + *safe_file_size_lock_rwlock_key, + &_safe_file_size_lock); } // Fill in the checkpointed translation from buffer, and copy checkpointed to @@ -129,7 +136,7 @@ int block_table::create_from_buffer( _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); // Determine the file size - int64_t file_size; + int64_t file_size = 0; r = toku_os_get_file_size(fd, &file_size); lazy_assert_zero(r); invariant(file_size >= 0); diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc index fcab9fc675e..b24d72a5dff 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc @@ -417,8 +417,10 @@ static size_t serialize_ft_min_size(uint32_t version) { switch (version) { case FT_LAYOUT_VERSION_29: size += sizeof(uint64_t); // logrows in ft + // fallthrough case FT_LAYOUT_VERSION_28: size += sizeof(uint32_t); // fanout in ft + // fallthrough case FT_LAYOUT_VERSION_27: case FT_LAYOUT_VERSION_26: case FT_LAYOUT_VERSION_25: @@ -427,10 +429,12 @@ static size_t serialize_ft_min_size(uint32_t version) { case FT_LAYOUT_VERSION_22: case FT_LAYOUT_VERSION_21: size += sizeof(MSN); // max_msn_in_ft + // fallthrough case FT_LAYOUT_VERSION_20: case FT_LAYOUT_VERSION_19: size += 1; // compression method size += sizeof(MSN); // highest_unused_msn_for_upgrade + // fallthrough case FT_LAYOUT_VERSION_18: size += sizeof(uint64_t); // time_of_last_optimize_begin size += sizeof(uint64_t); // time_of_last_optimize_end @@ -438,9 +442,11 @@ static size_t serialize_ft_min_size(uint32_t version) { size += sizeof(MSN); // msn_at_start_of_last_completed_optimize size -= 8; // removed num_blocks_to_upgrade_14 size -= 8; // removed num_blocks_to_upgrade_13 + // fallthrough case FT_LAYOUT_VERSION_17: size += 16; invariant(sizeof(STAT64INFO_S) == 16); + // fallthrough case FT_LAYOUT_VERSION_16: case FT_LAYOUT_VERSION_15: size += 4; // basement node size @@ -448,8 +454,10 @@ static size_t serialize_ft_min_size(uint32_t version) { // num_blocks_to_upgrade, now one int each for upgrade // from 13, 14 size += 8; // time of last verification + // fallthrough case FT_LAYOUT_VERSION_14: size += 8; // TXNID that created + // fallthrough case FT_LAYOUT_VERSION_13: size += (4 // build_id + @@ -459,7 +467,7 @@ static size_t serialize_ft_min_size(uint32_t version) { + 8 // time_of_last_modification ); - // fall through + // fallthrough case FT_LAYOUT_VERSION_12: size += (+8 // "tokudata" + diff --git a/storage/tokudb/PerconaFT/ft/serialize/sub_block.cc b/storage/tokudb/PerconaFT/ft/serialize/sub_block.cc index c967d4b4c1c..6dc1f82844a 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/sub_block.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/sub_block.cc @@ -51,6 +51,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "util/threadpool.h" #include "util/x1764.h" +toku_instr_key *workset_lock_mutex_key; +toku_instr_key *ws_worker_wait_key; + SUB_BLOCK sub_block_creat(void) { SUB_BLOCK XMALLOC(sb); sub_block_init(sb); diff --git a/storage/tokudb/PerconaFT/ft/serialize/workset.h b/storage/tokudb/PerconaFT/ft/serialize/workset.h index 073741fccb1..295eb73cec9 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/workset.h +++ b/storage/tokudb/PerconaFT/ft/serialize/workset.h @@ -41,6 +41,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include <toku_list.h> #include <toku_pthread.h> +extern toku_instr_key *ws_worker_wait_key; + // The work struct is the base class for work to be done by some threads struct work { struct toku_list next; @@ -54,16 +56,14 @@ struct workset { toku_cond_t worker_wait; // a condition variable used to wait for all of the worker to release their reference on the workset }; -static inline void -workset_init(struct workset *ws) { - toku_mutex_init(&ws->lock, NULL); +static inline void workset_init(struct workset *ws) { + toku_mutex_init(*workset_lock_mutex_key, &ws->lock, nullptr); toku_list_init(&ws->worklist); - ws->refs = 1; // the calling thread gets a reference - toku_cond_init(&ws->worker_wait, NULL); + ws->refs = 1; // the calling thread gets a reference + toku_cond_init(*ws_worker_wait_key, &ws->worker_wait, nullptr); } -static inline void -workset_destroy(struct workset *ws) { +static inline void workset_destroy(struct workset *ws) { invariant(toku_list_empty(&ws->worklist)); toku_cond_destroy(&ws->worker_wait); toku_mutex_destroy(&ws->lock); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-4357.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-4357.cc index 8bbda295462..0af5c8185a9 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-4357.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-4357.cc @@ -71,21 +71,26 @@ cachetable_test (void) { void* v1; long s1; - r = toku_cachetable_get_and_pin( - f1, - make_blocknum(1), - toku_cachetable_hash(f1, make_blocknum(1)), - &v1, - &s1, - def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, - true, - NULL - ); + r = toku_cachetable_get_and_pin(f1, + make_blocknum(1), + toku_cachetable_hash(f1, make_blocknum(1)), + &v1, + &s1, + def_write_callback(NULL), + def_fetch, + def_pf_req_callback, + def_pf_callback, + true, + NULL); toku_pthread_t pin_nonblocking_tid; - r = toku_pthread_create(&pin_nonblocking_tid, NULL, pin_nonblocking, NULL); - assert_zero(r); + r = toku_pthread_create(toku_uninstrumented, + &pin_nonblocking_tid, + nullptr, + pin_nonblocking, + nullptr); + assert_zero(r); // sleep 3 seconds - usleep(3*1024*1024); + usleep(3 * 1024 * 1024); r = toku_test_cachetable_unpin_and_remove(f1, make_blocknum(1), NULL, NULL); assert_zero(r); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-4365.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-4365.cc index 9c54c086f5b..7bee0b80770 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-4365.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-4365.cc @@ -71,15 +71,16 @@ static void *put_same_key(void *arg) { return arg; } - toku_pthread_t put_tid; -static void test_remove_key(CACHEKEY* UU(cachekey), bool UU(for_checkpoint), void* UU(extra)) { - int r = toku_pthread_create(&put_tid, NULL, put_same_key, NULL); - assert_zero(r); +static void test_remove_key(CACHEKEY *UU(cachekey), + bool UU(for_checkpoint), + void *UU(extra)) { + int r = toku_pthread_create( + toku_uninstrumented, &put_tid, nullptr, put_same_key, nullptr); + assert_zero(r); } - static void cachetable_test (void) { const int test_limit = 12; @@ -92,21 +93,26 @@ cachetable_test (void) { void* v1; long s1; - r = toku_cachetable_get_and_pin( - f1, - make_blocknum(1), - toku_cachetable_hash(f1, make_blocknum(1)), - &v1, - &s1, - def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, - true, - NULL - ); + r = toku_cachetable_get_and_pin(f1, + make_blocknum(1), + toku_cachetable_hash(f1, make_blocknum(1)), + &v1, + &s1, + def_write_callback(nullptr), + def_fetch, + def_pf_req_callback, + def_pf_callback, + true, + nullptr); toku_pthread_t pin_nonblocking_tid; - r = toku_pthread_create(&pin_nonblocking_tid, NULL, pin_nonblocking, NULL); - assert_zero(r); + r = toku_pthread_create(toku_uninstrumented, + &pin_nonblocking_tid, + nullptr, + pin_nonblocking, + nullptr); + assert_zero(r); // sleep 3 seconds - usleep(3*1024*1024); + usleep(3 * 1024 * 1024); r = toku_test_cachetable_unpin_and_remove(f1, make_blocknum(1), test_remove_key, NULL); assert_zero(r); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-5097.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-5097.cc index c1b6e0a94f2..5ab0df88e08 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-5097.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-5097.cc @@ -153,13 +153,16 @@ cachetable_test (void) { assert(r == 0); } - // at this point, we have a dirty PAIR in the cachetable associated with cachefile f1 - // launch a thread that will put another PAIR in the cachetable, and get partial eviction started + // at this point, we have a dirty PAIR in the cachetable associated with + // cachefile f1 + // launch a thread that will put another PAIR in the cachetable, and get + // partial eviction started toku_pthread_t tid; - r = toku_pthread_create(&tid, NULL, f2_pin, NULL); + r = toku_pthread_create( + toku_uninstrumented, &tid, nullptr, f2_pin, nullptr); assert_zero(r); - usleep(2*1024*1024); + usleep(2 * 1024 * 1024); check_flush = true; toku_cachefile_close(&f1, false, ZERO_LSN); if (enable_partial_eviction) diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978-2.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978-2.cc index b462d76eeee..0b5110ddd99 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978-2.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978-2.cc @@ -114,14 +114,14 @@ unpin_two (void* UU(v)) { ); assert_zero(r); - // at this point, we have p1 pinned, want to start a thread to do an unpin_and_remove - // on p1 - r = toku_pthread_create( - &unpin_and_remove_tid, - NULL, - unpin_and_remove_one, - NULL - ); + // at this point, we have p1 pinned, want to start a thread to do an + // unpin_and_remove + // on p1 + r = toku_pthread_create(toku_uninstrumented, + &unpin_and_remove_tid, + nullptr, + unpin_and_remove_one, + nullptr); assert_zero(r); // sleep to give a chance for the unpin_and_remove to get going usleep(512*1024); @@ -173,9 +173,9 @@ cachetable_test (void) { r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v1, &s1, wc, fetch_two, def_pf_req_callback, def_pf_callback, true, NULL); assert_zero(r); - toku_pthread_t tid1; - r = toku_pthread_create(&tid1, NULL, repin_one, NULL); + r = toku_pthread_create( + toku_uninstrumented, &tid1, nullptr, repin_one, nullptr); assert_zero(r); void *ret; diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc index ee68ab3ef0b..a4ff6c33e6a 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc @@ -199,9 +199,11 @@ cachetable_test (void) { toku_pthread_t tid1; toku_pthread_t tid2; - r = toku_pthread_create(&tid1, NULL, repin_one, NULL); + r = toku_pthread_create( + toku_uninstrumented, &tid1, nullptr, repin_one, nullptr); assert_zero(r); - r = toku_pthread_create(&tid2, NULL, repin_two, NULL); + r = toku_pthread_create( + toku_uninstrumented, &tid2, nullptr, repin_two, nullptr); assert_zero(r); // unpin 1 and 2 so tid1 and tid2 can make progress diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-checkpoint-pending.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-checkpoint-pending.cc index 3dd3a15e2de..5e87fed740d 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-checkpoint-pending.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-checkpoint-pending.cc @@ -158,14 +158,24 @@ static void checkpoint_pending(void) { // the checkpoint should cause n writes, but since n <= the cachetable size, // all items should be kept in the cachetable - n_flush = n_write_me = n_keep_me = n_fetch = 0; expect_value = 42; - //printf("E42\n"); + n_flush = n_write_me = n_keep_me = n_fetch = 0; + expect_value = 42; + // printf("E42\n"); toku_pthread_t checkpoint_thread, update_thread; - r = toku_pthread_create(&checkpoint_thread, NULL, do_checkpoint, NULL); assert(r==0); - r = toku_pthread_create(&update_thread, NULL, do_update, NULL); assert(r==0); - r = toku_pthread_join(checkpoint_thread, 0); assert(r==0); - r = toku_pthread_join(update_thread, 0); assert(r==0); - + r = toku_pthread_create(toku_uninstrumented, + &checkpoint_thread, + nullptr, + do_checkpoint, + nullptr); + assert(r == 0); + r = toku_pthread_create( + toku_uninstrumented, &update_thread, nullptr, do_update, nullptr); + assert(r == 0); + r = toku_pthread_join(checkpoint_thread, 0); + assert(r == 0); + r = toku_pthread_join(update_thread, 0); + assert(r == 0); + assert(n_flush == N && n_write_me == N && n_keep_me == N); // after the checkpoint, all of the items should be 43 diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc index c1e7b373e83..dd6c674af24 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc @@ -91,7 +91,7 @@ run_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_mutex_init(&attr_mutex, NULL); + toku_mutex_init(toku_uninstrumented, &attr_mutex, nullptr); toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-clone-checkpoint.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-clone-checkpoint.cc index 50bd20f492e..99d595b1ff1 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-clone-checkpoint.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-clone-checkpoint.cc @@ -112,14 +112,17 @@ cachetable_test (void) { CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct); toku_cachetable_begin_checkpoint(cp, NULL); - clone_flush_started = false; clone_flush_completed = false; toku_pthread_t checkpoint_tid; - r = toku_pthread_create(&checkpoint_tid, NULL, run_end_checkpoint, NULL); - assert_zero(r); + r = toku_pthread_create(toku_uninstrumented, + &checkpoint_tid, + nullptr, + run_end_checkpoint, + nullptr); + assert_zero(r); - usleep(1*1024*1024); + usleep(1 * 1024 * 1024); r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, true, NULL); assert_zero(r); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-pin-checkpoint.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-pin-checkpoint.cc index 8a270af0566..65b02aebaec 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-pin-checkpoint.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-pin-checkpoint.cc @@ -386,19 +386,28 @@ cachetable_test (void) { run_test = true; for (int i = 0; i < NUM_MOVER_THREADS; i++) { - r = toku_pthread_create(&read_random_tid[i], NULL, read_random_numbers, NULL); + r = toku_pthread_create(toku_uninstrumented, + &read_random_tid[i], + nullptr, + read_random_numbers, + nullptr); assert_zero(r); } for (int i = 0; i < NUM_MOVER_THREADS; i++) { - r = toku_pthread_create(&move_tid[i], NULL, move_numbers, NULL); + r = toku_pthread_create(toku_uninstrumented, + &move_tid[i], + nullptr, + move_numbers, + nullptr); assert_zero(r); } - r = toku_pthread_create(&checkpoint_tid, NULL, checkpoints, NULL); - assert_zero(r); - r = toku_pthread_create(&time_tid, NULL, test_time, NULL); + r = toku_pthread_create( + toku_uninstrumented, &checkpoint_tid, nullptr, checkpoints, nullptr); + assert_zero(r); + r = toku_pthread_create( + toku_uninstrumented, &time_tid, nullptr, test_time, nullptr); assert_zero(r); - void *ret; r = toku_pthread_join(time_tid, &ret); assert_zero(r); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-put-checkpoint.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-put-checkpoint.cc index afc95471116..4cf1678449b 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-put-checkpoint.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-put-checkpoint.cc @@ -518,19 +518,28 @@ cachetable_test (void) { run_test = true; for (int i = 0; i < NUM_MOVER_THREADS; i++) { - r = toku_pthread_create(&move_tid[i], NULL, move_numbers, NULL); + r = toku_pthread_create(toku_uninstrumented, + &move_tid[i], + nullptr, + move_numbers, + nullptr); assert_zero(r); } for (int i = 0; i < NUM_MOVER_THREADS; i++) { - r = toku_pthread_create(&merge_and_split_tid[i], NULL, merge_and_split, NULL); + r = toku_pthread_create(toku_uninstrumented, + &merge_and_split_tid[i], + nullptr, + merge_and_split, + nullptr); assert_zero(r); } - r = toku_pthread_create(&checkpoint_tid, NULL, checkpoints, NULL); - assert_zero(r); - r = toku_pthread_create(&time_tid, NULL, test_time, NULL); + r = toku_pthread_create( + toku_uninstrumented, &checkpoint_tid, nullptr, checkpoints, nullptr); + assert_zero(r); + r = toku_pthread_create( + toku_uninstrumented, &time_tid, nullptr, test_time, nullptr); assert_zero(r); - void *ret; r = toku_pthread_join(time_tid, &ret); assert_zero(r); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-rwlock-test.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-rwlock-test.cc index 9a62f99e1fa..6d8bc28026c 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-rwlock-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-rwlock-test.cc @@ -40,24 +40,22 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // test create and destroy -static void -test_create_destroy (void) { - struct rwlock the_rwlock, *rwlock = &the_rwlock; +static void test_create_destroy(void) { + struct st_rwlock the_rwlock, *rwlock = &the_rwlock; - rwlock_init(rwlock); + rwlock_init(toku_uninstrumented, rwlock); rwlock_destroy(rwlock); } // test read lock and unlock with no writers -static void -test_simple_read_lock (int n) { - struct rwlock the_rwlock, *rwlock = &the_rwlock; +static void test_simple_read_lock(int n) { + struct st_rwlock the_rwlock, *rwlock = &the_rwlock; - rwlock_init(rwlock); + rwlock_init(toku_uninstrumented, rwlock); assert(rwlock_readers(rwlock) == 0); int i; - for (i=1; i<=n; i++) { + for (i = 1; i <= n; i++) { rwlock_read_lock(rwlock, 0); assert(rwlock_readers(rwlock) == i); assert(rwlock_users(rwlock) == i); @@ -72,11 +70,10 @@ test_simple_read_lock (int n) { // test write lock and unlock with no readers -static void -test_simple_write_lock (void) { - struct rwlock the_rwlock, *rwlock = &the_rwlock; +static void test_simple_write_lock(void) { + struct st_rwlock the_rwlock, *rwlock = &the_rwlock; - rwlock_init(rwlock); + rwlock_init(toku_uninstrumented, rwlock); assert(rwlock_users(rwlock) == 0); rwlock_write_lock(rwlock, 0); assert(rwlock_writers(rwlock) == 1); @@ -88,19 +85,17 @@ test_simple_write_lock (void) { struct rw_event { int e; - struct rwlock the_rwlock; + struct st_rwlock the_rwlock; toku_mutex_t mutex; }; -static void -rw_event_init (struct rw_event *rwe) { +static void rw_event_init(struct rw_event *rwe) { rwe->e = 0; - rwlock_init(&rwe->the_rwlock); - toku_mutex_init(&rwe->mutex, 0); + rwlock_init(toku_uninstrumented, &rwe->the_rwlock); + toku_mutex_init(toku_uninstrumented, &rwe->mutex, nullptr); } -static void -rw_event_destroy (struct rw_event *rwe) { +static void rw_event_destroy(struct rw_event *rwe) { rwlock_destroy(&rwe->the_rwlock); toku_mutex_destroy(&rwe->mutex); } @@ -138,10 +133,12 @@ test_writer_priority (void) { toku_mutex_unlock(&rwe->mutex); toku_pthread_t tid; - r = toku_pthread_create(&tid, 0, test_writer_priority_thread, rwe); + r = toku_pthread_create( + toku_uninstrumented, &tid, 0, test_writer_priority_thread, rwe); sleep(1); toku_mutex_lock(&rwe->mutex); - rwe->e++; assert(rwe->e == 2); + rwe->e++; + assert(rwe->e == 2); toku_mutex_unlock(&rwe->mutex); sleep(1); @@ -196,10 +193,12 @@ test_single_writer (void) { toku_mutex_unlock(&rwe->mutex); toku_pthread_t tid; - r = toku_pthread_create(&tid, 0, test_single_writer_thread, rwe); + r = toku_pthread_create( + toku_uninstrumented, &tid, 0, test_single_writer_thread, rwe); sleep(1); toku_mutex_lock(&rwe->mutex); - rwe->e++; assert(rwe->e == 2); + rwe->e++; + assert(rwe->e == 2); assert(rwlock_writers(&rwe->the_rwlock) == 1); assert(rwlock_users(&rwe->the_rwlock) == 2); rwlock_write_unlock(&rwe->the_rwlock); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-read-pin-nonblocking.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-read-pin-nonblocking.cc index 8fd8828737a..ebe05e50883 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-read-pin-nonblocking.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-read-pin-nonblocking.cc @@ -111,9 +111,20 @@ run_test (void) { toku_pthread_t fetch_tid; fetch_called = false; - r = toku_pthread_create(&fetch_tid, NULL, run_expensive_fetch, NULL); + r = toku_pthread_create( + toku_uninstrumented, &fetch_tid, nullptr, run_expensive_fetch, nullptr); sleep(1); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, sleep_fetch, def_pf_req_callback, def_pf_callback, false, NULL); + r = toku_cachetable_get_and_pin(f1, + make_blocknum(1), + 1, + &v1, + &s1, + wc, + sleep_fetch, + def_pf_req_callback, + def_pf_callback, + false, + NULL); assert_zero(r); assert(fetch_called); r = toku_test_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); @@ -133,9 +144,20 @@ run_test (void) { toku_pthread_t pf_tid; pf_called = false; - r = toku_pthread_create(&pf_tid, NULL, run_expensive_pf, NULL); + r = toku_pthread_create( + toku_uninstrumented, &pf_tid, nullptr, run_expensive_pf, nullptr); sleep(1); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, sleep_fetch, def_pf_req_callback, def_pf_callback, false, NULL); + r = toku_cachetable_get_and_pin(f1, + make_blocknum(1), + 1, + &v1, + &s1, + wc, + sleep_fetch, + def_pf_req_callback, + def_pf_callback, + false, + NULL); assert_zero(r); assert(pf_called); r = toku_test_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-read-pin.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-read-pin.cc index 63ca871a459..dd5d59df002 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-read-pin.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-read-pin.cc @@ -117,9 +117,20 @@ run_test (void) { toku_pthread_t fetch_tid; fetch_called = false; - r = toku_pthread_create(&fetch_tid, NULL, run_expensive_fetch, NULL); + r = toku_pthread_create( + toku_uninstrumented, &fetch_tid, nullptr, run_expensive_fetch, nullptr); sleep(1); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, sleep_fetch, def_pf_req_callback, def_pf_callback, false, NULL); + r = toku_cachetable_get_and_pin(f1, + make_blocknum(1), + 1, + &v1, + &s1, + wc, + sleep_fetch, + def_pf_req_callback, + def_pf_callback, + false, + NULL); assert_zero(r); assert(fetch_called); r = toku_test_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); @@ -141,9 +152,20 @@ run_test (void) { toku_pthread_t pf_tid; pf_called = false; - r = toku_pthread_create(&pf_tid, NULL, run_expensive_pf, NULL); + r = toku_pthread_create( + toku_uninstrumented, &pf_tid, nullptr, run_expensive_pf, nullptr); sleep(1); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, sleep_fetch, def_pf_req_callback, def_pf_callback, false, NULL); + r = toku_cachetable_get_and_pin(f1, + make_blocknum(1), + 1, + &v1, + &s1, + wc, + sleep_fetch, + def_pf_req_callback, + def_pf_callback, + false, + NULL); assert_zero(r); assert(pf_called); r = toku_test_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-test.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-test.cc index e4bb5fb0aea..64f688c470d 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-test.cc @@ -42,15 +42,13 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // global data, especially between the test thread and the cachetable // writeback threads -toku_mutex_t test_mutex; +toku_mutex_t test_mutex; static inline void test_mutex_init(void) { - toku_mutex_init(&test_mutex, 0); + toku_mutex_init(toku_uninstrumented, &test_mutex, nullptr); } -static inline void test_mutex_destroy(void) { - toku_mutex_destroy(&test_mutex); -} +static inline void test_mutex_destroy(void) { toku_mutex_destroy(&test_mutex); } static inline void test_mutex_lock(void) { toku_mutex_lock(&test_mutex); diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-unpin-remove-and-checkpoint.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-unpin-remove-and-checkpoint.cc index 11baa36e51b..0e44bf10349 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-unpin-remove-and-checkpoint.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-unpin-remove-and-checkpoint.cc @@ -86,11 +86,22 @@ run_test (void) { // now this should mark the pair for checkpoint CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct); toku_cachetable_begin_checkpoint(cp, NULL); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, true, NULL); + r = toku_cachetable_get_and_pin(f1, + make_blocknum(1), + toku_cachetable_hash(f1, make_blocknum(1)), + &v1, + &s1, + wc, + def_fetch, + def_pf_req_callback, + def_pf_callback, + true, + NULL); toku_pthread_t mytid; - r = toku_pthread_create(&mytid, NULL, run_end_chkpt, NULL); - assert(r==0); + r = toku_pthread_create( + toku_uninstrumented, &mytid, nullptr, run_end_chkpt, nullptr); + assert(r == 0); // give checkpoint thread a chance to start waiting on lock sleep(1); diff --git a/storage/tokudb/PerconaFT/ft/tests/ftloader-test-extractor-errors.cc b/storage/tokudb/PerconaFT/ft/tests/ftloader-test-extractor-errors.cc index 872b674c784..4bff52ceb1b 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ftloader-test-extractor-errors.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ftloader-test-extractor-errors.cc @@ -141,7 +141,7 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail, const char // setup error injection toku_set_func_malloc(my_malloc); toku_set_func_realloc(my_realloc); - ft_loader_set_os_fwrite(bad_fwrite); + toku_set_func_fwrite(bad_fwrite); toku_set_func_write(bad_write); toku_set_func_pwrite(bad_pwrite); ft_loader_set_poll_function(&loader->poll_callback, loader_poll_callback, NULL); @@ -157,7 +157,7 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail, const char toku_set_func_malloc(NULL); toku_set_func_realloc(NULL); - ft_loader_set_os_fwrite(NULL); + toku_set_func_fwrite(nullptr); toku_set_func_write(NULL); toku_set_func_pwrite(NULL); diff --git a/storage/tokudb/PerconaFT/ft/tests/ftloader-test-merge-files-dbufio.cc b/storage/tokudb/PerconaFT/ft/tests/ftloader-test-merge-files-dbufio.cc index d1aeff198ff..0b121316738 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ftloader-test-merge-files-dbufio.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ftloader-test-merge-files-dbufio.cc @@ -393,13 +393,17 @@ static void test (const char *directory, bool is_error) { toku_pthread_t consumer; struct consumer_thunk cthunk = {q, 0}; { - int r = toku_pthread_create(&consumer, NULL, consumer_thread, (void*)&cthunk); - assert(r==0); + int r = toku_pthread_create(toku_uninstrumented, + &consumer, + nullptr, + consumer_thread, + static_cast<void *>(&cthunk)); + assert(r == 0); } toku_set_func_malloc_only(my_malloc); toku_set_func_realloc_only(my_realloc); - ft_loader_set_os_fwrite(bad_fwrite); + toku_set_func_fwrite(bad_fwrite); toku_set_func_write(bad_write); toku_set_func_pwrite(bad_pwrite); toku_set_func_fdopen(bad_fdopen); @@ -427,7 +431,7 @@ static void test (const char *directory, bool is_error) { toku_set_func_malloc(NULL); toku_set_func_realloc(NULL); - ft_loader_set_os_fwrite(NULL); + toku_set_func_fwrite(nullptr); toku_set_func_write(NULL); toku_set_func_pwrite(NULL); toku_set_func_fdopen(NULL); diff --git a/storage/tokudb/PerconaFT/ft/tests/ftloader-test-writer-errors.cc b/storage/tokudb/PerconaFT/ft/tests/ftloader-test-writer-errors.cc index 0784ca87a26..e4423319518 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ftloader-test-writer-errors.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ftloader-test-writer-errors.cc @@ -154,7 +154,7 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec toku_set_func_malloc_only(my_malloc); toku_set_func_realloc_only(my_realloc); - ft_loader_set_os_fwrite(bad_fwrite); + toku_set_func_fwrite(bad_fwrite); toku_set_func_write(bad_write); toku_set_func_pwrite(bad_pwrite); ft_loader_set_error_function(&bl.error_callback, NULL, NULL); @@ -164,7 +164,7 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec toku_set_func_malloc_only(NULL); toku_set_func_realloc_only(NULL); - ft_loader_set_os_fwrite(NULL); + toku_set_func_fwrite(nullptr); toku_set_func_write(NULL); toku_set_func_pwrite(NULL); diff --git a/storage/tokudb/PerconaFT/ft/tests/log-test4.cc b/storage/tokudb/PerconaFT/ft/tests/log-test4.cc index a31181deb30..e0bbedb95bf 100644 --- a/storage/tokudb/PerconaFT/ft/tests/log-test4.cc +++ b/storage/tokudb/PerconaFT/ft/tests/log-test4.cc @@ -63,11 +63,14 @@ test_main (int argc __attribute__((__unused__)), r = toku_logger_close(&logger); assert(r == 0); { - toku_struct_stat statbuf; - sprintf(logname, "%s/log000000000000.tokulog%d", TOKU_TEST_FILENAME, TOKU_LOG_VERSION); - r = toku_stat(logname, &statbuf); - assert(r==0); - assert(statbuf.st_size==12+5); + toku_struct_stat statbuf; + sprintf(logname, + "%s/log000000000000.tokulog%d", + TOKU_TEST_FILENAME, + TOKU_LOG_VERSION); + r = toku_stat(logname, &statbuf, toku_uninstrumented); + assert(r == 0); + assert(statbuf.st_size == 12 + 5); } toku_os_recursive_delete(TOKU_TEST_FILENAME); return 0; diff --git a/storage/tokudb/PerconaFT/ft/tests/log-test5.cc b/storage/tokudb/PerconaFT/ft/tests/log-test5.cc index d4e31af22dc..fed9467a4ae 100644 --- a/storage/tokudb/PerconaFT/ft/tests/log-test5.cc +++ b/storage/tokudb/PerconaFT/ft/tests/log-test5.cc @@ -81,14 +81,14 @@ test_main (int argc __attribute__((__unused__)), struct dirent *dirent; while ((dirent=readdir(dir))) { if (strncmp(dirent->d_name, "log", 3)!=0) continue; - char fname[TOKU_PATH_MAX+1]; - toku_path_join(fname, 2, TOKU_TEST_FILENAME, dirent->d_name); - toku_struct_stat statbuf; - r = toku_stat(fname, &statbuf); - assert(r==0); - assert(statbuf.st_size<=LSIZE+10); - } - r = closedir(dir); + char fname[TOKU_PATH_MAX + 1]; + toku_path_join(fname, 2, TOKU_TEST_FILENAME, dirent->d_name); + toku_struct_stat statbuf; + r = toku_stat(fname, &statbuf, toku_uninstrumented); + assert(r == 0); + assert(statbuf.st_size <= LSIZE + 10); + } + r = closedir(dir); assert(r==0); } toku_os_recursive_delete(TOKU_TEST_FILENAME); diff --git a/storage/tokudb/PerconaFT/ft/tests/log-test6.cc b/storage/tokudb/PerconaFT/ft/tests/log-test6.cc index 4d17488c57c..0e8b94566df 100644 --- a/storage/tokudb/PerconaFT/ft/tests/log-test6.cc +++ b/storage/tokudb/PerconaFT/ft/tests/log-test6.cc @@ -86,11 +86,14 @@ test_main (int argc __attribute__((__unused__)), { char logname[PATH_MAX]; - toku_struct_stat statbuf; - sprintf(logname, "%s/log000000000000.tokulog%d", TOKU_TEST_FILENAME, TOKU_LOG_VERSION); - r = toku_stat(logname, &statbuf); - assert(r==0); - assert(statbuf.st_size<=LSIZE); + toku_struct_stat statbuf; + sprintf(logname, + "%s/log000000000000.tokulog%d", + TOKU_TEST_FILENAME, + TOKU_LOG_VERSION); + r = toku_stat(logname, &statbuf, toku_uninstrumented); + assert(r == 0); + assert(statbuf.st_size <= LSIZE); } toku_os_recursive_delete(TOKU_TEST_FILENAME); return 0; diff --git a/storage/tokudb/PerconaFT/ft/tests/recovery-bad-last-entry.cc b/storage/tokudb/PerconaFT/ft/tests/recovery-bad-last-entry.cc index 431ee4b5e50..a3c934d1f4b 100644 --- a/storage/tokudb/PerconaFT/ft/tests/recovery-bad-last-entry.cc +++ b/storage/tokudb/PerconaFT/ft/tests/recovery-bad-last-entry.cc @@ -83,11 +83,16 @@ run_test(void) { r = toku_dup2(devnul, fileno(stderr)); assert(r==fileno(stderr)); r = close(devnul); assert(r==0); - char fname[TOKU_PATH_MAX+1]; - sprintf(fname, "%s/%s%d", TOKU_TEST_FILENAME, "log000000000000.tokulog", TOKU_LOG_VERSION); - - r = toku_stat(fname, &st); assert(r==0); - if ( st.st_size - trim > magic_begin_end_checkpoint_sz ) { + char fname[TOKU_PATH_MAX + 1]; + sprintf(fname, + "%s/%s%d", + TOKU_TEST_FILENAME, + "log000000000000.tokulog", + TOKU_LOG_VERSION); + + r = toku_stat(fname, &st, toku_uninstrumented); + assert(r == 0); + if (st.st_size - trim > magic_begin_end_checkpoint_sz) { r = truncate(fname, st.st_size - trim); CKERR(r); } diff --git a/storage/tokudb/PerconaFT/ft/tests/test-bjm.cc b/storage/tokudb/PerconaFT/ft/tests/test-bjm.cc index 97e00c42b95..6afe5b9f7c4 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-bjm.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-bjm.cc @@ -71,12 +71,16 @@ static void bjm_test(void) { bjm_reset(bjm); r = bjm_add_background_job(bjm); - assert_zero(r); - toku_pthread_t tid; - r = toku_pthread_create(&tid, NULL, finish_bjm, NULL); assert_zero(r); - usleep(2*1024*1024); - // should return non-zero because tid is waiting + toku_pthread_t tid; + r = toku_pthread_create(toku_uninstrumented, + &tid, + nullptr, + finish_bjm, + nullptr); + assert_zero(r); + usleep(2 * 1024 * 1024); + // should return non-zero because tid is waiting // for background jobs to finish r = bjm_add_background_job(bjm); assert(r != 0); diff --git a/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-flush.cc b/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-flush.cc index 3e5d9bba817..06a26614885 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-flush.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-flush.cc @@ -113,10 +113,14 @@ static void flusher_callback(int state, void* extra) { if ((state == flt_flush_before_child_pin && !after_child_pin) || (state == ft_flush_aflter_child_pin && after_child_pin)) { checkpoint_called = true; - int r = toku_pthread_create(&checkpoint_tid, NULL, do_checkpoint, NULL); + int r = toku_pthread_create(toku_uninstrumented, + &checkpoint_tid, + nullptr, + do_checkpoint, + nullptr); assert_zero(r); while (!checkpoint_callback_called) { - usleep(1*1024*1024); + usleep(1 * 1024 * 1024); } } } diff --git a/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-merge.cc b/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-merge.cc index cb316127ef7..1029dfef320 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-merge.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-merge.cc @@ -103,10 +103,14 @@ static void flusher_callback(int state, void* extra) { } if (state == desired_state) { checkpoint_called = true; - int r = toku_pthread_create(&checkpoint_tid, NULL, do_checkpoint, NULL); + int r = toku_pthread_create(toku_uninstrumented, + &checkpoint_tid, + nullptr, + do_checkpoint, + nullptr); assert_zero(r); while (!checkpoint_callback_called) { - usleep(1*1024*1024); + usleep(1 * 1024 * 1024); } } } diff --git a/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-rebalance.cc b/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-rebalance.cc index 5f8485ac4ec..208ebe3ca31 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-rebalance.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-rebalance.cc @@ -103,10 +103,14 @@ static void flusher_callback(int state, void* extra) { } if (state == desired_state) { checkpoint_called = true; - int r = toku_pthread_create(&checkpoint_tid, NULL, do_checkpoint, NULL); + int r = toku_pthread_create(toku_uninstrumented, + &checkpoint_tid, + nullptr, + do_checkpoint, + nullptr); assert_zero(r); while (!checkpoint_callback_called) { - usleep(1*1024*1024); + usleep(1 * 1024 * 1024); } } } diff --git a/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-split.cc b/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-split.cc index 70c3ba22a0c..2b29de409b1 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-split.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-checkpoint-during-split.cc @@ -113,10 +113,14 @@ static void flusher_callback(int state, void* extra) { if ((state == flt_flush_before_split && !after_split) || (state == flt_flush_during_split && after_split)) { checkpoint_called = true; - int r = toku_pthread_create(&checkpoint_tid, NULL, do_checkpoint, NULL); + int r = toku_pthread_create(toku_uninstrumented, + &checkpoint_tid, + nullptr, + do_checkpoint, + nullptr); assert_zero(r); while (!checkpoint_callback_called) { - usleep(1*1024*1024); + usleep(1 * 1024 * 1024); } } } diff --git a/storage/tokudb/PerconaFT/ft/tests/test3681.cc b/storage/tokudb/PerconaFT/ft/tests/test3681.cc index e5e24a88246..9e4a46e8dfe 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test3681.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test3681.cc @@ -91,13 +91,35 @@ static void *startb (void *n) { return NULL; } -static void test3681 (void) { +static void test3681(void) { setup(); - toku_pthread_t a,b; - { int r; r = toku_pthread_create(&a, NULL, starta, NULL); assert(r==0); } - { int r; r = toku_pthread_create(&b, NULL, startb, NULL); assert(r==0); } - { int r; void *v; r = toku_pthread_join(a, &v); assert(r==0); assert(v==NULL); } - { int r; void *v; r = toku_pthread_join(b, &v); assert(r==0); assert(v==NULL); } + toku_pthread_t a, b; + { + int r; + r = toku_pthread_create( + toku_uninstrumented, &a, nullptr, starta, nullptr); + assert(r == 0); + } + { + int r; + r = toku_pthread_create( + toku_uninstrumented, &b, nullptr, startb, nullptr); + assert(r == 0); + } + { + int r; + void *v; + r = toku_pthread_join(a, &v); + assert(r == 0); + assert(v == NULL); + } + { + int r; + void *v; + r = toku_pthread_join(b, &v); + assert(r == 0); + assert(v == NULL); + } finish(); } diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc index 97afd2f5bdb..7228de06f34 100644 --- a/storage/tokudb/PerconaFT/ft/txn/roll.cc +++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc @@ -195,14 +195,14 @@ int toku_rollback_frename(BYTESTRING old_iname, toku_cachetable_get_fname_in_cwd(cachetable, new_iname.data), &toku_free); - if (toku_stat(old_iname_full.get(), &stat) == -1) { + if (toku_stat(old_iname_full.get(), &stat, toku_uninstrumented) == -1) { if (ENOENT == errno) old_exist = false; else return 1; } - if (toku_stat(new_iname_full.get(), &stat) == -1) { + if (toku_stat(new_iname_full.get(), &stat, toku_uninstrumented) == -1) { if (ENOENT == errno || ENAMETOOLONG == errno) new_exist = false; else @@ -220,7 +220,7 @@ int toku_rollback_frename(BYTESTRING old_iname, // removed // and the new file is renamed. if (old_exist && new_exist && - (toku_os_unlink(old_iname_full.get()) == -1 || + (toku_os_delete(old_iname_full.get()) == -1 || toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 || toku_fsync_directory(new_iname_full.get()) == -1 || toku_fsync_directory(old_iname_full.get()) == -1)) diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback_log_node_cache.cc b/storage/tokudb/PerconaFT/ft/txn/rollback_log_node_cache.cc index 71b1e79277c..5e1ab746936 100644 --- a/storage/tokudb/PerconaFT/ft/txn/rollback_log_node_cache.cc +++ b/storage/tokudb/PerconaFT/ft/txn/rollback_log_node_cache.cc @@ -41,7 +41,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "txn/rollback_log_node_cache.h" -void rollback_log_node_cache::init (uint32_t max_num_avail_nodes) { +toku_instr_key* rollback_log_node_cache_mutex_key; + +void rollback_log_node_cache::init(uint32_t max_num_avail_nodes) { XMALLOC_N(max_num_avail_nodes, m_avail_blocknums); m_max_num_avail = max_num_avail_nodes; m_first = 0; @@ -49,7 +51,7 @@ void rollback_log_node_cache::init (uint32_t max_num_avail_nodes) { toku_pthread_mutexattr_t attr; toku_mutexattr_init(&attr); toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); - toku_mutex_init(&m_mutex, &attr); + toku_mutex_init(*rollback_log_node_cache_mutex_key, &m_mutex, &attr); toku_mutexattr_destroy(&attr); } diff --git a/storage/tokudb/PerconaFT/ft/txn/txn.cc b/storage/tokudb/PerconaFT/ft/txn/txn.cc index a3ce6beb7b0..7327cbd9d24 100644 --- a/storage/tokudb/PerconaFT/ft/txn/txn.cc +++ b/storage/tokudb/PerconaFT/ft/txn/txn.cc @@ -45,8 +45,11 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ft/txn/txn_manager.h" #include "util/status.h" -void -toku_txn_get_status(TXN_STATUS s) { +toku_instr_key *txn_lock_mutex_key; +toku_instr_key *txn_state_lock_mutex_key; +toku_instr_key *result_state_cond_key; + +void toku_txn_get_status(TXN_STATUS s) { txn_status.init(); *s = txn_status; } @@ -225,74 +228,74 @@ static void toku_txn_create_txn ( static txn_child_manager tcm; - struct tokutxn new_txn = { - .txnid = {.parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }, - .snapshot_txnid64 = TXNID_NONE, - .snapshot_type = for_recovery ? TXN_SNAPSHOT_NONE : snapshot_type, - .for_recovery = for_recovery, - .logger = logger, - .parent = parent_tokutxn, - .child = NULL, - .child_manager_s = tcm, - .child_manager = NULL, - .container_db_txn = container_db_txn, - .live_root_txn_list = nullptr, - .xids = NULL, - .snapshot_next = NULL, - .snapshot_prev = NULL, - .begin_was_logged = false, - .declared_read_only = read_only, - .do_fsync = false, - .force_fsync_on_commit = false, - .do_fsync_lsn = ZERO_LSN, - .xa_xid = {0, 0, 0, ""}, - .progress_poll_fun = NULL, - .progress_poll_fun_extra = NULL, - - // You cannot initialize txn_lock a TOKU_MUTEX_INITIALIZER, because we - // will initialize it in the code below, and it cannot already - // be initialized at that point. Also, in general, you don't - // get to use PTHREAD_MUTEX_INITALIZER (which is what is inside - // TOKU_MUTEX_INITIALIZER) except in static variables, and this - // is initializing an auto variable. - // - // And we cannot simply avoid initializing these fields - // because, although it avoids -Wmissing-field-initializer - // errors under gcc, it gets other errors about non-trivial - // designated initializers not being supported. - - .txn_lock = ZERO_MUTEX_INITIALIZER, // Not TOKU_MUTEX_INITIALIZER - .open_fts = open_fts, - .roll_info = roll_info, - .state_lock = ZERO_MUTEX_INITIALIZER, // Not TOKU_MUTEX_INITIALIZER - .state_cond = ZERO_COND_INITIALIZER, // Not TOKU_COND_INITIALIZER - .state = TOKUTXN_LIVE, - .num_pin = 0, - .client_id = 0, - .client_extra = nullptr, - .start_time = time(NULL), - }; +struct tokutxn new_txn = { + .txnid = {.parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }, + .snapshot_txnid64 = TXNID_NONE, + .snapshot_type = for_recovery ? TXN_SNAPSHOT_NONE : snapshot_type, + .for_recovery = for_recovery, + .logger = logger, + .parent = parent_tokutxn, + .child = NULL, + .child_manager_s = tcm, + .child_manager = NULL, + .container_db_txn = container_db_txn, + .live_root_txn_list = nullptr, + .xids = NULL, + .snapshot_next = NULL, + .snapshot_prev = NULL, + .begin_was_logged = false, + .declared_read_only = read_only, + .do_fsync = false, + .force_fsync_on_commit = false, + .do_fsync_lsn = ZERO_LSN, + .xa_xid = {0, 0, 0, ""}, + .progress_poll_fun = NULL, + .progress_poll_fun_extra = NULL, + + // You cannot initialize txn_lock a TOKU_MUTEX_INITIALIZER, because we + // will initialize it in the code below, and it cannot already + // be initialized at that point. Also, in general, you don't + // get to use PTHREAD_MUTEX_INITALIZER (which is what is inside + // TOKU_MUTEX_INITIALIZER) except in static variables, and this + // is initializing an auto variable. + // + // And we cannot simply avoid initializing these fields + // because, although it avoids -Wmissing-field-initializer + // errors under gcc, it gets other errors about non-trivial + // designated initializers not being supported. + + .txn_lock = ZERO_MUTEX_INITIALIZER, // Not TOKU_MUTEX_INITIALIZER + .open_fts = open_fts, + .roll_info = roll_info, + .state_lock = ZERO_MUTEX_INITIALIZER, // Not TOKU_MUTEX_INITIALIZER + .state_cond = ZERO_COND_INITIALIZER, // Not TOKU_COND_INITIALIZER + .state = TOKUTXN_LIVE, + .num_pin = 0, + .client_id = 0, + .client_extra = nullptr, + .start_time = time(NULL), +}; - TOKUTXN result = NULL; - XMEMDUP(result, &new_txn); - invalidate_xa_xid(&result->xa_xid); - if (parent_tokutxn == NULL) { - result->child_manager = &result->child_manager_s; - result->child_manager->init(result); +TOKUTXN result = NULL; +XMEMDUP(result, &new_txn); +invalidate_xa_xid(&result->xa_xid); +if (parent_tokutxn == NULL) { + result->child_manager = &result->child_manager_s; + result->child_manager->init(result); } else { result->child_manager = parent_tokutxn->child_manager; } - toku_mutex_init(&result->txn_lock, nullptr); + toku_mutex_init(*txn_lock_mutex_key, &result->txn_lock, nullptr); toku_pthread_mutexattr_t attr; toku_mutexattr_init(&attr); toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); - toku_mutex_init(&result->state_lock, &attr); + toku_mutex_init(*txn_state_lock_mutex_key, &result->state_lock, &attr); toku_mutexattr_destroy(&attr); - toku_cond_init(&result->state_cond, nullptr); + toku_cond_init(*result_state_cond_key, &result->state_cond, nullptr); *tokutxn = result; diff --git a/storage/tokudb/PerconaFT/ft/txn/txn_child_manager.cc b/storage/tokudb/PerconaFT/ft/txn/txn_child_manager.cc index db110127c55..99a21331b0a 100644 --- a/storage/tokudb/PerconaFT/ft/txn/txn_child_manager.cc +++ b/storage/tokudb/PerconaFT/ft/txn/txn_child_manager.cc @@ -39,6 +39,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ft/logger/log-internal.h" #include "ft/txn/txn_child_manager.h" +toku_instr_key *txn_child_manager_mutex_key; + // // initialized a txn_child_manager, // when called, root->txnid.parent_id64 may not yet be set @@ -53,7 +55,7 @@ void txn_child_manager::init(TOKUTXN root) { toku_pthread_mutexattr_t attr; toku_mutexattr_init(&attr); toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); - toku_mutex_init(&m_mutex, &attr); + toku_mutex_init(*txn_child_manager_mutex_key, &m_mutex, &attr); toku_mutexattr_destroy(&attr); } diff --git a/storage/tokudb/PerconaFT/ft/txn/txn_manager.cc b/storage/tokudb/PerconaFT/ft/txn/txn_manager.cc index 8fe52b10597..1b55844bc7d 100644 --- a/storage/tokudb/PerconaFT/ft/txn/txn_manager.cc +++ b/storage/tokudb/PerconaFT/ft/txn/txn_manager.cc @@ -56,7 +56,10 @@ void set_test_txn_sync_callback(void (*cb) (pthread_t, void *), void *extra) { } bool garbage_collection_debug = false; -static bool txn_records_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, struct tokutxn *parent) { +toku_instr_key *txn_manager_lock_mutex_key; + +static bool txn_records_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, + struct tokutxn *parent) { if (snapshot_type == TXN_COPIES_SNAPSHOT) { return false; } @@ -248,9 +251,10 @@ verify_snapshot_system(TXN_MANAGER txn_manager UU()) { live_root_txns_omt.destroy(); } -void toku_txn_manager_init(TXN_MANAGER* txn_managerp) { +void toku_txn_manager_init(TXN_MANAGER *txn_managerp) { TXN_MANAGER XCALLOC(txn_manager); - toku_mutex_init(&txn_manager->txn_manager_lock, NULL); + toku_mutex_init( + *txn_manager_lock_mutex_key, &txn_manager->txn_manager_lock, nullptr); txn_manager->live_root_txns.create(); txn_manager->live_root_ids.create(); txn_manager->snapshot_head = NULL; diff --git a/storage/tokudb/PerconaFT/locktree/lock_request.cc b/storage/tokudb/PerconaFT/locktree/lock_request.cc index 0151b5bd466..c0829e3f4e1 100644 --- a/storage/tokudb/PerconaFT/locktree/lock_request.cc +++ b/storage/tokudb/PerconaFT/locktree/lock_request.cc @@ -62,7 +62,7 @@ void lock_request::create(void) { m_state = state::UNINITIALIZED; m_info = nullptr; - toku_cond_init(&m_wait_cond, nullptr); + toku_cond_init(*lock_request_m_wait_cond_key, &m_wait_cond, nullptr); m_start_test_callback = nullptr; m_start_before_pending_test_callback = nullptr; diff --git a/storage/tokudb/PerconaFT/locktree/locktree.cc b/storage/tokudb/PerconaFT/locktree/locktree.cc index 2fbf078bdd6..069aae26f66 100644 --- a/storage/tokudb/PerconaFT/locktree/locktree.cc +++ b/storage/tokudb/PerconaFT/locktree/locktree.cc @@ -51,7 +51,6 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // the locktree source file instead of the header. #include "concurrent_tree.h" - namespace toku { // A locktree represents the set of row locks owned by all transactions @@ -87,12 +86,13 @@ void lt_lock_request_info::init(void) { pending_lock_requests.create(); pending_is_empty = true; ZERO_STRUCT(mutex); - toku_mutex_init(&mutex, nullptr); + toku_mutex_init(*locktree_request_info_mutex_key, &mutex, nullptr); retry_want = retry_done = 0; ZERO_STRUCT(counters); ZERO_STRUCT(retry_mutex); - toku_mutex_init(&retry_mutex, nullptr); - toku_cond_init(&retry_cv, nullptr); + toku_mutex_init( + *locktree_request_info_retry_mutex_key, &retry_mutex, nullptr); + toku_cond_init(*locktree_request_info_retry_cv_key, &retry_cv, nullptr); running_retry = false; TOKU_VALGRIND_HG_DISABLE_CHECKING(&pending_is_empty, diff --git a/storage/tokudb/PerconaFT/locktree/manager.cc b/storage/tokudb/PerconaFT/locktree/manager.cc index 91ff7c5a007..6bb5c77bf32 100644 --- a/storage/tokudb/PerconaFT/locktree/manager.cc +++ b/storage/tokudb/PerconaFT/locktree/manager.cc @@ -56,9 +56,8 @@ void locktree_manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, m_lt_destroy_callback = destroy_cb; m_lt_escalate_callback = escalate_cb; m_lt_escalate_callback_extra = escalate_extra; - ZERO_STRUCT(m_mutex); - toku_mutex_init(&m_mutex, nullptr); + toku_mutex_init(*manager_mutex_key, &m_mutex, nullptr); ZERO_STRUCT(m_lt_counters); @@ -346,7 +345,8 @@ int locktree_manager::check_current_lock_constraints(bool big_txn) { void locktree_manager::escalator_init(void) { ZERO_STRUCT(m_escalation_mutex); - toku_mutex_init(&m_escalation_mutex, nullptr); + toku_mutex_init( + *manager_escalation_mutex_key, &m_escalation_mutex, nullptr); m_escalation_count = 0; m_escalation_time = 0; m_wait_escalation_count = 0; @@ -403,8 +403,8 @@ struct escalate_args { void locktree_manager::locktree_escalator::create(void) { ZERO_STRUCT(m_escalator_mutex); - toku_mutex_init(&m_escalator_mutex, nullptr); - toku_cond_init(&m_escalator_done, nullptr); + toku_mutex_init(*manager_escalator_mutex_key, &m_escalator_mutex, nullptr); + toku_cond_init(*manager_m_escalator_done_key, &m_escalator_done, nullptr); m_escalator_running = false; } diff --git a/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_1big7lt_1small.cc b/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_1big7lt_1small.cc index 6c143f963ba..32029b5bddd 100644 --- a/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_1big7lt_1small.cc +++ b/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_1big7lt_1small.cc @@ -201,12 +201,14 @@ int main(int argc, const char *argv[]) { locktree *small_lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); // create the worker threads - struct big_arg big_arg = { &mgr, big_lt, n_big, 1000 }; - r = toku_pthread_create(&big_id, nullptr, big_f, &big_arg); + struct big_arg big_arg = {&mgr, big_lt, n_big, 1000}; + r = toku_pthread_create( + toku_uninstrumented, &big_id, nullptr, big_f, &big_arg); assert(r == 0); - struct small_arg small_arg = { &mgr, small_lt, 2000, 0 }; - r = toku_pthread_create(&small_id, nullptr, small_f, &small_arg); + struct small_arg small_arg = {&mgr, small_lt, 2000, 0}; + r = toku_pthread_create( + toku_uninstrumented, &small_id, nullptr, small_f, &small_arg); assert(r == 0); // wait for some escalations to occur diff --git a/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_2big_1lt.cc b/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_2big_1lt.cc index 17eb07060d4..ff59a7bdd5c 100644 --- a/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_2big_1lt.cc +++ b/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_2big_1lt.cc @@ -169,8 +169,10 @@ int main(int argc, const char *argv[]) { struct arg big_arg[n_big]; pthread_t big_ids[n_big]; for (int i = 0; i < n_big; i++) { - big_arg[i] = { &mgr, lt[i % n_lt], (TXNID)(1000+i), i == 0 ? 1 : -1000000000 }; - r = toku_pthread_create(&big_ids[i], nullptr, big_f, &big_arg[i]); + big_arg[i] = { + &mgr, lt[i % n_lt], (TXNID)(1000 + i), i == 0 ? 1 : -1000000000}; + r = toku_pthread_create( + toku_uninstrumented, &big_ids[i], nullptr, big_f, &big_arg[i]); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_2big_2lt.cc b/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_2big_2lt.cc index 4326e8fafc2..be1ddaba9d0 100644 --- a/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_2big_2lt.cc +++ b/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_2big_2lt.cc @@ -169,8 +169,10 @@ int main(int argc, const char *argv[]) { struct arg big_arg[n_big]; pthread_t big_ids[n_big]; for (int i = 0; i < n_big; i++) { - big_arg[i] = { &mgr, lt[i % n_lt], (TXNID)(1000+i), i == 0 ? 1 : -1000000000 }; - r = toku_pthread_create(&big_ids[i], nullptr, big_f, &big_arg[i]); + big_arg[i] = { + &mgr, lt[i % n_lt], (TXNID)(1000 + i), i == 0 ? 1 : -1000000000}; + r = toku_pthread_create( + toku_uninstrumented, &big_ids[i], nullptr, big_f, &big_arg[i]); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_stalls.cc b/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_stalls.cc index d146d94e337..9dc9596a7ed 100644 --- a/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_stalls.cc +++ b/storage/tokudb/PerconaFT/locktree/tests/locktree_escalation_stalls.cc @@ -182,9 +182,10 @@ int main(int argc, const char *argv[]) { locktree *lt_1 = mgr.get_lt(dict_id_1, dbt_comparator, nullptr); // create the worker threads - struct arg big_arg = { &mgr, lt_0, 1000 }; + struct arg big_arg = {&mgr, lt_0, 1000}; pthread_t big_id; - r = toku_pthread_create(&big_id, nullptr, big_f, &big_arg); + r = toku_pthread_create( + toku_uninstrumented, &big_id, nullptr, big_f, &big_arg); assert(r == 0); const int n_small = 7; @@ -192,8 +193,12 @@ int main(int argc, const char *argv[]) { struct arg small_args[n_small]; for (int i = 0; i < n_small; i++) { - small_args[i] = { &mgr, lt_1, (TXNID)(2000+i), i }; - r = toku_pthread_create(&small_ids[i], nullptr, small_f, &small_args[i]); + small_args[i] = {&mgr, lt_1, (TXNID)(2000 + i), i}; + r = toku_pthread_create(toku_uninstrumented, + &small_ids[i], + nullptr, + small_f, + &small_args[i]); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/locktree/tests/manager_parallel_locktree_get_release.cc b/storage/tokudb/PerconaFT/locktree/tests/manager_parallel_locktree_get_release.cc index ec9ef21583c..08ce63140a5 100644 --- a/storage/tokudb/PerconaFT/locktree/tests/manager_parallel_locktree_get_release.cc +++ b/storage/tokudb/PerconaFT/locktree/tests/manager_parallel_locktree_get_release.cc @@ -71,7 +71,8 @@ void manager_unit_test::test_reference_release_lt(void) { const int nthreads = 2; pthread_t ids[nthreads]; for (int i = 0; i < nthreads; i++) { - r = toku_pthread_create(&ids[i], nullptr, my_tester, &mgr); + r = toku_pthread_create( + toku_uninstrumented, &ids[i], nullptr, my_tester, &mgr); assert(r == 0); } for (int i = 0; i < nthreads; i++) { diff --git a/storage/tokudb/PerconaFT/locktree/treenode.cc b/storage/tokudb/PerconaFT/locktree/treenode.cc index 4f43291fe76..cc3a4969643 100644 --- a/storage/tokudb/PerconaFT/locktree/treenode.cc +++ b/storage/tokudb/PerconaFT/locktree/treenode.cc @@ -38,9 +38,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include <toku_race_tools.h> -void treenode::mutex_lock(void) { - toku_mutex_lock(&m_mutex); -} +// TODO: source location info might have to be pulled up one caller +// to be useful +void treenode::mutex_lock(void) { toku_mutex_lock(&m_mutex); } void treenode::mutex_unlock(void) { toku_mutex_unlock(&m_mutex); @@ -58,7 +58,7 @@ void treenode::init(const comparator *cmp) { toku_pthread_mutexattr_t attr; toku_mutexattr_init(&attr); toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); - toku_mutex_init(&m_mutex, &attr); + toku_mutex_init(*treenode_mutex_key, &m_mutex, &attr); toku_mutexattr_destroy(&attr); m_left_child.set(nullptr); m_right_child.set(nullptr); diff --git a/storage/tokudb/PerconaFT/portability/CMakeLists.txt b/storage/tokudb/PerconaFT/portability/CMakeLists.txt index 4793db63cc1..e5576a5d463 100644 --- a/storage/tokudb/PerconaFT/portability/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/portability/CMakeLists.txt @@ -8,6 +8,7 @@ set(tokuportability_srcs portability toku_assert toku_crash + toku_instr_mysql toku_path toku_pthread toku_time @@ -53,6 +54,10 @@ if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING) DESTINATION ${INSTALL_LIBDIR} COMPONENT tokukv_libs_shared ) +else () + set_property(SOURCE toku_pthread portability APPEND PROPERTY + COMPILE_DEFINITIONS MYSQL_TOKUDB_ENGINE=1 ) + target_link_libraries(${LIBTOKUPORTABILITY} LINK_PRIVATE mysys) endif () add_subdirectory(tests) diff --git a/storage/tokudb/PerconaFT/portability/file.cc b/storage/tokudb/PerconaFT/portability/file.cc index 0e3efc1a12a..485bfac8514 100644 --- a/storage/tokudb/PerconaFT/portability/file.cc +++ b/storage/tokudb/PerconaFT/portability/file.cc @@ -52,9 +52,12 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "toku_path.h" #include <portability/toku_atomic.h> +toku_instr_key *tokudb_file_data_key; + static int toku_assert_on_write_enospc = 0; static const int toku_write_enospc_sleep = 1; -static uint64_t toku_write_enospc_last_report; // timestamp of most recent report to error log +static uint64_t toku_write_enospc_last_report; // timestamp of most recent + // report to error log static time_t toku_write_enospc_last_time; // timestamp of most recent ENOSPC static uint32_t toku_write_enospc_current; // number of threads currently blocked on ENOSPC static uint64_t toku_write_enospc_total; // total number of times ENOSPC was returned from an attempt to write @@ -142,11 +145,17 @@ static ssize_t (*t_full_pwrite)(int, const void *, size_t, off_t); static FILE * (*t_fdopen)(int, const char *); static FILE * (*t_fopen)(const char *, const char *); static int (*t_open)(const char *, int, int); -static int (*t_fclose)(FILE *); +static int (*t_fclose)(FILE *); static ssize_t (*t_read)(int, void *, size_t); static ssize_t (*t_pread)(int, void *, size_t, off_t); +static size_t (*os_fwrite_fun)(const void *, size_t, size_t, FILE *) = nullptr; + +void toku_set_func_fwrite( + size_t (*fwrite_fun)(const void *, size_t, size_t, FILE *)) { + os_fwrite_fun = fwrite_fun; +} -void toku_set_func_write (ssize_t (*write_fun)(int, const void *, size_t)) { +void toku_set_func_write(ssize_t (*write_fun)(int, const void *, size_t)) { t_write = write_fun; } @@ -186,9 +195,63 @@ void toku_set_func_pread (ssize_t (*pread_fun)(int, void *, size_t, off_t)) { t_pread = pread_fun; } -void -toku_os_full_write (int fd, const void *buf, size_t len) { - const char *bp = (const char *) buf; +int toku_os_delete_with_source_location(const char *name, + const char *src_file, + uint src_line) { + + toku_io_instrumentation io_annotation; + toku_instr_file_name_close_begin(io_annotation, + *tokudb_file_data_key, + toku_instr_file_op::file_delete, + name, + src_file, + src_line); + const int result = unlink(name); + + /* Register the result value with the instrumentation system */ + toku_instr_file_close_end(io_annotation, result); + + return result; +} + +int toku_os_rename_with_source_location(const char *old_name, + const char *new_name, + const char *src_file, + uint src_line) { + int result; + + toku_io_instrumentation io_annotation; + toku_instr_file_name_io_begin(io_annotation, + *tokudb_file_data_key, + toku_instr_file_op::file_rename, + new_name, + 0, + src_file, + src_line); + + result = rename(old_name, new_name); + /* Regsiter the result value with the instrumentation system */ + toku_instr_file_io_end(io_annotation, 0); + + return result; +} + +void toku_os_full_write_with_source_location(int fd, + const void *buf, + size_t len, + const char *src_file, + uint src_line) { + const char *bp = (const char *)buf; + size_t bytes_written = len; + + toku_io_instrumentation io_annotation; + toku_instr_file_io_begin(io_annotation, + toku_instr_file_op::file_write, + fd, + len, + src_file, + src_line); + while (len > 0) { ssize_t r; if (t_full_write) { @@ -205,14 +268,30 @@ toku_os_full_write (int fd, const void *buf, size_t len) { } } assert(len == 0); + + /* Register the result value with the instrumentaion system */ + toku_instr_file_io_end(io_annotation, bytes_written); } -int -toku_os_write (int fd, const void *buf, size_t len) { - const char *bp = (const char *) buf; +int toku_os_write_with_source_location(int fd, + const void *buf, + size_t len, + const char *src_file, + uint src_line) { + const char *bp = (const char *)buf; int result = 0; + ssize_t r; + + size_t bytes_written = len; + toku_io_instrumentation io_annotation; + toku_instr_file_io_begin(io_annotation, + toku_instr_file_op::file_write, + fd, + len, + src_file, + src_line); + while (len > 0) { - ssize_t r; if (t_write) { r = t_write(fd, bp, len); } else { @@ -222,17 +301,33 @@ toku_os_write (int fd, const void *buf, size_t len) { result = errno; break; } - len -= r; - bp += r; + len -= r; + bp += r; } + /* Register the result value with the instrumentation system */ + toku_instr_file_io_end(io_annotation, bytes_written - len); + return result; } -void -toku_os_full_pwrite (int fd, const void *buf, size_t len, toku_off_t off) { - assert(0==((long long)buf)%512); - assert((len%512 == 0) && (off%512)==0); // to make pwrite work. - const char *bp = (const char *) buf; +void toku_os_full_pwrite_with_source_location(int fd, + const void *buf, + size_t len, + toku_off_t off, + const char *src_file, + uint src_line) { + assert(0 == ((long long)buf) % 512); + assert((len % 512 == 0) && (off % 512) == 0); // to make pwrite work. + const char *bp = (const char *)buf; + + size_t bytes_written = len; + toku_io_instrumentation io_annotation; + toku_instr_file_io_begin(io_annotation, + toku_instr_file_op::file_write, + fd, + len, + src_file, + src_line); while (len > 0) { ssize_t r; if (t_full_pwrite) { @@ -250,71 +345,209 @@ toku_os_full_pwrite (int fd, const void *buf, size_t len, toku_off_t off) { } } assert(len == 0); -} -ssize_t -toku_os_pwrite (int fd, const void *buf, size_t len, toku_off_t off) { - assert(0==((long long)buf)%512); // these asserts are to ensure that direct I/O will work. - assert(0==len %512); - assert(0==off %512); - const char *bp = (const char *) buf; + /* Register the result value with the instrumentation system */ + toku_instr_file_io_end(io_annotation, bytes_written); +} + +ssize_t toku_os_pwrite_with_source_location(int fd, + const void *buf, + size_t len, + toku_off_t off, + const char *src_file, + uint src_line) { + assert(0 == + ((long long)buf) % + 512); // these asserts are to ensure that direct I/O will work. + assert(0 == len % 512); + assert(0 == off % 512); + const char *bp = (const char *)buf; ssize_t result = 0; + ssize_t r; + + size_t bytes_written = len; + toku_io_instrumentation io_annotation; + toku_instr_file_io_begin(io_annotation, + toku_instr_file_op::file_write, + fd, + len, + src_file, + src_line); while (len > 0) { - ssize_t r; - if (t_pwrite) { - r = t_pwrite(fd, bp, len, off); - } else { - r = pwrite(fd, bp, len, off); - } + r = (t_pwrite) ? t_pwrite(fd, bp, len, off) : pwrite(fd, bp, len, off); + if (r < 0) { result = errno; break; } len -= r; - bp += r; - off += r; + bp += r; + off += r; + } + /* Register the result value with the instrumentation system */ + toku_instr_file_io_end(io_annotation, bytes_written - len); + + return result; +} + +int toku_os_fwrite_with_source_location(const void *ptr, + size_t size, + size_t nmemb, + TOKU_FILE *stream, + const char *src_file, + uint src_line) { + int result = 0; + size_t bytes_written; + + toku_io_instrumentation io_annotation; + toku_instr_file_stream_io_begin(io_annotation, + toku_instr_file_op::file_write, + *stream, + nmemb, + src_file, + src_line); + + if (os_fwrite_fun) { + bytes_written = os_fwrite_fun(ptr, size, nmemb, stream->file); + } else { + bytes_written = fwrite(ptr, size, nmemb, stream->file); + } + + if (bytes_written != nmemb) { + if (os_fwrite_fun) // if using hook to induce artificial errors (for + // testing) ... + result = get_maybe_error_errno(); // ... then there is no error in + // the stream, but there is one + // in errno + else + result = ferror(stream->file); + invariant(result != 0); // Should we assert here? + } + /* Register the result value with the instrumentation system */ + toku_instr_file_io_end(io_annotation, bytes_written); + + return result; +} + +int toku_os_fread_with_source_location(void *ptr, + size_t size, + size_t nmemb, + TOKU_FILE *stream, + const char *src_file, + uint src_line) { + int result = 0; + size_t bytes_read; + + toku_io_instrumentation io_annotation; + toku_instr_file_stream_io_begin(io_annotation, + toku_instr_file_op::file_read, + *stream, + nmemb, + src_file, + src_line); + + if ((bytes_read = fread(ptr, size, nmemb, stream->file)) != nmemb) { + if ((feof(stream->file))) + result = EOF; + else + result = ferror(stream->file); + invariant(result != 0); // Should we assert here? } + /* Register the result value with the instrumentation system */ + toku_instr_file_io_end(io_annotation, bytes_read); + return result; } -FILE * -toku_os_fdopen(int fildes, const char *mode) { - FILE * rval; - if (t_fdopen) - rval = t_fdopen(fildes, mode); - else - rval = fdopen(fildes, mode); +TOKU_FILE *toku_os_fdopen_with_source_location(int fildes, + const char *mode, + const char *filename, + const toku_instr_key &instr_key, + const char *src_file, + uint src_line) { + TOKU_FILE *XMALLOC(rval); + if (FT_LIKELY(rval != nullptr)) { + toku_io_instrumentation io_annotation; + toku_instr_file_open_begin(io_annotation, + instr_key, + toku_instr_file_op::file_stream_open, + filename, + src_file, + src_line); + + rval->file = (t_fdopen) ? t_fdopen(fildes, mode) : fdopen(fildes, mode); + toku_instr_file_stream_open_end(io_annotation, *rval); + + if (FT_UNLIKELY(rval->file == nullptr)) { + toku_free(rval); + rval = nullptr; + } + } return rval; } - -FILE * -toku_os_fopen(const char *filename, const char *mode){ - FILE * rval; - if (t_fopen) - rval = t_fopen(filename, mode); - else - rval = fopen(filename, mode); +TOKU_FILE *toku_os_fopen_with_source_location(const char *filename, + const char *mode, + const toku_instr_key &instr_key, + const char *src_file, + uint src_line) { + TOKU_FILE *XMALLOC(rval); + if (FT_UNLIKELY(rval == nullptr)) + return nullptr; + + toku_io_instrumentation io_annotation; + toku_instr_file_open_begin(io_annotation, + instr_key, + toku_instr_file_op::file_stream_open, + filename, + src_file, + src_line); + rval->file = t_fopen ? t_fopen(filename, mode) : fopen(filename, mode); + /* Register the returning "file" value with the system */ + toku_instr_file_stream_open_end(io_annotation, *rval); + + if (FT_UNLIKELY(rval->file == nullptr)) { + toku_free(rval); + rval = nullptr; + } return rval; } -int -toku_os_open(const char *path, int oflag, int mode) { - int rval; +int toku_os_open_with_source_location(const char *path, + int oflag, + int mode, + const toku_instr_key &instr_key, + const char *src_file, + uint src_line) { + int fd; + toku_io_instrumentation io_annotation; + /* register a file open or creation depending on "oflag" */ + toku_instr_file_open_begin( + io_annotation, + instr_key, + ((oflag & O_CREAT) ? toku_instr_file_op::file_create + : toku_instr_file_op::file_open), + path, + src_file, + src_line); if (t_open) - rval = t_open(path, oflag, mode); + fd = t_open(path, oflag, mode); else - rval = open(path, oflag, mode); - return rval; + fd = open(path, oflag, mode); + + toku_instr_file_open_end(io_annotation, fd); + return fd; } -int -toku_os_open_direct(const char *path, int oflag, int mode) { +int toku_os_open_direct(const char *path, + int oflag, + int mode, + const toku_instr_key &instr_key) { int rval; #if defined(HAVE_O_DIRECT) - rval = toku_os_open(path, oflag | O_DIRECT, mode); + rval = toku_os_open(path, oflag | O_DIRECT, mode, instr_key); #elif defined(HAVE_F_NOCACHE) - rval = toku_os_open(path, oflag, mode); + rval = toku_os_open(path, oflag, mode, instr_key); if (rval >= 0) { int r = fcntl(rval, F_NOCACHE, 1); if (r == -1) { @@ -327,63 +560,112 @@ toku_os_open_direct(const char *path, int oflag, int mode) { return rval; } -int -toku_os_fclose(FILE * stream) { +int toku_os_fclose_with_source_location(TOKU_FILE *stream, + const char *src_file, + uint src_line) { int rval = -1; - if (t_fclose) - rval = t_fclose(stream); - else { // if EINTR, retry until success - while (rval != 0) { - rval = fclose(stream); - if (rval && (errno != EINTR)) - break; - } + if (FT_LIKELY(stream != nullptr)) { + /* register a file stream close " */ + toku_io_instrumentation io_annotation; + toku_instr_file_stream_close_begin( + io_annotation, + toku_instr_file_op::file_stream_close, + *stream, + src_file, + src_line); + + if (t_fclose) + rval = t_fclose(stream->file); + else { // if EINTR, retry until success + while (rval != 0) { + rval = fclose(stream->file); + if (rval && (errno != EINTR)) + break; + } + } + /* Register the returning "rval" value with the system */ + toku_instr_file_close_end(io_annotation, rval); + toku_free(stream); + stream = nullptr; } return rval; } -int -toku_os_close(int fd) { // if EINTR, retry until success +int toku_os_close_with_source_location( + int fd, + const char *src_file, + uint src_line) { // if EINTR, retry until success + /* register the file close */ int r = -1; + + /* register a file descriptor close " */ + toku_io_instrumentation io_annotation; + toku_instr_file_fd_close_begin( + io_annotation, toku_instr_file_op::file_close, fd, src_file, src_line); while (r != 0) { - r = close(fd); - if (r) { - int rr = errno; - if (rr!=EINTR) printf("rr=%d (%s)\n", rr, strerror(rr)); - assert(rr==EINTR); - } + r = close(fd); + if (r) { + int rr = errno; + if (rr != EINTR) + printf("rr=%d (%s)\n", rr, strerror(rr)); + assert(rr == EINTR); + } } - return r; -} - -int toku_os_rename(const char *old_name, const char *new_name) { - return rename(old_name, new_name); -} -int toku_os_unlink(const char *path) { return unlink(path); } + /* Regsiter the returning value with the system */ + toku_instr_file_close_end(io_annotation, r); -ssize_t -toku_os_read(int fd, void *buf, size_t count) { - ssize_t r; - if (t_read) - r = t_read(fd, buf, count); - else - r = read(fd, buf, count); return r; } -ssize_t -toku_os_pread (int fd, void *buf, size_t count, off_t offset) { - assert(0==((long long)buf)%512); - assert(0==count%512); - assert(0==offset%512); - ssize_t r; +ssize_t toku_os_read_with_source_location(int fd, + void *buf, + size_t count, + const char *src_file, + uint src_line) { + ssize_t bytes_read; + + toku_io_instrumentation io_annotation; + toku_instr_file_io_begin(io_annotation, + toku_instr_file_op::file_read, + fd, + count, + src_file, + src_line); + + bytes_read = (t_read) ? t_read(fd, buf, count) : read(fd, buf, count); + + toku_instr_file_io_end(io_annotation, bytes_read); + + return bytes_read; +} + +ssize_t inline_toku_os_pread_with_source_location(int fd, + void *buf, + size_t count, + off_t offset, + const char *src_file, + uint src_line) { + assert(0 == ((long long)buf) % 512); + assert(0 == count % 512); + assert(0 == offset % 512); + ssize_t bytes_read; + + toku_io_instrumentation io_annotation; + toku_instr_file_io_begin(io_annotation, + toku_instr_file_op::file_read, + fd, + count, + src_file, + src_line); if (t_pread) { - r = t_pread(fd, buf, count, offset); + bytes_read = t_pread(fd, buf, count, offset); } else { - r = pread(fd, buf, count, offset); + bytes_read = pread(fd, buf, count, offset); } - return r; + toku_instr_file_io_end(io_annotation, bytes_read); + + return bytes_read; } void toku_os_recursive_delete(const char *path) { @@ -411,13 +693,24 @@ void toku_set_func_fsync(int (*fsync_function)(int)) { } // keep trying if fsync fails because of EINTR -static void file_fsync_internal (int fd) { +void file_fsync_internal_with_source_location(int fd, + const char *src_file, + uint src_line) { uint64_t tstart = toku_current_time_microsec(); int r = -1; uint64_t eintr_count = 0; + + toku_io_instrumentation io_annotation; + toku_instr_file_io_begin(io_annotation, + toku_instr_file_op::file_sync, + fd, + 0, + src_file, + src_line); + while (r != 0) { - if (t_fsync) { - r = t_fsync(fd); + if (t_fsync) { + r = t_fsync(fd); } else { r = fsync(fd); } @@ -429,6 +722,9 @@ static void file_fsync_internal (int fd) { toku_sync_fetch_and_add(&toku_fsync_count, 1); uint64_t duration = toku_current_time_microsec() - tstart; toku_sync_fetch_and_add(&toku_fsync_time, duration); + + toku_instr_file_io_end(io_annotation, 0); + if (duration >= toku_long_fsync_threshold) { toku_sync_fetch_and_add(&toku_long_fsync_count, 1); toku_sync_fetch_and_add(&toku_long_fsync_time, duration); diff --git a/storage/tokudb/PerconaFT/portability/memory.cc b/storage/tokudb/PerconaFT/portability/memory.cc index 5430ff84b70..9594158cf38 100644 --- a/storage/tokudb/PerconaFT/portability/memory.cc +++ b/storage/tokudb/PerconaFT/portability/memory.cc @@ -104,7 +104,13 @@ toku_memory_startup(void) { size_t lg_chunk; // log2 of the mmap threshold size_t lg_chunk_length = sizeof lg_chunk; result = mallctl_f("opt.lg_chunk", &lg_chunk, &lg_chunk_length, NULL, 0); - if (result == 0) + if (result) + { + status.mmap_threshold = 1 << 21; // Default value. + // Incompatible jemalloc change. + result = 0; + } + else status.mmap_threshold = 1 << lg_chunk; } } diff --git a/storage/tokudb/PerconaFT/portability/portability.cc b/storage/tokudb/PerconaFT/portability/portability.cc index 19f445a85d7..dfa5153cc66 100644 --- a/storage/tokudb/PerconaFT/portability/portability.cc +++ b/storage/tokudb/PerconaFT/portability/portability.cc @@ -79,6 +79,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "toku_os.h" #include "toku_time.h" #include "memory.h" + +#include "toku_instrumentation.h" + #include <portability/toku_atomic.h> #include <util/partitioned_counter.h> @@ -172,13 +175,26 @@ toku_os_get_phys_memory_size(void) { #endif } -int -toku_os_get_file_size(int fildes, int64_t *fsize) { +int toku_os_get_file_size_with_source_location(int fildes, + int64_t *fsize, + const char *src_file, + uint src_line) { toku_struct_stat sbuf; + + toku_io_instrumentation io_annotation; + toku_instr_file_io_begin(io_annotation, + toku_instr_file_op::file_stat, + fildes, + 0, + src_file, + src_line); + int r = fstat(fildes, &sbuf); - if (r==0) { + if (r == 0) { *fsize = sbuf.st_size; } + toku_instr_file_io_end(io_annotation, 0); + return r; } @@ -272,15 +288,39 @@ toku_os_get_max_process_data_size(uint64_t *maxdata) { return r; } -int -toku_stat(const char *name, toku_struct_stat *buf) { +int toku_stat_with_source_location(const char *name, + toku_struct_stat *buf, + const toku_instr_key &instr_key, + const char *src_file, + uint src_line) { + toku_io_instrumentation io_annotation; + toku_instr_file_name_io_begin(io_annotation, + instr_key, + toku_instr_file_op::file_stat, + name, + 0, + src_file, + src_line); int r = stat(name, buf); + + toku_instr_file_io_end(io_annotation, 0); return r; } -int -toku_fstat(int fd, toku_struct_stat *buf) { +int toku_os_fstat_with_source_location(int fd, + toku_struct_stat *buf, + const char *src_file, + uint src_line) { + toku_io_instrumentation io_annotation; + toku_instr_file_io_begin(io_annotation, + toku_instr_file_op::file_stat, + fd, + 0, + src_file, + src_line); + int r = fstat(fd, buf); + toku_instr_file_io_end(io_annotation, 0); return r; } @@ -427,5 +467,9 @@ void __attribute__((constructor)) toku_portability_helgrind_ignore(void); void toku_portability_helgrind_ignore(void) { TOKU_VALGRIND_HG_DISABLE_CHECKING(&toku_cached_hz, sizeof toku_cached_hz); - TOKU_VALGRIND_HG_DISABLE_CHECKING(&toku_cached_pagesize, sizeof toku_cached_pagesize); + TOKU_VALGRIND_HG_DISABLE_CHECKING(&toku_cached_pagesize, + sizeof toku_cached_pagesize); } + +static const pfs_key_t pfs_not_instrumented = 0xFFFFFFFF; +toku_instr_key toku_uninstrumented(pfs_not_instrumented); diff --git a/storage/tokudb/PerconaFT/portability/tests/rwlock_condvar.h b/storage/tokudb/PerconaFT/portability/tests/rwlock_condvar.h index 92f2fb354f7..d1ebc81e1dc 100644 --- a/storage/tokudb/PerconaFT/portability/tests/rwlock_condvar.h +++ b/storage/tokudb/PerconaFT/portability/tests/rwlock_condvar.h @@ -107,10 +107,10 @@ get_waitstate(void) #endif int toku_cv_fair_rwlock_init (toku_cv_fair_rwlock_t *rwlock) { - rwlock->state=0; + rwlock->state = 0; rwlock->waiters_head = NULL; rwlock->waiters_tail = NULL; - toku_mutex_init(&rwlock->mutex, NULL); + toku_mutex_init(toku_uninstrumented, &rwlock->mutex, nullptr); return 0; } diff --git a/storage/tokudb/PerconaFT/portability/tests/test-pthread-rwlock-rdlock.cc b/storage/tokudb/PerconaFT/portability/tests/test-pthread-rwlock-rdlock.cc index 364bd7d3766..62aa5205f3c 100644 --- a/storage/tokudb/PerconaFT/portability/tests/test-pthread-rwlock-rdlock.cc +++ b/storage/tokudb/PerconaFT/portability/tests/test-pthread-rwlock-rdlock.cc @@ -48,7 +48,7 @@ int test_main(int argc __attribute__((__unused__)), char *const argv[] __attribu toku_pthread_rwlock_t rwlock; ZERO_STRUCT(rwlock); - toku_pthread_rwlock_init(&rwlock, NULL); + toku_pthread_rwlock_init(toku_uninstrumented, &rwlock, nullptr); toku_pthread_rwlock_rdlock(&rwlock); toku_pthread_rwlock_rdlock(&rwlock); toku_pthread_rwlock_rdunlock(&rwlock); diff --git a/storage/tokudb/PerconaFT/portability/tests/test-pthread-rwlock-rwr.cc b/storage/tokudb/PerconaFT/portability/tests/test-pthread-rwlock-rwr.cc index 3ca9e06bff7..92b30421ba9 100644 --- a/storage/tokudb/PerconaFT/portability/tests/test-pthread-rwlock-rwr.cc +++ b/storage/tokudb/PerconaFT/portability/tests/test-pthread-rwlock-rwr.cc @@ -71,13 +71,19 @@ int test_main(int argc , char *const argv[] ) { toku_pthread_t tid; void *retptr; - toku_pthread_rwlock_init(&rwlock, NULL); - state = 37; if (verbose) printf("%s:%d\n", __FUNCTION__, __LINE__); + toku_pthread_rwlock_init(toku_uninstrumented, &rwlock, nullptr); + state = 37; + if (verbose) + printf("%s:%d\n", __FUNCTION__, __LINE__); toku_pthread_rwlock_rdlock(&rwlock); - r = toku_pthread_create(&tid, NULL, f, &rwlock); assert(r == 0); + r = toku_pthread_create(toku_uninstrumented, &tid, nullptr, f, &rwlock); + assert(r == 0); - assert(state==37); state = 42; if (verbose) printf("%s:%d\n", __FUNCTION__, __LINE__); + assert(state == 37); + state = 42; + if (verbose) + printf("%s:%d\n", __FUNCTION__, __LINE__); sleep(4); assert(state==16); state = 44; if (verbose) printf("%s:%d\n", __FUNCTION__, __LINE__); toku_pthread_rwlock_rdlock(&rwlock); diff --git a/storage/tokudb/PerconaFT/portability/tests/test-stat.cc b/storage/tokudb/PerconaFT/portability/tests/test-stat.cc index 494a17bd074..57201764afb 100644 --- a/storage/tokudb/PerconaFT/portability/tests/test-stat.cc +++ b/storage/tokudb/PerconaFT/portability/tests/test-stat.cc @@ -47,10 +47,11 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. static void test_stat(const char *dirname, int result, int ex_errno) { int r; toku_struct_stat buf; - r = toku_stat(dirname, &buf); - //printf("stat %s %d %d\n", dirname, r, errno); fflush(stdout); - assert(r==result); - if (r!=0) assert(get_maybe_error_errno() == ex_errno); + r = toku_stat(dirname, &buf, toku_uninstrumented); + // printf("stat %s %d %d\n", dirname, r, errno); fflush(stdout); + assert(r == result); + if (r != 0) + assert(get_maybe_error_errno() == ex_errno); } int main(void) { diff --git a/storage/tokudb/PerconaFT/portability/tests/test-toku-malloc.cc b/storage/tokudb/PerconaFT/portability/tests/test-toku-malloc.cc index 1e1c7533c15..0d99b36b51a 100644 --- a/storage/tokudb/PerconaFT/portability/tests/test-toku-malloc.cc +++ b/storage/tokudb/PerconaFT/portability/tests/test-toku-malloc.cc @@ -53,10 +53,12 @@ int main(void) { int i; const int max_threads = 2; toku_pthread_t tids[max_threads]; - for (i=0; i<max_threads; i++) { - r = toku_pthread_create(&tids[i], NULL, f, 0); assert(r == 0); + for (i = 0; i < max_threads; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, f, nullptr); + assert(r == 0); } - for (i=0; i<max_threads; i++) { + for (i = 0; i < max_threads; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/portability/toku_assert.h b/storage/tokudb/PerconaFT/portability/toku_assert.h index 26a71cede7d..b0a7be3287b 100644 --- a/storage/tokudb/PerconaFT/portability/toku_assert.h +++ b/storage/tokudb/PerconaFT/portability/toku_assert.h @@ -53,13 +53,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #error NDEBUG should not be set #endif -static inline int get_error_errno(void); +inline int get_error_errno(void); -static inline int -get_maybe_error_errno(void) -{ - return errno; -} +static inline int get_maybe_error_errno(void) { return errno; } static inline void set_errno(int new_errno) @@ -139,12 +135,10 @@ void db_env_do_backtrace(FILE *outf); #define paranoid_invariant(a) ((void) 0) #define paranoid_invariant_null(a) ((void) 0) #define paranoid_invariant_notnull(a) ((void) 0) -#define paranoid_invariant_zero(a) ((void) 0) +#define paranoid_invariant_zero(a) ((void)0) #endif -static inline int -get_error_errno(void) -{ +inline int get_error_errno(void) { invariant(errno); return errno; } diff --git a/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc b/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc new file mode 100644 index 00000000000..b7b4c0ab233 --- /dev/null +++ b/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc @@ -0,0 +1,365 @@ +#ifdef MYSQL_TOKUDB_ENGINE +#include "toku_portability.h" +#include "toku_pthread.h" + +toku_instr_probe_pfs::toku_instr_probe_pfs(const toku_instr_key &key) + : mutex(new toku_mutex_t) { + toku_mutex_init(key, mutex.get(), nullptr); +} + +toku_instr_probe_pfs::~toku_instr_probe_pfs() { + toku_mutex_destroy(mutex.get()); +} + +// Thread instrumentation + +int toku_pthread_create(const toku_instr_key &key, + pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine)(void *), + void *arg) { +#if (MYSQL_VERSION_MAJOR >= 5) && (MYSQL_VERSION_MINOR >= 7) + return PSI_THREAD_CALL(spawn_thread)( + key.id(), reinterpret_cast<my_thread_handle *>(thread), + attr, start_routine, arg); +#else + return PSI_THREAD_CALL(spawn_thread)( + key.id(), thread, attr, start_routine, arg); +#endif +} + +void toku_instr_register_current_thread(const toku_instr_key &key) { + struct PSI_thread *psi_thread = + PSI_THREAD_CALL(new_thread)(key.id(), nullptr, 0); + PSI_THREAD_CALL(set_thread)(psi_thread); +} + +void toku_instr_delete_current_thread() { + PSI_THREAD_CALL(delete_current_thread)(); +} + +// I/O instrumentation + +void toku_instr_file_open_begin(toku_io_instrumentation &io_instr, + const toku_instr_key &key, + toku_instr_file_op op, + const char *name, + const char *src_file, + int src_line) { + io_instr.locker = + PSI_FILE_CALL(get_thread_file_name_locker)( + &io_instr.state, key.id(), static_cast<PSI_file_operation>(op), + name, io_instr.locker); + if (io_instr.locker != nullptr) { + PSI_FILE_CALL(start_file_open_wait) + (io_instr.locker, src_file, src_line); + } +} + +void toku_instr_file_stream_open_end(toku_io_instrumentation &io_instr, + TOKU_FILE &file) { + file.key = nullptr; + if (FT_LIKELY(io_instr.locker)) { + file.key = + PSI_FILE_CALL(end_file_open_wait)(io_instr.locker, file.file); + } +} + +void toku_instr_file_open_end(toku_io_instrumentation &io_instr, int fd) { + if (FT_LIKELY(io_instr.locker)) + PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor) + (io_instr.locker, fd); +} + +void toku_instr_file_name_close_begin(toku_io_instrumentation &io_instr, + const toku_instr_key &key, + toku_instr_file_op op, + const char *name, + const char *src_file, + int src_line) { + io_instr.locker = + PSI_FILE_CALL(get_thread_file_name_locker)( + &io_instr.state, key.id(), static_cast<PSI_file_operation>(op), + name, + io_instr.locker); + if (FT_LIKELY(io_instr.locker)) { + PSI_FILE_CALL(start_file_close_wait) + (io_instr.locker, src_file, src_line); + } +} + +void toku_instr_file_stream_close_begin(toku_io_instrumentation &io_instr, + toku_instr_file_op op, + const TOKU_FILE &file, + const char *src_file, + int src_line) { + io_instr.locker = nullptr; + if (FT_LIKELY(file.key)) { + io_instr.locker = PSI_FILE_CALL(get_thread_file_stream_locker)( + &io_instr.state, file.key, (PSI_file_operation)op); + if (FT_LIKELY(io_instr.locker)) { + PSI_FILE_CALL(start_file_close_wait) + (io_instr.locker, src_file, src_line); + } + } +} + +void toku_instr_file_fd_close_begin(toku_io_instrumentation &io_instr, + toku_instr_file_op op, + int fd, + const char *src_file, + int src_line) { + io_instr.locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &io_instr.state, fd, (PSI_file_operation)op); + if (FT_LIKELY(io_instr.locker)) { + PSI_FILE_CALL(start_file_close_wait) + (io_instr.locker, src_file, src_line); + } +} + +void toku_instr_file_close_end(const toku_io_instrumentation &io_instr, + int result) { + if (FT_LIKELY(io_instr.locker)) + PSI_FILE_CALL(end_file_close_wait) + (io_instr.locker, result); +} + +void toku_instr_file_io_begin(toku_io_instrumentation &io_instr, + toku_instr_file_op op, + int fd, + ssize_t count, + const char *src_file, + int src_line) { + io_instr.locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &io_instr.state, fd, (PSI_file_operation)op); + if (FT_LIKELY(io_instr.locker)) { + PSI_FILE_CALL(start_file_wait) + (io_instr.locker, count, src_file, src_line); + } +} + +void toku_instr_file_name_io_begin(toku_io_instrumentation &io_instr, + const toku_instr_key &key, + toku_instr_file_op op, + const char *name, + ssize_t count, + const char *src_file, + int src_line) { + io_instr.locker = + PSI_FILE_CALL(get_thread_file_name_locker)(&io_instr.state, + key.id(), + (PSI_file_operation)op, + name, + &io_instr.locker); + if (FT_LIKELY(io_instr.locker)) { + PSI_FILE_CALL(start_file_wait) + (io_instr.locker, count, src_file, src_line); + } +} + +void toku_instr_file_stream_io_begin(toku_io_instrumentation &io_instr, + toku_instr_file_op op, + const TOKU_FILE &file, + ssize_t count, + const char *src_file, + int src_line) { + io_instr.locker = nullptr; + if (FT_LIKELY(file.key)) { + io_instr.locker = PSI_FILE_CALL(get_thread_file_stream_locker)( + &io_instr.state, file.key, (PSI_file_operation)op); + if (FT_LIKELY(io_instr.locker)) { + PSI_FILE_CALL(start_file_wait) + (io_instr.locker, count, src_file, src_line); + } + } +} + +void toku_instr_file_io_end(toku_io_instrumentation &io_instr, ssize_t count) { + if (FT_LIKELY(io_instr.locker)) + PSI_FILE_CALL(end_file_wait) + (io_instr.locker, count); +} + +// Mutex instrumentation + +void toku_instr_mutex_init(const toku_instr_key &key, toku_mutex_t &mutex) { + mutex.psi_mutex = PSI_MUTEX_CALL(init_mutex)(key.id(), &mutex.pmutex); +#if TOKU_PTHREAD_DEBUG + mutex.instr_key_id = key.id(); +#endif +} + +void toku_instr_mutex_destroy(PSI_mutex *&mutex_instr) { + if (mutex_instr != nullptr) { + PSI_MUTEX_CALL(destroy_mutex)(mutex_instr); + mutex_instr = nullptr; + } +} + +void toku_instr_mutex_lock_start(toku_mutex_instrumentation &mutex_instr, + toku_mutex_t &mutex, + const char *src_file, + int src_line) { + mutex_instr.locker = nullptr; + if (mutex.psi_mutex) { + mutex_instr.locker = + PSI_MUTEX_CALL(start_mutex_wait)(&mutex_instr.state, + mutex.psi_mutex, + PSI_MUTEX_LOCK, + src_file, + src_line); + } +} + +void toku_instr_mutex_trylock_start(toku_mutex_instrumentation &mutex_instr, + toku_mutex_t &mutex, + const char *src_file, + int src_line) { + mutex_instr.locker = nullptr; + if (mutex.psi_mutex) { + mutex_instr.locker = + PSI_MUTEX_CALL(start_mutex_wait)(&mutex_instr.state, + mutex.psi_mutex, + PSI_MUTEX_TRYLOCK, + src_file, + src_line); + } +} + +void toku_instr_mutex_lock_end(toku_mutex_instrumentation &mutex_instr, + int pthread_mutex_lock_result) { + if (mutex_instr.locker) + PSI_MUTEX_CALL(end_mutex_wait) + (mutex_instr.locker, pthread_mutex_lock_result); +} + +void toku_instr_mutex_unlock(PSI_mutex *mutex_instr) { + if (mutex_instr) + PSI_MUTEX_CALL(unlock_mutex)(mutex_instr); +} + +// Condvar instrumentation + +void toku_instr_cond_init(const toku_instr_key &key, toku_cond_t &cond) { + cond.psi_cond = PSI_COND_CALL(init_cond)(key.id(), &cond.pcond); +#if TOKU_PTHREAD_DEBUG + cond.instr_key_id = key.id(); +#endif +} + +void toku_instr_cond_destroy(PSI_cond *&cond_instr) { + if (cond_instr != nullptr) { + PSI_COND_CALL(destroy_cond)(cond_instr); + cond_instr = nullptr; + } +} + +void toku_instr_cond_wait_start(toku_cond_instrumentation &cond_instr, + toku_instr_cond_op op, + toku_cond_t &cond, + toku_mutex_t &mutex, + const char *src_file, + int src_line) { + cond_instr.locker = nullptr; + if (cond.psi_cond) { + /* Instrumentation start */ + cond_instr.locker = + PSI_COND_CALL(start_cond_wait)(&cond_instr.state, + cond.psi_cond, + mutex.psi_mutex, + (PSI_cond_operation)op, + src_file, + src_line); + } +} + +void toku_instr_cond_wait_end(toku_cond_instrumentation &cond_instr, + int pthread_cond_wait_result) { + if (cond_instr.locker) + PSI_COND_CALL(end_cond_wait) + (cond_instr.locker, pthread_cond_wait_result); +} + +void toku_instr_cond_signal(const toku_cond_t &cond) { + if (cond.psi_cond) + PSI_COND_CALL(signal_cond)(cond.psi_cond); +} + +void toku_instr_cond_broadcast(const toku_cond_t &cond) { + if (cond.psi_cond) + PSI_COND_CALL(broadcast_cond)(cond.psi_cond); +} + +// rwlock instrumentation + +void toku_instr_rwlock_init(const toku_instr_key &key, + toku_pthread_rwlock_t &rwlock) { + rwlock.psi_rwlock = PSI_RWLOCK_CALL(init_rwlock)(key.id(), &rwlock.rwlock); +#if TOKU_PTHREAD_DEBUG + rwlock.instr_key_id = key.id(); +#endif +} + +void toku_instr_rwlock_destroy(PSI_rwlock *&rwlock_instr) { + if (rwlock_instr != nullptr) { + PSI_RWLOCK_CALL(destroy_rwlock)(rwlock_instr); + rwlock_instr = nullptr; + } +} + +void toku_instr_rwlock_rdlock_wait_start( + toku_rwlock_instrumentation &rwlock_instr, + toku_pthread_rwlock_t &rwlock, + const char *src_file, + int src_line) { + rwlock_instr.locker = nullptr; + if (rwlock.psi_rwlock) { + /* Instrumentation start */ + rwlock_instr.locker = + PSI_RWLOCK_CALL(start_rwlock_rdwait)(&rwlock_instr.state, + rwlock.psi_rwlock, + PSI_RWLOCK_READLOCK, + src_file, + src_line); + } +} + +void toku_instr_rwlock_wrlock_wait_start( + toku_rwlock_instrumentation &rwlock_instr, + toku_pthread_rwlock_t &rwlock, + const char *src_file, + int src_line) { + rwlock_instr.locker = nullptr; + if (rwlock.psi_rwlock) { + /* Instrumentation start */ + rwlock_instr.locker = + PSI_RWLOCK_CALL(start_rwlock_wrwait)(&rwlock_instr.state, + rwlock.psi_rwlock, + PSI_RWLOCK_WRITELOCK, + src_file, + src_line); + } +} + +void toku_instr_rwlock_rdlock_wait_end( + toku_rwlock_instrumentation &rwlock_instr, + int pthread_rwlock_wait_result) { + if (rwlock_instr.locker) + PSI_RWLOCK_CALL(end_rwlock_rdwait) + (rwlock_instr.locker, pthread_rwlock_wait_result); +} + +void toku_instr_rwlock_wrlock_wait_end( + toku_rwlock_instrumentation &rwlock_instr, + int pthread_rwlock_wait_result) { + if (rwlock_instr.locker) + PSI_RWLOCK_CALL(end_rwlock_wrwait) + (rwlock_instr.locker, pthread_rwlock_wait_result); +} + +void toku_instr_rwlock_unlock(toku_pthread_rwlock_t &rwlock) { + if (rwlock.psi_rwlock) + PSI_RWLOCK_CALL(unlock_rwlock)(rwlock.psi_rwlock); +} + +#endif // MYSQL_TOKUDB_ENGINE diff --git a/storage/tokudb/PerconaFT/portability/toku_instr_mysql.h b/storage/tokudb/PerconaFT/portability/toku_instr_mysql.h new file mode 100644 index 00000000000..d6b0ed35ce9 --- /dev/null +++ b/storage/tokudb/PerconaFT/portability/toku_instr_mysql.h @@ -0,0 +1,249 @@ +#ifdef TOKU_INSTR_MYSQL_H +// This file can be included only from toku_instumentation.h because +// it replaces the defintitions for the case if MySQL PFS is available +#error "toku_instr_mysql.h can be included only once" +#else // TOKU_INSTR_MYSQL_H +#define TOKU_INSTR_MYSQL_H + +#include <memory> + +// As these macros are defined in my_global.h +// and they are also defined in command line +// undefine them here to avoid compilation errors. +#undef __STDC_FORMAT_MACROS +#undef __STDC_LIMIT_MACROS +#include <mysql/psi/mysql_file.h> // PSI_file +#include <mysql/psi/mysql_thread.h> // PSI_mutex + +#ifndef HAVE_PSI_MUTEX_INTERFACE +#error HAVE_PSI_MUTEX_INTERFACE required +#endif +#ifndef HAVE_PSI_RWLOCK_INTERFACE +#error HAVE_PSI_RWLOCK_INTERFACE required +#endif +#ifndef HAVE_PSI_THREAD_INTERFACE +#error HAVE_PSI_THREAD_INTERFACE required +#endif + +// Instrumentation keys + +class toku_instr_key { + private: + pfs_key_t m_id; + + public: + toku_instr_key(toku_instr_object_type type, + const char *group, + const char *name) { + switch (type) { + case toku_instr_object_type::mutex: { + PSI_mutex_info mutex_info{&m_id, name, 0}; + mysql_mutex_register(group, &mutex_info, 1); + } break; + case toku_instr_object_type::rwlock: { + PSI_rwlock_info rwlock_info{&m_id, name, 0}; + mysql_rwlock_register(group, &rwlock_info, 1); + } break; + case toku_instr_object_type::cond: { + PSI_cond_info cond_info{&m_id, name, 0}; + mysql_cond_register(group, &cond_info, 1); + } break; + case toku_instr_object_type::thread: { + PSI_thread_info thread_info{&m_id, name, 0}; + mysql_thread_register(group, &thread_info, 1); + } break; + case toku_instr_object_type::file: { + PSI_file_info file_info{&m_id, name, 0}; + mysql_file_register(group, &file_info, 1); + } break; + } + } + + explicit toku_instr_key(pfs_key_t key_id) : m_id(key_id) {} + + pfs_key_t id() const { return m_id; } +}; + +// Thread instrumentation +int toku_pthread_create(const toku_instr_key &key, + pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine)(void *), + void *arg); +void toku_instr_register_current_thread(const toku_instr_key &key); +void toku_instr_delete_current_thread(); + +// I/O instrumentation + +enum class toku_instr_file_op { + file_stream_open = PSI_FILE_STREAM_OPEN, + file_create = PSI_FILE_CREATE, + file_open = PSI_FILE_OPEN, + file_delete = PSI_FILE_DELETE, + file_rename = PSI_FILE_RENAME, + file_read = PSI_FILE_READ, + file_write = PSI_FILE_WRITE, + file_sync = PSI_FILE_SYNC, + file_stream_close = PSI_FILE_STREAM_CLOSE, + file_close = PSI_FILE_CLOSE, + file_stat = PSI_FILE_STAT +}; + +struct toku_io_instrumentation { + struct PSI_file_locker *locker; + PSI_file_locker_state state; + + toku_io_instrumentation() : locker(nullptr) {} +}; + +void toku_instr_file_open_begin(toku_io_instrumentation &io_instr, + const toku_instr_key &key, + toku_instr_file_op op, + const char *name, + const char *src_file, + int src_line); +void toku_instr_file_stream_open_end(toku_io_instrumentation &io_instr, + TOKU_FILE &file); +void toku_instr_file_open_end(toku_io_instrumentation &io_instr, int fd); +void toku_instr_file_name_close_begin(toku_io_instrumentation &io_instr, + const toku_instr_key &key, + toku_instr_file_op op, + const char *name, + const char *src_file, + int src_line); +void toku_instr_file_stream_close_begin(toku_io_instrumentation &io_instr, + toku_instr_file_op op, + const TOKU_FILE &file, + const char *src_file, + int src_line); +void toku_instr_file_fd_close_begin(toku_io_instrumentation &io_instr, + toku_instr_file_op op, + int fd, + const char *src_file, + int src_line); +void toku_instr_file_close_end(const toku_io_instrumentation &io_instr, + int result); +void toku_instr_file_io_begin(toku_io_instrumentation &io_instr, + toku_instr_file_op op, + int fd, + ssize_t count, + const char *src_file, + int src_line); +void toku_instr_file_name_io_begin(toku_io_instrumentation &io_instr, + const toku_instr_key &key, + toku_instr_file_op op, + const char *name, + ssize_t count, + const char *src_file, + int src_line); +void toku_instr_file_stream_io_begin(toku_io_instrumentation &io_instr, + toku_instr_file_op op, + const TOKU_FILE &file, + ssize_t count, + const char *src_file, + int src_line); +void toku_instr_file_io_end(toku_io_instrumentation &io_instr, ssize_t count); + +// Mutex instrumentation + +struct toku_mutex_instrumentation { + struct PSI_mutex_locker *locker; + PSI_mutex_locker_state state; + + toku_mutex_instrumentation() : locker(nullptr) {} +}; + +void toku_instr_mutex_init(const toku_instr_key &key, toku_mutex_t &mutex); +void toku_instr_mutex_destroy(PSI_mutex *&mutex_instr); +void toku_instr_mutex_lock_start(toku_mutex_instrumentation &mutex_instr, + toku_mutex_t &mutex, + const char *src_file, + int src_line); +void toku_instr_mutex_trylock_start(toku_mutex_instrumentation &mutex_instr, + toku_mutex_t &mutex, + const char *src_file, + int src_line); +void toku_instr_mutex_lock_end(toku_mutex_instrumentation &mutex_instr, + int pthread_mutex_lock_result); +void toku_instr_mutex_unlock(PSI_mutex *mutex_instr); + +// Instrumentation probes + +class toku_instr_probe_pfs { + private: + std::unique_ptr<toku_mutex_t> mutex; + toku_mutex_instrumentation mutex_instr; + + public: + explicit toku_instr_probe_pfs(const toku_instr_key &key); + + ~toku_instr_probe_pfs(); + + void start_with_source_location(const char *src_file, int src_line) { + mutex_instr.locker = nullptr; + toku_instr_mutex_lock_start(mutex_instr, *mutex, src_file, src_line); + } + + void stop() { toku_instr_mutex_lock_end(mutex_instr, 0); } +}; + +typedef toku_instr_probe_pfs toku_instr_probe; + +// Condvar instrumentation + +struct toku_cond_instrumentation { + struct PSI_cond_locker *locker; + PSI_cond_locker_state state; + + toku_cond_instrumentation() : locker(nullptr) {} +}; + +enum class toku_instr_cond_op { + cond_wait = PSI_COND_WAIT, + cond_timedwait = PSI_COND_TIMEDWAIT, +}; + +void toku_instr_cond_init(const toku_instr_key &key, toku_cond_t &cond); +void toku_instr_cond_destroy(PSI_cond *&cond_instr); +void toku_instr_cond_wait_start(toku_cond_instrumentation &cond_instr, + toku_instr_cond_op op, + toku_cond_t &cond, + toku_mutex_t &mutex, + const char *src_file, + int src_line); +void toku_instr_cond_wait_end(toku_cond_instrumentation &cond_instr, + int pthread_cond_wait_result); +void toku_instr_cond_signal(const toku_cond_t &cond); +void toku_instr_cond_broadcast(const toku_cond_t &cond); + +// rwlock instrumentation + +struct toku_rwlock_instrumentation { + struct PSI_rwlock_locker *locker; + PSI_rwlock_locker_state state; + + toku_rwlock_instrumentation() : locker(nullptr) { } +}; + +void toku_instr_rwlock_init(const toku_instr_key &key, + toku_pthread_rwlock_t &rwlock); +void toku_instr_rwlock_destroy(PSI_rwlock *&rwlock_instr); +void toku_instr_rwlock_rdlock_wait_start( + toku_rwlock_instrumentation &rwlock_instr, + toku_pthread_rwlock_t &rwlock, + const char *src_file, + int src_line); +void toku_instr_rwlock_wrlock_wait_start( + toku_rwlock_instrumentation &rwlock_instr, + toku_pthread_rwlock_t &rwlock, + const char *src_file, + int src_line); +void toku_instr_rwlock_rdlock_wait_end( + toku_rwlock_instrumentation &rwlock_instr, + int pthread_rwlock_wait_result); +void toku_instr_rwlock_wrlock_wait_end( + toku_rwlock_instrumentation &rwlock_instr, + int pthread_rwlock_wait_result); +void toku_instr_rwlock_unlock(toku_pthread_rwlock_t &rwlock); + +#endif // TOKU_INSTR_MYSQL_H diff --git a/storage/tokudb/PerconaFT/portability/toku_instrumentation.h b/storage/tokudb/PerconaFT/portability/toku_instrumentation.h new file mode 100644 index 00000000000..8c9390edc0a --- /dev/null +++ b/storage/tokudb/PerconaFT/portability/toku_instrumentation.h @@ -0,0 +1,339 @@ +#pragma once + +#include <stdio.h> // FILE + +// Performance instrumentation object identifier type +typedef unsigned int pfs_key_t; + +enum class toku_instr_object_type { mutex, rwlock, cond, thread, file }; + +struct PSI_file; + +struct TOKU_FILE { + /** The real file. */ + FILE *file; + struct PSI_file *key; + TOKU_FILE() : file(nullptr), key(nullptr) {} +}; + +struct PSI_mutex; +struct PSI_cond; +struct PSI_rwlock; + +struct toku_mutex_t; +struct toku_cond_t; +struct toku_pthread_rwlock_t; + +class toku_instr_key; + +class toku_instr_probe_empty { + public: + explicit toku_instr_probe_empty(UU(const toku_instr_key &key)) {} + + void start_with_source_location(UU(const char *src_file), + UU(int src_line)) {} + + void stop() {} +}; + +#define TOKU_PROBE_START(p) p->start_with_source_location(__FILE__, __LINE__) +#define TOKU_PROBE_STOP(p) p->stop + +extern toku_instr_key toku_uninstrumented; + +#ifndef MYSQL_TOKUDB_ENGINE + +#include <pthread.h> + +class toku_instr_key { + public: + toku_instr_key(UU(toku_instr_object_type type), + UU(const char *group), + UU(const char *name)) {} + + explicit toku_instr_key(UU(pfs_key_t key_id)) {} +}; + +typedef toku_instr_probe_empty toku_instr_probe; + +enum class toku_instr_file_op { + file_stream_open, + file_create, + file_open, + file_delete, + file_rename, + file_read, + file_write, + file_sync, + file_stream_close, + file_close, + file_stat +}; + +struct PSI_file {}; +struct PSI_mutex {}; + +struct toku_io_instrumentation {}; + +inline int toku_pthread_create(UU(const toku_instr_key &key), + pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine)(void *), + void *arg) { + return pthread_create(thread, attr, start_routine, arg); +} + +inline void toku_instr_register_current_thread() {} + +inline void toku_instr_delete_current_thread() {} + +// Instrument file creation, opening, closing, and renaming +inline void toku_instr_file_open_begin(UU(toku_io_instrumentation &io_instr), + UU(const toku_instr_key &key), + UU(toku_instr_file_op op), + UU(const char *name), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_stream_open_end( + UU(toku_io_instrumentation &io_instr), + UU(TOKU_FILE &file)) {} + +inline void toku_instr_file_open_end(UU(toku_io_instrumentation &io_instr), + UU(int fd)) {} + +inline void toku_instr_file_name_close_begin( + UU(toku_io_instrumentation &io_instr), + UU(const toku_instr_key &key), + UU(toku_instr_file_op op), + UU(const char *name), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_stream_close_begin( + UU(toku_io_instrumentation &io_instr), + UU(toku_instr_file_op op), + UU(TOKU_FILE &file), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_fd_close_begin( + UU(toku_io_instrumentation &io_instr), + UU(toku_instr_file_op op), + UU(int fd), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_close_end(UU(toku_io_instrumentation &io_instr), + UU(int result)) {} + +inline void toku_instr_file_io_begin(UU(toku_io_instrumentation &io_instr), + UU(toku_instr_file_op op), + UU(int fd), + UU(unsigned int count), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_name_io_begin(UU(toku_io_instrumentation &io_instr), + UU(const toku_instr_key &key), + UU(toku_instr_file_op op), + UU(const char *name), + UU(unsigned int count), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_stream_io_begin( + UU(toku_io_instrumentation &io_instr), + UU(toku_instr_file_op op), + UU(TOKU_FILE &file), + UU(unsigned int count), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_io_end(UU(toku_io_instrumentation &io_instr), + UU(unsigned int count)) {} + +struct toku_mutex_t; + +struct toku_mutex_instrumentation {}; + +inline PSI_mutex *toku_instr_mutex_init(UU(const toku_instr_key &key), + UU(toku_mutex_t &mutex)) { + return nullptr; +} + +inline void toku_instr_mutex_destroy(UU(PSI_mutex *&mutex_instr)) {} + +inline void toku_instr_mutex_lock_start( + UU(toku_mutex_instrumentation &mutex_instr), + UU(toku_mutex_t &mutex), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_mutex_trylock_start( + UU(toku_mutex_instrumentation &mutex_instr), + UU(toku_mutex_t &mutex), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_mutex_lock_end( + UU(toku_mutex_instrumentation &mutex_instr), + UU(int pthread_mutex_lock_result)) {} + +inline void toku_instr_mutex_unlock(UU(PSI_mutex *mutex_instr)) {} + +struct toku_cond_instrumentation {}; + +enum class toku_instr_cond_op { + cond_wait, + cond_timedwait, +}; + +inline PSI_cond *toku_instr_cond_init(UU(const toku_instr_key &key), + UU(toku_cond_t &cond)) { + return nullptr; +} + +inline void toku_instr_cond_destroy(UU(PSI_cond *&cond_instr)) {} + +inline void toku_instr_cond_wait_start( + UU(toku_cond_instrumentation &cond_instr), + UU(toku_instr_cond_op op), + UU(toku_cond_t &cond), + UU(toku_mutex_t &mutex), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_cond_wait_end(UU(toku_cond_instrumentation &cond_instr), + UU(int pthread_cond_wait_result)) {} + +inline void toku_instr_cond_signal(UU(toku_cond_t &cond)) {} + +inline void toku_instr_cond_broadcast(UU(toku_cond_t &cond)) {} + +// rwlock instrumentation +struct toku_rwlock_instrumentation {}; + +inline PSI_rwlock *toku_instr_rwlock_init(UU(const toku_instr_key &key), + UU(toku_pthread_rwlock_t &rwlock)) { + return nullptr; +} + +inline void toku_instr_rwlock_destroy(UU(PSI_rwlock *&rwlock_instr)) {} + +inline void toku_instr_rwlock_rdlock_wait_start( + UU(toku_rwlock_instrumentation &rwlock_instr), + UU(toku_pthread_rwlock_t &rwlock), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_rwlock_wrlock_wait_start( + UU(toku_rwlock_instrumentation &rwlock_instr), + UU(toku_pthread_rwlock_t &rwlock), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_rwlock_rdlock_wait_end( + UU(toku_rwlock_instrumentation &rwlock_instr), + UU(int pthread_rwlock_wait_result)) {} + +inline void toku_instr_rwlock_wrlock_wait_end( + UU(toku_rwlock_instrumentation &rwlock_instr), + UU(int pthread_rwlock_wait_result)) {} + +inline void toku_instr_rwlock_unlock(UU(toku_pthread_rwlock_t &rwlock)) {} + +#else // MYSQL_TOKUDB_ENGINE +// There can be not only mysql but also mongodb or any other PFS stuff +#include <toku_instr_mysql.h> +#endif // MYSQL_TOKUDB_ENGINE + +extern toku_instr_key toku_uninstrumented; + +extern toku_instr_probe *toku_instr_probe_1; + +// threads +extern toku_instr_key *extractor_thread_key; +extern toku_instr_key *fractal_thread_key; +extern toku_instr_key *io_thread_key; +extern toku_instr_key *eviction_thread_key; +extern toku_instr_key *kibbutz_thread_key; +extern toku_instr_key *minicron_thread_key; +extern toku_instr_key *tp_internal_thread_key; + +// Files +extern toku_instr_key *tokudb_file_data_key; +extern toku_instr_key *tokudb_file_load_key; +extern toku_instr_key *tokudb_file_tmp_key; +extern toku_instr_key *tokudb_file_log_key; + +// Mutexes +extern toku_instr_key *kibbutz_mutex_key; +extern toku_instr_key *minicron_p_mutex_key; +extern toku_instr_key *queue_result_mutex_key; +extern toku_instr_key *tpool_lock_mutex_key; +extern toku_instr_key *workset_lock_mutex_key; +extern toku_instr_key *bjm_jobs_lock_mutex_key; +extern toku_instr_key *log_internal_lock_mutex_key; +extern toku_instr_key *cachetable_ev_thread_lock_mutex_key; +extern toku_instr_key *cachetable_disk_nb_mutex_key; +extern toku_instr_key *cachetable_m_mutex_key; +extern toku_instr_key *safe_file_size_lock_mutex_key; +extern toku_instr_key *checkpoint_safe_mutex_key; +extern toku_instr_key *ft_ref_lock_mutex_key; +extern toku_instr_key *loader_error_mutex_key; +extern toku_instr_key *bfs_mutex_key; +extern toku_instr_key *loader_bl_mutex_key; +extern toku_instr_key *loader_fi_lock_mutex_key; +extern toku_instr_key *loader_out_mutex_key; +extern toku_instr_key *result_output_condition_lock_mutex_key; +extern toku_instr_key *block_table_mutex_key; +extern toku_instr_key *rollback_log_node_cache_mutex_key; +extern toku_instr_key *txn_lock_mutex_key; +extern toku_instr_key *txn_state_lock_mutex_key; +extern toku_instr_key *txn_child_manager_mutex_key; +extern toku_instr_key *txn_manager_lock_mutex_key; +extern toku_instr_key *treenode_mutex_key; +extern toku_instr_key *manager_mutex_key; +extern toku_instr_key *manager_escalation_mutex_key; +extern toku_instr_key *manager_escalator_mutex_key; +extern toku_instr_key *db_txn_struct_i_txn_mutex_key; +extern toku_instr_key *indexer_i_indexer_lock_mutex_key; +extern toku_instr_key *indexer_i_indexer_estimate_lock_mutex_key; +extern toku_instr_key *locktree_request_info_mutex_key; +extern toku_instr_key *locktree_request_info_retry_mutex_key; + +// condition vars +extern toku_instr_key *result_state_cond_key; +extern toku_instr_key *bjm_jobs_wait_key; +extern toku_instr_key *cachetable_p_refcount_wait_key; +extern toku_instr_key *cachetable_m_flow_control_cond_key; +extern toku_instr_key *cachetable_m_ev_thread_cond_key; +extern toku_instr_key *bfs_cond_key; +extern toku_instr_key *result_output_condition_key; +extern toku_instr_key *manager_m_escalator_done_key; +extern toku_instr_key *lock_request_m_wait_cond_key; +extern toku_instr_key *queue_result_cond_key; +extern toku_instr_key *ws_worker_wait_key; +extern toku_instr_key *rwlock_wait_read_key; +extern toku_instr_key *rwlock_wait_write_key; +extern toku_instr_key *rwlock_cond_key; +extern toku_instr_key *tp_thread_wait_key; +extern toku_instr_key *tp_pool_wait_free_key; +extern toku_instr_key *frwlock_m_wait_read_key; +extern toku_instr_key *kibbutz_k_cond_key; +extern toku_instr_key *minicron_p_condvar_key; +extern toku_instr_key *locktree_request_info_retry_cv_key; + +// rwlocks +extern toku_instr_key *multi_operation_lock_key; +extern toku_instr_key *low_priority_multi_operation_lock_key; +extern toku_instr_key *cachetable_m_list_lock_key; +extern toku_instr_key *cachetable_m_pending_lock_expensive_key; +extern toku_instr_key *cachetable_m_pending_lock_cheap_key; +extern toku_instr_key *cachetable_m_lock_key; +extern toku_instr_key *result_i_open_dbs_rwlock_key; +extern toku_instr_key *checkpoint_safe_rwlock_key; +extern toku_instr_key *cachetable_value_key; +extern toku_instr_key *safe_file_size_lock_rwlock_key; +extern toku_instr_key *cachetable_disk_nb_rwlock_key; diff --git a/storage/tokudb/PerconaFT/portability/toku_os.h b/storage/tokudb/PerconaFT/portability/toku_os.h index 3a0e7376971..d7cfcfefb9a 100644 --- a/storage/tokudb/PerconaFT/portability/toku_os.h +++ b/storage/tokudb/PerconaFT/portability/toku_os.h @@ -116,12 +116,10 @@ int toku_fsync_dir_by_name_without_accounting(const char *dir_name); // *free_size is set to the bytes of free space in the file system // *total_size is set to the total bytes in the file system // Return 0 on success, otherwise an error number -int toku_get_filesystem_sizes(const char *path, uint64_t *avail_size, uint64_t *free_size, uint64_t *total_size); - -// Portable linux 'stat' -int toku_stat(const char *name, toku_struct_stat *statbuf) __attribute__((__visibility__("default"))); -// Portable linux 'fstat' -int toku_fstat(int fd, toku_struct_stat *statbuf) __attribute__((__visibility__("default"))); +int toku_get_filesystem_sizes(const char *path, + uint64_t *avail_size, + uint64_t *free_size, + uint64_t *total_size); // Portable linux 'dup2' int toku_dup2(int fd, int fd2) __attribute__((__visibility__("default"))); diff --git a/storage/tokudb/PerconaFT/portability/toku_portability.h b/storage/tokudb/PerconaFT/portability/toku_portability.h index d3a4e391a9f..e459cfb8779 100644 --- a/storage/tokudb/PerconaFT/portability/toku_portability.h +++ b/storage/tokudb/PerconaFT/portability/toku_portability.h @@ -54,6 +54,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #define DEV_NULL_FILE "/dev/null" +#include <my_global.h> + // include here, before they get deprecated #include <toku_atomic.h> @@ -125,6 +127,33 @@ typedef int64_t toku_off_t; #define UU(x) x __attribute__((__unused__)) +// Branch prediction macros. +// If supported by the compiler, will hint in inctruction caching for likely +// branching. Should only be used where there is a very good idea of the correct +// branch heuristics as determined by profiling. Mostly copied from InnoDB. +// Use: +// "if (FT_LIKELY(x))" where the chances of "x" evaluating true are higher +// "if (FT_UNLIKELY(x))" where the chances of "x" evaluating false are higher +#if defined(__GNUC__) && (__GNUC__ > 2) && !defined(__INTEL_COMPILER) + +// Tell the compiler that 'expr' probably evaluates to 'constant'. +#define FT_EXPECT(expr, constant) __builtin_expect(expr, constant) + +#else + +#warning "No FT branch prediction operations in use!" +#define FT_EXPECT(expr, constant) (expr) + +#endif // defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER) + +// Tell the compiler that cond is likely to hold +#define FT_LIKELY(cond) FT_EXPECT(bool(cond), true) + +// Tell the compiler that cond is unlikely to hold +#define FT_UNLIKELY(cond) FT_EXPECT(bool(cond), false) + +#include "toku_instrumentation.h" + #if defined(__cplusplus) extern "C" { #endif @@ -242,28 +271,272 @@ void toku_os_full_write (int fd, const void *buf, size_t len) __attribute__((__v // os_write returns 0 on success, otherwise an errno. ssize_t toku_os_pwrite (int fd, const void *buf, size_t len, toku_off_t off) __attribute__((__visibility__("default"))); -int toku_os_write (int fd, const void *buf, size_t len) __attribute__((__visibility__("default"))); +int toku_os_write(int fd, const void *buf, size_t len) + __attribute__((__visibility__("default"))); // wrappers around file system calls -FILE * toku_os_fdopen(int fildes, const char *mode); -FILE * toku_os_fopen(const char *filename, const char *mode); -int toku_os_open(const char *path, int oflag, int mode); -int toku_os_open_direct(const char *path, int oflag, int mode); -int toku_os_close(int fd); -int toku_os_fclose(FILE * stream); -int toku_os_rename(const char *old_name, const char *new_name); -int toku_os_unlink(const char *path); -ssize_t toku_os_read(int fd, void *buf, size_t count); -ssize_t toku_os_pread(int fd, void *buf, size_t count, off_t offset); void toku_os_recursive_delete(const char *path); +TOKU_FILE *toku_os_fdopen_with_source_location(int fildes, + const char *mode, + const char *filename, + const toku_instr_key &instr_key, + const char *src_file, + uint src_line); +#define toku_os_fdopen(FD, M, FN, K) \ + toku_os_fdopen_with_source_location(FD, M, FN, K, __FILE__, __LINE__) + +TOKU_FILE *toku_os_fopen_with_source_location(const char *filename, + const char *mode, + const toku_instr_key &instr_key, + const char *src_file, + uint src_line); +#define toku_os_fopen(F, M, K) \ + toku_os_fopen_with_source_location(F, M, K, __FILE__, __LINE__) + +int toku_os_open_with_source_location(const char *path, + int oflag, + int mode, + const toku_instr_key &instr_key, + const char *src_file, + uint src_line); +#define toku_os_open(FD, F, M, K) \ + toku_os_open_with_source_location(FD, F, M, K, __FILE__, __LINE__) + +int toku_os_open_direct(const char *path, + int oflag, + int mode, + const toku_instr_key &instr_key); + +int toku_os_delete_with_source_location(const char *name, + const char *src_file, + uint src_line); +#define toku_os_delete(FN) \ + toku_os_delete_with_source_location(FN, __FILE__, __LINE__) + +int toku_os_rename_with_source_location(const char *old_name, + const char *new_name, + const char *src_file, + uint src_line); +#define toku_os_rename(old_name, new_name) \ + toku_os_rename_with_source_location(old_name, new_name, __FILE__, __LINE__) + +void toku_os_full_write_with_source_location(int fd, + const void *buf, + size_t len, + const char *src_file, + uint src_line); +#define toku_os_full_write(FD, B, L) \ + toku_os_full_write_with_source_location(FD, B, L, __FILE__, __LINE__) + +int toku_os_write_with_source_location(int fd, + const void *buf, + size_t len, + const char *src_file, + uint src_line); +#define toku_os_write(FD, B, L) \ + toku_os_write_with_source_location(FD, B, L, __FILE__, __LINE__) + +void toku_os_full_pwrite_with_source_location(int fd, + const void *buf, + size_t len, + toku_off_t off, + const char *src_file, + uint src_line); +#define toku_os_full_pwrite(FD, B, L, O) \ + toku_os_full_pwrite_with_source_location(FD, B, L, O, __FILE__, __LINE__) + +ssize_t toku_os_pwrite_with_source_location(int fd, + const void *buf, + size_t len, + toku_off_t off, + const char *src_file, + uint src_line); + +#define toku_os_pwrite(FD, B, L, O) \ + toku_os_pwrite_with_source_location(FD, B, L, O, __FILE__, __LINE__) + +int toku_os_fwrite_with_source_location(const void *ptr, + size_t size, + size_t nmemb, + TOKU_FILE *stream, + const char *src_file, + uint src_line); + +#define toku_os_fwrite(P, S, N, FS) \ + toku_os_fwrite_with_source_location(P, S, N, FS, __FILE__, __LINE__) + +int toku_os_fread_with_source_location(void *ptr, + size_t size, + size_t nmemb, + TOKU_FILE *stream, + const char *src_file, + uint src_line); +#define toku_os_fread(P, S, N, FS) \ + toku_os_fread_with_source_location(P, S, N, FS, __FILE__, __LINE__) + +TOKU_FILE *toku_os_fopen_with_source_location(const char *filename, + const char *mode, + const toku_instr_key &instr_key, + const char *src_file, + uint src_line); + +int toku_os_fclose_with_source_location(TOKU_FILE *stream, + const char *src_file, + uint src_line); + +#define toku_os_fclose(FS) \ + toku_os_fclose_with_source_location(FS, __FILE__, __LINE__) + +int toku_os_close_with_source_location(int fd, + const char *src_file, + uint src_line); +#define toku_os_close(FD) \ + toku_os_close_with_source_location(FD, __FILE__, __LINE__) + +ssize_t toku_os_read_with_source_location(int fd, + void *buf, + size_t count, + const char *src_file, + uint src_line); + +#define toku_os_read(FD, B, C) \ + toku_os_read_with_source_location(FD, B, C, __FILE__, __LINE__); + +ssize_t inline_toku_os_pread_with_source_location(int fd, + void *buf, + size_t count, + off_t offset, + const char *src_file, + uint src_line); +#define toku_os_pread(FD, B, C, O) \ + inline_toku_os_pread_with_source_location(FD, B, C, O, __FILE__, __LINE__); + +void file_fsync_internal_with_source_location(int fd, + const char *src_file, + uint src_line); + +#define file_fsync_internal(FD) \ + file_fsync_internal_with_source_location(FD, __FILE__, __LINE__); + +int toku_os_get_file_size_with_source_location(int fildes, + int64_t *fsize, + const char *src_file, + uint src_line); + +#define toku_os_get_file_size(D, S) \ + toku_os_get_file_size_with_source_location(D, S, __FILE__, __LINE__) + +// TODO: should this prototype be moved to toku_os.h? +int toku_stat_with_source_location(const char *name, + toku_struct_stat *buf, + const toku_instr_key &instr_key, + const char *src_file, + uint src_line) + __attribute__((__visibility__("default"))); + +#define toku_stat(N, B, K) \ + toku_stat_with_source_location(N, B, K, __FILE__, __LINE__) + +int toku_os_fstat_with_source_location(int fd, + toku_struct_stat *buf, + const char *src_file, + uint src_line) + __attribute__((__visibility__("default"))); + +#define toku_os_fstat(FD, B) \ + toku_os_fstat_with_source_location(FD, B, __FILE__, __LINE__) + +#ifdef HAVE_PSI_FILE_INTERFACE2 +int inline_toku_os_close(int fd, const char *src_file, uint src_line); +int inline_toku_os_fclose(TOKU_FILE *stream, + const char *src_file, + uint src_line); +ssize_t inline_toku_os_read(int fd, + void *buf, + size_t count, + const char *src_file, + uint src_line); +ssize_t inline_toku_os_pread(int fd, + void *buf, + size_t count, + off_t offset, + const char *src_file, + uint src_line); +int inline_toku_os_fwrite(const void *ptr, + size_t size, + size_t nmemb, + TOKU_FILE *stream, + const char *src_file, + uint src_line); +int inline_toku_os_fread(void *ptr, + size_t size, + size_t nmemb, + TOKU_FILE *stream, + const char *src_file, + uint src_line); +int inline_toku_os_write(int fd, + const void *buf, + size_t len, + const char *src_file, + uint src_line); +ssize_t inline_toku_os_pwrite(int fd, + const void *buf, + size_t len, + toku_off_t off, + const char *src_file, + uint src_line); +void inline_toku_os_full_write(int fd, + const void *buf, + size_t len, + const char *src_file, + uint src_line); +void inline_toku_os_full_pwrite(int fd, + const void *buf, + size_t len, + toku_off_t off, + const char *src_file, + uint src_line); +int inline_toku_os_delete(const char *name, + const char *srv_file, + uint src_line); +//#else +int inline_toku_os_close(int fd); +int inline_toku_os_fclose(TOKU_FILE *stream); +ssize_t inline_toku_os_read(int fd, void *buf, size_t count); +ssize_t inline_toku_os_pread(int fd, void *buf, size_t count, off_t offset); +int inline_toku_os_fwrite(const void *ptr, + size_t size, + size_t nmemb, + TOKU_FILE *stream); +int inline_toku_os_fread(void *ptr, + size_t size, + size_t nmemb, + TOKU_FILE *stream); +int inline_toku_os_write(int fd, const void *buf, size_t len); +ssize_t inline_toku_os_pwrite(int fd, + const void *buf, + size_t len, + toku_off_t off); +void inline_toku_os_full_write(int fd, const void *buf, size_t len); +void inline_toku_os_full_pwrite(int fd, + const void *buf, + size_t len, + toku_off_t off); +int inline_toku_os_delete(const char *name); +#endif + // wrapper around fsync -void toku_file_fsync_without_accounting(int fd); void toku_file_fsync(int fd); int toku_fsync_directory(const char *fname); +void toku_file_fsync_without_accounting(int fd); // get the number of fsync calls and the fsync times (total) -void toku_get_fsync_times(uint64_t *fsync_count, uint64_t *fsync_time, uint64_t *long_fsync_threshold, uint64_t *long_fsync_count, uint64_t *long_fsync_time); +void toku_get_fsync_times(uint64_t *fsync_count, + uint64_t *fsync_time, + uint64_t *long_fsync_threshold, + uint64_t *long_fsync_count, + uint64_t *long_fsync_time); void toku_set_func_fsync (int (*fsync_function)(int)); void toku_set_func_pwrite (ssize_t (*)(int, const void *, size_t, toku_off_t)); @@ -273,9 +546,11 @@ void toku_set_func_full_write (ssize_t (*)(int, const void *, size_t)); void toku_set_func_fdopen (FILE * (*)(int, const char *)); void toku_set_func_fopen (FILE * (*)(const char *, const char *)); void toku_set_func_open (int (*)(const char *, int, int)); -void toku_set_func_fclose(int (*)(FILE*)); +void toku_set_func_fclose(int (*)(FILE *)); void toku_set_func_read(ssize_t (*)(int, void *, size_t)); -void toku_set_func_pread (ssize_t (*)(int, void *, size_t, off_t)); +void toku_set_func_pread(ssize_t (*)(int, void *, size_t, off_t)); +void toku_set_func_fwrite( + size_t (*fwrite_fun)(const void *, size_t, size_t, FILE *)); int toku_portability_init(void); void toku_portability_destroy(void); @@ -285,28 +560,3 @@ void toku_portability_destroy(void); static inline uint64_t roundup_to_multiple(uint64_t alignment, uint64_t v) { return (v + alignment - 1) & ~(alignment - 1); } - -// Branch prediction macros. -// If supported by the compiler, will hint in inctruction caching for likely -// branching. Should only be used where there is a very good idea of the correct -// branch heuristics as determined by profiling. Mostly copied from InnoDB. -// Use: -// "if (FT_LIKELY(x))" where the chances of "x" evaluating true are higher -// "if (FT_UNLIKELY(x))" where the chances of "x" evaluating false are higher -#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER) - -// Tell the compiler that 'expr' probably evaluates to 'constant'. -#define FT_EXPECT(expr,constant) __builtin_expect(expr, constant) - -#else - -#warning "No FT branch prediction operations in use!" -#define FT_EXPECT(expr,constant) (expr) - -#endif // defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER) - -// Tell the compiler that cond is likely to hold -#define FT_LIKELY(cond) FT_EXPECT(cond, 1) - -// Tell the compiler that cond is unlikely to hold -#define FT_UNLIKELY(cond) FT_EXPECT(cond, 0) diff --git a/storage/tokudb/PerconaFT/portability/toku_pthread.h b/storage/tokudb/PerconaFT/portability/toku_pthread.h index 84c27736201..44de01244d2 100644 --- a/storage/tokudb/PerconaFT/portability/toku_pthread.h +++ b/storage/tokudb/PerconaFT/portability/toku_pthread.h @@ -42,31 +42,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include <time.h> #include <stdint.h> +#include "toku_portability.h" #include "toku_assert.h" +// TODO: some things moved toku_instrumentation.h, not necessarily the best +// place typedef pthread_attr_t toku_pthread_attr_t; typedef pthread_t toku_pthread_t; -typedef pthread_mutexattr_t toku_pthread_mutexattr_t; typedef pthread_mutex_t toku_pthread_mutex_t; typedef pthread_condattr_t toku_pthread_condattr_t; typedef pthread_cond_t toku_pthread_cond_t; -typedef pthread_rwlock_t toku_pthread_rwlock_t; -typedef pthread_rwlockattr_t toku_pthread_rwlockattr_t; +typedef pthread_rwlockattr_t toku_pthread_rwlockattr_t; typedef pthread_key_t toku_pthread_key_t; typedef struct timespec toku_timespec_t; -#ifndef TOKU_PTHREAD_DEBUG -# define TOKU_PTHREAD_DEBUG 0 -#endif +// TODO: break this include loop +#include <pthread.h> +typedef pthread_mutexattr_t toku_pthread_mutexattr_t; -typedef struct toku_mutex { +struct toku_mutex_t { pthread_mutex_t pmutex; + struct PSI_mutex + *psi_mutex; /* The performance schema instrumentation hook */ #if TOKU_PTHREAD_DEBUG - pthread_t owner; // = pthread_self(); // for debugging + pthread_t owner; // = pthread_self(); // for debugging bool locked; bool valid; + pfs_key_t instr_key_id; +#endif +}; + +struct toku_cond_t { + pthread_cond_t pcond; + struct PSI_cond *psi_cond; +#if TOKU_PTHREAD_DEBUG + pfs_key_t instr_key_id; +#endif +}; + +#ifdef TOKU_PTHREAD_DEBUG +#define TOKU_COND_INITIALIZER \ + { \ + .pcond = PTHREAD_COND_INITIALIZER, .psi_cond = nullptr, \ + .instr_key_id = 0 \ + } +#else +#define TOKU_COND_INITIALIZER \ + { .pcond = PTHREAD_COND_INITIALIZER, .psi_cond = nullptr } #endif -} toku_mutex_t; + +struct toku_pthread_rwlock_t { + pthread_rwlock_t rwlock; + struct PSI_rwlock *psi_rwlock; +#if TOKU_PTHREAD_DEBUG + pfs_key_t instr_key_id; +#endif +}; typedef struct toku_mutex_aligned { toku_mutex_t aligned_mutex __attribute__((__aligned__(64))); @@ -83,45 +114,68 @@ typedef struct toku_mutex_aligned { // In general it will be a lot of busy work to make this codebase compile // cleanly with -Wmissing-field-initializers -# define ZERO_MUTEX_INITIALIZER {} +#define ZERO_MUTEX_INITIALIZER \ + {} #if TOKU_PTHREAD_DEBUG -# define TOKU_MUTEX_INITIALIZER { .pmutex = PTHREAD_MUTEX_INITIALIZER, .owner = 0, .locked = false, .valid = true } +#define TOKU_MUTEX_INITIALIZER \ + { \ + .pmutex = PTHREAD_MUTEX_INITIALIZER, .psi_mutex = nullptr, .owner = 0, \ + .locked = false, .valid = true, .instr_key_id = 0 \ + } #else -# define TOKU_MUTEX_INITIALIZER { .pmutex = PTHREAD_MUTEX_INITIALIZER } +#define TOKU_MUTEX_INITIALIZER \ + { .pmutex = PTHREAD_MUTEX_INITIALIZER, .psi_mutex = nullptr } #endif // Darwin doesn't provide adaptive mutexes #if defined(__APPLE__) -# define TOKU_MUTEX_ADAPTIVE PTHREAD_MUTEX_DEFAULT -# if TOKU_PTHREAD_DEBUG -# define TOKU_ADAPTIVE_MUTEX_INITIALIZER { .pmutex = PTHREAD_MUTEX_INITIALIZER, .owner = 0, .locked = false, .valid = true } -# else -# define TOKU_ADAPTIVE_MUTEX_INITIALIZER { .pmutex = PTHREAD_MUTEX_INITIALIZER } -# endif -#else // __FreeBSD__, __linux__, at least -# define TOKU_MUTEX_ADAPTIVE PTHREAD_MUTEX_ADAPTIVE_NP -# if TOKU_PTHREAD_DEBUG -# define TOKU_ADAPTIVE_MUTEX_INITIALIZER { .pmutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, .owner = 0, .locked = false, .valid = true } -# else -# define TOKU_ADAPTIVE_MUTEX_INITIALIZER { .pmutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP } -# endif +#define TOKU_MUTEX_ADAPTIVE PTHREAD_MUTEX_DEFAULT +#if TOKU_PTHREAD_DEBUG +#define TOKU_ADAPTIVE_MUTEX_INITIALIZER \ + { \ + .pmutex = PTHREAD_MUTEX_INITIALIZER, .psi_mutex = nullptr, .owner = 0, \ + .locked = false, .valid = true, .instr_key_id = 0 \ + } +#else +#define TOKU_ADAPTIVE_MUTEX_INITIALIZER \ + { .pmutex = PTHREAD_MUTEX_INITIALIZER, .psi_mutex = nullptr } #endif - -static inline void -toku_mutex_init(toku_mutex_t *mutex, const toku_pthread_mutexattr_t *attr) { - int r = pthread_mutex_init(&mutex->pmutex, attr); - assert_zero(r); +#else // __FreeBSD__, __linux__, at least +#define TOKU_MUTEX_ADAPTIVE PTHREAD_MUTEX_ADAPTIVE_NP #if TOKU_PTHREAD_DEBUG - mutex->locked = false; - invariant(!mutex->valid); - mutex->valid = true; - mutex->owner = 0; +#define TOKU_ADAPTIVE_MUTEX_INITIALIZER \ + { \ + .pmutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, .psi_mutex = nullptr, \ + .owner = 0, .locked = false, .valid = true, .instr_key_id = 0 \ + } +#else +#define TOKU_ADAPTIVE_MUTEX_INITIALIZER \ + { .pmutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, .psi_mutex = nullptr } +#endif #endif -} -static inline void -toku_mutexattr_init(toku_pthread_mutexattr_t *attr) { +// Different OSes implement mutexes as different amounts of nested structs. +// C++ will fill out all missing values with zeroes if you provide at least one +// zero, but it needs the right amount of nesting. +#if defined(__FreeBSD__) +#define ZERO_COND_INITIALIZER \ + { 0 } +#elif defined(__APPLE__) +#define ZERO_COND_INITIALIZER \ + { \ + { 0 } \ + } +#else // __linux__, at least +#define ZERO_COND_INITIALIZER \ + { \ + { \ + { 0 } \ + } \ + } +#endif + +static inline void toku_mutexattr_init(toku_pthread_mutexattr_t *attr) { int r = pthread_mutexattr_init(attr); assert_zero(r); } @@ -138,61 +192,8 @@ toku_mutexattr_destroy(toku_pthread_mutexattr_t *attr) { assert_zero(r); } -static inline void -toku_mutex_destroy(toku_mutex_t *mutex) { #if TOKU_PTHREAD_DEBUG - invariant(mutex->valid); - mutex->valid = false; - invariant(!mutex->locked); -#endif - int r = pthread_mutex_destroy(&mutex->pmutex); - assert_zero(r); -} - -static inline void -toku_mutex_lock(toku_mutex_t *mutex) { - int r = pthread_mutex_lock(&mutex->pmutex); - assert_zero(r); -#if TOKU_PTHREAD_DEBUG - invariant(mutex->valid); - invariant(!mutex->locked); - invariant(mutex->owner == 0); - mutex->locked = true; - mutex->owner = pthread_self(); -#endif -} - -static inline int -toku_mutex_trylock(toku_mutex_t *mutex) { - int r = pthread_mutex_trylock(&mutex->pmutex); -#if TOKU_PTHREAD_DEBUG - if (r == 0) { - invariant(mutex->valid); - invariant(!mutex->locked); - invariant(mutex->owner == 0); - mutex->locked = true; - mutex->owner = pthread_self(); - } -#endif - return r; -} - -static inline void -toku_mutex_unlock(toku_mutex_t *mutex) { -#if TOKU_PTHREAD_DEBUG - invariant(mutex->owner == pthread_self()); - invariant(mutex->valid); - invariant(mutex->locked); - mutex->locked = false; - mutex->owner = 0; -#endif - int r = pthread_mutex_unlock(&mutex->pmutex); - assert_zero(r); -} - -#if TOKU_PTHREAD_DEBUG -static inline void -toku_mutex_assert_locked(const toku_mutex_t *mutex) { +static inline void toku_mutex_assert_locked(const toku_mutex_t *mutex) { invariant(mutex->locked); invariant(mutex->owner == pthread_self()); } @@ -217,40 +218,123 @@ toku_mutex_assert_unlocked(toku_mutex_t *mutex) { invariant(!mutex->locked); } #else -static inline void -toku_mutex_assert_unlocked(toku_mutex_t *mutex __attribute__((unused))) { +static inline void toku_mutex_assert_unlocked(toku_mutex_t *mutex + __attribute__((unused))) {} +#endif + +#define toku_mutex_lock(M) \ + toku_mutex_lock_with_source_location(M, __FILE__, __LINE__) + +static inline void toku_cond_init(toku_cond_t *cond, + const toku_pthread_condattr_t *attr) { + int r = pthread_cond_init(&cond->pcond, attr); + assert_zero(r); +} + +#define toku_mutex_trylock(M) \ + toku_mutex_trylock_with_source_location(M, __FILE__, __LINE__) + +inline void toku_mutex_unlock(toku_mutex_t *mutex) { +#if TOKU_PTHREAD_DEBUG + invariant(mutex->owner == pthread_self()); + invariant(mutex->valid); + invariant(mutex->locked); + mutex->locked = false; + mutex->owner = 0; +#endif + toku_instr_mutex_unlock(mutex->psi_mutex); + int r = pthread_mutex_unlock(&mutex->pmutex); + assert_zero(r); } + +inline void toku_mutex_lock_with_source_location(toku_mutex_t *mutex, + const char *src_file, + int src_line) { + + toku_mutex_instrumentation mutex_instr; + toku_instr_mutex_lock_start(mutex_instr, *mutex, src_file, src_line); + + const int r = pthread_mutex_lock(&mutex->pmutex); + toku_instr_mutex_lock_end(mutex_instr, r); + + assert_zero(r); +#if TOKU_PTHREAD_DEBUG + invariant(mutex->valid); + invariant(!mutex->locked); + invariant(mutex->owner == 0); + mutex->locked = true; + mutex->owner = pthread_self(); #endif +} -typedef struct toku_cond { - pthread_cond_t pcond; -} toku_cond_t; +inline int toku_mutex_trylock_with_source_location(toku_mutex_t *mutex, + const char *src_file, + int src_line) { -// Same considerations as for ZERO_MUTEX_INITIALIZER apply -#define ZERO_COND_INITIALIZER {} + toku_mutex_instrumentation mutex_instr; + toku_instr_mutex_trylock_start(mutex_instr, *mutex, src_file, src_line); -#define TOKU_COND_INITIALIZER {PTHREAD_COND_INITIALIZER} + const int r = pthread_mutex_lock(&mutex->pmutex); + toku_instr_mutex_lock_end(mutex_instr, r); -static inline void -toku_cond_init(toku_cond_t *cond, const toku_pthread_condattr_t *attr) { +#if TOKU_PTHREAD_DEBUG + if (r == 0) { + invariant(mutex->valid); + invariant(!mutex->locked); + invariant(mutex->owner == 0); + mutex->locked = true; + mutex->owner = pthread_self(); + } +#endif + return r; +} + +#define toku_cond_wait(C, M) \ + toku_cond_wait_with_source_location(C, M, __FILE__, __LINE__) + +#define toku_cond_timedwait(C, M, W) \ + toku_cond_timedwait_with_source_location(C, M, W, __FILE__, __LINE__) + +inline void toku_cond_init(const toku_instr_key &key, + toku_cond_t *cond, + const pthread_condattr_t *attr) { + toku_instr_cond_init(key, *cond); int r = pthread_cond_init(&cond->pcond, attr); assert_zero(r); } -static inline void -toku_cond_destroy(toku_cond_t *cond) { +inline void toku_cond_destroy(toku_cond_t *cond) { + toku_instr_cond_destroy(cond->psi_cond); int r = pthread_cond_destroy(&cond->pcond); assert_zero(r); } -static inline void -toku_cond_wait(toku_cond_t *cond, toku_mutex_t *mutex) { +inline void toku_cond_wait_with_source_location(toku_cond_t *cond, + toku_mutex_t *mutex, + const char *src_file, + uint src_line) { + #if TOKU_PTHREAD_DEBUG invariant(mutex->locked); mutex->locked = false; mutex->owner = 0; #endif - int r = pthread_cond_wait(&cond->pcond, &mutex->pmutex); + + /* Instrumentation start */ + toku_cond_instrumentation cond_instr; + toku_instr_cond_wait_start(cond_instr, + toku_instr_cond_op::cond_wait, + *cond, + *mutex, + src_file, + src_line); + + /* Instrumented code */ + const int r = pthread_cond_wait(&cond->pcond, &mutex->pmutex); + + /* Instrumentation end */ + toku_instr_cond_wait_end(cond_instr, r); + assert_zero(r); #if TOKU_PTHREAD_DEBUG invariant(!mutex->locked); @@ -259,14 +343,33 @@ toku_cond_wait(toku_cond_t *cond, toku_mutex_t *mutex) { #endif } -static inline int -toku_cond_timedwait(toku_cond_t *cond, toku_mutex_t *mutex, toku_timespec_t *wakeup_at) { +inline int toku_cond_timedwait_with_source_location(toku_cond_t *cond, + toku_mutex_t *mutex, + toku_timespec_t *wakeup_at, + const char *src_file, + uint src_line) { #if TOKU_PTHREAD_DEBUG invariant(mutex->locked); mutex->locked = false; mutex->owner = 0; #endif - int r = pthread_cond_timedwait(&cond->pcond, &mutex->pmutex, wakeup_at); + + /* Instrumentation start */ + toku_cond_instrumentation cond_instr; + toku_instr_cond_wait_start(cond_instr, + toku_instr_cond_op::cond_timedwait, + *cond, + *mutex, + src_file, + src_line); + + /* Instrumented code */ + const int r = pthread_cond_timedwait( + &cond->pcond, &mutex->pmutex, wakeup_at); + + /* Instrumentation end */ + toku_instr_cond_wait_end(cond_instr, r); + #if TOKU_PTHREAD_DEBUG invariant(!mutex->locked); mutex->locked = true; @@ -275,69 +378,116 @@ toku_cond_timedwait(toku_cond_t *cond, toku_mutex_t *mutex, toku_timespec_t *wak return r; } -static inline void -toku_cond_signal(toku_cond_t *cond) { - int r = pthread_cond_signal(&cond->pcond); +inline void toku_cond_signal(toku_cond_t *cond) { + toku_instr_cond_signal(*cond); + const int r = pthread_cond_signal(&cond->pcond); assert_zero(r); } -static inline void -toku_cond_broadcast(toku_cond_t *cond) { - int r =pthread_cond_broadcast(&cond->pcond); +inline void toku_cond_broadcast(toku_cond_t *cond) { + toku_instr_cond_broadcast(*cond); + const int r = pthread_cond_broadcast(&cond->pcond); assert_zero(r); } -int -toku_pthread_yield(void) __attribute__((__visibility__("default"))); - -static inline toku_pthread_t -toku_pthread_self(void) { - return pthread_self(); +inline void toku_mutex_init(const toku_instr_key &key, + toku_mutex_t *mutex, + const toku_pthread_mutexattr_t *attr) { +#if TOKU_PTHREAD_DEBUG + mutex->valid = true; +#endif + toku_instr_mutex_init(key, *mutex); + const int r = pthread_mutex_init(&mutex->pmutex, attr); + assert_zero(r); +#if TOKU_PTHREAD_DEBUG + mutex->locked = false; + invariant(mutex->valid); + mutex->valid = true; + mutex->owner = 0; +#endif } -static inline void -toku_pthread_rwlock_init(toku_pthread_rwlock_t *__restrict rwlock, const toku_pthread_rwlockattr_t *__restrict attr) { - int r = pthread_rwlock_init(rwlock, attr); +inline void toku_mutex_destroy(toku_mutex_t *mutex) { +#if TOKU_PTHREAD_DEBUG + invariant(mutex->valid); + mutex->valid = false; + invariant(!mutex->locked); +#endif + toku_instr_mutex_destroy(mutex->psi_mutex); + int r = pthread_mutex_destroy(&mutex->pmutex); assert_zero(r); } -static inline void -toku_pthread_rwlock_destroy(toku_pthread_rwlock_t *rwlock) { - int r = pthread_rwlock_destroy(rwlock); +#define toku_pthread_rwlock_rdlock(RW) \ + toku_pthread_rwlock_rdlock_with_source_location(RW, __FILE__, __LINE__) + +#define toku_pthread_rwlock_wrlock(RW) \ + toku_pthread_rwlock_wrlock_with_source_location(RW, __FILE__, __LINE__) + +inline void toku_pthread_rwlock_init( + const toku_instr_key &key, + toku_pthread_rwlock_t *__restrict rwlock, + const toku_pthread_rwlockattr_t *__restrict attr) { + toku_instr_rwlock_init(key, *rwlock); + int r = pthread_rwlock_init(&rwlock->rwlock, attr); assert_zero(r); } -static inline void -toku_pthread_rwlock_rdlock(toku_pthread_rwlock_t *rwlock) { - int r = pthread_rwlock_rdlock(rwlock); +inline void toku_pthread_rwlock_destroy(toku_pthread_rwlock_t *rwlock) { + toku_instr_rwlock_destroy(rwlock->psi_rwlock); + int r = pthread_rwlock_destroy(&rwlock->rwlock); assert_zero(r); } -static inline void -toku_pthread_rwlock_rdunlock(toku_pthread_rwlock_t *rwlock) { - int r = pthread_rwlock_unlock(rwlock); +inline void toku_pthread_rwlock_rdlock_with_source_location( + toku_pthread_rwlock_t *rwlock, + const char *src_file, + uint src_line) { + + /* Instrumentation start */ + toku_rwlock_instrumentation rwlock_instr; + toku_instr_rwlock_rdlock_wait_start( + rwlock_instr, *rwlock, src_file, src_line); + /* Instrumented code */ + const int r = pthread_rwlock_rdlock(&rwlock->rwlock); + + /* Instrumentation end */ + toku_instr_rwlock_rdlock_wait_end(rwlock_instr, r); + assert_zero(r); } -static inline void -toku_pthread_rwlock_wrlock(toku_pthread_rwlock_t *rwlock) { - int r = pthread_rwlock_wrlock(rwlock); +inline void toku_pthread_rwlock_wrlock_with_source_location( + toku_pthread_rwlock_t *rwlock, + const char *src_file, + uint src_line) { + + /* Instrumentation start */ + toku_rwlock_instrumentation rwlock_instr; + toku_instr_rwlock_wrlock_wait_start( + rwlock_instr, *rwlock, src_file, src_line); + /* Instrumented code */ + const int r = pthread_rwlock_wrlock(&rwlock->rwlock); + + /* Instrumentation end */ + toku_instr_rwlock_wrlock_wait_end(rwlock_instr, r); + assert_zero(r); } -static inline void -toku_pthread_rwlock_wrunlock(toku_pthread_rwlock_t *rwlock) { - int r = pthread_rwlock_unlock(rwlock); +inline void toku_pthread_rwlock_rdunlock(toku_pthread_rwlock_t *rwlock) { + toku_instr_rwlock_unlock(*rwlock); + const int r = pthread_rwlock_unlock(&rwlock->rwlock); assert_zero(r); } -static inline int -toku_pthread_create(toku_pthread_t *thread, const toku_pthread_attr_t *attr, void *(*start_function)(void *), void *arg) { - return pthread_create(thread, attr, start_function, arg); +inline void toku_pthread_rwlock_wrunlock(toku_pthread_rwlock_t *rwlock) { + toku_instr_rwlock_unlock(*rwlock); + const int r = pthread_rwlock_unlock(&rwlock->rwlock); + assert_zero(r); } -static inline int -toku_pthread_join(toku_pthread_t thread, void **value_ptr) { +static inline int toku_pthread_join(toku_pthread_t thread, void **value_ptr) { return pthread_join(thread, value_ptr); } @@ -361,7 +511,15 @@ toku_pthread_getspecific(toku_pthread_key_t key) { return pthread_getspecific(key); } -static inline int -toku_pthread_setspecific(toku_pthread_key_t key, void *data) { +static inline int toku_pthread_setspecific(toku_pthread_key_t key, void *data) { return pthread_setspecific(key, data); } + +int toku_pthread_yield(void) __attribute__((__visibility__("default"))); + +static inline toku_pthread_t toku_pthread_self(void) { return pthread_self(); } + +static inline void *toku_pthread_done(void *exit_value) { + toku_instr_delete_current_thread(); + pthread_exit(exit_value); +} diff --git a/storage/tokudb/PerconaFT/portability/toku_race_tools.h b/storage/tokudb/PerconaFT/portability/toku_race_tools.h index 8482a164fb8..9ed46ec909d 100644 --- a/storage/tokudb/PerconaFT/portability/toku_race_tools.h +++ b/storage/tokudb/PerconaFT/portability/toku_race_tools.h @@ -40,6 +40,11 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include <portability/toku_config.h> +#ifdef HAVE_valgrind +#undef USE_VALGRIND +#define USE_VALGRIND 1 +#endif + #if defined(__linux__) && USE_VALGRIND # include <valgrind/helgrind.h> diff --git a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc index 4c7f5336161..cc86402751b 100644 --- a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc +++ b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc @@ -571,6 +571,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi oldest_referenced_xid_estimate, true); toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids, &gc_info); + toku_ft_adjust_logical_row_count(db_struct_i(hotdb)->ft_handle->ft, -1); } } return result; @@ -616,6 +617,7 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho oldest_referenced_xid_estimate, true); toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT, &gc_info); + toku_ft_adjust_logical_row_count(db_struct_i(hotdb)->ft_handle->ft, 1); } } return result; diff --git a/storage/tokudb/PerconaFT/src/indexer.cc b/storage/tokudb/PerconaFT/src/indexer.cc index b475b08beb5..044ffac917c 100644 --- a/storage/tokudb/PerconaFT/src/indexer.cc +++ b/storage/tokudb/PerconaFT/src/indexer.cc @@ -253,16 +253,21 @@ toku_indexer_create_indexer(DB_ENV *env, indexer->set_error_callback = toku_indexer_set_error_callback; indexer->set_poll_function = toku_indexer_set_poll_function; indexer->build = build_index; - indexer->close = close_indexer; - indexer->abort = abort_indexer; - - toku_mutex_init(&indexer->i->indexer_lock, NULL); - toku_mutex_init(&indexer->i->indexer_estimate_lock, NULL); + indexer->close = close_indexer; + indexer->abort = abort_indexer; + + toku_mutex_init( + *indexer_i_indexer_lock_mutex_key, &indexer->i->indexer_lock, nullptr); + toku_mutex_init(*indexer_i_indexer_estimate_lock_mutex_key, + &indexer->i->indexer_estimate_lock, + nullptr); toku_init_dbt(&indexer->i->position_estimate); // - // create and close a dummy loader to get redirection going for the hot indexer - // This way, if the hot index aborts, but other transactions have references to the + // create and close a dummy loader to get redirection going for the hot + // indexer + // This way, if the hot index aborts, but other transactions have references + // to the // underlying FT, then those transactions can do dummy operations on the FT // while the DB gets redirected back to an empty dictionary // diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-first-empty.cc b/storage/tokudb/PerconaFT/src/tests/blocking-first-empty.cc index d9d865254f1..3b53911e614 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-first-empty.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-first-empty.cc @@ -99,12 +99,14 @@ static void *blocking_first_thread(void *arg) { static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime) { int r; toku_pthread_t tids[nthreads]; - struct blocking_first_args a = { db_env, db, nrows, sleeptime }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_first_thread, &a); assert(r == 0); + struct blocking_first_args a = {db_env, db, nrows, sleeptime}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, blocking_first_thread, &a); + assert(r == 0); } blocking_first(db_env, db, nrows, sleeptime); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-first.cc b/storage/tokudb/PerconaFT/src/tests/blocking-first.cc index 9f3f8f4eb6a..c104eabd6c3 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-first.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-first.cc @@ -116,12 +116,14 @@ static void *blocking_first_thread(void *arg) { static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime) { int r; toku_pthread_t tids[nthreads]; - struct blocking_first_args a = { db_env, db, nrows, sleeptime }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_first_thread, &a); assert(r == 0); + struct blocking_first_args a = {db_env, db, nrows, sleeptime}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, blocking_first_thread, &a); + assert(r == 0); } blocking_first(db_env, db, nrows, sleeptime); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-last.cc b/storage/tokudb/PerconaFT/src/tests/blocking-last.cc index a62d83eb46f..3a702e3f115 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-last.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-last.cc @@ -116,12 +116,14 @@ static void *blocking_last_thread(void *arg) { static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime) { int r; toku_pthread_t tids[nthreads]; - struct blocking_last_args a = { db_env, db, nrows, sleeptime }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_last_thread, &a); assert(r == 0); + struct blocking_last_args a = {db_env, db, nrows, sleeptime}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, blocking_last_thread, &a); + assert(r == 0); } blocking_last(db_env, db, nrows, sleeptime); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-next-prev-deadlock.cc b/storage/tokudb/PerconaFT/src/tests/blocking-next-prev-deadlock.cc index bd5697ba526..781708cc627 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-next-prev-deadlock.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-next-prev-deadlock.cc @@ -181,12 +181,14 @@ static void *blocking_next_thread(void *arg) { static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime) { int r; toku_pthread_t tids[nthreads]; - struct blocking_next_args a = { db_env, db, nrows, sleeptime }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_next_thread, &a); assert(r == 0); + struct blocking_next_args a = {db_env, db, nrows, sleeptime}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, blocking_next_thread, &a); + assert(r == 0); } blocking_prev(db_env, db, nrows, sleeptime); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-next-prev.cc b/storage/tokudb/PerconaFT/src/tests/blocking-next-prev.cc index 729473bb7cf..aa179e88d76 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-next-prev.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-next-prev.cc @@ -188,12 +188,14 @@ static void *blocking_next_thread(void *arg) { static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime) { int r; toku_pthread_t tids[nthreads]; - struct blocking_next_args a = { db_env, db, nrows, sleeptime }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_next_thread, &a); assert(r == 0); + struct blocking_next_args a = {db_env, db, nrows, sleeptime}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, blocking_next_thread, &a); + assert(r == 0); } blocking_prev(db_env, db, nrows, sleeptime); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-prelock-range.cc b/storage/tokudb/PerconaFT/src/tests/blocking-prelock-range.cc index 8821b39aa8a..38875a2f8ec 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-prelock-range.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-prelock-range.cc @@ -140,12 +140,17 @@ int test_main(int argc, char * const argv[]) { r = db->open(db, NULL, db_filename, NULL, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT|DB_THREAD, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); toku_pthread_t tids[nthreads]; - struct blocking_range_lock_args a = { db_env, db, nrows, sleeptime }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_range_lock_thread, &a); assert(r == 0); + struct blocking_range_lock_args a = {db_env, db, nrows, sleeptime}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create(toku_uninstrumented, + &tids[i], + nullptr, + blocking_range_lock_thread, + &a); + assert(r == 0); } blocking_range_lock(db_env, db, nrows, sleeptime); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-put-timeout.cc b/storage/tokudb/PerconaFT/src/tests/blocking-put-timeout.cc index a31e173fd83..bf4a8a24f47 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-put-timeout.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-put-timeout.cc @@ -57,8 +57,8 @@ struct test_seq { static void test_seq_init(struct test_seq *seq) { seq->state = 0; - toku_mutex_init(&seq->lock, NULL); - toku_cond_init(&seq->cv, NULL); + toku_mutex_init(toku_uninstrumented, &seq->lock, nullptr); + toku_cond_init(toku_uninstrumented, &seq->cv, nullptr); } static void test_seq_destroy(struct test_seq *seq) { @@ -172,13 +172,18 @@ int test_main(int argc, char * const argv[]) { r = db->open(db, NULL, db_filename, NULL, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT|DB_THREAD, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); // run test - struct test_seq seq; ZERO_STRUCT(seq); test_seq_init(&seq); + struct test_seq seq; + ZERO_STRUCT(seq); + test_seq_init(&seq); toku_pthread_t t_a_id; - struct t_a_args t_a_args = { db_env, db, &seq }; - r = toku_pthread_create(&t_a_id, NULL, t_a_thread, &t_a_args); assert(r == 0); + struct t_a_args t_a_args = {db_env, db, &seq}; + r = toku_pthread_create( + toku_uninstrumented, &t_a_id, nullptr, t_a_thread, &t_a_args); + assert(r == 0); t_b(db_env, db, &seq); void *ret; - r = toku_pthread_join(t_a_id, &ret); assert(r == 0); + r = toku_pthread_join(t_a_id, &ret); + assert(r == 0); test_seq_destroy(&seq); // close env diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-put-wakeup.cc b/storage/tokudb/PerconaFT/src/tests/blocking-put-wakeup.cc index 615cdb5d17c..dba7962de35 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-put-wakeup.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-put-wakeup.cc @@ -58,8 +58,8 @@ struct test_seq { static void test_seq_init(struct test_seq *seq) { seq->state = 0; - toku_mutex_init(&seq->lock, NULL); - toku_cond_init(&seq->cv, NULL); + toku_mutex_init(toku_uninstrumented, &seq->lock, nullptr); + toku_cond_init(toku_uninstrumented, &seq->cv, nullptr); } static void test_seq_destroy(struct test_seq *seq) { @@ -167,13 +167,18 @@ int test_main(int argc, char * const argv[]) { r = db->open(db, NULL, db_filename, NULL, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT|DB_THREAD, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); // run test - struct test_seq seq; ZERO_STRUCT(seq); test_seq_init(&seq); + struct test_seq seq; + ZERO_STRUCT(seq); + test_seq_init(&seq); toku_pthread_t t_a_id; - struct t_a_args t_a_args = { db_env, db, &seq }; - r = toku_pthread_create(&t_a_id, NULL, t_a_thread, &t_a_args); assert(r == 0); + struct t_a_args t_a_args = {db_env, db, &seq}; + r = toku_pthread_create( + toku_uninstrumented, &t_a_id, nullptr, t_a_thread, &t_a_args); + assert(r == 0); t_b(db_env, db, &seq); void *ret; - r = toku_pthread_join(t_a_id, &ret); assert(r == 0); + r = toku_pthread_join(t_a_id, &ret); + assert(r == 0); test_seq_destroy(&seq); // close env diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-put.cc b/storage/tokudb/PerconaFT/src/tests/blocking-put.cc index 0fa56325007..95481062483 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-put.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-put.cc @@ -139,12 +139,14 @@ int test_main(int argc, char * const argv[]) { r = db->open(db, NULL, db_filename, NULL, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT|DB_THREAD, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); toku_pthread_t tids[nthreads]; - struct blocking_put_args a = { db_env, db, nrows, sleeptime }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_put_thread, &a); assert(r == 0); + struct blocking_put_args a = {db_env, db, nrows, sleeptime}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, blocking_put_thread, &a); + assert(r == 0); } blocking_put(db_env, db, nrows, sleeptime); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-set-range-0.cc b/storage/tokudb/PerconaFT/src/tests/blocking-set-range-0.cc index a0e03ab3293..7c2e7b0fb26 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-set-range-0.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-set-range-0.cc @@ -126,12 +126,15 @@ static void *blocking_set_range_thread(void *arg) { static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime, uint64_t the_key) { int r; toku_pthread_t tids[nthreads]; - struct blocking_set_range_args a = { db_env, db, nrows, sleeptime, the_key }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_set_range_thread, &a); assert(r == 0); + struct blocking_set_range_args a = {db_env, db, nrows, sleeptime, the_key}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, + blocking_set_range_thread, &a); + assert(r == 0); } blocking_set_range(db_env, db, nrows, sleeptime, the_key); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-set-range-n.cc b/storage/tokudb/PerconaFT/src/tests/blocking-set-range-n.cc index 056d74dbdd2..54a5846eb5e 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-set-range-n.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-set-range-n.cc @@ -121,12 +121,15 @@ static void *blocking_set_range_thread(void *arg) { static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime, uint64_t the_key) { int r; toku_pthread_t tids[nthreads]; - struct blocking_set_range_args a = { db_env, db, nrows, sleeptime, the_key }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_set_range_thread, &a); assert(r == 0); + struct blocking_set_range_args a = {db_env, db, nrows, sleeptime, the_key}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, + blocking_set_range_thread, &a); + assert(r == 0); } blocking_set_range(db_env, db, nrows, sleeptime, the_key); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-set-range-reverse-0.cc b/storage/tokudb/PerconaFT/src/tests/blocking-set-range-reverse-0.cc index 1e7fca85bff..59e582547bf 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-set-range-reverse-0.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-set-range-reverse-0.cc @@ -126,12 +126,15 @@ static void *blocking_set_range_thread(void *arg) { static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime, uint64_t the_key) { int r; toku_pthread_t tids[nthreads]; - struct blocking_set_range_args a = { db_env, db, nrows, sleeptime, the_key }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_set_range_thread, &a); assert(r == 0); + struct blocking_set_range_args a = {db_env, db, nrows, sleeptime, the_key}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, + blocking_set_range_thread, &a); + assert(r == 0); } blocking_set_range(db_env, db, nrows, sleeptime, the_key); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-set.cc b/storage/tokudb/PerconaFT/src/tests/blocking-set.cc index ba64989842a..aa433c87be1 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-set.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-set.cc @@ -182,12 +182,14 @@ int test_main(int argc, char * const argv[]) { populate(db_env, db, nrows); toku_pthread_t tids[nthreads]; - struct blocking_set_args a = { db_env, db, nrows, sleeptime }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_set_thread, &a); assert(r == 0); + struct blocking_set_args a = {db_env, db, nrows, sleeptime}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, blocking_set_thread, &a); + assert(r == 0); } blocking_set(db_env, db, nrows, sleeptime); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/blocking-table-lock.cc b/storage/tokudb/PerconaFT/src/tests/blocking-table-lock.cc index 9ae33c56621..0953a4e80de 100644 --- a/storage/tokudb/PerconaFT/src/tests/blocking-table-lock.cc +++ b/storage/tokudb/PerconaFT/src/tests/blocking-table-lock.cc @@ -133,12 +133,17 @@ int test_main(int argc, char * const argv[]) { r = db->open(db, NULL, db_filename, NULL, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT|DB_THREAD, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); toku_pthread_t tids[nthreads]; - struct blocking_table_lock_args a = { db_env, db, nrows, sleeptime }; - for (int i = 0; i < nthreads-1; i++) { - r = toku_pthread_create(&tids[i], NULL, blocking_table_lock_thread, &a); assert(r == 0); + struct blocking_table_lock_args a = {db_env, db, nrows, sleeptime}; + for (int i = 0; i < nthreads - 1; i++) { + r = toku_pthread_create(toku_uninstrumented, + &tids[i], + nullptr, + blocking_table_lock_thread, + &a); + assert(r == 0); } blocking_table_lock(db_env, db, nrows, sleeptime); - for (int i = 0; i < nthreads-1; i++) { + for (int i = 0; i < nthreads - 1; i++) { void *ret; r = toku_pthread_join(tids[i], &ret); assert(r == 0); } diff --git a/storage/tokudb/PerconaFT/src/tests/checkpoint_fairness.cc b/storage/tokudb/PerconaFT/src/tests/checkpoint_fairness.cc index be82f8c352f..f62dfbc76d3 100644 --- a/storage/tokudb/PerconaFT/src/tests/checkpoint_fairness.cc +++ b/storage/tokudb/PerconaFT/src/tests/checkpoint_fairness.cc @@ -113,10 +113,14 @@ int test_main(int argc, char * const argv[]) { { int chk_r = db->open(db, NULL, "db", NULL, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT, 0666); CKERR(chk_r); } pthread_t thds[n_threads]; - int ids[n_threads]; - for (int i=0; i<n_threads; i++) { - ids[i]=i; - { int chk_r = toku_pthread_create(&thds[i], NULL, start_txns, &ids[i]); CKERR(chk_r); } + int ids[n_threads]; + for (int i = 0; i < n_threads; i++) { + ids[i] = i; + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &thds[i], nullptr, start_txns, &ids[i]); + CKERR(chk_r); + } } start_checkpoints(); diff --git a/storage/tokudb/PerconaFT/src/tests/checkpoint_stress.cc b/storage/tokudb/PerconaFT/src/tests/checkpoint_stress.cc index 221471a8045..135a9843ce4 100644 --- a/storage/tokudb/PerconaFT/src/tests/checkpoint_stress.cc +++ b/storage/tokudb/PerconaFT/src/tests/checkpoint_stress.cc @@ -271,24 +271,29 @@ run_test (int iter, int die) { } // take checkpoint (all dictionaries) - snapshot(NULL, 1); + snapshot(NULL, 1); if (die) { - // separate thread will perform random acts on other dictionaries (not 0) - int r = toku_pthread_create(&thread, 0, random_acts, (void *) dictionaries); - CKERR(r); - // this thead will scribble over dictionary 0 before crash to verify that - // post-checkpoint inserts are not in the database - DB* db = dictionaries[0].db; - if (iter & 1) - scribble(db, iter); - else - thin_out(db, iter); - uint32_t delay = myrandom(); - delay &= 0xFFF; // select lower 12 bits, shifted up 8 for random number ... - delay = delay << 8; // ... uniformly distributed between 0 and 1M ... - usleep(delay); // ... to sleep up to one second (1M usec) - drop_dead(); + // separate thread will perform random acts on other dictionaries (not + // 0) + int r = toku_pthread_create( + toku_uninstrumented, &thread, nullptr, + random_acts, static_cast<void*>(dictionaries)); + CKERR(r); + // this thead will scribble over dictionary 0 before crash to verify + // that + // post-checkpoint inserts are not in the database + DB* db = dictionaries[0].db; + if (iter & 1) + scribble(db, iter); + else + thin_out(db, iter); + uint32_t delay = myrandom(); + delay &= + 0xFFF; // select lower 12 bits, shifted up 8 for random number ... + delay = delay << 8; // ... uniformly distributed between 0 and 1M ... + usleep(delay); // ... to sleep up to one second (1M usec) + drop_dead(); } else { for (i = 0; i < NUM_DICTIONARIES; i++) { diff --git a/storage/tokudb/PerconaFT/src/tests/db-put-simple-deadlock-threads.cc b/storage/tokudb/PerconaFT/src/tests/db-put-simple-deadlock-threads.cc index ada46bd7516..e5bcc6af7bb 100644 --- a/storage/tokudb/PerconaFT/src/tests/db-put-simple-deadlock-threads.cc +++ b/storage/tokudb/PerconaFT/src/tests/db-put-simple-deadlock-threads.cc @@ -57,8 +57,8 @@ struct test_seq { static void test_seq_init(struct test_seq *seq) { seq->state = 0; - toku_mutex_init(&seq->lock, NULL); - toku_cond_init(&seq->cv, NULL); + toku_mutex_init(toku_uninstrumented, &seq->lock, nullptr); + toku_cond_init(toku_uninstrumented, &seq->cv, nullptr); } static void test_seq_destroy(struct test_seq *seq) { @@ -146,8 +146,9 @@ static void simple_deadlock(DB_ENV *db_env, DB *db, int do_txn, int n) { struct test_seq test_seq; ZERO_STRUCT(test_seq); test_seq_init(&test_seq); toku_pthread_t tid; - struct run_txn_b_arg arg = { &test_seq, txn_b, db, n}; - r = toku_pthread_create(&tid, NULL, run_txn_b, &arg); + struct run_txn_b_arg arg = {&test_seq, txn_b, db, n}; + r = toku_pthread_create( + toku_uninstrumented, &tid, nullptr, run_txn_b, &arg); test_seq_sleep(&test_seq, 0); insert_row(db, txn_a, htonl(0), 0, 0); diff --git a/storage/tokudb/PerconaFT/src/tests/db-put-simple-lockwait.cc b/storage/tokudb/PerconaFT/src/tests/db-put-simple-lockwait.cc index 365ea39eced..0fbe851532e 100644 --- a/storage/tokudb/PerconaFT/src/tests/db-put-simple-lockwait.cc +++ b/storage/tokudb/PerconaFT/src/tests/db-put-simple-lockwait.cc @@ -95,10 +95,11 @@ static void simple_lockwait(DB_ENV *db_env, DB *db, int do_txn, int nrows, int n insert_row(db, txns[0], htonl(0), 0, 0); toku_pthread_t tids[ntxns]; - for (int i = 1 ; i < ntxns; i++) { + for (int i = 1; i < ntxns; i++) { struct insert_one_arg *XMALLOC(arg); - *arg = (struct insert_one_arg) { txns[i], db}; - r = toku_pthread_create(&tids[i], NULL, insert_one, arg); + *arg = (struct insert_one_arg){txns[i], db}; + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, insert_one, arg); } sleep(10); diff --git a/storage/tokudb/PerconaFT/src/tests/db-put-update-deadlock.cc b/storage/tokudb/PerconaFT/src/tests/db-put-update-deadlock.cc index 3660f11d1bd..7a968058d7f 100644 --- a/storage/tokudb/PerconaFT/src/tests/db-put-update-deadlock.cc +++ b/storage/tokudb/PerconaFT/src/tests/db-put-update-deadlock.cc @@ -128,10 +128,12 @@ static void update_deadlock(DB_ENV *db_env, DB *db, int do_txn, int nrows, int n // get write locks toku_pthread_t tids[ntxns]; - for (int i = 0 ; i < ntxns; i++) { + for (int i = 0; i < ntxns; i++) { struct write_one_arg *XMALLOC(arg); - *arg = (struct write_one_arg) { txns[i], db, (int) htonl((i + 1) % ntxns), 0}; - r = toku_pthread_create(&tids[i], NULL, write_one_f, arg); + *arg = + (struct write_one_arg){txns[i], db, (int)htonl((i + 1) % ntxns), 0}; + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, write_one_f, arg); } #else // get read locks @@ -141,10 +143,11 @@ static void update_deadlock(DB_ENV *db_env, DB *db, int do_txn, int nrows, int n // get write locks toku_pthread_t tids[ntxns]; - for (int i = 0 ; i < ntxns; i++) { + for (int i = 0; i < ntxns; i++) { struct write_one_arg *XMALLOC(arg); - *arg = (struct write_one_arg) { txns[i], db, (int) htonl(0), 0}; - r = toku_pthread_create(&tids[i], NULL, write_one_f, arg); + *arg = (struct write_one_arg){txns[i], db, (int)htonl(0), 0}; + r = toku_pthread_create( + toku_uninstrumented, &tids[i], nullptr, write_one_f, arg); } #endif diff --git a/storage/tokudb/PerconaFT/src/tests/filesize.cc b/storage/tokudb/PerconaFT/src/tests/filesize.cc index 6ab22245e68..9dfeea131c6 100644 --- a/storage/tokudb/PerconaFT/src/tests/filesize.cc +++ b/storage/tokudb/PerconaFT/src/tests/filesize.cc @@ -170,11 +170,9 @@ get_file_pathname(void) { if (verbose) printf("path = %s\n", path); } - -static int -getsizeM(void) { +static int getsizeM(void) { toku_struct_stat buf; - int r = toku_stat(path, &buf); + int r = toku_stat(path, &buf, toku_uninstrumented); CKERR(r); int sizeM = (int)buf.st_size >> 20; check_fragmentation(); diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-bw.cc b/storage/tokudb/PerconaFT/src/tests/hotindexer-bw.cc index 82471f30e45..5336bc3329f 100644 --- a/storage/tokudb/PerconaFT/src/tests/hotindexer-bw.cc +++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-bw.cc @@ -302,12 +302,19 @@ static void test_indexer(DB *src, DB **dbs) CKERR(r); toku_mutex_unlock(&put_lock); - // start threads doing additional inserts - no lock issues since indexer already created - r = toku_pthread_create(&client_threads[0], 0, client, (void *)&client_specs[0]); CKERR(r); -// r = toku_pthread_create(&client_threads[1], 0, client, (void *)&client_specs[1]); CKERR(r); + // start threads doing additional inserts - no lock issues since indexer + // already created + r = toku_pthread_create(toku_uninstrumented, + &client_threads[0], + nullptr, + client, + static_cast<void *>(&client_specs[0])); + CKERR(r); + // r = toku_pthread_create(toku_uninstrumented, &client_threads[1], 0, + // client, (void *)&client_specs[1]); CKERR(r); struct timeval start, now; - if ( verbose ) { + if (verbose) { printf("test_indexer build\n"); gettimeofday(&start,0); } @@ -342,14 +349,13 @@ static void test_indexer(DB *src, DB **dbs) if ( verbose ) printf("test_indexer done\n"); } - -static void run_test(void) -{ +static void run_test(void) { int r; - toku_mutex_init(&put_lock, NULL); + toku_mutex_init(toku_uninstrumented, &put_lock, nullptr); toku_os_recursive_delete(TOKU_TEST_FILENAME); - r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); - char logname[TOKU_PATH_MAX+1]; + r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU + S_IRWXG + S_IRWXO); + CKERR(r); + char logname[TOKU_PATH_MAX + 1]; r = toku_os_mkdir(toku_path_join(logname, 2, TOKU_TEST_FILENAME, "log"), S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_env_create(&env, 0); CKERR(r); diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-multiclient.cc b/storage/tokudb/PerconaFT/src/tests/hotindexer-multiclient.cc index eefc621f80a..004a19eccec 100644 --- a/storage/tokudb/PerconaFT/src/tests/hotindexer-multiclient.cc +++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-multiclient.cc @@ -303,13 +303,25 @@ static void test_indexer(DB *src, DB **dbs) r = indexer->set_poll_function(indexer, poll_print, NULL); CKERR(r); - // start threads doing additional inserts - no lock issues since indexer already created - r = toku_pthread_create(&client_threads[0], 0, client, (void *)&client_specs[0]); CKERR(r); - r = toku_pthread_create(&client_threads[1], 0, client, (void *)&client_specs[1]); CKERR(r); -// r = toku_pthread_create(&client_threads[2], 0, client, (void *)&client_specs[2]); CKERR(r); + // start threads doing additional inserts - no lock issues since indexer + // already created + r = toku_pthread_create(toku_uninstrumented, + &client_threads[0], + nullptr, + client, + static_cast<void *>(&client_specs[0])); + CKERR(r); + r = toku_pthread_create(toku_uninstrumented, + &client_threads[1], + nullptr, + client, + static_cast<void *>(&client_specs[1])); + CKERR(r); + // r = toku_pthread_create(toku_uninstrumented, &client_threads[2], 0, + // client, (void *)&client_specs[2]); CKERR(r); struct timeval start, now; - if ( verbose ) { + if (verbose) { printf("test_indexer build\n"); gettimeofday(&start,0); } diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-put-abort.cc b/storage/tokudb/PerconaFT/src/tests/hotindexer-put-abort.cc index 27501291b15..f4dd9c53ed5 100644 --- a/storage/tokudb/PerconaFT/src/tests/hotindexer-put-abort.cc +++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-put-abort.cc @@ -142,11 +142,14 @@ run_test(void) { } // run the indexer - struct indexer_arg indexer_arg = { env, src_db, 1, &dest_db }; + struct indexer_arg indexer_arg = {env, src_db, 1, &dest_db}; toku_pthread_t pid; - r = toku_pthread_create(&pid, NULL, indexer_thread, &indexer_arg); assert_zero(r); + r = toku_pthread_create( + toku_uninstrumented, &pid, nullptr, indexer_thread, &indexer_arg); + assert_zero(r); - r = txn->abort(txn); assert_zero(r); + r = txn->abort(txn); + assert_zero(r); void *ret; r = toku_pthread_join(pid, &ret); assert_zero(r); diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-put-commit.cc b/storage/tokudb/PerconaFT/src/tests/hotindexer-put-commit.cc index 59b40037125..99c9bf301cc 100644 --- a/storage/tokudb/PerconaFT/src/tests/hotindexer-put-commit.cc +++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-put-commit.cc @@ -166,12 +166,16 @@ run_test(void) { } // run the indexer - struct indexer_arg indexer_arg = { env, src_db, 1, &dest_db }; + struct indexer_arg indexer_arg = {env, src_db, 1, &dest_db}; toku_pthread_t pid; - r = toku_pthread_create(&pid, NULL, indexer_thread, &indexer_arg); assert_zero(r); - - if (verbose) fprintf(stderr, "commit start\n"); - r = txn->commit(txn, 0); assert_zero(r); + r = toku_pthread_create( + toku_uninstrumented, &pid, nullptr, indexer_thread, &indexer_arg); + assert_zero(r); + + if (verbose) + fprintf(stderr, "commit start\n"); + r = txn->commit(txn, 0); + assert_zero(r); if (verbose) fprintf(stderr, "commit end\n"); void *ret; diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-with-queries.cc b/storage/tokudb/PerconaFT/src/tests/hotindexer-with-queries.cc index a0be49c1617..2bc60142f3c 100644 --- a/storage/tokudb/PerconaFT/src/tests/hotindexer-with-queries.cc +++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-with-queries.cc @@ -114,10 +114,12 @@ static void query_only(DB *src) client_init(); // start thread doing query - r = toku_pthread_create(client_thread, 0, client, (void *)src); + r = toku_pthread_create( + toku_uninstrumented, client_thread, nullptr, + client, static_cast<void *>(src)); CKERR(r); - r = toku_pthread_join(*client_thread, &t0); + r = toku_pthread_join(*client_thread, &t0); CKERR(r); client_cleanup(); @@ -150,10 +152,13 @@ static void test_indexer(DB *src, DB **dbs) CKERR(r); // start thread doing query - r = toku_pthread_create(client_thread, 0, client, (void *)src); CKERR(r); + r = toku_pthread_create( + toku_uninstrumented, client_thread, nullptr, + client, static_cast<void *>(src)); + CKERR(r); struct timeval start, now; - if ( verbose ) { + if (verbose) { printf("test_indexer build\n"); gettimeofday(&start,0); } diff --git a/storage/tokudb/PerconaFT/src/tests/locktree_escalation_stalls.cc b/storage/tokudb/PerconaFT/src/tests/locktree_escalation_stalls.cc index b4874472bcb..e6c1b18b2b6 100644 --- a/storage/tokudb/PerconaFT/src/tests/locktree_escalation_stalls.cc +++ b/storage/tokudb/PerconaFT/src/tests/locktree_escalation_stalls.cc @@ -191,17 +191,22 @@ static void run_test(uint64_t max_i, int n_small) { env, big_db, max_i, big_test, }; toku_pthread_t big_id; - r = toku_pthread_create(&big_id, NULL, test_f, &big_test_args); + r = toku_pthread_create( + toku_uninstrumented, &big_id, nullptr, test_f, &big_test_args); assert(r == 0); struct test_args small_test_args[n_small]; toku_pthread_t small_id[n_small]; for (int i = 0; i < n_small; i++) { - small_test_args[i] = { env, small_db, max_i, small_test }; - r = toku_pthread_create(&small_id[i], NULL, test_f, &small_test_args[i]); + small_test_args[i] = {env, small_db, max_i, small_test}; + r = toku_pthread_create(toku_uninstrumented, + &small_id[i], + nullptr, + test_f, + &small_test_args[i]); assert(r == 0); } - + void *big_ret; r = toku_pthread_join(big_id, &big_ret); assert(r == 0); diff --git a/storage/tokudb/PerconaFT/src/tests/recover-test_crash_in_flusher_thread.h b/storage/tokudb/PerconaFT/src/tests/recover-test_crash_in_flusher_thread.h index 0ed68207ed4..5c10d0cb712 100644 --- a/storage/tokudb/PerconaFT/src/tests/recover-test_crash_in_flusher_thread.h +++ b/storage/tokudb/PerconaFT/src/tests/recover-test_crash_in_flusher_thread.h @@ -70,13 +70,18 @@ static void *do_checkpoint_and_crash(void *arg) { static void flt_callback(int flt_state, void* extra) { cnt++; if (verbose) printf("flt_state!! %d\n", flt_state); - if (cnt > 0 && !starting_a_chkpt && flt_state == state_to_crash) { - starting_a_chkpt = true; - if (verbose) printf("flt_state %d\n", flt_state); - int r = toku_pthread_create(&checkpoint_tid, NULL, do_checkpoint_and_crash, extra); - assert(r==0); - usleep(2*1000*1000); - } + if (cnt > 0 && !starting_a_chkpt && flt_state == state_to_crash) { + starting_a_chkpt = true; + if (verbose) + printf("flt_state %d\n", flt_state); + int r = toku_pthread_create(toku_uninstrumented, + &checkpoint_tid, + nullptr, + do_checkpoint_and_crash, + extra); + assert(r == 0); + usleep(2 * 1000 * 1000); + } } diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_stress.cc b/storage/tokudb/PerconaFT/src/tests/recovery_stress.cc index b1f5f98a90a..1646030cc85 100644 --- a/storage/tokudb/PerconaFT/src/tests/recovery_stress.cc +++ b/storage/tokudb/PerconaFT/src/tests/recovery_stress.cc @@ -515,19 +515,8 @@ static void run_test (int iter) { // if requesting crash, randomly do other non-committed acts, then "drop_dead" if (iter > 0) { - if (verbose) printf("dying\n"); -#if 0 - // separate thread will perform random acts on other dictionaries (not 0) - r = toku_pthread_create(&thread, 0, random_acts, (void *) dictionaries); - CKERR(r); - // this thead will scribble over dictionary 0 before crash to verify that - // post-checkpoint inserts are not in the database - DB* db = dictionaries[0].db; - if (iter & 1) - scribble(db, iter); - else - thin_out(db, iter); -#endif + if (verbose) + printf("dying\n"); uint32_t delay = myrandom(); delay &= 0xFFF; // select lower 12 bits, shifted up 8 for random number ... delay = delay << 8; // ... uniformly distributed between 0 and 1M ... diff --git a/storage/tokudb/PerconaFT/src/tests/stress_openclose.h b/storage/tokudb/PerconaFT/src/tests/stress_openclose.h index 3f10bfe8aeb..3a55ca4486f 100644 --- a/storage/tokudb/PerconaFT/src/tests/stress_openclose.h +++ b/storage/tokudb/PerconaFT/src/tests/stress_openclose.h @@ -245,19 +245,16 @@ stress_table(DB_ENV *env, DB **dbp, struct cli_args *cli_args) { // which they can choose a random db to either touch or query XMALLOC_N(num_buckets, buckets); for (int i = 0; i < num_buckets; i++) { - struct db_bucket bucket = { - .env = env, - .db = dbp[i], - .is_open = true - }; + struct db_bucket bucket = {.env = env, .db = dbp[i], .is_open = true}; buckets[i] = bucket; - toku_mutex_init(&buckets[i].mutex, NULL); + toku_mutex_init(toku_uninstrumented, &buckets[i].mutex, nullptr); } // run all of the query and update workers. they may randomly open // and close the dbs in each db_bucket to be some random dictionary, // so when they're done we'll have to clean up the mess so this // stress test can exit gracefully expecting db[i] = the ith db - //verbose_printf("stressing %d tables using %d update threads, %d query threads\n", + // verbose_printf("stressing %d tables using %d update threads, %d query + // threads\n", // num_buckets, update_threads, query_threads); verbose_printf("stressing %d tables using %d update threads\n", num_buckets, update_threads); diff --git a/storage/tokudb/PerconaFT/src/tests/test3039.cc b/storage/tokudb/PerconaFT/src/tests/test3039.cc index 54a2b3bf730..6c38fa13592 100644 --- a/storage/tokudb/PerconaFT/src/tests/test3039.cc +++ b/storage/tokudb/PerconaFT/src/tests/test3039.cc @@ -184,30 +184,36 @@ void do_threads (unsigned long long N, int do_nonlocal) { toku_pthread_t ths[2]; struct reader_thread_state rstates[2] = {{.elapsed_time = 0.0, .n_did_read = 0, - .n_to_read = (long long signed) N, - .do_local = 1, - .finish = 0}, + .n_to_read = (long long signed)N, + .do_local = 1, + .finish = 0}, {.elapsed_time = 0.0, .n_did_read = 0, .n_to_read = -1, - .do_local = 0, - .finish = 0}}; + .do_local = 0, + .finish = 0}}; int n_to_create = do_nonlocal ? 2 : 1; - for (int i=0; i<n_to_create; i++) { - int r = toku_pthread_create(&ths[i], 0, reader_thread, (void*)&rstates[i]); - CKERR(r); + for (int i = 0; i < n_to_create; i++) { + int r = toku_pthread_create(toku_uninstrumented, + &ths[i], + nullptr, + reader_thread, + static_cast<void *>(&rstates[i])); + CKERR(r); } - for (int i=0; i<n_to_create; i++) { - void *retval; - int r = toku_pthread_join(ths[i], &retval); - CKERR(r); - assert(retval==0); - if (verbose) { - printf("%9s thread time = %8.2fs on %9lld reads (%.3f us/read)\n", - (i==0 ? "local" : "nonlocal"), - rstates[i].elapsed_time, rstates[i].n_did_read, rstates[i].elapsed_time/rstates[i].n_did_read * 1e6); - } - rstates[1].finish = 1; + for (int i = 0; i < n_to_create; i++) { + void *retval; + int r = toku_pthread_join(ths[i], &retval); + CKERR(r); + assert(retval == 0); + if (verbose) { + printf("%9s thread time = %8.2fs on %9lld reads (%.3f us/read)\n", + (i == 0 ? "local" : "nonlocal"), + rstates[i].elapsed_time, + rstates[i].n_did_read, + rstates[i].elapsed_time / rstates[i].n_did_read * 1e6); + } + rstates[1].finish = 1; } if (verbose && do_nonlocal) { printf("total %9lld reads (%.3f us/read)\n", diff --git a/storage/tokudb/PerconaFT/src/tests/test_1672532.cc b/storage/tokudb/PerconaFT/src/tests/test_1672532.cc new file mode 100644 index 00000000000..721e7677824 --- /dev/null +++ b/storage/tokudb/PerconaFT/src/tests/test_1672532.cc @@ -0,0 +1,210 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include "test.h" +// to verify the DB_LOCKING_READ works to lock the read rows for snapshot +// isolaton. +// we create a db, then init a read transaction with repeatable-read isolation +// and +// locking read flag, then we start another transaction to grab the write lock. +// DB_LOCKING_READ is defined here to just make the before and after tests work +// (before +// test did not have DB_LOCKING_READ flag). +#if !defined(DB_LOCKING_READ) +#define DB_LOCKING_READ 0 +#endif +static int prelock_range(DBC *cursor, int left, int right) { + DBT key_left; + dbt_init(&key_left, &left, sizeof left); + DBT key_right; + dbt_init(&key_right, &right, sizeof right); + int r = cursor->c_set_bounds(cursor, &key_left, &key_right, true, 0); + return r; +} + +static void test_read_write_range(DB_ENV *env, + DB *db, + uint32_t iso_flags, + int expect_r) { + int r; + + DB_TXN *txn_a = NULL; + r = env->txn_begin(env, NULL, &txn_a, iso_flags); + assert_zero(r); + DB_TXN *txn_b = NULL; + r = env->txn_begin(env, NULL, &txn_b, iso_flags); + assert_zero(r); + + DBC *cursor_a = NULL; + r = db->cursor(db, txn_a, &cursor_a, DB_LOCKING_READ); + assert_zero(r); + DBC *cursor_b = NULL; + r = db->cursor(db, txn_b, &cursor_b, DB_RMW); + assert_zero(r); + + r = prelock_range(cursor_a, htonl(10), htonl(100)); + assert_zero(r); + r = prelock_range(cursor_b, htonl(50), htonl(200)); + assert(r == expect_r); + + r = cursor_a->c_close(cursor_a); + assert_zero(r); + r = cursor_b->c_close(cursor_b); + assert_zero(r); + + r = txn_a->commit(txn_a, 0); + assert_zero(r); + r = txn_b->commit(txn_b, 0); + assert_zero(r); +} + +static void test_read_write_point(DB_ENV *env, + DB *db, + uint32_t iso_flags, + int expect_r) { + int r; + + DB_TXN *txn1 = NULL; + r = env->txn_begin(env, NULL, &txn1, iso_flags); + assert_zero(r); + + DB_TXN *txn2 = NULL; + r = env->txn_begin(env, NULL, &txn2, iso_flags); + assert_zero(r); + + DBC *c1 = NULL; + r = db->cursor(db, txn1, &c1, DB_LOCKING_READ); + assert_zero(r); + + DBC *c2 = NULL; + r = db->cursor(db, txn2, &c2, DB_RMW); + assert_zero(r); + + int k = htonl(42); + DBT key; + dbt_init(&key, &k, sizeof k); + DBT val; + memset(&val, 0, sizeof val); + r = c1->c_get(c1, &key, &val, DB_SET); + assert_zero(r); + + r = c2->c_get(c2, &key, &val, DB_SET); + assert(r == expect_r); + + r = c1->c_close(c1); + assert_zero(r); + r = c2->c_close(c2); + assert_zero(r); + + r = txn1->commit(txn1, 0); + assert_zero(r); + r = txn2->commit(txn2, 0); + assert_zero(r); +} + +int test_main(int argc, char *const argv[]) { + int r; + + const char *env_dir = TOKU_TEST_FILENAME; + const char *db_filename = "lockingreadtest"; + + parse_args(argc, argv); + + char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1]; + snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir); + r = system(rm_cmd); + assert_zero(r); + + r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); + assert_zero(r); + + DB_ENV *env = NULL; + r = db_env_create(&env, 0); + assert_zero(r); + int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | + DB_INIT_LOCK | DB_INIT_LOG; + r = env->open( + env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + assert_zero(r); + + // create the db + DB *db = NULL; + r = db_create(&db, env, 0); + assert_zero(r); + DB_TXN *create_txn = NULL; + r = env->txn_begin(env, NULL, &create_txn, 0); + assert_zero(r); + r = db->open(db, + create_txn, + db_filename, + NULL, + DB_BTREE, + DB_CREATE, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + assert_zero(r); + r = create_txn->commit(create_txn, 0); + assert_zero(r); + + // add a record + + DB_TXN *write_txn = NULL; + r = env->txn_begin(env, NULL, &write_txn, 0); + assert_zero(r); + + int k = htonl(42); + int v = 42; + DBT key; + dbt_init(&key, &k, sizeof k); + DBT val; + dbt_init(&val, &v, sizeof v); + r = db->put(db, write_txn, &key, &val, DB_NOOVERWRITE); + assert_zero(r); + r = write_txn->commit(write_txn, 0); + assert_zero(r); + + test_read_write_range(env, db, DB_TXN_SNAPSHOT, DB_LOCK_NOTGRANTED); + test_read_write_point(env, db, DB_TXN_SNAPSHOT, DB_LOCK_NOTGRANTED); + + r = db->close(db, 0); + assert_zero(r); + + r = env->close(env, 0); + assert_zero(r); + return 0; +} diff --git a/storage/tokudb/PerconaFT/src/tests/test_3645.cc b/storage/tokudb/PerconaFT/src/tests/test_3645.cc index bb5566b032d..e1fa37bef54 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_3645.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_3645.cc @@ -239,33 +239,61 @@ test_evictions (void) { // make the forward fast scanner myargs[0].fast = true; myargs[0].fwd = true; - { int chk_r = toku_pthread_create(&mytids[0], NULL, scan_db, &myargs[0]); CKERR(chk_r); } + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &mytids[0], nullptr, scan_db, &myargs[0]); + CKERR(chk_r); + } // make the forward slow scanner myargs[1].fast = false; myargs[1].fwd = true; - { int chk_r = toku_pthread_create(&mytids[1], NULL, scan_db, &myargs[1]); CKERR(chk_r); } + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &mytids[1], nullptr, scan_db, &myargs[1]); + CKERR(chk_r); + } // make the backward fast scanner myargs[2].fast = true; myargs[2].fwd = false; - { int chk_r = toku_pthread_create(&mytids[2], NULL, scan_db, &myargs[2]); CKERR(chk_r); } + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &mytids[2], nullptr, scan_db, &myargs[2]); + CKERR(chk_r); + } // make the backward slow scanner myargs[3].fast = false; myargs[3].fwd = false; - { int chk_r = toku_pthread_create(&mytids[3], NULL, scan_db, &myargs[3]); CKERR(chk_r); } + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &mytids[3], nullptr, scan_db, &myargs[3]); + CKERR(chk_r); + } // make the guy that updates the db - { int chk_r = toku_pthread_create(&mytids[4], NULL, update_db, &myargs[4]); CKERR(chk_r); } + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &mytids[4], nullptr, update_db, &myargs[4]); + CKERR(chk_r); + } // make the guy that does point queries - { int chk_r = toku_pthread_create(&mytids[5], NULL, ptquery_db, &myargs[5]); CKERR(chk_r); } + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &mytids[5], nullptr, ptquery_db, &myargs[5]); + CKERR(chk_r); + } // make the guy that sleeps - { int chk_r = toku_pthread_create(&mytids[6], NULL, test_time, NULL); CKERR(chk_r); } - - for (uint32_t i = 0; i < sizeof(myargs)/sizeof(myargs[0]); i++) { + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &mytids[6], nullptr, test_time, nullptr); + CKERR(chk_r); + } + + for (uint32_t i = 0; i < sizeof(myargs) / sizeof(myargs[0]); i++) { void *ret; r = toku_pthread_join(mytids[i], &ret); assert_zero(r); } diff --git a/storage/tokudb/PerconaFT/src/tests/test_4015.cc b/storage/tokudb/PerconaFT/src/tests/test_4015.cc index dc18242b489..1231e3b4bca 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_4015.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_4015.cc @@ -151,10 +151,18 @@ int test_main(int argc, char * const argv[]) { { int chk_r = db->open(db, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR(chk_r); } DBT desc; dbt_init(&desc, "foo", sizeof("foo")); - IN_TXN_COMMIT(env, NULL, txn, 0, - { int chk_r = db->change_descriptor(db, txn, &desc, DB_UPDATE_CMP_DESCRIPTOR); CKERR(chk_r); }); + IN_TXN_COMMIT(env, NULL, txn, 0, { + int chk_r = + db->change_descriptor(db, txn, &desc, DB_UPDATE_CMP_DESCRIPTOR); + CKERR(chk_r); + }); pthread_t thd; - { int chk_r = toku_pthread_create(&thd, NULL, startA, NULL); CKERR(chk_r); } + { + int chk_r = + toku_pthread_create( + toku_uninstrumented, &thd, nullptr, startA, nullptr); + CKERR(chk_r); + } startB(); diff --git a/storage/tokudb/PerconaFT/src/tests/test_abort1.cc b/storage/tokudb/PerconaFT/src/tests/test_abort1.cc index 55449610ffb..7b60364878b 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_abort1.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_abort1.cc @@ -87,10 +87,12 @@ test_db_open_aborts (void) { CKERR2(r, DB_NOTFOUND); } toku_struct_stat statbuf; - char filename[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(filename, 2, TOKU_TEST_FILENAME, "foo.db"), &statbuf); - assert(r!=0); - assert(errno==ENOENT); + char filename[TOKU_PATH_MAX + 1]; + r = toku_stat(toku_path_join(filename, 2, TOKU_TEST_FILENAME, "foo.db"), + &statbuf, + toku_uninstrumented); + assert(r != 0); + assert(errno == ENOENT); } r=env->close(env, 0); assert(r==0); @@ -153,10 +155,12 @@ test_db_put_aborts (void) { CAST_FROM_VOIDP(filename, iname.data); assert(filename); } - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), &statbuf); - assert(r==0); + toku_struct_stat statbuf; + char fullfile[TOKU_PATH_MAX + 1]; + r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), + &statbuf, + toku_uninstrumented); + assert(r == 0); toku_free(filename); } // But the item should not be in it. diff --git a/storage/tokudb/PerconaFT/src/tests/test_abort4.cc b/storage/tokudb/PerconaFT/src/tests/test_abort4.cc index e67efb465b2..e797fc640d8 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_abort4.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_abort4.cc @@ -160,10 +160,12 @@ verify_and_tear_down(int close_first) { CAST_FROM_VOIDP(filename, iname.data); assert(filename); } - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), &statbuf); - assert(r==0); + toku_struct_stat statbuf; + char fullfile[TOKU_PATH_MAX + 1]; + r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), + &statbuf, + toku_uninstrumented); + assert(r == 0); toku_free(filename); } CKERR(r); diff --git a/storage/tokudb/PerconaFT/src/tests/test_abort5.cc b/storage/tokudb/PerconaFT/src/tests/test_abort5.cc index 728afcce1b9..d5d056d5126 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_abort5.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_abort5.cc @@ -191,10 +191,12 @@ verify_and_tear_down(int close_first) { CAST_FROM_VOIDP(filename, iname.data); assert(filename); } - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), &statbuf); - assert(r==0); + toku_struct_stat statbuf; + char fullfile[TOKU_PATH_MAX + 1]; + r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), + &statbuf, + toku_uninstrumented); + assert(r == 0); toku_free(filename); } if (close_first) { diff --git a/storage/tokudb/PerconaFT/src/tests/test_forkjoin.cc b/storage/tokudb/PerconaFT/src/tests/test_forkjoin.cc index 910cf46e513..766372a15c2 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_forkjoin.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_forkjoin.cc @@ -48,12 +48,13 @@ f (void *arg) { return arg; } -int -test_main(int argc, char *const argv[]) { +int test_main(int argc, char *const argv[]) { parse_args(argc, argv); toku_pthread_t t; - int r = toku_pthread_create(&t, 0, f, 0); assert(r == 0); + int r = toku_pthread_create(toku_uninstrumented, &t, nullptr, f, nullptr); + assert(r == 0); void *ret; - r = toku_pthread_join(t, &ret); assert(r == 0); + r = toku_pthread_join(t, &ret); + assert(r == 0); return 0; } diff --git a/storage/tokudb/PerconaFT/src/tests/test_groupcommit_count.cc b/storage/tokudb/PerconaFT/src/tests/test_groupcommit_count.cc index 414ddd2b70a..35104c2a569 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_groupcommit_count.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_groupcommit_count.cc @@ -87,12 +87,16 @@ test_groupcommit (int nthreads) { int i; toku_pthread_t threads[nthreads]; int whichthread[nthreads]; - for (i=0; i<nthreads; i++) { - whichthread[i]=i; - r=toku_pthread_create(&threads[i], 0, start_a_thread, &whichthread[i]); + for (i = 0; i < nthreads; i++) { + whichthread[i] = i; + r = toku_pthread_create(toku_uninstrumented, + &threads[i], + nullptr, + start_a_thread, + &whichthread[i]); } - for (i=0; i<nthreads; i++) { - toku_pthread_join(threads[i], 0); + for (i = 0; i < nthreads; i++) { + toku_pthread_join(threads[i], 0); } r=db->close(db, 0); assert(r==0); diff --git a/storage/tokudb/PerconaFT/src/tests/test_groupcommit_perf.cc b/storage/tokudb/PerconaFT/src/tests/test_groupcommit_perf.cc index 94eaf2a01a9..6f872d10a32 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_groupcommit_perf.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_groupcommit_perf.cc @@ -84,12 +84,16 @@ test_groupcommit (int nthreads) { int i; toku_pthread_t threads[nthreads]; int whichthread[nthreads]; - for (i=0; i<nthreads; i++) { - whichthread[i]=i; - r=toku_pthread_create(&threads[i], 0, start_a_thread, &whichthread[i]); + for (i = 0; i < nthreads; i++) { + whichthread[i] = i; + r = toku_pthread_create(toku_uninstrumented, + &threads[i], + nullptr, + start_a_thread, + &whichthread[i]); } - for (i=0; i<nthreads; i++) { - toku_pthread_join(threads[i], 0); + for (i = 0; i < nthreads; i++) { + toku_pthread_join(threads[i], 0); } #if 0 r=env->txn_begin(env, 0, &tid, 0); CKERR(r); diff --git a/storage/tokudb/PerconaFT/src/tests/test_iterate_pending_lock_requests.cc b/storage/tokudb/PerconaFT/src/tests/test_iterate_pending_lock_requests.cc index f2bf0a2c756..66471a74af2 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_iterate_pending_lock_requests.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_iterate_pending_lock_requests.cc @@ -112,12 +112,17 @@ int test_main(int UU(argc), char *const UU(argv[])) { acquire_lock(txn1, magic_key); acquire_lock_extra e1(txn2, magic_key); - r = toku_pthread_create(&thread1, NULL, acquire_lock_thread, &e1); CKERR(r); + r = toku_pthread_create( + toku_uninstrumented, &thread1, nullptr, acquire_lock_thread, &e1); + CKERR(r); acquire_lock_extra e2(txn3, magic_key); - r = toku_pthread_create(&thread2, NULL, acquire_lock_thread, &e2); CKERR(r); + r = toku_pthread_create( + toku_uninstrumented, &thread2, nullptr, acquire_lock_thread, &e2); + CKERR(r); sleep(1); - r = env->iterate_pending_lock_requests(env, iterate_callback, NULL); CKERR(r); + r = env->iterate_pending_lock_requests(env, iterate_callback, NULL); + CKERR(r); invariant(iterate_callback_called == 2); void *v; diff --git a/storage/tokudb/PerconaFT/src/tests/test_lock_timeout_callback.cc b/storage/tokudb/PerconaFT/src/tests/test_lock_timeout_callback.cc index 75ea49ec858..571bae69916 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_lock_timeout_callback.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_lock_timeout_callback.cc @@ -118,7 +118,8 @@ int test_main(int UU(argc), char *const UU(argv[])) { acquire_lock(txn2, magic_key + 1); toku_pthread_t thread; acquire_lock_extra e(txn1, magic_key + 1); - r = toku_pthread_create(&thread, NULL, acquire_lock_thread, &e); + r = toku_pthread_create( + toku_uninstrumented, &thread, nullptr, acquire_lock_thread, &e); usleep(100000); acquire_lock(txn2, magic_key); invariant(callback_calls == 2); diff --git a/storage/tokudb/PerconaFT/src/tests/test_log1.cc b/storage/tokudb/PerconaFT/src/tests/test_log1.cc index 65fa1d830ef..5473a8e3d5e 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_log1.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_log1.cc @@ -87,10 +87,12 @@ static void make_db (bool close_env) { r=tid->commit(tid, 0); assert(r==0); r=db->close(db, 0); assert(r==0); { - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), &statbuf); - assert(r==0); + toku_struct_stat statbuf; + char fullfile[TOKU_PATH_MAX + 1]; + r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), + &statbuf, + toku_uninstrumented); + assert(r == 0); toku_free(filename); } if (close_env) { diff --git a/storage/tokudb/PerconaFT/src/tests/test_log1_abort.cc b/storage/tokudb/PerconaFT/src/tests/test_log1_abort.cc index 92c3657a154..c66409fb823 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_log1_abort.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_log1_abort.cc @@ -80,11 +80,13 @@ test_main (int UU(argc), char UU(*const argv[])) { r=env->close(env, 0); assert(r==0); { - toku_struct_stat statbuf; - char filename[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(filename, 2, TOKU_TEST_FILENAME, "foo.db"), &statbuf); - assert(r==-1); - assert(errno==ENOENT); + toku_struct_stat statbuf; + char filename[TOKU_PATH_MAX + 1]; + r = toku_stat(toku_path_join(filename, 2, TOKU_TEST_FILENAME, "foo.db"), + &statbuf, + toku_uninstrumented); + assert(r == -1); + assert(errno == ENOENT); } return 0; } diff --git a/storage/tokudb/PerconaFT/src/tests/test_logmax.cc b/storage/tokudb/PerconaFT/src/tests/test_logmax.cc index e5de0a5d906..133eb36ddb5 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_logmax.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_logmax.cc @@ -50,14 +50,19 @@ check_logmax (int max) { struct dirent *ent; while ((ent=readdir(dir))) { if ((ent->d_type==DT_REG || ent->d_type==DT_UNKNOWN) && strncmp(ent->d_name, "log", 3)==0) { - // It is a "log*" file - char full_fname[TOKU_PATH_MAX+1]; - toku_struct_stat sbuf; - int r = toku_stat(toku_path_join(full_fname, 2, TOKU_TEST_FILENAME, ent->d_name), &sbuf); - assert(r==0); - if (verbose) - printf("%s is of size %" PRId64 "\n", ent->d_name, (int64_t)sbuf.st_size); - if (sbuf.st_size > max) any_too_big=1; + // It is a "log*" file + char full_fname[TOKU_PATH_MAX + 1]; + toku_struct_stat sbuf; + int r = toku_stat( + toku_path_join(full_fname, 2, TOKU_TEST_FILENAME, ent->d_name), + &sbuf, + toku_uninstrumented); + assert(r == 0); + if (verbose) + printf("%s is of size %" PRId64 "\n", + ent->d_name, + (int64_t)sbuf.st_size); + if (sbuf.st_size > max) any_too_big=1; } } assert(!any_too_big); diff --git a/storage/tokudb/PerconaFT/src/tests/test_multiple_checkpoints_block_commit.cc b/storage/tokudb/PerconaFT/src/tests/test_multiple_checkpoints_block_commit.cc index 66e9f4a5ec8..da53a7bfe11 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_multiple_checkpoints_block_commit.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_multiple_checkpoints_block_commit.cc @@ -101,10 +101,17 @@ static void run_test(void) { toku_pthread_t chkpt1_tid; toku_pthread_t chkpt2_tid; - - { int chk_r = toku_pthread_create(&chkpt1_tid, NULL, run_checkpoint, NULL); CKERR(chk_r); } - { int chk_r = toku_pthread_create(&chkpt2_tid, NULL, run_checkpoint, NULL); CKERR(chk_r); } - usleep(2*1024*1024); + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &chkpt1_tid, nullptr, run_checkpoint, nullptr); + CKERR(chk_r); + } + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &chkpt2_tid, nullptr, run_checkpoint, nullptr); + CKERR(chk_r); + } + usleep(2 * 1024 * 1024); struct timeval tstart; gettimeofday(&tstart, NULL); DB_TXN *txn = NULL; diff --git a/storage/tokudb/PerconaFT/src/tests/test_stress_hot_indexing.cc b/storage/tokudb/PerconaFT/src/tests/test_stress_hot_indexing.cc index e0bf0d2ec6f..6395f591660 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_stress_hot_indexing.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_stress_hot_indexing.cc @@ -312,12 +312,11 @@ stress_table(DB_ENV *env, DB **dbp, struct cli_args *cli_args) { run_workers(myargs, num_threads, cli_args->num_seconds, false, cli_args); } -int -test_main(int argc, char *const argv[]) { +int test_main(int argc, char *const argv[]) { gid_count = 0; memset(hi_gid, 0, sizeof(hi_gid)); - toku_mutex_init(&hi_lock, NULL); - toku_mutex_init(&fops_lock, NULL); + toku_mutex_init(toku_uninstrumented, &hi_lock, nullptr); + toku_mutex_init(toku_uninstrumented, &fops_lock, nullptr); hot_db = NULL; struct cli_args args = get_default_args(); // let's make default checkpointing period really slow diff --git a/storage/tokudb/PerconaFT/src/tests/test_thread_insert.cc b/storage/tokudb/PerconaFT/src/tests/test_thread_insert.cc index 0d5245e8f80..a266de638f1 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_thread_insert.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_thread_insert.cc @@ -150,10 +150,13 @@ test_main(int argc, char *const argv[]) { work[i].endno = n; } - if (verbose) printf("pid:%d\n", toku_os_getpid()); + if (verbose) + printf("pid:%d\n", toku_os_getpid()); - for (i=1; i<nthreads; i++) { - r = toku_pthread_create(&work[i].tid, 0, do_inserts, &work[i]); assert(r == 0); + for (i = 1; i < nthreads; i++) { + r = toku_pthread_create( + toku_uninstrumented, &work[i].tid, nullptr, do_inserts, &work[i]); + assert(r == 0); } work[0].do_exit = 0; diff --git a/storage/tokudb/PerconaFT/src/tests/test_txn_abort7.cc b/storage/tokudb/PerconaFT/src/tests/test_txn_abort7.cc index fb428f437b1..0a4463644b3 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_txn_abort7.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_txn_abort7.cc @@ -79,10 +79,12 @@ test_abort_create (void) { CAST_FROM_VOIDP(filename, iname.data); assert(filename); } - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), &statbuf); - assert(r==0); + toku_struct_stat statbuf; + char fullfile[TOKU_PATH_MAX + 1]; + r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), + &statbuf, + toku_uninstrumented); + assert(r == 0); toku_free(filename); } @@ -100,10 +102,13 @@ test_abort_create (void) { CKERR2(r, DB_NOTFOUND); } toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), &statbuf); - assert(r!=0); - assert(errno==ENOENT); + char fullfile[TOKU_PATH_MAX + 1]; + r = toku_stat( + toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), + &statbuf, + toku_uninstrumented); + assert(r != 0); + assert(errno == ENOENT); } r = env->close(env, 0); assert(r == 0); diff --git a/storage/tokudb/PerconaFT/src/tests/threaded_stress_test_helpers.h b/storage/tokudb/PerconaFT/src/tests/threaded_stress_test_helpers.h index f2bacceed9d..e232f327d10 100644 --- a/storage/tokudb/PerconaFT/src/tests/threaded_stress_test_helpers.h +++ b/storage/tokudb/PerconaFT/src/tests/threaded_stress_test_helpers.h @@ -469,11 +469,11 @@ static int get_commit_flags(struct cli_args *args) { } struct worker_extra { - struct arg* thread_arg; + struct arg *thread_arg; toku_mutex_t *operation_lock_mutex; - struct rwlock *operation_lock; + struct st_rwlock *operation_lock; uint64_t *counters; - int64_t pad[4]; // pad to 64 bytes + int64_t pad[4]; // pad to 64 bytes }; static void lock_worker_op(struct worker_extra* we) { @@ -1772,11 +1772,12 @@ static int run_workers( ) { int r; - const struct perf_formatter *perf_formatter = &perf_formatters[cli_args->perf_output_format]; + const struct perf_formatter *perf_formatter = + &perf_formatters[cli_args->perf_output_format]; toku_mutex_t mutex = ZERO_MUTEX_INITIALIZER; - toku_mutex_init(&mutex, nullptr); - struct rwlock rwlock; - rwlock_init(&rwlock); + toku_mutex_init(toku_uninstrumented, &mutex, nullptr); + struct st_rwlock rwlock; + rwlock_init(toku_uninstrumented, &rwlock); toku_pthread_t tids[num_threads]; toku_pthread_t time_tid; if (cli_args->print_performance) { @@ -1798,15 +1799,26 @@ static int run_workers( worker_extra[i].thread_arg = &thread_args[i]; worker_extra[i].operation_lock = &rwlock; worker_extra[i].operation_lock_mutex = &mutex; - XCALLOC_N((int) NUM_OPERATION_TYPES, worker_extra[i].counters); + XCALLOC_N((int)NUM_OPERATION_TYPES, worker_extra[i].counters); TOKU_DRD_IGNORE_VAR(worker_extra[i].counters); - { int chk_r = toku_pthread_create(&tids[i], nullptr, worker, &worker_extra[i]); CKERR(chk_r); } + { + int chk_r = toku_pthread_create(toku_uninstrumented, + &tids[i], + nullptr, + worker, + &worker_extra[i]); + CKERR(chk_r); + } if (verbose) - printf("%lu created\n", (unsigned long) tids[i]); + printf("%lu created\n", (unsigned long)tids[i]); + } + { + int chk_r = toku_pthread_create( + toku_uninstrumented, &time_tid, nullptr, test_time, &tte); + CKERR(chk_r); } - { int chk_r = toku_pthread_create(&time_tid, nullptr, test_time, &tte); CKERR(chk_r); } if (verbose) - printf("%lu created\n", (unsigned long) time_tid); + printf("%lu created\n", (unsigned long)time_tid); void *ret; r = toku_pthread_join(time_tid, &ret); assert_zero(r); @@ -1817,17 +1829,22 @@ static int run_workers( // threads (i.e. there is some runaway thread). struct sleep_and_crash_extra sac_extra; ZERO_STRUCT(sac_extra); - toku_mutex_init(&sac_extra.mutex, nullptr); - toku_cond_init(&sac_extra.cond, nullptr); + toku_mutex_init(toku_uninstrumented, &sac_extra.mutex, nullptr); + toku_cond_init(toku_uninstrumented, &sac_extra.cond, nullptr); sac_extra.seconds = cli_args->join_timeout; sac_extra.is_setup = false; sac_extra.threads_have_joined = false; toku_mutex_lock(&sac_extra.mutex); toku_pthread_t sac_thread; - r = toku_pthread_create(&sac_thread, nullptr, sleep_and_crash, &sac_extra); + r = toku_pthread_create(toku_uninstrumented, + &sac_thread, + nullptr, + sleep_and_crash, + &sac_extra); assert_zero(r); - // Wait for sleep_and_crash thread to get set up, spinning is ok, this should be quick. + // Wait for sleep_and_crash thread to get set up, spinning is ok, this + // should be quick. while (!sac_extra.is_setup) { toku_mutex_unlock(&sac_extra.mutex); r = toku_pthread_yield(); diff --git a/storage/tokudb/PerconaFT/src/tests/txn_manager_handle_snapshot_atomicity.cc b/storage/tokudb/PerconaFT/src/tests/txn_manager_handle_snapshot_atomicity.cc index 30cc16d73a7..469e78f492f 100644 --- a/storage/tokudb/PerconaFT/src/tests/txn_manager_handle_snapshot_atomicity.cc +++ b/storage/tokudb/PerconaFT/src/tests/txn_manager_handle_snapshot_atomicity.cc @@ -52,8 +52,8 @@ struct test_sync { static void test_sync_init(struct test_sync *UU(sync)) { #if TOKU_DEBUG_TXN_SYNC sync->state = 0; - toku_mutex_init(&sync->lock, NULL); - toku_cond_init(&sync->cv, NULL); + toku_mutex_init(toku_uninstrumented, &sync->lock, nullptr); + toku_cond_init(toku_uninstrumented, &sync->cv, nullptr); #endif } diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc index 886832cec6a..9875e9227d4 100644 --- a/storage/tokudb/PerconaFT/src/ydb.cc +++ b/storage/tokudb/PerconaFT/src/ydb.cc @@ -228,7 +228,7 @@ env_fs_poller(void *arg) { int in_red; // set true to prevent certain operations (returning ENOSPC) // get the fs sizes for the home dir - uint64_t avail_size, total_size; + uint64_t avail_size = 0, total_size = 0; r = toku_get_filesystem_sizes(env->i->dir, &avail_size, NULL, &total_size); assert(r == 0); in_yellow = (avail_size < 2 * env_fs_redzone(env, total_size)); @@ -644,7 +644,7 @@ static int validate_env(DB_ENV *env, path = toku_construct_full_name( 2, env->i->dir, toku_product_name_strings.environmentdictionary); assert(path); - r = toku_stat(path, &buf); + r = toku_stat(path, &buf, toku_uninstrumented); if (r == 0) { expect_newenv = false; // persistent info exists } else { @@ -669,7 +669,7 @@ static int validate_env(DB_ENV *env, path = toku_construct_full_name( 2, env->i->dir, toku_product_name_strings.rollback_cachefile); assert(path); - r = toku_stat(path, &buf); + r = toku_stat(path, &buf, toku_uninstrumented); if (r == 0) { if (expect_newenv) // rollback cachefile exists, but persistent env // is missing @@ -711,7 +711,7 @@ static int validate_env(DB_ENV *env, path = toku_construct_full_name( 2, env->i->dir, toku_product_name_strings.fileopsdirectory); assert(path); - r = toku_stat(path, &buf); + r = toku_stat(path, &buf, toku_uninstrumented); if (r == 0) { if (expect_newenv) // fileops directory exists, but persistent env // is missing @@ -857,10 +857,11 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { // Verify that the home exists. toku_struct_stat buf; - r = toku_stat(home, &buf); + r = toku_stat(home, &buf, toku_uninstrumented); if (r != 0) { int e = get_error_errno(); - r = toku_ydb_do_error(env, e, "Error from toku_stat(\"%s\",...)\n", home); + r = toku_ydb_do_error( + env, e, "Error from toku_stat(\"%s\",...)\n", home); goto cleanup; } unused_flags &= ~DB_PRIVATE; @@ -876,13 +877,22 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { env->i->open_flags = flags; env->i->open_mode = mode; + // Instrumentation probe start + TOKU_PROBE_START(toku_instr_probe_1); + env_setup_real_data_dir(env); env_setup_real_log_dir(env); env_setup_real_tmp_dir(env); - r = toku_single_process_lock(env->i->dir, "environment", &env->i->envdir_lockfd); - if (r!=0) goto cleanup; - r = toku_single_process_lock(env->i->real_data_dir, "data", &env->i->datadir_lockfd); + // Instrumentation probe stop + toku_instr_probe_1->stop(); + + r = toku_single_process_lock( + env->i->dir, "environment", &env->i->envdir_lockfd); + if (r != 0) + goto cleanup; + r = toku_single_process_lock( + env->i->real_data_dir, "data", &env->i->datadir_lockfd); if (r!=0) goto cleanup; r = toku_single_process_lock(env->i->real_log_dir, "logs", &env->i->logdir_lockfd); if (r!=0) goto cleanup; @@ -2932,7 +2942,8 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) { result->i->open_dbs_by_dname->create(); XMALLOC(result->i->open_dbs_by_dict_id); result->i->open_dbs_by_dict_id->create(); - toku_pthread_rwlock_init(&result->i->open_dbs_rwlock, NULL); + toku_pthread_rwlock_init( + *result_i_open_dbs_rwlock_key, &result->i->open_dbs_rwlock, nullptr); *envp = result; r = 0; diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc index 29d21ad0452..6b4452325f4 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.cc +++ b/storage/tokudb/PerconaFT/src/ydb_db.cc @@ -84,53 +84,56 @@ ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp) { *statp = ydb_db_layer_status; } -void create_iname_hint(const char *dname, char *hint) { +void create_iname_hint(DB_ENV *env, const char *dname, char *hint) { //Requires: size of hint array must be > strlen(dname) //Copy alphanumeric characters only. //Replace strings of non-alphanumeric characters with a single underscore. - bool underscored = false; - while (*dname) { - if (isalnum(*dname)) { - char c = *dname++; - *hint++ = c; - underscored = false; - } - else { - if (!underscored) - *hint++ = '_'; - dname++; - underscored = true; + if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname)) { + assert(dname); + if (*dname == '.') + ++dname; + if (*dname == '/') + ++dname; + bool underscored = false; + bool dbdir_is_parsed = false; + // Do not change the first '/' because this is + // delimiter which splits name into database dir + // and table dir. + while (*dname) { + if (isalnum(*dname) || (*dname == '/' && !dbdir_is_parsed)) { + char c = *dname++; + *hint++ = c; + if (c == '/') + dbdir_is_parsed = true; + underscored = false; + } else if (!dbdir_is_parsed) { + char c = *dname++; + *hint++ = c; + } else { + if (!underscored) + *hint++ = '_'; + dname++; + underscored = true; + } } - } - *hint = '\0'; -} - -void create_iname_hint_for_dbdir(const char *dname, char *hint) { - assert(dname); - if (*dname == '.') - ++dname; - if (*dname == '/') - ++dname; - bool underscored = false; - bool dbdir_is_parsed = false; - // Do not change the first '/' because this is - // delimiter which splits name into database dir - // and table dir. - while (*dname) { - if (isalnum(*dname) || (*dname == '/' && !dbdir_is_parsed)) { - char c = *dname++; - *hint++ = c; - if (c == '/') - dbdir_is_parsed = true; - underscored = false; - } else { - if (!underscored) - *hint++ = '_'; - dname++; - underscored = true; + *hint = '\0'; + } else { + bool underscored = false; + while (*dname) { + if (isalnum(*dname)) { + char c = *dname++; + *hint++ = c; + underscored = false; + } + else { + if (!underscored) + *hint++ = '_'; + dname++; + underscored = true; + } } + *hint = '\0'; } - *hint = '\0'; } // n < 0 means to ignore mark and ignore n @@ -188,10 +191,7 @@ std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open( } else if (is_open) id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1); - if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname)) - create_iname_hint_for_dbdir(dname, hint); - else - create_iname_hint(dname, hint); + create_iname_hint(env, dname, hint); result.reset(create_iname(env, id1, id2, hint, NULL, -1)); @@ -1222,15 +1222,18 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new for (i = 0; i < N; i++) { char * dname = dbs[i]->i->dname; toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1); + // now create new iname char hint[strlen(dname) + 1]; - if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname)) - create_iname_hint_for_dbdir(dname, hint); - else - create_iname_hint(dname, hint); - const char *new_iname = create_iname(env, xid.parent_id64, xid.child_id64, hint, mark, i); // allocates memory for iname_in_env + create_iname_hint(env, dname, hint); + + // allocates memory for iname_in_env + const char *new_iname = + create_iname(env, xid.parent_id64, xid.child_id64, hint, mark, i); new_inames_in_env[i] = new_iname; - toku_fill_dbt(&iname_dbt, new_iname, strlen(new_iname) + 1); // iname_in_env goes in directory + + // iname_in_env goes in directory + toku_fill_dbt(&iname_dbt, new_iname, strlen(new_iname) + 1); rval = toku_db_put(env->i->directory, txn, &dname_dbt, &iname_dbt, 0, true); if (rval) break; } diff --git a/storage/tokudb/PerconaFT/src/ydb_db.h b/storage/tokudb/PerconaFT/src/ydb_db.h index 8be28857c14..ab8fcd2a401 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.h +++ b/storage/tokudb/PerconaFT/src/ydb_db.h @@ -122,8 +122,7 @@ toku_db_destruct_autotxn(DB_TXN *txn, int r, bool changed) { return r; } -void create_iname_hint_for_dbdir(const char *dname, char *hint); -void create_iname_hint(const char *dname, char *hint); +void create_iname_hint(DB_ENV *env, const char *dname, char *hint); char *create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, diff --git a/storage/tokudb/PerconaFT/src/ydb_env_func.cc b/storage/tokudb/PerconaFT/src/ydb_env_func.cc index cd5388106ac..aa8f9063a7e 100644 --- a/storage/tokudb/PerconaFT/src/ydb_env_func.cc +++ b/storage/tokudb/PerconaFT/src/ydb_env_func.cc @@ -110,7 +110,7 @@ void db_env_set_func_pread (ssize_t (*fun)(int, void *, size_t, off_t)) { } void db_env_set_func_loader_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) { - ft_loader_set_os_fwrite(fwrite_fun); + toku_set_func_fwrite(fwrite_fun); } void db_env_set_func_malloc (void *(*f)(size_t)) { diff --git a/storage/tokudb/PerconaFT/src/ydb_txn.cc b/storage/tokudb/PerconaFT/src/ydb_txn.cc index 40b479055f2..dd5fb3b8f0c 100644 --- a/storage/tokudb/PerconaFT/src/ydb_txn.cc +++ b/storage/tokudb/PerconaFT/src/ydb_txn.cc @@ -540,10 +540,12 @@ int toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, uint32_t flags) { db_txn_struct_i(result)->iso = child_isolation; db_txn_struct_i(result)->lt_map.create_no_array(); - toku_mutex_init(&db_txn_struct_i(result)->txn_mutex, NULL); + toku_mutex_init(*db_txn_struct_i_txn_mutex_key, + &db_txn_struct_i(result)->txn_mutex, + nullptr); TXN_SNAPSHOT_TYPE snapshot_type; - switch(db_txn_struct_i(result)->iso){ + switch (db_txn_struct_i(result)->iso) { case(TOKU_ISO_SNAPSHOT): { snapshot_type = TXN_SNAPSHOT_ROOT; @@ -603,7 +605,9 @@ void toku_keep_prepared_txn_callback (DB_ENV *env, TOKUTXN tokutxn) { toku_txn_set_container_db_txn(tokutxn, result); - toku_mutex_init(&db_txn_struct_i(result)->txn_mutex, NULL); + toku_mutex_init(*db_txn_struct_i_txn_mutex_key, + &db_txn_struct_i(result)->txn_mutex, + nullptr); } // Test-only function diff --git a/storage/tokudb/PerconaFT/tools/CMakeLists.txt b/storage/tokudb/PerconaFT/tools/CMakeLists.txt index e8aed51f813..dd54249ab40 100644 --- a/storage/tokudb/PerconaFT/tools/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/tools/CMakeLists.txt @@ -12,6 +12,7 @@ foreach(tool ${tools}) (CMAKE_CXX_FLAGS_DEBUG MATCHES " -DENABLED_DEBUG_SYNC")) target_link_libraries(${tool} sql) endif() + target_link_libraries(${tool} mysys) endif () add_space_separated_property(TARGET ${tool} COMPILE_FLAGS -fvisibility=hidden) diff --git a/storage/tokudb/PerconaFT/util/dbt.cc b/storage/tokudb/PerconaFT/util/dbt.cc index 5bc1cb7446f..b6d2a584a45 100644 --- a/storage/tokudb/PerconaFT/util/dbt.cc +++ b/storage/tokudb/PerconaFT/util/dbt.cc @@ -199,7 +199,8 @@ int toku_dbt_set(uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt) case (DB_DBT_MALLOC): d->data = NULL; d->ulen = 0; - //Fall through to DB_DBT_REALLOC + // fallthrough + // to DB_DBT_REALLOC case (DB_DBT_REALLOC): if (d->ulen < len) { d->ulen = len*2; diff --git a/storage/tokudb/PerconaFT/util/fmutex.h b/storage/tokudb/PerconaFT/util/fmutex.h index 9ff95978a16..fed1bc24a92 100644 --- a/storage/tokudb/PerconaFT/util/fmutex.h +++ b/storage/tokudb/PerconaFT/util/fmutex.h @@ -38,6 +38,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #pragma once +extern toku_instr_key *fmutex_cond_key; + // fair mutex struct fmutex { pthread_mutex_t mutex; @@ -98,8 +100,8 @@ void fmutex_lock(struct fmutex *fm) { } pthread_cond_t cond; - pthread_cond_init(&cond, NULL); - struct queue_item item = { .cond = &cond, .next = NULL }; + pthread_cond_init(*fmutex_cond_key, &cond, nullptr); + struct queue_item item = {.cond = &cond, .next = NULL}; enq_item(fm, &item); // Wait for our turn. diff --git a/storage/tokudb/PerconaFT/util/frwlock.cc b/storage/tokudb/PerconaFT/util/frwlock.cc index ce1e33e85d3..1f821fe5f54 100644 --- a/storage/tokudb/PerconaFT/util/frwlock.cc +++ b/storage/tokudb/PerconaFT/util/frwlock.cc @@ -41,271 +41,311 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include <util/context.h> #include <util/frwlock.h> +toku_instr_key *frwlock_m_wait_read_key; + namespace toku { -static __thread int thread_local_tid = -1; -static int get_local_tid() { - if (thread_local_tid == -1) { - thread_local_tid = toku_os_gettid(); - } - return thread_local_tid; -} - -void frwlock::init(toku_mutex_t *const mutex) { - m_mutex = mutex; - - m_num_readers = 0; - m_num_writers = 0; - m_num_want_write = 0; - m_num_want_read = 0; - m_num_signaled_readers = 0; - m_num_expensive_want_write = 0; - - toku_cond_init(&m_wait_read, nullptr); - m_queue_item_read.cond = &m_wait_read; - m_queue_item_read.next = nullptr; - m_wait_read_is_in_queue = false; - m_current_writer_expensive = false; - m_read_wait_expensive = false; - m_current_writer_tid = -1; - m_blocking_writer_context_id = CTX_INVALID; - - m_wait_head = nullptr; - m_wait_tail = nullptr; -} - -void frwlock::deinit(void) { - toku_cond_destroy(&m_wait_read); -} - -bool frwlock::queue_is_empty(void) const { - return m_wait_head == nullptr; -} - -void frwlock::enq_item(queue_item *const item) { - paranoid_invariant_null(item->next); - if (m_wait_tail != nullptr) { - m_wait_tail->next = item; - } else { - paranoid_invariant_null(m_wait_head); - m_wait_head = item; + static __thread int thread_local_tid = -1; + static int get_local_tid() { + if (thread_local_tid == -1) { + thread_local_tid = toku_os_gettid(); + } + return thread_local_tid; } - m_wait_tail = item; -} - -toku_cond_t *frwlock::deq_item(void) { - paranoid_invariant_notnull(m_wait_head); - paranoid_invariant_notnull(m_wait_tail); - queue_item *item = m_wait_head; - m_wait_head = m_wait_head->next; - if (m_wait_tail == item) { + + void frwlock::init(toku_mutex_t *const mutex +#if defined(TOKU_MYSQL_WITH_PFS) + , + const toku_instr_key &rwlock_instr_key +#endif + ) { + m_mutex = mutex; + + m_num_readers = 0; + m_num_writers = 0; + m_num_want_write = 0; + m_num_want_read = 0; + m_num_signaled_readers = 0; + m_num_expensive_want_write = 0; +#if defined(TOKU_MYSQL_WITH_PFS) + toku_pthread_rwlock_init(rwlock_instr_key, &m_rwlock, nullptr); +#endif + toku_cond_init(toku_uninstrumented, &m_wait_read, nullptr); + m_queue_item_read.cond = &m_wait_read; + m_queue_item_read.next = nullptr; + m_wait_read_is_in_queue = false; + m_current_writer_expensive = false; + m_read_wait_expensive = false; + m_current_writer_tid = -1; + m_blocking_writer_context_id = CTX_INVALID; + + m_wait_head = nullptr; m_wait_tail = nullptr; } - return item->cond; -} - -// Prerequisite: Holds m_mutex. -void frwlock::write_lock(bool expensive) { - toku_mutex_assert_locked(m_mutex); - if (this->try_write_lock(expensive)) { - return; + + void frwlock::deinit(void) { + toku_cond_destroy(&m_wait_read); +#if defined(TOKU_MYSQL_WITH_PFS) + toku_pthread_rwlock_destroy(&m_rwlock); +#endif } - toku_cond_t cond = TOKU_COND_INITIALIZER; - queue_item item = { .cond = &cond, .next = nullptr }; - this->enq_item(&item); + bool frwlock::queue_is_empty(void) const { return m_wait_head == nullptr; } - // Wait for our turn. - ++m_num_want_write; - if (expensive) { - ++m_num_expensive_want_write; - } - if (m_num_writers == 0 && m_num_want_write == 1) { - // We are the first to want a write lock. No new readers can get the lock. - // Set our thread id and context for proper instrumentation. - // see: toku_context_note_frwlock_contention() - m_current_writer_tid = get_local_tid(); - m_blocking_writer_context_id = toku_thread_get_context()->get_id(); - } - toku_cond_wait(&cond, m_mutex); - toku_cond_destroy(&cond); - - // Now it's our turn. - paranoid_invariant(m_num_want_write > 0); - paranoid_invariant_zero(m_num_readers); - paranoid_invariant_zero(m_num_writers); - paranoid_invariant_zero(m_num_signaled_readers); - - // Not waiting anymore; grab the lock. - --m_num_want_write; - if (expensive) { - --m_num_expensive_want_write; + void frwlock::enq_item(queue_item *const item) { + paranoid_invariant_null(item->next); + if (m_wait_tail != nullptr) { + m_wait_tail->next = item; + } else { + paranoid_invariant_null(m_wait_head); + m_wait_head = item; + } + m_wait_tail = item; } - m_num_writers = 1; - m_current_writer_expensive = expensive; - m_current_writer_tid = get_local_tid(); - m_blocking_writer_context_id = toku_thread_get_context()->get_id(); -} - -bool frwlock::try_write_lock(bool expensive) { - toku_mutex_assert_locked(m_mutex); - if (m_num_readers > 0 || m_num_writers > 0 || m_num_signaled_readers > 0 || m_num_want_write > 0) { - return false; + + toku_cond_t *frwlock::deq_item(void) { + paranoid_invariant_notnull(m_wait_head); + paranoid_invariant_notnull(m_wait_tail); + queue_item *item = m_wait_head; + m_wait_head = m_wait_head->next; + if (m_wait_tail == item) { + m_wait_tail = nullptr; + } + return item->cond; } - // No one holds the lock. Grant the write lock. - paranoid_invariant_zero(m_num_want_write); - paranoid_invariant_zero(m_num_want_read); - m_num_writers = 1; - m_current_writer_expensive = expensive; - m_current_writer_tid = get_local_tid(); - m_blocking_writer_context_id = toku_thread_get_context()->get_id(); - return true; -} - -void frwlock::read_lock(void) { - toku_mutex_assert_locked(m_mutex); - if (m_num_writers > 0 || m_num_want_write > 0) { - if (!m_wait_read_is_in_queue) { - // Throw the read cond_t onto the queue. - paranoid_invariant(m_num_signaled_readers == m_num_want_read); - m_queue_item_read.next = nullptr; - this->enq_item(&m_queue_item_read); - m_wait_read_is_in_queue = true; - paranoid_invariant(!m_read_wait_expensive); - m_read_wait_expensive = ( - m_current_writer_expensive || - (m_num_expensive_want_write > 0) - ); + + // Prerequisite: Holds m_mutex. + void frwlock::write_lock(bool expensive) { +#if defined(TOKU_MYSQL_WITH_PFS) + /* Instrumentation start */ + toku_rwlock_instrumentation rwlock_instr; + toku_instr_rwlock_wrlock_wait_start( + rwlock_instr, m_rwlock, __FILE__, __LINE__); +#endif + + toku_mutex_assert_locked(m_mutex); + if (this->try_write_lock(expensive)) { +#if defined(TOKU_MYSQL_WITH_PFS) + /* Instrumentation end */ + toku_instr_rwlock_wrlock_wait_end(rwlock_instr, 0); +#endif + return; } - // Note this contention event in engine status. - toku_context_note_frwlock_contention( - toku_thread_get_context()->get_id(), - m_blocking_writer_context_id - ); + toku_cond_t cond = TOKU_COND_INITIALIZER; + queue_item item = {.cond = &cond, .next = nullptr}; + this->enq_item(&item); // Wait for our turn. - ++m_num_want_read; - toku_cond_wait(&m_wait_read, m_mutex); + ++m_num_want_write; + if (expensive) { + ++m_num_expensive_want_write; + } + if (m_num_writers == 0 && m_num_want_write == 1) { + // We are the first to want a write lock. No new readers can get the + // lock. + // Set our thread id and context for proper instrumentation. + // see: toku_context_note_frwlock_contention() + m_current_writer_tid = get_local_tid(); + m_blocking_writer_context_id = toku_thread_get_context()->get_id(); + } + toku_cond_wait(&cond, m_mutex); + toku_cond_destroy(&cond); // Now it's our turn. + paranoid_invariant(m_num_want_write > 0); + paranoid_invariant_zero(m_num_readers); paranoid_invariant_zero(m_num_writers); - paranoid_invariant(m_num_want_read > 0); - paranoid_invariant(m_num_signaled_readers > 0); + paranoid_invariant_zero(m_num_signaled_readers); // Not waiting anymore; grab the lock. - --m_num_want_read; - --m_num_signaled_readers; + --m_num_want_write; + if (expensive) { + --m_num_expensive_want_write; + } + m_num_writers = 1; + m_current_writer_expensive = expensive; + m_current_writer_tid = get_local_tid(); + m_blocking_writer_context_id = toku_thread_get_context()->get_id(); + +#if defined(TOKU_MYSQL_WITH_PFS) + /* Instrumentation end */ + toku_instr_rwlock_wrlock_wait_end(rwlock_instr, 0); +#endif + } + + bool frwlock::try_write_lock(bool expensive) { + toku_mutex_assert_locked(m_mutex); + if (m_num_readers > 0 || m_num_writers > 0 || + m_num_signaled_readers > 0 || m_num_want_write > 0) { + return false; + } + // No one holds the lock. Grant the write lock. + paranoid_invariant_zero(m_num_want_write); + paranoid_invariant_zero(m_num_want_read); + m_num_writers = 1; + m_current_writer_expensive = expensive; + m_current_writer_tid = get_local_tid(); + m_blocking_writer_context_id = toku_thread_get_context()->get_id(); + return true; } - ++m_num_readers; -} -bool frwlock::try_read_lock(void) { - toku_mutex_assert_locked(m_mutex); - if (m_num_writers > 0 || m_num_want_write > 0) { - return false; + void frwlock::read_lock(void) { +#if defined(TOKU_MYSQL_WITH_PFS) + /* Instrumentation start */ + toku_rwlock_instrumentation rwlock_instr; + toku_instr_rwlock_rdlock_wait_start( + rwlock_instr, m_rwlock, __FILE__, __LINE__); +#endif + toku_mutex_assert_locked(m_mutex); + if (m_num_writers > 0 || m_num_want_write > 0) { + if (!m_wait_read_is_in_queue) { + // Throw the read cond_t onto the queue. + paranoid_invariant(m_num_signaled_readers == m_num_want_read); + m_queue_item_read.next = nullptr; + this->enq_item(&m_queue_item_read); + m_wait_read_is_in_queue = true; + paranoid_invariant(!m_read_wait_expensive); + m_read_wait_expensive = (m_current_writer_expensive || + (m_num_expensive_want_write > 0)); + } + + // Note this contention event in engine status. + toku_context_note_frwlock_contention( + toku_thread_get_context()->get_id(), + m_blocking_writer_context_id); + + // Wait for our turn. + ++m_num_want_read; + toku_cond_wait(&m_wait_read, m_mutex); + + // Now it's our turn. + paranoid_invariant_zero(m_num_writers); + paranoid_invariant(m_num_want_read > 0); + paranoid_invariant(m_num_signaled_readers > 0); + + // Not waiting anymore; grab the lock. + --m_num_want_read; + --m_num_signaled_readers; + } + ++m_num_readers; +#if defined(TOKU_MYSQL_WITH_PFS) + /* Instrumentation end */ + toku_instr_rwlock_rdlock_wait_end(rwlock_instr, 0); +#endif } - // No writer holds the lock. - // No writers are waiting. - // Grant the read lock. - ++m_num_readers; - return true; -} - -void frwlock::maybe_signal_next_writer(void) { - if (m_num_want_write > 0 && m_num_signaled_readers == 0 && m_num_readers == 0) { - toku_cond_t *cond = this->deq_item(); - paranoid_invariant(cond != &m_wait_read); - // Grant write lock to waiting writer. - paranoid_invariant(m_num_want_write > 0); - toku_cond_signal(cond); + + bool frwlock::try_read_lock(void) { + toku_mutex_assert_locked(m_mutex); + if (m_num_writers > 0 || m_num_want_write > 0) { + return false; + } + // No writer holds the lock. + // No writers are waiting. + // Grant the read lock. + ++m_num_readers; + return true; } -} - -void frwlock::read_unlock(void) { - toku_mutex_assert_locked(m_mutex); - paranoid_invariant(m_num_writers == 0); - paranoid_invariant(m_num_readers > 0); - --m_num_readers; - this->maybe_signal_next_writer(); -} - -bool frwlock::read_lock_is_expensive(void) { - toku_mutex_assert_locked(m_mutex); - if (m_wait_read_is_in_queue) { - return m_read_wait_expensive; + + void frwlock::maybe_signal_next_writer(void) { + if (m_num_want_write > 0 && m_num_signaled_readers == 0 && + m_num_readers == 0) { + toku_cond_t *cond = this->deq_item(); + paranoid_invariant(cond != &m_wait_read); + // Grant write lock to waiting writer. + paranoid_invariant(m_num_want_write > 0); + toku_cond_signal(cond); + } + } + + void frwlock::read_unlock(void) { +#ifdef TOKU_MYSQL_WITH_PFS + toku_instr_rwlock_unlock(m_rwlock); +#endif + toku_mutex_assert_locked(m_mutex); + paranoid_invariant(m_num_writers == 0); + paranoid_invariant(m_num_readers > 0); + --m_num_readers; + this->maybe_signal_next_writer(); } - else { - return m_current_writer_expensive || (m_num_expensive_want_write > 0); + + bool frwlock::read_lock_is_expensive(void) { + toku_mutex_assert_locked(m_mutex); + if (m_wait_read_is_in_queue) { + return m_read_wait_expensive; + } else { + return m_current_writer_expensive || + (m_num_expensive_want_write > 0); + } } -} + void frwlock::maybe_signal_or_broadcast_next(void) { + paranoid_invariant(m_num_signaled_readers == 0); -void frwlock::maybe_signal_or_broadcast_next(void) { - paranoid_invariant(m_num_signaled_readers == 0); + if (this->queue_is_empty()) { + paranoid_invariant(m_num_want_write == 0); + paranoid_invariant(m_num_want_read == 0); + return; + } + toku_cond_t *cond = this->deq_item(); + if (cond == &m_wait_read) { + // Grant read locks to all waiting readers + paranoid_invariant(m_wait_read_is_in_queue); + paranoid_invariant(m_num_want_read > 0); + m_num_signaled_readers = m_num_want_read; + m_wait_read_is_in_queue = false; + m_read_wait_expensive = false; + toku_cond_broadcast(cond); + } else { + // Grant write lock to waiting writer. + paranoid_invariant(m_num_want_write > 0); + toku_cond_signal(cond); + } + } - if (this->queue_is_empty()) { - paranoid_invariant(m_num_want_write == 0); - paranoid_invariant(m_num_want_read == 0); - return; + void frwlock::write_unlock(void) { +#if defined(TOKU_MYSQL_WITH_PFS) + toku_instr_rwlock_unlock(m_rwlock); +#endif + toku_mutex_assert_locked(m_mutex); + paranoid_invariant(m_num_writers == 1); + m_num_writers = 0; + m_current_writer_expensive = false; + m_current_writer_tid = -1; + m_blocking_writer_context_id = CTX_INVALID; + this->maybe_signal_or_broadcast_next(); } - toku_cond_t *cond = this->deq_item(); - if (cond == &m_wait_read) { - // Grant read locks to all waiting readers - paranoid_invariant(m_wait_read_is_in_queue); - paranoid_invariant(m_num_want_read > 0); - m_num_signaled_readers = m_num_want_read; - m_wait_read_is_in_queue = false; - m_read_wait_expensive = false; - toku_cond_broadcast(cond); + bool frwlock::write_lock_is_expensive(void) { + toku_mutex_assert_locked(m_mutex); + return (m_num_expensive_want_write > 0) || (m_current_writer_expensive); } - else { - // Grant write lock to waiting writer. - paranoid_invariant(m_num_want_write > 0); - toku_cond_signal(cond); + + uint32_t frwlock::users(void) const { + toku_mutex_assert_locked(m_mutex); + return m_num_readers + m_num_writers + m_num_want_read + + m_num_want_write; + } + uint32_t frwlock::blocked_users(void) const { + toku_mutex_assert_locked(m_mutex); + return m_num_want_read + m_num_want_write; + } + uint32_t frwlock::writers(void) const { + // this is sometimes called as "assert(lock->writers())" when we + // assume we have the write lock. if that's the assumption, we may + // not own the mutex, so we don't assert_locked here + return m_num_writers; + } + uint32_t frwlock::blocked_writers(void) const { + toku_mutex_assert_locked(m_mutex); + return m_num_want_write; + } + uint32_t frwlock::readers(void) const { + toku_mutex_assert_locked(m_mutex); + return m_num_readers; + } + uint32_t frwlock::blocked_readers(void) const { + toku_mutex_assert_locked(m_mutex); + return m_num_want_read; } -} - -void frwlock::write_unlock(void) { - toku_mutex_assert_locked(m_mutex); - paranoid_invariant(m_num_writers == 1); - m_num_writers = 0; - m_current_writer_expensive = false; - m_current_writer_tid = -1; - m_blocking_writer_context_id = CTX_INVALID; - this->maybe_signal_or_broadcast_next(); -} -bool frwlock::write_lock_is_expensive(void) { - toku_mutex_assert_locked(m_mutex); - return (m_num_expensive_want_write > 0) || (m_current_writer_expensive); -} - - -uint32_t frwlock::users(void) const { - toku_mutex_assert_locked(m_mutex); - return m_num_readers + m_num_writers + m_num_want_read + m_num_want_write; -} -uint32_t frwlock::blocked_users(void) const { - toku_mutex_assert_locked(m_mutex); - return m_num_want_read + m_num_want_write; -} -uint32_t frwlock::writers(void) const { - // this is sometimes called as "assert(lock->writers())" when we - // assume we have the write lock. if that's the assumption, we may - // not own the mutex, so we don't assert_locked here - return m_num_writers; -} -uint32_t frwlock::blocked_writers(void) const { - toku_mutex_assert_locked(m_mutex); - return m_num_want_write; -} -uint32_t frwlock::readers(void) const { - toku_mutex_assert_locked(m_mutex); - return m_num_readers; -} -uint32_t frwlock::blocked_readers(void) const { - toku_mutex_assert_locked(m_mutex); - return m_num_want_read; -} } // namespace toku diff --git a/storage/tokudb/PerconaFT/util/frwlock.h b/storage/tokudb/PerconaFT/util/frwlock.h index 52b98a8d727..b02d95e545c 100644 --- a/storage/tokudb/PerconaFT/util/frwlock.h +++ b/storage/tokudb/PerconaFT/util/frwlock.h @@ -48,76 +48,82 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. namespace toku { -class frwlock { -public: - - void init(toku_mutex_t *const mutex); - void deinit(void); - - void write_lock(bool expensive); - bool try_write_lock(bool expensive); - void write_unlock(void); - // returns true if acquiring a write lock will be expensive - bool write_lock_is_expensive(void); - - void read_lock(void); - bool try_read_lock(void); - void read_unlock(void); - // returns true if acquiring a read lock will be expensive - bool read_lock_is_expensive(void); - - uint32_t users(void) const; - uint32_t blocked_users(void) const; - uint32_t writers(void) const; - uint32_t blocked_writers(void) const; - uint32_t readers(void) const; - uint32_t blocked_readers(void) const; - -private: - struct queue_item { - toku_cond_t *cond; - struct queue_item *next; + class frwlock { + public: + void init(toku_mutex_t *const mutex +#if defined(TOKU_MYSQL_WITH_PFS) + , + const toku_instr_key &rwlock_instr_key +#endif + ); + void deinit(void); + + void write_lock(bool expensive); + bool try_write_lock(bool expensive); + void write_unlock(void); + // returns true if acquiring a write lock will be expensive + bool write_lock_is_expensive(void); + + void read_lock(void); + bool try_read_lock(void); + void read_unlock(void); + // returns true if acquiring a read lock will be expensive + bool read_lock_is_expensive(void); + + uint32_t users(void) const; + uint32_t blocked_users(void) const; + uint32_t writers(void) const; + uint32_t blocked_writers(void) const; + uint32_t readers(void) const; + uint32_t blocked_readers(void) const; + + private: + struct queue_item { + toku_cond_t *cond; + struct queue_item *next; + }; + + bool queue_is_empty(void) const; + void enq_item(queue_item *const item); + toku_cond_t *deq_item(void); + void maybe_signal_or_broadcast_next(void); + void maybe_signal_next_writer(void); + + toku_mutex_t *m_mutex; + + uint32_t m_num_readers; + uint32_t m_num_writers; + uint32_t m_num_want_write; + uint32_t m_num_want_read; + uint32_t m_num_signaled_readers; + // number of writers waiting that are expensive + // MUST be < m_num_want_write + uint32_t m_num_expensive_want_write; + // bool that states if the current writer is expensive + // if there is no current writer, then is false + bool m_current_writer_expensive; + // bool that states if waiting for a read + // is expensive + // if there are currently no waiting readers, then set to false + bool m_read_wait_expensive; + // thread-id of the current writer + int m_current_writer_tid; + // context id describing the context of the current writer blocking + // new readers (either because this writer holds the write lock or + // is the first to want the write lock). + context_id m_blocking_writer_context_id; + queue_item m_queue_item_read; + bool m_wait_read_is_in_queue; + + toku_cond_t m_wait_read; +#if defined(TOKU_MYSQL_WITH_PFS) + toku_pthread_rwlock_t m_rwlock; +#endif + queue_item *m_wait_head; + queue_item *m_wait_tail; }; - bool queue_is_empty(void) const; - void enq_item(queue_item *const item); - toku_cond_t *deq_item(void); - void maybe_signal_or_broadcast_next(void); - void maybe_signal_next_writer(void); - - toku_mutex_t *m_mutex; - - uint32_t m_num_readers; - uint32_t m_num_writers; - uint32_t m_num_want_write; - uint32_t m_num_want_read; - uint32_t m_num_signaled_readers; - // number of writers waiting that are expensive - // MUST be < m_num_want_write - uint32_t m_num_expensive_want_write; - // bool that states if the current writer is expensive - // if there is no current writer, then is false - bool m_current_writer_expensive; - // bool that states if waiting for a read - // is expensive - // if there are currently no waiting readers, then set to false - bool m_read_wait_expensive; - // thread-id of the current writer - int m_current_writer_tid; - // context id describing the context of the current writer blocking - // new readers (either because this writer holds the write lock or - // is the first to want the write lock). - context_id m_blocking_writer_context_id; - - toku_cond_t m_wait_read; - queue_item m_queue_item_read; - bool m_wait_read_is_in_queue; - - queue_item *m_wait_head; - queue_item *m_wait_tail; -}; - -ENSURE_POD(frwlock); + ENSURE_POD(frwlock); } // namespace toku diff --git a/storage/tokudb/PerconaFT/util/kibbutz.cc b/storage/tokudb/PerconaFT/util/kibbutz.cc index 7fc24bae727..409bf6bdd7b 100644 --- a/storage/tokudb/PerconaFT/util/kibbutz.cc +++ b/storage/tokudb/PerconaFT/util/kibbutz.cc @@ -72,14 +72,18 @@ struct kibbutz { uint64_t total_execution_time; }; -static void *work_on_kibbutz (void *); +static void *work_on_kibbutz(void *); -int toku_kibbutz_create (int n_workers, KIBBUTZ *kb_ret) { +toku_instr_key *kibbutz_mutex_key; +toku_instr_key *kibbutz_k_cond_key; +toku_instr_key *kibbutz_thread_key; + +int toku_kibbutz_create(int n_workers, KIBBUTZ *kb_ret) { int r = 0; *kb_ret = NULL; KIBBUTZ XCALLOC(k); - toku_mutex_init(&k->mutex, NULL); - toku_cond_init(&k->cond, NULL); + toku_mutex_init(*kibbutz_mutex_key, &k->mutex, nullptr); + toku_cond_init(*kibbutz_k_cond_key, &k->cond, nullptr); k->please_shutdown = false; k->head = NULL; k->tail = NULL; @@ -93,7 +97,11 @@ int toku_kibbutz_create (int n_workers, KIBBUTZ *kb_ret) { XMALLOC_N(n_workers, k->ids); for (int i = 0; i < n_workers; i++) { k->ids[i].k = k; - r = toku_pthread_create(&k->workers[i], NULL, work_on_kibbutz, &k->ids[i]); + r = toku_pthread_create(*kibbutz_thread_key, + &k->workers[i], + nullptr, + work_on_kibbutz, + &k->ids[i]); if (r != 0) { k->n_workers = i; toku_kibbutz_destroy(k); @@ -153,10 +161,14 @@ static void *work_on_kibbutz (void *kidv) { // if there's another item on k->head, then we'll just go grab it now, without waiting for a signal. } if (k->please_shutdown) { - // Don't follow this unless the work is all done, so that when we set please_shutdown, all the work finishes before any threads quit. - ksignal(k); // must wake up anyone else who is waiting, so they can shut down. + // Don't follow this unless the work is all done, so that when we + // set please_shutdown, all the work finishes before any threads + // quit. + ksignal(k); // must wake up anyone else who is waiting, so they can + // shut down. kunlock(k); - return NULL; + toku_instr_delete_current_thread(); + return nullptr; } // There is no work to do and it's not time to shutdown, so wait. kwait(k); diff --git a/storage/tokudb/PerconaFT/util/minicron.cc b/storage/tokudb/PerconaFT/util/minicron.cc index 737c6ef6a4f..c1412015be0 100644 --- a/storage/tokudb/PerconaFT/util/minicron.cc +++ b/storage/tokudb/PerconaFT/util/minicron.cc @@ -43,8 +43,11 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "portability/toku_assert.h" #include "util/minicron.h" -static void -toku_gettime (toku_timespec_t *a) { +toku_instr_key *minicron_p_mutex_key; +toku_instr_key *minicron_p_condvar_key; +toku_instr_key *minicron_thread_key; + +static void toku_gettime(toku_timespec_t *a) { struct timeval tv; gettimeofday(&tv, 0); a->tv_sec = tv.tv_sec; @@ -74,7 +77,8 @@ minicron_do (void *pv) while (1) { if (p->do_shutdown) { toku_mutex_unlock(&p->mutex); - return 0; + toku_instr_delete_current_thread(); + return toku_pthread_done(nullptr); } if (p->period_in_ms == 0) { // if we aren't supposed to do it then just do an untimed wait. @@ -104,7 +108,8 @@ minicron_do (void *pv) // Now we woke up, and we should figure out what to do if (p->do_shutdown) { toku_mutex_unlock(&p->mutex); - return 0; + toku_instr_delete_current_thread(); + return toku_pthread_done(nullptr); } if (p->period_in_ms > 1000) { toku_timespec_t now; @@ -137,17 +142,17 @@ toku_minicron_setup(struct minicron *p, uint32_t period_in_ms, int(*f)(void *), p->f = f; p->arg = arg; toku_gettime(&p->time_of_last_call_to_f); - //printf("now=%.6f", p->time_of_last_call_to_f.tv_sec + p->time_of_last_call_to_f.tv_nsec*1e-9); - p->period_in_ms = period_in_ms; + // printf("now=%.6f", p->time_of_last_call_to_f.tv_sec + + // p->time_of_last_call_to_f.tv_nsec*1e-9); + p->period_in_ms = period_in_ms; p->do_shutdown = false; - toku_mutex_init(&p->mutex, 0); - toku_cond_init (&p->condvar, 0); - return toku_pthread_create(&p->thread, 0, minicron_do, p); + toku_mutex_init(*minicron_p_mutex_key, &p->mutex, nullptr); + toku_cond_init(*minicron_p_condvar_key, &p->condvar, nullptr); + return toku_pthread_create( + *minicron_thread_key, &p->thread, nullptr, minicron_do, p); } - -void -toku_minicron_change_period(struct minicron *p, uint32_t new_period) -{ + +void toku_minicron_change_period(struct minicron *p, uint32_t new_period) { toku_mutex_lock(&p->mutex); p->period_in_ms = new_period; toku_cond_signal(&p->condvar); diff --git a/storage/tokudb/PerconaFT/util/nb_mutex.h b/storage/tokudb/PerconaFT/util/nb_mutex.h index 3490c1b365c..d777961ad78 100644 --- a/storage/tokudb/PerconaFT/util/nb_mutex.h +++ b/storage/tokudb/PerconaFT/util/nb_mutex.h @@ -49,35 +49,67 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // increase parallelism at the expense of single thread performance, we // are experimenting with a single higher level lock. +extern toku_instr_key *nb_mutex_key; + typedef struct nb_mutex *NB_MUTEX; struct nb_mutex { - struct rwlock lock; + struct st_rwlock lock; +#if defined(TOKU_MYSQL_WITH_PFS) + toku_mutex_t toku_mutex; +#endif }; +#if defined(TOKU_MYSQL_WITH_PFS) +#define nb_mutex_init(MK, RK, M) \ + inline_nb_mutex_init(MK, RK, M) +#else +#define nb_mutex_init(MK, RK, M) inline_nb_mutex_init(M) +#endif + // initialize an nb mutex -static __attribute__((__unused__)) -void -nb_mutex_init(NB_MUTEX nb_mutex) { - rwlock_init(&nb_mutex->lock); +inline void inline_nb_mutex_init( +#if defined(TOKU_MYSQL_WITH_PFS) + const toku_instr_key &mutex_instr_key, + const toku_instr_key &rwlock_instr_key, +#endif + NB_MUTEX nb_mutex) { +#if defined(TOKU_MYSQL_WITH_PFS) + toku_mutex_init(mutex_instr_key, &nb_mutex->toku_mutex, nullptr); +#endif + rwlock_init(rwlock_instr_key, &nb_mutex->lock); } // destroy a read write lock -static __attribute__((__unused__)) -void -nb_mutex_destroy(NB_MUTEX nb_mutex) { +inline void nb_mutex_destroy(NB_MUTEX nb_mutex) { +#if defined(TOKU_MYSQL_WITH_PFS) + toku_instr_mutex_destroy(nb_mutex->toku_mutex.psi_mutex); +#endif rwlock_destroy(&nb_mutex->lock); } // obtain a write lock // expects: mutex is locked -static inline void nb_mutex_lock(NB_MUTEX nb_mutex, toku_mutex_t *mutex) { +inline void nb_mutex_lock(NB_MUTEX nb_mutex, toku_mutex_t *mutex) { +#ifdef TOKU_MYSQL_WITH_PFS + toku_mutex_instrumentation mutex_instr; + toku_instr_mutex_lock_start(mutex_instr, + *mutex, + __FILE__, + __LINE__); // TODO: pull these to caller? +#endif rwlock_write_lock(&nb_mutex->lock, mutex); +#if defined(TOKU_MYSQL_WITH_PFS) + toku_instr_mutex_lock_end(mutex_instr, 0); +#endif } // release a write lock // expects: mutex is locked -static inline void nb_mutex_unlock(NB_MUTEX nb_mutex) { +inline void nb_mutex_unlock(NB_MUTEX nb_mutex) { +#if defined(TOKU_MYSQL_WITH_PFS) + toku_instr_mutex_unlock(nb_mutex->toku_mutex.psi_mutex); +#endif rwlock_write_unlock(&nb_mutex->lock); } diff --git a/storage/tokudb/PerconaFT/util/queue.cc b/storage/tokudb/PerconaFT/util/queue.cc index 0716dfb2f59..39dfbbc699a 100644 --- a/storage/tokudb/PerconaFT/util/queue.cc +++ b/storage/tokudb/PerconaFT/util/queue.cc @@ -44,6 +44,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "memory.h" #include <toku_pthread.h> +toku_instr_key *queue_result_mutex_key; +toku_instr_key *queue_result_cond_key; + struct qitem; struct qitem { @@ -81,11 +84,11 @@ int toku_queue_create (QUEUE *q, uint64_t weight_limit) if (result==NULL) return get_error_errno(); result->contents_weight = 0; result->weight_limit = weight_limit; - result->head = NULL; - result->tail = NULL; - result->eof = false; - toku_mutex_init(&result->mutex, NULL); - toku_cond_init(&result->cond, NULL); + result->head = NULL; + result->tail = NULL; + result->eof = false; + toku_mutex_init(*queue_result_mutex_key, &result->mutex, nullptr); + toku_cond_init(*queue_result_cond_key, &result->cond, nullptr); *q = result; return 0; } diff --git a/storage/tokudb/PerconaFT/util/rwlock.h b/storage/tokudb/PerconaFT/util/rwlock.h index 47f78df70d1..d9a13ba9014 100644 --- a/storage/tokudb/PerconaFT/util/rwlock.h +++ b/storage/tokudb/PerconaFT/util/rwlock.h @@ -39,6 +39,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #pragma once #include <toku_assert.h> +#include <toku_portability.h> +#include <toku_instrumentation.h> /* Readers/writers locks implementation * @@ -147,53 +149,81 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // increase parallelism at the expense of single thread performance, we // are experimenting with a single higher level lock. -typedef struct rwlock *RWLOCK; -struct rwlock { - int reader; // the number of readers - int want_read; // the number of blocked readers +extern toku_instr_key *rwlock_cond_key; +extern toku_instr_key *rwlock_wait_read_key; +extern toku_instr_key *rwlock_wait_write_key; + +typedef struct st_rwlock *RWLOCK; +struct st_rwlock { + int reader; // the number of readers + int want_read; // the number of blocked readers toku_cond_t wait_read; int writer; // the number of writers int want_write; // the number of blocked writers toku_cond_t wait_write; - toku_cond_t* wait_users_go_to_zero; + toku_cond_t *wait_users_go_to_zero; +#if defined(TOKU_MYSQL_WITH_PFS) + toku_pthread_rwlock_t prwlock; +#endif }; // returns: the sum of the number of readers, pending readers, writers, and // pending writers static inline int rwlock_users(RWLOCK rwlock) { - return rwlock->reader + rwlock->want_read + rwlock->writer + rwlock->want_write; + return rwlock->reader + rwlock->want_read + rwlock->writer + + rwlock->want_write; } -// initialize a read write lock +#if defined(TOKU_MYSQL_WITH_PFS) +#define rwlock_init(K, R) inline_rwlock_init(K, R) +#else +#define rwlock_init(K, R) inline_rwlock_init(R) +#endif -static __attribute__((__unused__)) -void -rwlock_init(RWLOCK rwlock) { +// initialize a read write lock +static inline __attribute__((__unused__)) void inline_rwlock_init( +#if defined(TOKU_MYSQL_WITH_PFS) + const toku_instr_key &rwlock_instr_key, +#endif + RWLOCK rwlock) { +#if defined(TOKU_MYSQL_WITH_PFS) + toku_pthread_rwlock_init(rwlock_instr_key, &rwlock->prwlock, nullptr); +#endif rwlock->reader = rwlock->want_read = 0; - toku_cond_init(&rwlock->wait_read, 0); rwlock->writer = rwlock->want_write = 0; - toku_cond_init(&rwlock->wait_write, 0); + toku_cond_init(toku_uninstrumented, &rwlock->wait_read, nullptr); + toku_cond_init(toku_uninstrumented, &rwlock->wait_write, nullptr); rwlock->wait_users_go_to_zero = NULL; } // destroy a read write lock -static __attribute__((__unused__)) -void -rwlock_destroy(RWLOCK rwlock) { +static inline __attribute__((__unused__)) void rwlock_destroy(RWLOCK rwlock) { paranoid_invariant(rwlock->reader == 0); paranoid_invariant(rwlock->want_read == 0); paranoid_invariant(rwlock->writer == 0); paranoid_invariant(rwlock->want_write == 0); toku_cond_destroy(&rwlock->wait_read); toku_cond_destroy(&rwlock->wait_write); +#if defined(TOKU_MYSQL_WITH_PFS) + toku_pthread_rwlock_destroy(&rwlock->prwlock); +#endif } // obtain a read lock // expects: mutex is locked static inline void rwlock_read_lock(RWLOCK rwlock, toku_mutex_t *mutex) { +#ifdef TOKU_MYSQL_WITH_PFS + /* Instrumentation start */ + toku_rwlock_instrumentation rwlock_instr; + // TODO: pull location information up to caller + toku_instr_rwlock_rdlock_wait_start( + rwlock_instr, rwlock->prwlock, __FILE__, __LINE__); + +#endif + paranoid_invariant(!rwlock->wait_users_go_to_zero); if (rwlock->writer || rwlock->want_write) { rwlock->want_read++; @@ -203,12 +233,19 @@ static inline void rwlock_read_lock(RWLOCK rwlock, toku_mutex_t *mutex) { rwlock->want_read--; } rwlock->reader++; +#ifdef TOKU_MYSQL_WITH_PFS + /* Instrumentation end */ + toku_instr_rwlock_wrlock_wait_end(rwlock_instr, 0); +#endif } // release a read lock // expects: mutex is locked static inline void rwlock_read_unlock(RWLOCK rwlock) { +#ifdef TOKU_MYSQL_WITH_PFS + toku_instr_rwlock_unlock(rwlock->prwlock); +#endif paranoid_invariant(rwlock->reader > 0); paranoid_invariant(rwlock->writer == 0); rwlock->reader--; @@ -224,6 +261,12 @@ static inline void rwlock_read_unlock(RWLOCK rwlock) { // expects: mutex is locked static inline void rwlock_write_lock(RWLOCK rwlock, toku_mutex_t *mutex) { +#ifdef TOKU_MYSQL_WITH_PFS + /* Instrumentation start */ + toku_rwlock_instrumentation rwlock_instr; + toku_instr_rwlock_wrlock_wait_start( + rwlock_instr, rwlock->prwlock, __FILE__, __LINE__); +#endif paranoid_invariant(!rwlock->wait_users_go_to_zero); if (rwlock->reader || rwlock->writer) { rwlock->want_write++; @@ -233,12 +276,19 @@ static inline void rwlock_write_lock(RWLOCK rwlock, toku_mutex_t *mutex) { rwlock->want_write--; } rwlock->writer++; +#if defined(TOKU_MYSQL_WITH_PFS) + /* Instrumentation end */ + toku_instr_rwlock_wrlock_wait_end(rwlock_instr, 0); +#endif } // release a write lock // expects: mutex is locked static inline void rwlock_write_unlock(RWLOCK rwlock) { +#if defined(TOKU_MYSQL_WITH_PFS) + toku_instr_rwlock_unlock(rwlock->prwlock); +#endif paranoid_invariant(rwlock->reader == 0); paranoid_invariant(rwlock->writer == 1); rwlock->writer--; @@ -284,14 +334,10 @@ static inline int rwlock_read_will_block(RWLOCK rwlock) { return (rwlock->writer > 0 || rwlock->want_write > 0); } -static inline void rwlock_wait_for_users( - RWLOCK rwlock, - toku_mutex_t *mutex - ) -{ +static inline void rwlock_wait_for_users(RWLOCK rwlock, toku_mutex_t *mutex) { paranoid_invariant(!rwlock->wait_users_go_to_zero); toku_cond_t cond; - toku_cond_init(&cond, NULL); + toku_cond_init(toku_uninstrumented, &cond, nullptr); while (rwlock_users(rwlock) > 0) { rwlock->wait_users_go_to_zero = &cond; toku_cond_wait(&cond, mutex); diff --git a/storage/tokudb/PerconaFT/util/tests/marked-omt-test.cc b/storage/tokudb/PerconaFT/util/tests/marked-omt-test.cc index bce1b1a885b..7e60c711c66 100644 --- a/storage/tokudb/PerconaFT/util/tests/marked-omt-test.cc +++ b/storage/tokudb/PerconaFT/util/tests/marked-omt-test.cc @@ -156,7 +156,7 @@ int int_heaviside(const uint32_t &v, const uint32_t &target) { struct stress_shared { stress_omt *omt; volatile bool running; - struct rwlock lock; + struct st_rwlock lock; toku_mutex_t mutex; int num_marker_threads; }; @@ -400,8 +400,8 @@ static void stress_test(int nelts) { struct stress_shared extra; ZERO_STRUCT(extra); extra.omt = &omt; - toku_mutex_init(&extra.mutex, NULL); - rwlock_init(&extra.lock); + toku_mutex_init(toku_uninstrumented, &extra.mutex, nullptr); + rwlock_init(toku_uninstrumented, &extra.lock); extra.running = true; extra.num_marker_threads = num_marker_threads; @@ -422,11 +422,19 @@ static void stress_test(int nelts) { r = myinitstate_r(seed, reader.buf_write, 8, &reader.rand_write); invariant_zero(r); - toku_pthread_create(&marker_threads[i], NULL, stress_mark_worker, &reader); + toku_pthread_create(toku_uninstrumented, + &marker_threads[i], + nullptr, + stress_mark_worker, + &reader); } toku_pthread_t deleter_thread; - toku_pthread_create(&deleter_thread, NULL, stress_delete_worker, &readers[0]); + toku_pthread_create(toku_uninstrumented, + &deleter_thread, + nullptr, + stress_delete_worker, + &readers[0]); toku_pthread_join(deleter_thread, NULL); for (int i = 0; i < num_marker_threads; ++i) { diff --git a/storage/tokudb/PerconaFT/util/tests/minicron-test.cc b/storage/tokudb/PerconaFT/util/tests/minicron-test.cc index a2ddbdeabdf..026ab7446d3 100644 --- a/storage/tokudb/PerconaFT/util/tests/minicron-test.cc +++ b/storage/tokudb/PerconaFT/util/tests/minicron-test.cc @@ -206,11 +206,12 @@ test_main (int argc, const char *argv[]) { toku_pthread_t tests[N]; unsigned int i; - for (i=0; i<N; i++) { - int r=toku_pthread_create(tests+i, 0, testfuns[i], 0); - assert(r==0); + for (i = 0; i < N; i++) { + int r = toku_pthread_create( + toku_uninstrumented, tests + i, nullptr, testfuns[i], nullptr); + assert(r == 0); } - for (i=0; i<N; i++) { + for (i = 0; i < N; i++) { void *v; int r=toku_pthread_join(tests[i], &v); assert(r==0); diff --git a/storage/tokudb/PerconaFT/util/tests/queue-test.cc b/storage/tokudb/PerconaFT/util/tests/queue-test.cc index 433a0aca32a..f87e05bc664 100644 --- a/storage/tokudb/PerconaFT/util/tests/queue-test.cc +++ b/storage/tokudb/PerconaFT/util/tests/queue-test.cc @@ -87,9 +87,11 @@ static void queue_test_0 (uint64_t weight) d_max_weight = 0; QUEUE q; int r; - r = toku_queue_create(&q, weight); assert(r==0); + r = toku_queue_create(&q, weight); + assert(r == 0); toku_pthread_t thread; - r = toku_pthread_create(&thread, NULL, start_0, q); assert(r==0); + r = toku_pthread_create(toku_uninstrumented, &thread, nullptr, start_0, q); + assert(r == 0); enq(q, 0L, weight); enq(q, 1L, weight); enq(q, 2L, weight); diff --git a/storage/tokudb/PerconaFT/util/tests/rwlock_condvar.h b/storage/tokudb/PerconaFT/util/tests/rwlock_condvar.h index 816b98af81c..b49c2780ff8 100644 --- a/storage/tokudb/PerconaFT/util/tests/rwlock_condvar.h +++ b/storage/tokudb/PerconaFT/util/tests/rwlock_condvar.h @@ -67,13 +67,13 @@ struct toku_cv_fair_rwlock_waiter_state { static __thread struct toku_cv_fair_rwlock_waiter_state waitstate = {0, NULL, {PTHREAD_COND_INITIALIZER} }; void toku_cv_fair_rwlock_init (toku_cv_fair_rwlock_t *rwlock) { - rwlock->state=0; + rwlock->state = 0; rwlock->waiters_head = NULL; rwlock->waiters_tail = NULL; - toku_mutex_init(&rwlock->mutex, NULL); + toku_mutex_init(toku_uninstrumented, &rwlock->mutex, nullptr); } -void toku_cv_fair_rwlock_destroy (toku_cv_fair_rwlock_t *rwlock) { +void toku_cv_fair_rwlock_destroy(toku_cv_fair_rwlock_t *rwlock) { toku_mutex_destroy(&rwlock->mutex); } diff --git a/storage/tokudb/PerconaFT/util/tests/sm-basic.cc b/storage/tokudb/PerconaFT/util/tests/sm-basic.cc index 80bc965fd31..0e5eb8364e2 100644 --- a/storage/tokudb/PerconaFT/util/tests/sm-basic.cc +++ b/storage/tokudb/PerconaFT/util/tests/sm-basic.cc @@ -64,7 +64,8 @@ int main(void) { // run the test toku_pthread_t tid; int r; - r = toku_pthread_create(&tid, NULL, sm_test_f, NULL); + r = toku_pthread_create( + toku_uninstrumented, &tid, nullptr, sm_test_f, nullptr); assert_zero(r); void *ret; r = toku_pthread_join(tid, &ret); diff --git a/storage/tokudb/PerconaFT/util/tests/sm-crash-double-free.cc b/storage/tokudb/PerconaFT/util/tests/sm-crash-double-free.cc index c5d7e3e53a7..5aa3565510b 100644 --- a/storage/tokudb/PerconaFT/util/tests/sm-crash-double-free.cc +++ b/storage/tokudb/PerconaFT/util/tests/sm-crash-double-free.cc @@ -64,10 +64,12 @@ int main(void) { toku_scoped_malloc_init(); toku_pthread_t tid; int r; - r = toku_pthread_create(&tid, NULL, sm_test_f, NULL); + r = toku_pthread_create( + toku_uninstrumented, &tid, nullptr, sm_test_f, nullptr); assert_zero(r); void *ret; - while (state != 1) sleep(1); + while (state != 1) + sleep(1); toku_scoped_malloc_destroy_set(); state = 2; r = toku_pthread_join(tid, &ret); diff --git a/storage/tokudb/PerconaFT/util/tests/test-frwlock-fair-writers.cc b/storage/tokudb/PerconaFT/util/tests/test-frwlock-fair-writers.cc index 7fd4801fe11..9a625c32aeb 100644 --- a/storage/tokudb/PerconaFT/util/tests/test-frwlock-fair-writers.cc +++ b/storage/tokudb/PerconaFT/util/tests/test-frwlock-fair-writers.cc @@ -66,9 +66,9 @@ static void *t1_func(void *arg) { int main(void) { int r; - toku_mutex_init(&rwlock_mutex, NULL); + toku_mutex_init(toku_uninstrumented, &rwlock_mutex, nullptr); rwlock.init(&rwlock_mutex); - + const int nthreads = 2; pthread_t tids[nthreads]; for (int i = 0; i < nthreads; i++) { diff --git a/storage/tokudb/PerconaFT/util/tests/test-rwlock-cheapness.cc b/storage/tokudb/PerconaFT/util/tests/test-rwlock-cheapness.cc index 0075ddf9bcb..c0b43c2db1c 100644 --- a/storage/tokudb/PerconaFT/util/tests/test-rwlock-cheapness.cc +++ b/storage/tokudb/PerconaFT/util/tests/test-rwlock-cheapness.cc @@ -101,7 +101,8 @@ static void *do_read_wait(void *arg) { static void launch_cheap_waiter(void) { toku_pthread_t tid; - int r = toku_pthread_create(&tid, NULL, do_cheap_wait, NULL); + int r = toku_pthread_create( + toku_uninstrumented, &tid, nullptr, do_cheap_wait, nullptr); assert_zero(r); toku_pthread_detach(tid); sleep(1); @@ -109,7 +110,8 @@ static void launch_cheap_waiter(void) { static void launch_expensive_waiter(void) { toku_pthread_t tid; - int r = toku_pthread_create(&tid, NULL, do_expensive_wait, NULL); + int r = toku_pthread_create( + toku_uninstrumented, &tid, nullptr, do_expensive_wait, nullptr); assert_zero(r); toku_pthread_detach(tid); sleep(1); @@ -117,7 +119,8 @@ static void launch_expensive_waiter(void) { static void launch_reader(void) { toku_pthread_t tid; - int r = toku_pthread_create(&tid, NULL, do_read_wait, NULL); + int r = toku_pthread_create( + toku_uninstrumented, &tid, nullptr, do_read_wait, nullptr); assert_zero(r); toku_pthread_detach(tid); sleep(1); @@ -132,7 +135,7 @@ static bool locks_are_expensive(void) { } static void test_write_cheapness(void) { - toku_mutex_init(&mutex, NULL); + toku_mutex_init(toku_uninstrumented, &mutex, nullptr); w.init(&mutex); // single expensive write lock diff --git a/storage/tokudb/PerconaFT/util/tests/test-rwlock.cc b/storage/tokudb/PerconaFT/util/tests/test-rwlock.cc index 18ca1229f56..56dd3f6b480 100644 --- a/storage/tokudb/PerconaFT/util/tests/test-rwlock.cc +++ b/storage/tokudb/PerconaFT/util/tests/test-rwlock.cc @@ -245,14 +245,14 @@ static void util_rwlock_unlock (RWLOCK rwlock, toku_mutex_t *mutex) { } // Time the read lock that's in util/rwlock.h -void time_util_rwlock (void) __attribute((__noinline__)); -void time_util_rwlock (void) { - struct rwlock rwlock; +void time_util_rwlock(void) __attribute((__noinline__)); +void time_util_rwlock(void) { + struct st_rwlock rwlock; toku_mutex_t external_mutex; - toku_mutex_init(&external_mutex, NULL); - rwlock_init(&rwlock); - struct timeval start,end; - + toku_mutex_init(toku_uninstrumented, &external_mutex, nullptr); + rwlock_init(toku_uninstrumented, &rwlock); + struct timeval start, end; + util_rwlock_lock(&rwlock, &external_mutex); util_rwlock_unlock(&rwlock, &external_mutex); for (int t=0; t<T; t++) { @@ -271,16 +271,17 @@ void time_util_rwlock (void) { toku_mutex_destroy(&external_mutex); } -// Time the read lock that's in util/rwlock.h, assuming the mutex is already held. -void time_util_prelocked_rwlock (void) __attribute__((__noinline__)); -void time_util_prelocked_rwlock (void) { - struct rwlock rwlock; +// Time the read lock that's in util/rwlock.h, assuming the mutex is already +// held. +void time_util_prelocked_rwlock(void) __attribute__((__noinline__)); +void time_util_prelocked_rwlock(void) { + struct st_rwlock rwlock; toku_mutex_t external_mutex; - toku_mutex_init(&external_mutex, NULL); + toku_mutex_init(toku_uninstrumented, &external_mutex, nullptr); toku_mutex_lock(&external_mutex); - rwlock_init(&rwlock); - struct timeval start,end; - + rwlock_init(toku_uninstrumented, &rwlock); + struct timeval start, end; + rwlock_read_lock(&rwlock, &external_mutex); rwlock_read_unlock(&rwlock); for (int t=0; t<T; t++) { @@ -303,8 +304,8 @@ void time_util_prelocked_rwlock (void) { void time_frwlock_prelocked(void) __attribute__((__noinline__)); void time_frwlock_prelocked(void) { toku_mutex_t external_mutex; - toku_mutex_init(&external_mutex, NULL); - struct timeval start,end; + toku_mutex_init(toku_uninstrumented, &external_mutex, nullptr); + struct timeval start, end; toku::frwlock x; x.init(&external_mutex); toku_mutex_lock(&external_mutex); @@ -340,8 +341,8 @@ void time_frwlock_prelocked(void) { void time_frwlock(void) __attribute__((__noinline__)); void time_frwlock(void) { toku_mutex_t external_mutex; - toku_mutex_init(&external_mutex, NULL); - struct timeval start,end; + toku_mutex_init(toku_uninstrumented, &external_mutex, nullptr); + struct timeval start, end; toku::frwlock x; x.init(&external_mutex); toku_mutex_lock(&external_mutex); diff --git a/storage/tokudb/PerconaFT/util/tests/threadpool-test.cc b/storage/tokudb/PerconaFT/util/tests/threadpool-test.cc index 3379507270d..83c142edc2d 100644 --- a/storage/tokudb/PerconaFT/util/tests/threadpool-test.cc +++ b/storage/tokudb/PerconaFT/util/tests/threadpool-test.cc @@ -67,10 +67,11 @@ struct my_threadpool { static void my_threadpool_init (struct my_threadpool *my_threadpool, int max_threads) { int r; - r = toku_thread_pool_create(&my_threadpool->threadpool, max_threads); assert(r == 0); + r = toku_thread_pool_create(&my_threadpool->threadpool, max_threads); + assert(r == 0); assert(my_threadpool != 0); - toku_mutex_init(&my_threadpool->mutex, 0); - toku_cond_init(&my_threadpool->wait, 0); + toku_mutex_init(toku_uninstrumented, &my_threadpool->mutex, nullptr); + toku_cond_init(toku_uninstrumented, &my_threadpool->wait, nullptr); my_threadpool->closed = 0; my_threadpool->counter = 0; } diff --git a/storage/tokudb/PerconaFT/util/threadpool.cc b/storage/tokudb/PerconaFT/util/threadpool.cc index 146cc63242a..6e0ccf05f93 100644 --- a/storage/tokudb/PerconaFT/util/threadpool.cc +++ b/storage/tokudb/PerconaFT/util/threadpool.cc @@ -48,6 +48,11 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "threadpool.h" +toku_instr_key *tpool_lock_mutex_key; +toku_instr_key *tp_thread_wait_key; +toku_instr_key *tp_pool_wait_free_key; +toku_instr_key *tp_internal_thread_key; + struct toku_thread { struct toku_thread_pool *pool; toku_pthread_t tid; @@ -84,8 +89,12 @@ toku_thread_create(struct toku_thread_pool *pool, struct toku_thread **toku_thre } else { memset(thread, 0, sizeof *thread); thread->pool = pool; - toku_cond_init(&thread->wait, nullptr); - r = toku_pthread_create(&thread->tid, nullptr, toku_thread_run_internal, thread); + toku_cond_init(*tp_thread_wait_key, &thread->wait, nullptr); + r = toku_pthread_create(*tp_internal_thread_key, + &thread->tid, + nullptr, + toku_thread_run_internal, + thread); if (r) { toku_cond_destroy(&thread->wait); toku_free(thread); @@ -105,11 +114,11 @@ toku_thread_run(struct toku_thread *thread, void *(*f)(void *arg), void *arg) { toku_thread_pool_unlock(thread->pool); } -static void -toku_thread_destroy(struct toku_thread *thread) { +static void toku_thread_destroy(struct toku_thread *thread) { int r; void *ret; - r = toku_pthread_join(thread->tid, &ret); invariant(r == 0 && ret == thread); + r = toku_pthread_join(thread->tid, &ret); + invariant(r == 0 && ret == thread); struct toku_thread_pool *pool = thread->pool; toku_thread_pool_lock(pool); toku_list_remove(&thread->free_link); @@ -147,20 +156,20 @@ toku_thread_run_internal(void *arg) { thread->f = nullptr; toku_list_push(&pool->free_threads, &thread->free_link); } - return arg; -} + return toku_pthread_done(arg); +} -int -toku_thread_pool_create(struct toku_thread_pool **pool_return, int max_threads) { +int toku_thread_pool_create(struct toku_thread_pool **pool_return, + int max_threads) { int r; struct toku_thread_pool *CALLOC(pool); if (pool == nullptr) { r = get_error_errno(); } else { - toku_mutex_init(&pool->lock, nullptr); + toku_mutex_init(*tpool_lock_mutex_key, &pool->lock, nullptr); toku_list_init(&pool->free_threads); toku_list_init(&pool->all_threads); - toku_cond_init(&pool->wait_free, nullptr); + toku_cond_init(*tp_pool_wait_free_key, &pool->wait_free, nullptr); pool->cur_threads = 0; pool->max_threads = max_threads; *pool_return = pool; diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 4bd00722623..ebf4b5a22d3 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -31,6 +31,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ha_tokudb.h" #include "sql_db.h" +pfs_key_t ha_tokudb_mutex_key; +pfs_key_t num_DBs_lock_key; #if TOKU_INCLUDE_EXTENDED_KEYS static inline uint get_ext_key_parts(const KEY *key) { @@ -187,13 +189,9 @@ const char* TOKUDB_SHARE::get_state_string(share_state_t state) { void* TOKUDB_SHARE::operator new(size_t sz) { return tokudb::memory::malloc(sz, MYF(MY_WME|MY_ZEROFILL|MY_FAE)); } -void TOKUDB_SHARE::operator delete(void* p) { - tokudb::memory::free(p); -} -TOKUDB_SHARE::TOKUDB_SHARE() : - _num_DBs_lock(), - _mutex() { -} +void TOKUDB_SHARE::operator delete(void* p) { tokudb::memory::free(p); } +TOKUDB_SHARE::TOKUDB_SHARE() + : _num_DBs_lock(num_DBs_lock_key), _mutex(ha_tokudb_mutex_key) {} void TOKUDB_SHARE::init(const char* table_name) { _use_count = 0; thr_lock_init(&_thr_lock); @@ -228,20 +226,15 @@ void TOKUDB_SHARE::destroy() { thr_lock_delete(&_thr_lock); TOKUDB_SHARE_DBUG_VOID_RETURN(); } -TOKUDB_SHARE* TOKUDB_SHARE::get_share( - const char* table_name, - TABLE_SHARE* table_share, - THR_LOCK_DATA* data, - bool create_new) { - - _open_tables_mutex.lock(); +TOKUDB_SHARE* TOKUDB_SHARE::get_share(const char* table_name, + TABLE_SHARE* table_share, + THR_LOCK_DATA* data, + bool create_new) { + mutex_t_lock(_open_tables_mutex); int error = 0; - uint length = (uint) strlen(table_name); - TOKUDB_SHARE* share = - (TOKUDB_SHARE*)my_hash_search( - &_open_tables, - (uchar*)table_name, - length); + uint length = (uint)strlen(table_name); + TOKUDB_SHARE* share = (TOKUDB_SHARE*)my_hash_search( + &_open_tables, (uchar*)table_name, length); TOKUDB_TRACE_FOR_FLAGS( TOKUDB_DEBUG_SHARE, @@ -276,28 +269,27 @@ TOKUDB_SHARE* TOKUDB_SHARE::get_share( thr_lock_data_init(&(share->_thr_lock), data, NULL); exit: - _open_tables_mutex.unlock(); + mutex_t_unlock(_open_tables_mutex); return share; } void TOKUDB_SHARE::drop_share(TOKUDB_SHARE* share) { - TOKUDB_TRACE_FOR_FLAGS( - TOKUDB_DEBUG_SHARE, - "share[%p]:file[%s]:state[%s]:use_count[%d]", - share, - share->_full_table_name.ptr(), - get_state_string(share->_state), - share->_use_count); - - _open_tables_mutex.lock(); + TOKUDB_TRACE_FOR_FLAGS(TOKUDB_DEBUG_SHARE, + "share[%p]:file[%s]:state[%s]:use_count[%d]", + share, + share->_full_table_name.ptr(), + get_state_string(share->_state), + share->_use_count); + + mutex_t_lock(_open_tables_mutex); my_hash_delete(&_open_tables, (uchar*)share); - _open_tables_mutex.unlock(); + mutex_t_unlock(_open_tables_mutex); } TOKUDB_SHARE::share_state_t TOKUDB_SHARE::addref() { TOKUDB_SHARE_TRACE_FOR_FLAGS((TOKUDB_DEBUG_ENTER & TOKUDB_DEBUG_SHARE), - "file[%s]:state[%s]:use_count[%d]", - _full_table_name.ptr(), - get_state_string(_state), - _use_count); + "file[%s]:state[%s]:use_count[%d]", + _full_table_name.ptr(), + get_state_string(_state), + _use_count); lock(); _use_count++; @@ -312,7 +304,7 @@ int TOKUDB_SHARE::release() { int error, result = 0; - _mutex.lock(); + mutex_t_lock(_mutex); assert_always(_use_count != 0); _use_count--; if (_use_count == 0 && _state == TOKUDB_SHARE::OPENED) { @@ -356,7 +348,7 @@ int TOKUDB_SHARE::release() { _state = TOKUDB_SHARE::CLOSED; } - _mutex.unlock(); + mutex_t_unlock(_mutex); TOKUDB_SHARE_DBUG_RETURN(result); } @@ -1195,8 +1187,10 @@ static int generate_row_for_put( ha_tokudb::ha_tokudb(handlerton * hton, TABLE_SHARE * table_arg):handler(hton, table_arg) { TOKUDB_HANDLER_DBUG_ENTER(""); share = NULL; - int_table_flags = HA_REC_NOT_IN_SEQ | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS | HA_PRIMARY_KEY_IN_READ_INDEX | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | - HA_FILE_BASED | HA_AUTO_PART_KEY | HA_TABLE_SCAN_ON_INDEX | HA_CAN_WRITE_DURING_OPTIMIZE; + int_table_flags = HA_REC_NOT_IN_SEQ | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS + | HA_PRIMARY_KEY_IN_READ_INDEX | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION + | HA_FILE_BASED | HA_AUTO_PART_KEY | HA_TABLE_SCAN_ON_INDEX + | HA_CAN_WRITE_DURING_OPTIMIZE | HA_ONLINE_ANALYZE; alloc_ptr = NULL; rec_buff = NULL; rec_update_buff = NULL; @@ -3313,12 +3307,12 @@ void ha_tokudb::start_bulk_insert(ha_rows rows) { delay_updating_ai_metadata = true; ai_metadata_update_required = false; abort_loader = false; - - share->_num_DBs_lock.lock_read(); + + rwlock_t_lock_read(share->_num_DBs_lock); uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); num_DBs_locked_in_bulk = true; lock_count = 0; - + if ((rows == 0 || rows > 1) && share->try_table_lock) { if (tokudb::sysvars::prelock_empty(thd) && may_table_be_empty(transaction) && @@ -4029,14 +4023,13 @@ int ha_tokudb::write_row(uchar * record) { // grab reader lock on numDBs_lock // if (!num_DBs_locked_in_bulk) { - share->_num_DBs_lock.lock_read(); + rwlock_t_lock_read(share->_num_DBs_lock); num_DBs_locked = true; - } - else { + } else { lock_count++; if (lock_count >= 2000) { share->_num_DBs_lock.unlock(); - share->_num_DBs_lock.lock_read(); + rwlock_t_lock_read(share->_num_DBs_lock); lock_count = 0; } } @@ -4206,7 +4199,7 @@ int ha_tokudb::update_row(const uchar * old_row, const uchar * new_row) { // bool num_DBs_locked = false; if (!num_DBs_locked_in_bulk) { - share->_num_DBs_lock.lock_read(); + rwlock_t_lock_read(share->_num_DBs_lock); num_DBs_locked = true; } curr_num_DBs = share->num_DBs; @@ -4346,7 +4339,7 @@ int ha_tokudb::delete_row(const uchar * record) { // bool num_DBs_locked = false; if (!num_DBs_locked_in_bulk) { - share->_num_DBs_lock.lock_read(); + rwlock_t_lock_read(share->_num_DBs_lock); num_DBs_locked = true; } curr_num_DBs = share->num_DBs; @@ -4637,6 +4630,9 @@ int ha_tokudb::index_init(uint keynr, bool sorted) { if (tokudb::sysvars::disable_prefetching(thd)) { cursor_flags |= DBC_DISABLE_PREFETCHING; } + if (lock.type == TL_READ_WITH_SHARED_LOCKS) { + cursor_flags |= DB_LOCKING_READ; + } if ((error = share->key_file[keynr]->cursor(share->key_file[keynr], transaction, &cursor, cursor_flags))) { @@ -6198,6 +6194,8 @@ int ha_tokudb::info(uint flag) { } if ((flag & HA_STATUS_CONST)) { stats.max_data_file_length = 9223372036854775807ULL; + } + if (flag & (HA_STATUS_VARIABLE | HA_STATUS_CONST)) { share->set_cardinality_counts_in_table(table); } @@ -6281,7 +6279,7 @@ int ha_tokudb::acquire_table_lock (DB_TXN* trans, TABLE_LOCK_TYPE lt) { TOKUDB_HANDLER_DBUG_ENTER("%p %s", trans, lt == lock_read ? "r" : "w"); int error = ENOSYS; if (!num_DBs_locked_in_bulk) { - share->_num_DBs_lock.lock_read(); + rwlock_t_lock_read(share->_num_DBs_lock); } uint curr_num_DBs = share->num_DBs; if (lt == lock_read) { @@ -6637,8 +6635,9 @@ THR_LOCK_DATA* *ha_tokudb::store_lock( if (sql_command == SQLCOM_CREATE_INDEX && tokudb::sysvars::create_index_online(thd)) { // hot indexing - share->_num_DBs_lock.lock_read(); - if (share->num_DBs == (table->s->keys + tokudb_test(hidden_primary_key))) { + rwlock_t_lock_read(share->_num_DBs_lock); + if (share->num_DBs == + (table->s->keys + tokudb_test(hidden_primary_key))) { lock_type = TL_WRITE_ALLOW_WRITE; } share->_num_DBs_lock.unlock(); @@ -7212,10 +7211,28 @@ int ha_tokudb::create( const tokudb::sysvars::row_format_t row_format = (tokudb::sysvars::row_format_t)form->s->option_struct->row_format; #else - const tokudb::sysvars::row_format_t row_format = - (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) - ? row_type_to_row_format(create_info->row_type) - : tokudb::sysvars::row_format(thd); + // TDB-76 : CREATE TABLE ... LIKE ... does not use source row_format on + // target table + // Original code would only use create_info->row_type if + // create_info->used_fields & HA_CREATE_USED_ROW_FORMAT was true. This + // would cause us to skip transferring the row_format for a table created + // via CREATE TABLE tn LIKE tn. We also take on more InnoDB like behavior + // and throw a warning if we get a row_format that we can't translate into + // a known TokuDB row_format. + tokudb::sysvars::row_format_t row_format = + tokudb::sysvars::row_format(thd); + + if ((create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) || + create_info->row_type != ROW_TYPE_DEFAULT) { + row_format = row_type_to_row_format(create_info->row_type); + if (row_format == tokudb::sysvars::SRV_ROW_FORMAT_DEFAULT && + create_info->row_type != ROW_TYPE_DEFAULT) { + push_warning(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "TokuDB: invalid ROW_FORMAT specifier."); + } + } #endif const toku_compression_method compression_method = row_format_to_toku_compression_method(row_format); @@ -8100,8 +8117,8 @@ int ha_tokudb::tokudb_add_index( } } } - - share->_num_DBs_lock.lock_write(); + + rwlock_t_lock_write(share->_num_DBs_lock); rw_lock_taken = true; // // open all the DB files and set the appropriate variables in share @@ -8211,7 +8228,7 @@ int ha_tokudb::tokudb_add_index( goto cleanup; } - share->_num_DBs_lock.lock_write(); + rwlock_t_lock_write(share->_num_DBs_lock); error = indexer->close(indexer); share->_num_DBs_lock.unlock(); if (error) { @@ -8445,7 +8462,7 @@ cleanup: if (indexer != NULL) { sprintf(status_msg, "aborting creation of indexes."); thd_proc_info(thd, status_msg); - share->_num_DBs_lock.lock_write(); + rwlock_t_lock_write(share->_num_DBs_lock); indexer->abort(indexer); share->_num_DBs_lock.unlock(); } @@ -8494,10 +8511,10 @@ void ha_tokudb::restore_add_index( // // need to restore num_DBs, and we have to do it before we close the dictionaries - // so that there is not a window + // so that there is not a window // if (incremented_numDBs) { - share->_num_DBs_lock.lock_write(); + rwlock_t_lock_write(share->_num_DBs_lock); share->num_DBs--; } if (modified_DBs) { diff --git a/storage/tokudb/ha_tokudb.h b/storage/tokudb/ha_tokudb.h index 7476efe380f..c80be207005 100644 --- a/storage/tokudb/ha_tokudb.h +++ b/storage/tokudb/ha_tokudb.h @@ -318,18 +318,18 @@ inline int TOKUDB_SHARE::use_count() const { } inline void TOKUDB_SHARE::lock() const { TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]", - _full_table_name.ptr(), - get_state_string(_state), - _use_count); - _mutex.lock(); + _full_table_name.ptr(), + get_state_string(_state), + _use_count); + mutex_t_lock(_mutex); TOKUDB_SHARE_DBUG_VOID_RETURN(); } inline void TOKUDB_SHARE::unlock() const { TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]", - _full_table_name.ptr(), - get_state_string(_state), - _use_count); - _mutex.unlock(); + _full_table_name.ptr(), + get_state_string(_state), + _use_count); + mutex_t_unlock(_mutex); TOKUDB_SHARE_DBUG_VOID_RETURN(); } inline TOKUDB_SHARE::share_state_t TOKUDB_SHARE::state() const { diff --git a/storage/tokudb/ha_tokudb_update.cc b/storage/tokudb/ha_tokudb_update.cc index a6c72506448..2e56d4c6698 100644 --- a/storage/tokudb/ha_tokudb_update.cc +++ b/storage/tokudb/ha_tokudb_update.cc @@ -918,7 +918,7 @@ int ha_tokudb::send_update_message( marshall_update(update_message, lhs_item, rhs_item, table, share); } - share->_num_DBs_lock.lock_read(); + rwlock_t_lock_read(share->_num_DBs_lock); // hot index in progress if (share->num_DBs > table->s->keys + tokudb_test(hidden_primary_key)) { @@ -1108,7 +1108,7 @@ int ha_tokudb::send_upsert_message( marshall_update(update_message, lhs_item, rhs_item, table, share); } - share->_num_DBs_lock.lock_read(); + rwlock_t_lock_read(share->_num_DBs_lock); // hot index in progress if (share->num_DBs > table->s->keys + tokudb_test(hidden_primary_key)) { diff --git a/storage/tokudb/hatoku_cmp.cc b/storage/tokudb/hatoku_cmp.cc index b615d4437d2..ee4d1dac4ea 100644 --- a/storage/tokudb/hatoku_cmp.cc +++ b/storage/tokudb/hatoku_cmp.cc @@ -1987,6 +1987,7 @@ static uint32_t pack_desc_key_length_info(uchar* buf, KEY_AND_COL_INFO* kc_info, case (toku_type_fixstring): field_length = field->pack_length(); set_if_smaller(key_part_length, field_length); + // fallthrough case (toku_type_varbinary): case (toku_type_varstring): case (toku_type_blob): diff --git a/storage/tokudb/hatoku_defines.h b/storage/tokudb/hatoku_defines.h index f6d70c7026a..8fd8ab538cd 100644 --- a/storage/tokudb/hatoku_defines.h +++ b/storage/tokudb/hatoku_defines.h @@ -168,6 +168,14 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #define HA_CAN_WRITE_DURING_OPTIMIZE 0 #endif +#if !defined(HA_ONLINE_ANALYZE) +#define HA_ONLINE_ANALYZE 0 +#endif + +#if !defined(MY_ATTRIBUTE) +#define MY_ATTRIBUTE(A) __attribute__(A) +#endif + #if !defined(HA_OPTION_CREATE_FROM_ENGINE) #define HA_OPTION_CREATE_FROM_ENGINE 0 #endif @@ -248,4 +256,22 @@ inline uint tokudb_uint3korr(const uchar *a) { return uint3korr(b); } -#endif // _HATOKU_DEFINES_H +typedef unsigned int pfs_key_t; + +#if defined(HAVE_PSI_MUTEX_INTERFACE) +#define mutex_t_lock(M) M.lock(__FILE__, __LINE__) +#define mutex_t_unlock(M) M.unlock(__FILE__, __LINE__) +#else // HAVE_PSI_MUTEX_INTERFACE +#define mutex_t_lock(M) M.lock() +#define mutex_t_unlock(M) M.unlock() +#endif // HAVE_PSI_MUTEX_INTERFACE + +#if defined(HAVE_PSI_RWLOCK_INTERFACE) +#define rwlock_t_lock_read(M) M.lock_read(__FILE__, __LINE__) +#define rwlock_t_lock_write(M) M.lock_write(__FILE__, __LINE__) +#else // HAVE_PSI_RWLOCK_INTERFACE +#define rwlock_t_lock_read(M) M.lock_read() +#define rwlock_t_lock_write(M) M.lock_write() +#endif // HAVE_PSI_RWLOCK_INTERFACE + +#endif // _HATOKU_DEFINES_H diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 599948e03f8..cee8575eebb 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -28,6 +28,17 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #define TOKU_METADB_NAME "tokudb_meta" +static pfs_key_t tokudb_map_mutex_key; + +static PSI_mutex_info all_tokudb_mutexes[] = { + {&tokudb_map_mutex_key, "tokudb_map_mutex", 0}, + {&ha_tokudb_mutex_key, "ha_tokudb_mutex", 0}, +}; + +static PSI_rwlock_info all_tokudb_rwlocks[] = { + {&num_DBs_lock_key, "num_DBs_lock", 0}, +}; + typedef struct savepoint_info { DB_TXN* txn; tokudb_trx_data* trx; @@ -109,8 +120,8 @@ handlerton* tokudb_hton; const char* ha_tokudb_ext = ".tokudb"; DB_ENV* db_env; -#if TOKU_THDVAR_MEMALLOC_BUG static tokudb::thread::mutex_t tokudb_map_mutex; +#if TOKU_THDVAR_MEMALLOC_BUG static TREE tokudb_map; struct tokudb_map_pair { THD* thd; @@ -212,6 +223,10 @@ extern "C" { // use constructor and destructor functions to create and destroy // the lock before and after main(), respectively. int tokudb_hton_initialized; + +// tokudb_hton_initialized_lock can not be instrumented as it must be +// initialized before mysql_mutex_register() call to protect +// some globals from race condition. tokudb::thread::rwlock_t tokudb_hton_initialized_lock; static SHOW_VAR *toku_global_status_variables = NULL; @@ -267,10 +282,23 @@ static int tokudb_init_func(void *p) { int r; // 3938: lock the handlerton's initialized status flag for writing - tokudb_hton_initialized_lock.lock_write(); + rwlock_t_lock_write(tokudb_hton_initialized_lock); + +#ifdef HAVE_PSI_INTERFACE + /* Register TokuDB mutex keys with MySQL performance schema */ + int count; + + count = array_elements(all_tokudb_mutexes); + mysql_mutex_register("tokudb", all_tokudb_mutexes, count); + + count = array_elements(all_tokudb_rwlocks); + mysql_rwlock_register("tokudb", all_tokudb_rwlocks, count); + + tokudb_map_mutex.reinit(tokudb_map_mutex_key); +#endif /* HAVE_PSI_INTERFACE */ db_env = NULL; - tokudb_hton = (handlerton *) p; + tokudb_hton = (handlerton*)p; if (tokudb::sysvars::check_jemalloc) { typedef int (*mallctl_type)( @@ -668,7 +696,7 @@ int tokudb_end(handlerton* hton, ha_panic_function type) { // initialized. grab a writer lock for the duration of the // call, so we can drop the flag and destroy the mutexes // in isolation. - tokudb_hton_initialized_lock.lock_write(); + rwlock_t_lock_write(tokudb_hton_initialized_lock); assert_always(tokudb_hton_initialized); tokudb::background::destroy(); @@ -748,16 +776,16 @@ static int tokudb_close_connection(handlerton* hton, THD* thd) { } tokudb::memory::free(trx); #if TOKU_THDVAR_MEMALLOC_BUG - tokudb_map_mutex.lock(); - struct tokudb_map_pair key = { thd, NULL }; + mutex_t_lock(tokudb_map_mutex); + struct tokudb_map_pair key = {thd, NULL}; struct tokudb_map_pair* found_key = - (struct tokudb_map_pair*) tree_search(&tokudb_map, &key, NULL); + (struct tokudb_map_pair*)tree_search(&tokudb_map, &key, NULL); if (found_key) { tokudb::memory::free(found_key->last_lock_timeout); tree_delete(&tokudb_map, found_key, sizeof(*found_key), NULL); } - tokudb_map_mutex.unlock(); + mutex_t_unlock(tokudb_map_mutex); #endif return error; } @@ -1718,12 +1746,12 @@ static void tokudb_lock_timeout_callback( tokudb::memory::strdup(log_str.c_ptr(), MY_FAE); tokudb::sysvars::set_last_lock_timeout(thd, new_lock_timeout); #if TOKU_THDVAR_MEMALLOC_BUG - tokudb_map_mutex.lock(); - struct tokudb_map_pair old_key = { thd, old_lock_timeout }; + mutex_t_lock(tokudb_map_mutex); + struct tokudb_map_pair old_key = {thd, old_lock_timeout}; tree_delete(&tokudb_map, &old_key, sizeof old_key, NULL); - struct tokudb_map_pair new_key = { thd, new_lock_timeout }; + struct tokudb_map_pair new_key = {thd, new_lock_timeout}; tree_insert(&tokudb_map, &new_key, sizeof new_key, NULL); - tokudb_map_mutex.unlock(); + mutex_t_unlock(tokudb_map_mutex); #endif tokudb::memory::free(old_lock_timeout); } diff --git a/storage/tokudb/hatoku_hton.h b/storage/tokudb/hatoku_hton.h index d126ff4339f..80e13fa9b0c 100644 --- a/storage/tokudb/hatoku_hton.h +++ b/storage/tokudb/hatoku_hton.h @@ -39,6 +39,9 @@ extern handlerton* tokudb_hton; extern DB_ENV* db_env; +extern pfs_key_t ha_tokudb_mutex_key; +extern pfs_key_t num_DBs_lock_key; + inline tokudb::sysvars::row_format_t toku_compression_method_to_row_format( toku_compression_method method) { @@ -180,9 +183,7 @@ inline bool tokudb_killed_thd_callback(void *extra, uint64_t deleted_rows) { return thd_kill_level(thd) != 0; } - extern HASH tokudb_open_tables; -extern tokudb::thread::mutex_t tokudb_mutex; extern const char* tokudb_hton_name; extern int tokudb_hton_initialized; extern tokudb::thread::rwlock_t tokudb_hton_initialized_lock; diff --git a/storage/tokudb/mysql-test/tokudb/disabled.def b/storage/tokudb/mysql-test/tokudb/disabled.def index 8893dfabdc6..89f6992d26d 100644 --- a/storage/tokudb/mysql-test/tokudb/disabled.def +++ b/storage/tokudb/mysql-test/tokudb/disabled.def @@ -29,5 +29,6 @@ cluster_key_part: engine options on partitioned tables i_s_tokudb_lock_waits_released: unstable, race conditions i_s_tokudb_locks_released: unstable, race conditions row_format: n/a +nonflushing_analyze_debug: Freezes in MariaDB 10.0 tokudb.change_column_all_1000_1: We are too lazy to fix this properly tokudb.change_column_all_1000_10: We are too lazy to fix this properly diff --git a/storage/tokudb/mysql-test/tokudb/r/card_scale_percent.result b/storage/tokudb/mysql-test/tokudb/r/card_scale_percent.result index cfd7e38179c..981433fac91 100644 --- a/storage/tokudb/mysql-test/tokudb/r/card_scale_percent.result +++ b/storage/tokudb/mysql-test/tokudb/r/card_scale_percent.result @@ -1,7 +1,7 @@ -set global tokudb_cardinality_scale_percent = 10; analyze table tt; Table Op Msg_type Msg_text test.tt analyze status OK +set global tokudb_cardinality_scale_percent = 10; show indexes from tt; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment tt 0 PRIMARY 1 a A 4000 NULL NULL BTREE @@ -9,9 +9,6 @@ tt 1 b 1 b A 4000 NULL NULL YES BTREE tt 1 c 1 c A 4000 NULL NULL YES BTREE tt 1 d 1 d A 4000 NULL NULL YES BTREE set global tokudb_cardinality_scale_percent = 50; -analyze table tt; -Table Op Msg_type Msg_text -test.tt analyze status OK show indexes from tt; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment tt 0 PRIMARY 1 a A 4000 NULL NULL BTREE @@ -19,9 +16,6 @@ tt 1 b 1 b A 4000 NULL NULL YES BTREE tt 1 c 1 c A 4000 NULL NULL YES BTREE tt 1 d 1 d A 2000 NULL NULL YES BTREE set global tokudb_cardinality_scale_percent = 100; -analyze table tt; -Table Op Msg_type Msg_text -test.tt analyze status OK show indexes from tt; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment tt 0 PRIMARY 1 a A 4000 NULL NULL BTREE @@ -31,9 +25,6 @@ tt 1 d 1 d A 1000 NULL NULL YES BTREE set global tokudb_cardinality_scale_percent = 200; Warnings: Warning 1292 Truncated incorrect tokudb_cardinality_scale_percent value: '200' -analyze table tt; -Table Op Msg_type Msg_text -test.tt analyze status OK show indexes from tt; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment tt 0 PRIMARY 1 a A 4000 NULL NULL BTREE diff --git a/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-1.result b/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-1.result new file mode 100644 index 00000000000..433b5baf18b --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-1.result @@ -0,0 +1,16 @@ +create table t (a int primary key, b int) ENGINE=TokuDB; +insert into t values (1,0); +set session transaction isolation level repeatable read; +begin; +select * from t where a=1 lock in share mode; +a b +1 0 +connect conn1,localhost,root; +set session transaction isolation level repeatable read; +begin; +update t set b=b+1 where a=1; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +connection default; +commit; +disconnect conn1; +drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-2.result b/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-2.result new file mode 100644 index 00000000000..79e886a8682 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-2.result @@ -0,0 +1,20 @@ +create table t (a int primary key, b int) ENGINE=TokuDB; +insert into t values (1,0); +insert into t values (2,1); +insert into t values (3,2); +set session transaction isolation level repeatable read; +begin; +select * from t lock in share mode; +a b +1 0 +2 1 +3 2 +connect conn1,localhost,root; +set session transaction isolation level repeatable read; +begin; +update t set b=b+1 where a=2; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +connection default; +commit; +disconnect conn1; +drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/nonflushing_analyze_debug.result b/storage/tokudb/mysql-test/tokudb/r/nonflushing_analyze_debug.result new file mode 100644 index 00000000000..e8f51edee04 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/nonflushing_analyze_debug.result @@ -0,0 +1,19 @@ +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=TokuDB; +INSERT INTO t1 VALUES (1), (2), (3); +SET DEBUG_SYNC="handler_ha_index_next_end SIGNAL idx_scan_in_progress WAIT_FOR finish_scan"; +SELECT * FROM t1; +SET DEBUG_SYNC="now WAIT_FOR idx_scan_in_progress"; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +SELECT * FROM t1; +a +1 +2 +3 +SET DEBUG_SYNC="now SIGNAL finish_scan"; +a +1 +2 +3 +DROP TABLE t1; diff --git a/storage/tokudb/mysql-test/tokudb/r/row_format.result b/storage/tokudb/mysql-test/tokudb/r/row_format.result index cb669148445..15d4f9fe12a 100644 --- a/storage/tokudb/mysql-test/tokudb/r/row_format.result +++ b/storage/tokudb/mysql-test/tokudb/r/row_format.result @@ -1,4 +1,6 @@ CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +Warnings: +Warning 1478 TokuDB: invalid ROW_FORMAT specifier. CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; @@ -6,6 +8,41 @@ CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZL CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +CREATE TABLE tdb76_1 LIKE tokudb_row_format_test_1; +CREATE TABLE tdb76_2 LIKE tokudb_row_format_test_2; +CREATE TABLE tdb76_3 LIKE tokudb_row_format_test_3; +CREATE TABLE tdb76_4 LIKE tokudb_row_format_test_4; +CREATE TABLE tdb76_5 LIKE tokudb_row_format_test_5; +CREATE TABLE tdb76_6 LIKE tokudb_row_format_test_6; +CREATE TABLE tdb76_7 LIKE tokudb_row_format_test_7; +CREATE TABLE tdb76_8 LIKE tokudb_row_format_test_8; +CREATE TABLE tdb76_compact(a INT) ENGINE=TokuDB ROW_FORMAT=COMPACT; +Warnings: +Warning 1478 TokuDB: invalid ROW_FORMAT specifier. +CREATE TABLE tdb76_redundant(a INT) ENGINE=TokuDB ROW_FORMAT=REDUNDANT; +Warnings: +Warning 1478 TokuDB: invalid ROW_FORMAT specifier. +CREATE TABLE tdb76_dynamic(a INT) ENGINE=TokuDB ROW_FORMAT=DYNAMIC; +Warnings: +Warning 1478 TokuDB: invalid ROW_FORMAT specifier. +CREATE TABLE tdb76_compressed(a INT) ENGINE=TokuDB ROW_FORMAT=COMPRESSED; +Warnings: +Warning 1478 TokuDB: invalid ROW_FORMAT specifier. +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tdb76_%' ORDER BY table_name; +table_name row_format engine +tdb76_1 tokudb_zlib TokuDB +tdb76_2 tokudb_quicklz TokuDB +tdb76_3 tokudb_lzma TokuDB +tdb76_4 tokudb_uncompressed TokuDB +tdb76_5 tokudb_zlib TokuDB +tdb76_6 tokudb_lzma TokuDB +tdb76_7 tokudb_quicklz TokuDB +tdb76_8 tokudb_snappy TokuDB +tdb76_compact tokudb_zlib TokuDB +tdb76_compressed tokudb_zlib TokuDB +tdb76_dynamic tokudb_zlib TokuDB +tdb76_redundant tokudb_zlib TokuDB +DROP TABLE tdb76_1, tdb76_2, tdb76_3, tdb76_4, tdb76_5, tdb76_6, tdb76_7, tdb76_8, tdb76_compact, tdb76_redundant, tdb76_dynamic, tdb76_compressed; SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name; table_name row_format engine tokudb_row_format_test_1 tokudb_zlib TokuDB @@ -45,6 +82,8 @@ SELECT table_name, row_format, engine FROM information_schema.tables WHERE table table_name row_format engine tokudb_row_format_test_1 tokudb_lzma TokuDB ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +Warnings: +Warning 1478 TokuDB: invalid ROW_FORMAT specifier. SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; table_name row_format engine tokudb_row_format_test_1 tokudb_zlib TokuDB diff --git a/storage/tokudb/mysql-test/tokudb/t/card_scale_percent.test b/storage/tokudb/mysql-test/tokudb/t/card_scale_percent.test index 47f1eb37989..75c53611308 100644 --- a/storage/tokudb/mysql-test/tokudb/t/card_scale_percent.test +++ b/storage/tokudb/mysql-test/tokudb/t/card_scale_percent.test @@ -30,20 +30,18 @@ set session tokudb_analyze_throttle=0; -- enable_query_log -set global tokudb_cardinality_scale_percent = 10; analyze table tt; + +set global tokudb_cardinality_scale_percent = 10; show indexes from tt; set global tokudb_cardinality_scale_percent = 50; -analyze table tt; show indexes from tt; set global tokudb_cardinality_scale_percent = 100; -analyze table tt; show indexes from tt; set global tokudb_cardinality_scale_percent = 200; -analyze table tt; show indexes from tt; -- disable_query_log diff --git a/storage/tokudb/mysql-test/tokudb/t/locking-read-repeatable-read-1.test b/storage/tokudb/mysql-test/tokudb/t/locking-read-repeatable-read-1.test new file mode 100644 index 00000000000..c5ef0f0703e --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/locking-read-repeatable-read-1.test @@ -0,0 +1,23 @@ +source include/have_tokudb.inc; +source include/count_sessions.inc; +create table t (a int primary key, b int) ENGINE=TokuDB; +insert into t values (1,0); +set session transaction isolation level repeatable read; +begin; +# t1 select in share mode +select * from t where a=1 lock in share mode; +# t2 update +connect(conn1,localhost,root); +set session transaction isolation level repeatable read; +begin; +# t2 select for update, should hang until t1 commits +send update t set b=b+1 where a=1; +--error ER_LOCK_WAIT_TIMEOUT +reap; +# t2 update +connection default; +commit; +disconnect conn1; +drop table t; + +source include/wait_until_count_sessions.inc; diff --git a/storage/tokudb/mysql-test/tokudb/t/locking-read-repeatable-read-2.test b/storage/tokudb/mysql-test/tokudb/t/locking-read-repeatable-read-2.test new file mode 100644 index 00000000000..20a4549c45f --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/locking-read-repeatable-read-2.test @@ -0,0 +1,29 @@ +#the difference of this test from locking-read-repeatable-read-1 +#is that this test is on range query(gap lock) which actually +#examines a different patch + +source include/have_tokudb.inc; +source include/count_sessions.inc; +create table t (a int primary key, b int) ENGINE=TokuDB; +insert into t values (1,0); +insert into t values (2,1); +insert into t values (3,2); +set session transaction isolation level repeatable read; +begin; +# t1 select in share mode +select * from t lock in share mode; +# t2 update +connect(conn1,localhost,root); +set session transaction isolation level repeatable read; +begin; +# t2 select for update, should hang until t1 commits +send update t set b=b+1 where a=2; +--error ER_LOCK_WAIT_TIMEOUT +reap; +# t2 update +connection default; +commit; +disconnect conn1; +drop table t; + +source include/wait_until_count_sessions.inc; diff --git a/storage/tokudb/mysql-test/tokudb/t/nonflushing_analyze_debug.test b/storage/tokudb/mysql-test/tokudb/t/nonflushing_analyze_debug.test new file mode 100644 index 00000000000..0c4c01b3dc0 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/nonflushing_analyze_debug.test @@ -0,0 +1,10 @@ +--source include/have_debug_sync.inc +--source include/have_tokudb.inc + +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=TokuDB; +INSERT INTO t1 VALUES (1), (2), (3); + +--let $percona_nonflushing_analyze_table= t1 +--source include/percona_nonflushing_analyze_debug.inc + +DROP TABLE t1; diff --git a/storage/tokudb/mysql-test/tokudb/t/row_format.test b/storage/tokudb/mysql-test/tokudb/t/row_format.test index 6533f8c06be..9c5c6e6403e 100644 --- a/storage/tokudb/mysql-test/tokudb/t/row_format.test +++ b/storage/tokudb/mysql-test/tokudb/t/row_format.test @@ -12,6 +12,27 @@ CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZ CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +# TDB-76 : CREATE TABLE ... LIKE ... does not use source row_format on target table +CREATE TABLE tdb76_1 LIKE tokudb_row_format_test_1; +CREATE TABLE tdb76_2 LIKE tokudb_row_format_test_2; +CREATE TABLE tdb76_3 LIKE tokudb_row_format_test_3; +CREATE TABLE tdb76_4 LIKE tokudb_row_format_test_4; +CREATE TABLE tdb76_5 LIKE tokudb_row_format_test_5; +CREATE TABLE tdb76_6 LIKE tokudb_row_format_test_6; +CREATE TABLE tdb76_7 LIKE tokudb_row_format_test_7; +CREATE TABLE tdb76_8 LIKE tokudb_row_format_test_8; + +CREATE TABLE tdb76_compact(a INT) ENGINE=TokuDB ROW_FORMAT=COMPACT; +CREATE TABLE tdb76_redundant(a INT) ENGINE=TokuDB ROW_FORMAT=REDUNDANT; +CREATE TABLE tdb76_dynamic(a INT) ENGINE=TokuDB ROW_FORMAT=DYNAMIC; +CREATE TABLE tdb76_compressed(a INT) ENGINE=TokuDB ROW_FORMAT=COMPRESSED; + +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tdb76_%' ORDER BY table_name; + +DROP TABLE tdb76_1, tdb76_2, tdb76_3, tdb76_4, tdb76_5, tdb76_6, tdb76_7, tdb76_8, tdb76_compact, tdb76_redundant, tdb76_dynamic, tdb76_compressed; + + + SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name; ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db756_card_part_hash_1_pick.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db756_card_part_hash_1_pick.result index 5ba5da21789..caaa963c325 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/db756_card_part_hash_1_pick.result +++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db756_card_part_hash_1_pick.result @@ -17,5 +17,5 @@ test.t analyze status OK show indexes from t; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment t 0 PRIMARY 1 id A 7 NULL NULL BTREE -t 1 x 1 x A 3 NULL NULL YES BTREE +t 1 x 1 x A 7 NULL NULL YES BTREE drop table t; diff --git a/storage/tokudb/mysql-test/tokudb_parts/disabled.def b/storage/tokudb/mysql-test/tokudb_parts/disabled.def index 3252a463176..17a8ddcc12e 100644 --- a/storage/tokudb/mysql-test/tokudb_parts/disabled.def +++ b/storage/tokudb/mysql-test/tokudb_parts/disabled.def @@ -7,3 +7,4 @@ partition_max_sub_parts_key_list_tokudb: 5.6 test not merged yet partition_max_sub_parts_key_range_tokudb: 5.6 test not merged yet partition_max_sub_parts_list_tokudb: 5.6 test not merged yet partition_max_sub_parts_range_tokudb: 5.6 test not merged yet +nonflushing_analyze_debug: Freezes in MariaDB 10.0 diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/nonflushing_analyze_debug.result b/storage/tokudb/mysql-test/tokudb_parts/r/nonflushing_analyze_debug.result new file mode 100644 index 00000000000..5a7bfb369df --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_parts/r/nonflushing_analyze_debug.result @@ -0,0 +1,50 @@ +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=TokuDB +PARTITION BY RANGE (a) ( +PARTITION p0 VALUES LESS THAN (3), +PARTITION p1 VALUES LESS THAN (10)); +INSERT INTO t1 VALUES (1), (2), (3), (4); +SET DEBUG_SYNC="handler_ha_index_next_end SIGNAL idx_scan_in_progress WAIT_FOR finish_scan"; +SELECT * FROM t1; +SET DEBUG_SYNC="now WAIT_FOR idx_scan_in_progress"; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +SELECT * FROM t1; +a +1 +2 +3 +4 +SET DEBUG_SYNC="now SIGNAL finish_scan"; +a +1 +2 +3 +4 +DROP TABLE t1; +CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=TokuDB +PARTITION BY RANGE (a) +SUBPARTITION BY HASH (A) +SUBPARTITIONS 2 ( +PARTITION p0 VALUES LESS THAN (3), +PARTITION p1 VALUES LESS THAN (10)); +INSERT INTO t2 VALUES (1), (2), (3), (4); +SET DEBUG_SYNC="handler_ha_index_next_end SIGNAL idx_scan_in_progress WAIT_FOR finish_scan"; +SELECT * FROM t2; +SET DEBUG_SYNC="now WAIT_FOR idx_scan_in_progress"; +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +SELECT * FROM t2; +a +2 +1 +4 +3 +SET DEBUG_SYNC="now SIGNAL finish_scan"; +a +2 +1 +4 +3 +DROP TABLE t2; diff --git a/storage/tokudb/mysql-test/tokudb_parts/t/nonflushing_analyze_debug.test b/storage/tokudb/mysql-test/tokudb_parts/t/nonflushing_analyze_debug.test new file mode 100644 index 00000000000..c99206acfe3 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_parts/t/nonflushing_analyze_debug.test @@ -0,0 +1,29 @@ +--source include/have_debug_sync.inc +--source include/have_tokudb.inc +--source include/have_partition.inc + +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=TokuDB + PARTITION BY RANGE (a) ( + PARTITION p0 VALUES LESS THAN (3), + PARTITION p1 VALUES LESS THAN (10)); + +INSERT INTO t1 VALUES (1), (2), (3), (4); + +--let $percona_nonflushing_analyze_table= t1 +--source include/percona_nonflushing_analyze_debug.inc + +DROP TABLE t1; + +CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=TokuDB + PARTITION BY RANGE (a) + SUBPARTITION BY HASH (A) + SUBPARTITIONS 2 ( + PARTITION p0 VALUES LESS THAN (3), + PARTITION p1 VALUES LESS THAN (10)); + +INSERT INTO t2 VALUES (1), (2), (3), (4); + +--let $percona_nonflushing_analyze_table= t2 +--source include/percona_nonflushing_analyze_debug.inc + +DROP TABLE t2; diff --git a/storage/tokudb/mysql-test/tokudb_perfschema/r/crash_tokudb.result b/storage/tokudb/mysql-test/tokudb_perfschema/r/crash_tokudb.result new file mode 100644 index 00000000000..189d6bef14e --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_perfschema/r/crash_tokudb.result @@ -0,0 +1,30 @@ +SELECT COUNT(*) > 0 FROM performance_schema.setup_consumers +WHERE ENABLED = "NO"; +COUNT(*) > 0 +0 +SELECT COUNT(*) > 0 FROM performance_schema.setup_instruments +WHERE NAME LIKE "%/fti/%" AND (ENABLED = 'NO' OR TIMED = "NO"); +COUNT(*) > 0 +0 +CREATE TABLE t(a INT AUTO_INCREMENT PRIMARY KEY, b INT) ENGINE=TokuDB; +INSERT INTO t (b) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), +(1), (2), (3), (4), (5), (6), (7), (8), (9), (10), +(1), (2), (3), (4), (5), (6), (7), (8), (9), (10), +(1), (2), (3), (4), (5), (6), (7), (8), (9), (10), +(1), (2), (3), (4), (5), (6), (7), (8), (9), (10), +(1), (2), (3), (4), (5), (6), (7), (8), (9), (10), +(1), (2), (3), (4), (5), (6), (7), (8), (9), (10), +(1), (2), (3), (4), (5), (6), (7), (8), (9), (10), +(1), (2), (3), (4), (5), (6), (7), (8), (9), (10), +(1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +INSERT INTO t (b) SELECT b FROM t; +UPDATE t SET b = 11 WHERE b = 10; +DELETE FROM t WHERE b = 1; +ALTER TABLE t ADD COLUMN c CHAR(10) default NULL; +TRUNCATE TABLE t; +RENAME TABLE t TO t1; +RENAME TABLE t1 TO t; +SELECT COUNT(*) > 0 FROM t; +COUNT(*) > 0 +0 +DROP TABLE t; diff --git a/storage/tokudb/mysql-test/tokudb_perfschema/r/start_server_tokudb.result b/storage/tokudb/mysql-test/tokudb_perfschema/r/start_server_tokudb.result new file mode 100644 index 00000000000..2e220916ec3 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_perfschema/r/start_server_tokudb.result @@ -0,0 +1,114 @@ +show databases; +Database +information_schema +mtr +mysql +performance_schema +test +select count(*) from performance_schema.performance_timers; +count(*) +5 +select count(*) from performance_schema.setup_consumers; +count(*) +12 +select count(*) > 3 from performance_schema.setup_instruments; +count(*) > 3 +1 +select count(*) from performance_schema.setup_timers; +count(*) +4 +select * from performance_schema.accounts; +select * from performance_schema.cond_instances; +select * from performance_schema.events_stages_current; +select * from performance_schema.events_stages_history; +select * from performance_schema.events_stages_history_long; +select * from performance_schema.events_stages_summary_by_account_by_event_name; +select * from performance_schema.events_stages_summary_by_host_by_event_name; +select * from performance_schema.events_stages_summary_by_thread_by_event_name; +select * from performance_schema.events_stages_summary_by_user_by_event_name; +select * from performance_schema.events_stages_summary_global_by_event_name; +select * from performance_schema.events_statements_current; +select * from performance_schema.events_statements_history; +select * from performance_schema.events_statements_history_long; +select * from performance_schema.events_statements_summary_by_account_by_event_name; +select * from performance_schema.events_statements_summary_by_digest; +select * from performance_schema.events_statements_summary_by_host_by_event_name; +select * from performance_schema.events_statements_summary_by_thread_by_event_name; +select * from performance_schema.events_statements_summary_by_user_by_event_name; +select * from performance_schema.events_statements_summary_global_by_event_name; +select * from performance_schema.events_waits_current; +select * from performance_schema.events_waits_history; +select * from performance_schema.events_waits_history_long; +select * from performance_schema.events_waits_summary_by_account_by_event_name; +select * from performance_schema.events_waits_summary_by_host_by_event_name; +select * from performance_schema.events_waits_summary_by_instance; +select * from performance_schema.events_waits_summary_by_thread_by_event_name; +select * from performance_schema.events_waits_summary_by_user_by_event_name; +select * from performance_schema.events_waits_summary_global_by_event_name; +select * from performance_schema.file_instances; +select * from performance_schema.file_summary_by_event_name; +select * from performance_schema.file_summary_by_instance; +select * from performance_schema.host_cache; +select * from performance_schema.hosts; +select * from performance_schema.mutex_instances; +select * from performance_schema.objects_summary_global_by_type; +select * from performance_schema.performance_timers; +select * from performance_schema.rwlock_instances; +select * from performance_schema.session_account_connect_attrs; +select * from performance_schema.session_connect_attrs; +select * from performance_schema.setup_actors; +select * from performance_schema.setup_consumers; +select * from performance_schema.setup_instruments; +select * from performance_schema.setup_objects; +select * from performance_schema.setup_timers; +select * from performance_schema.socket_instances; +select * from performance_schema.socket_summary_by_instance; +select * from performance_schema.socket_summary_by_event_name; +select * from performance_schema.table_io_waits_summary_by_index_usage; +select * from performance_schema.table_io_waits_summary_by_table; +select * from performance_schema.table_lock_waits_summary_by_table; +select * from performance_schema.threads; +select * from performance_schema.users; +show variables where +`Variable_name` != "performance_schema_max_statement_classes" and +`Variable_name` like "performance_schema%"; +Variable_name Value +performance_schema ON +performance_schema_accounts_size 100 +performance_schema_digests_size 200 +performance_schema_events_stages_history_long_size 1000 +performance_schema_events_stages_history_size 10 +performance_schema_events_statements_history_long_size 1000 +performance_schema_events_statements_history_size 10 +performance_schema_events_waits_history_long_size 10000 +performance_schema_events_waits_history_size 10 +performance_schema_hosts_size 100 +performance_schema_max_cond_classes 80 +performance_schema_max_cond_instances 1000 +performance_schema_max_digest_length 1024 +performance_schema_max_file_classes 50 +performance_schema_max_file_handles 32768 +performance_schema_max_file_instances 10000 +performance_schema_max_mutex_classes 200 +performance_schema_max_mutex_instances 5000 +performance_schema_max_rwlock_classes 40 +performance_schema_max_rwlock_instances 5000 +performance_schema_max_socket_classes 10 +performance_schema_max_socket_instances 1000 +performance_schema_max_stage_classes 150 +performance_schema_max_table_handles 1000 +performance_schema_max_table_instances 500 +performance_schema_max_thread_classes 50 +performance_schema_max_thread_instances 200 +performance_schema_session_connect_attrs_size 2048 +performance_schema_setup_actors_size 100 +performance_schema_setup_objects_size 100 +performance_schema_users_size 100 +show engine PERFORMANCE_SCHEMA status; +show status like "performance_schema%"; +SELECT COUNT(NAME) > 0 FROM performance_schema.setup_instruments WHERE NAME LIKE "%/fti/%"; +COUNT(NAME) > 0 +1 +SELECT COUNT(NAME) > 0 FROM performance_schema.threads WHERE NAME LIKE "%kibbutz%"; +COUNT(NAME) > 0 +1 diff --git a/storage/tokudb/mysql-test/tokudb_perfschema/t/crash_tokudb.test b/storage/tokudb/mysql-test/tokudb_perfschema/t/crash_tokudb.test new file mode 100644 index 00000000000..31757a20259 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_perfschema/t/crash_tokudb.test @@ -0,0 +1,36 @@ +# TokuDB PFS crash test: +# Make sure FTI instrumentation is on, execute base DDL, DML queries +# and make sure there is no crash. + +--source include/not_embedded.inc +--source include/have_perfschema.inc +--source include/have_tokudb.inc + +SELECT COUNT(*) > 0 FROM performance_schema.setup_consumers + WHERE ENABLED = "NO"; +SELECT COUNT(*) > 0 FROM performance_schema.setup_instruments + WHERE NAME LIKE "%/fti/%" AND (ENABLED = 'NO' OR TIMED = "NO"); + +CREATE TABLE t(a INT AUTO_INCREMENT PRIMARY KEY, b INT) ENGINE=TokuDB; +INSERT INTO t (b) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), + (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), + (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), + (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), + (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), + (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), + (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), + (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), + (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), + (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +INSERT INTO t (b) SELECT b FROM t; + +UPDATE t SET b = 11 WHERE b = 10; +DELETE FROM t WHERE b = 1; + +ALTER TABLE t ADD COLUMN c CHAR(10) default NULL; +TRUNCATE TABLE t; +RENAME TABLE t TO t1; +RENAME TABLE t1 TO t; + +SELECT COUNT(*) > 0 FROM t; +DROP TABLE t; diff --git a/storage/tokudb/mysql-test/tokudb_perfschema/t/start_server_tokudb.test b/storage/tokudb/mysql-test/tokudb_perfschema/t/start_server_tokudb.test new file mode 100644 index 00000000000..4034fba0549 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_perfschema/t/start_server_tokudb.test @@ -0,0 +1,10 @@ +# Tests for PERFORMANCE_SCHEMA + +--source include/not_embedded.inc +--source include/have_perfschema.inc +--source include/have_tokudb.inc + +--source ../../perfschema/include/start_server_common.inc + +SELECT COUNT(NAME) > 0 FROM performance_schema.setup_instruments WHERE NAME LIKE "%/fti/%"; +SELECT COUNT(NAME) > 0 FROM performance_schema.threads WHERE NAME LIKE "%kibbutz%"; diff --git a/storage/tokudb/mysql-test/tokudb_perfschema/t/suite.opt b/storage/tokudb/mysql-test/tokudb_perfschema/t/suite.opt new file mode 100644 index 00000000000..6d826fe91d1 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_perfschema/t/suite.opt @@ -0,0 +1 @@ +--loose-enable-performance-schema $TOKUDB_OPT $TOKUDB_LOAD_ADD --loose-tokudb-check-jemalloc=0 --loose-tokudb-cache-size=512M --loose-tokudb-block-size=1M diff --git a/storage/tokudb/tokudb_background.cc b/storage/tokudb/tokudb_background.cc index e019e41c788..13e0e9321cc 100644 --- a/storage/tokudb/tokudb_background.cc +++ b/storage/tokudb/tokudb_background.cc @@ -66,13 +66,13 @@ void job_manager_t::destroy() { _sem.set_interrupt(); while (_background_jobs.size()) { - _mutex.lock(); + mutex_t_lock(_mutex); job_t* job = _background_jobs.front(); if (!job->cancelled()) cancel(job); _background_jobs.pop_front(); delete job; - _mutex.unlock(); + mutex_t_unlock(_mutex); } void* result; @@ -83,7 +83,7 @@ bool job_manager_t::run_job(job_t* newjob, bool background) { bool ret = false; const char* jobkey = newjob->key(); - _mutex.lock(); + mutex_t_lock(_mutex); assert_always(!_shutdown); for (jobs_t::iterator it = _background_jobs.begin(); @@ -138,15 +138,16 @@ bool job_manager_t::run_job(job_t* newjob, bool background) { } cleanup: - _mutex.unlock(); + mutex_t_unlock(_mutex); return ret; } bool job_manager_t::cancel_job(const char* key) { bool ret = false; - _mutex.lock(); + mutex_t_lock(_mutex); for (jobs_t::iterator it = _background_jobs.begin(); - it != _background_jobs.end(); it++) { + it != _background_jobs.end(); + it++) { job_t* job = *it; if (!job->cancelled() && strcmp(job->key(), key) == 0) { @@ -155,12 +156,11 @@ bool job_manager_t::cancel_job(const char* key) { } } - _mutex.unlock(); + mutex_t_unlock(_mutex); return ret; } void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const { - - _mutex.lock(); + mutex_t_lock(_mutex); for (jobs_t::const_iterator it = _background_jobs.begin(); it != _background_jobs.end(); @@ -171,7 +171,7 @@ void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const { } } - _mutex.unlock(); + mutex_t_unlock(_mutex); } void* job_manager_t::thread_func(void* v) { return ((tokudb::background::job_manager_t*)v)->real_thread_func(); @@ -189,14 +189,14 @@ void* job_manager_t::real_thread_func() { tokudb::time::sleep_microsec(250000); continue; } -#endif // TOKUDB_DEBUG +#endif // TOKUDB_DEBUG - _mutex.lock(); + mutex_t_lock(_mutex); assert_debug(_background_jobs.size() > 0); job_t* job = _background_jobs.front(); run(job); _background_jobs.pop_front(); - _mutex.unlock(); + mutex_t_unlock(_mutex); delete job; } } @@ -205,11 +205,11 @@ void* job_manager_t::real_thread_func() { void job_manager_t::run(job_t* job) { assert_debug(_mutex.is_owned_by_me()); if (!job->cancelled()) { - _mutex.unlock(); + mutex_t_unlock(_mutex); // do job job->run(); // done job - _mutex.lock(); + mutex_t_lock(_mutex); } if (!job->cancelled()) { job->destroy(); diff --git a/storage/tokudb/tokudb_background.h b/storage/tokudb/tokudb_background.h index 29991ab325d..bf5eb97a619 100644 --- a/storage/tokudb/tokudb_background.h +++ b/storage/tokudb/tokudb_background.h @@ -174,11 +174,11 @@ bool destroy(); inline void job_manager_t::lock() { assert_debug(!_mutex.is_owned_by_me()); - _mutex.lock(); + mutex_t_lock(_mutex); } inline void job_manager_t::unlock() { assert_debug(_mutex.is_owned_by_me()); - _mutex.unlock(); + mutex_t_unlock(_mutex); } inline void job_manager_t::job_t::run() { diff --git a/storage/tokudb/tokudb_information_schema.cc b/storage/tokudb/tokudb_information_schema.cc index 5cd0609ac74..bb263d5f960 100644 --- a/storage/tokudb/tokudb_information_schema.cc +++ b/storage/tokudb/tokudb_information_schema.cc @@ -99,7 +99,7 @@ int trx_fill_table(THD* thd, TABLE_LIST* tables, COND* cond) { TOKUDB_DBUG_ENTER(""); int error; - tokudb_hton_initialized_lock.lock_read(); + rwlock_t_lock_read(tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { error = ER_PLUGIN_IS_NOT_LOADED; @@ -234,7 +234,7 @@ int lock_waits_fill_table(THD* thd, TABLE_LIST* tables, COND* cond) { TOKUDB_DBUG_ENTER(""); int error; - tokudb_hton_initialized_lock.lock_read(); + rwlock_t_lock_read(tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { error = ER_PLUGIN_IS_NOT_LOADED; @@ -377,7 +377,7 @@ int locks_fill_table(THD* thd, TABLE_LIST* tables, COND* cond) { TOKUDB_DBUG_ENTER(""); int error; - tokudb_hton_initialized_lock.lock_read(); + rwlock_t_lock_read(tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { error = ER_PLUGIN_IS_NOT_LOADED; @@ -521,7 +521,7 @@ int file_map_fill_table(THD* thd, TABLE_LIST* tables, COND* cond) { int error; TABLE* table = tables->table; - tokudb_hton_initialized_lock.lock_read(); + rwlock_t_lock_read(tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { error = ER_PLUGIN_IS_NOT_LOADED; @@ -728,7 +728,7 @@ int fractal_tree_info_fill_table(THD* thd, TABLE_LIST* tables, COND* cond) { // 3938: Get a read lock on the status flag, since we must // read it before safely proceeding - tokudb_hton_initialized_lock.lock_read(); + rwlock_t_lock_read(tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { error = ER_PLUGIN_IS_NOT_LOADED; @@ -1025,7 +1025,7 @@ int fractal_tree_block_map_fill_table( // 3938: Get a read lock on the status flag, since we must // read it before safely proceeding - tokudb_hton_initialized_lock.lock_read(); + rwlock_t_lock_read(tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { error = ER_PLUGIN_IS_NOT_LOADED; @@ -1162,7 +1162,7 @@ int background_job_status_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) { int error; TABLE* table = tables->table; - tokudb_hton_initialized_lock.lock_read(); + rwlock_t_lock_read(tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { error = ER_PLUGIN_IS_NOT_LOADED; diff --git a/storage/tokudb/tokudb_thread.cc b/storage/tokudb/tokudb_thread.cc index f27e803a065..6fc1191975d 100644 --- a/storage/tokudb/tokudb_thread.cc +++ b/storage/tokudb/tokudb_thread.cc @@ -29,6 +29,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. namespace tokudb { namespace thread { +const pfs_key_t pfs_not_instrumented = 0xFFFFFFFF; pthread_t mutex_t::_null_owner = 0; } // namespace thread diff --git a/storage/tokudb/tokudb_thread.h b/storage/tokudb/tokudb_thread.h index 72eb17ea05a..0be5583ffb2 100644 --- a/storage/tokudb/tokudb_thread.h +++ b/storage/tokudb/tokudb_thread.h @@ -34,95 +34,68 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. namespace tokudb { namespace thread { -#if (defined(__MACH__) || defined(__APPLE__)) && _POSIX_TIMERS <= 0 - -#define _x_min(a, b) ((a) < (b) ? (a) : (b)) - -#define timed_lock_define(timed_func_name, lock_type_name, lock_func_name) \ -inline int timed_func_name(lock_type_name *mutex, \ - const struct timespec *abs_timeout) { \ - int pthread_rc; \ - struct timespec remaining, slept, ts; \ - static const int sleep_step = 1000000; \ - \ - remaining = *abs_timeout; \ - while ((pthread_rc = lock_func_name(mutex)) == EBUSY) { \ - ts.tv_sec = 0; \ - ts.tv_nsec = (remaining.tv_sec > 0 ? \ - sleep_step : \ - _x_min(remaining.tv_nsec,sleep_step)); \ - nanosleep(&ts, &slept); \ - ts.tv_nsec -= slept.tv_nsec; \ - if (ts.tv_nsec <= remaining.tv_nsec) { \ - remaining.tv_nsec -= ts.tv_nsec; \ - } else { \ - remaining.tv_sec--; \ - remaining.tv_nsec = \ - (sleep_step - (ts.tv_nsec - remaining.tv_nsec)); \ - } \ - if (remaining.tv_sec < 0 || \ - (!remaining.tv_sec && remaining.tv_nsec <= 0)) { \ - return ETIMEDOUT; \ - } \ - } \ - \ - return pthread_rc; \ -} - -timed_lock_define(pthread_mutex_timedlock, - pthread_mutex_t, - pthread_mutex_trylock); - -timed_lock_define(pthread_rwlock_timedrdlock, - pthread_rwlock_t, - pthread_rwlock_tryrdlock); - -timed_lock_define(pthread_rwlock_timedwrlock, - pthread_rwlock_t, - pthread_rwlock_trywrlock); - -#endif //(defined(__MACH__) || defined(__APPLE__)) && _POSIX_TIMERS <= 0 +extern const pfs_key_t pfs_not_instrumented; uint my_tid(void); // Your basic mutex class mutex_t { public: - mutex_t(void); + explicit mutex_t(pfs_key_t key); + mutex_t(void) : mutex_t(pfs_not_instrumented) {} ~mutex_t(void); - void lock(void); - int lock(ulonglong microseconds); - void unlock(void); + void reinit(pfs_key_t key); + void lock( +#ifdef HAVE_PSI_MUTEX_INTERFACE + const char* src_file, + uint src_line +#endif // HAVE_PSI_MUTEX_INTERFACE + ); + void unlock( +#ifdef HAVE_PSI_MUTEX_INTERFACE + const char* src_file, + uint src_line +#endif // HAVE_PSI_MUTEX_INTERFACE + ); #ifdef TOKUDB_DEBUG bool is_owned_by_me(void) const; #endif private: static pthread_t _null_owner; - pthread_mutex_t _mutex; + mysql_mutex_t _mutex; #ifdef TOKUDB_DEBUG - uint _owners; - pthread_t _owner; + uint _owners; + pthread_t _owner; #endif }; // Simple read write lock class rwlock_t { public: - rwlock_t(void); + explicit rwlock_t(pfs_key_t key); + rwlock_t(void) : rwlock_t(pfs_not_instrumented) {} ~rwlock_t(void); - void lock_read(void); - int lock_read(ulonglong microseconds); - void lock_write(void); - int lock_write(ulonglong microseconds); + void lock_read( +#ifdef HAVE_PSI_RWLOCK_INTERFACE + const char* src_file, + uint src_line +#endif // HAVE_PSI_RWLOCK_INTERFACE + ); + void lock_write( +#ifdef HAVE_PSI_RWLOCK_INTERFACE + const char* src_file, + uint src_line +#endif // HAVE_PSI_RWLOCK_INTERFACE + ); void unlock(void); private: rwlock_t(const rwlock_t&); rwlock_t& operator=(const rwlock_t&); - pthread_rwlock_t _rwlock; + mysql_rwlock_t _rwlock; }; // Simple event signal/wait class @@ -224,57 +197,76 @@ private: pthread_t _thread; }; +inline uint my_tid(void) { return (uint)toku_os_gettid(); } -inline uint my_tid(void) { - return (uint)toku_os_gettid(); -} - - -inline mutex_t::mutex_t(void) { - #ifdef TOKUDB_DEBUG - _owners = 0; - _owner = _null_owner; - #endif - int r __attribute__((unused)) = pthread_mutex_init(&_mutex, MY_MUTEX_INIT_FAST); +inline mutex_t::mutex_t(pfs_key_t key) { +#ifdef TOKUDB_DEBUG + _owners = 0; + _owner = _null_owner; +#endif + int r MY_ATTRIBUTE((unused)) = mysql_mutex_init(key, &_mutex, MY_MUTEX_INIT_FAST); assert_debug(r == 0); } -inline mutex_t::~mutex_t(void) { - #ifdef TOKUDB_DEBUG - assert_debug(_owners == 0); - #endif - int r __attribute__((unused)) = pthread_mutex_destroy(&_mutex); +inline mutex_t::~mutex_t() { +#ifdef TOKUDB_DEBUG + assert_debug(_owners == 0); +#endif + int r MY_ATTRIBUTE((unused)) = mysql_mutex_destroy(&_mutex); assert_debug(r == 0); } -inline void mutex_t::lock(void) { - assert_debug(is_owned_by_me() == false); - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); +inline void mutex_t::reinit(pfs_key_t key) { +#ifdef TOKUDB_DEBUG + assert_debug(_owners == 0); +#endif + int r MY_ATTRIBUTE((unused)); + r = mysql_mutex_destroy(&_mutex); + assert_debug(r == 0); + r = mysql_mutex_init(key, &_mutex, MY_MUTEX_INIT_FAST); assert_debug(r == 0); - #ifdef TOKUDB_DEBUG - _owners++; - _owner = pthread_self(); - #endif } -inline int mutex_t::lock(ulonglong microseconds) { +inline void mutex_t::lock( +#ifdef HAVE_PSI_MUTEX_INTERFACE + const char* src_file, + uint src_line +#endif // HAVE_PSI_MUTEX_INTERFACE + ) { assert_debug(is_owned_by_me() == false); - timespec waittime = time::offset_timespec(microseconds); - int r = pthread_mutex_timedlock(&_mutex, &waittime); - #ifdef TOKUDB_DEBUG - if (r == 0) { - _owners++; - _owner = pthread_self(); - } - #endif - assert_debug(r == 0 || r == ETIMEDOUT); - return r; + int r MY_ATTRIBUTE((unused)) = inline_mysql_mutex_lock(&_mutex +#ifdef HAVE_PSI_MUTEX_INTERFACE + , + src_file, + src_line +#endif // HAVE_PSI_MUTEX_INTERFACE + ); + assert_debug(r == 0); +#ifdef TOKUDB_DEBUG + _owners++; + _owner = pthread_self(); +#endif } -inline void mutex_t::unlock(void) { - #ifdef TOKUDB_DEBUG - assert_debug(_owners > 0); - assert_debug(is_owned_by_me()); - _owners--; - _owner = _null_owner; - #endif - int r __attribute__((unused)) = pthread_mutex_unlock(&_mutex); +inline void mutex_t::unlock( +#ifdef HAVE_PSI_MUTEX_INTERFACE + const char* src_file, + uint src_line +#endif // HAVE_PSI_MUTEX_INTERFACE + ) { +#ifndef SAFE_MUTEX + (void)(src_file); + (void)(src_line); +#endif // SAFE_MUTEX +#ifdef TOKUDB_DEBUG + assert_debug(_owners > 0); + assert_debug(is_owned_by_me()); + _owners--; + _owner = _null_owner; +#endif + int r MY_ATTRIBUTE((unused)) = inline_mysql_mutex_unlock(&_mutex +#ifdef SAFE_MUTEX + , + src_file, + src_line +#endif // SAFE_MUTEX + ); assert_debug(r == 0); } #ifdef TOKUDB_DEBUG @@ -283,44 +275,28 @@ inline bool mutex_t::is_owned_by_me(void) const { } #endif - -inline rwlock_t::rwlock_t(void) { - int r __attribute__((unused)) = pthread_rwlock_init(&_rwlock, NULL); - assert_debug(r == 0); -} -inline rwlock_t::~rwlock_t(void) { - int r __attribute__((unused)) = pthread_rwlock_destroy(&_rwlock); +inline rwlock_t::rwlock_t(pfs_key_t key) { + int r MY_ATTRIBUTE((unused)) = mysql_rwlock_init(key, &_rwlock); assert_debug(r == 0); } -inline void rwlock_t::lock_read(void) { - int r; - while ((r = pthread_rwlock_rdlock(&_rwlock)) != 0) { - if (r == EBUSY || r == EAGAIN) { - time::sleep_microsec(1000); - continue; - } - break; - } - assert_debug(r == 0); -} -inline int rwlock_t::lock_read(ulonglong microseconds) { - int r; - timespec waittime = time::offset_timespec(microseconds); - while ((r = pthread_rwlock_timedrdlock(&_rwlock, &waittime)) != 0) { - if (r == EBUSY || r == EAGAIN) { - time::sleep_microsec(1000); - continue; - } else if (r == ETIMEDOUT) { - return ETIMEDOUT; - } - break; - } +inline rwlock_t::~rwlock_t() { + int r MY_ATTRIBUTE((unused)) = mysql_rwlock_destroy(&_rwlock); assert_debug(r == 0); - return r; } -inline void rwlock_t::lock_write(void) { +inline void rwlock_t::lock_read( +#ifdef HAVE_PSI_RWLOCK_INTERFACE + const char* src_file, + uint src_line +#endif // HAVE_PSI_RWLOCK_INTERFACE + ) { int r; - while ((r = pthread_rwlock_wrlock(&_rwlock)) != 0) { + while ((r = inline_mysql_rwlock_rdlock(&_rwlock +#ifdef HAVE_PSI_RWLOCK_INTERFACE + , + src_file, + src_line +#endif // HAVE_PSI_RWLOCK_INTERFACE + )) != 0) { if (r == EBUSY || r == EAGAIN) { time::sleep_microsec(1000); continue; @@ -329,27 +305,33 @@ inline void rwlock_t::lock_write(void) { } assert_debug(r == 0); } -inline int rwlock_t::lock_write(ulonglong microseconds) { +inline void rwlock_t::lock_write( +#ifdef HAVE_PSI_RWLOCK_INTERFACE + const char* src_file, + uint src_line +#endif // HAVE_PSI_RWLOCK_INTERFACE + ) { int r; - timespec waittime = time::offset_timespec(microseconds); - while ((r = pthread_rwlock_timedwrlock(&_rwlock, &waittime)) != 0) { + while ((r = inline_mysql_rwlock_wrlock(&_rwlock +#ifdef HAVE_PSI_RWLOCK_INTERFACE + , + src_file, + src_line +#endif // HAVE_PSI_RWLOCK_INTERFACE + )) != 0) { if (r == EBUSY || r == EAGAIN) { time::sleep_microsec(1000); continue; - } else if (r == ETIMEDOUT) { - return ETIMEDOUT; } break; } assert_debug(r == 0); - return r; } inline void rwlock_t::unlock(void) { - int r __attribute__((unused)) = pthread_rwlock_unlock(&_rwlock); + int r MY_ATTRIBUTE((unused)) = mysql_rwlock_unlock(&_rwlock); assert_debug(r == 0); } -inline rwlock_t::rwlock_t(const rwlock_t&) { -} +inline rwlock_t::rwlock_t(const rwlock_t&) {} inline rwlock_t& rwlock_t::operator=(const rwlock_t&) { return *this; } @@ -358,7 +340,7 @@ inline rwlock_t& rwlock_t::operator=(const rwlock_t&) { inline event_t::event_t(bool create_signalled, bool manual_reset) : _manual_reset(manual_reset) { - int __attribute__((unused)) r = pthread_mutex_init(&_mutex, NULL); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_init(&_mutex, NULL); assert_debug(r == 0); r = pthread_cond_init(&_cond, NULL); assert_debug(r == 0); @@ -370,13 +352,13 @@ inline event_t::event_t(bool create_signalled, bool manual_reset) : _pulsed = false; } inline event_t::~event_t(void) { - int r __attribute__((unused)) = pthread_mutex_destroy(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_destroy(&_mutex); assert_debug(r == 0); r = pthread_cond_destroy(&_cond); assert_debug(r == 0); } inline void event_t::wait(void) { - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); while (_signalled == false && _pulsed == false) { r = pthread_cond_wait(&_cond, &_mutex); @@ -413,7 +395,7 @@ inline int event_t::wait(ulonglong microseconds) { return 0; } inline void event_t::signal(void) { - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); _signalled = true; if (_manual_reset) { @@ -427,7 +409,7 @@ inline void event_t::signal(void) { assert_debug(r == 0); } inline void event_t::pulse(void) { - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); _pulsed = true; r = pthread_cond_signal(&_cond); @@ -437,7 +419,7 @@ inline void event_t::pulse(void) { } inline bool event_t::signalled(void) { bool ret = false; - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); ret = _signalled; r = pthread_mutex_unlock(&_mutex); @@ -445,7 +427,7 @@ inline bool event_t::signalled(void) { return ret; } inline void event_t::reset(void) { - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); _signalled = false; _pulsed = false; @@ -467,21 +449,21 @@ inline semaphore_t::semaphore_t( _initial_count(initial_count), _max_count(max_count) { - int r __attribute__((unused)) = pthread_mutex_init(&_mutex, NULL); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_init(&_mutex, NULL); assert_debug(r == 0); r = pthread_cond_init(&_cond, NULL); assert_debug(r == 0); _signalled = _initial_count; } inline semaphore_t::~semaphore_t(void) { - int r __attribute__((unused)) = pthread_mutex_destroy(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_destroy(&_mutex); assert_debug(r == 0); r = pthread_cond_destroy(&_cond); assert_debug(r == 0); } inline semaphore_t::E_WAIT semaphore_t::wait(void) { E_WAIT ret; - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); while (_signalled == 0 && _interrupted == false) { r = pthread_cond_wait(&_cond, &_mutex); @@ -524,7 +506,7 @@ inline semaphore_t::E_WAIT semaphore_t::wait(ulonglong microseconds) { } inline bool semaphore_t::signal(void) { bool ret = false; - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); if (_signalled < _max_count) { _signalled++; @@ -538,7 +520,7 @@ inline bool semaphore_t::signal(void) { } inline int semaphore_t::signalled(void) { int ret = 0; - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); ret = _signalled; r = pthread_mutex_unlock(&_mutex); @@ -546,7 +528,7 @@ inline int semaphore_t::signalled(void) { return ret; } inline void semaphore_t::reset(void) { - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); _signalled = 0; r = pthread_mutex_unlock(&_mutex); @@ -554,7 +536,7 @@ inline void semaphore_t::reset(void) { return; } inline void semaphore_t::set_interrupt(void) { - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); _interrupted = true; r = pthread_cond_broadcast(&_cond); @@ -563,7 +545,7 @@ inline void semaphore_t::set_interrupt(void) { assert_debug(r == 0); } inline void semaphore_t::clear_interrupt(void) { - int r __attribute__((unused)) = pthread_mutex_lock(&_mutex); + int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); _interrupted = false; r = pthread_mutex_unlock(&_mutex); |