diff options
Diffstat (limited to 'src/third_party/wiredtiger/test/cppsuite/test_harness/workload')
6 files changed, 1235 insertions, 0 deletions
/*
 * diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_model.h
 *   b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_model.h
 * new file mode 100644
 * index 00000000000..07e7c007ea7
 * --- /dev/null
 * +++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_model.h
 * @@ -0,0 +1,89 @@
 */
/*-
 * Public Domain 2014-present MongoDB, Inc.
 * Public Domain 2008-2014 WiredTiger, Inc.
 *
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef DATABASE_MODEL_H
#define DATABASE_MODEL_H

#include <map>
#include <string>
/* get_collection_names() returns a std::vector, so the header must not rely on a transitive include. */
#include <vector>

namespace test_harness {

/* Key/Value type. */
typedef std::string key_value_t;

/* Representation of key states. */
struct key_t {
    /* Whether the key is currently considered present in its collection. */
    bool exists;
};

/* Iterator type used to iterate over keys that are stored in the data model. */
typedef std::map<test_harness::key_value_t, test_harness::key_t>::const_iterator keys_iterator_t;

/* Representation of a value. */
struct value_t {
    key_value_t value;
};

/* A collection is made of mapped Key objects. */
struct collection_t {
    /* State of every key known to the model, ordered by key. */
    std::map<key_value_t, key_t> keys;
    /*
     * Optional map of values, null by default. The pointer is a plain raw pointer: copying a
     * collection_t copies the pointer only, and nothing in this struct frees it.
     */
    std::map<key_value_t, value_t> *values = {nullptr};
};

/* Representation of the collections in memory. */
class database {
    public:
    /*
     * Return an iterator on the first key of the given collection. std::map::at throws
     * std::out_of_range if the collection is not part of the model.
     */
    const keys_iterator_t
    get_collection_keys_begin(const std::string &collection_name) const
    {
        return (collections.at(collection_name).keys.begin());
    }

    /*
     * Return the past-the-end key iterator of the given collection. std::map::at throws
     * std::out_of_range if the collection is not part of the model.
     */
    const keys_iterator_t
    get_collection_keys_end(const std::string &collection_name) const
    {
        return (collections.at(collection_name).keys.end());
    }

    /* Return a copy of the collection names, in the (sorted) order of the underlying map. */
    const std::vector<std::string>
    get_collection_names() const
    {
        std::vector<std::string> collection_names;

        /* The final size is known up front: allocate once. */
        collection_names.reserve(collections.size());
        for (auto const &it : collections)
            collection_names.push_back(it.first);

        return (collection_names);
    }

    /* Collections indexed by name. */
    std::map<std::string, collection_t> collections;
};
} // namespace test_harness

#endif
/*
 * diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_operation.h
 *   b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_operation.h
 * new file mode 100644
 * index 00000000000..7a88ed9b662
 * --- /dev/null
 * +++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_operation.h
 * @@ -0,0 +1,274 @@
 */
/*-
 * Public Domain 2014-present MongoDB, Inc.
 * Public Domain 2008-2014 WiredTiger, Inc.
 *
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 */
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef DATABASE_OPERATION_H +#define DATABASE_OPERATION_H + +#include "database_model.h" +#include "workload_tracking.h" +#include "thread_context.h" + +namespace test_harness { +class database_operation { + public: + /* + * Function that performs the following steps using the configuration that is defined by the + * test: + * - Create the working dir. + * - Open a connection. + * - Open a session. + * - Create n collections as per the configuration. + * - Open a cursor on each collection. + * - Insert m key/value pairs in each collection. Values are random strings which size is + * defined by the configuration. + * - Store in memory the created collections and the generated keys that were inserted. 
+ */ + virtual void + populate(database &database, timestamp_manager *timestamp_manager, configuration *config, + workload_tracking *tracking) + { + WT_CURSOR *cursor; + WT_SESSION *session; + wt_timestamp_t ts; + int64_t collection_count, key_count, key_cpt, key_size, value_size; + std::string collection_name, cfg, home; + key_value_t generated_key, generated_value; + bool ts_enabled = timestamp_manager->is_enabled(); + + cursor = nullptr; + collection_count = key_count = key_size = value_size = 0; + + /* Get a session. */ + session = connection_manager::instance().create_session(); + /* Create n collections as per the configuration and store each collection name. */ + collection_count = config->get_int(COLLECTION_COUNT); + for (int i = 0; i < collection_count; ++i) { + collection_name = "table:collection" + std::to_string(i); + database.collections[collection_name] = {}; + testutil_check( + session->create(session, collection_name.c_str(), DEFAULT_FRAMEWORK_SCHEMA)); + ts = timestamp_manager->get_next_ts(); + testutil_check(tracking->save(tracking_operation::CREATE, collection_name, 0, "", ts)); + } + debug_print(std::to_string(collection_count) + " collections created", DEBUG_TRACE); + + /* Open a cursor on each collection and use the configuration to insert key/value pairs. */ + key_count = config->get_int(KEY_COUNT); + value_size = config->get_int(VALUE_SIZE); + testutil_assert(value_size > 0); + key_size = config->get_int(KEY_SIZE); + testutil_assert(key_size > 0); + /* Keys must be unique. */ + testutil_assert(key_count <= pow(10, key_size)); + + for (const auto &it_collections : database.collections) { + collection_name = it_collections.first; + key_cpt = 0; + /* WiredTiger lets you open a cursor on a collection using the same pointer. When a + * session is closed, WiredTiger APIs close the cursors too. 
*/ + testutil_check( + session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor)); + for (size_t j = 0; j < key_count; ++j) { + /* Generation of a unique key. */ + generated_key = number_to_string(key_size, key_cpt); + ++key_cpt; + /* + * Generation of a random string value using the size defined in the test + * configuration. + */ + generated_value = + random_generator::random_generator::instance().generate_string(value_size); + ts = timestamp_manager->get_next_ts(); + if (ts_enabled) + testutil_check(session->begin_transaction(session, "")); + testutil_check(insert(cursor, tracking, collection_name, generated_key.c_str(), + generated_value.c_str(), ts)); + if (ts_enabled) { + cfg = std::string(COMMIT_TS) + "=" + timestamp_manager->decimal_to_hex(ts); + testutil_check(session->commit_transaction(session, cfg.c_str())); + } + /* Update the memory representation of the collections. */ + database.collections[collection_name].keys[generated_key].exists = true; + /* Values are not stored here. */ + database.collections[collection_name].values = nullptr; + } + } + debug_print("Populate stage done", DEBUG_TRACE); + } + + /* Basic read operation that walks a cursors across all collections. */ + virtual void + read_operation(thread_context &context, WT_SESSION *session) + { + WT_CURSOR *cursor; + std::vector<WT_CURSOR *> cursors; + + testutil_assert(session != nullptr); + /* Get a cursor for each collection in collection_names. */ + for (const auto &it : context.get_collection_names()) { + testutil_check(session->open_cursor(session, it.c_str(), NULL, NULL, &cursor)); + cursors.push_back(cursor); + } + + while (!cursors.empty() && context.is_running()) { + /* Walk each cursor. */ + for (const auto &it : cursors) { + if (it->next(it) != 0) + it->reset(it); + } + } + } + + /* + * Basic update operation that updates all the keys to a random value in each collection. 
+ */ + virtual void + update_operation(thread_context &context, WT_SESSION *session) + { + WT_CURSOR *cursor; + wt_timestamp_t ts; + std::vector<WT_CURSOR *> cursors; + std::string collection_name; + std::vector<std::string> collection_names = context.get_collection_names(); + key_value_t generated_value, key; + int64_t cpt, value_size = context.get_value_size(); + + testutil_assert(session != nullptr); + /* Get a cursor for each collection in collection_names. */ + for (const auto &it : collection_names) { + testutil_check(session->open_cursor(session, it.c_str(), NULL, NULL, &cursor)); + cursors.push_back(cursor); + } + + cpt = 0; + /* Walk each cursor. */ + for (const auto &it : cursors) { + collection_name = collection_names[cpt]; + /* Walk each key. */ + for (keys_iterator_t iter_key = context.get_collection_keys_begin(collection_name); + iter_key != context.get_collection_keys_end(collection_name); ++iter_key) { + /* Do not process removed keys. */ + if (!iter_key->second.exists) + continue; + + ts = context.get_timestamp_manager()->get_next_ts(); + + /* Start a transaction if possible. */ + if (!context.is_in_transaction()) { + context.begin_transaction(session, ""); + context.set_commit_timestamp(session, ts); + } + generated_value = + random_generator::random_generator::instance().generate_string(value_size); + testutil_check(update(context.get_tracking(), it, collection_name, + iter_key->first.c_str(), generated_value.c_str(), ts)); + + /* Commit the current transaction if possible. */ + context.increment_operation_count(); + if (context.can_commit_transaction()) + context.commit_transaction(session, ""); + } + ++cpt; + } + + /* + * The update operations will be later on inside a loop that will be managed through + * throttle management. + */ + while (context.is_running()) + context.sleep(); + + /* Make sure the last operation is committed now the work is finished. 
*/ + if (context.is_in_transaction()) + context.commit_transaction(session, ""); + } + + private: + /* WiredTiger APIs wrappers for single operations. */ + template <typename K, typename V> + int + insert(WT_CURSOR *cursor, workload_tracking *tracking, const std::string &collection_name, + const K &key, const V &value, wt_timestamp_t ts) + { + int error_code; + + testutil_assert(cursor != nullptr); + cursor->set_key(cursor, key); + cursor->set_value(cursor, value); + error_code = cursor->insert(cursor); + + if (error_code == 0) { + debug_print("key/value inserted", DEBUG_TRACE); + error_code = + tracking->save(tracking_operation::INSERT, collection_name, key, value, ts); + } else + debug_print("key/value insertion failed", DEBUG_ERROR); + + return (error_code); + } + + template <typename K, typename V> + static int + update(workload_tracking *tracking, WT_CURSOR *cursor, const std::string &collection_name, + K key, V value, wt_timestamp_t ts) + { + int error_code; + + testutil_assert(tracking != nullptr); + testutil_assert(cursor != nullptr); + cursor->set_key(cursor, key); + cursor->set_value(cursor, value); + error_code = cursor->update(cursor); + + if (error_code == 0) { + debug_print("key/value update", DEBUG_TRACE); + error_code = + tracking->save(tracking_operation::UPDATE, collection_name, key, value, ts); + } else + debug_print("key/value update failed", DEBUG_ERROR); + + return (error_code); + } + + /* + * Convert a number to a string. If the resulting string is less than the given length, padding + * of '0' is added. 
+ */ + static std::string + number_to_string(uint64_t size, uint64_t value) + { + std::string str, value_str = std::to_string(value); + testutil_assert(size >= value_str.size()); + uint64_t diff = size - value_str.size(); + std::string s(diff, '0'); + str = s.append(value_str); + return (str); + } +}; +} // namespace test_harness +#endif diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/random_generator.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/random_generator.h new file mode 100644 index 00000000000..7df4d7da3fb --- /dev/null +++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/random_generator.h @@ -0,0 +1,84 @@ +/*- + * Public Domain 2014-present MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/*
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef RANDOM_GENERATOR_H
#define RANDOM_GENERATOR_H

#include <cstddef>
#include <cstdint>
#include <random>
#include <string>

namespace test_harness {

/*
 * Helper class to generate random values using uniform distributions. A single engine, seeded
 * once from std::random_device, backs every call.
 *
 * NOTE(review): the engine is shared by all users of the singleton and is not synchronized in
 * this class - confirm whether callers on different threads need external locking.
 */
class random_generator {
    public:
    /* No copies of the singleton allowed. */
    random_generator(random_generator const &) = delete;
    random_generator &operator=(random_generator const &) = delete;

    /* Return the unique instance, created on first use. */
    static random_generator &
    instance()
    {
        static random_generator _instance;
        return _instance;
    }

    /*
     * Generate a random string of a given length made of the characters [0-9A-Za-z]. A length of
     * zero gives an empty string.
     */
    std::string
    generate_string(std::size_t length)
    {
        std::string random_string;

        /* The final size is known: allocate once. */
        random_string.reserve(length);
        for (std::size_t i = 0; i < length; ++i)
            random_string += _characters[_distribution(_generator)];

        return (random_string);
    }

    /*
     * Generate a random integer between min and max, both included. min must not be greater than
     * max, as required by std::uniform_int_distribution.
     */
    int64_t
    generate_integer(int64_t min, int64_t max)
    {
        /*
         * The distribution is instantiated on int64_t: the default template argument is int, which
         * would truncate bounds that do not fit in an int.
         */
        std::uniform_int_distribution<int64_t> dis(min, max);
        return dis(_generator);
    }

    private:
    random_generator()
        : _generator(std::random_device{}()), _distribution(0, _characters.size() - 1)
    {
    }

    /*
     * Members are initialized in declaration order: _characters must stay declared before
     * _distribution, whose upper bound is computed from its size.
     */
    const std::string _characters =
      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    std::mt19937 _generator;
    /* Picks an index in _characters; std::size_t matches the string index type (no narrowing). */
    std::uniform_int_distribution<std::size_t> _distribution;
};
} // namespace test_harness

#endif
/*
 * diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/thread_context.h
 *   b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/thread_context.h
 * new file mode 100644
 * index 00000000000..e5275bc7819
 * --- /dev/null
 * +++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/thread_context.h
 * @@ -0,0 +1,214 @@
 */
/*-
 * Public Domain 2014-present MongoDB, Inc.
 * Public Domain 2008-2014 WiredTiger, Inc.
 *
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 */
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef THREAD_CONTEXT_H +#define THREAD_CONTEXT_H + +#include "../core/throttle.h" +#include "database_model.h" +#include "random_generator.h" +#include "workload_tracking.h" + +namespace test_harness { +/* Define the different thread operations. */ +enum class thread_operation { + INSERT, + UPDATE, + READ, + REMOVE, + CHECKPOINT, + TIMESTAMP, + MONITOR, + COMPONENT +}; + +/* Container class for a thread and any data types it may need to interact with the database. */ +class thread_context { + public: + thread_context(timestamp_manager *timestamp_manager, workload_tracking *tracking, database &db, + thread_operation type, int64_t max_op, int64_t min_op, int64_t value_size, throttle throttle) + : _database(db), _min_op(min_op), _max_op(max_op), _timestamp_manager(timestamp_manager), + _type(type), _tracking(tracking), _value_size(value_size), _throttle(throttle) + { + } + + void + finish() + { + _running = false; + } + + const std::vector<std::string> + get_collection_names() const + { + return (_database.get_collection_names()); + } + + const keys_iterator_t + get_collection_keys_begin(const std::string &collection_name) const + { + return (_database.get_collection_keys_begin(collection_name)); + } + + const keys_iterator_t + get_collection_keys_end(const std::string &collection_name) const + { + return (_database.get_collection_keys_end(collection_name)); + } + + thread_operation + get_thread_operation() const + { + return (_type); + } + + timestamp_manager * + get_timestamp_manager() const + { + return (_timestamp_manager); + } + + workload_tracking * + get_tracking() const + { + return (_tracking); + } + + int64_t + get_value_size() const + { + return (_value_size); + } + + bool + is_running() const + { 
+ return (_running); + } + + bool + is_in_transaction() const + { + return (_in_txn); + } + + void + sleep() + { + _throttle.sleep(); + } + + void + set_running(bool running) + { + _running = running; + } + + void + begin_transaction(WT_SESSION *session, const std::string &config) + { + if (!_in_txn && _timestamp_manager->is_enabled()) { + testutil_check( + session->begin_transaction(session, config.empty() ? nullptr : config.c_str())); + /* This randomizes the number of operations to be executed in one transaction. */ + _max_op_count = random_generator::instance().generate_integer(_min_op, _max_op); + _current_op_count = 0; + _in_txn = true; + } + } + + /* + * The current transaction can be committed if: + * - The timestamp manager is enabled and + * - A transaction has started and + * - The thread is done working. This is useful when the test is ended and the thread has + * not reached the maximum number of operations per transaction or + * - The number of operations executed in the current transaction has exceeded the + * threshold. + */ + bool + can_commit_transaction() const + { + return (_timestamp_manager->is_enabled() && _in_txn && + (!_running || (_current_op_count > _max_op_count))); + } + + void + commit_transaction(WT_SESSION *session, const std::string &config) + { + /* A transaction cannot be committed if not started. */ + testutil_assert(_in_txn); + testutil_check( + session->commit_transaction(session, config.empty() ? nullptr : config.c_str())); + _in_txn = false; + } + + void + increment_operation_count(uint64_t inc = 1) + { + _current_op_count += inc; + } + + /* + * Set a commit timestamp if the timestamp manager is enabled. 
+ */ + void + set_commit_timestamp(WT_SESSION *session, wt_timestamp_t ts) + { + if (!_timestamp_manager->is_enabled()) + return; + + std::string config = std::string(COMMIT_TS) + "=" + _timestamp_manager->decimal_to_hex(ts); + testutil_check(session->timestamp_transaction(session, config.c_str())); + } + + private: + /* Representation of the collections and their key/value pairs in memory. */ + database _database; + /* + * _current_op_count is the current number of operations that have been executed in the current + * transaction. + */ + uint64_t _current_op_count = 0U; + bool _in_txn = false, _running = false; + /* + * _min_op and _max_op are the minimum and maximum number of operations within one transaction. + * _max_op_count is the current maximum number of operations that can be executed in the current + * transaction. _max_op_count will always be <= _max_op. + */ + int64_t _min_op, _max_op, _max_op_count = 0; + timestamp_manager *_timestamp_manager; + const thread_operation _type; + throttle _throttle; + workload_tracking *_tracking; + /* Temporary member that comes from the test configuration. */ + int64_t _value_size; +}; +} // namespace test_harness + +#endif diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h new file mode 100644 index 00000000000..4d1b2d755a8 --- /dev/null +++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h @@ -0,0 +1,157 @@ +/*- + * Public Domain 2014-present MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. 
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef WORKLOAD_TRACKING_H +#define WORKLOAD_TRACKING_H + +/* + * Default schema for tracking operations on collections (key_format: Collection name / Key / + * Timestamp, value_format: Operation type / Value) + */ +#define OPERATION_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(SSQ) +#define OPERATION_TRACKING_VALUE_FORMAT WT_UNCHECKED_STRING(iS) +#define OPERATION_TRACKING_TABLE_CONFIG \ + "key_format=" OPERATION_TRACKING_KEY_FORMAT ",value_format=" OPERATION_TRACKING_VALUE_FORMAT + +/* + * Default schema for tracking schema operations on collections (key_format: Collection name / + * Timestamp, value_format: Operation type) + */ +#define SCHEMA_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(SQ) +#define SCHEMA_TRACKING_VALUE_FORMAT WT_UNCHECKED_STRING(i) +#define SCHEMA_TRACKING_TABLE_CONFIG \ + "key_format=" SCHEMA_TRACKING_KEY_FORMAT ",value_format=" SCHEMA_TRACKING_VALUE_FORMAT + +namespace test_harness { +/* Tracking operations. 
*/ +enum class tracking_operation { CREATE, DELETE_COLLECTION, DELETE_KEY, INSERT, UPDATE }; +/* Class used to track operations performed on collections */ +class workload_tracking : public component { + + public: + workload_tracking(configuration *_config, const std::string &operation_table_config, + const std::string &operation_table_name, const std::string &schema_table_config, + const std::string &schema_table_name) + : component("workload_tracking", _config), _operation_table_config(operation_table_config), + _operation_table_name(operation_table_name), _schema_table_config(schema_table_config), + _schema_table_name(schema_table_name) + { + } + + const std::string & + get_schema_table_name() const + { + return _schema_table_name; + } + + const std::string & + get_operation_table_name() const + { + return _operation_table_name; + } + + void + load() + { + WT_SESSION *session; + + component::load(); + + if (!_enabled) + return; + + /* Initiate schema tracking. */ + session = connection_manager::instance().create_session(); + testutil_check( + session->create(session, _schema_table_name.c_str(), _schema_table_config.c_str())); + testutil_check( + session->open_cursor(session, _schema_table_name.c_str(), NULL, NULL, &_cursor_schema)); + debug_print("Schema tracking initiated", DEBUG_TRACE); + + /* Initiate operations tracking. */ + testutil_check( + session->create(session, _operation_table_name.c_str(), _operation_table_config.c_str())); + testutil_check(session->open_cursor( + session, _operation_table_name.c_str(), NULL, NULL, &_cursor_operations)); + debug_print("Operations tracking created", DEBUG_TRACE); + } + + void + run() + { + /* Does not do anything. 
*/ + } + + template <typename K, typename V> + int + save(const tracking_operation &operation, const std::string &collection_name, const K &key, + const V &value, wt_timestamp_t ts) + { + WT_CURSOR *cursor; + int error_code = 0; + + if (!_enabled) + return (error_code); + + /* Select the correct cursor to save in the collection associated to specific operations. */ + switch (operation) { + case tracking_operation::CREATE: + case tracking_operation::DELETE_COLLECTION: + cursor = _cursor_schema; + cursor->set_key(cursor, collection_name.c_str(), ts); + cursor->set_value(cursor, static_cast<int>(operation)); + break; + + default: + cursor = _cursor_operations; + cursor->set_key(cursor, collection_name.c_str(), key, ts); + cursor->set_value(cursor, static_cast<int>(operation), value); + break; + } + + error_code = cursor->insert(cursor); + + if (error_code == 0) + debug_print("Workload tracking saved operation.", DEBUG_TRACE); + else + debug_print("Workload tracking failed to save operation !", DEBUG_ERROR); + + return error_code; + } + + private: + WT_CURSOR *_cursor_operations = nullptr; + WT_CURSOR *_cursor_schema = nullptr; + const std::string _operation_table_config; + const std::string _operation_table_name; + const std::string _schema_table_config; + const std::string _schema_table_name; +}; +} // namespace test_harness + +#endif diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_validation.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_validation.h new file mode 100644 index 00000000000..5ef7992e773 --- /dev/null +++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_validation.h @@ -0,0 +1,417 @@ +/*- + * Public Domain 2014-present MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. 
+ * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef WORKLOAD_VALIDATION_H +#define WORKLOAD_VALIDATION_H + +#include <string> + +extern "C" { +#include "wiredtiger.h" +} + +#include "database_model.h" + +namespace test_harness { + +/* + * Class that can validate database state and collection data. + */ +class workload_validation { + public: + /* + * Validate the on disk data against what has been tracked during the test. + * - The first step is to replay the tracked operations so a representation in memory of the + * collections is created. This representation is then compared to what is on disk. + * - The second step is to go through what has been saved on disk and make sure the memory + * representation has the same data. 
+ * operation_table_name is the collection that contains all the operations about the key/value + * pairs in the different collections used during the test. schema_table_name is the collection + * that contains all the operations about the creation or deletion of collections during the + * test. + */ + bool + validate(const std::string &operation_table_name, const std::string &schema_table_name, + database &database) + { + WT_SESSION *session; + std::string collection_name; + /* Existing collections after the test. */ + std::vector<std::string> created_collections, deleted_collections; + bool is_valid = true; + + session = connection_manager::instance().create_session(); + + /* Retrieve the collections that were created and deleted during the test. */ + collection_name = schema_table_name; + parse_schema_tracking_table( + session, collection_name, created_collections, deleted_collections); + + /* Make sure they exist in memory. */ + for (auto const &it : created_collections) { + if (database.collections.count(it) == 0) { + debug_print("Collection missing in memory: " + it, DEBUG_ERROR); + is_valid = false; + break; + } + } + + if (!is_valid) + return (is_valid); + + /* Make sure they don't exist in memory nor on disk. */ + for (auto const &it : deleted_collections) { + if (database.collections.count(it) > 0) { + debug_print( + "Collection present in memory while it has been tracked as deleted: " + it, + DEBUG_ERROR); + is_valid = false; + break; + } + if (!verify_collection_state(session, it, false)) { + debug_print( + "Collection present on disk while it has been tracked as deleted: " + it, + DEBUG_ERROR); + is_valid = false; + break; + } + } + + for (auto const &collection_name : database.get_collection_names()) { + if (!is_valid) + break; + + /* Get the values associated to the different keys in the current collection. 
*/ + parse_operation_tracking_table( + session, operation_table_name, collection_name, database); + /* Check all tracked operations in memory against the database on disk. */ + if (!check_reference(session, collection_name, database)) { + debug_print( + "check_reference failed for collection " + collection_name, DEBUG_ERROR); + is_valid = false; + } + /* Check what has been saved on disk against what has been tracked. */ + else if (!check_disk_state(session, collection_name, database)) { + debug_print( + "check_disk_state failed for collection " + collection_name, DEBUG_ERROR); + is_valid = false; + } + /* Clear memory. */ + delete database.collections[collection_name].values; + database.collections[collection_name].values = nullptr; + } + + return (is_valid); + } + + private: + /* + * Read the tracking table to retrieve the created and deleted collections during the test. + * collection_name is the collection that contains the operations on the different collections + * during the test. 
+ */ + void + parse_schema_tracking_table(WT_SESSION *session, const std::string &collection_name, + std::vector<std::string> &created_collections, std::vector<std::string> &deleted_collections) + { + WT_CURSOR *cursor; + wt_timestamp_t key_timestamp; + const char *key_collection_name; + int value_operation_type; + + testutil_check(session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor)); + + while (cursor->next(cursor) == 0) { + testutil_check(cursor->get_key(cursor, &key_collection_name, &key_timestamp)); + testutil_check(cursor->get_value(cursor, &value_operation_type)); + + debug_print("Collection name is " + std::string(key_collection_name), DEBUG_TRACE); + debug_print("Timestamp is " + std::to_string(key_timestamp), DEBUG_TRACE); + debug_print("Operation type is " + std::to_string(value_operation_type), DEBUG_TRACE); + + if (static_cast<tracking_operation>(value_operation_type) == + tracking_operation::CREATE) { + deleted_collections.erase(std::remove(deleted_collections.begin(), + deleted_collections.end(), key_collection_name), + deleted_collections.end()); + created_collections.push_back(key_collection_name); + } else if (static_cast<tracking_operation>(value_operation_type) == + tracking_operation::DELETE_COLLECTION) { + created_collections.erase(std::remove(created_collections.begin(), + created_collections.end(), key_collection_name), + created_collections.end()); + deleted_collections.push_back(key_collection_name); + } + } + } + + /* + * Parse the tracked operations to build a representation in memory of the collections at the + * end of the test. tracking_collection_name is the tracking collection used to save the + * operations performed on the collections during the test. collection_name is the collection + * that needs to be represented in memory. 
+ */ + void + parse_operation_tracking_table(WT_SESSION *session, const std::string &tracking_collection_name, + const std::string &collection_name, database &database) + { + WT_CURSOR *cursor; + wt_timestamp_t key_timestamp; + int exact, value_operation_type; + const char *key, *key_collection_name, *value; + std::vector<key_value_t> collection_keys; + std::string key_str; + + /* Retrieve all keys from the given collection. */ + for (auto const &it : database.collections.at(collection_name).keys) + collection_keys.push_back(it.first); + /* There must be at least a key. */ + testutil_assert(!collection_keys.empty()); + /* Sort keys. */ + std::sort(collection_keys.begin(), collection_keys.end()); + /* Use the first key as a parameter for search_near. */ + key_str = collection_keys[0]; + + testutil_check( + session->open_cursor(session, tracking_collection_name.c_str(), NULL, NULL, &cursor)); + + cursor->set_key(cursor, collection_name.c_str(), key_str.c_str()); + testutil_check(cursor->search_near(cursor, &exact)); + /* + * Since the timestamp which is part of the key is not provided, exact cannot be 0. If it is + * -1, we need to go to the next key. + */ + testutil_assert(exact != 0); + if (exact < 0) + testutil_check(cursor->next(cursor)); + + do { + testutil_check(cursor->get_key(cursor, &key_collection_name, &key, &key_timestamp)); + testutil_check(cursor->get_value(cursor, &value_operation_type, &value)); + + debug_print("Collection name is " + std::string(key_collection_name), DEBUG_TRACE); + debug_print("Key is " + std::string(key), DEBUG_TRACE); + debug_print("Timestamp is " + std::to_string(key_timestamp), DEBUG_TRACE); + debug_print("Operation type is " + std::to_string(value_operation_type), DEBUG_TRACE); + debug_print("Value is " + std::string(value), DEBUG_TRACE); + + /* + * If the cursor is reading an operation for a different collection, we know all the + * operations have been parsed for the collection we were interested in. 
+ */ + if (std::string(key_collection_name) != collection_name) + break; + + /* Replay the current operation. */ + switch (static_cast<tracking_operation>(value_operation_type)) { + case tracking_operation::DELETE_KEY: + /* + * Operations are parsed from the oldest to the most recent one. It is safe to + * assume the key has been inserted previously in an existing collection and can be + * safely deleted. + */ + database.collections.at(key_collection_name).keys.at(std::string(key)).exists = + false; + delete database.collections.at(key_collection_name).values; + database.collections.at(key_collection_name).values = nullptr; + break; + case tracking_operation::INSERT: { + /* Keys are unique, it is safe to assume the key has not been encountered before. */ + database.collections[key_collection_name].keys[std::string(key)].exists = true; + if (database.collections[key_collection_name].values == nullptr) { + database.collections[key_collection_name].values = + new std::map<key_value_t, value_t>(); + } + value_t v; + v.value = key_value_t(value); + std::pair<key_value_t, value_t> pair(key_value_t(key), v); + database.collections[key_collection_name].values->insert(pair); + break; + } + case tracking_operation::UPDATE: + database.collections[key_collection_name].values->at(key).value = + key_value_t(value); + break; + default: + testutil_die(DEBUG_ABORT, "Unexpected operation in the tracking table: %d", + value_operation_type); + break; + } + + } while (cursor->next(cursor) == 0); + + if (cursor->reset(cursor) != 0) + debug_print("Cursor could not be reset !", DEBUG_ERROR); + } + + /* + * Compare the tracked operations against what has been saved on disk. database is the + * representation in memory of the collections after the test according to the tracking table. 
+ */ + bool + check_reference( + WT_SESSION *session, const std::string &collection_name, const database &database) + { + bool is_valid; + collection_t collection; + key_t key; + key_value_t key_str; + + /* Check the collection exists on disk. */ + is_valid = verify_collection_state(session, collection_name, true); + + if (is_valid) { + collection = database.collections.at(collection_name); + /* Walk through each key/value pair of the current collection. */ + for (const auto &keys : collection.keys) { + key_str = keys.first; + key = keys.second; + /* The key/value pair exists. */ + if (key.exists) + is_valid = (is_key_present(session, collection_name, key_str.c_str()) == true); + /* The key has been deleted. */ + else + is_valid = (is_key_present(session, collection_name, key_str.c_str()) == false); + + /* Check the associated value is valid. */ + if (is_valid && key.exists) { + testutil_assert(collection.values != nullptr); + is_valid = verify_value(session, collection_name, key_str.c_str(), + collection.values->at(key_str).value); + } + + if (!is_valid) { + debug_print("check_reference failed for key " + key_str, DEBUG_ERROR); + break; + } + } + } + + if (!is_valid) + debug_print("check_reference failed for collection " + collection_name, DEBUG_ERROR); + + return (is_valid); + } + + /* Check what is present on disk against what has been tracked. */ + bool + check_disk_state( + WT_SESSION *session, const std::string &collection_name, const database &database) + { + WT_CURSOR *cursor; + collection_t collection; + bool is_valid = true; + /* Key/value pairs on disk. */ + const char *key_on_disk, *value_on_disk; + key_value_t key_str, value_str; + + testutil_check(session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor)); + + collection = database.collections.at(collection_name); + + /* Read the collection on disk. 
*/ + while (is_valid && (cursor->next(cursor) == 0)) { + testutil_check(cursor->get_key(cursor, &key_on_disk)); + testutil_check(cursor->get_value(cursor, &value_on_disk)); + + key_str = std::string(key_on_disk); + + debug_print("Key on disk is " + key_str, DEBUG_TRACE); + debug_print("Value on disk is " + std::string(value_on_disk), DEBUG_TRACE); + + /* Check the key on disk has been saved in memory too. */ + if ((collection.keys.count(key_str) > 0) && collection.keys.at(key_str).exists) { + /* Memory should be allocated for values. */ + testutil_assert(collection.values != nullptr); + value_str = collection.values->at(key_str).value; + /* + * Check the key/value pair on disk matches the one in memory from the tracked + * operations. + */ + is_valid = (value_str == key_value_t(value_on_disk)); + if (!is_valid) + debug_print(" Key/Value pair mismatch.\n Disk key: " + key_str + + "\n Disk value: " + std ::string(value_on_disk) + + "\n Tracking table key: " + key_str + "\n Tracking table value exists: " + + std::to_string(collection.keys.at(key_str).exists) + + "\n Tracking table value: " + value_str, + DEBUG_ERROR); + } else { + is_valid = false; + debug_print( + "The key " + std::string(key_on_disk) + " present on disk has not been tracked", + DEBUG_ERROR); + } + } + + return (is_valid); + } + + /* + * Check whether a collection exists on disk. collection_name is the collection to check. exists + * needs to be set to true if the collection is expected to be existing, false otherwise. + */ + bool + verify_collection_state( + WT_SESSION *session, const std::string &collection_name, bool exists) const + { + WT_CURSOR *cursor; + int ret = session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor); + return (exists ? 
(ret == 0) : (ret != 0)); + } + + template <typename K> + bool + is_key_present(WT_SESSION *session, const std::string &collection_name, const K &key) + { + WT_CURSOR *cursor; + testutil_check(session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor)); + cursor->set_key(cursor, key); + return (cursor->search(cursor) == 0); + } + + /* Verify the given expected value is the same on disk. */ + template <typename K, typename V> + bool + verify_value(WT_SESSION *session, const std::string &collection_name, const K &key, + const V &expected_value) + { + WT_CURSOR *cursor; + const char *value; + + testutil_check(session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor)); + cursor->set_key(cursor, key); + testutil_check(cursor->search(cursor)); + testutil_check(cursor->get_value(cursor, &value)); + + return (key_value_t(value) == expected_value); + } +}; +} // namespace test_harness + +#endif |