diff options
author | Alison Felizzi <alison.felizzi@mongodb.com> | 2021-09-23 05:14:50 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-09-23 05:38:16 +0000 |
commit | 1cd606c1823a57fd2e49d0652b09bdbc6da56e3b (patch) | |
tree | 3d2ca7c56419913d0c92a08fbce82da6769604b0 /src/third_party/wiredtiger/test/cppsuite/tests | |
parent | d78670d0cd40decd8a5eab917b57c3943cd910df (diff) | |
download | mongo-1cd606c1823a57fd2e49d0652b09bdbc6da56e3b.tar.gz |
Import wiredtiger: a22a2372bcfee998e8c55e19ed49150668d41afc from branch mongodb-master
ref: bc922a0588..a22a2372bc
for: 5.1.0
WT-8086 Create cpp test for prefix search near entries
Diffstat (limited to 'src/third_party/wiredtiger/test/cppsuite/tests')
-rwxr-xr-x | src/third_party/wiredtiger/test/cppsuite/tests/run.cxx | 5 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/cppsuite/tests/search_near_01.cxx | 262 |
2 files changed, 266 insertions, 1 deletions
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx b/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx index 0c231e6568d..451be6accb4 100755 --- a/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx +++ b/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx @@ -37,6 +37,7 @@ #include "example_test.cxx" #include "burst_inserts.cxx" #include "hs_cleanup.cxx" +#include "search_near_01.cxx" std::string parse_configuration_from_file(const std::string &filename) @@ -117,6 +118,8 @@ run_test(const std::string &test_name, const std::string &config, const std::str hs_cleanup(test_harness::test_args{config, test_name, wt_open_config}).run(); else if (test_name == "burst_inserts") burst_inserts(test_harness::test_args{config, test_name, wt_open_config}).run(); + else if (test_name == "search_near_01") + search_near_01(test_harness::test_args{config, test_name, wt_open_config}).run(); else { test_harness::logger::log_msg(LOG_ERROR, "Test not found: " + test_name); error_code = -1; @@ -140,7 +143,7 @@ main(int argc, char *argv[]) std::string cfg, config_filename, current_cfg, current_test_name, test_name, wt_open_config; int64_t error_code = 0; const std::vector<std::string> all_tests = { - "example_test", "burst_inserts", "hs_cleanup", "base_test"}; + "base_test", "burst_inserts", "example_test", "hs_cleanup", "search_near_01"}; /* Set the program name for error messages. */ (void)testutil_set_progname(argv); diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/search_near_01.cxx b/src/third_party/wiredtiger/test/cppsuite/tests/search_near_01.cxx new file mode 100644 index 00000000000..76f4131b249 --- /dev/null +++ b/src/third_party/wiredtiger/test/cppsuite/tests/search_near_01.cxx @@ -0,0 +1,262 @@ +/*- + * Public Domain 2014-present MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "test_harness/util/api_const.h" +#include "test_harness/workload/random_generator.h" +#include "test_harness/workload/thread_context.h" +#include "test_harness/test.h" +#include "test_harness/thread_manager.h" + +using namespace test_harness; +/* + * In this test, we want to verify that search_near with prefix enabled only traverses the portion + * of the tree that follows the prefix portion of the search key. The test is composed of a populate + * phase followed by a read phase. The populate phase will insert a set of random generated keys + * with a prefix of aaa -> zzz. The read phase will continuously perform prefix search near calls, + * and validate that the number of entries traversed is within bounds of the search key. + */ +class search_near_01 : public test_harness::test { + uint64_t keys_per_prefix = 0; + uint64_t srchkey_len = 0; + const std::string ALPHABET{"abcdefghijklmnopqrstuvwxyz"}; + const uint64_t PREFIX_KEY_LEN = 3; + + static void + populate_worker(thread_context *tc, const std::string &ALPHABET, uint64_t PREFIX_KEY_LEN) + { + logger::log_msg(LOG_INFO, "Populate with thread id: " + std::to_string(tc->id)); + + std::string prefix_key; + uint64_t collections_per_thread = tc->collection_count; + const uint64_t MAX_ROLLBACKS = 100; + uint32_t rollback_retries = 0; + int cmpp; + + /* + * Generate a table of data with prefix keys aaa -> zzz. We have 26 threads from ids + * starting from 0 to 26. Each populate thread will insert separate prefix keys based on the + * id. + */ + for (int64_t i = 0; i < collections_per_thread; ++i) { + collection &coll = tc->db.get_collection(i); + scoped_cursor cursor = tc->session.open_scoped_cursor(coll.name.c_str()); + for (uint64_t j = 0; j < ALPHABET.size(); ++j) { + for (uint64_t k = 0; k < ALPHABET.size(); ++k) { + for (uint64_t count = 0; count < tc->key_count; ++count) { + tc->transaction.begin(); + /* + * Generate the prefix key, and append a random generated key string based + * on the key size configuration. + */ + prefix_key = {ALPHABET.at(tc->id), ALPHABET.at(j), ALPHABET.at(k)}; + prefix_key += random_generator::instance().generate_random_string( + tc->key_size - PREFIX_KEY_LEN); + if (!tc->insert(cursor, coll.id, prefix_key)) { + testutil_assert(rollback_retries < MAX_ROLLBACKS); + /* We failed to insert, rollback our transaction and retry. */ + tc->transaction.rollback(); + ++rollback_retries; + --count; + } else { + /* Commit txn at commit timestamp 100. */ + tc->transaction.commit( + "commit_timestamp=" + tc->tsm->decimal_to_hex(100)); + rollback_retries = 0; + } + } + } + } + } + } + + public: + search_near_01(const test_harness::test_args &args) : test(args) {} + + void + populate(test_harness::database &database, test_harness::timestamp_manager *tsm, + test_harness::configuration *config, test_harness::workload_tracking *tracking) override final + { + uint64_t collection_count, key_size; + std::vector<thread_context *> workers; + thread_manager tm; + + /* Validate our config. */ + collection_count = config->get_int(COLLECTION_COUNT); + keys_per_prefix = config->get_int(KEY_COUNT_PER_COLLECTION); + key_size = config->get_int(KEY_SIZE); + testutil_assert(collection_count > 0); + testutil_assert(keys_per_prefix > 0); + /* Check the prefix length is not greater than the key size. */ + testutil_assert(key_size >= PREFIX_KEY_LEN); + + logger::log_msg(LOG_INFO, + "Populate configuration with key size: " + std::to_string(key_size) + + " key count: " + std::to_string(keys_per_prefix) + + " number of collections: " + std::to_string(collection_count)); + + /* Create n collections as per the configuration. */ + for (uint64_t i = 0; i < collection_count; ++i) + /* + * The database model will call into the API and create the collection, with its own + * session. + */ + database.add_collection(); + + /* Spawn 26 threads to populate the database. */ + for (uint64_t i = 0; i < ALPHABET.size(); ++i) { + thread_context *tc = new thread_context(i, thread_type::INSERT, config, + connection_manager::instance().create_session(), tsm, tracking, database); + workers.push_back(tc); + tm.add_thread(populate_worker, tc, ALPHABET, PREFIX_KEY_LEN); + } + + /* Wait for our populate threads to finish and then join them. */ + logger::log_msg(LOG_INFO, "Populate: waiting for threads to complete."); + tm.join(); + + /* Cleanup our workers. */ + for (auto &it : workers) { + delete it; + it = nullptr; + } + + /* Force evict all the populated keys in all of the collections. */ + int cmpp; + scoped_session session = connection_manager::instance().create_session(); + for (uint64_t count = 0; count < collection_count; ++count) { + collection &coll = database.get_collection(count); + scoped_cursor evict_cursor = + session.open_scoped_cursor(coll.name.c_str(), "debug=(release_evict=true)"); + + for (uint64_t i = 0; i < ALPHABET.size(); ++i) { + for (uint64_t j = 0; j < ALPHABET.size(); ++j) { + for (uint64_t k = 0; k < ALPHABET.size(); ++k) { + std::string key = {ALPHABET.at(i), ALPHABET.at(j), ALPHABET.at(k)}; + evict_cursor->set_key(evict_cursor.get(), key.c_str()); + evict_cursor->search_near(evict_cursor.get(), &cmpp); + testutil_check(evict_cursor->reset(evict_cursor.get())); + } + } + } + } + srchkey_len = + random_generator::instance().generate_integer(static_cast<uint64_t>(1), PREFIX_KEY_LEN); + logger::log_msg(LOG_INFO, "Populate: finished."); + } + + void + read_operation(test_harness::thread_context *tc) override final + { + /* Make sure that thread statistics cursor is null before we open it. */ + testutil_assert(tc->stat_cursor.get() == nullptr); + logger::log_msg( + LOG_INFO, type_string(tc->type) + " thread {" + std::to_string(tc->id) + "} commencing."); + std::map<uint64_t, scoped_cursor> cursors; + tc->stat_cursor = tc->session.open_scoped_cursor(STATISTICS_URI); + std::string srch_key; + int64_t entries_stat, prefix_stat, prev_entries_stat, prev_prefix_stat, expected_entries; + int cmpp; + + cmpp = 0; + prev_entries_stat = 0; + prev_prefix_stat = 0; + + /* + * The number of expected entries is calculated to account for the maximum allowed entries + * per search near function call. The key we search near can be different in length, which + * will increase the number of entries search by a factor of 26. + */ + expected_entries = tc->thread_count * keys_per_prefix * 2 * + pow(ALPHABET.size(), PREFIX_KEY_LEN - srchkey_len); + + /* + * Read at timestamp 10, so that no keys are visible to this transaction. This allows prefix + * search near to early exit out of it's prefix range when it's trying to search for a + * visible key in the tree. + */ + tc->transaction.begin("read_timestamp=" + tc->tsm->decimal_to_hex(10)); + while (tc->running()) { + + /* Get a collection and find a cached cursor. */ + collection &coll = tc->db.get_random_collection(); + if (cursors.find(coll.id) == cursors.end()) { + scoped_cursor cursor = tc->session.open_scoped_cursor(coll.name.c_str()); + cursor->reconfigure(cursor.get(), "prefix_key=true"); + cursors.emplace(coll.id, std::move(cursor)); + } + + /* Generate search prefix key of random length between a -> zzz. */ + srch_key = random_generator::instance().generate_random_string( + srchkey_len, characters_type::ALPHABET); + logger::log_msg(LOG_INFO, + "Read thread {" + std::to_string(tc->id) + + "} performing prefix search near with key: " + srch_key); + + /* Do a second lookup now that we know it exists. */ + auto &cursor = cursors[coll.id]; + if (tc->transaction.active()) { + runtime_monitor::get_stat( + tc->stat_cursor, WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100, &prev_entries_stat); + runtime_monitor::get_stat(tc->stat_cursor, + WT_STAT_CONN_CURSOR_SEARCH_NEAR_PREFIX_FAST_PATHS, &prev_prefix_stat); + + cursor->set_key(cursor.get(), srch_key.c_str()); + testutil_assert(cursor->search_near(cursor.get(), &cmpp) == WT_NOTFOUND); + + runtime_monitor::get_stat( + tc->stat_cursor, WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100, &entries_stat); + runtime_monitor::get_stat( + tc->stat_cursor, WT_STAT_CONN_CURSOR_SEARCH_NEAR_PREFIX_FAST_PATHS, &prefix_stat); + logger::log_msg(LOG_INFO, + "Read thread {" + std::to_string(tc->id) + + "} skipped entries: " + std::to_string(entries_stat - prev_entries_stat) + + " prefix fash path: " + std::to_string(prefix_stat - prev_prefix_stat)); + + /* + * It is possible that WiredTiger increments the entries skipped stat irrelevant to + * prefix search near. This is dependent on how many read threads are present in the + * test. Account for this by creating a small buffer using thread count. Assert that + * the number of expected entries is the upper limit which the prefix search near + * can traverse and the prefix fast path is incremented. + */ + testutil_assert( + (expected_entries + (2 * tc->thread_count)) >= entries_stat - prev_entries_stat); + testutil_assert(prefix_stat > prev_prefix_stat); + + tc->transaction.add_op(); + tc->sleep(); + } + /* Reset our cursor to avoid pinning content. */ + testutil_check(cursor->reset(cursor.get())); + } + tc->transaction.commit(); + /* Make sure the last transaction is rolled back now the work is finished. */ + if (tc->transaction.active()) + tc->transaction.rollback(); + } +}; |