diff options
Diffstat (limited to 'storage/tokudb/ft-index/locktree')
26 files changed, 1884 insertions, 235 deletions
diff --git a/storage/tokudb/ft-index/locktree/lock_request.cc b/storage/tokudb/ft-index/locktree/lock_request.cc index 079f5351f7d..362f9bfa98d 100644 --- a/storage/tokudb/ft-index/locktree/lock_request.cc +++ b/storage/tokudb/ft-index/locktree/lock_request.cc @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: namespace toku { // initialize a lock request's internals -void lock_request::create(uint64_t wait_time) { +void lock_request::create(void) { m_txnid = TXNID_NONE; m_conflicting_txnid = TXNID_NONE; m_start_time = 0; @@ -114,7 +114,6 @@ void lock_request::create(uint64_t wait_time) { m_complete_r = 0; m_state = state::UNINITIALIZED; - m_wait_time = wait_time; toku_cond_init(&m_wait_cond, nullptr); } @@ -126,9 +125,7 @@ void lock_request::destroy(void) { } // set the lock request parameters. this API allows a lock request to be reused. -void lock_request::set(locktree *lt, TXNID txnid, - const DBT *left_key, const DBT *right_key, - lock_request::type lock_type) { +void lock_request::set(locktree *lt, TXNID txnid, const DBT *left_key, const DBT *right_key, lock_request::type lock_type, bool big_txn) { invariant(m_state != state::PENDING); m_lt = lt; m_txnid = txnid; @@ -139,6 +136,7 @@ void lock_request::set(locktree *lt, TXNID txnid, m_type = lock_type; m_state = state::INITIALIZED; m_info = lt->get_lock_request_info(); + m_big_txn = big_txn; } // get rid of any stored left and right key copies and @@ -208,10 +206,10 @@ int lock_request::start(void) { txnid_set conflicts; conflicts.create(); if (m_type == type::WRITE) { - r = m_lt->acquire_write_lock(m_txnid, m_left_key, m_right_key, &conflicts); + r = m_lt->acquire_write_lock(m_txnid, m_left_key, m_right_key, &conflicts, m_big_txn); } else { invariant(m_type == type::READ); - r = m_lt->acquire_read_lock(m_txnid, m_left_key, m_right_key, &conflicts); + r = m_lt->acquire_read_lock(m_txnid, m_left_key, m_right_key, &conflicts, m_big_txn); } // if the lock is not granted, save it to the set of lock requests @@ -236,38 +234,50 @@ int lock_request::start(void) { return m_state == state::COMPLETE ? m_complete_r : r; } -void lock_request::calculate_cond_wakeup_time(struct timespec *ts) { - struct timeval now; - int r = gettimeofday(&now, NULL); - invariant_zero(r); - int64_t sec = now.tv_sec + (m_wait_time / 1000); - int64_t usec = now.tv_usec + ((m_wait_time % 1000) * 1000); - int64_t d_sec = usec / 1000000; - int64_t d_usec = usec % 1000000; - ts->tv_sec = sec + d_sec; - ts->tv_nsec = d_usec * 1000; +// sleep on the lock request until it becomes resolved or the wait time has elapsed. +int lock_request::wait(uint64_t wait_time_ms) { + return wait(wait_time_ms, 0, nullptr); } -// sleep on the lock request until it becomes resolved or the wait time has elapsed. -int lock_request::wait(void) { - uint64_t t_start = toku_current_time_microsec(); +int lock_request::wait(uint64_t wait_time_ms, uint64_t killed_time_ms, int (*killed_callback)(void)) { + uint64_t t_now = toku_current_time_microsec(); + uint64_t t_start = t_now; + uint64_t t_end = t_start + wait_time_ms * 1000; + toku_mutex_lock(&m_info->mutex); + while (m_state == state::PENDING) { - struct timespec ts; - calculate_cond_wakeup_time(&ts); + + // compute next wait time + uint64_t t_wait; + if (killed_time_ms == 0) { + t_wait = t_end; + } else { + t_wait = t_now + killed_time_ms * 1000; + if (t_wait > t_end) + t_wait = t_end; + } + struct timespec ts = {}; + ts.tv_sec = t_wait / 1000000; + ts.tv_nsec = (t_wait % 1000000) * 1000; int r = toku_cond_timedwait(&m_wait_cond, &m_info->mutex, &ts); invariant(r == 0 || r == ETIMEDOUT); - if (r == ETIMEDOUT && m_state == state::PENDING) { + + t_now = toku_current_time_microsec(); + if (m_state == state::PENDING && (t_now >= t_end || (killed_callback && killed_callback()))) { m_info->counters.timeout_count += 1; + // if we're still pending and we timed out, then remove our // request from the set of lock requests and fail. remove_from_lock_requests(); + // complete sets m_state to COMPLETE, breaking us out of the loop complete(DB_LOCK_NOTGRANTED); } } - uint64_t t_end = toku_current_time_microsec(); - uint64_t duration = t_end - t_start; + + uint64_t t_real_end = toku_current_time_microsec(); + uint64_t duration = t_real_end - t_start; m_info->counters.wait_count += 1; m_info->counters.wait_time += duration; if (duration >= 1000000) { @@ -311,9 +321,9 @@ int lock_request::retry(void) { invariant(m_state == state::PENDING); if (m_type == type::WRITE) { - r = m_lt->acquire_write_lock(m_txnid, m_left_key, m_right_key, nullptr); + r = m_lt->acquire_write_lock(m_txnid, m_left_key, m_right_key, nullptr, m_big_txn); } else { - r = m_lt->acquire_read_lock(m_txnid, m_left_key, m_right_key, nullptr); + r = m_lt->acquire_read_lock(m_txnid, m_left_key, m_right_key, nullptr, m_big_txn); } // if the acquisition succeeded then remove ourselves from the diff --git a/storage/tokudb/ft-index/locktree/lock_request.h b/storage/tokudb/ft-index/locktree/lock_request.h index 140e49974ec..48956826547 100644 --- a/storage/tokudb/ft-index/locktree/lock_request.h +++ b/storage/tokudb/ft-index/locktree/lock_request.h @@ -125,16 +125,15 @@ public: WRITE }; - // effect: Initializes a lock request with a given wait time. - void create(uint64_t wait_time); + // effect: Initializes a lock request. + void create(void); // effect: Destroys a lock request. void destroy(void); // effect: Resets the lock request parameters, allowing it to be reused. // requires: Lock request was already created at some point - void set(locktree *lt, TXNID txnid, - const DBT *left_key, const DBT *right_key, type lock_type); + void set(locktree *lt, TXNID txnid, const DBT *left_key, const DBT *right_key, type lock_type, bool big_txn); // effect: Tries to acquire a lock described by this lock request. // returns: The return code of locktree::acquire_[write,read]_lock() @@ -144,7 +143,8 @@ public: // effect: Sleeps until either the request is granted or the wait time expires. // returns: The return code of locktree::acquire_[write,read]_lock() // or simply DB_LOCK_NOTGRANTED if the wait time expired. - int wait(void); + int wait(uint64_t wait_time_ms); + int wait(uint64_t wait_time_ms, uint64_t killed_time_ms, int (*killed_callback)(void)); // return: left end-point of the lock range const DBT *get_left_key(void) const; @@ -196,9 +196,10 @@ private: int m_complete_r; state m_state; - uint64_t m_wait_time; toku_cond_t m_wait_cond; + bool m_big_txn; + // the lock request info state stored in the // locktree that this lock request is for. struct locktree::lt_lock_request_info *m_info; @@ -235,8 +236,6 @@ private: void copy_keys(void); - void calculate_cond_wakeup_time(struct timespec *ts); - static int find_by_txnid(lock_request * const &request, const TXNID &txnid); friend class lock_request_unit_test; diff --git a/storage/tokudb/ft-index/locktree/locktree.cc b/storage/tokudb/ft-index/locktree/locktree.cc index 36c35eec185..21b0aaa1426 100644 --- a/storage/tokudb/ft-index/locktree/locktree.cc +++ b/storage/tokudb/ft-index/locktree/locktree.cc @@ -119,6 +119,7 @@ namespace toku { void locktree::create(manager::memory_tracker *mem_tracker, DICTIONARY_ID dict_id, DESCRIPTOR desc, ft_compare_func cmp) { m_mem_tracker = mem_tracker; + m_mgr = mem_tracker->get_manager(); m_dict_id = dict_id; // the only reason m_cmp is malloc'd here is to prevent gdb from printing @@ -410,8 +411,8 @@ int locktree::acquire_lock(bool is_write_request, TXNID txnid, } int locktree::try_acquire_lock(bool is_write_request, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts) { - int r = m_mem_tracker->check_current_lock_constraints(); + const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) { + int r = m_mgr->check_current_lock_constraints(big_txn); if (r == 0) { r = acquire_lock(is_write_request, txnid, left_key, right_key, conflicts); } @@ -420,13 +421,13 @@ int locktree::try_acquire_lock(bool is_write_request, TXNID txnid, // the locktree silently upgrades read locks to write locks for simplicity int locktree::acquire_read_lock(TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts) { - return acquire_write_lock(txnid, left_key, right_key, conflicts); + const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) { + return acquire_write_lock(txnid, left_key, right_key, conflicts, big_txn); } int locktree::acquire_write_lock(TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts) { - return try_acquire_lock(true, txnid, left_key, right_key, conflicts); + const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) { + return try_acquire_lock(true, txnid, left_key, right_key, conflicts, big_txn); } void locktree::get_conflicts(bool is_write_request, TXNID txnid, diff --git a/storage/tokudb/ft-index/locktree/locktree.h b/storage/tokudb/ft-index/locktree/locktree.h index 1f94a1feb6a..a3c3b566fcf 100644 --- a/storage/tokudb/ft-index/locktree/locktree.h +++ b/storage/tokudb/ft-index/locktree/locktree.h @@ -105,6 +105,11 @@ PATENT RIGHTS GRANT: #include "wfg.h" #include "range_buffer.h" +#define TOKU_LOCKTREE_ESCALATOR_LAMBDA 0 +#if TOKU_LOCKTREE_ESCALATOR_LAMBDA +#include <functional> +#endif + enum { LTM_SIZE_CURRENT = 0, LTM_SIZE_LIMIT, @@ -164,15 +169,13 @@ public: // If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS. // note: Read locks cannot be shared between txnids, as one would expect. // This is for simplicity since read locks are rare in MySQL. - int acquire_read_lock(TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts); + int acquire_read_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn); // effect: Attempts to grant a write lock for the range of keys between [left_key, right_key]. // returns: If the lock cannot be granted, return DB_LOCK_NOTGRANTED, and populate the // given conflicts set with the txnids that hold conflicting locks in the range. // If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS. - int acquire_write_lock(TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts); + int acquire_write_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn); // effect: populate the conflicts set with the txnids that would preventing // the given txnid from getting a lock on [left_key, right_key] @@ -215,6 +218,25 @@ public: // since the lock_request object is opaque struct lt_lock_request_info *get_lock_request_info(void); + class manager; + + // the escalator coordinates escalation on a set of locktrees for a bunch of threads + class escalator { + public: + void create(void); + void destroy(void); +#if TOKU_LOCKTREE_ESCALATOR_LAMBDA + void run(manager *mgr, std::function<void (void)> escalate_locktrees_fun); +#else + void run(manager *mgr, void (*escalate_locktrees_fun)(void *extra), void *extra); +#endif + private: + toku_mutex_t m_escalator_mutex; + toku_cond_t m_escalator_done; + bool m_escalator_running; + }; + ENSURE_POD(escalator); + // The locktree manager manages a set of locktrees, // one for each open dictionary. Locktrees are accessed through // the manager, and when they are no longer needed, they can @@ -236,10 +258,6 @@ public: int set_max_lock_memory(size_t max_lock_memory); - uint64_t get_lock_wait_time(void); - - void set_lock_wait_time(uint64_t lock_wait_time); - // effect: Get a locktree from the manager. If a locktree exists with the given // dict_id, it is referenced and then returned. If one did not exist, it // is created. It will use the given descriptor and comparison function @@ -265,6 +283,7 @@ public: class memory_tracker { public: void set_manager(manager *mgr); + manager *get_manager(void); // effect: Determines if too many locks or too much memory is being used, // Runs escalation on the manager if so. @@ -273,6 +292,8 @@ public: // enough resources for a new lock. int check_current_lock_constraints(void); + bool over_big_threshold(void); + void note_mem_used(uint64_t mem_used); void note_mem_released(uint64_t mem_freed); @@ -297,6 +318,7 @@ public: // rationale: to get better stress test coverage, we want a way to // deterministicly trigger lock escalation. void run_escalation_for_test(void); + void run_escalation(void); void get_status(LTM_STATUS status); @@ -311,9 +333,22 @@ public: void *extra); int iterate_pending_lock_requests(lock_request_iterate_callback cb, void *extra); + int check_current_lock_constraints(bool big_txn); + + // Escalate locktrees touched by a txn + void escalate_lock_trees_for_txn(TXNID, locktree *lt); + + // Escalate all locktrees + void escalate_all_locktrees(void); + + // Escalate a set of locktrees + void escalate_locktrees(locktree **locktrees, int num_locktrees); + + // Add time t to the escalator's wait time statistics + void add_escalator_wait_time(uint64_t t); + private: static const uint64_t DEFAULT_MAX_LOCK_MEMORY = 64L * 1024 * 1024; - static const uint64_t DEFAULT_LOCK_WAIT_TIME = 0; // tracks the current number of locks and lock memory uint64_t m_max_lock_memory; @@ -322,9 +357,6 @@ public: struct lt_counters m_lt_counters; - // lock wait time for blocking row locks, in ms - uint64_t m_lock_wait_time_ms; - // the create and destroy callbacks for the locktrees lt_create_cb m_lt_create_callback; lt_destroy_cb m_lt_destroy_callback; @@ -356,23 +388,14 @@ public: // requires: Manager's mutex is held void locktree_map_remove(locktree *lt); - // effect: Runs escalation on all locktrees. - void run_escalation(void); - static int find_by_dict_id(locktree *const <, const DICTIONARY_ID &dict_id); void escalator_init(void); void escalator_destroy(void); - // effect: Add time t to the escalator's wait time statistics - void add_escalator_wait_time(uint64_t t); - - // effect: escalate's the locks in each locktree - // requires: manager's mutex is held - void escalate_all_locktrees(void); - // statistics about lock escalation. + toku_mutex_t m_escalation_mutex; uint64_t m_escalation_count; tokutime_t m_escalation_time; uint64_t m_escalation_latest_result; @@ -381,22 +404,16 @@ public: uint64_t m_long_wait_escalation_count; uint64_t m_long_wait_escalation_time; - toku_mutex_t m_escalator_mutex; - toku_cond_t m_escalator_work; // signal the escalator to run - toku_cond_t m_escalator_done; // signal that escalation is done - bool m_escalator_killed; - toku_pthread_t m_escalator_id; + escalator m_escalator; friend class manager_unit_test; - - public: - void escalator_work(void); }; ENSURE_POD(manager); manager::memory_tracker *get_mem_tracker(void) const; private: + manager *m_mgr; manager::memory_tracker *m_mem_tracker; DICTIONARY_ID m_dict_id; @@ -414,7 +431,6 @@ private: uint32_t m_reference_count; - // the locktree stores locks in a concurrent, non-overlapping rangetree concurrent_tree *m_rangetree; void *m_userdata; @@ -586,7 +602,7 @@ private: const DBT *left_key, const DBT *right_key, txnid_set *conflicts); int try_acquire_lock(bool is_write_request, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts); + const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn); void escalate(manager::lt_escalate_cb after_escalate_callback, void *extra); diff --git a/storage/tokudb/ft-index/locktree/manager.cc b/storage/tokudb/ft-index/locktree/manager.cc index e4317f30fc0..8086d9049f7 100644 --- a/storage/tokudb/ft-index/locktree/manager.cc +++ b/storage/tokudb/ft-index/locktree/manager.cc @@ -103,8 +103,6 @@ namespace toku { void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, lt_escalate_cb escalate_cb, void *escalate_extra) { m_max_lock_memory = DEFAULT_MAX_LOCK_MEMORY; m_current_lock_memory = 0; - escalator_init(); - m_lock_wait_time_ms = DEFAULT_LOCK_WAIT_TIME; m_mem_tracker.set_manager(this); m_locktree_map.create(); @@ -118,6 +116,8 @@ void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, ZERO_STRUCT(status); ZERO_STRUCT(m_lt_counters); + + escalator_init(); } void locktree::manager::destroy(void) { @@ -152,14 +152,6 @@ int locktree::manager::set_max_lock_memory(size_t max_lock_memory) { return r; } -uint64_t locktree::manager::get_lock_wait_time(void) { - return m_lock_wait_time_ms; -} - -void locktree::manager::set_lock_wait_time(uint64_t lock_wait_time_ms) { - m_lock_wait_time_ms = lock_wait_time_ms; -} - int locktree::manager::find_by_dict_id(locktree *const <, const DICTIONARY_ID &dict_id) { if (lt->m_dict_id.dictid < dict_id.dictid) { return -1; @@ -312,33 +304,45 @@ void locktree::manager::release_lt(locktree *lt) { } // test-only version of lock escalation +#if TOKU_LOCKTREE_ESCALATOR_LAMBDA +void locktree::manager::run_escalation(void) { + m_escalator.run(this, [this] () -> void { escalate_all_locktrees(); }); +} +#else +static void manager_run_escalation_fun(void *extra) { + locktree::manager *thismanager = (locktree::manager *) extra; + thismanager->escalate_all_locktrees(); +} + +void locktree::manager::run_escalation(void) { + m_escalator.run(this, manager_run_escalation_fun, this); +} +#endif + void locktree::manager::run_escalation_for_test(void) { run_escalation(); } -void locktree::manager::run_escalation(void) { +void locktree::manager::escalate_all_locktrees(void) { + if (0) fprintf(stderr, "%d %s:%u\n", toku_os_gettid(), __PRETTY_FUNCTION__, __LINE__); uint64_t t0 = toku_current_time_microsec(); - if (1) { - // run escalation on the background thread - int r; - toku_mutex_lock(&m_escalator_mutex); - toku_cond_broadcast(&m_escalator_work); - struct timeval tv; - r = gettimeofday(&tv, 0); - assert_zero(r); - uint64_t t = tv.tv_sec * 1000000 + tv.tv_usec; - t += 100000; // 100 milliseconds - toku_timespec_t wakeup_time; - wakeup_time.tv_sec = t / 1000000; - wakeup_time.tv_nsec = (t % 1000000) * 1000; - r = toku_cond_timedwait(&m_escalator_done, &m_escalator_mutex, &wakeup_time); - toku_mutex_unlock(&m_escalator_mutex); - } else { - // run escalation on this thread - mutex_lock(); - escalate_all_locktrees(); - mutex_unlock(); + + // get all locktrees + mutex_lock(); + int num_locktrees = m_locktree_map.size(); + locktree **locktrees = new locktree *[num_locktrees]; + for (int i = 0; i < num_locktrees; i++) { + int r = m_locktree_map.fetch(i, &locktrees[i]); + invariant_zero(r); + reference_lt(locktrees[i]); } + mutex_unlock(); + + // escalate them + escalate_locktrees(locktrees, num_locktrees); + + delete [] locktrees; + uint64_t t1 = toku_current_time_microsec(); add_escalator_wait_time(t1 - t0); } @@ -347,6 +351,10 @@ void locktree::manager::memory_tracker::set_manager(manager *mgr) { m_mgr = mgr; } +locktree::manager *locktree::manager::memory_tracker::get_manager(void) { + return m_mgr; +} + int locktree::manager::memory_tracker::check_current_lock_constraints(void) { int r = 0; // check if we're out of locks without the mutex first. then, grab the @@ -374,6 +382,10 @@ bool locktree::manager::memory_tracker::out_of_locks(void) const { return m_mgr->m_current_lock_memory >= m_mgr->m_max_lock_memory; } +bool locktree::manager::memory_tracker::over_big_threshold(void) { + return m_mgr->m_current_lock_memory >= m_mgr->m_max_lock_memory / 2; +} + int locktree::manager::iterate_pending_lock_requests( lock_request_iterate_callback callback, void *extra) { mutex_lock(); @@ -403,18 +415,23 @@ int locktree::manager::iterate_pending_lock_requests( return r; } -static void *escalator_thread(void *arg) { - locktree::manager *mgr = reinterpret_cast<locktree::manager*>(arg); - mgr->escalator_work(); - return arg; +int locktree::manager::check_current_lock_constraints(bool big_txn) { + int r = 0; + if (big_txn && m_mem_tracker.over_big_threshold()) { + run_escalation(); + if (m_mem_tracker.over_big_threshold()) { + r = TOKUDB_OUT_OF_LOCKS; + } + } + if (r == 0) { + r = m_mem_tracker.check_current_lock_constraints(); + } + return r; } void locktree::manager::escalator_init(void) { - ZERO_STRUCT(m_escalator_mutex); - toku_mutex_init(&m_escalator_mutex, nullptr); - toku_cond_init(&m_escalator_work, nullptr); - toku_cond_init(&m_escalator_done, nullptr); - m_escalator_killed = false; + ZERO_STRUCT(m_escalation_mutex); + toku_mutex_init(&m_escalation_mutex, nullptr); m_escalation_count = 0; m_escalation_time = 0; m_wait_escalation_count = 0; @@ -422,24 +439,27 @@ void locktree::manager::escalator_init(void) { m_long_wait_escalation_count = 0; m_long_wait_escalation_time = 0; m_escalation_latest_result = 0; - int r = toku_pthread_create(&m_escalator_id, nullptr, escalator_thread, this); - assert_zero(r); + m_escalator.create(); } void locktree::manager::escalator_destroy(void) { - toku_mutex_lock(&m_escalator_mutex); - m_escalator_killed = true; - toku_cond_broadcast(&m_escalator_work); - toku_mutex_unlock(&m_escalator_mutex); - void *ret; - int r = toku_pthread_join(m_escalator_id, &ret); - assert_zero(r); - toku_mutex_destroy(&m_escalator_mutex); - toku_cond_destroy(&m_escalator_work); - toku_cond_destroy(&m_escalator_done); + m_escalator.destroy(); + toku_mutex_destroy(&m_escalation_mutex); } -void locktree::manager::escalate_all_locktrees(void) { +void locktree::manager::add_escalator_wait_time(uint64_t t) { + toku_mutex_lock(&m_escalation_mutex); + m_wait_escalation_count += 1; + m_wait_escalation_time += t; + if (t >= 1000000) { + m_long_wait_escalation_count += 1; + m_long_wait_escalation_time += t; + } + toku_mutex_unlock(&m_escalation_mutex); +} + +void locktree::manager::escalate_locktrees(locktree **locktrees, int num_locktrees) { + if (0) fprintf(stderr, "%d %s:%u %d\n", toku_os_gettid(), __PRETTY_FUNCTION__, __LINE__, num_locktrees); // there are too many row locks in the system and we need to tidy up. // // a simple implementation of escalation does not attempt @@ -448,47 +468,87 @@ void locktree::manager::escalate_all_locktrees(void) { // and more complicated locking. for now, just escalate each // locktree individually, in-place. tokutime_t t0 = toku_time_now(); - size_t num_locktrees = m_locktree_map.size(); - for (size_t i = 0; i < num_locktrees; i++) { - locktree *lt; - int r = m_locktree_map.fetch(i, <); - invariant_zero(r); - lt->escalate(m_lt_escalate_callback, m_lt_escalate_callback_extra); + for (int i = 0; i < num_locktrees; i++) { + locktrees[i]->escalate(m_lt_escalate_callback, m_lt_escalate_callback_extra); + release_lt(locktrees[i]); } tokutime_t t1 = toku_time_now(); - toku_mutex_lock(&m_escalator_mutex); + toku_mutex_lock(&m_escalation_mutex); m_escalation_count++; m_escalation_time += (t1 - t0); m_escalation_latest_result = m_current_lock_memory; - toku_mutex_unlock(&m_escalator_mutex); + toku_mutex_unlock(&m_escalation_mutex); } -void locktree::manager::add_escalator_wait_time(uint64_t t) { - toku_mutex_lock(&m_escalator_mutex); - m_wait_escalation_count += 1; - m_wait_escalation_time += t; - if (t >= 1000000) { - m_long_wait_escalation_count += 1; - m_long_wait_escalation_time += t; - } - toku_mutex_unlock(&m_escalator_mutex); +#if !TOKU_LOCKTREE_ESCALATOR_LAMBDA +struct escalate_args { + locktree::manager *mgr; + locktree **locktrees; + int num_locktrees; +}; + +static void manager_escalate_locktrees(void *extra) { + escalate_args *args = (escalate_args *) extra; + args->mgr->escalate_locktrees(args->locktrees, args->num_locktrees); +} +#endif + +void locktree::manager::escalate_lock_trees_for_txn(TXNID txnid UU(), locktree *lt UU()) { + // get lock trees for txnid + const int num_locktrees = 1; + locktree *locktrees[1] = { lt }; + reference_lt(lt); + + // escalate these lock trees + locktree::escalator this_escalator; + this_escalator.create(); +#if TOKU_LOCKTREE_ESCALATOR_LAMBDA + this_escalator.run(this, [this,locktrees,num_locktrees] () -> void { escalate_locktrees(locktrees, num_locktrees); }); +#else + escalate_args args = { this, locktrees, num_locktrees }; + this_escalator.run(this, manager_escalate_locktrees, &args); +#endif + this_escalator.destroy(); +} + +void locktree::escalator::create(void) { + ZERO_STRUCT(m_escalator_mutex); + toku_mutex_init(&m_escalator_mutex, nullptr); + toku_cond_init(&m_escalator_done, nullptr); + m_escalator_running = false; } -void locktree::manager::escalator_work(void) { +void locktree::escalator::destroy(void) { + toku_cond_destroy(&m_escalator_done); + toku_mutex_destroy(&m_escalator_mutex); +} + +#if TOKU_LOCKTREE_ESCALATOR_LAMBDA +void locktree::escalator::run(locktree::manager *mgr, std::function<void (void)> escalate_locktrees_fun) { +#else + void locktree::escalator::run(locktree::manager *mgr, void (*escalate_locktrees_fun)(void *extra), void *extra) { +#endif + uint64_t t0 = toku_current_time_microsec(); toku_mutex_lock(&m_escalator_mutex); - while (!m_escalator_killed) { - toku_cond_wait(&m_escalator_work, &m_escalator_mutex); - if (!m_escalator_killed) { - toku_mutex_unlock(&m_escalator_mutex); - mutex_lock(); - escalate_all_locktrees(); - mutex_unlock(); - toku_mutex_lock(&m_escalator_mutex); - toku_cond_broadcast(&m_escalator_done); - } + if (!m_escalator_running) { + // run escalation on this thread + m_escalator_running = true; + toku_mutex_unlock(&m_escalator_mutex); +#if TOKU_LOCKTREE_ESCALATOR_LAMBDA + escalate_locktrees_fun(); +#else + escalate_locktrees_fun(extra); +#endif + toku_mutex_lock(&m_escalator_mutex); + m_escalator_running = false; + toku_cond_broadcast(&m_escalator_done); + } else { + toku_cond_wait(&m_escalator_done, &m_escalator_mutex); } toku_mutex_unlock(&m_escalator_mutex); + uint64_t t1 = toku_current_time_microsec(); + mgr->add_escalator_wait_time(t1 - t0); } #define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc) diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc index 9741c2a0e8c..b309d9b6fd8 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc @@ -96,8 +96,7 @@ namespace toku { // create and set the object's internals, destroy should not crash. void lock_request_unit_test::test_create_destroy(void) { lock_request request; - const uint64_t wait_time_magic = 5016342; - request.create(wait_time_magic); + request.create(); invariant(request.m_txnid == TXNID_NONE); invariant(request.m_left_key == nullptr); @@ -112,7 +111,6 @@ void lock_request_unit_test::test_create_destroy(void) { invariant(request.m_complete_r == 0); invariant(request.m_state == lock_request::state::UNINITIALIZED); - invariant(request.m_wait_time = wait_time_magic); request.destroy(); } diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc index 52bf89affdf..60300a138df 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc @@ -98,8 +98,7 @@ namespace toku { // when appropriate and plays nice with +/- infinity. void lock_request_unit_test::test_get_set_keys(void) { lock_request request; - const uint64_t lock_wait_time = 10; - request.create(lock_wait_time); + request.create(); locktree *const null_lt = nullptr; @@ -112,20 +111,20 @@ void lock_request_unit_test::test_get_set_keys(void) { // request should not copy dbts for neg/pos inf, so get_left // and get_right should return the same pointer given - request.set(null_lt, txnid_a, neg_inf, pos_inf, lock_request::type::WRITE); + request.set(null_lt, txnid_a, neg_inf, pos_inf, lock_request::type::WRITE, false); invariant(request.get_left_key() == neg_inf); invariant(request.get_right_key() == pos_inf); // request should make copies of non-infinity-valued keys. - request.set(null_lt, txnid_a, neg_inf, one, lock_request::type::WRITE); + request.set(null_lt, txnid_a, neg_inf, one, lock_request::type::WRITE, false); invariant(request.get_left_key() == neg_inf); invariant(request.get_right_key() == one); - request.set(null_lt, txnid_a, two, pos_inf, lock_request::type::WRITE); + request.set(null_lt, txnid_a, two, pos_inf, lock_request::type::WRITE, false); invariant(request.get_left_key() == two); invariant(request.get_right_key() == pos_inf); - request.set(null_lt, txnid_a, one, two, lock_request::type::WRITE); + request.set(null_lt, txnid_a, one, two, lock_request::type::WRITE, false); invariant(request.get_left_key() == one); invariant(request.get_right_key() == two); diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc new file mode 100644 index 00000000000..742165c59dc --- /dev/null +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc @@ -0,0 +1,179 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2014 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// test the kill callback. the lock wait is killed 1/2 of the way through the wait. + +#include "lock_request_unit_test.h" + +namespace toku { + +const uint64_t my_lock_wait_time = 10 * 1000; // 10 seconds +const uint64_t my_killed_time = 1 * 1000; + +static int killed_calls = 0; +static uint64_t t_last_kill; +static uint64_t t_do_kill; + +static int my_killed_callback(void) { + uint64_t t_now = toku_current_time_microsec(); + assert(t_now >= t_last_kill); + assert(t_now - t_last_kill >= my_killed_time * 1000 / 2); // div by 2 for valgrind which is not very accurate + t_last_kill = t_now; + killed_calls++; + if (t_now >= t_do_kill) + return 1; + else + return 0; +} + +// make sure deadlocks are detected when a lock request starts +void lock_request_unit_test::test_wait_time_callback(void) { + int r; + locktree::manager mgr; + locktree *lt; + + mgr.create(nullptr, nullptr, nullptr, nullptr); + + DICTIONARY_ID dict_id = { 1 }; + lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + + TXNID txnid_a = 1001; + lock_request request_a; + request_a.create(); + + TXNID txnid_b = 2001; + lock_request request_b; + request_b.create(); + + const DBT *one = get_dbt(1); + + // a locks 'one' + request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); + r = request_a.start(); + assert_zero(r); + + // b tries to lock 'one' + request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE, false); + r = request_b.start(); + assert(r == DB_LOCK_NOTGRANTED); + + uint64_t t_start = toku_current_time_microsec(); + t_last_kill = t_start; + t_do_kill = t_start + my_lock_wait_time * 1000 / 2; + r = request_b.wait(my_lock_wait_time, my_killed_time, my_killed_callback); + assert(r == DB_LOCK_NOTGRANTED); + + uint64_t t_end = toku_current_time_microsec(); + assert(t_end > t_start); + uint64_t t_delta = t_end - t_start; + // fprintf(stderr, "delta=%" PRIu64 "\n", t_delta); + assert(t_delta >= my_lock_wait_time / 2); + + // fprintf(stderr, "killed_calls=%d\n", killed_calls); + assert(killed_calls > 0); + + request_b.destroy(); + + release_lock_and_retry_requests(lt, txnid_a, one, one); + request_a.destroy(); + + mgr.release_lt(lt); + mgr.destroy(); +} + +} /* namespace toku */ + +int main(void) { + toku::lock_request_unit_test test; + test.test_wait_time_callback(); + return 0; +} + diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc new file mode 100644 index 00000000000..e49b88f9d79 --- /dev/null +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc @@ -0,0 +1,175 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2014 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// test the kill callback. the kill callback never kills the lock wait in this test. +// the test verifies that the kill callback is called close to its requested frequency. + +#include "lock_request_unit_test.h" + +namespace toku { + +const uint64_t my_lock_wait_time = 10 * 1000; // 10 seconds +const uint64_t my_killed_time = 1 * 1000; + +static int killed_calls = 0; +static uint64_t t_last_kill; + +static int my_killed_callback(void) { + uint64_t t_now = toku_current_time_microsec(); + assert(t_now >= t_last_kill); + assert(t_now - t_last_kill >= my_killed_time * 1000 / 2); // div by 2 for valgrind which is not very accurate + t_last_kill = t_now; + killed_calls++; + return 0; +} + +// make sure deadlocks are detected when a lock request starts +void lock_request_unit_test::test_wait_time_callback(void) { + int r; + locktree::manager mgr; + locktree *lt; + + mgr.create(nullptr, nullptr, nullptr, nullptr); + + DICTIONARY_ID dict_id = { 1 }; + lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + + TXNID txnid_a = 1001; + lock_request request_a; + request_a.create(); + + TXNID txnid_b = 2001; + lock_request request_b; + request_b.create(); + + const DBT *one = get_dbt(1); + + // a locks 'one' + request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); + r = request_a.start(); + assert_zero(r); + + // b tries to lock 'one' + request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE, false); + r = request_b.start(); + assert(r == DB_LOCK_NOTGRANTED); + + uint64_t t_start = toku_current_time_microsec(); + t_last_kill = t_start; + r = request_b.wait(my_lock_wait_time, my_killed_time, my_killed_callback); + assert(r == DB_LOCK_NOTGRANTED); + + uint64_t t_end = toku_current_time_microsec(); + assert(t_end > t_start); + uint64_t t_delta = t_end - t_start; + // fprintf(stderr, "delta=%" PRIu64 "\n", t_delta); + assert(t_delta >= my_lock_wait_time); + + // fprintf(stderr, "killed_calls=%d\n", killed_calls); + assert(killed_calls > 0); + + request_b.destroy(); + + release_lock_and_retry_requests(lt, txnid_a, one, one); + request_a.destroy(); + + mgr.release_lt(lt); + mgr.destroy(); +} + +} /* namespace toku */ + +int main(void) { + toku::lock_request_unit_test test; + test.test_wait_time_callback(); + return 0; +} + diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc index d170e08dedd..08bd4c14d20 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc @@ -111,48 +111,48 @@ void lock_request_unit_test::test_start_deadlock(void) { lock_request request_a; lock_request request_b; lock_request request_c; - request_a.create(lock_wait_time); - request_b.create(lock_wait_time); - request_c.create(lock_wait_time); + request_a.create(); + request_b.create(); + request_c.create(); const DBT *one = get_dbt(1); const DBT *two = get_dbt(2); // start and succeed 1,1 for A and 2,2 for B. - request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE); + request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); r = request_a.start(); invariant_zero(r); - request_b.set(lt, txnid_b, two, two, lock_request::type::WRITE); + request_b.set(lt, txnid_b, two, two, lock_request::type::WRITE, false); r = request_b.start(); invariant_zero(r); // txnid A should not be granted a lock on 2,2, so it goes pending. - request_a.set(lt, txnid_a, two, two, lock_request::type::WRITE); + request_a.set(lt, txnid_a, two, two, lock_request::type::WRITE, false); r = request_a.start(); invariant(r == DB_LOCK_NOTGRANTED); // if txnid B wants a lock on 1,1 it should deadlock with A - request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE); + request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE, false); r = request_b.start(); invariant(r == DB_LOCK_DEADLOCK); // txnid C should not deadlock on either of these - it should just time out. - request_c.set(lt, txnid_c, one, one, lock_request::type::WRITE); + request_c.set(lt, txnid_c, one, one, lock_request::type::WRITE, false); r = request_c.start(); invariant(r == DB_LOCK_NOTGRANTED); - r = request_c.wait(); + r = request_c.wait(lock_wait_time); invariant(r == DB_LOCK_NOTGRANTED); - request_c.set(lt, txnid_c, two, two, lock_request::type::WRITE); + request_c.set(lt, txnid_c, two, two, lock_request::type::WRITE, false); r = request_c.start(); invariant(r == DB_LOCK_NOTGRANTED); - r = request_c.wait(); + r = request_c.wait(lock_wait_time); invariant(r == DB_LOCK_NOTGRANTED); // release locks for A and B, then wait on A's request which should succeed // since B just unlocked and should have completed A's pending request. release_lock_and_retry_requests(lt, txnid_a, one, one); release_lock_and_retry_requests(lt, txnid_b, two, two); - r = request_a.wait(); + r = request_a.wait(lock_wait_time); invariant_zero(r); release_lock_and_retry_requests(lt, txnid_a, two, two); diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc index 04679c22ce9..867d5fb1bd0 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc @@ -100,8 +100,6 @@ void lock_request_unit_test::test_start_pending(void) { locktree::manager mgr; locktree *lt; lock_request request; - // bogus, just has to be something. - const uint64_t lock_wait_time = 0; mgr.create(nullptr, nullptr, nullptr, nullptr); DICTIONARY_ID dict_id = { 1 }; @@ -115,15 +113,15 @@ void lock_request_unit_test::test_start_pending(void) { const DBT *two = get_dbt(2); // take a range lock using txnid b - r = lt->acquire_write_lock(txnid_b, zero, two, nullptr); + r = lt->acquire_write_lock(txnid_b, zero, two, nullptr, false); invariant_zero(r); locktree::lt_lock_request_info *info = lt->get_lock_request_info(); // start a lock request for 1,1 // it should fail. the request should be stored and in the pending state. - request.create(lock_wait_time); - request.set(lt, txnid_a, one, one, lock_request::type::WRITE); + request.create(); + request.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); r = request.start(); invariant(r == DB_LOCK_NOTGRANTED); invariant(info->pending_lock_requests.size() == 1); diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h b/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h index 6ec451ed108..3183bf2b734 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h @@ -116,6 +116,9 @@ public: // make sure deadlocks are detected when a lock request starts void test_start_deadlock(void); + // test that the get_wait_time callback works + void test_wait_time_callback(void); + private: // releases a single range lock and retries all lock requests. // this is kind of like what the ydb layer does, except that diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc new file mode 100644 index 00000000000..e077d461399 --- /dev/null +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc @@ -0,0 +1,151 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "lock_request_unit_test.h" + +namespace toku { + +static const uint64_t my_lock_wait_time = 10 * 1000; // 10 sec + +// make sure deadlocks are detected when a lock request starts +void lock_request_unit_test::test_wait_time_callback(void) { + int r; + locktree::manager mgr; + locktree *lt; + + mgr.create(nullptr, nullptr, nullptr, nullptr); + DICTIONARY_ID dict_id = { 1 }; + lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + + TXNID txnid_a = 1001; + lock_request request_a; + request_a.create(); + + TXNID txnid_b = 2001; + lock_request request_b; + request_b.create(); + + const DBT *one = get_dbt(1); + const DBT *two = get_dbt(2); + + // a locks 'one' + request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); + r = request_a.start(); + assert_zero(r); + + // b tries to lock 'one' + request_b.set(lt, txnid_b, one, two, lock_request::type::WRITE, false); + r = request_b.start(); + assert(r == DB_LOCK_NOTGRANTED); + uint64_t t_start = toku_current_time_microsec(); + r = request_b.wait(my_lock_wait_time); + uint64_t t_end = toku_current_time_microsec(); + assert(r == DB_LOCK_NOTGRANTED); + assert(t_end > t_start); + uint64_t t_delta = t_end - t_start; + assert(t_delta >= my_lock_wait_time); + request_b.destroy(); + + release_lock_and_retry_requests(lt, txnid_a, one, one); + request_a.destroy(); + + mgr.release_lt(lt); + mgr.destroy(); +} + +} /* namespace toku */ + +int main(void) { + toku::lock_request_unit_test test; + test.test_wait_time_callback(); + return 0; +} + diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc b/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc index 9c684a9057a..70f3a6249c4 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc @@ -125,8 +125,8 @@ void locktree_unit_test::test_conflicts(void) { // test_run == 0 means test with read lock // test_run == 1 means test with write lock #define ACQUIRE_LOCK(txn, left, right, conflicts) \ - test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts) \ - : lt->acquire_write_lock(txn, left, right, conflicts) + test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts, false) \ + : lt->acquire_write_lock(txn, left, right, conflicts, false) // acquire some locks for txnid_a r = ACQUIRE_LOCK(txnid_a, one, one, nullptr); @@ -142,8 +142,8 @@ void locktree_unit_test::test_conflicts(void) { // if test_run == 0, then read locks exist. only test write locks. #define ACQUIRE_LOCK(txn, left, right, conflicts) \ sub_test_run == 0 && test_run == 1 ? \ - lt->acquire_read_lock(txn, left, right, conflicts) \ - : lt->acquire_write_lock(txn, left, right, conflicts) + lt->acquire_read_lock(txn, left, right, conflicts, false) \ + : lt->acquire_write_lock(txn, left, right, conflicts, false) // try to get point write locks for txnid_b, should fail r = ACQUIRE_LOCK(txnid_b, one, one, nullptr); invariant(r == DB_LOCK_NOTGRANTED); diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc new file mode 100644 index 00000000000..7c4af67e6f8 --- /dev/null +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc @@ -0,0 +1,304 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include <stdio.h> +#include "locktree.h" +#include "test.h" + +// ensure that small transactions do not get stalled by a big transaction that has lots of locks +// ./locktree_escalation_big7_small1 --stalls 100 --max_lock_memory 1000000000 --verbose + +using namespace toku; + +static int verbose = 0; +static int killed = 0; +static pthread_t big_id, small_id; + +static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) { + range_buffer buffer; + buffer.create(); + DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); + DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); + buffer.append(&left, &right); + lt->release_locks(txn_id, &buffer); + buffer.destroy(); +} + +// grab a write range lock on int64 keys bounded by left_k and right_k +static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) { + DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); + DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); + return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn); +} + +#if 0 +static locktree **big_txn_lt; +static int n_big_txn_lt; + +static int get_locktrees_touched_by_txn(TXNID txn_id UU(), void *txn_extra UU(), locktree ***ret_locktrees, int *ret_num_locktrees) { + locktree **locktrees = (locktree **) toku_malloc(n_big_txn_lt * sizeof (locktree *)); + for (int i = 0; i < n_big_txn_lt; i++) + locktrees[i] = big_txn_lt[i]; + *ret_locktrees = locktrees; + *ret_num_locktrees = n_big_txn_lt; + return 0; +} +#endif + +static void run_big_txn(locktree::manager *mgr UU(), locktree **lt, int n_lt, TXNID txn_id) { + int64_t last_i = -1; + for (int64_t i = 0; !killed; i++) { + for (int j = 0; j < n_lt; j++) { + uint64_t t_start = toku_current_time_microsec(); + int r = locktree_write_lock(lt[j], txn_id, i, i, true); + assert(r == 0); + last_i = i; + uint64_t t_end = toku_current_time_microsec(); + uint64_t t_duration = t_end - t_start; + if (t_duration > 100000) { + printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration); + } + } + toku_pthread_yield(); + } + if (last_i != -1) + for (int j = 0; j < n_lt; j++) + locktree_release_lock(lt[j], txn_id, 0, last_i); // release the range 0 .. last_i + if (verbose) + printf("%u %s %" PRId64 "\n", toku_os_gettid(), __FUNCTION__, last_i); +} + +struct big_arg { + locktree::manager *mgr; + locktree **lt; + int n_lt; + TXNID txn_id; +}; + +static void *big_f(void *_arg) { + struct big_arg *arg = (struct big_arg *) _arg; + assert(pthread_equal(pthread_self(), big_id)); + printf("%u %s\n", toku_os_gettid(), __FUNCTION__); + run_big_txn(arg->mgr, arg->lt, arg->n_lt, arg->txn_id); + return arg; +} + +static void run_small_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t k) { + int64_t i; + for (i = 0; !killed; i++) { + uint64_t t_start = toku_current_time_microsec(); + int r = locktree_write_lock(lt, txn_id, k, k, false); + assert(r == 0); + uint64_t t_end = toku_current_time_microsec(); + uint64_t t_duration = t_end - t_start; + if (t_duration > 100000) { + printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration); + } + locktree_release_lock(lt, txn_id, k, k); + toku_pthread_yield(); + } + if (verbose) + printf("%u %s %" PRId64 "\n", toku_os_gettid(), __FUNCTION__, i); +} + +struct small_arg { + locktree::manager *mgr; + locktree *lt; + TXNID txn_id; + int64_t k; +}; + +static void *small_f(void *_arg) { + struct small_arg *arg = (struct small_arg *) _arg; + printf("%u %s\n", toku_os_gettid(), __FUNCTION__); + run_small_txn(arg->mgr, arg->lt, arg->txn_id, arg->k); + return arg; +} + +static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra) { + assert(pthread_equal(pthread_self(), big_id)); + if (verbose) + printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); +} + +static uint64_t get_escalation_count(locktree::manager &mgr) { + LTM_STATUS_S ltm_status; + mgr.get_status(<m_status); + + TOKU_ENGINE_STATUS_ROW key_status = NULL; + // lookup keyname in status + for (int i = 0; ; i++) { + TOKU_ENGINE_STATUS_ROW status = <m_status.status[i]; + if (status->keyname == NULL) + break; + if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) { + key_status = status; + break; + } + } + assert(key_status); + return key_status->value.num; +} + +int main(int argc, const char *argv[]) { + uint64_t stalls = 1; + int n_big = 7; + uint64_t max_lock_memory = 1000000; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "--stalls") == 0 && i+1 < argc) { + stalls = atoll(argv[++i]); + continue; + } + if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) { + max_lock_memory = atoll(argv[++i]); + continue; + } + } + + int r; + + // create a manager + locktree::manager mgr; + mgr.create(nullptr, nullptr, e_callback, nullptr); + mgr.set_max_lock_memory(max_lock_memory); + + // create lock trees + uint64_t next_dict_id = 1; + DICTIONARY_ID dict_id; + locktree *big_lt[n_big]; + for (int i = 0; i < n_big; i++) { + dict_id = { next_dict_id }; next_dict_id++; + big_lt[i] = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + } + +#if 0 + big_txn_lt = big_lt; + n_big_txn_lt = n_big; +#endif + + dict_id = { next_dict_id }; next_dict_id++; + locktree *small_lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + + // create the worker threads + struct big_arg big_arg = { &mgr, big_lt, n_big, 1000 }; + r = toku_pthread_create(&big_id, nullptr, big_f, &big_arg); + assert(r == 0); + + struct small_arg small_arg = { &mgr, small_lt, 2000, 0 }; + r = toku_pthread_create(&small_id, nullptr, small_f, &small_arg); + assert(r == 0); + + // wait for some escalations to occur + while (get_escalation_count(mgr) < stalls) { + sleep(1); + } + killed = 1; + + // cleanup + void *ret; + r = toku_pthread_join(big_id, &ret); + assert(r == 0); + + r = toku_pthread_join(small_id, &ret); + assert(r == 0); + + for (int i = 0; i < n_big; i++) + mgr.release_lt(big_lt[i]); + mgr.release_lt(small_lt); + mgr.destroy(); + + return 0; +} diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc new file mode 100644 index 00000000000..ab1818d2fd3 --- /dev/null +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc @@ -0,0 +1,250 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include <stdio.h> +#include "locktree.h" +#include "test.h" + +// One client locks 1,2,3... +// The other client locks -1,-2,-3... +// Eventually lock escalation runs. + +using namespace toku; + +static int verbose = 0; +static int killed = 0; + +static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) { + range_buffer buffer; + buffer.create(); + DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); + DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); + buffer.append(&left, &right); + lt->release_locks(txn_id, &buffer); + buffer.destroy(); +} + +// grab a write range lock on int64 keys bounded by left_k and right_k +static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) { + DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); + DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); + return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn); +} + +static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t start_i) { + fprintf(stderr, "%u run_big_txn %p %" PRIu64 " %" PRId64 "\n", toku_os_gettid(), lt, txn_id, start_i); + int64_t last_i = -1; + for (int64_t i = start_i; !killed; i++) { + if (0) + printf("%u %" PRId64 "\n", toku_os_gettid(), i); + uint64_t t_start = toku_current_time_microsec(); + int r = locktree_write_lock(lt, txn_id, i, i, true); + if (r != 0) + break; + last_i = i; + uint64_t t_end = toku_current_time_microsec(); + uint64_t t_duration = t_end - t_start; + if (t_duration > 100000) { + printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration); + } + toku_pthread_yield(); + } + if (last_i != -1) + locktree_release_lock(lt, txn_id, start_i, last_i); // release the range start_i .. last_i +} + +struct arg { + locktree::manager *mgr; + locktree *lt; + TXNID txn_id; + int64_t start_i; +}; + +static void *big_f(void *_arg) { + struct arg *arg = (struct arg *) _arg; + run_big_txn(arg->mgr, arg->lt, arg->txn_id, arg->start_i); + return arg; +} + +static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra) { + if (verbose) + printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); +} + +static uint64_t get_escalation_count(locktree::manager &mgr) { + LTM_STATUS_S ltm_status; + mgr.get_status(<m_status); + + TOKU_ENGINE_STATUS_ROW key_status = NULL; + // lookup keyname in status + for (int i = 0; ; i++) { + TOKU_ENGINE_STATUS_ROW status = <m_status.status[i]; + if (status->keyname == NULL) + break; + if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) { + key_status = status; + break; + } + } + assert(key_status); + return key_status->value.num; +} + +int main(int argc, const char *argv[]) { + const int n_big = 2; + int n_lt = 1; + uint64_t stalls = 1; + uint64_t max_lock_memory = 1000000; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "--stalls") == 0 && i+1 < argc) { + stalls = atoll(argv[++i]); + continue; + } + if (strcmp(argv[i], "--n_lt") == 0 && i+1 < argc) { + n_lt = atoi(argv[++i]); + continue; + } + if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) { + max_lock_memory = atoll(argv[++i]); + continue; + } + } + + int r; + + // create a manager + locktree::manager mgr; + mgr.create(nullptr, nullptr, e_callback, nullptr); + mgr.set_max_lock_memory(max_lock_memory); + + // create lock trees + DESCRIPTOR desc[n_lt]; + DICTIONARY_ID dict_id[n_lt]; + locktree *lt[n_big]; + for (int i = 0; i < n_lt; i++) { + desc[i] = nullptr; + dict_id[i] = { (uint64_t)i }; + lt[i] = mgr.get_lt(dict_id[i], desc[i], compare_dbts, nullptr); + assert(lt[i]); + } + + // create the worker threads + struct arg big_arg[n_big]; + pthread_t big_ids[n_big]; + for (int i = 0; i < n_big; i++) { + big_arg[i] = { &mgr, lt[i % n_lt], (TXNID)(1000+i), i == 0 ? 1 : -1000000000 }; + r = toku_pthread_create(&big_ids[i], nullptr, big_f, &big_arg[i]); + assert(r == 0); + } + + // wait for some escalations to occur + while (get_escalation_count(mgr) < stalls) { + sleep(1); + } + killed = 1; + + // cleanup + for (int i = 0; i < n_big; i++) { + void *ret; + r = toku_pthread_join(big_ids[i], &ret); + assert(r == 0); + } + for (int i = 0; i < n_lt ; i++) { + mgr.release_lt(lt[i]); + } + mgr.destroy(); + + return 0; +} diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc new file mode 100644 index 00000000000..d9f62ca29f3 --- /dev/null +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc @@ -0,0 +1,250 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include <stdio.h> +#include "locktree.h" +#include "test.h" + +// One client locks 1,2,3... +// The other client locks -1,-2,-3... +// Eventually lock escalation runs. + +using namespace toku; + +static int verbose = 0; +static int killed = 0; + +static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) { + range_buffer buffer; + buffer.create(); + DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); + DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); + buffer.append(&left, &right); + lt->release_locks(txn_id, &buffer); + buffer.destroy(); +} + +// grab a write range lock on int64 keys bounded by left_k and right_k +static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) { + DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); + DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); + return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn); +} + +static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t start_i) { + fprintf(stderr, "%u run_big_txn %p %" PRIu64 " %" PRId64 "\n", toku_os_gettid(), lt, txn_id, start_i); + int64_t last_i = -1; + for (int64_t i = start_i; !killed; i++) { + if (0) + printf("%u %" PRId64 "\n", toku_os_gettid(), i); + uint64_t t_start = toku_current_time_microsec(); + int r = locktree_write_lock(lt, txn_id, i, i, true); + if (r != 0) + break; + last_i = i; + uint64_t t_end = toku_current_time_microsec(); + uint64_t t_duration = t_end - t_start; + if (t_duration > 100000) { + printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration); + } + toku_pthread_yield(); + } + if (last_i != -1) + locktree_release_lock(lt, txn_id, start_i, last_i); // release the range start_i .. last_i +} + +struct arg { + locktree::manager *mgr; + locktree *lt; + TXNID txn_id; + int64_t start_i; +}; + +static void *big_f(void *_arg) { + struct arg *arg = (struct arg *) _arg; + run_big_txn(arg->mgr, arg->lt, arg->txn_id, arg->start_i); + return arg; +} + +static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra) { + if (verbose) + printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); +} + +static uint64_t get_escalation_count(locktree::manager &mgr) { + LTM_STATUS_S ltm_status; + mgr.get_status(<m_status); + + TOKU_ENGINE_STATUS_ROW key_status = NULL; + // lookup keyname in status + for (int i = 0; ; i++) { + TOKU_ENGINE_STATUS_ROW status = <m_status.status[i]; + if (status->keyname == NULL) + break; + if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) { + key_status = status; + break; + } + } + assert(key_status); + return key_status->value.num; +} + +int main(int argc, const char *argv[]) { + const int n_big = 2; + int n_lt = 2; + uint64_t stalls = 1; + uint64_t max_lock_memory = 1000000; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "--stalls") == 0 && i+1 < argc) { + stalls = atoll(argv[++i]); + continue; + } + if (strcmp(argv[i], "--n_lt") == 0 && i+1 < argc) { + n_lt = atoi(argv[++i]); + continue; + } + if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) { + max_lock_memory = atoll(argv[++i]); + continue; + } + } + + int r; + + // create a manager + locktree::manager mgr; + mgr.create(nullptr, nullptr, e_callback, nullptr); + mgr.set_max_lock_memory(max_lock_memory); + + // create lock trees + DESCRIPTOR desc[n_lt]; + DICTIONARY_ID dict_id[n_lt]; + locktree *lt[n_big]; + for (int i = 0; i < n_lt; i++) { + desc[i] = nullptr; + dict_id[i] = { (uint64_t)i }; + lt[i] = mgr.get_lt(dict_id[i], desc[i], compare_dbts, nullptr); + assert(lt[i]); + } + + // create the worker threads + struct arg big_arg[n_big]; + pthread_t big_ids[n_big]; + for (int i = 0; i < n_big; i++) { + big_arg[i] = { &mgr, lt[i % n_lt], (TXNID)(1000+i), i == 0 ? 1 : -1000000000 }; + r = toku_pthread_create(&big_ids[i], nullptr, big_f, &big_arg[i]); + assert(r == 0); + } + + // wait for some escalations to occur + while (get_escalation_count(mgr) < stalls) { + sleep(1); + } + killed = 1; + + // cleanup + for (int i = 0; i < n_big; i++) { + void *ret; + r = toku_pthread_join(big_ids[i], &ret); + assert(r == 0); + } + for (int i = 0; i < n_lt ; i++) { + mgr.release_lt(lt[i]); + } + mgr.destroy(); + + return 0; +} diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc new file mode 100644 index 00000000000..cdee8b9d333 --- /dev/null +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc @@ -0,0 +1,204 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include <stdio.h> +#include "locktree.h" +#include "test.h" + +// Two big txn's grab alternating locks in a single lock tree. +// Eventually lock escalation runs. +// Since the locks can not be consolidated, the out of locks error should be returned. + +using namespace toku; + +static int verbose = 0; + +static inline void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) { + range_buffer buffer; + buffer.create(); + DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); + DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); + buffer.append(&left, &right); + lt->release_locks(txn_id, &buffer); + buffer.destroy(); +} + +// grab a write range lock on int64 keys bounded by left_k and right_k +static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) { + DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); + DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); + return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn); +} + +static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra) { + if (verbose) + printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); +} + +static uint64_t get_escalation_count(locktree::manager &mgr) { + LTM_STATUS_S ltm_status; + mgr.get_status(<m_status); + + TOKU_ENGINE_STATUS_ROW key_status = NULL; + // lookup keyname in status + for (int i = 0; ; i++) { + TOKU_ENGINE_STATUS_ROW status = <m_status.status[i]; + if (status->keyname == NULL) + break; + if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) { + key_status = status; + break; + } + } + assert(key_status); + return key_status->value.num; +} + +int main(int argc, const char *argv[]) { + uint64_t max_lock_memory = 1000000; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) { + max_lock_memory = atoll(argv[++i]); + continue; + } + } + + int r; + + // create a manager + locktree::manager mgr; + mgr.create(nullptr, nullptr, e_callback, nullptr); + mgr.set_max_lock_memory(max_lock_memory); + + const TXNID txn_a = 10; + const TXNID txn_b = 100; + + // create lock trees + DESCRIPTOR desc = nullptr; + DICTIONARY_ID dict_id = { 1 }; + locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + + int64_t last_i = -1; + for (int64_t i = 0; ; i++) { + if (verbose) + printf("%" PRId64 "\n", i); + int64_t k = 2*i; + r = locktree_write_lock(lt, txn_a, k, k, true); + if (r != 0) { + assert(r == TOKUDB_OUT_OF_LOCKS); + break; + } + last_i = i; + r = locktree_write_lock(lt, txn_b, k+1, k+1, true); + if (r != 0) { + assert(r == TOKUDB_OUT_OF_LOCKS); + break; + } + } + + // wait for an escalation to occur + assert(get_escalation_count(mgr) > 0); + + if (last_i != -1) { + locktree_release_lock(lt, txn_a, 0, 2*last_i); + locktree_release_lock(lt, txn_b, 0, 2*last_i+1); + } + + mgr.release_lt(lt); + mgr.destroy(); + + return 0; +} diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc index a41040410b0..28ae88cb3a8 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc @@ -89,6 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +// This test verifies that small txn's do not get stalled for a long time by lock escalation. +// Two lock trees are used by the test: a big lock tree and a small lock tree. +// One big txn grabs lots of write locks on the big lock tree. +// Several small txn's grab a single write lock on the small lock tree. +// None of the locks conflict. +// Eventually, the locks for the big txn consume all of the lock tree memory, so lock escalation runs. +// The test measures the lock acquisition time and makes sure that the small txn's are not blocked for + +// locktree_escalation_stalls -v --stalls 10 +// verify that only big txn's get tagged with > 1 second stalls + #include <stdio.h> #include "locktree.h" #include "test.h" @@ -96,6 +107,7 @@ PATENT RIGHTS GRANT: using namespace toku; static int verbose = 0; +static int killed = 0; static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) { range_buffer buffer; @@ -108,38 +120,42 @@ static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, in } // grab a write range lock on int64 keys bounded by left_k and right_k -static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) { +static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) { DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); - return lt->acquire_write_lock(txn_id, &left, &right, nullptr); + return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn); } -static void run_big_txn(locktree::manager *mgr, locktree *lt, TXNID txn_id) { - mgr = mgr; - for (int64_t i = 0; 1; i++) { +static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id) { + int64_t last_i = -1; + for (int64_t i = 0; !killed; i++) { uint64_t t_start = toku_current_time_microsec(); - int r = locktree_write_lock(lt, txn_id, i, i); + int r = locktree_write_lock(lt, txn_id, i, i, true); assert(r == 0); + last_i = i; uint64_t t_end = toku_current_time_microsec(); uint64_t t_duration = t_end - t_start; if (t_duration > 100000) { printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration); } + toku_pthread_yield(); } + if (last_i != -1) + locktree_release_lock(lt, txn_id, 0, last_i); // release the range 0 .. last_i } -static void run_small_txn(locktree::manager *mgr, locktree *lt, TXNID txn_id, int64_t k) { - mgr = mgr; - for (int64_t i = 0; 1; i++) { +static void run_small_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t k) { + for (int64_t i = 0; !killed; i++) { uint64_t t_start = toku_current_time_microsec(); - int r = locktree_write_lock(lt, txn_id, k, k); + int r = locktree_write_lock(lt, txn_id, k, k, false); assert(r == 0); uint64_t t_end = toku_current_time_microsec(); uint64_t t_duration = t_end - t_start; if (t_duration > 100000) { printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration); } - locktree_release_lock(lt, txn_id, i, i); + locktree_release_lock(lt, txn_id, k, k); + toku_pthread_yield(); } } @@ -167,15 +183,49 @@ static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buff printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); } +static uint64_t get_escalation_count(locktree::manager &mgr) { + LTM_STATUS_S ltm_status; + mgr.get_status(<m_status); + + TOKU_ENGINE_STATUS_ROW key_status = NULL; + // lookup keyname in status + for (int i = 0; ; i++) { + TOKU_ENGINE_STATUS_ROW status = <m_status.status[i]; + if (status->keyname == NULL) + break; + if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) { + key_status = status; + break; + } + } + assert(key_status); + return key_status->value.num; +} + int main(int argc, const char *argv[]) { - if (argc == 1 || argv == nullptr) - return 0; + uint64_t stalls = 0; + uint64_t max_lock_memory = 1000000000; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "--stalls") == 0 && i+1 < argc) { + stalls = atoll(argv[++i]); + continue; + } + if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) { + max_lock_memory = atoll(argv[++i]); + continue; + } + } + int r; // create a manager locktree::manager mgr; mgr.create(nullptr, nullptr, e_callback, nullptr); - mgr.set_max_lock_memory(1000000000); + mgr.set_max_lock_memory(max_lock_memory); // create lock trees DESCRIPTOR desc_0 = nullptr; @@ -186,6 +236,7 @@ int main(int argc, const char *argv[]) { DICTIONARY_ID dict_id_1 = { 2 }; locktree *lt_1 = mgr.get_lt(dict_id_1, desc_1, compare_dbts, nullptr); + // create the worker threads struct arg big_arg = { &mgr, lt_0, 1000 }; pthread_t big_id; r = toku_pthread_create(&big_id, nullptr, big_f, &big_arg); @@ -201,6 +252,13 @@ int main(int argc, const char *argv[]) { assert(r == 0); } + // wait for some escalations to occur + while (get_escalation_count(mgr) < stalls) { + sleep(1); + } + killed = 1; + + // cleanup void *ret; r = toku_pthread_join(big_id, &ret); assert(r == 0); @@ -210,7 +268,6 @@ int main(int argc, const char *argv[]) { assert(r == 0); } - // cleanup mgr.release_lt(lt_0); mgr.release_lt(lt_1); mgr.destroy(); diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc b/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc index fd3dd1ca605..142e50992a6 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc @@ -112,54 +112,54 @@ void locktree_unit_test::test_infinity(void) { const DBT max_int = max_dbt(); // txn A will lock -inf, 5. - r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), five, nullptr); + r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), five, nullptr, false); invariant(r == 0); // txn B will fail to get any lock <= 5, even min_int - r = lt->acquire_write_lock(txnid_b, five, five, nullptr); + r = lt->acquire_write_lock(txnid_b, five, five, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, zero, one, nullptr); + r = lt->acquire_write_lock(txnid_b, zero, one, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr); + r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), &min_int, nullptr); + r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), &min_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five); // txn A will lock 1, +inf - r = lt->acquire_write_lock(txnid_a, one, toku_dbt_positive_infinity(), nullptr); + r = lt->acquire_write_lock(txnid_a, one, toku_dbt_positive_infinity(), nullptr, false); invariant(r == 0); // txn B will fail to get any lock >= 1, even max_int - r = lt->acquire_write_lock(txnid_b, one, one, nullptr); + r = lt->acquire_write_lock(txnid_b, one, one, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, two, five, nullptr); + r = lt->acquire_write_lock(txnid_b, two, five, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr); + r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &max_int, toku_dbt_positive_infinity(), nullptr); + r = lt->acquire_write_lock(txnid_b, &max_int, toku_dbt_positive_infinity(), nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five); // txn A will lock -inf, +inf - r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr); + r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr, false); invariant(r == 0); // txn B will fail to get any lock - r = lt->acquire_write_lock(txnid_b, zero, one, nullptr); + r = lt->acquire_write_lock(txnid_b, zero, one, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, two, five, nullptr); + r = lt->acquire_write_lock(txnid_b, two, five, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr); + r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &min_int, &max_int, nullptr); + r = lt->acquire_write_lock(txnid_b, &min_int, &max_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr); + r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_negative_infinity(), nullptr); + r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_negative_infinity(), nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr); + r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, toku_dbt_positive_infinity(), toku_dbt_positive_infinity(), nullptr); + r = lt->acquire_write_lock(txnid_b, toku_dbt_positive_infinity(), toku_dbt_positive_infinity(), nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc b/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc index 4567b90a420..ecb710517e1 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc @@ -121,15 +121,15 @@ void locktree_unit_test::test_overlapping_relock(void) { // do something. at the end of the test, we release 100, 100. const TXNID the_other_txnid = 9999; const DBT *hundred = get_dbt(100); - r = lt->acquire_write_lock(the_other_txnid, hundred, hundred, nullptr); + r = lt->acquire_write_lock(the_other_txnid, hundred, hundred, nullptr, false); invariant(r == 0); for (int test_run = 0; test_run < 2; test_run++) { // test_run == 0 means test with read lock // test_run == 1 means test with write lock #define ACQUIRE_LOCK(txn, left, right, conflicts) \ - test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts) \ - : lt->acquire_write_lock(txn, left, right, conflicts) + test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts, false) \ + : lt->acquire_write_lock(txn, left, right, conflicts, false) // lock [1,1] and [2,2]. then lock [1,2]. // ensure only [1,2] exists in the tree diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc b/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc index 44fe0c578a0..549a44a1479 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc @@ -115,8 +115,8 @@ void locktree_unit_test::test_simple_lock(void) { // test_run == 0 means test with read lock // test_run == 1 means test with write lock #define ACQUIRE_LOCK(txn, left, right, conflicts) \ - test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts) \ - : lt->acquire_write_lock(txn, left, right, conflicts) + test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts, false) \ + : lt->acquire_write_lock(txn, left, right, conflicts, false) // four txns, four points r = ACQUIRE_LOCK(txnid_a, one, one, nullptr); @@ -178,7 +178,7 @@ void locktree_unit_test::test_simple_lock(void) { for (int64_t i = 0; i < num_locks; i++) { k.data = (void *) &keys[i]; - r = lt->acquire_read_lock(txnid_a, &k, &k, nullptr); + r = lt->acquire_read_lock(txnid_a, &k, &k, nullptr, false); invariant(r == 0); } diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc b/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc index 1c9e80f57d5..6fdd7270f09 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc @@ -124,13 +124,13 @@ void locktree_unit_test::test_single_txnid_optimization(void) { buffer.create(); #define lock_and_append_point_for_txnid_a(key) \ - r = lt->acquire_write_lock(txnid_a, key, key, nullptr); \ + r = lt->acquire_write_lock(txnid_a, key, key, nullptr, false); \ invariant_zero(r); \ buffer.append(key, key); #define maybe_point_locks_for_txnid_b(i) \ if (where == i) { \ - r = lt->acquire_write_lock(txnid_b, one, one, nullptr); \ + r = lt->acquire_write_lock(txnid_b, one, one, nullptr, false); \ invariant_zero(r); \ } diff --git a/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc b/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc index c0c218c8a3e..6f667c04a74 100644 --- a/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc +++ b/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc @@ -109,7 +109,6 @@ void manager_unit_test::test_create_destroy(void) { invariant(mgr.m_escalation_count == 0); invariant(mgr.m_escalation_time == 0); invariant(mgr.m_escalation_latest_result == 0); - invariant(mgr.m_lock_wait_time_ms == locktree::manager::DEFAULT_LOCK_WAIT_TIME); invariant(mgr.m_locktree_map.size() == 0); invariant(mgr.m_lt_create_callback == create_callback); diff --git a/storage/tokudb/ft-index/locktree/tests/manager_params.cc b/storage/tokudb/ft-index/locktree/tests/manager_params.cc index e0f18d75df8..95642db5121 100644 --- a/storage/tokudb/ft-index/locktree/tests/manager_params.cc +++ b/storage/tokudb/ft-index/locktree/tests/manager_params.cc @@ -103,10 +103,6 @@ void manager_unit_test::test_params(void) { invariant(r == 0); invariant(mgr.get_max_lock_memory() == new_max_lock_memory); - uint64_t new_lock_wait_time = 62345234; - mgr.set_lock_wait_time(new_lock_wait_time); - invariant(mgr.get_lock_wait_time() == new_lock_wait_time); - mgr.m_current_lock_memory = 100000; r = mgr.set_max_lock_memory(mgr.m_current_lock_memory - 1); invariant(r == EDOM); diff --git a/storage/tokudb/ft-index/locktree/tests/manager_status.cc b/storage/tokudb/ft-index/locktree/tests/manager_status.cc index 0551dd3b3e0..6803b5a22b3 100644 --- a/storage/tokudb/ft-index/locktree/tests/manager_status.cc +++ b/storage/tokudb/ft-index/locktree/tests/manager_status.cc @@ -130,16 +130,16 @@ void manager_unit_test::test_status(void) { const DBT *one = get_dbt(1); // txn a write locks one - r = lt->acquire_write_lock(txnid_a, one, one, nullptr); + r = lt->acquire_write_lock(txnid_a, one, one, nullptr, false); assert(r == 0); // txn b tries to write lock one, conflicts, waits, and fails to lock one lock_request request_b; - request_b.create(1000); - request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE); + request_b.create(); + request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE, false); r = request_b.start(); assert(r == DB_LOCK_NOTGRANTED); - r = request_b.wait(); + r = request_b.wait(1000); assert(r == DB_LOCK_NOTGRANTED); request_b.destroy(); |