summaryrefslogtreecommitdiff
path: root/storage/rocksdb/rdb_mutex_wrapper.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/rocksdb/rdb_mutex_wrapper.cc')
-rw-r--r--storage/rocksdb/rdb_mutex_wrapper.cc234
1 files changed, 234 insertions, 0 deletions
diff --git a/storage/rocksdb/rdb_mutex_wrapper.cc b/storage/rocksdb/rdb_mutex_wrapper.cc
new file mode 100644
index 00000000000..e8077e2fd89
--- /dev/null
+++ b/storage/rocksdb/rdb_mutex_wrapper.cc
@@ -0,0 +1,234 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* This C++ file's header file */
+#include "./rdb_mutex_wrapper.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./rdb_utils.h"
+
+// Internal MySQL APIs not exposed in any header.
+extern "C"
+{
+void thd_enter_cond(MYSQL_THD thd, mysql_cond_t *cond, mysql_mutex_t *mutex,
+ const PSI_stage_info *stage, PSI_stage_info *old_stage);
+void thd_exit_cond(MYSQL_THD thd, const PSI_stage_info *stage);
+}
+
+using namespace rocksdb;
+
+namespace myrocks {
+
+static
+PSI_stage_info stage_waiting_on_row_lock2= { 0, "Waiting for row lock", 0};
+
+static const int64_t MICROSECS= 1000*1000;
+// A timeout as long as one full non-leap year worth of microseconds is as
+// good as infinite timeout.
+static const int64_t BIG_TIMEOUT= MICROSECS * 60 * 60 * 24 * 7 * 365;
+
+Rdb_cond_var::Rdb_cond_var() {
+ mysql_cond_init(0, &m_cond, nullptr);
+}
+
+Rdb_cond_var::~Rdb_cond_var() {
+ mysql_cond_destroy(&m_cond);
+}
+
+Status Rdb_cond_var::Wait(std::shared_ptr<TransactionDBMutex> mutex_arg) {
+ return WaitFor(mutex_arg, BIG_TIMEOUT);
+}
+
+
+/*
+ @brief
+ Wait on condition variable. The caller must make sure that we own
+ *mutex_ptr. The mutex is released and re-acquired by the wait function.
+
+ @param
+ timeout_micros Timeout in microseconds. Negative value means no timeout.
+
+ @return
+ Status::OK() - Wait successfull
+ Status::TimedOut() - Timed out or wait killed (the caller can check
+ thd_killed() to determine which occurred)
+*/
+
+Status
+Rdb_cond_var::WaitFor(std::shared_ptr<TransactionDBMutex> mutex_arg,
+ int64_t timeout_micros)
+{
+ auto *mutex_obj= reinterpret_cast<Rdb_mutex*>(mutex_arg.get());
+ DBUG_ASSERT(mutex_obj != nullptr);
+
+ mysql_mutex_t * const mutex_ptr= &mutex_obj->m_mutex;
+
+ int res= 0;
+ struct timespec wait_timeout;
+
+ if (timeout_micros < 0)
+ timeout_micros= BIG_TIMEOUT;
+ set_timespec_nsec(wait_timeout, timeout_micros*1000);
+
+#ifndef STANDALONE_UNITTEST
+ PSI_stage_info old_stage;
+ mysql_mutex_assert_owner(mutex_ptr);
+
+ if (current_thd && mutex_obj->m_old_stage_info.count(current_thd) == 0)
+ {
+ my_core::thd_enter_cond(current_thd, &m_cond, mutex_ptr,
+ &stage_waiting_on_row_lock2, &old_stage);
+ /*
+ After the mysql_cond_timedwait we need make this call
+
+ my_core::thd_exit_cond(thd, &old_stage);
+
+ to inform the SQL layer that KILLable wait has ended. However,
+ that will cause mutex to be released. Defer the release until the mutex
+ that is unlocked by RocksDB's Pessimistic Transactions system.
+ */
+ mutex_obj->set_unlock_action(&old_stage);
+ }
+
+#endif
+ bool killed= false;
+
+ do
+ {
+ res= mysql_cond_timedwait(&m_cond, mutex_ptr, &wait_timeout);
+
+#ifndef STANDALONE_UNITTEST
+ if (current_thd)
+ killed= my_core::thd_killed(current_thd);
+#endif
+ } while (!killed && res == EINTR);
+
+ if (res || killed)
+ return Status::TimedOut();
+ else
+ return Status::OK();
+}
+
+
+/*
+
+ @note
+ This function may be called while not holding the mutex that is used to wait
+ on the condition variable.
+
+ The manual page says ( http://linux.die.net/man/3/pthread_cond_signal):
+
+ The pthread_cond_broadcast() or pthread_cond_signal() functions may be called
+ by a thread whether or not it currently owns the mutex that threads calling
+ pthread_cond_wait() or pthread_cond_timedwait() have associated with the
+ condition variable during their waits; however, IF PREDICTABLE SCHEDULING
+ BEHAVIOR IS REQUIRED, THEN THAT MUTEX SHALL BE LOCKED by the thread calling
+ pthread_cond_broadcast() or pthread_cond_signal().
+
+ What's "predicate scheduling" and do we need it? The explanation is here:
+
+ https://groups.google.com/forum/?hl=ky#!msg/comp.programming.threads/wEUgPq541v8/ZByyyS8acqMJ
+ "The problem (from the realtime side) with condition variables is that
+ if you can signal/broadcast without holding the mutex, and any thread
+ currently running can acquire an unlocked mutex and check a predicate
+ without reference to the condition variable, then you can have an
+ indirect priority inversion."
+
+ Another possible consequence is that one can create spurious wake-ups when
+ there are multiple threads signaling the condition.
+
+ None of this looks like a problem for our use case.
+*/
+
+void Rdb_cond_var::Notify()
+{
+ mysql_cond_signal(&m_cond);
+}
+
+
+/*
+ @note
+ This is called without holding the mutex that's used for waiting on the
+ condition. See ::Notify().
+*/
+void Rdb_cond_var::NotifyAll()
+{
+ mysql_cond_broadcast(&m_cond);
+}
+
+
+Rdb_mutex::Rdb_mutex()
+{
+ mysql_mutex_init(0 /* Don't register in P_S. */, &m_mutex,
+ MY_MUTEX_INIT_FAST);
+}
+
+Rdb_mutex::~Rdb_mutex() {
+ mysql_mutex_destroy(&m_mutex);
+}
+
+Status Rdb_mutex::Lock() {
+ mysql_mutex_lock(&m_mutex);
+ DBUG_ASSERT(m_old_stage_info.count(current_thd) == 0);
+ return Status::OK();
+}
+
+// Attempt to acquire lock. If timeout is non-negative, operation may be
+// failed after this many milliseconds.
+// If implementing a custom version of this class, the implementation may
+// choose to ignore the timeout.
+// Return OK on success, or other Status on failure.
+Status Rdb_mutex::TryLockFor(int64_t timeout_time __attribute__((__unused__)))
+{
+ /*
+ Note: PThreads API has pthread_mutex_timedlock(), but mysql's
+ mysql_mutex_* wrappers do not wrap that function.
+ */
+ mysql_mutex_lock(&m_mutex);
+ return Status::OK();
+}
+
+
+#ifndef STANDALONE_UNITTEST
+void Rdb_mutex::set_unlock_action(PSI_stage_info *old_stage_arg)
+{
+ DBUG_ASSERT(old_stage_arg != nullptr);
+
+ mysql_mutex_assert_owner(&m_mutex);
+ DBUG_ASSERT(m_old_stage_info.count(current_thd) == 0);
+
+ m_old_stage_info[current_thd] =
+ std::make_shared<PSI_stage_info>(*old_stage_arg);
+}
+#endif
+
+// Unlock Mutex that was successfully locked by Lock() or TryLockUntil()
+void Rdb_mutex::UnLock() {
+#ifndef STANDALONE_UNITTEST
+ if (m_old_stage_info.count(current_thd) > 0)
+ {
+ std::shared_ptr<PSI_stage_info> old_stage = m_old_stage_info[current_thd];
+ m_old_stage_info.erase(current_thd);
+ /* The following will call mysql_mutex_unlock */
+ my_core::thd_exit_cond(current_thd, old_stage.get());
+ return;
+ }
+#endif
+ mysql_mutex_unlock(&m_mutex);
+}
+
+} // namespace myrocks