summaryrefslogtreecommitdiff
path: root/storage/innobase/lock
diff options
context:
space:
mode:
authorsensssz <hjmsens@gmail.com>2016-10-17 21:56:05 -0400
committersensssz <hjmsens@gmail.com>2016-10-19 01:42:10 -0400
commitc455898b2c9aaaa9da2def86523f8a9c5016f49f (patch)
tree29505f8dc123d41b6703e1d7944bda47ff99bd86 /storage/innobase/lock
parent8303aded294ce905bbc513e7ee42623d5f1fdb50 (diff)
downloadmariadb-git-c455898b2c9aaaa9da2def86523f8a9c5016f49f.tar.gz
Implement VATS in XtraDB and InnoDB.
Diffstat (limited to 'storage/innobase/lock')
-rw-r--r--storage/innobase/lock/lock0lock.cc241
1 files changed, 220 insertions, 21 deletions
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index e3299e2d74b..7db94b143be 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -30,6 +30,7 @@ Created 5/7/1996 Heikki Tuuri
#include "ha_prototypes.h"
#include <mysql/service_thd_error_context.h>
+#include <sql_class.h>
#include "lock0lock.h"
#include "lock0priv.h"
@@ -58,6 +59,9 @@ Created 5/7/1996 Heikki Tuuri
#include <mysql/service_wsrep.h>
#endif /* WITH_WSREP */
+/** Lock scheduling algorithm */
+ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
+
/** Total number of cached record locks */
static const ulint REC_LOCK_CACHE = 8;
@@ -70,10 +74,29 @@ static const ulint TABLE_LOCK_CACHE = 8;
/** Size in bytes, of the table lock instance */
static const ulint TABLE_LOCK_SIZE = sizeof(ib_lock_t);
+/*********************************************************************//**
+Checks if a waiting record lock request still has to wait in a queue.
+@return lock that is causing the wait */
+static
+const lock_t*
+lock_rec_has_to_wait_in_queue(
+/*==========================*/
+ const lock_t* wait_lock); /*!< in: waiting record lock */
+
+/* Buffer to collect THDs to report waits for. */
+struct thd_wait_reports {
+ struct thd_wait_reports *next; /*!< List link */
+ ulint used; /*!< How many elements in waitees[] */
+ trx_t *waitees[64]; /*!< Trxs for thd_report_wait_for() */
+};
+
extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd);
extern "C" int thd_need_wait_reports(const MYSQL_THD thd);
extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
+extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
+
+
/** Deadlock checker. */
class DeadlockChecker {
public:
@@ -1869,6 +1892,85 @@ RecLock::create(
return(lock);
}
+/*********************************************************************//**
+Check if lock1 has higher priority than lock2.
+NULL has lowest priority.
+If either is a high priority transaction, the lock has higher priority.
+If neither of them is wait lock, the first one has higher priority.
+If only one of them is a wait lock, it has lower priority.
+Otherwise, the one with an older transaction has higher priority.
+@returns true if lock1 has higher priority, false otherwise. */
+bool
+has_higher_priority(
+ lock_t *lock1,
+ lock_t *lock2)
+{
+ if (lock1 == NULL) {
+ return false;
+ } else if (lock2 == NULL) {
+ return true;
+ }
+ // Ask the upper server layer if any of the two trx should be prefered.
+ int preference = thd_deadlock_victim_preference(lock1->trx->mysql_thd, lock2->trx->mysql_thd);
+ if (preference == -1) {
+ // lock1 is preferred as a victim, so lock2 has higher priority
+ return false;
+ } else if (preference == 1) {
+ // lock2 is preferred as a victim, so lock1 has higher priority
+ return true;
+ }
+ // No preference. Compre them by wait mode and trx age.
+ if (!lock_get_wait(lock1)) {
+ return true;
+ } else if (!lock_get_wait(lock2)) {
+ return false;
+ }
+ return lock1->trx->start_time < lock2->trx->start_time;
+}
+
+/*********************************************************************//**
+Insert a lock to the hash list according to the mode (whether it is a wait
+lock) and the age of the transaction the it is associated with.
+If the lock is not a wait lock, insert it to the head of the hash list.
+Otherwise, insert it to the middle of the wait locks according to the age of
+the transaciton. */
+static
+void
+lock_rec_insert_by_trx_age(
+ lock_t *in_lock, /*!< in: lock to be insert */
+ bool wait) /*!< in: whether it's a wait lock */
+{
+ ulint space;
+ ulint page_no;
+ ulint rec_fold;
+ hash_table_t* hash;
+ hash_cell_t* cell;
+ lock_t* node;
+ lock_t* next;
+
+ space = in_lock->un_member.rec_lock.space;
+ page_no = in_lock->un_member.rec_lock.page_no;
+ rec_fold = lock_rec_fold(space, page_no);
+ hash = lock_hash_get(in_lock->type_mode);
+ cell = hash_get_nth_cell(hash,
+ hash_calc_hash(rec_fold, hash));
+
+ node = (lock_t *) cell->node;
+ // If in_lock is not a wait lock, we insert it to the head of the list.
+ if (node == NULL || !wait || has_higher_priority(in_lock, node)) {
+ cell->node = in_lock;
+ in_lock->hash = node;
+ return;
+ }
+ while (node != NULL && has_higher_priority((lock_t *) node->hash,
+ in_lock)) {
+ node = (lock_t *) node->hash;
+ }
+ next = (lock_t *) node->hash;
+ node->hash = in_lock;
+ in_lock->hash = next;
+}
+
/**
Check the outcome of the deadlock check
@param[in,out] victim_trx Transaction selected for rollback
@@ -1891,7 +1993,23 @@ RecLock::check_deadlock_result(const trx_t* victim_trx, lock_t* lock)
return(DB_DEADLOCK);
- } else if (m_trx->lock.wait_lock == NULL) {
+ }
+
+ // Move it only when it does not cause a deadlock.
+ if (innodb_lock_schedule_algorithm
+ == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
+ && !trx_is_high_priority(lock->trx)) {
+
+ HASH_DELETE(lock_t, hash, lock_hash_get(lock->type_mode),
+ m_rec_id.fold(), lock);
+ lock_rec_insert_by_trx_age(lock, m_mode & LOCK_WAIT);
+ if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) {
+ lock_reset_lock_and_trx_wait(lock);
+ return DB_SUCCESS_LOCKED_REC;
+ }
+ }
+
+ if (m_trx->lock.wait_lock == NULL) {
/* If there was a deadlock but we chose another
transaction as a victim, it is possible that we
@@ -2776,6 +2894,32 @@ lock_rec_cancel(
}
/*************************************************************//**
+Move the lock to the head of the hash list. */
+static
+void
+lock_rec_move_to_front(
+ lock_t *lock_to_move, /*!< in: lock to be moved */
+ ulint rec_fold) /*!< in: rec fold of the lock */
+{
+ hash_table_t* lock_hash;
+ hash_cell_t* cell;
+ lock_t* next;
+
+ if (lock_to_move != NULL)
+ {
+ lock_hash = lock_hash_get(lock_to_move->type_mode);
+ // Move the target lock to the head of the list
+ cell = hash_get_nth_cell(lock_hash,
+ hash_calc_hash(rec_fold, lock_hash));
+ if (lock_to_move != cell->node) {
+ next = (lock_t *) cell->node;
+ cell->node = lock_to_move;
+ lock_to_move->hash = next;
+ }
+ }
+}
+
+/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
to a lock. NOTE: all record locks contained in in_lock are removed. */
@@ -2791,6 +2935,8 @@ lock_rec_dequeue_from_page(
{
ulint space;
ulint page_no;
+ ulint rec_fold;
+ lock_t* previous = NULL;
lock_t* lock;
trx_lock_t* trx_lock;
hash_table_t* lock_hash;
@@ -2803,6 +2949,7 @@ lock_rec_dequeue_from_page(
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
+ rec_fold = lock_rec_fold(space, page_no);
ut_ad(in_lock->index->table->n_rec_locks > 0);
in_lock->index->table->n_rec_locks--;
@@ -2817,33 +2964,83 @@ lock_rec_dequeue_from_page(
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK);
- /* Check if waiting locks in the queue can now be granted: grant
- locks if there are no conflicting locks ahead. Stop at the first
- X lock that is waiting or has been granted. */
+ if (innodb_lock_schedule_algorithm
+ == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
- for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
- lock != NULL;
- lock = lock_rec_get_next_on_page(lock)) {
+ /* Check if waiting locks in the queue can now be granted:
+ grant locks if there are no conflicting locks ahead. Stop at
+ the first X lock that is waiting or has been granted. */
- if (lock_get_wait(lock)
- && !lock_rec_has_to_wait_in_queue(lock)) {
+ for (lock = lock_rec_get_first_on_page_addr(lock_hash, space,
+ page_no);
+ lock != NULL;
+ lock = lock_rec_get_next_on_page(lock)) {
- /* Grant the lock */
- ut_ad(lock->trx != in_lock->trx);
+ if (lock_get_wait(lock)
+ && !lock_rec_has_to_wait_in_queue(lock)) {
- bool exit_trx_mutex = false;
+ /* Grant the lock */
+ ut_ad(lock->trx != in_lock->trx);
- if (lock->trx->abort_type != TRX_SERVER_ABORT) {
- ut_ad(trx_mutex_own(lock->trx));
- trx_mutex_exit(lock->trx);
- exit_trx_mutex = true;
+ bool exit_trx_mutex = false;
+
+ if (lock->trx->abort_type != TRX_SERVER_ABORT) {
+ ut_ad(trx_mutex_own(lock->trx));
+ trx_mutex_exit(lock->trx);
+ exit_trx_mutex = true;
+ }
+
+ lock_grant(lock);
+
+ if (exit_trx_mutex) {
+ ut_ad(!trx_mutex_own(lock->trx));
+ trx_mutex_enter(lock->trx);
+ }
}
+ }
+ } else {
+ /* Grant locks if there are no conflicting locks ahead.
+ Move granted locks to the head of the list. */
+ for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
+ lock != NULL;) {
+
+ /* If the lock is a wait lock on this page, and it does not need to wait. */
+ if ((lock->un_member.rec_lock.space == space)
+ && (lock->un_member.rec_lock.page_no == page_no)
+ && lock_get_wait(lock)
+ && !lock_rec_has_to_wait_in_queue(lock)) {
+
+ /* Grant the lock */
+ ut_ad(lock->trx != in_lock->trx);
+
+ bool exit_trx_mutex = false;
+
+ if (lock->trx->abort_type != TRX_SERVER_ABORT) {
+ ut_ad(trx_mutex_own(lock->trx));
+ trx_mutex_exit(lock->trx);
+ exit_trx_mutex = true;
+ }
- lock_grant(lock);
+ lock_grant(lock);
- if (exit_trx_mutex) {
- ut_ad(!trx_mutex_own(lock->trx));
- trx_mutex_enter(lock->trx);
+ if (exit_trx_mutex) {
+ ut_ad(!trx_mutex_own(lock->trx));
+ trx_mutex_enter(lock->trx);
+ }
+
+ if (previous != NULL) {
+ /* Move the lock to the head of the list. */
+ HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
+ lock_rec_move_to_front(lock, rec_fold);
+ } else {
+ /* Already at the head of the list. */
+ previous = lock;
+ }
+ /* Move on to the next lock. */
+ lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
+ } else {
+ previous = lock;
+ lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
}
@@ -7775,7 +7972,9 @@ DeadlockChecker::get_first_lock(ulint* heap_no) const
/* Must find at least two locks, otherwise there cannot be a
waiting lock, secondly the first lock cannot be the wait_lock. */
ut_a(lock != NULL);
- ut_a(lock != m_wait_lock);
+ ut_a(lock != m_wait_lock ||
+ (innodb_lock_schedule_algorithm
+ == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS));
/* Check that the lock type doesn't change. */
ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));