summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Golubchik <serg@mysql.com>2008-08-28 14:43:44 +0200
committerSergei Golubchik <serg@mysql.com>2008-08-28 14:43:44 +0200
commit942651ea6cc2b7537aa45ff1d55d64be4e191a16 (patch)
tree32ccde19f332392c00a45b5bc98fd65e810999e0
parentca23272e1e53e195169bec0609eb0168722e1879 (diff)
downloadmariadb-git-942651ea6cc2b7537aa45ff1d55d64be4e191a16.tar.gz
wt: comments, OOM checks, test case for deadlock detection
include/waiting_threads.h: make wt_thd_dontwait private mysql-test/r/maria.result: deadlock example mysql-test/t/maria.test: deadlock example mysys/waiting_threads.c: comments, OOM checks sql/mysqld.cc: fix variables sql/sql_class.cc: move wt_lazy_init to THD constructor sql/sql_class.h: move wt_lazy_init to THD constructor storage/maria/ha_maria.cc: backport from 6.0 storage/maria/ma_write.c: poset-review fixes, set thd->proc_info storage/maria/trnman.c: bugfixing storage/myisam/mi_check.c: warnings storage/myisam/mi_page.c: warnings storage/myisam/mi_search.c: warnings storage/myisammrg/myrg_create.c: warnings unittest/mysys/waiting_threads-t.c: fixes
-rw-r--r--include/waiting_threads.h1
-rw-r--r--mysql-test/r/maria.result15
-rw-r--r--mysql-test/t/maria.test27
-rw-r--r--mysys/waiting_threads.c352
-rw-r--r--sql/mysqld.cc8
-rw-r--r--sql/sql_class.cc4
-rw-r--r--sql/sql_class.h5
-rw-r--r--storage/maria/ha_maria.cc3
-rw-r--r--storage/maria/ma_write.c52
-rw-r--r--storage/maria/trnman.c16
-rw-r--r--storage/myisam/mi_check.c12
-rw-r--r--storage/myisam/mi_page.c2
-rw-r--r--storage/myisam/mi_search.c6
-rw-r--r--storage/myisammrg/myrg_create.c2
-rw-r--r--unittest/mysys/waiting_threads-t.c12
15 files changed, 400 insertions, 117 deletions
diff --git a/include/waiting_threads.h b/include/waiting_threads.h
index 154fb19800b..a97bc09f576 100644
--- a/include/waiting_threads.h
+++ b/include/waiting_threads.h
@@ -154,7 +154,6 @@ void wt_end(void);
void wt_thd_lazy_init(WT_THD *, ulong *, ulong *, ulong *, ulong *);
void wt_thd_destroy(WT_THD *);
int wt_thd_will_wait_for(WT_THD *, WT_THD *, WT_RESOURCE_ID *);
-int wt_thd_dontwait(WT_THD *);
int wt_thd_cond_timedwait(WT_THD *, pthread_mutex_t *);
void wt_thd_release(WT_THD *, WT_RESOURCE_ID *);
#define wt_thd_release_all(THD) wt_thd_release((THD), 0)
diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result
index 8fcdcd67ee8..30b3f9c6dc4 100644
--- a/mysql-test/r/maria.result
+++ b/mysql-test/r/maria.result
@@ -1900,3 +1900,18 @@ check table t2 extended;
Table Op Msg_type Msg_text
test.t2 check status OK
drop table t2;
+set session deadlock_timeout_long=60000000;
+create table t1 (a int unique) transactional=1;
+insert t1 values (1);
+lock table t1 write concurrent;
+insert t1 values (2);
+set session deadlock_timeout_long=60000000;
+lock table t1 write concurrent;
+insert t1 values (3);
+insert t1 values (2);
+insert t1 values (3);
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+unlock tables;
+ERROR 23000: Duplicate entry '2' for key 'a'
+unlock tables;
+drop table t1;
diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test
index fa7529f12ab..a65916c1326 100644
--- a/mysql-test/t/maria.test
+++ b/mysql-test/t/maria.test
@@ -1186,6 +1186,33 @@ insert into t2 values (repeat('x',28)), (repeat('p',21)), (repeat('k',241)),
check table t2 extended;
drop table t2;
+#
+# an example of a deadlock
+#
+set session deadlock_timeout_long=60000000;
+create table t1 (a int unique) transactional=1;
+insert t1 values (1);
+lock table t1 write concurrent;
+insert t1 values (2);
+connect(con_d,localhost,root,,);
+set session deadlock_timeout_long=60000000;
+lock table t1 write concurrent;
+insert t1 values (3);
+send insert t1 values (2);
+connection default;
+let $wait_condition=select count(*) = 1 from information_schema.processlist where state="waiting for a resource";
+--source include/wait_condition.inc
+--error ER_LOCK_DEADLOCK
+insert t1 values (3);
+unlock tables;
+connection con_d;
+--error ER_DUP_ENTRY
+reap;
+unlock tables;
+disconnect con_d;
+connection default;
+drop table t1;
+
--disable_result_log
--disable_query_log
eval set global storage_engine=$default_engine, maria_page_checksum=$default_checksum;
diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c
index 61b89f7eb64..78cea6c9673 100644
--- a/mysys/waiting_threads.c
+++ b/mysys/waiting_threads.c
@@ -14,6 +14,77 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
+ "waiting threads" subsystem - a unified interface for threads to wait
+ on each other, with built-in deadlock detection.
+
+ Main concepts
+ ^^^^^^^^^^^^^
+ a thread - is represented by a WT_THD structure. One physical thread
+ can have only one WT_THD descriptor.
+
+ a resource - a thread does not wait for other threads directly,
+ instead it waits for a "resource", which is "owned" by other threads.
+ It waits, exactly, for all "owners" to "release" a resource.
+ It does not have to correspond to a physical resource. For example, it
+ may be convenient in certain cases to force resource == thread.
+ A resource is represented by a WT_RESOURCE structure.
+
+ a resource identifier - a pair of {resource type, value}. A value is
+ either a ulonglong number or a pointer (it's a union).
+ WT_RESOURCE_ID structure.
+
+ a resource type - a pointer to a statically defined instance of
+ WT_RESOURCE_TYPE structure. This structure contains a pointer to
+ a function that knows how to compare values of this resource type.
+ In the simple case it could be wt_resource_id_memcmp().
+
+ Usage
+ ^^^^^
+ to use the interface one needs to use this thread's WT_THD,
+ call wt_thd_will_wait_for() for every thread it needs to wait on,
+ then call wt_thd_cond_timedwait(). When thread releases a resource
+ it should call wt_thd_release() (or wt_thd_release_all()) - it will
+ notify (send a signal) threads waiting in wt_thd_cond_timedwait(),
+ if appropriate.
+
+ Just like with pthread's cond_wait, there could be spurious
+ wake-ups from wt_thd_cond_timedwait(). A caller is expected to
+ handle that.
+
+ wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either
+ WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return
+ WT_TIMEOUT. Out of memory and other fatal errors are reported as
+ WT_DEADLOCK - and a transaction must be aborted just the same.
+
+ Configuration
+ ^^^^^^^^^^^^^
+ There are four config variables. Two deadlock search depths - short and
+ long - and two timeouts. Deadlock search is performed with the short
+ depth on every wt_thd_will_wait_for() call. wt_thd_cond_timedwait()
+ waits with a short timeout, performs a deadlock search with the long
+ depth, and waits with a long timeout. As most deadlock cycles are supposed
+ to be short, most deadlocks will be detected at once, and waits will
+ rarely be necessary.
+
+ These config variables are thread-local. Different threads may have
+ different search depth and timeout values.
+
+ Also, deadlock detector supports different killing strategies, the victim
+ in a deadlock cycle is selected based on the "weight". See "weight"
+ description in waiting_threads.h for details. It's up to the caller to
+ set weights accordingly.
+
+ Status
+ ^^^^^^
+ We calculate the number of successfull waits (WT_OK returned from
+ wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle
+ length distribution - number of deadlocks with every length from
+ 1 to WT_CYCLE_STATS, and a wait time distribution - number
+ of waits with a time from 1 us to 1 min in WT_CYCLE_STATS
+ intervals on a log scale.
+*/
+
+/*
Note that if your lock system satisfy the following condition:
there exist four lock levels A, B, C, D, such as
@@ -114,8 +185,18 @@ static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock;
pthread_rwlock_unlock(&R->lock); \
} while (0)
+/*
+ All resources are stored in a lock-free hash. Different threads
+ may add new resources and perform deadlock detection concurrently.
+*/
static LF_HASH reshash;
+/**
+ WT_RESOURCE constructor
+
+ It's called from lf_hash and takes an offset to LF_SLIST instance.
+ WT_RESOURCE is located at arg+sizeof(LF_SLIST)
+*/
static void wt_resource_init(uchar *arg)
{
WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
@@ -124,10 +205,16 @@ static void wt_resource_init(uchar *arg)
bzero(rc, sizeof(*rc));
pthread_rwlock_init(&rc->lock, 0);
pthread_cond_init(&rc->cond, 0);
- my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 5, 5);
+ my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 0, 5);
DBUG_VOID_RETURN;
}
+/**
+ WT_RESOURCE destructor
+
+ It's called from lf_hash and takes an offset to LF_SLIST instance.
+ WT_RESOURCE is located at arg+sizeof(LF_SLIST)
+*/
static void wt_resource_destroy(uchar *arg)
{
WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
@@ -159,7 +246,7 @@ void wt_init()
bzero(wt_wait_stats, sizeof(wt_wait_stats));
bzero(wt_cycle_stats, sizeof(wt_cycle_stats));
wt_success_stats=0;
- {
+ { /* initialize wt_wait_table[]. from 1 us to 1 min, log scale */
int i;
double from=log(1); /* 1 us */
double to=log(60e6); /* 1 min */
@@ -187,17 +274,20 @@ void wt_end()
DBUG_VOID_RETURN;
}
-static void fix_thd_pins(WT_THD *thd)
-{
- if (unlikely(thd->pins == 0))
- {
- thd->pins=lf_hash_get_pins(&reshash);
-#ifndef DBUG_OFF
- thd->name=my_thread_name();
-#endif
- }
-}
+/**
+ Lazy WT_THD initialization
+
+ Cheap initialization of WT_THD. Only initialized fields that don't require
+ memory allocations - basically, it only does assignments. The rest of the
+ WT_THD structure will be initialized on demand, on the first use.
+ This allows one to initialize lazily all WT_THD structures, even if some
+ (or even most) of them will never be used for deadlock detection.
+ @param ds a pointer to deadlock search depth short value
+ @param ts a pointer to deadlock timeout short value
+ @param dl a pointer to deadlock search depth long value
+ @param tl a pointer to deadlock timeout long value
+*/
void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl)
{
DBUG_ENTER("wt_thd_lazy_init");
@@ -209,6 +299,7 @@ void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl)
thd->timeout_short= ts;
thd->deadlock_search_depth_long= dl;
thd->timeout_long= tl;
+ /* dynamic array is also initialized lazily - without memory allocations */
my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5);
#ifndef DBUG_OFF
thd->name=my_thread_name();
@@ -216,6 +307,26 @@ void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl)
DBUG_VOID_RETURN;
}
+/**
+ Finalize WT_THD initialization
+
+ After lazy WT_THD initialization, parts of the structure are still
+ uninitialized. This function completes the initialization, allocating
+ memory, if necessary. It's called automatically on demand, when WT_THD
+ is about to be used.
+*/
+static int fix_thd_pins(WT_THD *thd)
+{
+ if (unlikely(thd->pins == 0))
+ {
+ thd->pins=lf_hash_get_pins(&reshash);
+#ifndef DBUG_OFF
+ thd->name=my_thread_name();
+#endif
+ }
+ return thd->pins == 0;
+}
+
void wt_thd_destroy(WT_THD *thd)
{
DBUG_ENTER("wt_thd_destroy");
@@ -229,19 +340,30 @@ void wt_thd_destroy(WT_THD *thd)
thd->waiting_for=0;
DBUG_VOID_RETURN;
}
+/**
+ Trivial resource id comparison function - bytewise memcmp.
+ It can be used in WT_RESOURCE_TYPE structures where bytewise
+ comparison of values is sufficient.
+*/
int wt_resource_id_memcmp(void *a, void *b)
{
return memcmp(a, b, sizeof(WT_RESOURCE_ID));
}
+/**
+ arguments for the recursive deadlock_search function
+*/
struct deadlock_arg {
- WT_THD *thd;
- uint max_depth;
- WT_THD *victim;
- WT_RESOURCE *rc;
+ WT_THD *thd; /**< starting point of a search */
+ uint max_depth; /**< search depth limit */
+ WT_THD *victim; /**< a thread to be killed to resolve a deadlock */
+ WT_RESOURCE *rc; /**< see comment at the end of deadlock_search() */
};
+/**
+ helper function to change the victim, according to the weight
+*/
static void change_victim(WT_THD* found, struct deadlock_arg *arg)
{
if (found->weight < arg->victim->weight)
@@ -256,8 +378,8 @@ static void change_victim(WT_THD* found, struct deadlock_arg *arg)
}
}
-/*
- loop detection in a wait-for graph with a limited search depth.
+/**
+ recursive loop detection in a wait-for graph with a limited search depth
*/
static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker,
uint depth)
@@ -301,11 +423,41 @@ retry:
lf_unpin(arg->thd->pins, 0);
goto retry;
}
+ /* as the state is locked, we can unpin now */
lf_unpin(arg->thd->pins, 0);
+ /*
+ Below is not a pure depth-first search. It's a depth-first with a
+ slightest hint of breadth-first. Depth-first is:
+
+ check(element):
+ foreach current in element->nodes[] do:
+ if current == element return error;
+ check(current);
+
+ while we do
+
+ check(element):
+ foreach current in element->nodes[] do:
+ if current == element return error;
+ foreach current in element->nodes[] do:
+ check(current);
+ */
for (i=0; i < rc->owners.elements; i++)
{
cursor= *dynamic_element(&rc->owners, i, WT_THD**);
+ /*
+ We're only looking for (and detecting) cycles that include 'arg->thd'.
+ That is, only deadlocks that *we* have created. For example,
+ thd->A->B->thd
+ (thd waits for A, A waits for B, while B is waiting for thd).
+ While walking the graph we can encounter other cicles, e.g.
+ thd->A->B->C->A
+ This will not be detected. Instead we will walk it in circles until
+ the search depth limit is reached (the latter guarantees that an
+ infinite loop is impossible). We expect the thread that has created
+ the cycle (one of A, B, and C) to detect its deadlock.
+ */
if (cursor == arg->thd)
{
ret= WT_DEADLOCK;
@@ -319,16 +471,15 @@ retry:
{
cursor= *dynamic_element(&rc->owners, i, WT_THD**);
switch (deadlock_search(arg, cursor, depth+1)) {
+ case WT_OK:
+ break;
case WT_DEPTH_EXCEEDED:
ret= WT_DEPTH_EXCEEDED;
break;
case WT_DEADLOCK:
ret= WT_DEADLOCK;
- change_victim(cursor, arg);
- if (arg->rc)
- rc_unlock(arg->rc);
- goto end;
- case WT_OK:
+ change_victim(cursor, arg); /* also sets arg->rc to 0 */
+ i= rc->owners.elements; /* jump out of the loop */
break;
default:
DBUG_ASSERT(0);
@@ -337,6 +488,34 @@ retry:
rc_unlock(arg->rc);
}
end:
+ /*
+ Note that 'rc' is locked in this function, but it's never unlocked there.
+ Instead it's saved in arg->rc and the *caller* is expected to unlock it.
+ It's done to support different killing strategies. This is how it works:
+ Assuming a graph
+
+ thd->A->B->C->thd
+
+ deadlock_search() function starts from thd, locks it (in fact it locks not
+ a thd, but a resource it is waiting on, but below, for simplicity, I'll
+ talk about "locking a thd"). Then it goes down recursively, locks A, and so
+ on. Goes down recursively, locks B. Goes down recursively, locks C.
+ Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd.
+ Returns from the last deadlock_search() call. C stays locked!
+ Now it checks whether C is a more appropriate victim then 'thd'.
+ If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked.
+ Now it checks whether B is a more appropriate victim then arg->victim.
+ If yes - old arg->victim is unlocked and arg->victim=B,
+ otherwise B is unlocked. Return.
+ And so on.
+
+ In short, a resource is locked in a frame. But it's not unlocked in the
+ same frame, it's unlocked by the caller, and only after the caller checks
+ that it doesn't need to use current WT_THD as a victim. If it does - the
+ lock is kept and the old victim's resource is unlocked. When the recursion
+ is unrolled and we are back to deadlock() function, there are only two
+ locks left - on thd and on the victim.
+ */
arg->rc= rc;
DBUG_PRINT("wt", ("exit: %s",
ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" :
@@ -344,6 +523,25 @@ end:
DBUG_RETURN(ret);
}
+/**
+ Deadlock detection in a wait-for graph
+
+ A wrapper for recursive deadlock_search() - prepares deadlock_arg structure,
+ invokes deadlock_search(), increments statistics, notifies the victim.
+
+ @param thd thread that is going to wait. Deadlock is detected
+ if, while walking the graph, we reach a thread that
+ is waiting on thd
+ @param blocker starting point of a search. In wt_thd_cond_timedwait()
+ it's thd, in wt_thd_will_wait_for() it's a thread that
+ thd is going to wait for
+ @param depth starting search depth. In general it's the number of
+ edges in the wait-for graph between thd and the
+ blocker. Practically only two values are used (and
+ supported) - when thd == blocker it's 0, when thd
+ waits directly for blocker, it's 1
+ @param max_depth search depth limit
+*/
static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth,
uint max_depth)
{
@@ -357,10 +555,15 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth,
*thd->deadlock_search_depth_long);
ret= WT_OK;
}
+ /*
+ if we started with depth==1, blocker was never considered for a victim
+ in deadlock_search(). Do it here.
+ */
if (ret == WT_DEADLOCK && depth)
change_victim(blocker, &arg);
if (arg.rc)
rc_unlock(arg.rc);
+ /* notify the victim, if appropriate */
if (ret == WT_DEADLOCK && arg.victim != thd)
{
DBUG_PRINT("wt", ("killing %s", arg.victim->name));
@@ -373,11 +576,12 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth,
}
-/*
- Deletes an element from reshash.
+/**
+ Delete an element from reshash if it has no waiters or owners
+
rc->lock must be locked by the caller and it's unlocked on return.
*/
-static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc)
+static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc)
{
uint keylen;
const void *key;
@@ -390,10 +594,14 @@ static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc)
DBUG_PRINT("wt", ("nothing to do, %d owners, %d waiters",
rc->owners.elements, rc->waiter_count));
rc_unlock(rc);
- DBUG_VOID_RETURN;
+ DBUG_RETURN(0);
}
- fix_thd_pins(thd);
+ if (fix_thd_pins(thd))
+ {
+ rc_unlock(rc);
+ DBUG_RETURN(1);
+ }
/* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */
{
@@ -414,29 +622,40 @@ static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc)
*/
rc->state=FREE;
rc_unlock(rc);
- lf_hash_delete(&reshash, thd->pins, key, keylen);
- DBUG_VOID_RETURN;
+ DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1);
}
-int wt_thd_dontwait_locked(WT_THD *thd)
+/**
+ register the fact that thd is not waiting anymore
+
+ decrease waiter_count, clear waiting_for, free the resource if appropriate.
+ thd->waiting_for must be locked!
+*/
+static int stop_waiting_locked(WT_THD *thd)
{
+ int ret;
WT_RESOURCE *rc= thd->waiting_for;
- DBUG_ENTER("wt_thd_dontwait_locked");
+ DBUG_ENTER("stop_waiting_locked");
DBUG_ASSERT(rc->waiter_count);
DBUG_ASSERT(rc->state == ACTIVE);
rc->waiter_count--;
thd->waiting_for= 0;
- unlock_lock_and_free_resource(thd, rc);
- DBUG_RETURN(thd->killed ? WT_DEADLOCK : WT_OK);
+ ret= unlock_lock_and_free_resource(thd, rc);
+ DBUG_RETURN((thd->killed || ret) ? WT_DEADLOCK : WT_OK);
}
-int wt_thd_dontwait(WT_THD *thd)
+/**
+ register the fact that thd is not waiting anymore
+
+ locks thd->waiting_for and calls stop_waiting_locked().
+*/
+static int stop_waiting(WT_THD *thd)
{
int ret;
WT_RESOURCE *rc= thd->waiting_for;
- DBUG_ENTER("wt_thd_dontwait");
+ DBUG_ENTER("stop_waiting");
if (!rc)
DBUG_RETURN(WT_OK);
@@ -445,15 +664,20 @@ int wt_thd_dontwait(WT_THD *thd)
as its waiter_count is guaranteed to be non-zero
*/
rc_wrlock(rc);
- ret= wt_thd_dontwait_locked(thd);
+ ret= stop_waiting_locked(thd);
DBUG_RETURN(ret);
}
-/*
+/**
+ notify the system that a thread needs to wait for another thread
+
called by a *waiter* to declare what resource it will wait for.
can be called many times, if many blockers own a blocking resource.
but must always be called with the same resource id - a thread cannot
wait for more than one resource at a time.
+
+ As a new edge is added to the wait-for graph, a deadlock detection is
+ performed for this new edge.
*/
int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid)
{
@@ -466,7 +690,8 @@ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid)
DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu",
thd->name, blocker->name, resid->value.num));
- fix_thd_pins(thd);
+ if (fix_thd_pins(thd))
+ DBUG_RETURN(WT_DEADLOCK);
if (thd->waiting_for == 0)
{
@@ -487,14 +712,11 @@ retry:
DBUG_PRINT("wt", ("failed to find rc in hash, inserting"));
bzero(&tmp, sizeof(tmp));
- tmp.waiter_count= 0;
tmp.id= *resid;
tmp.state= ACTIVE;
-#ifndef DBUG_OFF
- tmp.mutex= 0;
-#endif
- lf_hash_insert(&reshash, thd->pins, &tmp);
+ if (lf_hash_insert(&reshash, thd->pins, &tmp) == -1) /* if OOM */
+ DBUG_RETURN(WT_DEADLOCK);
/*
Two cases: either lf_hash_insert() failed - because another thread
has just inserted a resource with the same id - and we need to retry.
@@ -504,6 +726,9 @@ retry:
And we need to repeat the loop anyway.
*/
}
+ if (rc == MY_ERRPTR)
+ DBUG_RETURN(WT_DEADLOCK);
+
DBUG_PRINT("wt", ("found in hash rc=%p", rc));
rc_wrlock(rc);
@@ -520,7 +745,6 @@ retry:
thd->waiting_for= rc;
rc->waiter_count++;
thd->killed= 0;
-
}
else
{
@@ -539,7 +763,7 @@ retry:
if (thd->killed)
{
- wt_thd_dontwait_locked(thd);
+ stop_waiting_locked(thd);
DBUG_RETURN(WT_DEADLOCK);
}
}
@@ -548,20 +772,29 @@ retry:
break;
if (i >= rc->owners.elements)
{
- push_dynamic(&blocker->my_resources, (void*)&rc);
- push_dynamic(&rc->owners, (void*)&blocker);
+ if (push_dynamic(&blocker->my_resources, (void*)&rc))
+ {
+ stop_waiting_locked(thd);
+ DBUG_RETURN(WT_DEADLOCK); /* deadlock and OOM use the same error code */
+ }
+ if (push_dynamic(&rc->owners, (void*)&blocker))
+ {
+ pop_dynamic(&blocker->my_resources);
+ stop_waiting_locked(thd);
+ DBUG_RETURN(WT_DEADLOCK);
+ }
}
rc_unlock(rc);
if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short))
{
- wt_thd_dontwait(thd);
+ stop_waiting(thd);
DBUG_RETURN(WT_DEADLOCK);
}
DBUG_RETURN(0);
}
-/*
+/**
called by a *waiter* to start waiting
It's supposed to be a drop-in replacement for
@@ -595,7 +828,7 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex)
#endif
rc_wrlock(rc);
- if (rc->owners.elements == 0 && thd->killed)
+ if (rc->owners.elements == 0 || thd->killed)
ret= WT_OK;
rc_unlock(rc);
@@ -614,7 +847,7 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex)
}
}
after= my_getsystime();
- if (wt_thd_dontwait(thd) == WT_DEADLOCK)
+ if (stop_waiting(thd) == WT_DEADLOCK) /* if we're killed */
ret= WT_DEADLOCK;
increment_wait_stats(after-before, ret);
if (ret == WT_OK)
@@ -622,23 +855,24 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex)
DBUG_RETURN(ret);
}
-/*
+/**
called by a *blocker* when it releases a resource
- when resid==0 all resources will be freed
-
- Note: it's conceptually similar to pthread_cond_broadcast, and must be done
+ it's conceptually similar to pthread_cond_broadcast, and must be done
under the same mutex as wt_thd_cond_timedwait().
+
+ @param resid a resource to release. 0 to release all resources
*/
+
void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid)
{
- WT_RESOURCE *rc;
- uint i, j;
+ uint i;
DBUG_ENTER("wt_thd_release");
for (i=0; i < thd->my_resources.elements; i++)
{
- rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**);
+ uint j;
+ WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**);
if (!resid || (resid->type->compare(&rc->id, resid) == 0))
{
rc_wrlock(rc);
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 730cabc4bda..d2fc752ca11 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -5703,22 +5703,22 @@ struct my_option my_long_options[] =
{"deadlock-search-depth-short", OPT_DEADLOCK_SEARCH_DEPTH_SHORT,
"Short search depth for the two-step deadlock detection",
(uchar**) &global_system_variables.wt_deadlock_search_depth_short,
- (uchar**) &global_system_variables.wt_deadlock_search_depth_short,
+ (uchar**) &max_system_variables.wt_deadlock_search_depth_short,
0, GET_ULONG, REQUIRED_ARG, 4, 0, 32, 0, 0, 0},
{"deadlock-search-depth-long", OPT_DEADLOCK_SEARCH_DEPTH_LONG,
"Long search depth for the two-step deadlock detection",
(uchar**) &global_system_variables.wt_deadlock_search_depth_long,
- (uchar**) &global_system_variables.wt_deadlock_search_depth_long,
+ (uchar**) &max_system_variables.wt_deadlock_search_depth_long,
0, GET_ULONG, REQUIRED_ARG, 15, 0, 33, 0, 0, 0},
{"deadlock-timeout-short", OPT_DEADLOCK_TIMEOUT_SHORT,
"Short timeout for the two-step deadlock detection (in microseconds)",
(uchar**) &global_system_variables.wt_timeout_short,
- (uchar**) &global_system_variables.wt_timeout_short,
+ (uchar**) &max_system_variables.wt_timeout_short,
0, GET_ULONG, REQUIRED_ARG, 100, 0, ULONG_MAX, 0, 0, 0},
{"deadlock-timeout-long", OPT_DEADLOCK_TIMEOUT_LONG,
"Long timeout for the two-step deadlock detection (in microseconds)",
(uchar**) &global_system_variables.wt_timeout_long,
- (uchar**) &global_system_variables.wt_timeout_long,
+ (uchar**) &max_system_variables.wt_timeout_long,
0, GET_ULONG, REQUIRED_ARG, 10000, 0, ULONG_MAX, 0, 0, 0},
#ifndef DBUG_OFF
{"debug", '#', "Debug log.", (uchar**) &default_dbug_option,
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 7116c2bf71e..70947145ef2 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -586,6 +586,10 @@ THD::THD()
peer_port= 0; // For SHOW PROCESSLIST
transaction.m_pending_rows_event= 0;
transaction.on= 1;
+ wt_thd_lazy_init(&transaction.wt, &variables.wt_deadlock_search_depth_short,
+ &variables.wt_timeout_short,
+ &variables.wt_deadlock_search_depth_long,
+ &variables.wt_timeout_long);
#ifdef SIGNAL_WITH_VIO_CLOSE
active_vio = 0;
#endif
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 578ace6fee9..c4f48517be9 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1352,14 +1352,9 @@ public:
st_transactions()
{
#ifdef USING_TRANSACTIONS
- THD *thd=current_thd;
bzero((char*)this, sizeof(*this));
xid_state.xid.null();
init_sql_alloc(&mem_root, ALLOC_ROOT_MIN_BLOCK_SIZE, 0);
- wt_thd_lazy_init(&wt, &thd->variables.wt_deadlock_search_depth_short,
- &thd->variables.wt_timeout_short,
- &thd->variables.wt_deadlock_search_depth_long,
- &thd->variables.wt_timeout_long);
#else
xid_state.xa_state= XA_NOTR;
#endif
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index 001aa1262ce..78edf7d6545 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -2346,7 +2346,8 @@ int ha_maria::external_lock(THD *thd, int lock_type)
This is a bit excessive, ACID requires this only if there are some
changes to commit (rollback shouldn't be tested).
*/
- DBUG_ASSERT(!thd->main_da.is_sent);
+ DBUG_ASSERT(!thd->main_da.is_sent ||
+ thd->killed == THD::KILL_CONNECTION);
/* autocommit ? rollback a transaction */
#ifdef MARIA_CANNOT_ROLLBACK
if (ma_commit(trn))
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
index 09391ad829c..8add20d46ef 100644
--- a/storage/maria/ma_write.c
+++ b/storage/maria/ma_write.c
@@ -182,23 +182,20 @@ int maria_write(MARIA_HA *info, uchar *record)
{
while (keyinfo->ck_insert(info,
(*keyinfo->make_key)(info, &int_key, i,
- buff, record, filepos,
- info->trn->trid)))
+ buff, record, filepos,
+ info->trn->trid)))
{
TRN *blocker;
DBUG_PRINT("error",("Got error: %d on write",my_errno));
/*
- explicit check for our own trid, because temp tables
- aren't transactional and don't have a proper TRN so the code
- below doesn't work for them
- XXX a better test perhaps ?
+ explicit check to filter out temp tables, they aren't
+ transactional and don't have a proper TRN so the code
+ below doesn't work for them.
+ Also, filter out non-thread maria use, and table modified in
+ the same transaction.
*/
- if (info->dup_key_trid == info->trn->trid)
- {
- if (local_lock_tree)
- rw_unlock(&keyinfo->root_lock);
+ if (!local_lock_tree || info->dup_key_trid == info->trn->trid)
goto err;
- }
blocker= trnman_trid_to_trn(info->trn, info->dup_key_trid);
/*
if blocker TRN was not found, it means that the conflicting
@@ -206,16 +203,16 @@ int maria_write(MARIA_HA *info, uchar *record)
aborted, as it would have to wait on the key tree lock
to remove the conflicting key it has inserted.
*/
- if (local_lock_tree)
+ if (!blocker || blocker->commit_trid != ~(TrID)0)
+ { /* committed */
+ if (blocker)
+ pthread_mutex_unlock(& blocker->state_lock);
rw_unlock(&keyinfo->root_lock);
- if (!blocker)
- goto err;
- if (blocker->commit_trid != ~(TrID)0)
- { /* committed, albeit recently */
- pthread_mutex_unlock(& blocker->state_lock);
goto err;
}
- { /* running. now we wait */
+ rw_unlock(&keyinfo->root_lock);
+ {
+ /* running. now we wait */
WT_RESOURCE_ID rc;
int res;
@@ -225,15 +222,26 @@ int maria_write(MARIA_HA *info, uchar *record)
if (res != WT_OK)
{
pthread_mutex_unlock(& blocker->state_lock);
+ my_errno= HA_ERR_LOCK_DEADLOCK;
goto err;
}
- res=wt_thd_cond_timedwait(info->trn->wt, & blocker->state_lock);
+ {
+ const char *old_proc_info= proc_info_hook(0,
+ "waiting for a resource", __func__, __FILE__, __LINE__);
+
+ res= wt_thd_cond_timedwait(info->trn->wt, & blocker->state_lock);
+
+ proc_info_hook(0, old_proc_info, __func__, __FILE__, __LINE__);
+ }
pthread_mutex_unlock(& blocker->state_lock);
if (res != WT_OK)
+ {
+ my_errno= res == WT_TIMEOUT ? HA_ERR_LOCK_WAIT_TIMEOUT
+ : HA_ERR_LOCK_DEADLOCK;
goto err;
+ }
}
- if (local_lock_tree)
- rw_wrlock(&keyinfo->root_lock);
+ rw_wrlock(&keyinfo->root_lock);
}
}
@@ -643,7 +651,7 @@ static int w_search(register MARIA_HA *info, uint32 comp_flag, MARIA_KEY *key,
{
DBUG_PRINT("warning", ("Duplicate key"));
/*
- FIXME
+ TODO
When the index will support true versioning - with multiple
identical values in the UNIQUE index, invisible to each other -
the following should be changed to "continue inserting keys, at the
diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c
index 42c32db3cab..4a9004596ae 100644
--- a/storage/maria/trnman.c
+++ b/storage/maria/trnman.c
@@ -89,6 +89,7 @@ static void wt_thd_release_self(TRN *trn)
rc.type= &ma_rc_dup_unique;
rc.value.ptr= trn;
wt_thd_release(trn->wt, & rc);
+ trn->wt= 0;
}
}
@@ -296,8 +297,8 @@ TRN *trnman_new_trn(WT_THD *wt)
}
trnman_allocated_transactions++;
pthread_mutex_init(&trn->state_lock, MY_MUTEX_INIT_FAST);
- trn->wt= wt;
}
+ trn->wt= wt;
trn->pins= lf_hash_get_pins(&trid_to_trn);
if (!trn->pins)
{
@@ -415,17 +416,17 @@ my_bool trnman_end_trn(TRN *trn, my_bool commit)
}
}
+ pthread_mutex_lock(&trn->state_lock);
+ trn->commit_trid= global_trid_generator;
+ wt_thd_release_self(trn);
+ pthread_mutex_unlock(&trn->state_lock);
+
/*
if transaction is committed and it was not the only active transaction -
add it to the committed list
*/
if (commit && active_list_min.next != &active_list_max)
{
- pthread_mutex_lock(&trn->state_lock);
- trn->commit_trid= global_trid_generator;
- wt_thd_release_self(trn);
- pthread_mutex_unlock(&trn->state_lock);
-
trn->next= &committed_list_max;
trn->prev= committed_list_max.prev;
trnman_committed_transactions++;
@@ -440,6 +441,7 @@ my_bool trnman_end_trn(TRN *trn, my_bool commit)
active_list_min.next != &active_list_max))
res= -1;
trnman_active_transactions--;
+
pthread_mutex_unlock(&LOCK_trn_list);
/* the rest is done outside of a critical section */
@@ -492,8 +494,6 @@ void trnman_free_trn(TRN *trn)
pthread_mutex_lock(&trn->state_lock);
trn->short_id= 0;
- wt_thd_release_self(trn);
- trn->wt= 0; /* just in case */
pthread_mutex_unlock(&trn->state_lock);
tmp.trn= pool;
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index d05d30543ed..b8dbdaa44c8 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -803,9 +803,9 @@ static int chk_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
(flag=ha_key_cmp(keyinfo->seg,info->lastkey,key,key_length,
comp_flag, diff_pos)) >=0)
{
- DBUG_DUMP("old",(uchar*) info->lastkey, info->lastkey_length);
- DBUG_DUMP("new",(uchar*) key, key_length);
- DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos));
+ DBUG_DUMP("old",info->lastkey, info->lastkey_length);
+ DBUG_DUMP("new",key, key_length);
+ DBUG_DUMP("new_in_page",old_keypos,(uint) (keypos-old_keypos));
if (comp_flag & SEARCH_FIND && flag == 0)
mi_check_print_error(param,"Found duplicated key at page %s",llstr(page,llbuff));
@@ -874,8 +874,8 @@ static int chk_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
DBUG_PRINT("test",("page: %s record: %s filelength: %s",
llstr(page,llbuff),llstr(record,llbuff2),
llstr(info->state->data_file_length,llbuff3)));
- DBUG_DUMP("key",(uchar*) key,key_length);
- DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos));
+ DBUG_DUMP("key",key,key_length);
+ DBUG_DUMP("new_in_page",old_keypos,(uint) (keypos-old_keypos));
goto err;
}
param->record_checksum+=(ha_checksum) record;
@@ -4026,7 +4026,7 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
DBUG_RETURN(1);
}
a_length=2+nod_flag;
- key_block->end_pos= (char*) anc_buff+2;
+ key_block->end_pos= anc_buff+2;
lastkey=0; /* No previous key in block */
}
else
diff --git a/storage/myisam/mi_page.c b/storage/myisam/mi_page.c
index 23a2526f756..3ea00fa44db 100644
--- a/storage/myisam/mi_page.c
+++ b/storage/myisam/mi_page.c
@@ -49,7 +49,7 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo,
{
DBUG_PRINT("error",("page %lu had wrong page length: %u",
(ulong) page, page_size));
- DBUG_DUMP("page", (char*) tmp, keyinfo->block_length);
+ DBUG_DUMP("page",tmp, keyinfo->block_length);
info->last_keypage = HA_OFFSET_ERROR;
mi_print_error(info->s, HA_ERR_CRASHED);
my_errno = HA_ERR_CRASHED;
diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c
index 1af5e8c5585..f7d8d2c901c 100644
--- a/storage/myisam/mi_search.c
+++ b/storage/myisam/mi_search.c
@@ -816,7 +816,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
DBUG_PRINT("error",
("Found too long null packed key: %u of %u at 0x%lx",
length, keyseg->length, (long) *page_pos));
- DBUG_DUMP("key",(char*) *page_pos,16);
+ DBUG_DUMP("key",*page_pos,16);
mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
return 0;
@@ -873,7 +873,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
{
DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx",
length, keyseg->length, (long) *page_pos));
- DBUG_DUMP("key",(char*) *page_pos,16);
+ DBUG_DUMP("key",*page_pos,16);
mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
return 0; /* Error */
@@ -945,7 +945,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
DBUG_PRINT("error",
("Found too long binary packed key: %u of %u at 0x%lx",
length, keyinfo->maxlength, (long) *page_pos));
- DBUG_DUMP("key",(char*) *page_pos,16);
+ DBUG_DUMP("key",*page_pos,16);
mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(0); /* Wrong key */
diff --git a/storage/myisammrg/myrg_create.c b/storage/myisammrg/myrg_create.c
index df81b730bfd..eaed470daec 100644
--- a/storage/myisammrg/myrg_create.c
+++ b/storage/myisammrg/myrg_create.c
@@ -46,7 +46,7 @@ int myrg_create(const char *name, const char **table_names,
fn_same(buff,name,4);
*(end=strend(buff))='\n';
end[1]=0;
- if (my_write(file,(char*) buff,(uint) (end-buff+1),
+ if (my_write(file,(uchar*) buff,(uint) (end-buff+1),
MYF(MY_WME | MY_NABP)))
goto err;
}
diff --git a/unittest/mysys/waiting_threads-t.c b/unittest/mysys/waiting_threads-t.c
index 48ea0b36df6..b14f31a0432 100644
--- a/unittest/mysys/waiting_threads-t.c
+++ b/unittest/mysys/waiting_threads-t.c
@@ -214,12 +214,9 @@ void do_tests()
ok_wait(1,2,1);
ok_deadlock(2,0,2);
- // FIXME remove wt_thd_dontwait calls below
- wt_thd_dontwait(& thds[0].thd);
- wt_thd_dontwait(& thds[1].thd);
- wt_thd_dontwait(& thds[2].thd);
- wt_thd_dontwait(& thds[3].thd);
pthread_mutex_lock(&lock);
+ wt_thd_cond_timedwait(& thds[0].thd, &lock);
+ wt_thd_cond_timedwait(& thds[1].thd, &lock);
wt_thd_release_all(& thds[0].thd);
wt_thd_release_all(& thds[1].thd);
wt_thd_release_all(& thds[2].thd);
@@ -252,7 +249,10 @@ void do_tests()
do_one_test();
test_kill_strategy(LATEST);
- test_kill_strategy(RANDOM);
+ SKIP_BIG_TESTS(1)
+ {
+ test_kill_strategy(RANDOM);
+ }
test_kill_strategy(YOUNGEST);
test_kill_strategy(LOCKS);