diff options
author | Sergei Golubchik <serg@mysql.com> | 2008-08-28 14:43:44 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mysql.com> | 2008-08-28 14:43:44 +0200 |
commit | 942651ea6cc2b7537aa45ff1d55d64be4e191a16 (patch) | |
tree | 32ccde19f332392c00a45b5bc98fd65e810999e0 | |
parent | ca23272e1e53e195169bec0609eb0168722e1879 (diff) | |
download | mariadb-git-942651ea6cc2b7537aa45ff1d55d64be4e191a16.tar.gz |
wt: comments, OOM checks, test case for deadlock detection
include/waiting_threads.h:
make wt_thd_dontwait private
mysql-test/r/maria.result:
deadlock example
mysql-test/t/maria.test:
deadlock example
mysys/waiting_threads.c:
comments, OOM checks
sql/mysqld.cc:
fix variables
sql/sql_class.cc:
move wt_lazy_init to THD constructor
sql/sql_class.h:
move wt_lazy_init to THD constructor
storage/maria/ha_maria.cc:
backport from 6.0
storage/maria/ma_write.c:
poset-review fixes, set thd->proc_info
storage/maria/trnman.c:
bugfixing
storage/myisam/mi_check.c:
warnings
storage/myisam/mi_page.c:
warnings
storage/myisam/mi_search.c:
warnings
storage/myisammrg/myrg_create.c:
warnings
unittest/mysys/waiting_threads-t.c:
fixes
-rw-r--r-- | include/waiting_threads.h | 1 | ||||
-rw-r--r-- | mysql-test/r/maria.result | 15 | ||||
-rw-r--r-- | mysql-test/t/maria.test | 27 | ||||
-rw-r--r-- | mysys/waiting_threads.c | 352 | ||||
-rw-r--r-- | sql/mysqld.cc | 8 | ||||
-rw-r--r-- | sql/sql_class.cc | 4 | ||||
-rw-r--r-- | sql/sql_class.h | 5 | ||||
-rw-r--r-- | storage/maria/ha_maria.cc | 3 | ||||
-rw-r--r-- | storage/maria/ma_write.c | 52 | ||||
-rw-r--r-- | storage/maria/trnman.c | 16 | ||||
-rw-r--r-- | storage/myisam/mi_check.c | 12 | ||||
-rw-r--r-- | storage/myisam/mi_page.c | 2 | ||||
-rw-r--r-- | storage/myisam/mi_search.c | 6 | ||||
-rw-r--r-- | storage/myisammrg/myrg_create.c | 2 | ||||
-rw-r--r-- | unittest/mysys/waiting_threads-t.c | 12 |
15 files changed, 400 insertions, 117 deletions
diff --git a/include/waiting_threads.h b/include/waiting_threads.h index 154fb19800b..a97bc09f576 100644 --- a/include/waiting_threads.h +++ b/include/waiting_threads.h @@ -154,7 +154,6 @@ void wt_end(void); void wt_thd_lazy_init(WT_THD *, ulong *, ulong *, ulong *, ulong *); void wt_thd_destroy(WT_THD *); int wt_thd_will_wait_for(WT_THD *, WT_THD *, WT_RESOURCE_ID *); -int wt_thd_dontwait(WT_THD *); int wt_thd_cond_timedwait(WT_THD *, pthread_mutex_t *); void wt_thd_release(WT_THD *, WT_RESOURCE_ID *); #define wt_thd_release_all(THD) wt_thd_release((THD), 0) diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result index 8fcdcd67ee8..30b3f9c6dc4 100644 --- a/mysql-test/r/maria.result +++ b/mysql-test/r/maria.result @@ -1900,3 +1900,18 @@ check table t2 extended; Table Op Msg_type Msg_text test.t2 check status OK drop table t2; +set session deadlock_timeout_long=60000000; +create table t1 (a int unique) transactional=1; +insert t1 values (1); +lock table t1 write concurrent; +insert t1 values (2); +set session deadlock_timeout_long=60000000; +lock table t1 write concurrent; +insert t1 values (3); +insert t1 values (2); +insert t1 values (3); +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +unlock tables; +ERROR 23000: Duplicate entry '2' for key 'a' +unlock tables; +drop table t1; diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test index fa7529f12ab..a65916c1326 100644 --- a/mysql-test/t/maria.test +++ b/mysql-test/t/maria.test @@ -1186,6 +1186,33 @@ insert into t2 values (repeat('x',28)), (repeat('p',21)), (repeat('k',241)), check table t2 extended; drop table t2; +# +# an example of a deadlock +# +set session deadlock_timeout_long=60000000; +create table t1 (a int unique) transactional=1; +insert t1 values (1); +lock table t1 write concurrent; +insert t1 values (2); +connect(con_d,localhost,root,,); +set session deadlock_timeout_long=60000000; +lock table t1 write concurrent; +insert t1 values (3); +send insert t1 values (2); +connection default; +let $wait_condition=select count(*) = 1 from information_schema.processlist where state="waiting for a resource"; +--source include/wait_condition.inc +--error ER_LOCK_DEADLOCK +insert t1 values (3); +unlock tables; +connection con_d; +--error ER_DUP_ENTRY +reap; +unlock tables; +disconnect con_d; +connection default; +drop table t1; + --disable_result_log --disable_query_log eval set global storage_engine=$default_engine, maria_page_checksum=$default_checksum; diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 61b89f7eb64..78cea6c9673 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -14,6 +14,77 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* + "waiting threads" subsystem - a unified interface for threads to wait + on each other, with built-in deadlock detection. + + Main concepts + ^^^^^^^^^^^^^ + a thread - is represented by a WT_THD structure. One physical thread + can have only one WT_THD descriptor. + + a resource - a thread does not wait for other threads directly, + instead it waits for a "resource", which is "owned" by other threads. + It waits, exactly, for all "owners" to "release" a resource. + It does not have to correspond to a physical resource. For example, it + may be convenient in certain cases to force resource == thread. + A resource is represented by a WT_RESOURCE structure. + + a resource identifier - a pair of {resource type, value}. A value is + either a ulonglong number or a pointer (it's a union). + WT_RESOURCE_ID structure. + + a resource type - a pointer to a statically defined instance of + WT_RESOURCE_TYPE structure. This structure contains a pointer to + a function that knows how to compare values of this resource type. + In the simple case it could be wt_resource_id_memcmp(). + + Usage + ^^^^^ + to use the interface one needs to use this thread's WT_THD, + call wt_thd_will_wait_for() for every thread it needs to wait on, + then call wt_thd_cond_timedwait(). When thread releases a resource + it should call wt_thd_release() (or wt_thd_release_all()) - it will + notify (send a signal) threads waiting in wt_thd_cond_timedwait(), + if appropriate. + + Just like with pthread's cond_wait, there could be spurious + wake-ups from wt_thd_cond_timedwait(). A caller is expected to + handle that. + + wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either + WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return + WT_TIMEOUT. Out of memory and other fatal errors are reported as + WT_DEADLOCK - and a transaction must be aborted just the same. + + Configuration + ^^^^^^^^^^^^^ + There are four config variables. Two deadlock search depths - short and + long - and two timeouts. Deadlock search is performed with the short + depth on every wt_thd_will_wait_for() call. wt_thd_cond_timedwait() + waits with a short timeout, performs a deadlock search with the long + depth, and waits with a long timeout. As most deadlock cycles are supposed + to be short, most deadlocks will be detected at once, and waits will + rarely be necessary. + + These config variables are thread-local. Different threads may have + different search depth and timeout values. + + Also, deadlock detector supports different killing strategies, the victim + in a deadlock cycle is selected based on the "weight". See "weight" + description in waiting_threads.h for details. It's up to the caller to + set weights accordingly. + + Status + ^^^^^^ + We calculate the number of successfull waits (WT_OK returned from + wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle + length distribution - number of deadlocks with every length from + 1 to WT_CYCLE_STATS, and a wait time distribution - number + of waits with a time from 1 us to 1 min in WT_CYCLE_STATS + intervals on a log scale. +*/ + +/* Note that if your lock system satisfy the following condition: there exist four lock levels A, B, C, D, such as @@ -114,8 +185,18 @@ static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock; pthread_rwlock_unlock(&R->lock); \ } while (0) +/* + All resources are stored in a lock-free hash. Different threads + may add new resources and perform deadlock detection concurrently. +*/ static LF_HASH reshash; +/** + WT_RESOURCE constructor + + It's called from lf_hash and takes an offset to LF_SLIST instance. + WT_RESOURCE is located at arg+sizeof(LF_SLIST) +*/ static void wt_resource_init(uchar *arg) { WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); @@ -124,10 +205,16 @@ static void wt_resource_init(uchar *arg) bzero(rc, sizeof(*rc)); pthread_rwlock_init(&rc->lock, 0); pthread_cond_init(&rc->cond, 0); - my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 5, 5); + my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 0, 5); DBUG_VOID_RETURN; } +/** + WT_RESOURCE destructor + + It's called from lf_hash and takes an offset to LF_SLIST instance. + WT_RESOURCE is located at arg+sizeof(LF_SLIST) +*/ static void wt_resource_destroy(uchar *arg) { WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); @@ -159,7 +246,7 @@ void wt_init() bzero(wt_wait_stats, sizeof(wt_wait_stats)); bzero(wt_cycle_stats, sizeof(wt_cycle_stats)); wt_success_stats=0; - { + { /* initialize wt_wait_table[]. from 1 us to 1 min, log scale */ int i; double from=log(1); /* 1 us */ double to=log(60e6); /* 1 min */ @@ -187,17 +274,20 @@ void wt_end() DBUG_VOID_RETURN; } -static void fix_thd_pins(WT_THD *thd) -{ - if (unlikely(thd->pins == 0)) - { - thd->pins=lf_hash_get_pins(&reshash); -#ifndef DBUG_OFF - thd->name=my_thread_name(); -#endif - } -} +/** + Lazy WT_THD initialization + + Cheap initialization of WT_THD. Only initialized fields that don't require + memory allocations - basically, it only does assignments. The rest of the + WT_THD structure will be initialized on demand, on the first use. + This allows one to initialize lazily all WT_THD structures, even if some + (or even most) of them will never be used for deadlock detection. + @param ds a pointer to deadlock search depth short value + @param ts a pointer to deadlock timeout short value + @param dl a pointer to deadlock search depth long value + @param tl a pointer to deadlock timeout long value +*/ void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl) { DBUG_ENTER("wt_thd_lazy_init"); @@ -209,6 +299,7 @@ void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl) thd->timeout_short= ts; thd->deadlock_search_depth_long= dl; thd->timeout_long= tl; + /* dynamic array is also initialized lazily - without memory allocations */ my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5); #ifndef DBUG_OFF thd->name=my_thread_name(); @@ -216,6 +307,26 @@ void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl) DBUG_VOID_RETURN; } +/** + Finalize WT_THD initialization + + After lazy WT_THD initialization, parts of the structure are still + uninitialized. This function completes the initialization, allocating + memory, if necessary. It's called automatically on demand, when WT_THD + is about to be used. +*/ +static int fix_thd_pins(WT_THD *thd) +{ + if (unlikely(thd->pins == 0)) + { + thd->pins=lf_hash_get_pins(&reshash); +#ifndef DBUG_OFF + thd->name=my_thread_name(); +#endif + } + return thd->pins == 0; +} + void wt_thd_destroy(WT_THD *thd) { DBUG_ENTER("wt_thd_destroy"); @@ -229,19 +340,30 @@ void wt_thd_destroy(WT_THD *thd) thd->waiting_for=0; DBUG_VOID_RETURN; } +/** + Trivial resource id comparison function - bytewise memcmp. + It can be used in WT_RESOURCE_TYPE structures where bytewise + comparison of values is sufficient. +*/ int wt_resource_id_memcmp(void *a, void *b) { return memcmp(a, b, sizeof(WT_RESOURCE_ID)); } +/** + arguments for the recursive deadlock_search function +*/ struct deadlock_arg { - WT_THD *thd; - uint max_depth; - WT_THD *victim; - WT_RESOURCE *rc; + WT_THD *thd; /**< starting point of a search */ + uint max_depth; /**< search depth limit */ + WT_THD *victim; /**< a thread to be killed to resolve a deadlock */ + WT_RESOURCE *rc; /**< see comment at the end of deadlock_search() */ }; +/** + helper function to change the victim, according to the weight +*/ static void change_victim(WT_THD* found, struct deadlock_arg *arg) { if (found->weight < arg->victim->weight) @@ -256,8 +378,8 @@ static void change_victim(WT_THD* found, struct deadlock_arg *arg) } } -/* - loop detection in a wait-for graph with a limited search depth. +/** + recursive loop detection in a wait-for graph with a limited search depth */ static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker, uint depth) @@ -301,11 +423,41 @@ retry: lf_unpin(arg->thd->pins, 0); goto retry; } + /* as the state is locked, we can unpin now */ lf_unpin(arg->thd->pins, 0); + /* + Below is not a pure depth-first search. It's a depth-first with a + slightest hint of breadth-first. Depth-first is: + + check(element): + foreach current in element->nodes[] do: + if current == element return error; + check(current); + + while we do + + check(element): + foreach current in element->nodes[] do: + if current == element return error; + foreach current in element->nodes[] do: + check(current); + */ for (i=0; i < rc->owners.elements; i++) { cursor= *dynamic_element(&rc->owners, i, WT_THD**); + /* + We're only looking for (and detecting) cycles that include 'arg->thd'. + That is, only deadlocks that *we* have created. For example, + thd->A->B->thd + (thd waits for A, A waits for B, while B is waiting for thd). + While walking the graph we can encounter other cicles, e.g. + thd->A->B->C->A + This will not be detected. Instead we will walk it in circles until + the search depth limit is reached (the latter guarantees that an + infinite loop is impossible). We expect the thread that has created + the cycle (one of A, B, and C) to detect its deadlock. + */ if (cursor == arg->thd) { ret= WT_DEADLOCK; @@ -319,16 +471,15 @@ retry: { cursor= *dynamic_element(&rc->owners, i, WT_THD**); switch (deadlock_search(arg, cursor, depth+1)) { + case WT_OK: + break; case WT_DEPTH_EXCEEDED: ret= WT_DEPTH_EXCEEDED; break; case WT_DEADLOCK: ret= WT_DEADLOCK; - change_victim(cursor, arg); - if (arg->rc) - rc_unlock(arg->rc); - goto end; - case WT_OK: + change_victim(cursor, arg); /* also sets arg->rc to 0 */ + i= rc->owners.elements; /* jump out of the loop */ break; default: DBUG_ASSERT(0); @@ -337,6 +488,34 @@ retry: rc_unlock(arg->rc); } end: + /* + Note that 'rc' is locked in this function, but it's never unlocked there. + Instead it's saved in arg->rc and the *caller* is expected to unlock it. + It's done to support different killing strategies. This is how it works: + Assuming a graph + + thd->A->B->C->thd + + deadlock_search() function starts from thd, locks it (in fact it locks not + a thd, but a resource it is waiting on, but below, for simplicity, I'll + talk about "locking a thd"). Then it goes down recursively, locks A, and so + on. Goes down recursively, locks B. Goes down recursively, locks C. + Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd. + Returns from the last deadlock_search() call. C stays locked! + Now it checks whether C is a more appropriate victim then 'thd'. + If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked. + Now it checks whether B is a more appropriate victim then arg->victim. + If yes - old arg->victim is unlocked and arg->victim=B, + otherwise B is unlocked. Return. + And so on. + + In short, a resource is locked in a frame. But it's not unlocked in the + same frame, it's unlocked by the caller, and only after the caller checks + that it doesn't need to use current WT_THD as a victim. If it does - the + lock is kept and the old victim's resource is unlocked. When the recursion + is unrolled and we are back to deadlock() function, there are only two + locks left - on thd and on the victim. + */ arg->rc= rc; DBUG_PRINT("wt", ("exit: %s", ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" : @@ -344,6 +523,25 @@ end: DBUG_RETURN(ret); } +/** + Deadlock detection in a wait-for graph + + A wrapper for recursive deadlock_search() - prepares deadlock_arg structure, + invokes deadlock_search(), increments statistics, notifies the victim. + + @param thd thread that is going to wait. Deadlock is detected + if, while walking the graph, we reach a thread that + is waiting on thd + @param blocker starting point of a search. In wt_thd_cond_timedwait() + it's thd, in wt_thd_will_wait_for() it's a thread that + thd is going to wait for + @param depth starting search depth. In general it's the number of + edges in the wait-for graph between thd and the + blocker. Practically only two values are used (and + supported) - when thd == blocker it's 0, when thd + waits directly for blocker, it's 1 + @param max_depth search depth limit +*/ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, uint max_depth) { @@ -357,10 +555,15 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, *thd->deadlock_search_depth_long); ret= WT_OK; } + /* + if we started with depth==1, blocker was never considered for a victim + in deadlock_search(). Do it here. + */ if (ret == WT_DEADLOCK && depth) change_victim(blocker, &arg); if (arg.rc) rc_unlock(arg.rc); + /* notify the victim, if appropriate */ if (ret == WT_DEADLOCK && arg.victim != thd) { DBUG_PRINT("wt", ("killing %s", arg.victim->name)); @@ -373,11 +576,12 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, } -/* - Deletes an element from reshash. +/** + Delete an element from reshash if it has no waiters or owners + rc->lock must be locked by the caller and it's unlocked on return. */ -static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) +static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) { uint keylen; const void *key; @@ -390,10 +594,14 @@ static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) DBUG_PRINT("wt", ("nothing to do, %d owners, %d waiters", rc->owners.elements, rc->waiter_count)); rc_unlock(rc); - DBUG_VOID_RETURN; + DBUG_RETURN(0); } - fix_thd_pins(thd); + if (fix_thd_pins(thd)) + { + rc_unlock(rc); + DBUG_RETURN(1); + } /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */ { @@ -414,29 +622,40 @@ static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) */ rc->state=FREE; rc_unlock(rc); - lf_hash_delete(&reshash, thd->pins, key, keylen); - DBUG_VOID_RETURN; + DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1); } -int wt_thd_dontwait_locked(WT_THD *thd) +/** + register the fact that thd is not waiting anymore + + decrease waiter_count, clear waiting_for, free the resource if appropriate. + thd->waiting_for must be locked! +*/ +static int stop_waiting_locked(WT_THD *thd) { + int ret; WT_RESOURCE *rc= thd->waiting_for; - DBUG_ENTER("wt_thd_dontwait_locked"); + DBUG_ENTER("stop_waiting_locked"); DBUG_ASSERT(rc->waiter_count); DBUG_ASSERT(rc->state == ACTIVE); rc->waiter_count--; thd->waiting_for= 0; - unlock_lock_and_free_resource(thd, rc); - DBUG_RETURN(thd->killed ? WT_DEADLOCK : WT_OK); + ret= unlock_lock_and_free_resource(thd, rc); + DBUG_RETURN((thd->killed || ret) ? WT_DEADLOCK : WT_OK); } -int wt_thd_dontwait(WT_THD *thd) +/** + register the fact that thd is not waiting anymore + + locks thd->waiting_for and calls stop_waiting_locked(). +*/ +static int stop_waiting(WT_THD *thd) { int ret; WT_RESOURCE *rc= thd->waiting_for; - DBUG_ENTER("wt_thd_dontwait"); + DBUG_ENTER("stop_waiting"); if (!rc) DBUG_RETURN(WT_OK); @@ -445,15 +664,20 @@ int wt_thd_dontwait(WT_THD *thd) as its waiter_count is guaranteed to be non-zero */ rc_wrlock(rc); - ret= wt_thd_dontwait_locked(thd); + ret= stop_waiting_locked(thd); DBUG_RETURN(ret); } -/* +/** + notify the system that a thread needs to wait for another thread + called by a *waiter* to declare what resource it will wait for. can be called many times, if many blockers own a blocking resource. but must always be called with the same resource id - a thread cannot wait for more than one resource at a time. + + As a new edge is added to the wait-for graph, a deadlock detection is + performed for this new edge. */ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) { @@ -466,7 +690,8 @@ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu", thd->name, blocker->name, resid->value.num)); - fix_thd_pins(thd); + if (fix_thd_pins(thd)) + DBUG_RETURN(WT_DEADLOCK); if (thd->waiting_for == 0) { @@ -487,14 +712,11 @@ retry: DBUG_PRINT("wt", ("failed to find rc in hash, inserting")); bzero(&tmp, sizeof(tmp)); - tmp.waiter_count= 0; tmp.id= *resid; tmp.state= ACTIVE; -#ifndef DBUG_OFF - tmp.mutex= 0; -#endif - lf_hash_insert(&reshash, thd->pins, &tmp); + if (lf_hash_insert(&reshash, thd->pins, &tmp) == -1) /* if OOM */ + DBUG_RETURN(WT_DEADLOCK); /* Two cases: either lf_hash_insert() failed - because another thread has just inserted a resource with the same id - and we need to retry. @@ -504,6 +726,9 @@ retry: And we need to repeat the loop anyway. */ } + if (rc == MY_ERRPTR) + DBUG_RETURN(WT_DEADLOCK); + DBUG_PRINT("wt", ("found in hash rc=%p", rc)); rc_wrlock(rc); @@ -520,7 +745,6 @@ retry: thd->waiting_for= rc; rc->waiter_count++; thd->killed= 0; - } else { @@ -539,7 +763,7 @@ retry: if (thd->killed) { - wt_thd_dontwait_locked(thd); + stop_waiting_locked(thd); DBUG_RETURN(WT_DEADLOCK); } } @@ -548,20 +772,29 @@ retry: break; if (i >= rc->owners.elements) { - push_dynamic(&blocker->my_resources, (void*)&rc); - push_dynamic(&rc->owners, (void*)&blocker); + if (push_dynamic(&blocker->my_resources, (void*)&rc)) + { + stop_waiting_locked(thd); + DBUG_RETURN(WT_DEADLOCK); /* deadlock and OOM use the same error code */ + } + if (push_dynamic(&rc->owners, (void*)&blocker)) + { + pop_dynamic(&blocker->my_resources); + stop_waiting_locked(thd); + DBUG_RETURN(WT_DEADLOCK); + } } rc_unlock(rc); if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short)) { - wt_thd_dontwait(thd); + stop_waiting(thd); DBUG_RETURN(WT_DEADLOCK); } DBUG_RETURN(0); } -/* +/** called by a *waiter* to start waiting It's supposed to be a drop-in replacement for @@ -595,7 +828,7 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) #endif rc_wrlock(rc); - if (rc->owners.elements == 0 && thd->killed) + if (rc->owners.elements == 0 || thd->killed) ret= WT_OK; rc_unlock(rc); @@ -614,7 +847,7 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) } } after= my_getsystime(); - if (wt_thd_dontwait(thd) == WT_DEADLOCK) + if (stop_waiting(thd) == WT_DEADLOCK) /* if we're killed */ ret= WT_DEADLOCK; increment_wait_stats(after-before, ret); if (ret == WT_OK) @@ -622,23 +855,24 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) DBUG_RETURN(ret); } -/* +/** called by a *blocker* when it releases a resource - when resid==0 all resources will be freed - - Note: it's conceptually similar to pthread_cond_broadcast, and must be done + it's conceptually similar to pthread_cond_broadcast, and must be done under the same mutex as wt_thd_cond_timedwait(). + + @param resid a resource to release. 0 to release all resources */ + void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid) { - WT_RESOURCE *rc; - uint i, j; + uint i; DBUG_ENTER("wt_thd_release"); for (i=0; i < thd->my_resources.elements; i++) { - rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**); + uint j; + WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**); if (!resid || (resid->type->compare(&rc->id, resid) == 0)) { rc_wrlock(rc); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 730cabc4bda..d2fc752ca11 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -5703,22 +5703,22 @@ struct my_option my_long_options[] = {"deadlock-search-depth-short", OPT_DEADLOCK_SEARCH_DEPTH_SHORT, "Short search depth for the two-step deadlock detection", (uchar**) &global_system_variables.wt_deadlock_search_depth_short, - (uchar**) &global_system_variables.wt_deadlock_search_depth_short, + (uchar**) &max_system_variables.wt_deadlock_search_depth_short, 0, GET_ULONG, REQUIRED_ARG, 4, 0, 32, 0, 0, 0}, {"deadlock-search-depth-long", OPT_DEADLOCK_SEARCH_DEPTH_LONG, "Long search depth for the two-step deadlock detection", (uchar**) &global_system_variables.wt_deadlock_search_depth_long, - (uchar**) &global_system_variables.wt_deadlock_search_depth_long, + (uchar**) &max_system_variables.wt_deadlock_search_depth_long, 0, GET_ULONG, REQUIRED_ARG, 15, 0, 33, 0, 0, 0}, {"deadlock-timeout-short", OPT_DEADLOCK_TIMEOUT_SHORT, "Short timeout for the two-step deadlock detection (in microseconds)", (uchar**) &global_system_variables.wt_timeout_short, - (uchar**) &global_system_variables.wt_timeout_short, + (uchar**) &max_system_variables.wt_timeout_short, 0, GET_ULONG, REQUIRED_ARG, 100, 0, ULONG_MAX, 0, 0, 0}, {"deadlock-timeout-long", OPT_DEADLOCK_TIMEOUT_LONG, "Long timeout for the two-step deadlock detection (in microseconds)", (uchar**) &global_system_variables.wt_timeout_long, - (uchar**) &global_system_variables.wt_timeout_long, + (uchar**) &max_system_variables.wt_timeout_long, 0, GET_ULONG, REQUIRED_ARG, 10000, 0, ULONG_MAX, 0, 0, 0}, #ifndef DBUG_OFF {"debug", '#', "Debug log.", (uchar**) &default_dbug_option, diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 7116c2bf71e..70947145ef2 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -586,6 +586,10 @@ THD::THD() peer_port= 0; // For SHOW PROCESSLIST transaction.m_pending_rows_event= 0; transaction.on= 1; + wt_thd_lazy_init(&transaction.wt, &variables.wt_deadlock_search_depth_short, + &variables.wt_timeout_short, + &variables.wt_deadlock_search_depth_long, + &variables.wt_timeout_long); #ifdef SIGNAL_WITH_VIO_CLOSE active_vio = 0; #endif diff --git a/sql/sql_class.h b/sql/sql_class.h index 578ace6fee9..c4f48517be9 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1352,14 +1352,9 @@ public: st_transactions() { #ifdef USING_TRANSACTIONS - THD *thd=current_thd; bzero((char*)this, sizeof(*this)); xid_state.xid.null(); init_sql_alloc(&mem_root, ALLOC_ROOT_MIN_BLOCK_SIZE, 0); - wt_thd_lazy_init(&wt, &thd->variables.wt_deadlock_search_depth_short, - &thd->variables.wt_timeout_short, - &thd->variables.wt_deadlock_search_depth_long, - &thd->variables.wt_timeout_long); #else xid_state.xa_state= XA_NOTR; #endif diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 001aa1262ce..78edf7d6545 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2346,7 +2346,8 @@ int ha_maria::external_lock(THD *thd, int lock_type) This is a bit excessive, ACID requires this only if there are some changes to commit (rollback shouldn't be tested). */ - DBUG_ASSERT(!thd->main_da.is_sent); + DBUG_ASSERT(!thd->main_da.is_sent || + thd->killed == THD::KILL_CONNECTION); /* autocommit ? rollback a transaction */ #ifdef MARIA_CANNOT_ROLLBACK if (ma_commit(trn)) diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 09391ad829c..8add20d46ef 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -182,23 +182,20 @@ int maria_write(MARIA_HA *info, uchar *record) { while (keyinfo->ck_insert(info, (*keyinfo->make_key)(info, &int_key, i, - buff, record, filepos, - info->trn->trid))) + buff, record, filepos, + info->trn->trid))) { TRN *blocker; DBUG_PRINT("error",("Got error: %d on write",my_errno)); /* - explicit check for our own trid, because temp tables - aren't transactional and don't have a proper TRN so the code - below doesn't work for them - XXX a better test perhaps ? + explicit check to filter out temp tables, they aren't + transactional and don't have a proper TRN so the code + below doesn't work for them. + Also, filter out non-thread maria use, and table modified in + the same transaction. */ - if (info->dup_key_trid == info->trn->trid) - { - if (local_lock_tree) - rw_unlock(&keyinfo->root_lock); + if (!local_lock_tree || info->dup_key_trid == info->trn->trid) goto err; - } blocker= trnman_trid_to_trn(info->trn, info->dup_key_trid); /* if blocker TRN was not found, it means that the conflicting @@ -206,16 +203,16 @@ int maria_write(MARIA_HA *info, uchar *record) aborted, as it would have to wait on the key tree lock to remove the conflicting key it has inserted. */ - if (local_lock_tree) + if (!blocker || blocker->commit_trid != ~(TrID)0) + { /* committed */ + if (blocker) + pthread_mutex_unlock(& blocker->state_lock); rw_unlock(&keyinfo->root_lock); - if (!blocker) - goto err; - if (blocker->commit_trid != ~(TrID)0) - { /* committed, albeit recently */ - pthread_mutex_unlock(& blocker->state_lock); goto err; } - { /* running. now we wait */ + rw_unlock(&keyinfo->root_lock); + { + /* running. now we wait */ WT_RESOURCE_ID rc; int res; @@ -225,15 +222,26 @@ int maria_write(MARIA_HA *info, uchar *record) if (res != WT_OK) { pthread_mutex_unlock(& blocker->state_lock); + my_errno= HA_ERR_LOCK_DEADLOCK; goto err; } - res=wt_thd_cond_timedwait(info->trn->wt, & blocker->state_lock); + { + const char *old_proc_info= proc_info_hook(0, + "waiting for a resource", __func__, __FILE__, __LINE__); + + res= wt_thd_cond_timedwait(info->trn->wt, & blocker->state_lock); + + proc_info_hook(0, old_proc_info, __func__, __FILE__, __LINE__); + } pthread_mutex_unlock(& blocker->state_lock); if (res != WT_OK) + { + my_errno= res == WT_TIMEOUT ? HA_ERR_LOCK_WAIT_TIMEOUT + : HA_ERR_LOCK_DEADLOCK; goto err; + } } - if (local_lock_tree) - rw_wrlock(&keyinfo->root_lock); + rw_wrlock(&keyinfo->root_lock); } } @@ -643,7 +651,7 @@ static int w_search(register MARIA_HA *info, uint32 comp_flag, MARIA_KEY *key, { DBUG_PRINT("warning", ("Duplicate key")); /* - FIXME + TODO When the index will support true versioning - with multiple identical values in the UNIQUE index, invisible to each other - the following should be changed to "continue inserting keys, at the diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 42c32db3cab..4a9004596ae 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -89,6 +89,7 @@ static void wt_thd_release_self(TRN *trn) rc.type= &ma_rc_dup_unique; rc.value.ptr= trn; wt_thd_release(trn->wt, & rc); + trn->wt= 0; } } @@ -296,8 +297,8 @@ TRN *trnman_new_trn(WT_THD *wt) } trnman_allocated_transactions++; pthread_mutex_init(&trn->state_lock, MY_MUTEX_INIT_FAST); - trn->wt= wt; } + trn->wt= wt; trn->pins= lf_hash_get_pins(&trid_to_trn); if (!trn->pins) { @@ -415,17 +416,17 @@ my_bool trnman_end_trn(TRN *trn, my_bool commit) } } + pthread_mutex_lock(&trn->state_lock); + trn->commit_trid= global_trid_generator; + wt_thd_release_self(trn); + pthread_mutex_unlock(&trn->state_lock); + /* if transaction is committed and it was not the only active transaction - add it to the committed list */ if (commit && active_list_min.next != &active_list_max) { - pthread_mutex_lock(&trn->state_lock); - trn->commit_trid= global_trid_generator; - wt_thd_release_self(trn); - pthread_mutex_unlock(&trn->state_lock); - trn->next= &committed_list_max; trn->prev= committed_list_max.prev; trnman_committed_transactions++; @@ -440,6 +441,7 @@ my_bool trnman_end_trn(TRN *trn, my_bool commit) active_list_min.next != &active_list_max)) res= -1; trnman_active_transactions--; + pthread_mutex_unlock(&LOCK_trn_list); /* the rest is done outside of a critical section */ @@ -492,8 +494,6 @@ void trnman_free_trn(TRN *trn) pthread_mutex_lock(&trn->state_lock); trn->short_id= 0; - wt_thd_release_self(trn); - trn->wt= 0; /* just in case */ pthread_mutex_unlock(&trn->state_lock); tmp.trn= pool; diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index d05d30543ed..b8dbdaa44c8 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -803,9 +803,9 @@ static int chk_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, (flag=ha_key_cmp(keyinfo->seg,info->lastkey,key,key_length, comp_flag, diff_pos)) >=0) { - DBUG_DUMP("old",(uchar*) info->lastkey, info->lastkey_length); - DBUG_DUMP("new",(uchar*) key, key_length); - DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos)); + DBUG_DUMP("old",info->lastkey, info->lastkey_length); + DBUG_DUMP("new",key, key_length); + DBUG_DUMP("new_in_page",old_keypos,(uint) (keypos-old_keypos)); if (comp_flag & SEARCH_FIND && flag == 0) mi_check_print_error(param,"Found duplicated key at page %s",llstr(page,llbuff)); @@ -874,8 +874,8 @@ static int chk_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, DBUG_PRINT("test",("page: %s record: %s filelength: %s", llstr(page,llbuff),llstr(record,llbuff2), llstr(info->state->data_file_length,llbuff3))); - DBUG_DUMP("key",(uchar*) key,key_length); - DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos)); + DBUG_DUMP("key",key,key_length); + DBUG_DUMP("new_in_page",old_keypos,(uint) (keypos-old_keypos)); goto err; } param->record_checksum+=(ha_checksum) record; @@ -4026,7 +4026,7 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, DBUG_RETURN(1); } a_length=2+nod_flag; - key_block->end_pos= (char*) anc_buff+2; + key_block->end_pos= anc_buff+2; lastkey=0; /* No previous key in block */ } else diff --git a/storage/myisam/mi_page.c b/storage/myisam/mi_page.c index 23a2526f756..3ea00fa44db 100644 --- a/storage/myisam/mi_page.c +++ b/storage/myisam/mi_page.c @@ -49,7 +49,7 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo, { DBUG_PRINT("error",("page %lu had wrong page length: %u", (ulong) page, page_size)); - DBUG_DUMP("page", (char*) tmp, keyinfo->block_length); + DBUG_DUMP("page",tmp, keyinfo->block_length); info->last_keypage = HA_OFFSET_ERROR; mi_print_error(info->s, HA_ERR_CRASHED); my_errno = HA_ERR_CRASHED; diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c index 1af5e8c5585..f7d8d2c901c 100644 --- a/storage/myisam/mi_search.c +++ b/storage/myisam/mi_search.c @@ -816,7 +816,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_PRINT("error", ("Found too long null packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); - DBUG_DUMP("key",(char*) *page_pos,16); + DBUG_DUMP("key",*page_pos,16); mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return 0; @@ -873,7 +873,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, { DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); - DBUG_DUMP("key",(char*) *page_pos,16); + DBUG_DUMP("key",*page_pos,16); mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return 0; /* Error */ @@ -945,7 +945,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_PRINT("error", ("Found too long binary packed key: %u of %u at 0x%lx", length, keyinfo->maxlength, (long) *page_pos)); - DBUG_DUMP("key",(char*) *page_pos,16); + DBUG_DUMP("key",*page_pos,16); mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(0); /* Wrong key */ diff --git a/storage/myisammrg/myrg_create.c b/storage/myisammrg/myrg_create.c index df81b730bfd..eaed470daec 100644 --- a/storage/myisammrg/myrg_create.c +++ b/storage/myisammrg/myrg_create.c @@ -46,7 +46,7 @@ int myrg_create(const char *name, const char **table_names, fn_same(buff,name,4); *(end=strend(buff))='\n'; end[1]=0; - if (my_write(file,(char*) buff,(uint) (end-buff+1), + if (my_write(file,(uchar*) buff,(uint) (end-buff+1), MYF(MY_WME | MY_NABP))) goto err; } diff --git a/unittest/mysys/waiting_threads-t.c b/unittest/mysys/waiting_threads-t.c index 48ea0b36df6..b14f31a0432 100644 --- a/unittest/mysys/waiting_threads-t.c +++ b/unittest/mysys/waiting_threads-t.c @@ -214,12 +214,9 @@ void do_tests() ok_wait(1,2,1); ok_deadlock(2,0,2); - // FIXME remove wt_thd_dontwait calls below - wt_thd_dontwait(& thds[0].thd); - wt_thd_dontwait(& thds[1].thd); - wt_thd_dontwait(& thds[2].thd); - wt_thd_dontwait(& thds[3].thd); pthread_mutex_lock(&lock); + wt_thd_cond_timedwait(& thds[0].thd, &lock); + wt_thd_cond_timedwait(& thds[1].thd, &lock); wt_thd_release_all(& thds[0].thd); wt_thd_release_all(& thds[1].thd); wt_thd_release_all(& thds[2].thd); @@ -252,7 +249,10 @@ void do_tests() do_one_test(); test_kill_strategy(LATEST); - test_kill_strategy(RANDOM); + SKIP_BIG_TESTS(1) + { + test_kill_strategy(RANDOM); + } test_kill_strategy(YOUNGEST); test_kill_strategy(LOCKS); |