From 59c82a57ac38442465fb002c26fcb83e48d7d0af Mon Sep 17 00:00:00 2001 From: Demyan Kimitsa Date: Tue, 28 Aug 2018 19:40:47 +0300 Subject: Fix start_world not resuming all threads on Darwin Issue #231 (bdwgc). This happens due mach thread ports are released in GC_stop_world as a result in some cases the system creates new one and GC_start_world will not find the thread to resume; in turn, the threads will stay suspended. When this happens to GDC threads, the application is terminated as the message loop is halt. The high-level description of the change: thread mach port is not deallocated when thread is saved in GC_mach_threads as it causes thread not being resumed as thread port object can be changed; threads are now being suspended without check of suspend_count (as it can be suspended by client); the logic of resume is changed, now the primary cycle iterates threads to resume in GC_mach_threads. * darwin_stop_world.c (struct GC_mach_thread): Rename already_suspended filed to suspended. * darwin_stop_world.c [!GC_NO_THREADS_DISCOVERY] (GC_suspend_thread_list): Add my_task argument; remove info an outCount local variables; call mach_port_deallocate uless threaded is added to GC_mach_threads; do not call thread_info; update the relevant comments. * darwin_stop_world.c [!GC_NO_THREADS_DISCOVERY] (GC_stop_world): Remove i local variable; do not call mach_port_deallocate for prevlist items; add comment. * darwin_stop_world.c (GC_thread_resume): Call WARN instead of ABORT if thread_resume failed. * darwin_stop_world.c [!GC_NO_THREADS_DISCOVERY] (GC_start_world): Initialize j to listcount; iterate over GC_mach_threads in the outer loop; if thread is suspended then find it in act_list; if found then resume thread; call mach_port_deallocate for act_list items. --- darwin_stop_world.c | 156 ++++++++++++++++++++++++---------------------------- 1 file changed, 71 insertions(+), 85 deletions(-) (limited to 'darwin_stop_world.c') diff --git a/darwin_stop_world.c b/darwin_stop_world.c index ff581a81..547bf3f4 100644 --- a/darwin_stop_world.c +++ b/darwin_stop_world.c @@ -441,7 +441,7 @@ GC_INNER void GC_push_all_stacks(void) struct GC_mach_thread { thread_act_t thread; - GC_bool already_suspended; + GC_bool suspended; }; struct GC_mach_thread GC_mach_threads[GC_MAX_MACH_THREADS]; @@ -451,7 +451,8 @@ GC_INNER void GC_push_all_stacks(void) /* returns true if there's a thread in act_list that wasn't in old_list */ STATIC GC_bool GC_suspend_thread_list(thread_act_array_t act_list, int count, thread_act_array_t old_list, - int old_count, mach_port_t my_thread) + int old_count, task_t my_task, + mach_port_t my_thread) { int i; int j = -1; @@ -460,27 +461,22 @@ STATIC GC_bool GC_suspend_thread_list(thread_act_array_t act_list, int count, for (i = 0; i < count; i++) { thread_act_t thread = act_list[i]; GC_bool found; - struct thread_basic_info info; - mach_msg_type_number_t outCount; kern_return_t kern_result; if (thread == my_thread # ifdef MPROTECT_VDB || (GC_mach_handler_thread == thread && GC_use_mach_handler_thread) +# endif +# ifdef PARALLEL_MARK + || GC_is_mach_marker(thread) /* ignore the parallel markers */ # endif ) { - /* Don't add our and the handler threads. */ + /* Do not add our one, parallel marker and the handler threads; */ + /* consider it as found (e.g., it was processed earlier). */ + mach_port_deallocate(my_task, thread); continue; } -# ifdef PARALLEL_MARK - if (GC_is_mach_marker(thread)) - continue; /* ignore the parallel marker threads */ -# endif -# ifdef DEBUG_THREADS - GC_log_printf("Attempting to suspend thread %p\n", - (void *)(word)thread); -# endif /* find the current thread in the old list */ found = FALSE; { @@ -499,57 +495,42 @@ STATIC GC_bool GC_suspend_thread_list(thread_act_array_t act_list, int count, found = TRUE; break; } - - if (!found) { - /* add it to the GC_mach_threads list */ - if (GC_mach_threads_count == GC_MAX_MACH_THREADS) - ABORT("Too many threads"); - GC_mach_threads[GC_mach_threads_count].thread = thread; - /* default is not suspended */ - GC_mach_threads[GC_mach_threads_count].already_suspended = FALSE; - changed = TRUE; - } } } - outCount = THREAD_INFO_MAX; - kern_result = thread_info(thread, THREAD_BASIC_INFO, - (thread_info_t)&info, &outCount); - if (kern_result != KERN_SUCCESS) { - /* The thread may have quit since the thread_threads() call we */ - /* mark already suspended so it's not dealt with anymore later. */ - if (!found) - GC_mach_threads[GC_mach_threads_count++].already_suspended = TRUE; - continue; - } -# ifdef DEBUG_THREADS - GC_log_printf("Thread state for %p = %d\n", (void *)(word)thread, - info.run_state); -# endif - if (info.suspend_count != 0) { - /* thread is already suspended. */ - if (!found) - GC_mach_threads[GC_mach_threads_count++].already_suspended = TRUE; + if (found) { + /* It is already in the list, skip processing, release mach port. */ + mach_port_deallocate(my_task, thread); continue; } + /* add it to the GC_mach_threads list */ + if (GC_mach_threads_count == GC_MAX_MACH_THREADS) + ABORT("Too many threads"); + GC_mach_threads[GC_mach_threads_count].thread = thread; + /* default is not suspended */ + GC_mach_threads[GC_mach_threads_count].suspended = FALSE; + changed = TRUE; + # ifdef DEBUG_THREADS GC_log_printf("Suspending %p\n", (void *)(word)thread); # endif + /* Unconditionally suspend the thread. It will do no */ + /* harm if it is already suspended by the client logic. */ do { kern_result = thread_suspend(thread); } while (kern_result == KERN_ABORTED); if (kern_result != KERN_SUCCESS) { /* The thread may have quit since the thread_threads() call we */ /* mark already suspended so it's not dealt with anymore later. */ - if (!found) - GC_mach_threads[GC_mach_threads_count++].already_suspended = TRUE; - continue; + GC_mach_threads[GC_mach_threads_count].suspended = FALSE; + } else { + /* Mark the thread as suspended and require resume. */ + GC_mach_threads[GC_mach_threads_count].suspended = TRUE; + if (GC_on_thread_event) + GC_on_thread_event(GC_EVENT_THREAD_SUSPENDED, (void *)(word)thread); } - if (!found) - GC_mach_threads_count++; - if (GC_on_thread_event) - GC_on_thread_event(GC_EVENT_THREAD_SUSPENDED, (void *)(word)thread); + GC_mach_threads_count++; } return changed; } @@ -581,7 +562,6 @@ GC_INNER void GC_stop_world(void) if (GC_query_task_threads) { # ifndef GC_NO_THREADS_DISCOVERY - unsigned i; GC_bool changed; thread_act_array_t act_list, prev_list; mach_msg_type_number_t listcount, prevcount; @@ -605,12 +585,14 @@ GC_INNER void GC_stop_world(void) if (kern_result == KERN_SUCCESS) { changed = GC_suspend_thread_list(act_list, listcount, prev_list, - prevcount, my_thread); + prevcount, my_task, my_thread); if (prev_list != NULL) { - for (i = 0; i < prevcount; i++) - mach_port_deallocate(my_task, prev_list[i]); - + /* Thread ports are not deallocated by list, unused ports */ + /* deallocated in GC_suspend_thread_list, used - kept in */ + /* GC_mach_threads till GC_start_world as otherwise thread */ + /* object change can occur and GC_start_world will not */ + /* find the thread to resume which will cause app to hang. */ vm_deallocate(my_task, (vm_address_t)prev_list, sizeof(thread_t) * prevcount); } @@ -622,8 +604,7 @@ GC_INNER void GC_stop_world(void) } while (changed); GC_ASSERT(prev_list != 0); - for (i = 0; i < prevcount; i++) - mach_port_deallocate(my_task, prev_list[i]); + /* The thread ports are not deallocated by list, see above. */ vm_deallocate(my_task, (vm_address_t)act_list, sizeof(thread_t) * listcount); # endif /* !GC_NO_THREADS_DISCOVERY */ @@ -684,10 +665,11 @@ GC_INLINE void GC_thread_resume(thread_act_t thread) # endif /* Resume the thread */ kern_result = thread_resume(thread); - if (kern_result != KERN_SUCCESS) - ABORT("thread_resume failed"); - if (GC_on_thread_event) + if (kern_result != KERN_SUCCESS) { + WARN("thread_resume(%p) failed: mach port invalid\n", thread); + } else if (GC_on_thread_event) { GC_on_thread_event(GC_EVENT_THREAD_UNSUSPENDED, (void *)(word)thread); + } } /* Caller holds allocation lock, and has held it continuously since */ @@ -706,8 +688,7 @@ GC_INNER void GC_start_world(void) if (GC_query_task_threads) { # ifndef GC_NO_THREADS_DISCOVERY - int i; - int j = GC_mach_threads_count; + int i, j; kern_return_t kern_result; thread_act_array_t act_list; mach_msg_type_number_t listcount; @@ -716,39 +697,44 @@ GC_INNER void GC_start_world(void) if (kern_result != KERN_SUCCESS) ABORT("task_threads failed"); - for (i = 0; i < (int)listcount; i++) { - thread_act_t thread = act_list[i]; - int last_found = j; /* The thread index found during the */ - /* previous iteration (count value */ - /* means no thread found yet). */ + j = (int)listcount; + for (i = 0; i < GC_mach_threads_count; i++) { + thread_act_t thread = GC_mach_threads[i].thread; - /* Search for the thread starting from the last found one first. */ - while (++j < GC_mach_threads_count) { - if (GC_mach_threads[j].thread == thread) - break; - } - if (j >= GC_mach_threads_count) { - /* If not found, search in the rest (beginning) of the list. */ - for (j = 0; j < last_found; j++) { - if (GC_mach_threads[j].thread == thread) + if (GC_mach_threads[i].suspended) { + int last_found = j; /* The thread index found during the */ + /* previous iteration (count value */ + /* means no thread found yet). */ + + /* Search for the thread starting from the last found one first. */ + while (++j < (int)listcount) { + if (act_list[j] == thread) break; } - } - - if (j != last_found) { - /* The thread is found in GC_mach_threads. */ - if (GC_mach_threads[j].already_suspended) { -# ifdef DEBUG_THREADS - GC_log_printf("Not resuming already suspended thread %p\n", - (void *)(word)thread); -# endif - } else { + if (j >= (int)listcount) { + /* If not found, search in the rest (beginning) of the list. */ + for (j = 0; j < last_found; j++) { + if (act_list[j] == thread) + break; + } + } + if (j != last_found) { + /* The thread is alive, resume it. */ GC_thread_resume(thread); } + } else { + /* This thread was failed to be suspended by GC_stop_world, */ + /* no action needed. */ +# ifdef DEBUG_THREADS + GC_log_printf("Not resuming thread %p as it is not suspended\n", + (void *)(word)thread); +# endif } - mach_port_deallocate(my_task, thread); } + + for (i = 0; i < (int)listcount; i++) + mach_port_deallocate(my_task, act_list[i]); vm_deallocate(my_task, (vm_address_t)act_list, sizeof(thread_t) * listcount); # endif /* !GC_NO_THREADS_DISCOVERY */ -- cgit v1.2.1