diff options
author | Frank Ch. Eigler <fche@redhat.com> | 2021-09-14 08:15:23 -0400 |
---|---|---|
committer | Frank Ch. Eigler <fche@redhat.com> | 2021-09-14 11:27:41 -0400 |
commit | 2ff803956d6aaefeed3bc3f186da6fe666c7b1b6 (patch) | |
tree | 4dcf82c09ec15175afaceb053ebc333edfd4a9ef | |
parent | 761d37a1e072e7a6c829fdff8cebcf4c308d0e02 (diff) | |
download | elfutils-2ff803956d6aaefeed3bc3f186da6fe666c7b1b6.tar.gz |
PR28339: debuginfod: fix groom/scan race condition on just-emptied queue
debuginfod's scan and groom operations (thread_main_scanner,
thread_main_fts_source_paths) are intended to be mutually exclusive,
as a substitute for more complicated sql transaction batching. (This
is because scanning / grooming involves inserting or deleting data
from multiple related tables.)
The workq class that governs this in debuginfod.cxx has a problem: if
the workq just becomes empty, its sole entry pulled by a scanner
thread in response to a wait_front(), an 'idler' groomer thread is
ALSO permitted to run, because there is no indication as to when the
scanner thread operation finishes, only when it starts.
Extending the workq with a counter ("fronters") to track any active
scanning activity (even if the workq is empty) lets us block idlers
groomers a little longer.
Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
-rw-r--r-- | debuginfod/ChangeLog | 8 | ||||
-rw-r--r-- | debuginfod/debuginfod.cxx | 26 |
2 files changed, 28 insertions, 6 deletions
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 1173f9cd..4ff59efd 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,11 @@ +2021-09-14 Frank Ch. Eigler <fche@redhat.com> + + PRPR28339 + * debuginfod.cxx (waitq::fronters): New field. + (waitq::wait_idle): Respect it. + (waitq::done_front): New function. + (thread_main_scanner): Call it to match wait_front(). + 2021-09-12 Mark Wielaard <mark@klomp.org> * debuginfod.cxx (libarchive_fdcache::lookup): Add endl after diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 6cc9f777..1267efbe 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -663,10 +663,11 @@ class workq mutex mtx; condition_variable cv; bool dead; - unsigned idlers; + unsigned idlers; // number of threads busy with wait_idle / done_idle + unsigned fronters; // number of threads busy with wait_front / done_front public: - workq() { dead = false; idlers = 0; } + workq() { dead = false; idlers = 0; fronters = 0; } ~workq() {} void push_back(const Payload& p) @@ -690,10 +691,11 @@ public: unique_lock<mutex> lock(mtx); q.clear(); set_metric("thread_work_pending","role","scan", q.size()); + // NB: there may still be some live fronters cv.notify_all(); // maybe wake up waiting idlers } - // block this scanner thread until there is work to do and no active + // block this scanner thread until there is work to do and no active idler bool wait_front (Payload& p) { unique_lock<mutex> lock(mtx); @@ -705,19 +707,29 @@ public: { p = * q.begin(); q.erase (q.begin()); + fronters ++; // prevent idlers from starting awhile, even if empty q set_metric("thread_work_pending","role","scan", q.size()); - if (q.size() == 0) - cv.notify_all(); // maybe wake up waiting idlers + // NB: don't wake up idlers yet! The consumer is busy + // processing this element until it calls done_front(). return true; } } + // notify waitq that scanner thread is done with that last item + void done_front () + { + unique_lock<mutex> lock(mtx); + fronters --; + if (q.size() == 0 && fronters == 0) + cv.notify_all(); // maybe wake up waiting idlers + } + // block this idler thread until there is no work to do void wait_idle () { unique_lock<mutex> lock(mtx); cv.notify_all(); // maybe wake up waiting scanners - while (!dead && (q.size() != 0)) + while (!dead && ((q.size() != 0) || fronters > 0)) cv.wait(lock); idlers ++; } @@ -3145,6 +3157,8 @@ thread_main_scanner (void* arg) e.report(cerr); } + scanq.done_front(); // let idlers run + if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef) {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size |