summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah <nsanci@redhat.com>2021-07-07 14:40:10 -0400
committerNoah Sanci <nsanci@redhat.com>2021-07-08 10:41:27 -0400
commit5ae6573d15a76a7424c5a022403de73660c0b88b (patch)
treebe26bbf198e2665f6a8455332b554ba019298314
parente113a61bc0537e485fbe7769f1ef0ebf02ac5e00 (diff)
downloadelfutils-nsanci/pr27711.tar.gz
debuginfod: PR27711 - Use -I/-X regexes during groom phasensanci/pr27711
The debuginfod -I/-X regexes operate during traversal to identify those files in need of scanning. The regexes are not used during grooming. This means that if from run to run, the regex changes so that formerly indexed files are excluded from traversal, the data is still retained in the index. This is both good and bad. On one hand, if the underlying data is still available, grooming will preserve the data, and let clients ask for it. On the other hand, if the growing index size is a problem, and one wishes to age no-longer-regex-matching index data out, there is no way. Let's add a debuginfod flag to use regexes during grooming. Specifically, in groom(), where the stat() test exists, also check for regex matching as in scan_source_paths(). Treat failure of the regex the same way as though the file didn't exist. https://sourceware.org/bugzilla/show_bug.cgi?id=27711 Signed-off-by: Noah Sanci <nsanci@redhat.com>
-rw-r--r--debuginfod/ChangeLog8
-rw-r--r--debuginfod/debuginfod.cxx11
-rw-r--r--doc/debuginfod.83
-rw-r--r--tests/ChangeLog5
-rwxr-xr-xtests/run-debuginfod-find.sh39
5 files changed, 58 insertions, 8 deletions
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 286c910a..29d3e815 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,11 @@
+2021-07-01 Noah Sanci <nsanci@redhat.com>
+
+ PR27711
+ * debuginfod.cxx (options): Add --regex-groom, -r option.
+ (regex_groom): New static bool defaults to false.
+ (parse_opt): Handle 'r' option by setting regex_groom to true.
+ (groom): Introduce and use reg_include and reg_exclude.
+
2021-06-03 Frank Ch. Eigler <fche@redhat.com>
PR27863
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 543044c6..4f7fd2d5 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -360,6 +360,7 @@ static const struct argp_option options[] =
{ "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
{ "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
{ "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
+ { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X arguments to groom the database.",0},
#define ARGP_KEY_FDCACHE_FDS 0x1001
{ "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
#define ARGP_KEY_FDCACHE_MBS 0x1002
@@ -407,6 +408,7 @@ static map<string,string> scan_archives;
static vector<string> extra_ddl;
static regex_t file_include_regex;
static regex_t file_exclude_regex;
+static bool regex_groom = false;
static bool traverse_logical;
static long fdcache_fds;
static long fdcache_mbs;
@@ -527,6 +529,9 @@ parse_opt (int key, char *arg,
if (rc != 0)
argp_failure(state, 1, EINVAL, "regular expression");
break;
+ case 'r':
+ regex_groom = true;
+ break;
case ARGP_KEY_FDCACHE_FDS:
fdcache_fds = atol (arg);
break;
@@ -3249,8 +3254,11 @@ void groom()
int64_t fileid = sqlite3_column_int64 (files, 1);
const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
struct stat s;
+ bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
+ bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
+
rc = stat(filename, &s);
- if (rc < 0 || (mtime != (int64_t) s.st_mtime))
+ if ( (regex_groom && reg_exclude && !reg_include) || rc < 0 || (mtime != (int64_t) s.st_mtime) )
{
if (verbose > 2)
obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl;
@@ -3261,7 +3269,6 @@ void groom()
}
else
inc_metric("groomed_total", "decision", "fresh");
-
if (sigusr1 != forced_rescan_count) // stop early if scan triggered
break;
}
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index 1ba42cf6..1adf703a 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -159,6 +159,9 @@ scan, independent of the rescan time (including if it was zero),
interrupting a groom pass (if any).
.TP
+.B "\-r"
+Apply the -I and -X regexes during groom cycles, so that files excluded by the regexes are removed from the index. These regex checks are in addition to what normally qualifies a file for grooming, not a replacement.
+.TP
.B "\-g SECONDS" "\-\-groom\-time=SECONDS"
Set the groom time for the index database. This is the amount of time
the grooming thread will wait after finishing a grooming pass before
diff --git a/tests/ChangeLog b/tests/ChangeLog
index d8fa97fa..346b9e6e 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,8 @@
+2021-07-01 Noah Sanci <nsanci@redhat.com>
+ PR27711
+ * run-debuginfod-find.sh: Added test case for grooming the database
+ using regexes.
+
2021-06-16 Frank Ch. Eigler <fche@redhat.com>
* run-debuginfod-find.sh: Fix intermittent groom/stale failure,
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 456dc2f8..7f66c322 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -36,13 +36,14 @@ export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
PID1=0
PID2=0
PID3=0
+PID4=0
cleanup()
{
- if [ $PID1 -ne 0 ]; then kill $PID1 || true; wait $PID1; fi
- if [ $PID2 -ne 0 ]; then kill $PID2 || true; wait $PID2; fi
- if [ $PID3 -ne 0 ]; then kill $PID3 || true; wait $PID3; fi
-
+ if [ $PID1 -ne 0 ]; then kill $PID1; wait $PID1; fi
+ if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi
+ if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi
+ if [ $PID4 -ne 0 ]; then kill $PID4; wait $PID4; fi
rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree ${PWD}/.client_cache* ${PWD}/tmp*
exit_cleanup
}
@@ -293,7 +294,8 @@ kill -USR1 $PID1
wait_ready $PORT1 'thread_work_total{role="traverse"}' 3
wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
wait_ready $PORT1 'thread_busy{role="scan"}' 0
-
+cp $DB $DB.backup
+tempfiles $DB.backup
# Rerun same tests for the prog2 binary
filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find -v debuginfo $BUILDID2 2>vlog`
cmp $filename F/prog2
@@ -705,4 +707,29 @@ DEBUGINFOD_URLS="file://${PWD}/mocktree/"
filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd /my/path/main.c`
cmp $filename ${local_dir}/main.c
-exit 0
+########################################################################
+## PR27711
+# Test to ensure that --regex-groom with the --include="^$" --exclude=".*" options removes all files from a database backup
+while true; do
+ PORT3=`expr '(' $RANDOM % 1000 ')' + 9000`
+ ss -atn | fgrep ":$PORT3" || break
+done
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS="http://127.0.0.1:$PORT3/" ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT3 -t0 -g0 --regex-groom --include="^$" --exclude=".*" -d $DB.backup > vlog$PORT3 2>&1 &
+PID4=$!
+wait_ready $PORT3 'ready' 1
+tempfiles vlog$PORT3
+errfiles vlog$PORT3
+
+kill -USR2 $PID4
+wait_ready $PORT3 'thread_work_total{role="groom"}' 1
+wait_ready $PORT3 'groom{statistic="archive d/e"}' 0
+wait_ready $PORT3 'groom{statistic="archive sdef"}' 0
+wait_ready $PORT3 'groom{statistic="archive sref"}' 0
+wait_ready $PORT3 'groom{statistic="buildids"}' 0
+wait_ready $PORT3 'groom{statistic="file d/e"}' 0
+wait_ready $PORT3 'groom{statistic="file s"}' 0
+wait_ready $PORT3 'groom{statistic="files scanned (#)"}' 0
+wait_ready $PORT3 'groom{statistic="files scanned (mb)"}' 0
+
+kill $PID4
+exit 0;