summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2022-08-15 00:05:53 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2022-08-15 00:07:39 -0700
commit258d1c44e5ee7c58b28bf0000e9d737df6081885 (patch)
tree2f723b2cffb9068662e3e364b7faadf6a4a381cc
parente49537dcdf0b35f2425402f3b5d77669c9b4aed9 (diff)
downloadtar-258d1c44e5ee7c58b28bf0000e9d737df6081885.tar.gz
Avoid quadratic behavior with delayed links
Do this by searching a hash table instead of a linked list. Problem reported by Martin Dørum in https://mort.coffee/home/tar/ via Gavin Smith in: https://lists.gnu.org/r/bug-tar/2022-07/msg00003.html * src/extract.c: Include hash.h. Improve performance a bit on non-birthtime hosts (struct delayed_link.has_predecessor): New member. (delayed_link_head): Remove, replacing with ... (delayed_link_table): ... this new variable. All uses of linked list replaced with hash table. (dl_hash, dl_compare): New functions for hash table. (create_placeholder_file): Initialize has_predecessor. (apply_delayed_link): New function, with body taken from most of the old apply_delayed_link. (apply_delayed_links): Use it. Respect has_predecessor. Don’t bother freeing as we are about to exit.
-rw-r--r--src/extract.c233
1 files changed, 139 insertions, 94 deletions
diff --git a/src/extract.c b/src/extract.c
index 93ad719e..2813c961 100644
--- a/src/extract.c
+++ b/src/extract.c
@@ -22,6 +22,7 @@
#include <system.h>
#include <quotearg.h>
#include <errno.h>
+#include <hash.h>
#include <priv-set.h>
#include <root-uid.h>
#include <utimens.h>
@@ -128,12 +129,16 @@ struct delayed_set_stat
static struct delayed_set_stat *delayed_set_stat_head;
-/* List of links whose creation we have delayed. */
+/* A link whose creation we have delayed. */
struct delayed_link
{
- /* The next delayed link in the list. */
+ /* The next in a list of delayed links that should be made after
+ this delayed link. */
struct delayed_link *next;
+ /* Whether this delayed link has a predecessor in the NEXT list. */
+ bool has_predecessor;
+
/* The device, inode number and birthtime of the placeholder.
birthtime.tv_nsec is negative if the birthtime is not available.
Don't use mtime as this would allow for false matches if some
@@ -178,7 +183,7 @@ struct delayed_link
char target[1];
};
-static struct delayed_link *delayed_link_head;
+static Hash_table *delayed_link_table;
struct string_list
{
@@ -186,6 +191,25 @@ struct string_list
char string[1];
};
+static size_t
+dl_hash (void const *entry, size_t table_size)
+{
+ struct delayed_link const *dl = entry;
+ uintmax_t n = dl->dev;
+ int nshift = TYPE_WIDTH (n) - TYPE_WIDTH (dl->dev);
+ if (0 < nshift)
+ n <<= nshift;
+ n ^= dl->ino;
+ return n % table_size;
+}
+
+static bool
+dl_compare (void const *a, void const *b)
+{
+ struct delayed_link const *da = a, *db = b;
+ return (da->dev == db->dev) & (da->ino == db->ino);
+}
+
/* Set up to extract files. */
void
extr_init (void)
@@ -1349,23 +1373,21 @@ extract_file (char *file_name, int typeflag)
}
/* Find a delayed_link structure corresponding to the source NAME.
- Such a structure exists in the delayed link list only if the link
+ Such a structure exists in the delayed link table only if the link
placeholder file has been created. Therefore, try to stat the NAME
- first. If it doesn't exist, there is no matching entry in the list.
- Otherwise, look for the entry in list which has the matching dev
- and ino numbers.
+ first. If it doesn't exist, there is no matching entry in the table.
+ Otherwise, look for the entry in the table that has the matching dev
+ and ino numbers. Return a null pointer if not found.
- This approach avoids scanning the singly-linked list in obvious cases
- and does not rely on comparing file names, which may differ for
+ Do not rely on comparing file names, which may differ for
various reasons (e.g. relative vs. absolute file names).
*/
static struct delayed_link *
find_delayed_link_source (char const *name)
{
- struct delayed_link *dl;
struct stat st;
- if (!delayed_link_head)
+ if (!delayed_link_table)
return NULL;
if (fstatat (chdir_fd, name, &st, AT_SYMLINK_NOFOLLOW))
@@ -1375,12 +1397,10 @@ find_delayed_link_source (char const *name)
return NULL;
}
- for (dl = delayed_link_head; dl; dl = dl->next)
- {
- if (dl->dev == st.st_dev && dl->ino == st.st_ino)
- break;
- }
- return dl;
+ struct delayed_link dl;
+ dl.dev = st.st_dev;
+ dl.ino = st.st_ino;
+ return hash_lookup (delayed_link_table, &dl);
}
/* Create a placeholder file with name FILE_NAME, which will be
@@ -1388,9 +1408,7 @@ find_delayed_link_source (char const *name)
IS_SYMLINK is true, and by a hard link otherwise. Set
*INTERDIR_MADE if an intermediate directory is made in the
process.
- Install the created struct delayed_link after PREV, unless the
- latter is NULL, in which case insert it at the head of the delayed
- link list.
+ If PREV, install the created struct delayed_link after PREV.
*/
static int
@@ -1443,12 +1461,14 @@ create_placeholder_file (char *file_name, bool is_symlink, bool *interdir_made,
{
p->next = prev->next;
prev->next = p;
+ p->has_predecessor = true;
}
else
{
- p->next = delayed_link_head;
- delayed_link_head = p;
+ p->next = NULL;
+ p->has_predecessor = false;
}
+
p->dev = st.st_dev;
p->ino = st.st_ino;
#if HAVE_BIRTHTIME
@@ -1478,6 +1498,12 @@ create_placeholder_file (char *file_name, bool is_symlink, bool *interdir_made,
xattr_map_copy (&p->xattr_map, &current_stat_info.xattr_map);
strcpy (p->target, current_stat_info.link_name);
+ if (! ((delayed_link_table
+ || (delayed_link_table = hash_initialize (0, 0, dl_hash,
+ dl_compare, free)))
+ && hash_insert (delayed_link_table, p)))
+ xalloc_die ();
+
if ((h = find_direct_ancestor (file_name)) != NULL)
mark_after_links (h);
@@ -1512,13 +1538,14 @@ extract_link (char *file_name, int typeflag)
if (status == 0)
{
- struct delayed_link *ds = delayed_link_head;
- if (ds
+ if (delayed_link_table
&& fstatat (chdir_fd, link_name, &st1, AT_SYMLINK_NOFOLLOW) == 0)
- for (; ds; ds = ds->next)
- if (ds->change_dir == chdir_current
- && ds->dev == st1.st_dev
- && ds->ino == st1.st_ino
+ {
+ struct delayed_link dl1;
+ dl1.ino = st1.st_ino;
+ dl1.dev = st1.st_dev;
+ struct delayed_link *ds = hash_lookup (delayed_link_table, &dl1);
+ if (ds && ds->change_dir == chdir_current
&& BIRTHTIME_EQ (ds->birthtime, get_stat_birthtime (&st1)))
{
struct string_list *p = xmalloc (offsetof (struct string_list, string)
@@ -1526,8 +1553,9 @@ extract_link (char *file_name, int typeflag)
strcpy (p->string, file_name);
p->next = ds->sources;
ds->sources = p;
- break;
}
+ }
+
return 0;
}
else if ((e == EEXIST && strcmp (link_name, file_name) == 0)
@@ -1853,86 +1881,103 @@ extract_archive (void)
undo_last_backup ();
}
-/* Extract the links whose final extraction were delayed. */
+/* Extract the link DS whose final extraction was delayed. */
static void
-apply_delayed_links (void)
+apply_delayed_link (struct delayed_link *ds)
{
- struct delayed_link *ds;
+ struct string_list *sources = ds->sources;
+ char const *valid_source = 0;
- for (ds = delayed_link_head; ds; )
- {
- struct string_list *sources = ds->sources;
- char const *valid_source = 0;
+ chdir_do (ds->change_dir);
- chdir_do (ds->change_dir);
+ for (sources = ds->sources; sources; sources = sources->next)
+ {
+ char const *source = sources->string;
+ struct stat st;
- for (sources = ds->sources; sources; sources = sources->next)
+ /* Make sure the placeholder file is still there. If not,
+ don't create a link, as the placeholder was probably
+ removed by a later extraction. */
+ if (fstatat (chdir_fd, source, &st, AT_SYMLINK_NOFOLLOW) == 0
+ && st.st_dev == ds->dev
+ && st.st_ino == ds->ino
+ && BIRTHTIME_EQ (get_stat_birthtime (&st), ds->birthtime))
{
- char const *source = sources->string;
- struct stat st;
-
- /* Make sure the placeholder file is still there. If not,
- don't create a link, as the placeholder was probably
- removed by a later extraction. */
- if (fstatat (chdir_fd, source, &st, AT_SYMLINK_NOFOLLOW) == 0
- && st.st_dev == ds->dev
- && st.st_ino == ds->ino
- && BIRTHTIME_EQ (get_stat_birthtime (&st), ds->birthtime))
+ /* Unlink the placeholder, then create a hard link if possible,
+ a symbolic link otherwise. */
+ if (unlinkat (chdir_fd, source, 0) != 0)
+ unlink_error (source);
+ else if (valid_source
+ && (linkat (chdir_fd, valid_source, chdir_fd, source, 0)
+ == 0))
+ ;
+ else if (!ds->is_symlink)
{
- /* Unlink the placeholder, then create a hard link if possible,
- a symbolic link otherwise. */
- if (unlinkat (chdir_fd, source, 0) != 0)
- unlink_error (source);
- else if (valid_source
- && (linkat (chdir_fd, valid_source, chdir_fd, source, 0)
- == 0))
- ;
- else if (!ds->is_symlink)
- {
- if (linkat (chdir_fd, ds->target, chdir_fd, source, 0) != 0)
- link_error (ds->target, source);
- }
- else if (symlinkat (ds->target, chdir_fd, source) != 0)
- symlink_error (ds->target, source);
- else
- {
- struct tar_stat_info st1;
- st1.stat.st_mode = ds->mode;
- st1.stat.st_uid = ds->uid;
- st1.stat.st_gid = ds->gid;
- st1.atime = ds->atime;
- st1.mtime = ds->mtime;
- st1.cntx_name = ds->cntx_name;
- st1.acls_a_ptr = ds->acls_a_ptr;
- st1.acls_a_len = ds->acls_a_len;
- st1.acls_d_ptr = ds->acls_d_ptr;
- st1.acls_d_len = ds->acls_d_len;
- st1.xattr_map = ds->xattr_map;
- set_stat (source, &st1, -1, 0, 0, SYMTYPE,
- false, AT_SYMLINK_NOFOLLOW);
- valid_source = source;
- }
+ if (linkat (chdir_fd, ds->target, chdir_fd, source, 0) != 0)
+ link_error (ds->target, source);
+ }
+ else if (symlinkat (ds->target, chdir_fd, source) != 0)
+ symlink_error (ds->target, source);
+ else
+ {
+ struct tar_stat_info st1;
+ st1.stat.st_mode = ds->mode;
+ st1.stat.st_uid = ds->uid;
+ st1.stat.st_gid = ds->gid;
+ st1.atime = ds->atime;
+ st1.mtime = ds->mtime;
+ st1.cntx_name = ds->cntx_name;
+ st1.acls_a_ptr = ds->acls_a_ptr;
+ st1.acls_a_len = ds->acls_a_len;
+ st1.acls_d_ptr = ds->acls_d_ptr;
+ st1.acls_d_len = ds->acls_d_len;
+ st1.xattr_map = ds->xattr_map;
+ set_stat (source, &st1, -1, 0, 0, SYMTYPE,
+ false, AT_SYMLINK_NOFOLLOW);
+ valid_source = source;
}
}
+ }
- for (sources = ds->sources; sources; )
- {
- struct string_list *next = sources->next;
- free (sources);
- sources = next;
- }
+ for (sources = ds->sources; sources; )
+ {
+ struct string_list *next = sources->next;
+ free (sources);
+ sources = next;
+ }
+
+ xattr_map_free (&ds->xattr_map);
+ free (ds->cntx_name);
+}
- xattr_map_free (&ds->xattr_map);
- free (ds->cntx_name);
+/* Extract the links whose final extraction were delayed. */
+static void
+apply_delayed_links (void)
+{
+ if (!delayed_link_table)
+ return;
+ for (struct delayed_link *dl = hash_get_first (delayed_link_table);
+ dl;
+ dl = dl->next ? dl->next : hash_get_next (delayed_link_table, dl))
+ if (!dl->has_predecessor)
{
- struct delayed_link *next = ds->next;
- free (ds);
- ds = next;
+ struct delayed_link *ds = dl;
+ do
+ {
+ apply_delayed_link (ds);
+ ds = ds->next;
+ }
+ while (ds);
}
- }
- delayed_link_head = 0;
+ if (false)
+ {
+ /* There is little point to freeing, as we are about to exit,
+ and freeing is more likely to cause than cure trouble. */
+ hash_free (delayed_link_table);
+ delayed_link_table = NULL;
+ }
}
/* Finish the extraction of an archive. */