From bc277c7069cdebb6a6140326636555267b142e0e Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Sat, 3 Sep 2022 18:22:34 -0500 Subject: Fix data loss when acting as filter This bug was introduced by the recent lseek-related changes. * src/delete.c (delete_archive_members): * src/update.c (update_archive): Copy the member if acting as a filter, rather than lseeking over it, which is possible if stdin is a regular file. * src/list.c (skim_file, skim_member): * src/sparse.c (sparse_skim_file): New functions, for copying when a filter. * src/list.c (skip_file): Remove; replaced with skim_file. All callers changed. (skip_member): Reimplement in terms of skim_member. * src/sparse.c (sparse_skip_file): Remove; replaced with sparse_skim_file. All callers changed. * src/update.c (acting_as_filter): New static var. (update_archive): Set it; this is like delete.c. * tests/delete01.at (deleting a member after a big one): * tests/delete02.at (deleting a member from stdin archive): Also test filter case. --- src/common.h | 5 +++-- src/delete.c | 4 ++-- src/extract.c | 2 +- src/list.c | 22 ++++++++++++++++------ src/sparse.c | 6 +++--- src/update.c | 5 ++++- tests/delete01.at | 4 +++- tests/delete02.at | 4 +++- 8 files changed, 35 insertions(+), 17 deletions(-) diff --git a/src/common.h b/src/common.h index 24166524..259655f9 100644 --- a/src/common.h +++ b/src/common.h @@ -623,8 +623,9 @@ enum read_header read_header (union block **return_block, struct tar_stat_info *info, enum read_header_mode m); enum read_header tar_checksum (union block *header, bool silent); -void skip_file (off_t size); +void skim_file (off_t size, bool must_copy); void skip_member (void); +void skim_member (bool must_copy); /* Module misc.c. 
*/ @@ -928,7 +929,7 @@ bool sparse_fixup_header (struct tar_stat_info *st); enum dump_status sparse_dump_file (int, struct tar_stat_info *st); enum dump_status sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size); -enum dump_status sparse_skip_file (struct tar_stat_info *st); +enum dump_status sparse_skim_file (struct tar_stat_info *st, bool must_copy); bool sparse_diff_file (int, struct tar_stat_info *st); /* Module utf8.c */ diff --git a/src/delete.c b/src/delete.c index 3bee5c65..dd6bc213 100644 --- a/src/delete.c +++ b/src/delete.c @@ -183,13 +183,13 @@ delete_archive_members (void) case HEADER_SUCCESS: if ((name = name_scan (current_stat_info.file_name)) == NULL) { - skip_member (); + skim_member (acting_as_filter); break; } name->found_count++; if (!ISFOUND (name)) { - skip_member (); + skim_member (acting_as_filter); break; } FALLTHROUGH; diff --git a/src/extract.c b/src/extract.c index 7696d5e4..78de47f5 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1320,7 +1320,7 @@ extract_file (char *file_name, int typeflag) } } - skip_file (size); + skim_file (size, false); mv_end (); diff --git a/src/list.c b/src/list.c index 409917d2..cbc0e31e 100644 --- a/src/list.c +++ b/src/list.c @@ -416,7 +416,7 @@ read_header (union block **return_block, struct tar_stat_info *info, size_t next_long_name_blocks = 0; size_t next_long_link_blocks = 0; enum read_header status = HEADER_SUCCESS; - + while (1) { header = find_next_block (); @@ -1391,15 +1391,17 @@ print_for_mkdir (char *dirname, int length, mode_t mode) } } -/* Skip over SIZE bytes of data in blocks in the archive. */ +/* Skip over SIZE bytes of data in blocks in the archive. + This may involve copying the data. + If MUST_COPY, always copy instead of skipping. 
*/ void -skip_file (off_t size) +skim_file (off_t size, bool must_copy) { union block *x; /* FIXME: Make sure mv_begin_read is always called before it */ - if (seekable_archive) + if (seekable_archive && !must_copy) { off_t nblk = seek_archive (size); if (nblk >= 0) @@ -1426,6 +1428,14 @@ skip_file (off_t size) NOTE: Current header must be decoded before calling this function. */ void skip_member (void) +{ + skim_member (false); +} + +/* Skip the current member in the archive. + If MUST_COPY, always copy instead of skipping. */ +void +skim_member (bool must_copy) { if (!current_stat_info.skipped) { @@ -1435,9 +1445,9 @@ skip_member (void) mv_begin_read (&current_stat_info); if (current_stat_info.is_sparse) - sparse_skip_file (&current_stat_info); + sparse_skim_file (&current_stat_info, must_copy); else if (save_typeflag != DIRTYPE) - skip_file (current_stat_info.stat.st_size); + skim_file (current_stat_info.stat.st_size, must_copy); mv_end (); } diff --git a/src/sparse.c b/src/sparse.c index 0b9f250f..767793b0 100644 --- a/src/sparse.c +++ b/src/sparse.c @@ -586,7 +586,7 @@ sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size) } enum dump_status -sparse_skip_file (struct tar_stat_info *st) +sparse_skim_file (struct tar_stat_info *st, bool must_copy) { bool rc = true; struct tar_sparse_file file; @@ -598,7 +598,7 @@ sparse_skip_file (struct tar_stat_info *st) file.fd = -1; rc = tar_sparse_decode_header (&file); - skip_file (file.stat_info->archive_file_size - file.dumped_size); + skim_file (file.stat_info->archive_file_size - file.dumped_size, must_copy); return (tar_sparse_done (&file) && rc) ?
dump_status_ok : dump_status_short; } @@ -721,7 +721,7 @@ sparse_diff_file (int fd, struct tar_stat_info *st) } if (!rc) - skip_file (file.stat_info->archive_file_size - file.dumped_size); + skim_file (file.stat_info->archive_file_size - file.dumped_size, false); mv_end (); tar_sparse_done (&file); diff --git a/src/update.c b/src/update.c index eece0365..5424e2ce 100644 --- a/src/update.c +++ b/src/update.c @@ -42,6 +42,8 @@ bool time_to_start_writing; first part of the record. */ char *output_start; +static bool acting_as_filter; + /* Catenate file FILE_NAME to the archive without creating a header for it. It had better be a tar file or the archive is screwed. */ static void @@ -110,6 +112,7 @@ update_archive (void) name_gather (); open_archive (ACCESS_UPDATE); + acting_as_filter = strcmp (archive_name_array[0], "-") == 0; xheader_forbid_global (); while (!found_end) @@ -166,7 +169,7 @@ update_archive (void) } } - skip_member (); + skim_member (acting_as_filter); break; } diff --git a/tests/delete01.at b/tests/delete01.at index f323975e..251b5497 100644 --- a/tests/delete01.at +++ b/tests/delete01.at @@ -27,8 +27,10 @@ AT_TAR_CHECK([ genfile -l 50000 --file file1 genfile -l 1024 --file file2 tar cf archive file1 file2 +tar -f - --delete file2 <archive >archout +tar tf archout tar f archive --delete file2 -tar tf archive], +cmp archive archout], [0], [file1 ]) diff --git a/tests/delete02.at b/tests/delete02.at index 694c85d7..a21cd6d4 100644 --- a/tests/delete02.at +++ b/tests/delete02.at @@ -31,7 +31,9 @@ tar cf archive 1 2 3 tar tf archive cat archive | tar f - --delete 2 > archive2 echo separator -tar tf archive2], +tar tf archive2 +tar f - --delete 2 < archive > archive3 +cmp archive2 archive3], [0], [1 2 -- cgit v1.2.1