From d4fdeab4db0d0e699c8fbbb07f12c4e1f64d0f94 Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Mon, 16 May 2016 09:22:21 +0000 Subject: tar-1.29 --- src/sparse.c | 1295 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1295 insertions(+) create mode 100644 src/sparse.c (limited to 'src/sparse.c') diff --git a/src/sparse.c b/src/sparse.c new file mode 100644 index 0000000..4e78401 --- /dev/null +++ b/src/sparse.c @@ -0,0 +1,1295 @@ +/* Functions for dealing with sparse files + + Copyright 2003-2007, 2010, 2013-2016 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any later + version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program. If not, see . */ + +#include +#include +#include +#include "common.h" + +struct tar_sparse_file; +static bool sparse_select_optab (struct tar_sparse_file *file); + +enum sparse_scan_state + { + scan_begin, + scan_block, + scan_end + }; + +struct tar_sparse_optab +{ + bool (*init) (struct tar_sparse_file *); + bool (*done) (struct tar_sparse_file *); + bool (*sparse_member_p) (struct tar_sparse_file *); + bool (*dump_header) (struct tar_sparse_file *); + bool (*fixup_header) (struct tar_sparse_file *); + bool (*decode_header) (struct tar_sparse_file *); + bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state, + void *); + bool (*dump_region) (struct tar_sparse_file *, size_t); + bool (*extract_region) (struct tar_sparse_file *, size_t); +}; + +struct tar_sparse_file +{ + int fd; /* File descriptor */ + bool seekable; /* Is fd seekable? */ + off_t offset; /* Current offset in fd if seekable==false. + Otherwise unused */ + off_t dumped_size; /* Number of bytes actually written + to the archive */ + struct tar_stat_info *stat_info; /* Information about the file */ + struct tar_sparse_optab const *optab; /* Operation table */ + void *closure; /* Any additional data optab calls might + require */ +}; + +/* Dump zeros to file->fd until offset is reached. It is used instead of + lseek if the output file is not seekable */ +static bool +dump_zeros (struct tar_sparse_file *file, off_t offset) +{ + static char const zero_buf[BLOCKSIZE]; + + if (offset < file->offset) + { + errno = EINVAL; + return false; + } + + while (file->offset < offset) + { + size_t size = (BLOCKSIZE < offset - file->offset + ? BLOCKSIZE + : offset - file->offset); + ssize_t wrbytes; + + wrbytes = write (file->fd, zero_buf, size); + if (wrbytes <= 0) + { + if (wrbytes == 0) + errno = EINVAL; + return false; + } + file->offset += wrbytes; + } + + return true; +} + +static bool +tar_sparse_member_p (struct tar_sparse_file *file) +{ + if (file->optab->sparse_member_p) + return file->optab->sparse_member_p (file); + return false; +} + +static bool +tar_sparse_init (struct tar_sparse_file *file) +{ + memset (file, 0, sizeof *file); + + if (!sparse_select_optab (file)) + return false; + + if (file->optab->init) + return file->optab->init (file); + + return true; +} + +static bool +tar_sparse_done (struct tar_sparse_file *file) +{ + if (file->optab->done) + return file->optab->done (file); + return true; +} + +static bool +tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state, + void *block) +{ + if (file->optab->scan_block) + return file->optab->scan_block (file, state, block); + return true; +} + +static bool +tar_sparse_dump_region (struct tar_sparse_file *file, size_t i) +{ + if (file->optab->dump_region) + return file->optab->dump_region (file, i); + return false; +} + +static bool +tar_sparse_extract_region (struct tar_sparse_file *file, size_t i) +{ + if (file->optab->extract_region) + return file->optab->extract_region (file, i); + return false; +} + +static bool +tar_sparse_dump_header (struct tar_sparse_file *file) +{ + if (file->optab->dump_header) + return file->optab->dump_header (file); + return false; +} + +static bool +tar_sparse_decode_header (struct tar_sparse_file *file) +{ + if (file->optab->decode_header) + return file->optab->decode_header (file); + return true; +} + +static bool +tar_sparse_fixup_header (struct tar_sparse_file *file) +{ + if (file->optab->fixup_header) + return file->optab->fixup_header (file); + return true; +} + + +static bool +lseek_or_error (struct tar_sparse_file *file, off_t offset) +{ + if (file->seekable + ? lseek (file->fd, offset, SEEK_SET) < 0 + : ! dump_zeros (file, offset)) + { + seek_diag_details (file->stat_info->orig_file_name, offset); + return false; + } + return true; +} + +/* Takes a blockful of data and basically cruises through it to see if + it's made *entirely* of zeros, returning a 0 the instant it finds + something that is a nonzero, i.e., useful data. */ +static bool +zero_block_p (char const *buffer, size_t size) +{ + while (size--) + if (*buffer++) + return false; + return true; +} + +static void +sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp) +{ + struct sp_array *sparse_map = st->sparse_map; + size_t avail = st->sparse_map_avail; + if (avail == st->sparse_map_size) + st->sparse_map = sparse_map = + x2nrealloc (sparse_map, &st->sparse_map_size, sizeof *sparse_map); + sparse_map[avail] = *sp; + st->sparse_map_avail = avail + 1; +} + +/* Scan the sparse file byte-by-byte and create its map. */ +static bool +sparse_scan_file_raw (struct tar_sparse_file *file) +{ + struct tar_stat_info *st = file->stat_info; + int fd = file->fd; + char buffer[BLOCKSIZE]; + size_t count = 0; + off_t offset = 0; + struct sp_array sp = {0, 0}; + + st->archive_file_size = 0; + + if (!tar_sparse_scan (file, scan_begin, NULL)) + return false; + + while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0 + && count != SAFE_READ_ERROR) + { + /* Analyze the block. */ + if (zero_block_p (buffer, count)) + { + if (sp.numbytes) + { + sparse_add_map (st, &sp); + sp.numbytes = 0; + if (!tar_sparse_scan (file, scan_block, NULL)) + return false; + } + } + else + { + if (sp.numbytes == 0) + sp.offset = offset; + sp.numbytes += count; + st->archive_file_size += count; + if (!tar_sparse_scan (file, scan_block, buffer)) + return false; + } + + offset += count; + } + + /* save one more sparse segment of length 0 to indicate that + the file ends with a hole */ + if (sp.numbytes == 0) + sp.offset = offset; + + sparse_add_map (st, &sp); + st->archive_file_size += count; + return tar_sparse_scan (file, scan_end, NULL); +} + +static bool +sparse_scan_file_wholesparse (struct tar_sparse_file *file) +{ + struct tar_stat_info *st = file->stat_info; + struct sp_array sp = {0, 0}; + + /* Note that this function is called only for truly sparse files of size >= 1 + block size (checked via ST_IS_SPARSE before). See the thread + http://www.mail-archive.com/bug-tar@gnu.org/msg04209.html for more info */ + if (ST_NBLOCKS (st->stat) == 0) + { + st->archive_file_size = 0; + sp.offset = st->stat.st_size; + sparse_add_map (st, &sp); + return true; + } + + return false; +} + +#ifdef SEEK_HOLE +/* Try to engage SEEK_HOLE/SEEK_DATA feature. */ +static bool +sparse_scan_file_seek (struct tar_sparse_file *file) +{ + struct tar_stat_info *st = file->stat_info; + int fd = file->fd; + struct sp_array sp = {0, 0}; + off_t offset = 0; + off_t data_offset; + off_t hole_offset; + + st->archive_file_size = 0; + + for (;;) + { + /* locate first chunk of data */ + data_offset = lseek (fd, offset, SEEK_DATA); + + if (data_offset == (off_t)-1) + /* ENXIO == EOF; error otherwise */ + { + if (errno == ENXIO) + { + /* file ends with hole, add one more empty chunk of data */ + sp.numbytes = 0; + sp.offset = st->stat.st_size; + sparse_add_map (st, &sp); + return true; + } + return false; + } + + hole_offset = lseek (fd, data_offset, SEEK_HOLE); + + /* according to specs, if FS does not fully support + SEEK_DATA/SEEK_HOLE it may just implement kind of "wrapper" around + classic lseek() call. We must detect it here and try to use other + hole-detection methods. */ + if (offset == 0 /* first loop */ + && data_offset == 0 + && hole_offset == st->stat.st_size) + { + lseek (fd, 0, SEEK_SET); + return false; + } + + sp.offset = data_offset; + sp.numbytes = hole_offset - data_offset; + sparse_add_map (st, &sp); + + st->archive_file_size += sp.numbytes; + offset = hole_offset; + } + + return true; +} +#endif + +static bool +sparse_scan_file (struct tar_sparse_file *file) +{ + /* always check for completely sparse files */ + if (sparse_scan_file_wholesparse (file)) + return true; + + switch (hole_detection) + { + case HOLE_DETECTION_DEFAULT: + case HOLE_DETECTION_SEEK: +#ifdef SEEK_HOLE + if (sparse_scan_file_seek (file)) + return true; +#else + if (hole_detection == HOLE_DETECTION_SEEK) + WARN((0, 0, + _("\"seek\" hole detection is not supported, using \"raw\"."))); + /* fall back to "raw" for this and all other files */ + hole_detection = HOLE_DETECTION_RAW; +#endif + case HOLE_DETECTION_RAW: + if (sparse_scan_file_raw (file)) + return true; + } + + return false; +} + +static struct tar_sparse_optab const oldgnu_optab; +static struct tar_sparse_optab const star_optab; +static struct tar_sparse_optab const pax_optab; + +static bool +sparse_select_optab (struct tar_sparse_file *file) +{ + switch (current_format == DEFAULT_FORMAT ? archive_format : current_format) + { + case V7_FORMAT: + case USTAR_FORMAT: + return false; + + case OLDGNU_FORMAT: + case GNU_FORMAT: /*FIXME: This one should disappear? */ + file->optab = &oldgnu_optab; + break; + + case POSIX_FORMAT: + file->optab = &pax_optab; + break; + + case STAR_FORMAT: + file->optab = &star_optab; + break; + + default: + return false; + } + return true; +} + +static bool +sparse_dump_region (struct tar_sparse_file *file, size_t i) +{ + union block *blk; + off_t bytes_left = file->stat_info->sparse_map[i].numbytes; + + if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset)) + return false; + + while (bytes_left > 0) + { + size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left; + size_t bytes_read; + + blk = find_next_block (); + bytes_read = safe_read (file->fd, blk->buffer, bufsize); + if (bytes_read == SAFE_READ_ERROR) + { + read_diag_details (file->stat_info->orig_file_name, + (file->stat_info->sparse_map[i].offset + + file->stat_info->sparse_map[i].numbytes + - bytes_left), + bufsize); + return false; + } + + memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read); + bytes_left -= bytes_read; + file->dumped_size += bytes_read; + set_next_block_after (blk); + } + + return true; +} + +static bool +sparse_extract_region (struct tar_sparse_file *file, size_t i) +{ + off_t write_size; + + if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset)) + return false; + + write_size = file->stat_info->sparse_map[i].numbytes; + + if (write_size == 0) + { + /* Last block of the file is a hole */ + if (file->seekable && sys_truncate (file->fd)) + truncate_warn (file->stat_info->orig_file_name); + } + else while (write_size > 0) + { + size_t count; + size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size; + union block *blk = find_next_block (); + if (!blk) + { + ERROR ((0, 0, _("Unexpected EOF in archive"))); + return false; + } + set_next_block_after (blk); + count = blocking_write (file->fd, blk->buffer, wrbytes); + write_size -= count; + file->dumped_size += count; + mv_size_left (file->stat_info->archive_file_size - file->dumped_size); + file->offset += count; + if (count != wrbytes) + { + write_error_details (file->stat_info->orig_file_name, + count, wrbytes); + return false; + } + } + return true; +} + + + +/* Interface functions */ +enum dump_status +sparse_dump_file (int fd, struct tar_stat_info *st) +{ + bool rc; + struct tar_sparse_file file; + + if (!tar_sparse_init (&file)) + return dump_status_not_implemented; + + file.stat_info = st; + file.fd = fd; + file.seekable = true; /* File *must* be seekable for dump to work */ + + rc = sparse_scan_file (&file); + if (rc && file.optab->dump_region) + { + tar_sparse_dump_header (&file); + + if (fd >= 0) + { + size_t i; + + mv_begin_write (file.stat_info->file_name, + file.stat_info->stat.st_size, + file.stat_info->archive_file_size - file.dumped_size); + for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++) + rc = tar_sparse_dump_region (&file, i); + } + } + + pad_archive (file.stat_info->archive_file_size - file.dumped_size); + return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short; +} + +bool +sparse_member_p (struct tar_stat_info *st) +{ + struct tar_sparse_file file; + + if (!tar_sparse_init (&file)) + return false; + file.stat_info = st; + return tar_sparse_member_p (&file); +} + +bool +sparse_fixup_header (struct tar_stat_info *st) +{ + struct tar_sparse_file file; + + if (!tar_sparse_init (&file)) + return false; + file.stat_info = st; + return tar_sparse_fixup_header (&file); +} + +enum dump_status +sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size) +{ + bool rc = true; + struct tar_sparse_file file; + size_t i; + + if (!tar_sparse_init (&file)) + return dump_status_not_implemented; + + file.stat_info = st; + file.fd = fd; + file.seekable = lseek (fd, 0, SEEK_SET) == 0; + file.offset = 0; + + rc = tar_sparse_decode_header (&file); + for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++) + rc = tar_sparse_extract_region (&file, i); + *size = file.stat_info->archive_file_size - file.dumped_size; + return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short; +} + +enum dump_status +sparse_skip_file (struct tar_stat_info *st) +{ + bool rc = true; + struct tar_sparse_file file; + + if (!tar_sparse_init (&file)) + return dump_status_not_implemented; + + file.stat_info = st; + file.fd = -1; + + rc = tar_sparse_decode_header (&file); + skip_file (file.stat_info->archive_file_size - file.dumped_size); + return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short; +} + + +static bool +check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end) +{ + if (!lseek_or_error (file, beg)) + return false; + + while (beg < end) + { + size_t bytes_read; + size_t rdsize = BLOCKSIZE < end - beg ? BLOCKSIZE : end - beg; + char diff_buffer[BLOCKSIZE]; + + bytes_read = safe_read (file->fd, diff_buffer, rdsize); + if (bytes_read == SAFE_READ_ERROR) + { + read_diag_details (file->stat_info->orig_file_name, + beg, + rdsize); + return false; + } + if (!zero_block_p (diff_buffer, bytes_read)) + { + char begbuf[INT_BUFSIZE_BOUND (off_t)]; + report_difference (file->stat_info, + _("File fragment at %s is not a hole"), + offtostr (beg, begbuf)); + return false; + } + + beg += bytes_read; + } + return true; +} + +static bool +check_data_region (struct tar_sparse_file *file, size_t i) +{ + off_t size_left; + + if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset)) + return false; + size_left = file->stat_info->sparse_map[i].numbytes; + mv_size_left (file->stat_info->archive_file_size - file->dumped_size); + + while (size_left > 0) + { + size_t bytes_read; + size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left; + char diff_buffer[BLOCKSIZE]; + + union block *blk = find_next_block (); + if (!blk) + { + ERROR ((0, 0, _("Unexpected EOF in archive"))); + return false; + } + set_next_block_after (blk); + bytes_read = safe_read (file->fd, diff_buffer, rdsize); + if (bytes_read == SAFE_READ_ERROR) + { + read_diag_details (file->stat_info->orig_file_name, + (file->stat_info->sparse_map[i].offset + + file->stat_info->sparse_map[i].numbytes + - size_left), + rdsize); + return false; + } + file->dumped_size += bytes_read; + size_left -= bytes_read; + mv_size_left (file->stat_info->archive_file_size - file->dumped_size); + if (memcmp (blk->buffer, diff_buffer, rdsize)) + { + report_difference (file->stat_info, _("Contents differ")); + return false; + } + } + return true; +} + +bool +sparse_diff_file (int fd, struct tar_stat_info *st) +{ + bool rc = true; + struct tar_sparse_file file; + size_t i; + off_t offset = 0; + + if (!tar_sparse_init (&file)) + return dump_status_not_implemented; + + file.stat_info = st; + file.fd = fd; + file.seekable = true; /* File *must* be seekable for compare to work */ + + rc = tar_sparse_decode_header (&file); + mv_begin_read (st); + for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++) + { + rc = check_sparse_region (&file, + offset, file.stat_info->sparse_map[i].offset) + && check_data_region (&file, i); + offset = file.stat_info->sparse_map[i].offset + + file.stat_info->sparse_map[i].numbytes; + } + + if (!rc) + skip_file (file.stat_info->archive_file_size - file.dumped_size); + mv_end (); + + tar_sparse_done (&file); + return rc; +} + + +/* Old GNU Format. The sparse file information is stored in the + oldgnu_header in the following manner: + + The header is marked with type 'S'. Its 'size' field contains + the cumulative size of all non-empty blocks of the file. The + actual file size is stored in 'realsize' member of oldgnu_header. + + The map of the file is stored in a list of 'struct sparse'. + Each struct contains offset to the block of data and its + size (both as octal numbers). The first file header contains + at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map + contains more structs, then the field 'isextended' of the main + header is set to 1 (binary) and the 'struct sparse_header' + header follows, containing at most 21 following structs + (SPARSES_IN_SPARSE_HEADER). If more structs follow, 'isextended' + field of the extended header is set and next next extension header + follows, etc... */ + +enum oldgnu_add_status + { + add_ok, + add_finish, + add_fail + }; + +static bool +oldgnu_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused))) +{ + return current_header->header.typeflag == GNUTYPE_SPARSE; +} + +/* Add a sparse item to the sparse file and its obstack */ +static enum oldgnu_add_status +oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s) +{ + struct sp_array sp; + + if (s->numbytes[0] == '\0') + return add_finish; + sp.offset = OFF_FROM_HEADER (s->offset); + sp.numbytes = OFF_FROM_HEADER (s->numbytes); + if (sp.offset < 0 || sp.numbytes < 0 + || INT_ADD_OVERFLOW (sp.offset, sp.numbytes) + || file->stat_info->stat.st_size < sp.offset + sp.numbytes + || file->stat_info->archive_file_size < 0) + return add_fail; + + sparse_add_map (file->stat_info, &sp); + return add_ok; +} + +static bool +oldgnu_fixup_header (struct tar_sparse_file *file) +{ + /* NOTE! st_size was initialized from the header + which actually contains archived size. The following fixes it */ + off_t realsize = OFF_FROM_HEADER (current_header->oldgnu_header.realsize); + file->stat_info->archive_file_size = file->stat_info->stat.st_size; + file->stat_info->stat.st_size = max (0, realsize); + return 0 <= realsize; +} + +/* Convert old GNU format sparse data to internal representation */ +static bool +oldgnu_get_sparse_info (struct tar_sparse_file *file) +{ + size_t i; + union block *h = current_header; + int ext_p; + enum oldgnu_add_status rc; + + file->stat_info->sparse_map_avail = 0; + for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++) + { + rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]); + if (rc != add_ok) + break; + } + + for (ext_p = h->oldgnu_header.isextended; + rc == add_ok && ext_p; ext_p = h->sparse_header.isextended) + { + h = find_next_block (); + if (!h) + { + ERROR ((0, 0, _("Unexpected EOF in archive"))); + return false; + } + set_next_block_after (h); + for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++) + rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]); + } + + if (rc == add_fail) + { + ERROR ((0, 0, _("%s: invalid sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + return true; +} + +static void +oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex, + struct sparse *sp, size_t sparse_size) +{ + for (; *pindex < file->stat_info->sparse_map_avail + && sparse_size > 0; sparse_size--, sp++, ++*pindex) + { + OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset, + sp->offset); + OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes, + sp->numbytes); + } +} + +static bool +oldgnu_dump_header (struct tar_sparse_file *file) +{ + off_t block_ordinal = current_block_ordinal (); + union block *blk; + size_t i; + + blk = start_header (file->stat_info); + blk->header.typeflag = GNUTYPE_SPARSE; + if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER) + blk->oldgnu_header.isextended = 1; + + /* Store the real file size */ + OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize); + /* Store the effective (shrunken) file size */ + OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size); + + i = 0; + oldgnu_store_sparse_info (file, &i, + blk->oldgnu_header.sp, + SPARSES_IN_OLDGNU_HEADER); + blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail; + finish_header (file->stat_info, blk, block_ordinal); + + while (i < file->stat_info->sparse_map_avail) + { + blk = find_next_block (); + memset (blk->buffer, 0, BLOCKSIZE); + oldgnu_store_sparse_info (file, &i, + blk->sparse_header.sp, + SPARSES_IN_SPARSE_HEADER); + if (i < file->stat_info->sparse_map_avail) + blk->sparse_header.isextended = 1; + set_next_block_after (blk); + } + return true; +} + +static struct tar_sparse_optab const oldgnu_optab = { + NULL, /* No init function */ + NULL, /* No done function */ + oldgnu_sparse_member_p, + oldgnu_dump_header, + oldgnu_fixup_header, + oldgnu_get_sparse_info, + NULL, /* No scan_block function */ + sparse_dump_region, + sparse_extract_region, +}; + + +/* Star */ + +static bool +star_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused))) +{ + return current_header->header.typeflag == GNUTYPE_SPARSE; +} + +static bool +star_fixup_header (struct tar_sparse_file *file) +{ + /* NOTE! st_size was initialized from the header + which actually contains archived size. The following fixes it */ + off_t realsize = OFF_FROM_HEADER (current_header->star_in_header.realsize); + file->stat_info->archive_file_size = file->stat_info->stat.st_size; + file->stat_info->stat.st_size = max (0, realsize); + return 0 <= realsize; +} + +/* Convert STAR format sparse data to internal representation */ +static bool +star_get_sparse_info (struct tar_sparse_file *file) +{ + size_t i; + union block *h = current_header; + int ext_p; + enum oldgnu_add_status rc = add_ok; + + file->stat_info->sparse_map_avail = 0; + + if (h->star_in_header.prefix[0] == '\0' + && h->star_in_header.sp[0].offset[10] != '\0') + { + /* Old star format */ + for (i = 0; i < SPARSES_IN_STAR_HEADER; i++) + { + rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]); + if (rc != add_ok) + break; + } + ext_p = h->star_in_header.isextended; + } + else + ext_p = 1; + + for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended) + { + h = find_next_block (); + if (!h) + { + ERROR ((0, 0, _("Unexpected EOF in archive"))); + return false; + } + set_next_block_after (h); + for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++) + rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]); + file->dumped_size += BLOCKSIZE; + } + + if (rc == add_fail) + { + ERROR ((0, 0, _("%s: invalid sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + return true; +} + + +static struct tar_sparse_optab const star_optab = { + NULL, /* No init function */ + NULL, /* No done function */ + star_sparse_member_p, + NULL, + star_fixup_header, + star_get_sparse_info, + NULL, /* No scan_block function */ + NULL, /* No dump region function */ + sparse_extract_region, +}; + + +/* GNU PAX sparse file format. There are several versions: + + * 0.0 + + The initial version of sparse format used by tar 1.14-1.15.1. + The sparse file map is stored in x header: + + GNU.sparse.size Real size of the stored file + GNU.sparse.numblocks Number of blocks in the sparse map + repeat numblocks time + GNU.sparse.offset Offset of the next data block + GNU.sparse.numbytes Size of the next data block + end repeat + + This has been reported as conflicting with the POSIX specs. The reason is + that offsets and sizes of non-zero data blocks were stored in multiple + instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas + POSIX requires the latest occurrence of the variable to override all + previous occurrences. + + To avoid this incompatibility two following versions were introduced. + + * 0.1 + + Used by tar 1.15.2 -- 1.15.91 (alpha releases). + + The sparse file map is stored in + x header: + + GNU.sparse.size Real size of the stored file + GNU.sparse.numblocks Number of blocks in the sparse map + GNU.sparse.map Map of non-null data chunks. A string consisting + of comma-separated values "offset,size[,offset,size]..." + + The resulting GNU.sparse.map string can be *very* long. While POSIX does not + impose any limit on the length of a x header variable, this can confuse some + tars. + + * 1.0 + + Starting from this version, the exact sparse format version is specified + explicitely in the header using the following variables: + + GNU.sparse.major Major version + GNU.sparse.minor Minor version + + X header keeps the following variables: + + GNU.sparse.name Real file name of the sparse file + GNU.sparse.realsize Real size of the stored file (corresponds to the old + GNU.sparse.size variable) + + The name field of the ustar header is constructed using the pattern + "%d/GNUSparseFile.%p/%f". + + The sparse map itself is stored in the file data block, preceding the actual + file data. It consists of a series of octal numbers of arbitrary length, + delimited by newlines. The map is padded with nulls to the nearest block + boundary. + + The first number gives the number of entries in the map. Following are map + entries, each one consisting of two numbers giving the offset and size of + the data block it describes. + + The format is designed in such a way that non-posix aware tars and tars not + supporting GNU.sparse.* keywords will extract each sparse file in its + condensed form with the file map attached and will place it into a separate + directory. Then, using a simple program it would be possible to expand the + file to its original form even without GNU tar. + + Bu default, v.1.0 archives are created. To use other formats, + --sparse-version option is provided. Additionally, v.0.0 can be obtained + by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1 + --pax-option delete=GNU.sparse.map +*/ + +static bool +pax_sparse_member_p (struct tar_sparse_file *file) +{ + return file->stat_info->sparse_map_avail > 0 + || file->stat_info->sparse_major > 0; +} + +/* Start a header that uses the effective (shrunken) file size. */ +static union block * +pax_start_header (struct tar_stat_info *st) +{ + off_t realsize = st->stat.st_size; + union block *blk; + st->stat.st_size = st->archive_file_size; + blk = start_header (st); + st->stat.st_size = realsize; + return blk; +} + +static bool +pax_dump_header_0 (struct tar_sparse_file *file) +{ + off_t block_ordinal = current_block_ordinal (); + union block *blk; + size_t i; + char nbuf[UINTMAX_STRSIZE_BOUND]; + struct sp_array *map = file->stat_info->sparse_map; + char *save_file_name = NULL; + + /* Store the real file size */ + xheader_store ("GNU.sparse.size", file->stat_info, NULL); + xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL); + + if (xheader_keyword_deleted_p ("GNU.sparse.map") + || tar_sparse_minor == 0) + { + for (i = 0; i < file->stat_info->sparse_map_avail; i++) + { + xheader_store ("GNU.sparse.offset", file->stat_info, &i); + xheader_store ("GNU.sparse.numbytes", file->stat_info, &i); + } + } + else + { + xheader_store ("GNU.sparse.name", file->stat_info, NULL); + save_file_name = file->stat_info->file_name; + file->stat_info->file_name = xheader_format_name (file->stat_info, + "%d/GNUSparseFile.%p/%f", 0); + + xheader_string_begin (&file->stat_info->xhdr); + for (i = 0; i < file->stat_info->sparse_map_avail; i++) + { + if (i) + xheader_string_add (&file->stat_info->xhdr, ","); + xheader_string_add (&file->stat_info->xhdr, + umaxtostr (map[i].offset, nbuf)); + xheader_string_add (&file->stat_info->xhdr, ","); + xheader_string_add (&file->stat_info->xhdr, + umaxtostr (map[i].numbytes, nbuf)); + } + if (!xheader_string_end (&file->stat_info->xhdr, + "GNU.sparse.map")) + { + free (file->stat_info->file_name); + file->stat_info->file_name = save_file_name; + return false; + } + } + blk = pax_start_header (file->stat_info); + finish_header (file->stat_info, blk, block_ordinal); + if (save_file_name) + { + free (file->stat_info->file_name); + file->stat_info->file_name = save_file_name; + } + return true; +} + +static bool +pax_dump_header_1 (struct tar_sparse_file *file) +{ + off_t block_ordinal = current_block_ordinal (); + union block *blk; + char *p, *q; + size_t i; + char nbuf[UINTMAX_STRSIZE_BOUND]; + off_t size = 0; + struct sp_array *map = file->stat_info->sparse_map; + char *save_file_name = file->stat_info->file_name; + +#define COPY_STRING(b,dst,src) do \ + { \ + char *endp = b->buffer + BLOCKSIZE; \ + char const *srcp = src; \ + while (*srcp) \ + { \ + if (dst == endp) \ + { \ + set_next_block_after (b); \ + b = find_next_block (); \ + dst = b->buffer; \ + endp = b->buffer + BLOCKSIZE; \ + } \ + *dst++ = *srcp++; \ + } \ + } while (0) + + /* Compute stored file size */ + p = umaxtostr (file->stat_info->sparse_map_avail, nbuf); + size += strlen (p) + 1; + for (i = 0; i < file->stat_info->sparse_map_avail; i++) + { + p = umaxtostr (map[i].offset, nbuf); + size += strlen (p) + 1; + p = umaxtostr (map[i].numbytes, nbuf); + size += strlen (p) + 1; + } + size = (size + BLOCKSIZE - 1) / BLOCKSIZE; + file->stat_info->archive_file_size += size * BLOCKSIZE; + file->dumped_size += size * BLOCKSIZE; + + /* Store sparse file identification */ + xheader_store ("GNU.sparse.major", file->stat_info, NULL); + xheader_store ("GNU.sparse.minor", file->stat_info, NULL); + xheader_store ("GNU.sparse.name", file->stat_info, NULL); + xheader_store ("GNU.sparse.realsize", file->stat_info, NULL); + + file->stat_info->file_name = + xheader_format_name (file->stat_info, "%d/GNUSparseFile.%p/%f", 0); + /* Make sure the created header name is shorter than NAME_FIELD_SIZE: */ + if (strlen (file->stat_info->file_name) > NAME_FIELD_SIZE) + file->stat_info->file_name[NAME_FIELD_SIZE] = 0; + + blk = pax_start_header (file->stat_info); + finish_header (file->stat_info, blk, block_ordinal); + free (file->stat_info->file_name); + file->stat_info->file_name = save_file_name; + + blk = find_next_block (); + q = blk->buffer; + p = umaxtostr (file->stat_info->sparse_map_avail, nbuf); + COPY_STRING (blk, q, p); + COPY_STRING (blk, q, "\n"); + for (i = 0; i < file->stat_info->sparse_map_avail; i++) + { + p = umaxtostr (map[i].offset, nbuf); + COPY_STRING (blk, q, p); + COPY_STRING (blk, q, "\n"); + p = umaxtostr (map[i].numbytes, nbuf); + COPY_STRING (blk, q, p); + COPY_STRING (blk, q, "\n"); + } + memset (q, 0, BLOCKSIZE - (q - blk->buffer)); + set_next_block_after (blk); + return true; +} + +static bool +pax_dump_header (struct tar_sparse_file *file) +{ + file->stat_info->sparse_major = tar_sparse_major; + file->stat_info->sparse_minor = tar_sparse_minor; + + return (file->stat_info->sparse_major == 0) ? + pax_dump_header_0 (file) : pax_dump_header_1 (file); +} + +static bool +decode_num (uintmax_t *num, char const *arg, uintmax_t maxval) +{ + uintmax_t u; + char *arg_lim; + + if (!ISDIGIT (*arg)) + return false; + + errno = 0; + u = strtoumax (arg, &arg_lim, 10); + + if (! (u <= maxval && errno != ERANGE) || *arg_lim) + return false; + + *num = u; + return true; +} + +static bool +pax_decode_header (struct tar_sparse_file *file) +{ + if (file->stat_info->sparse_major > 0) + { + uintmax_t u; + char nbuf[UINTMAX_STRSIZE_BOUND]; + union block *blk; + char *p; + size_t i; + +#define COPY_BUF(b,buf,src) do \ + { \ + char *endp = b->buffer + BLOCKSIZE; \ + char *dst = buf; \ + do \ + { \ + if (dst == buf + UINTMAX_STRSIZE_BOUND -1) \ + { \ + ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \ + file->stat_info->orig_file_name)); \ + return false; \ + } \ + if (src == endp) \ + { \ + set_next_block_after (b); \ + file->dumped_size += BLOCKSIZE; \ + b = find_next_block (); \ + src = b->buffer; \ + endp = b->buffer + BLOCKSIZE; \ + } \ + *dst = *src++; \ + } \ + while (*dst++ != '\n'); \ + dst[-1] = 0; \ + } while (0) + + set_next_block_after (current_header); + file->dumped_size += BLOCKSIZE; + blk = find_next_block (); + p = blk->buffer; + COPY_BUF (blk,nbuf,p); + if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t))) + { + ERROR ((0, 0, _("%s: malformed sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + file->stat_info->sparse_map_size = u; + file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size, + sizeof (*file->stat_info->sparse_map)); + file->stat_info->sparse_map_avail = 0; + for (i = 0; i < file->stat_info->sparse_map_size; i++) + { + struct sp_array sp; + + COPY_BUF (blk,nbuf,p); + if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t))) + { + ERROR ((0, 0, _("%s: malformed sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + sp.offset = u; + COPY_BUF (blk,nbuf,p); + if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t))) + { + ERROR ((0, 0, _("%s: malformed sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + sp.numbytes = u; + sparse_add_map (file->stat_info, &sp); + } + set_next_block_after (blk); + } + + return true; +} + +static struct tar_sparse_optab const pax_optab = { + NULL, /* No init function */ + NULL, /* No done function */ + pax_sparse_member_p, + pax_dump_header, + NULL, + pax_decode_header, + NULL, /* No scan_block function */ + sparse_dump_region, + sparse_extract_region, +}; -- cgit v1.2.1