summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2021-12-01 15:38:02 -0800
committerPaul Eggert <eggert@cs.ucla.edu>2021-12-15 15:04:50 -0800
commitcf26200380585019e927fe3cf5c0ecb7c8b3ef14 (patch)
tree0601d06babe8944698e14b7fc3889a06bdb942f9
parent1440bdbd16cc40f3ef3d0d91106373e29611f528 (diff)
downloadgzip-cf26200380585019e927fe3cf5c0ecb7c8b3ef14.tar.gz
gzip: gzip -l now outputs accurate size
gzip -l now decompresses to see how long the uncompressed file was. This fixes what is by far the most common bug report for gzip. It has a significant performance cost, but it’s worth it nowadays. * gzip.c (main): -l now sets 'test' too. All uses of 'test' changed. (treat_stdin, treat_file): Call do_list after decompressing, so that the length is known. (do_list): Omit arg IFD, since it is no longer needed. All callers changed. Get the CRC and uncompressed size from input_crc and bytes_out instead of using lseek. * tests/list-big: New test. * unzip.c (unzip): Set unzip_crc before returning. * util.c (write_buf): If 'test', output nothing. Update bytes_out with output byte count, regardless of 'test'. All callers changed.
-rw-r--r--gzip.c66
-rw-r--r--gzip.h1
-rw-r--r--tests/Makefile.am1
-rwxr-xr-xtests/list-big31
-rw-r--r--unlzh.c5
-rw-r--r--unlzw.c17
-rw-r--r--unzip.c3
-rw-r--r--util.c18
8 files changed, 76 insertions, 66 deletions
diff --git a/gzip.c b/gzip.c
index 735ee0a..ecb19da 100644
--- a/gzip.c
+++ b/gzip.c
@@ -319,7 +319,7 @@ local void discard_input_bytes (size_t nbytes, unsigned int flags);
local int make_ofname (void);
local void shorten_name (char *name);
local int get_method (int in);
-local void do_list (int ifd, int method);
+local void do_list (int method);
local int check_ofname (void);
local void copy_stat (struct stat *ifstat);
local void install_signal_handlers (void);
@@ -535,7 +535,7 @@ int main (int argc, char **argv)
case 'k':
keep = 1; break;
case 'l':
- list = decompress = to_stdout = 1; break;
+ list = decompress = test = to_stdout = 1; break;
case 'L':
license (); finish_out (); break;
case 'm': /* undocumented, may change later */
@@ -655,7 +655,7 @@ int main (int argc, char **argv)
/* And get to work */
if (file_count != 0) {
- if (to_stdout && !test && !list && (!decompress || !ascii)) {
+ if (to_stdout && !test && (!decompress || !ascii)) {
SET_BINARY_MODE (STDOUT_FILENO);
}
while (optind < argc) {
@@ -673,7 +673,7 @@ int main (int argc, char **argv)
{
/* Output any totals, and check for output errors. */
if (!quiet && 1 < file_count)
- do_list (-1, -1);
+ do_list (-1);
if (fflush (stdout) != 0)
write_error ();
}
@@ -759,7 +759,7 @@ local void treat_stdin()
if (decompress || !ascii) {
SET_BINARY_MODE (STDIN_FILENO);
}
- if (!test && !list && (!decompress || !ascii)) {
+ if (!test && (!decompress || !ascii)) {
SET_BINARY_MODE (STDOUT_FILENO);
}
strcpy(ifname, "stdin");
@@ -786,10 +786,6 @@ local void treat_stdin()
do_exit(exit_code); /* error message already emitted */
}
}
- if (list) {
- do_list(ifd, method);
- return;
- }
/* Actually do the compression/decompression. Loop over zipped members.
*/
@@ -805,6 +801,12 @@ local void treat_stdin()
bytes_out = 0; /* required for length check */
}
+ if (list)
+ {
+ do_list (method);
+ return;
+ }
+
if (verbose) {
if (test) {
fprintf(stderr, " OK\n");
@@ -949,7 +951,7 @@ local void treat_file(iname)
/* Generate output file name. For -r and (-t or -l), skip files
* without a valid gzip suffix (check done in make_ofname).
*/
- if (to_stdout && !list && !test) {
+ if (to_stdout && !test) {
strcpy(ofname, "stdout");
} else if (make_ofname() != OK) {
@@ -967,12 +969,6 @@ local void treat_file(iname)
return; /* error message already emitted */
}
}
- if (list) {
- do_list(ifd, method);
- if (close (ifd) != 0)
- read_error ();
- return;
- }
/* If compressing to a file, check if ofname is not ambiguous
* because the operating system truncates names. Otherwise, generate
@@ -992,7 +988,7 @@ local void treat_file(iname)
/* Keep the name even if not truncated except with --no-name: */
if (!save_orig_name) save_orig_name = !no_name;
- if (verbose) {
+ if (verbose && !list) {
fprintf(stderr, "%s:\t", ifname);
}
@@ -1015,6 +1011,12 @@ local void treat_file(iname)
if (close (ifd) != 0)
read_error ();
+ if (list)
+ {
+ do_list (method);
+ return;
+ }
+
if (!to_stdout)
{
copy_stat (&istat);
@@ -1066,7 +1068,7 @@ local void treat_file(iname)
} else {
display_ratio(bytes_in-(bytes_out-header_bytes), bytes_in, stderr);
}
- if (!test && !to_stdout)
+ if (!test)
fprintf(stderr, " -- %s %s", keep ? "created" : "replaced with",
ofname);
fprintf(stderr, "\n");
@@ -1395,7 +1397,8 @@ local int make_ofname()
/* With -t or -l, try all files (even without .gz suffix)
* except with -r (behave as with just -dr).
*/
- if (!recursive && (list || test)) return OK;
+ if (!recursive && test)
+ return OK;
/* Avoid annoying messages with -r */
if (verbose || (!recursive && !quiet)) {
@@ -1688,7 +1691,6 @@ local int get_method(in)
last_member = 1;
if (imagic0 != EOF) {
write_buf (STDOUT_FILENO, magic, 1);
- bytes_out++;
}
}
if (method >= 0) return method;
@@ -1724,9 +1726,8 @@ local int get_method(in)
* If the given method is < 0, display the accumulated totals.
* IN assertions: time_stamp, header_bytes and ifile_size are initialized.
*/
-local void do_list(ifd, method)
- int ifd; /* input file descriptor */
- int method; /* compression method */
+static void
+do_list (int method)
{
ulg crc; /* original crc */
static int first_time = 1;
@@ -1768,26 +1769,9 @@ local void do_list(ifd, method)
return;
}
crc = (ulg)~0; /* unknown */
- bytes_out = -1L;
- bytes_in = ifile_size;
if (method == DEFLATED && !last_member) {
- /* Get the crc and uncompressed size for gzip'ed (not zip'ed) files.
- * If the lseek fails, we could use read() to get to the end, but
- * --list is used to get quick results.
- * Use "gunzip < foo.gz | wc -c" to get the uncompressed size if
- * you are not concerned about speed.
- */
- bytes_in = lseek(ifd, (off_t)(-8), SEEK_END);
- if (bytes_in != -1L) {
- uch buf[8];
- bytes_in += 8L;
- if (read(ifd, (char*)buf, sizeof(buf)) != sizeof(buf)) {
- read_error();
- }
- crc = LG(buf);
- bytes_out = LG(buf+4);
- }
+ crc = unzip_crc;
}
if (verbose)
diff --git a/gzip.h b/gzip.h
index db0305f..ebe3213 100644
--- a/gzip.h
+++ b/gzip.h
@@ -262,6 +262,7 @@ extern int zip (int in, int out);
extern int file_read (char *buf, unsigned size);
/* in unzip.c */
+extern ulg unzip_crc;
extern int unzip (int in, int out);
extern int check_zipfile (int in);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 256bbf7..18e7c8a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -21,6 +21,7 @@ TESTS = \
hufts \
keep \
list \
+ list-big \
memcpy-abuse \
mixed \
null-suffix-clobber \
diff --git a/tests/list-big b/tests/list-big
new file mode 100755
index 0000000..afa3310
--- /dev/null
+++ b/tests/list-big
@@ -0,0 +1,31 @@
+#!/bin/sh
+# Exercise the --list option with a big file.
+
+# Copyright 2021 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# limit so don't run it by default.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ..
+
+truncate --size 4G big || framework_failure_
+
+gzip -1 big || fail=1
+gzip -l big.gz >out || fail=1
+case $(cat out) in
+ *' 4294967296 '*' big') ;;
+ *) cat out; fail=1;;
+esac
+
+Exit $fail
diff --git a/unlzh.c b/unlzh.c
index 37084fe..f018922 100644
--- a/unlzh.c
+++ b/unlzh.c
@@ -390,9 +390,8 @@ int unlzh(in, out)
decode_start();
while (!done) {
n = decode((unsigned) DICSIZ, window);
- if (!test && n > 0) {
- write_buf(out, (char*)window, n);
- }
+ if (n > 0)
+ write_buf (out, window, n);
}
return OK;
}
diff --git a/unlzw.c b/unlzw.c
index d7714b5..ba824e4 100644
--- a/unlzw.c
+++ b/unlzw.c
@@ -225,10 +225,8 @@ int unlzw(in, out)
"posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
posbits, p[-1],p[0],p[1],p[2],p[3]);
#endif
- if (!test && outpos > 0) {
- write_buf(out, (char*)outbuf, outpos);
- bytes_out += (off_t)outpos;
- }
+ if (outpos > 0)
+ write_buf (out, outbuf, outpos);
gzip_error (to_stdout
? "corrupt input."
: "corrupt input. Use zcat to recover some data.");
@@ -257,10 +255,7 @@ int unlzw(in, out)
outpos += i;
}
if (outpos >= OUTBUFSIZ) {
- if (!test) {
- write_buf(out, (char*)outbuf, outpos);
- bytes_out += (off_t)outpos;
- }
+ write_buf (out, outbuf, outpos);
outpos = 0;
}
stackp+= i;
@@ -281,9 +276,7 @@ int unlzw(in, out)
}
} while (rsize != 0);
- if (!test && outpos > 0) {
- write_buf(out, (char*)outbuf, outpos);
- bytes_out += (off_t)outpos;
- }
+ if (outpos > 0)
+ write_buf (out, outbuf, outpos);
return OK;
}
diff --git a/unzip.c b/unzip.c
index dacfbaf..b52811e 100644
--- a/unzip.c
+++ b/unzip.c
@@ -51,6 +51,8 @@
/* Globals */
+ulg unzip_crc; /* CRC found by 'unzip'. */
+
static int decrypt; /* flag to turn on decryption */
static int pkzip = 0; /* set for a pkzip file */
static int ext_header = 0; /* set if extended local header */
@@ -210,6 +212,7 @@ int unzip(in, out)
}
}
ext_header = pkzip = 0; /* for next file */
+ unzip_crc = orig_crc;
if (err == OK) return OK;
exit_code = ERROR;
if (!test) abort_gzip();
diff --git a/util.c b/util.c
index 4e73036..cd43886 100644
--- a/util.c
+++ b/util.c
@@ -112,7 +112,6 @@ int copy(in, out)
errno = 0;
while (insize > inptr) {
write_buf(out, (char*)inbuf + inptr, insize - inptr);
- bytes_out += insize - inptr;
got = read_buffer (in, (char *) inbuf, INBUFSIZ);
if (got == -1)
read_error();
@@ -255,9 +254,7 @@ void flush_outbuf()
{
if (outcnt == 0) return;
- if (!test)
- write_buf (ofd, outbuf, outcnt);
- bytes_out += (off_t)outcnt;
+ write_buf (ofd, outbuf, outcnt);
outcnt = 0;
}
@@ -270,16 +267,13 @@ void flush_window()
if (outcnt == 0) return;
updcrc(window, outcnt);
- if (!test) {
- write_buf(ofd, (char *)window, outcnt);
- }
- bytes_out += (off_t)outcnt;
+ write_buf (ofd, window, outcnt);
outcnt = 0;
}
/* ===========================================================================
- * Does the same as write(), but also handles partial pipe writes and checks
- * for error return.
+ * Update the count of output bytes. If testing, do not do any
+ * output. Otherwise, write the buffer, checking for errors.
*/
void write_buf(fd, buf, cnt)
int fd;
@@ -288,6 +282,10 @@ void write_buf(fd, buf, cnt)
{
unsigned n;
+ bytes_out += cnt;
+ if (test)
+ return;
+
while ((n = write_buffer (fd, buf, cnt)) != cnt) {
if (n == (unsigned)(-1)) {
write_error();