diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2021-12-01 15:38:02 -0800 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2021-12-15 15:04:50 -0800 |
commit | cf26200380585019e927fe3cf5c0ecb7c8b3ef14 (patch) | |
tree | 0601d06babe8944698e14b7fc3889a06bdb942f9 /gzip.c | |
parent | 1440bdbd16cc40f3ef3d0d91106373e29611f528 (diff) | |
download | gzip-cf26200380585019e927fe3cf5c0ecb7c8b3ef14.tar.gz |
gzip: gzip -l now outputs accurate size
gzip -l now decompresses to see how long the uncompressed file was.
This fixes what is by far the most common bug report for gzip.
It has a significant performance cost, but it’s worth it nowadays.
* gzip.c (main): -l now sets 'test' too. All uses of
'test' changed.
(treat_stdin, treat_file): Call do_list after decompressing,
so that the length is known.
(do_list): Omit arg IFD, since it is no longer needed.
All callers changed. Get the CRC and uncompressed size
from input_crc and bytes_out instead of using lseek.
* tests/list-big: New test.
* unzip.c (unzip): Set unzip_crc before returning.
* util.c (write_buf): If 'test', output nothing.
Update bytes_out with output byte count, regardless of 'test'.
All callers changed.
Diffstat (limited to 'gzip.c')
-rw-r--r-- | gzip.c | 66 |
1 files changed, 25 insertions, 41 deletions
@@ -319,7 +319,7 @@ local void discard_input_bytes (size_t nbytes, unsigned int flags); local int make_ofname (void); local void shorten_name (char *name); local int get_method (int in); -local void do_list (int ifd, int method); +local void do_list (int method); local int check_ofname (void); local void copy_stat (struct stat *ifstat); local void install_signal_handlers (void); @@ -535,7 +535,7 @@ int main (int argc, char **argv) case 'k': keep = 1; break; case 'l': - list = decompress = to_stdout = 1; break; + list = decompress = test = to_stdout = 1; break; case 'L': license (); finish_out (); break; case 'm': /* undocumented, may change later */ @@ -655,7 +655,7 @@ int main (int argc, char **argv) /* And get to work */ if (file_count != 0) { - if (to_stdout && !test && !list && (!decompress || !ascii)) { + if (to_stdout && !test && (!decompress || !ascii)) { SET_BINARY_MODE (STDOUT_FILENO); } while (optind < argc) { @@ -673,7 +673,7 @@ int main (int argc, char **argv) { /* Output any totals, and check for output errors. */ if (!quiet && 1 < file_count) - do_list (-1, -1); + do_list (-1); if (fflush (stdout) != 0) write_error (); } @@ -759,7 +759,7 @@ local void treat_stdin() if (decompress || !ascii) { SET_BINARY_MODE (STDIN_FILENO); } - if (!test && !list && (!decompress || !ascii)) { + if (!test && (!decompress || !ascii)) { SET_BINARY_MODE (STDOUT_FILENO); } strcpy(ifname, "stdin"); @@ -786,10 +786,6 @@ local void treat_stdin() do_exit(exit_code); /* error message already emitted */ } } - if (list) { - do_list(ifd, method); - return; - } /* Actually do the compression/decompression. Loop over zipped members. */ @@ -805,6 +801,12 @@ local void treat_stdin() bytes_out = 0; /* required for length check */ } + if (list) + { + do_list (method); + return; + } + if (verbose) { if (test) { fprintf(stderr, " OK\n"); @@ -949,7 +951,7 @@ local void treat_file(iname) /* Generate output file name. For -r and (-t or -l), skip files * without a valid gzip suffix (check done in make_ofname). */ - if (to_stdout && !list && !test) { + if (to_stdout && !test) { strcpy(ofname, "stdout"); } else if (make_ofname() != OK) { @@ -967,12 +969,6 @@ local void treat_file(iname) return; /* error message already emitted */ } } - if (list) { - do_list(ifd, method); - if (close (ifd) != 0) - read_error (); - return; - } /* If compressing to a file, check if ofname is not ambiguous * because the operating system truncates names. Otherwise, generate @@ -992,7 +988,7 @@ local void treat_file(iname) /* Keep the name even if not truncated except with --no-name: */ if (!save_orig_name) save_orig_name = !no_name; - if (verbose) { + if (verbose && !list) { fprintf(stderr, "%s:\t", ifname); } @@ -1015,6 +1011,12 @@ local void treat_file(iname) if (close (ifd) != 0) read_error (); + if (list) + { + do_list (method); + return; + } + if (!to_stdout) { copy_stat (&istat); @@ -1066,7 +1068,7 @@ local void treat_file(iname) } else { display_ratio(bytes_in-(bytes_out-header_bytes), bytes_in, stderr); } - if (!test && !to_stdout) + if (!test) fprintf(stderr, " -- %s %s", keep ? "created" : "replaced with", ofname); fprintf(stderr, "\n"); @@ -1395,7 +1397,8 @@ local int make_ofname() /* With -t or -l, try all files (even without .gz suffix) * except with -r (behave as with just -dr). */ - if (!recursive && (list || test)) return OK; + if (!recursive && test) + return OK; /* Avoid annoying messages with -r */ if (verbose || (!recursive && !quiet)) { @@ -1688,7 +1691,6 @@ local int get_method(in) last_member = 1; if (imagic0 != EOF) { write_buf (STDOUT_FILENO, magic, 1); - bytes_out++; } } if (method >= 0) return method; @@ -1724,9 +1726,8 @@ local int get_method(in) * If the given method is < 0, display the accumulated totals. * IN assertions: time_stamp, header_bytes and ifile_size are initialized. */ -local void do_list(ifd, method) - int ifd; /* input file descriptor */ - int method; /* compression method */ +static void +do_list (int method) { ulg crc; /* original crc */ static int first_time = 1; @@ -1768,26 +1769,9 @@ local void do_list(ifd, method) return; } crc = (ulg)~0; /* unknown */ - bytes_out = -1L; - bytes_in = ifile_size; if (method == DEFLATED && !last_member) { - /* Get the crc and uncompressed size for gzip'ed (not zip'ed) files. - * If the lseek fails, we could use read() to get to the end, but - * --list is used to get quick results. - * Use "gunzip < foo.gz | wc -c" to get the uncompressed size if - * you are not concerned about speed. - */ - bytes_in = lseek(ifd, (off_t)(-8), SEEK_END); - if (bytes_in != -1L) { - uch buf[8]; - bytes_in += 8L; - if (read(ifd, (char*)buf, sizeof(buf)) != sizeof(buf)) { - read_error(); - } - crc = LG(buf); - bytes_out = LG(buf+4); - } + crc = unzip_crc; } if (verbose) |