diff options
author | Jim Meyering <meyering@fb.com> | 2023-01-20 18:09:26 -0800 |
---|---|---|
committer | Jim Meyering <meyering@meta.com> | 2023-01-31 18:24:28 -0800 |
commit | b319685c6e12e66bf357c2384fe69f1c63f66aed (patch) | |
tree | 2b3239e4ecbe14df1dbbf54d70a9937f42e4432b | |
parent | 7a8db7dbafc932cb08403b863a1c1edec5d283ca (diff) | |
download | coreutils-b319685c6e12e66bf357c2384fe69f1c63f66aed.tar.gz |
cksum: accept new option: --base64 (-b)
* src/digest.c [HASH_ALGO_CKSUM]: Include "base64.h"
[HASH_ALGO_CKSUM] (base64_digest): New global.
[HASH_ALGO_CKSUM] (enum BASE64_DIGEST_OPTION): New enum.
[HASH_ALGO_CKSUM] (long_options): Add "base64".
(valid_digits): Rename from hex_digits, now taking an input length argument.
Adjust callers.
(bsd_split_3): Rename arg from hex_digits to digest.
Add new *d_len parameter for length of extracted digest.
Move "i" declaration down to first use.
(split_3): Rename arg from hex_digits to digest.
Add new *d_len parameter for length of extracted digest.
Instead of relying on "known" length of digest to find the following
must-be-whitespace byte, search for the first whitespace byte.
[HASH_ALGO_CKSUM] (output_file): Handle base64_digest.
[HASH_ALGO_CKSUM] (main): Set base64_digest.
[HASH_ALGO_CKSUM] (b64_equal): New function.
(hex_equal): New function, factored out of digest_check.
(digest_check): Factor part into b64_equal and hex_equal.
Rename local hex_digest to digest.
* tests/misc/cksum-base64.pl: Add tests.
* tests/local.mk (all_tests): Add to the list.
* cfg.mk (_cksum): Define.
(exclude_file_name_regexp--sc_prohibit_test_backticks): Exempt new test.
(exclude_file_name_regexp--sc_long_lines): Likewise.
* doc/coreutils.texi (cksum invocation): Document it.
(md5sum invocation) [--check]: Mention digest encoding auto-detect.
* NEWS (New Features): Mention this.
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | cfg.mk | 5 | ||||
-rw-r--r-- | doc/coreutils.texi | 21 | ||||
-rw-r--r-- | src/digest.c | 185 | ||||
-rw-r--r-- | tests/local.mk | 1 | ||||
-rwxr-xr-x | tests/misc/cksum-base64.pl | 99 |
6 files changed, 261 insertions, 53 deletions
@@ -94,6 +94,9 @@ GNU coreutils NEWS -*- outline -*- ** New Features + cksum now accepts the --base64 (-b) option to print base64-encoded + checksums. It also accepts/checks such checksums. + factor now accepts the --exponents (-h) option to print factors in the form p^e, rather than repeating the prime p, e times. @@ -885,8 +885,9 @@ exclude_file_name_regexp--sc_prohibit_stat_st_blocks = \ exclude_file_name_regexp--sc_prohibit_continued_string_alpha_in_column_1 = \ ^src/(system\.h|od\.c|printf\.c|getlimits\.c)$$ +_cksum = ^tests/misc/cksum-base64\.pl$$ exclude_file_name_regexp--sc_prohibit_test_backticks = \ - ^tests/(local\.mk|(init|misc/stdbuf|factor/create-test)\.sh)$$ + ^tests/(local\.mk|(init|misc/stdbuf|factor/create-test)\.sh)$$|$(_cksum) # Exempt test.c, since it's nominally shared, and relatively static. exclude_file_name_regexp--sc_prohibit_operator_at_end_of_line = \ @@ -901,7 +902,7 @@ exclude_file_name_regexp--sc_prohibit-gl-attributes = ^src/libstdbuf\.c$$ exclude_file_name_regexp--sc_prohibit_uppercase_id_est = \.diff$$ exclude_file_name_regexp--sc_ensure_dblspace_after_dot_before_id_est = \.diff$$ exclude_file_name_regexp--sc_ensure_comma_after_id_est = \.diff|$(_ll)$$ -exclude_file_name_regexp--sc_long_lines = \.diff$$|$(_ll) +exclude_file_name_regexp--sc_long_lines = \.diff$$|$(_ll)|$(_cksum) # `grep . -q` is not exactly equivalent to `grep . >/dev/null` # and this difference is significant in the NEWS description diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 97c5280d3..4d7d9439d 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -4039,6 +4039,22 @@ Supported more modern digest algorithms are: @samp{sm3} only available through @command{cksum} @end example +@item -b +@itemx --base64 +@opindex -b +@opindex --base64 +@cindex base64 checksum encoding +Print base64-encoded digests not hexadecimal. +This option is ignored with @option{--check}. 
+The format conforms to +@uref{https://tools.ietf.org/search/rfc4648#section-4, RFC 4648#4}. + +Note that each base64-encoded digest has zero, one or two trailing padding +(@samp{=}) bytes. The length of that padding is the checksum-bit-length +modulo 3, and the @option{--check} parser requires precisely the same +input digest string as what is output. I.e., removing or adding any +@samp{=} padding renders a digest non-matching. + @item --debug @opindex --debug Output extra information to stderr, like the checksum implementation being used. @@ -4168,6 +4184,11 @@ For the @command{cksum} command, the @option{--check} option supports auto-detecting the digest algorithm to use, when presented with checksum information in the @option{--tag} output format. +Also for the @command{cksum} command, the @option{--check} option +auto-detects the digest encoding, accepting both standard hexadecimal +checksums and those generated via @command{cksum} with its +@option{--base64} option. + Output with @option{--zero} enabled is not supported by @option{--check}. @sp 1 For each such line, @command{md5sum} reads the named file and computes its diff --git a/src/digest.c b/src/digest.c index 8f9354b6a..c0616fcb2 100644 --- a/src/digest.c +++ b/src/digest.c @@ -32,6 +32,7 @@ #endif #if HASH_ALGO_CKSUM # include "cksum.h" +# include "base64.h" #endif #if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM # include "blake2/b2sum.h" @@ -204,6 +205,11 @@ static int bsd_reversed = -1; /* line delimiter. */ static unsigned char digest_delim = '\n'; +#if HASH_ALGO_CKSUM +/* If true, print base64-encoded digests, not hex. 
*/ +static bool base64_digest = false; +#endif + #if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM # define BLAKE2B_MAX_LEN BLAKE2B_OUTBYTES static uintmax_t digest_length; @@ -379,6 +385,7 @@ static struct option const long_options[] = # if HASH_ALGO_CKSUM { "algorithm", required_argument, NULL, 'a'}, + { "base64", no_argument, NULL, 'b' }, { "debug", no_argument, NULL, DEBUG_PROGRAM_OPTION}, { "untagged", no_argument, NULL, UNTAG_OPTION }, # else @@ -434,6 +441,10 @@ Print or check %s (%d-bit) checksums.\n\ -a, --algorithm=TYPE select the digest type to use. See DIGEST below.\ \n\ "), stdout); + fputs (_("\ + -b, --base64 emit base64-encoded digests, not hexadecimal\ +\n\ +"), stdout); #endif #if !HASH_ALGO_SUM # if !HASH_ALGO_CKSUM @@ -601,36 +612,61 @@ filename_unescape (char *s, size_t s_len) return s; } -/* Return true if S is a NUL-terminated string of DIGEST_HEX_BYTES hex digits. - Otherwise, return false. */ +/* Return true if S is a LEN-byte NUL-terminated string of hex or base64 + digits and has the expected length. Otherwise, return false. */ ATTRIBUTE_PURE static bool -hex_digits (unsigned char const *s) +valid_digits (unsigned char const *s, size_t len) { - for (unsigned int i = 0; i < digest_hex_bytes; i++) +#if HASH_ALGO_CKSUM + if (len == BASE64_LENGTH (digest_length / 8)) { - if (!isxdigit (*s)) - return false; - ++s; + size_t i; + for (i = 0; i < len - digest_length % 3; i++) + { + if (!isbase64 (*s)) + return false; + ++s; + } + for ( ; i < len; i++) + { + if (*s != '=') + return false; + ++s; + } + } + else +#endif + if (len == digest_hex_bytes) + { + for (unsigned int i = 0; i < digest_hex_bytes; i++) + { + if (!isxdigit (*s)) + return false; + ++s; + } } + else + return false; + return *s == '\0'; } /* Split the checksum string S (of length S_LEN) from a BSD 'md5' or 'sha1' command into two parts: a hexadecimal digest, and the file - name. S is modified. Return true if successful. */ + name. S is modified. Set *D_LEN to the length of the digest string. 
+ Return true if successful. */ static bool -bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest, +bsd_split_3 (char *s, size_t s_len, + unsigned char **digest, size_t *d_len, char **file_name, bool escaped_filename) { - size_t i; - if (s_len == 0) return false; /* Find end of filename. */ - i = s_len - 1; + size_t i = s_len - 1; while (i && s[i] != ')') i--; @@ -655,9 +691,10 @@ bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest, while (ISWHITE (s[i])) i++; - *hex_digest = (unsigned char *) &s[i]; + *digest = (unsigned char *) &s[i]; - return hex_digits (*hex_digest); + *d_len = s_len - i; + return valid_digits (*digest, *d_len); } #if HASH_ALGO_CKSUM @@ -701,11 +738,12 @@ algorithm_from_tag (char *s) /* Split the string S (of length S_LEN) into three parts: a hexadecimal digest, binary flag, and the file name. - S is modified. Return true if successful. */ + S is modified. Set *D_LEN to the length of the digest string. + Return true if successful. */ static bool split_3 (char *s, size_t s_len, - unsigned char **hex_digest, int *binary, char **file_name) + unsigned char **digest, size_t *d_len, int *binary, char **file_name) { bool escaped_filename = false; size_t algo_name_len; @@ -778,7 +816,7 @@ split_3 (char *s, size_t s_len, ++i; *binary = 0; return bsd_split_3 (s + i, s_len - i, - hex_digest, file_name, escaped_filename); + digest, d_len, file_name, escaped_filename); } return false; } @@ -790,14 +828,14 @@ split_3 (char *s, size_t s_len, if (s_len - i < min_digest_line_length + (s[i] == '\\')) return false; - *hex_digest = (unsigned char *) &s[i]; + *digest = (unsigned char *) &s[i]; #if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM /* Auto determine length. 
*/ # if HASH_ALGO_CKSUM if (cksum_algorithm == blake2b) { # endif - unsigned char const *hp = *hex_digest; + unsigned char const *hp = *digest; digest_hex_bytes = 0; while (isxdigit (*hp++)) digest_hex_bytes++; @@ -810,16 +848,15 @@ split_3 (char *s, size_t s_len, # endif #endif - /* The first field has to be the n-character hexadecimal - representation of the message digest. If it is not followed - immediately by a white space it's an error. */ - i += digest_hex_bytes; - if (!ISWHITE (s[i])) - return false; + /* This field must be the hexadecimal or base64 representation + of the message digest. */ + while (s[i] && !ISWHITE (s[i])) + i++; + *d_len = &s[i] - (char *) *digest; s[i++] = '\0'; - if (! hex_digits (*hex_digest)) + if (! valid_digits (*digest, *d_len)) return false; /* If "bsd reversed" format detected. */ @@ -1000,8 +1037,20 @@ output_file (char const *file, int binary_file, void const *digest, fputs (") = ", stdout); } - for (size_t i = 0; i < (digest_hex_bytes / 2); ++i) - printf ("%02x", bin_buffer[i]); +# if HASH_ALGO_CKSUM + if (base64_digest) + { + char b64[BASE64_LENGTH (DIGEST_BIN_BYTES) + 1]; + base64_encode ((char const *) bin_buffer, digest_length / 8, + b64, sizeof b64); + fputs (b64, stdout); + } + else +# endif + { + for (size_t i = 0; i < (digest_hex_bytes / 2); ++i) + printf ("%02x", bin_buffer[i]); + } if (!tagged) { @@ -1021,6 +1070,44 @@ output_file (char const *file, int binary_file, void const *digest, } #endif +#if HASH_ALGO_CKSUM +/* Return true if B64_DIGEST is the same as the base64 digest of the + DIGEST_LENGTH/8 bytes at BIN_BUFFER. 
*/ +static bool +b64_equal (unsigned char const *b64_digest, unsigned char const *bin_buffer) +{ + size_t b64_n_bytes = BASE64_LENGTH (digest_length / 8); + char b64[BASE64_LENGTH (DIGEST_BIN_BYTES) + 1]; + base64_encode ((char const *) bin_buffer, digest_length / 8, b64, sizeof b64); + return memcmp (b64_digest, b64, b64_n_bytes + 1) == 0; +} +#endif + +/* Return true if HEX_DIGEST is the same as the hex-encoded digest of the + DIGEST_LENGTH/8 bytes at BIN_BUFFER. */ +static bool +hex_equal (unsigned char const *hex_digest, unsigned char const *bin_buffer) +{ + static const char bin2hex[] = { '0', '1', '2', '3', + '4', '5', '6', '7', + '8', '9', 'a', 'b', + 'c', 'd', 'e', 'f' }; + size_t digest_bin_bytes = digest_hex_bytes / 2; + + /* Compare generated binary number with text representation + in check file. Ignore case of hex digits. */ + size_t cnt; + for (cnt = 0; cnt < digest_bin_bytes; ++cnt) + { + if (tolower (hex_digest[2 * cnt]) + != bin2hex[bin_buffer[cnt] >> 4] + || (tolower (hex_digest[2 * cnt + 1]) + != (bin2hex[bin_buffer[cnt] & 0xf]))) + break; + } + return cnt == digest_bin_bytes; +} + static bool digest_check (char const *checkfile_name) { @@ -1061,7 +1148,7 @@ digest_check (char const *checkfile_name) { char *filename; int binary; - unsigned char *hex_digest; + unsigned char *digest; ssize_t line_length; ++line_number; @@ -1088,7 +1175,8 @@ digest_check (char const *checkfile_name) line[line_length] = '\0'; - if (! (split_3 (line, line_length, &hex_digest, &binary, &filename) + size_t d_len; + if (! (split_3 (line, line_length, &digest, &d_len, &binary, &filename) && ! 
(is_stdin && STREQ (filename, "-")))) { ++n_misformatted_lines; @@ -1104,10 +1192,6 @@ digest_check (char const *checkfile_name) } else { - static const char bin2hex[] = { '0', '1', '2', '3', - '4', '5', '6', '7', - '8', '9', 'a', 'b', - 'c', 'd', 'e', 'f' }; bool ok; bool missing; /* Only escape in the edge case producing multiple lines, @@ -1137,34 +1221,30 @@ digest_check (char const *checkfile_name) } else { - size_t digest_bin_bytes = digest_hex_bytes / 2; - size_t cnt; - - /* Compare generated binary number with text representation - in check file. Ignore case of hex digits. */ - for (cnt = 0; cnt < digest_bin_bytes; ++cnt) - { - if (tolower (hex_digest[2 * cnt]) - != bin2hex[bin_buffer[cnt] >> 4] - || (tolower (hex_digest[2 * cnt + 1]) - != (bin2hex[bin_buffer[cnt] & 0xf]))) - break; - } - if (cnt != digest_bin_bytes) - ++n_mismatched_checksums; + bool match = false; +#if HASH_ALGO_CKSUM + if (d_len < digest_hex_bytes) + match = b64_equal (digest, bin_buffer); else +#endif + if (d_len == digest_hex_bytes) + match = hex_equal (digest, bin_buffer); + + if (match) matched_checksums = true; + else + ++n_mismatched_checksums; if (!status_only) { - if (cnt != digest_bin_bytes || ! quiet) + if ( ! matched_checksums || ! quiet) { if (needs_escape) putchar ('\\'); print_filename (filename, needs_escape); } - if (cnt != digest_bin_bytes) + if ( ! 
matched_checksums) printf (": %s\n", _("FAILED")); else if (!quiet) printf (": %s\n", _("OK")); @@ -1338,6 +1418,9 @@ main (int argc, char **argv) strict = true; break; # if HASH_ALGO_CKSUM + case 'b': + base64_digest = true; + break; case UNTAG_OPTION: prefix_tag = false; break; diff --git a/tests/local.mk b/tests/local.mk index f6e3746b6..70a8f6e73 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -293,6 +293,7 @@ all_tests = \ tests/misc/cksum.sh \ tests/misc/cksum-a.sh \ tests/misc/cksum-c.sh \ + tests/misc/cksum-base64.pl \ tests/misc/comm.pl \ tests/misc/csplit.sh \ tests/misc/csplit-1000.sh \ diff --git a/tests/misc/cksum-base64.pl b/tests/misc/cksum-base64.pl new file mode 100755 index 000000000..4fec7a5d7 --- /dev/null +++ b/tests/misc/cksum-base64.pl @@ -0,0 +1,99 @@ +#!/usr/bin/perl +# Exercise cksum's --base64 option. + +# Copyright (C) 2023 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +use strict; + +(my $program_name = $0) =~ s|.*/||; + +# Turn off localization of executable's output. 
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + +# Pairs of hash,degenerate_output, given file name of "f": +my @pairs = + ( + ['sysv', "0 0 f"], + ['bsd', "00000 0 f"], + ['crc', "4294967295 0 f"], + ['md5', "1B2M2Y8AsgTpgAmY7PhCfg=="], + ['sha1', "2jmj7l5rSw0yVb/vlWAYkK/YBwk="], + ['sha224', "0UoCjCo6K8lHYQK7KII0xBWisB+CjqYqxbPkLw=="], + ['sha256', "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="], + ['sha384', "OLBgp1GsljhM2TJ+sbHjaiH9txEUvgdDTAzHv2P24donTt6/529l+9Ua0vFImLlb"], + ['sha512', "z4PhNX7vuL3xVChQ1m2AB9Yg5AULVxXcg/SpIdNs6c5H0NE8XYXysP+DGNKHfuwvY7kxvUdBeoGlODJ6+SfaPg=="], + ['blake2b', "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg=="], + ['sm3', "GrIdg1XPoX+OYRlIMegajyK+yMco/vt0ftA161CCqis="], + ); + +# Return the formatted output for a given hash name/value pair. +# Use the hard-coded "f" as file name. +sub fmt ($$) { + my ($h, $v) = @_; + $h !~ m{^(sysv|bsd|crc)$} and $v = uc($h). " (f) = $v"; + # BLAKE2b is inconsistent: + $v =~ s{BLAKE2B}{BLAKE2b}; + return "$v" +} + +my @Tests = + ( + # Ensure that each of the above works with -b: + (map {my ($h,$v)= @$_; my $o=fmt $h,$v; + [$h, "-ba $h", {IN=>{f=>''}}, {OUT=>"$o\n"}]} @pairs), + + # For each that accepts --check, ensure that works with base64 digests: + (map {my ($h,$v)= @$_; my $o=fmt $h,$v; + ["chk-".$h, "--check --strict", {IN=>$o}, + {AUX=>{f=>''}}, {OUT=>"f: OK\n"}]} + grep { $_->[0] !~ m{^(sysv|bsd|crc)$} } @pairs), + + # For digests ending in "=", ensure --check fails if any "=" is removed. 
+ (map {my ($h,$v)= @$_; my $o=fmt $h,$v; + ["chk-eq1-".$h, "--check", {IN=>$o}, {AUX=>{f=>''}}, + {ERR_SUBST=>"s/.*: //"}, {OUT=>''}, {EXIT=>1}, + {ERR=>"no properly formatted checksum lines found\n"}]} + ( map {my ($h,$v)=@$_; $v =~ s/=$//; [$h,$v] } + grep { $_->[1] =~ m{=$} } @pairs)), + + # Same as above, but for those ending in "==": + (map {my ($h,$v)= @$_; my $o=fmt $h,$v; + ["chk-eq2-".$h, "--check", {IN=>$o}, {AUX=>{f=>''}}, + {ERR_SUBST=>"s/.*: //"}, {OUT=>''}, {EXIT=>1}, + {ERR=>"no properly formatted checksum lines found\n"}]} + ( map {my ($h,$v)=@$_; $v =~ s/==$//; [$h,$v] } + grep { $_->[1] =~ m{==$} } @pairs)), + + # Trigger a read-buffer-overrun error in an early (not committed) + # version of the --base64-adding patch. + ['nul', '-a sha1 --check', {IN=>'\0\0\0'}, + {ERR=>"no properly formatted checksum lines found\n"}, + {ERR_SUBST=>"s/.*: //"}, {OUT=>''}, {EXIT=>1}], + ); + +my $save_temps = $ENV{DEBUG}; +my $verbose = $ENV{VERBOSE}; + +my $prog = 'cksum'; +my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); + +# Ensure hash names from cksum --help match those in @pairs above. +my $help_algs = join ' ', map { m{^ ([[:alpha:]]\S+)} } + grep { m{^ ([[:alpha:]]\S+)} } split ('\n', `cksum --help`); +my $test_algs = join ' ', map {$_->[0]} @pairs; +$help_algs eq $test_algs or die "$help_algs not equal to\n$test_algs"; + +exit $fail; |