summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Meyering <meyering@fb.com>2023-01-20 18:09:26 -0800
committerJim Meyering <meyering@meta.com>2023-01-31 18:24:28 -0800
commitb319685c6e12e66bf357c2384fe69f1c63f66aed (patch)
tree2b3239e4ecbe14df1dbbf54d70a9937f42e4432b
parent7a8db7dbafc932cb08403b863a1c1edec5d283ca (diff)
downloadcoreutils-b319685c6e12e66bf357c2384fe69f1c63f66aed.tar.gz
cksum: accept new option: --base64 (-b)
* src/digest.c [HASH_ALGO_CKSUM]: Include "base64.h". [HASH_ALGO_CKSUM] (base64_digest): New global. [HASH_ALGO_CKSUM] (enum BASE64_DIGEST_OPTION): New enum. [HASH_ALGO_CKSUM] (long_options): Add "base64". (valid_digits): Rename from hex_digits, now taking an input length argument. Adjust callers. (bsd_split_3): Rename arg from hex_digits to digest. Add new *d_len parameter for length of extracted digest. Move "i" declaration down to first use. (split_3): Rename arg from hex_digits to digest. Add new *d_len parameter for length of extracted digest. Instead of relying on "known" length of digest to find the following must-be-whitespace byte, search for the first whitespace byte. [HASH_ALGO_CKSUM] (output_file): Handle base64_digest. [HASH_ALGO_CKSUM] (main): Set base64_digest. [HASH_ALGO_CKSUM] (b64_equal): New function. (hex_equal): New function, factored out of digest_check. (digest_check): Factor part into b64_equal and hex_equal. Rename local hex_digest to digest. * tests/misc/cksum-base64.pl: Add tests. * tests/local.mk (all_tests): Add to the list. * cfg.mk (_cksum): Define. (exclude_file_name_regexp--sc_prohibit_test_backticks): Exempt new test. (exclude_file_name_regexp--sc_long_lines): Likewise. * doc/coreutils.texi (cksum invocation): Document it. (md5sum invocation) [--check]: Mention digest encoding auto-detect. * NEWS (New Features): Mention this.
-rw-r--r--NEWS3
-rw-r--r--cfg.mk5
-rw-r--r--doc/coreutils.texi21
-rw-r--r--src/digest.c185
-rw-r--r--tests/local.mk1
-rwxr-xr-xtests/misc/cksum-base64.pl99
6 files changed, 261 insertions, 53 deletions
diff --git a/NEWS b/NEWS
index c2d3a42ec..b3cde4a01 100644
--- a/NEWS
+++ b/NEWS
@@ -94,6 +94,9 @@ GNU coreutils NEWS -*- outline -*-
** New Features
+ cksum now accepts the --base64 (-b) option to print base64-encoded
+ checksums. It also accepts/checks such checksums.
+
factor now accepts the --exponents (-h) option to print factors
in the form p^e, rather than repeating the prime p, e times.
diff --git a/cfg.mk b/cfg.mk
index 76fda9ab9..18a2453a7 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -885,8 +885,9 @@ exclude_file_name_regexp--sc_prohibit_stat_st_blocks = \
exclude_file_name_regexp--sc_prohibit_continued_string_alpha_in_column_1 = \
^src/(system\.h|od\.c|printf\.c|getlimits\.c)$$
+_cksum = ^tests/misc/cksum-base64\.pl$$
exclude_file_name_regexp--sc_prohibit_test_backticks = \
- ^tests/(local\.mk|(init|misc/stdbuf|factor/create-test)\.sh)$$
+ ^tests/(local\.mk|(init|misc/stdbuf|factor/create-test)\.sh)$$|$(_cksum)
# Exempt test.c, since it's nominally shared, and relatively static.
exclude_file_name_regexp--sc_prohibit_operator_at_end_of_line = \
@@ -901,7 +902,7 @@ exclude_file_name_regexp--sc_prohibit-gl-attributes = ^src/libstdbuf\.c$$
exclude_file_name_regexp--sc_prohibit_uppercase_id_est = \.diff$$
exclude_file_name_regexp--sc_ensure_dblspace_after_dot_before_id_est = \.diff$$
exclude_file_name_regexp--sc_ensure_comma_after_id_est = \.diff|$(_ll)$$
-exclude_file_name_regexp--sc_long_lines = \.diff$$|$(_ll)
+exclude_file_name_regexp--sc_long_lines = \.diff$$|$(_ll)|$(_cksum)
# `grep . -q` is not exactly equivalent to `grep . >/dev/null`
# and this difference is significant in the NEWS description
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 97c5280d3..4d7d9439d 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -4039,6 +4039,22 @@ Supported more modern digest algorithms are:
@samp{sm3} only available through @command{cksum}
@end example
+@item -b
+@itemx --base64
+@opindex -b
+@opindex --base64
+@cindex base64 checksum encoding
+Print base64-encoded digests, not hexadecimal.
+This option is ignored with @option{--check}.
+The format conforms to
+@uref{https://tools.ietf.org/search/rfc4648#section-4, RFC 4648#4}.
+
+Note that each base64-encoded digest has zero, one or two trailing padding
+(@samp{=}) bytes. The length of that padding is the checksum-bit-length
+modulo 3, and the @option{--check} parser requires precisely the same
+input digest string as what is output. I.e., removing or adding any
+@samp{=} padding renders a digest non-matching.
+
@item --debug
@opindex --debug
Output extra information to stderr, like the checksum implementation being used.
@@ -4168,6 +4184,11 @@ For the @command{cksum} command, the @option{--check} option
supports auto-detecting the digest algorithm to use,
when presented with checksum information in the @option{--tag} output format.
+Also for the @command{cksum} command, the @option{--check} option
+auto-detects the digest encoding, accepting both standard hexadecimal
+checksums and those generated via @command{cksum} with its
+@option{--base64} option.
+
Output with @option{--zero} enabled is not supported by @option{--check}.
@sp 1
For each such line, @command{md5sum} reads the named file and computes its
diff --git a/src/digest.c b/src/digest.c
index 8f9354b6a..c0616fcb2 100644
--- a/src/digest.c
+++ b/src/digest.c
@@ -32,6 +32,7 @@
#endif
#if HASH_ALGO_CKSUM
# include "cksum.h"
+# include "base64.h"
#endif
#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM
# include "blake2/b2sum.h"
@@ -204,6 +205,11 @@ static int bsd_reversed = -1;
/* line delimiter. */
static unsigned char digest_delim = '\n';
+#if HASH_ALGO_CKSUM
+/* If true, print base64-encoded digests, not hex. */
+static bool base64_digest = false;
+#endif
+
#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM
# define BLAKE2B_MAX_LEN BLAKE2B_OUTBYTES
static uintmax_t digest_length;
@@ -379,6 +385,7 @@ static struct option const long_options[] =
# if HASH_ALGO_CKSUM
{ "algorithm", required_argument, NULL, 'a'},
+ { "base64", no_argument, NULL, 'b' },
{ "debug", no_argument, NULL, DEBUG_PROGRAM_OPTION},
{ "untagged", no_argument, NULL, UNTAG_OPTION },
# else
@@ -434,6 +441,10 @@ Print or check %s (%d-bit) checksums.\n\
-a, --algorithm=TYPE select the digest type to use. See DIGEST below.\
\n\
"), stdout);
+ fputs (_("\
+ -b, --base64 emit base64-encoded digests, not hexadecimal\
+\n\
+"), stdout);
#endif
#if !HASH_ALGO_SUM
# if !HASH_ALGO_CKSUM
@@ -601,36 +612,61 @@ filename_unescape (char *s, size_t s_len)
return s;
}
-/* Return true if S is a NUL-terminated string of DIGEST_HEX_BYTES hex digits.
- Otherwise, return false. */
+/* Return true if S is a LEN-byte NUL-terminated string of hex or base64
+ digits and has the expected length. Otherwise, return false. */
ATTRIBUTE_PURE
static bool
-hex_digits (unsigned char const *s)
+valid_digits (unsigned char const *s, size_t len)
{
- for (unsigned int i = 0; i < digest_hex_bytes; i++)
+#if HASH_ALGO_CKSUM
+ if (len == BASE64_LENGTH (digest_length / 8))
{
- if (!isxdigit (*s))
- return false;
- ++s;
+ size_t i;
+ for (i = 0; i < len - digest_length % 3; i++)
+ {
+ if (!isbase64 (*s))
+ return false;
+ ++s;
+ }
+ for ( ; i < len; i++)
+ {
+ if (*s != '=')
+ return false;
+ ++s;
+ }
+ }
+ else
+#endif
+ if (len == digest_hex_bytes)
+ {
+ for (unsigned int i = 0; i < digest_hex_bytes; i++)
+ {
+ if (!isxdigit (*s))
+ return false;
+ ++s;
+ }
}
+ else
+ return false;
+
return *s == '\0';
}
/* Split the checksum string S (of length S_LEN) from a BSD 'md5' or
'sha1' command into two parts: a hexadecimal digest, and the file
- name. S is modified. Return true if successful. */
+ name. S is modified. Set *D_LEN to the length of the digest string.
+ Return true if successful. */
static bool
-bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
+bsd_split_3 (char *s, size_t s_len,
+ unsigned char **digest, size_t *d_len,
char **file_name, bool escaped_filename)
{
- size_t i;
-
if (s_len == 0)
return false;
/* Find end of filename. */
- i = s_len - 1;
+ size_t i = s_len - 1;
while (i && s[i] != ')')
i--;
@@ -655,9 +691,10 @@ bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
while (ISWHITE (s[i]))
i++;
- *hex_digest = (unsigned char *) &s[i];
+ *digest = (unsigned char *) &s[i];
- return hex_digits (*hex_digest);
+ *d_len = s_len - i;
+ return valid_digits (*digest, *d_len);
}
#if HASH_ALGO_CKSUM
@@ -701,11 +738,12 @@ algorithm_from_tag (char *s)
/* Split the string S (of length S_LEN) into three parts:
a hexadecimal digest, binary flag, and the file name.
- S is modified. Return true if successful. */
+ S is modified. Set *D_LEN to the length of the digest string.
+ Return true if successful. */
static bool
split_3 (char *s, size_t s_len,
- unsigned char **hex_digest, int *binary, char **file_name)
+ unsigned char **digest, size_t *d_len, int *binary, char **file_name)
{
bool escaped_filename = false;
size_t algo_name_len;
@@ -778,7 +816,7 @@ split_3 (char *s, size_t s_len,
++i;
*binary = 0;
return bsd_split_3 (s + i, s_len - i,
- hex_digest, file_name, escaped_filename);
+ digest, d_len, file_name, escaped_filename);
}
return false;
}
@@ -790,14 +828,14 @@ split_3 (char *s, size_t s_len,
if (s_len - i < min_digest_line_length + (s[i] == '\\'))
return false;
- *hex_digest = (unsigned char *) &s[i];
+ *digest = (unsigned char *) &s[i];
#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM
/* Auto determine length. */
# if HASH_ALGO_CKSUM
if (cksum_algorithm == blake2b) {
# endif
- unsigned char const *hp = *hex_digest;
+ unsigned char const *hp = *digest;
digest_hex_bytes = 0;
while (isxdigit (*hp++))
digest_hex_bytes++;
@@ -810,16 +848,15 @@ split_3 (char *s, size_t s_len,
# endif
#endif
- /* The first field has to be the n-character hexadecimal
- representation of the message digest. If it is not followed
- immediately by a white space it's an error. */
- i += digest_hex_bytes;
- if (!ISWHITE (s[i]))
- return false;
+ /* This field must be the hexadecimal or base64 representation
+ of the message digest. */
+ while (s[i] && !ISWHITE (s[i]))
+ i++;
+ *d_len = &s[i] - (char *) *digest;
s[i++] = '\0';
- if (! hex_digits (*hex_digest))
+ if (! valid_digits (*digest, *d_len))
return false;
/* If "bsd reversed" format detected. */
@@ -1000,8 +1037,20 @@ output_file (char const *file, int binary_file, void const *digest,
fputs (") = ", stdout);
}
- for (size_t i = 0; i < (digest_hex_bytes / 2); ++i)
- printf ("%02x", bin_buffer[i]);
+# if HASH_ALGO_CKSUM
+ if (base64_digest)
+ {
+ char b64[BASE64_LENGTH (DIGEST_BIN_BYTES) + 1];
+ base64_encode ((char const *) bin_buffer, digest_length / 8,
+ b64, sizeof b64);
+ fputs (b64, stdout);
+ }
+ else
+# endif
+ {
+ for (size_t i = 0; i < (digest_hex_bytes / 2); ++i)
+ printf ("%02x", bin_buffer[i]);
+ }
if (!tagged)
{
@@ -1021,6 +1070,44 @@ output_file (char const *file, int binary_file, void const *digest,
}
#endif
+#if HASH_ALGO_CKSUM
+/* Return true if B64_DIGEST is the same as the base64 digest of the
+ DIGEST_LENGTH/8 bytes at BIN_BUFFER. */
+static bool
+b64_equal (unsigned char const *b64_digest, unsigned char const *bin_buffer)
+{
+ size_t b64_n_bytes = BASE64_LENGTH (digest_length / 8);
+ char b64[BASE64_LENGTH (DIGEST_BIN_BYTES) + 1];
+ base64_encode ((char const *) bin_buffer, digest_length / 8, b64, sizeof b64);
+ return memcmp (b64_digest, b64, b64_n_bytes + 1) == 0;
+}
+#endif
+
+/* Return true if HEX_DIGEST is the same as the hex-encoded digest of the
+ DIGEST_LENGTH/8 bytes at BIN_BUFFER. */
+static bool
+hex_equal (unsigned char const *hex_digest, unsigned char const *bin_buffer)
+{
+ static const char bin2hex[] = { '0', '1', '2', '3',
+ '4', '5', '6', '7',
+ '8', '9', 'a', 'b',
+ 'c', 'd', 'e', 'f' };
+ size_t digest_bin_bytes = digest_hex_bytes / 2;
+
+ /* Compare generated binary number with text representation
+ in check file. Ignore case of hex digits. */
+ size_t cnt;
+ for (cnt = 0; cnt < digest_bin_bytes; ++cnt)
+ {
+ if (tolower (hex_digest[2 * cnt])
+ != bin2hex[bin_buffer[cnt] >> 4]
+ || (tolower (hex_digest[2 * cnt + 1])
+ != (bin2hex[bin_buffer[cnt] & 0xf])))
+ break;
+ }
+ return cnt == digest_bin_bytes;
+}
+
static bool
digest_check (char const *checkfile_name)
{
@@ -1061,7 +1148,7 @@ digest_check (char const *checkfile_name)
{
char *filename;
int binary;
- unsigned char *hex_digest;
+ unsigned char *digest;
ssize_t line_length;
++line_number;
@@ -1088,7 +1175,8 @@ digest_check (char const *checkfile_name)
line[line_length] = '\0';
- if (! (split_3 (line, line_length, &hex_digest, &binary, &filename)
+ size_t d_len;
+ if (! (split_3 (line, line_length, &digest, &d_len, &binary, &filename)
&& ! (is_stdin && STREQ (filename, "-"))))
{
++n_misformatted_lines;
@@ -1104,10 +1192,6 @@ digest_check (char const *checkfile_name)
}
else
{
- static const char bin2hex[] = { '0', '1', '2', '3',
- '4', '5', '6', '7',
- '8', '9', 'a', 'b',
- 'c', 'd', 'e', 'f' };
bool ok;
bool missing;
/* Only escape in the edge case producing multiple lines,
@@ -1137,34 +1221,30 @@ digest_check (char const *checkfile_name)
}
else
{
- size_t digest_bin_bytes = digest_hex_bytes / 2;
- size_t cnt;
-
- /* Compare generated binary number with text representation
- in check file. Ignore case of hex digits. */
- for (cnt = 0; cnt < digest_bin_bytes; ++cnt)
- {
- if (tolower (hex_digest[2 * cnt])
- != bin2hex[bin_buffer[cnt] >> 4]
- || (tolower (hex_digest[2 * cnt + 1])
- != (bin2hex[bin_buffer[cnt] & 0xf])))
- break;
- }
- if (cnt != digest_bin_bytes)
- ++n_mismatched_checksums;
+ /* Decide whether this line's digest matches the computed one.
+ A digest shorter than the hex length is compared as base64,
+ one of exactly the hex length as hex; any other length leaves
+ MATCH false and so counts as a mismatch. */
+ bool match = false;
+#if HASH_ALGO_CKSUM
+ if (d_len < digest_hex_bytes)
+ match = b64_equal (digest, bin_buffer);
else
+#endif
+ if (d_len == digest_hex_bytes)
+ match = hex_equal (digest, bin_buffer);
+
+ if (match)
matched_checksums = true;
+ else
+ ++n_mismatched_checksums;
if (!status_only)
{
- if (cnt != digest_bin_bytes || ! quiet)
+ /* Report this line's own result (MATCH), not matched_checksums.
+ That flag is only ever set to true above and is not cleared
+ in this code, so testing it would print "OK" for a failing
+ line once any earlier line had matched.
+ NOTE(review): its declaration is outside this hunk; if it is
+ in fact reset per line, testing MATCH is equivalent. */
+ if ( ! match || ! quiet)
{
if (needs_escape)
putchar ('\\');
print_filename (filename, needs_escape);
}
- if (cnt != digest_bin_bytes)
+ if ( ! match)
printf (": %s\n", _("FAILED"));
else if (!quiet)
printf (": %s\n", _("OK"));
@@ -1338,6 +1418,9 @@ main (int argc, char **argv)
strict = true;
break;
# if HASH_ALGO_CKSUM
+ case 'b':
+ base64_digest = true;
+ break;
case UNTAG_OPTION:
prefix_tag = false;
break;
diff --git a/tests/local.mk b/tests/local.mk
index f6e3746b6..70a8f6e73 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -293,6 +293,7 @@ all_tests = \
tests/misc/cksum.sh \
tests/misc/cksum-a.sh \
tests/misc/cksum-c.sh \
+ tests/misc/cksum-base64.pl \
tests/misc/comm.pl \
tests/misc/csplit.sh \
tests/misc/csplit-1000.sh \
diff --git a/tests/misc/cksum-base64.pl b/tests/misc/cksum-base64.pl
new file mode 100755
index 000000000..4fec7a5d7
--- /dev/null
+++ b/tests/misc/cksum-base64.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+# Exercise cksum's --base64 option.
+
+# Copyright (C) 2023 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+use strict;
+
+(my $program_name = $0) =~ s|.*/||;
+
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+# Pairs of hash,degenerate_output, given file name of "f":
+my @pairs =
+ (
+ ['sysv', "0 0 f"],
+ ['bsd', "00000 0 f"],
+ ['crc', "4294967295 0 f"],
+ ['md5', "1B2M2Y8AsgTpgAmY7PhCfg=="],
+ ['sha1', "2jmj7l5rSw0yVb/vlWAYkK/YBwk="],
+ ['sha224', "0UoCjCo6K8lHYQK7KII0xBWisB+CjqYqxbPkLw=="],
+ ['sha256', "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="],
+ ['sha384', "OLBgp1GsljhM2TJ+sbHjaiH9txEUvgdDTAzHv2P24donTt6/529l+9Ua0vFImLlb"],
+ ['sha512', "z4PhNX7vuL3xVChQ1m2AB9Yg5AULVxXcg/SpIdNs6c5H0NE8XYXysP+DGNKHfuwvY7kxvUdBeoGlODJ6+SfaPg=="],
+ ['blake2b', "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg=="],
+ ['sm3', "GrIdg1XPoX+OYRlIMegajyK+yMco/vt0ftA161CCqis="],
+ );
+
+# Return the formatted output for a given hash name/value pair.
+# Use the hard-coded "f" as file name.
+sub fmt ($$) {
+ my ($h, $v) = @_;
+ $h !~ m{^(sysv|bsd|crc)$} and $v = uc($h). " (f) = $v";
+ # BLAKE2b is inconsistent:
+ $v =~ s{BLAKE2B}{BLAKE2b};
+ return "$v"
+}
+
+my @Tests =
+ (
+ # Ensure that each of the above works with -b:
+ (map {my ($h,$v)= @$_; my $o=fmt $h,$v;
+ [$h, "-ba $h", {IN=>{f=>''}}, {OUT=>"$o\n"}]} @pairs),
+
+ # For each that accepts --check, ensure that works with base64 digests:
+ (map {my ($h,$v)= @$_; my $o=fmt $h,$v;
+ ["chk-".$h, "--check --strict", {IN=>$o},
+ {AUX=>{f=>''}}, {OUT=>"f: OK\n"}]}
+ grep { $_->[0] !~ m{^(sysv|bsd|crc)$} } @pairs),
+
+ # For digests ending in "=", ensure --check fails if any "=" is removed.
+ (map {my ($h,$v)= @$_; my $o=fmt $h,$v;
+ ["chk-eq1-".$h, "--check", {IN=>$o}, {AUX=>{f=>''}},
+ {ERR_SUBST=>"s/.*: //"}, {OUT=>''}, {EXIT=>1},
+ {ERR=>"no properly formatted checksum lines found\n"}]}
+ ( map {my ($h,$v)=@$_; $v =~ s/=$//; [$h,$v] }
+ grep { $_->[1] =~ m{=$} } @pairs)),
+
+ # Same as above, but for those ending in "==":
+ (map {my ($h,$v)= @$_; my $o=fmt $h,$v;
+ ["chk-eq2-".$h, "--check", {IN=>$o}, {AUX=>{f=>''}},
+ {ERR_SUBST=>"s/.*: //"}, {OUT=>''}, {EXIT=>1},
+ {ERR=>"no properly formatted checksum lines found\n"}]}
+ ( map {my ($h,$v)=@$_; $v =~ s/==$//; [$h,$v] }
+ grep { $_->[1] =~ m{==$} } @pairs)),
+
+ # Trigger a read-buffer-overrun error in an early (not committed)
+ # version of the --base64-adding patch.
+ ['nul', '-a sha1 --check', {IN=>'\0\0\0'},
+ {ERR=>"no properly formatted checksum lines found\n"},
+ {ERR_SUBST=>"s/.*: //"}, {OUT=>''}, {EXIT=>1}],
+ );
+
+my $save_temps = $ENV{DEBUG};
+my $verbose = $ENV{VERBOSE};
+
+my $prog = 'cksum';
+my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
+
+# Ensure hash names from cksum --help match those in @pairs above.
+my $help_algs = join ' ', map { m{^ ([[:alpha:]]\S+)} }
+ grep { m{^ ([[:alpha:]]\S+)} } split ('\n', `cksum --help`);
+my $test_algs = join ' ', map {$_->[0]} @pairs;
+$help_algs eq $test_algs or die "$help_algs not equal to\n$test_algs";
+
+exit $fail;